1 //===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This provides a class for OpenMP runtime code generation.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "CGCXXABI.h"
14 #include "CGCleanup.h"
15 #include "CGOpenMPRuntime.h"
16 #include "CGRecordLayout.h"
17 #include "CodeGenFunction.h"
18 #include "clang/CodeGen/ConstantInitBuilder.h"
19 #include "clang/AST/Decl.h"
20 #include "clang/AST/StmtOpenMP.h"
21 #include "clang/Basic/BitmaskEnum.h"
22 #include "llvm/ADT/ArrayRef.h"
23 #include "llvm/Bitcode/BitcodeReader.h"
24 #include "llvm/IR/DerivedTypes.h"
25 #include "llvm/IR/GlobalValue.h"
26 #include "llvm/IR/Value.h"
27 #include "llvm/Support/Format.h"
28 #include "llvm/Support/raw_ostream.h"
29 #include <cassert>
30 
31 using namespace clang;
32 using namespace CodeGen;
33 
34 namespace {
35 /// Base class for handling code generation inside OpenMP regions.
36 class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
37 public:
38   /// Kinds of OpenMP regions used in codegen.
39   enum CGOpenMPRegionKind {
40     /// Region with outlined function for standalone 'parallel'
41     /// directive.
42     ParallelOutlinedRegion,
43     /// Region with outlined function for standalone 'task' directive.
44     TaskOutlinedRegion,
45     /// Region for constructs that do not require function outlining,
46     /// like 'for', 'sections', 'atomic' etc. directives.
47     InlinedRegion,
48     /// Region with outlined function for standalone 'target' directive.
49     TargetRegion,
50   };
51 
52   CGOpenMPRegionInfo(const CapturedStmt &CS,
53                      const CGOpenMPRegionKind RegionKind,
54                      const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
55                      bool HasCancel)
56       : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind),
57         CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {}
58 
59   CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind,
60                      const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
61                      bool HasCancel)
62       : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen),
63         Kind(Kind), HasCancel(HasCancel) {}
64 
65   /// Get a variable or parameter for storing global thread id
66   /// inside OpenMP construct.
67   virtual const VarDecl *getThreadIDVariable() const = 0;
68 
69   /// Emit the captured statement body.
70   void EmitBody(CodeGenFunction &CGF, const Stmt *S) override;
71 
72   /// Get an LValue for the current ThreadID variable.
73   /// \return LValue for thread id variable. This LValue always has type int32*.
74   virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF);
75 
76   virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {}
77 
78   CGOpenMPRegionKind getRegionKind() const { return RegionKind; }
79 
80   OpenMPDirectiveKind getDirectiveKind() const { return Kind; }
81 
82   bool hasCancel() const { return HasCancel; }
83 
84   static bool classof(const CGCapturedStmtInfo *Info) {
85     return Info->getKind() == CR_OpenMP;
86   }
87 
88   ~CGOpenMPRegionInfo() override = default;
89 
90 protected:
91   CGOpenMPRegionKind RegionKind;
92   RegionCodeGenTy CodeGen;
93   OpenMPDirectiveKind Kind;
94   bool HasCancel;
95 };
96 
97 /// API for captured statement code generation in OpenMP constructs.
98 class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo {
99 public:
100   CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar,
101                              const RegionCodeGenTy &CodeGen,
102                              OpenMPDirectiveKind Kind, bool HasCancel,
103                              StringRef HelperName)
104       : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind,
105                            HasCancel),
106         ThreadIDVar(ThreadIDVar), HelperName(HelperName) {
107     assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
108   }
109 
110   /// Get a variable or parameter for storing global thread id
111   /// inside OpenMP construct.
112   const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }
113 
114   /// Get the name of the capture helper.
115   StringRef getHelperName() const override { return HelperName; }
116 
117   static bool classof(const CGCapturedStmtInfo *Info) {
118     return CGOpenMPRegionInfo::classof(Info) &&
119            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
120                ParallelOutlinedRegion;
121   }
122 
123 private:
124   /// A variable or parameter storing global thread id for OpenMP
125   /// constructs.
126   const VarDecl *ThreadIDVar;
127   StringRef HelperName;
128 };
129 
130 /// API for captured statement code generation in OpenMP constructs.
131 class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo {
132 public:
133   class UntiedTaskActionTy final : public PrePostActionTy {
134     bool Untied;
135     const VarDecl *PartIDVar;
136     const RegionCodeGenTy UntiedCodeGen;
137     llvm::SwitchInst *UntiedSwitch = nullptr;
138 
139   public:
140     UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar,
141                        const RegionCodeGenTy &UntiedCodeGen)
142         : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {}
143     void Enter(CodeGenFunction &CGF) override {
144       if (Untied) {
145         // Emit task switching point.
146         LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
147             CGF.GetAddrOfLocalVar(PartIDVar),
148             PartIDVar->getType()->castAs<PointerType>());
149         llvm::Value *Res =
150             CGF.EmitLoadOfScalar(PartIdLVal, PartIDVar->getLocation());
151         llvm::BasicBlock *DoneBB = CGF.createBasicBlock(".untied.done.");
152         UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB);
153         CGF.EmitBlock(DoneBB);
154         CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
155         CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
156         UntiedSwitch->addCase(CGF.Builder.getInt32(0),
157                               CGF.Builder.GetInsertBlock());
158         emitUntiedSwitch(CGF);
159       }
160     }
161     void emitUntiedSwitch(CodeGenFunction &CGF) const {
162       if (Untied) {
163         LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
164             CGF.GetAddrOfLocalVar(PartIDVar),
165             PartIDVar->getType()->castAs<PointerType>());
166         CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
167                               PartIdLVal);
168         UntiedCodeGen(CGF);
169         CodeGenFunction::JumpDest CurPoint =
170             CGF.getJumpDestInCurrentScope(".untied.next.");
171         CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
172         CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
173         UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
174                               CGF.Builder.GetInsertBlock());
175         CGF.EmitBranchThroughCleanup(CurPoint);
176         CGF.EmitBlock(CurPoint.getBlock());
177       }
178     }
179     unsigned getNumberOfParts() const { return UntiedSwitch->getNumCases(); }
180   };
181   CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS,
182                                  const VarDecl *ThreadIDVar,
183                                  const RegionCodeGenTy &CodeGen,
184                                  OpenMPDirectiveKind Kind, bool HasCancel,
185                                  const UntiedTaskActionTy &Action)
186       : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel),
187         ThreadIDVar(ThreadIDVar), Action(Action) {
188     assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
189   }
190 
191   /// Get a variable or parameter for storing global thread id
192   /// inside OpenMP construct.
193   const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }
194 
195   /// Get an LValue for the current ThreadID variable.
196   LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override;
197 
198   /// Get the name of the capture helper.
199   StringRef getHelperName() const override { return ".omp_outlined."; }
200 
201   void emitUntiedSwitch(CodeGenFunction &CGF) override {
202     Action.emitUntiedSwitch(CGF);
203   }
204 
205   static bool classof(const CGCapturedStmtInfo *Info) {
206     return CGOpenMPRegionInfo::classof(Info) &&
207            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
208                TaskOutlinedRegion;
209   }
210 
211 private:
212   /// A variable or parameter storing global thread id for OpenMP
213   /// constructs.
214   const VarDecl *ThreadIDVar;
215   /// Action for emitting code for untied tasks.
216   const UntiedTaskActionTy &Action;
217 };
218 
219 /// API for inlined captured statement code generation in OpenMP
220 /// constructs.
221 class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo {
222 public:
223   CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI,
224                             const RegionCodeGenTy &CodeGen,
225                             OpenMPDirectiveKind Kind, bool HasCancel)
226       : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel),
227         OldCSI(OldCSI),
228         OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {}
229 
230   // Retrieve the value of the context parameter.
231   llvm::Value *getContextValue() const override {
232     if (OuterRegionInfo)
233       return OuterRegionInfo->getContextValue();
234     llvm_unreachable("No context value for inlined OpenMP region");
235   }
236 
237   void setContextValue(llvm::Value *V) override {
238     if (OuterRegionInfo) {
239       OuterRegionInfo->setContextValue(V);
240       return;
241     }
242     llvm_unreachable("No context value for inlined OpenMP region");
243   }
244 
245   /// Lookup the captured field decl for a variable.
246   const FieldDecl *lookup(const VarDecl *VD) const override {
247     if (OuterRegionInfo)
248       return OuterRegionInfo->lookup(VD);
249     // If there is no outer outlined region,no need to lookup in a list of
250     // captured variables, we can use the original one.
251     return nullptr;
252   }
253 
254   FieldDecl *getThisFieldDecl() const override {
255     if (OuterRegionInfo)
256       return OuterRegionInfo->getThisFieldDecl();
257     return nullptr;
258   }
259 
260   /// Get a variable or parameter for storing global thread id
261   /// inside OpenMP construct.
262   const VarDecl *getThreadIDVariable() const override {
263     if (OuterRegionInfo)
264       return OuterRegionInfo->getThreadIDVariable();
265     return nullptr;
266   }
267 
268   /// Get an LValue for the current ThreadID variable.
269   LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override {
270     if (OuterRegionInfo)
271       return OuterRegionInfo->getThreadIDVariableLValue(CGF);
272     llvm_unreachable("No LValue for inlined OpenMP construct");
273   }
274 
275   /// Get the name of the capture helper.
276   StringRef getHelperName() const override {
277     if (auto *OuterRegionInfo = getOldCSI())
278       return OuterRegionInfo->getHelperName();
279     llvm_unreachable("No helper name for inlined OpenMP construct");
280   }
281 
282   void emitUntiedSwitch(CodeGenFunction &CGF) override {
283     if (OuterRegionInfo)
284       OuterRegionInfo->emitUntiedSwitch(CGF);
285   }
286 
287   CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; }
288 
289   static bool classof(const CGCapturedStmtInfo *Info) {
290     return CGOpenMPRegionInfo::classof(Info) &&
291            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion;
292   }
293 
294   ~CGOpenMPInlinedRegionInfo() override = default;
295 
296 private:
297   /// CodeGen info about outer OpenMP region.
298   CodeGenFunction::CGCapturedStmtInfo *OldCSI;
299   CGOpenMPRegionInfo *OuterRegionInfo;
300 };
301 
302 /// API for captured statement code generation in OpenMP target
303 /// constructs. For this captures, implicit parameters are used instead of the
304 /// captured fields. The name of the target region has to be unique in a given
305 /// application so it is provided by the client, because only the client has
306 /// the information to generate that.
307 class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo {
308 public:
309   CGOpenMPTargetRegionInfo(const CapturedStmt &CS,
310                            const RegionCodeGenTy &CodeGen, StringRef HelperName)
311       : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target,
312                            /*HasCancel=*/false),
313         HelperName(HelperName) {}
314 
315   /// This is unused for target regions because each starts executing
316   /// with a single thread.
317   const VarDecl *getThreadIDVariable() const override { return nullptr; }
318 
319   /// Get the name of the capture helper.
320   StringRef getHelperName() const override { return HelperName; }
321 
322   static bool classof(const CGCapturedStmtInfo *Info) {
323     return CGOpenMPRegionInfo::classof(Info) &&
324            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion;
325   }
326 
327 private:
328   StringRef HelperName;
329 };
330 
331 static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) {
332   llvm_unreachable("No codegen for expressions");
333 }
334 /// API for generation of expressions captured in a innermost OpenMP
335 /// region.
336 class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo {
337 public:
338   CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS)
339       : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen,
340                                   OMPD_unknown,
341                                   /*HasCancel=*/false),
342         PrivScope(CGF) {
343     // Make sure the globals captured in the provided statement are local by
344     // using the privatization logic. We assume the same variable is not
345     // captured more than once.
346     for (const auto &C : CS.captures()) {
347       if (!C.capturesVariable() && !C.capturesVariableByCopy())
348         continue;
349 
350       const VarDecl *VD = C.getCapturedVar();
351       if (VD->isLocalVarDeclOrParm())
352         continue;
353 
354       DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD),
355                       /*RefersToEnclosingVariableOrCapture=*/false,
356                       VD->getType().getNonReferenceType(), VK_LValue,
357                       C.getLocation());
358       PrivScope.addPrivate(
359           VD, [&CGF, &DRE]() { return CGF.EmitLValue(&DRE).getAddress(); });
360     }
361     (void)PrivScope.Privatize();
362   }
363 
364   /// Lookup the captured field decl for a variable.
365   const FieldDecl *lookup(const VarDecl *VD) const override {
366     if (const FieldDecl *FD = CGOpenMPInlinedRegionInfo::lookup(VD))
367       return FD;
368     return nullptr;
369   }
370 
371   /// Emit the captured statement body.
372   void EmitBody(CodeGenFunction &CGF, const Stmt *S) override {
373     llvm_unreachable("No body for expressions");
374   }
375 
376   /// Get a variable or parameter for storing global thread id
377   /// inside OpenMP construct.
378   const VarDecl *getThreadIDVariable() const override {
379     llvm_unreachable("No thread id for expressions");
380   }
381 
382   /// Get the name of the capture helper.
383   StringRef getHelperName() const override {
384     llvm_unreachable("No helper name for expressions");
385   }
386 
387   static bool classof(const CGCapturedStmtInfo *Info) { return false; }
388 
389 private:
390   /// Private scope to capture global variables.
391   CodeGenFunction::OMPPrivateScope PrivScope;
392 };
393 
394 /// RAII for emitting code of OpenMP constructs.
395 class InlinedOpenMPRegionRAII {
396   CodeGenFunction &CGF;
397   llvm::DenseMap<const VarDecl *, FieldDecl *> LambdaCaptureFields;
398   FieldDecl *LambdaThisCaptureField = nullptr;
399   const CodeGen::CGBlockInfo *BlockInfo = nullptr;
400 
401 public:
402   /// Constructs region for combined constructs.
403   /// \param CodeGen Code generation sequence for combined directives. Includes
404   /// a list of functions used for code generation of implicitly inlined
405   /// regions.
406   InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen,
407                           OpenMPDirectiveKind Kind, bool HasCancel)
408       : CGF(CGF) {
409     // Start emission for the construct.
410     CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo(
411         CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel);
412     std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
413     LambdaThisCaptureField = CGF.LambdaThisCaptureField;
414     CGF.LambdaThisCaptureField = nullptr;
415     BlockInfo = CGF.BlockInfo;
416     CGF.BlockInfo = nullptr;
417   }
418 
419   ~InlinedOpenMPRegionRAII() {
420     // Restore original CapturedStmtInfo only if we're done with code emission.
421     auto *OldCSI =
422         cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI();
423     delete CGF.CapturedStmtInfo;
424     CGF.CapturedStmtInfo = OldCSI;
425     std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
426     CGF.LambdaThisCaptureField = LambdaThisCaptureField;
427     CGF.BlockInfo = BlockInfo;
428   }
429 };
430 
431 /// Values for bit flags used in the ident_t to describe the fields.
432 /// All enumeric elements are named and described in accordance with the code
433 /// from https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h
434 enum OpenMPLocationFlags : unsigned {
435   /// Use trampoline for internal microtask.
436   OMP_IDENT_IMD = 0x01,
437   /// Use c-style ident structure.
438   OMP_IDENT_KMPC = 0x02,
439   /// Atomic reduction option for kmpc_reduce.
440   OMP_ATOMIC_REDUCE = 0x10,
441   /// Explicit 'barrier' directive.
442   OMP_IDENT_BARRIER_EXPL = 0x20,
443   /// Implicit barrier in code.
444   OMP_IDENT_BARRIER_IMPL = 0x40,
445   /// Implicit barrier in 'for' directive.
446   OMP_IDENT_BARRIER_IMPL_FOR = 0x40,
447   /// Implicit barrier in 'sections' directive.
448   OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0,
449   /// Implicit barrier in 'single' directive.
450   OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140,
451   /// Call of __kmp_for_static_init for static loop.
452   OMP_IDENT_WORK_LOOP = 0x200,
453   /// Call of __kmp_for_static_init for sections.
454   OMP_IDENT_WORK_SECTIONS = 0x400,
455   /// Call of __kmp_for_static_init for distribute.
456   OMP_IDENT_WORK_DISTRIBUTE = 0x800,
457   LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE)
458 };
459 
460 namespace {
461 LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
462 /// Values for bit flags for marking which requires clauses have been used.
463 enum OpenMPOffloadingRequiresDirFlags : int64_t {
464   /// flag undefined.
465   OMP_REQ_UNDEFINED               = 0x000,
466   /// no requires clause present.
467   OMP_REQ_NONE                    = 0x001,
468   /// reverse_offload clause.
469   OMP_REQ_REVERSE_OFFLOAD         = 0x002,
470   /// unified_address clause.
471   OMP_REQ_UNIFIED_ADDRESS         = 0x004,
472   /// unified_shared_memory clause.
473   OMP_REQ_UNIFIED_SHARED_MEMORY   = 0x008,
474   /// dynamic_allocators clause.
475   OMP_REQ_DYNAMIC_ALLOCATORS      = 0x010,
476   LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_REQ_DYNAMIC_ALLOCATORS)
477 };
478 
/// Device id values with a reserved meaning.
enum OpenMPOffloadingReservedDeviceIDs {
  /// Used when no device was specified; the runtime should then take the
  /// device from the environment variables described in the spec.
  OMP_DEVICEID_UNDEF = -1
};
484 } // anonymous namespace
485 
486 /// Describes ident structure that describes a source location.
487 /// All descriptions are taken from
488 /// https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h
489 /// Original structure:
490 /// typedef struct ident {
491 ///    kmp_int32 reserved_1;   /**<  might be used in Fortran;
492 ///                                  see above  */
493 ///    kmp_int32 flags;        /**<  also f.flags; KMP_IDENT_xxx flags;
494 ///                                  KMP_IDENT_KMPC identifies this union
495 ///                                  member  */
496 ///    kmp_int32 reserved_2;   /**<  not really used in Fortran any more;
497 ///                                  see above */
498 ///#if USE_ITT_BUILD
499 ///                            /*  but currently used for storing
500 ///                                region-specific ITT */
501 ///                            /*  contextual information. */
502 ///#endif /* USE_ITT_BUILD */
503 ///    kmp_int32 reserved_3;   /**< source[4] in Fortran, do not use for
504 ///                                 C++  */
505 ///    char const *psource;    /**< String describing the source location.
506 ///                            The string is composed of semi-colon separated
///                            fields which describe the source file,
508 ///                            the function and a pair of line numbers that
509 ///                            delimit the construct.
510 ///                             */
511 /// } ident_t;
enum IdentFieldIndex {
  /// Might be used in Fortran.
  IdentField_Reserved_1 = 0,
  /// OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member.
  IdentField_Flags = 1,
  /// Not really used in Fortran any more.
  IdentField_Reserved_2 = 2,
  /// Source[4] in Fortran, do not use for C++.
  IdentField_Reserved_3 = 3,
  /// String describing the source location. It consists of semi-colon
  /// separated fields naming the source file, the function and a pair of
  /// line numbers that delimit the construct.
  IdentField_PSource = 4
};
526 
/// Schedule kinds for 'omp for' loops. The enumerators mirror the
/// enum sched_type in kmp.h.
enum OpenMPSchedType {
  // Default (unordered) versions.

  /// Lower bound for default (unordered) versions.
  OMP_sch_lower = 32,
  OMP_sch_static_chunked = 33,
  OMP_sch_static = 34,
  OMP_sch_dynamic_chunked = 35,
  OMP_sch_guided_chunked = 36,
  OMP_sch_runtime = 37,
  OMP_sch_auto = 38,
  /// Static with chunk adjustment (e.g., simd).
  OMP_sch_static_balanced_chunked = 45,

  // 'ordered' versions.

  /// Lower bound for 'ordered' versions.
  OMP_ord_lower = 64,
  OMP_ord_static_chunked = 65,
  OMP_ord_static = 66,
  OMP_ord_dynamic_chunked = 67,
  OMP_ord_guided_chunked = 68,
  OMP_ord_runtime = 69,
  OMP_ord_auto = 70,

  /// Schedule used by default; an alias of OMP_sch_static.
  OMP_sch_default = OMP_sch_static,

  // dist_schedule types.

  OMP_dist_sch_static_chunked = 91,
  OMP_dist_sch_static = 92,

  // Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers.

  /// Set if the monotonic schedule modifier was present.
  OMP_sch_modifier_monotonic = 1 << 29,
  /// Set if the nonmonotonic schedule modifier was present.
  OMP_sch_modifier_nonmonotonic = 1 << 30,
};
558 
559 enum OpenMPRTLFunction {
560   /// Call to void __kmpc_fork_call(ident_t *loc, kmp_int32 argc,
561   /// kmpc_micro microtask, ...);
562   OMPRTL__kmpc_fork_call,
563   /// Call to void *__kmpc_threadprivate_cached(ident_t *loc,
564   /// kmp_int32 global_tid, void *data, size_t size, void ***cache);
565   OMPRTL__kmpc_threadprivate_cached,
566   /// Call to void __kmpc_threadprivate_register( ident_t *,
567   /// void *data, kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor);
568   OMPRTL__kmpc_threadprivate_register,
  // Call to kmp_int32 __kmpc_global_thread_num(ident_t *loc);
570   OMPRTL__kmpc_global_thread_num,
571   // Call to void __kmpc_critical(ident_t *loc, kmp_int32 global_tid,
572   // kmp_critical_name *crit);
573   OMPRTL__kmpc_critical,
574   // Call to void __kmpc_critical_with_hint(ident_t *loc, kmp_int32
575   // global_tid, kmp_critical_name *crit, uintptr_t hint);
576   OMPRTL__kmpc_critical_with_hint,
577   // Call to void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid,
578   // kmp_critical_name *crit);
579   OMPRTL__kmpc_end_critical,
580   // Call to kmp_int32 __kmpc_cancel_barrier(ident_t *loc, kmp_int32
581   // global_tid);
582   OMPRTL__kmpc_cancel_barrier,
583   // Call to void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid);
584   OMPRTL__kmpc_barrier,
585   // Call to void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid);
586   OMPRTL__kmpc_for_static_fini,
587   // Call to void __kmpc_serialized_parallel(ident_t *loc, kmp_int32
588   // global_tid);
589   OMPRTL__kmpc_serialized_parallel,
590   // Call to void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32
591   // global_tid);
592   OMPRTL__kmpc_end_serialized_parallel,
593   // Call to void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
594   // kmp_int32 num_threads);
595   OMPRTL__kmpc_push_num_threads,
596   // Call to void __kmpc_flush(ident_t *loc);
597   OMPRTL__kmpc_flush,
598   // Call to kmp_int32 __kmpc_master(ident_t *, kmp_int32 global_tid);
599   OMPRTL__kmpc_master,
600   // Call to void __kmpc_end_master(ident_t *, kmp_int32 global_tid);
601   OMPRTL__kmpc_end_master,
602   // Call to kmp_int32 __kmpc_omp_taskyield(ident_t *, kmp_int32 global_tid,
603   // int end_part);
604   OMPRTL__kmpc_omp_taskyield,
605   // Call to kmp_int32 __kmpc_single(ident_t *, kmp_int32 global_tid);
606   OMPRTL__kmpc_single,
607   // Call to void __kmpc_end_single(ident_t *, kmp_int32 global_tid);
608   OMPRTL__kmpc_end_single,
609   // Call to kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
610   // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
611   // kmp_routine_entry_t *task_entry);
612   OMPRTL__kmpc_omp_task_alloc,
613   // Call to kmp_task_t * __kmpc_omp_target_task_alloc(ident_t *,
614   // kmp_int32 gtid, kmp_int32 flags, size_t sizeof_kmp_task_t,
615   // size_t sizeof_shareds, kmp_routine_entry_t *task_entry,
616   // kmp_int64 device_id);
617   OMPRTL__kmpc_omp_target_task_alloc,
618   // Call to kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t *
619   // new_task);
620   OMPRTL__kmpc_omp_task,
621   // Call to void __kmpc_copyprivate(ident_t *loc, kmp_int32 global_tid,
622   // size_t cpy_size, void *cpy_data, void(*cpy_func)(void *, void *),
623   // kmp_int32 didit);
624   OMPRTL__kmpc_copyprivate,
625   // Call to kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid,
626   // kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void
627   // (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name *lck);
628   OMPRTL__kmpc_reduce,
629   // Call to kmp_int32 __kmpc_reduce_nowait(ident_t *loc, kmp_int32
630   // global_tid, kmp_int32 num_vars, size_t reduce_size, void *reduce_data,
631   // void (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name
632   // *lck);
633   OMPRTL__kmpc_reduce_nowait,
634   // Call to void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,
635   // kmp_critical_name *lck);
636   OMPRTL__kmpc_end_reduce,
637   // Call to void __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
638   // kmp_critical_name *lck);
639   OMPRTL__kmpc_end_reduce_nowait,
640   // Call to void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
641   // kmp_task_t * new_task);
642   OMPRTL__kmpc_omp_task_begin_if0,
643   // Call to void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
644   // kmp_task_t * new_task);
645   OMPRTL__kmpc_omp_task_complete_if0,
646   // Call to void __kmpc_ordered(ident_t *loc, kmp_int32 global_tid);
647   OMPRTL__kmpc_ordered,
648   // Call to void __kmpc_end_ordered(ident_t *loc, kmp_int32 global_tid);
649   OMPRTL__kmpc_end_ordered,
650   // Call to kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
651   // global_tid);
652   OMPRTL__kmpc_omp_taskwait,
653   // Call to void __kmpc_taskgroup(ident_t *loc, kmp_int32 global_tid);
654   OMPRTL__kmpc_taskgroup,
655   // Call to void __kmpc_end_taskgroup(ident_t *loc, kmp_int32 global_tid);
656   OMPRTL__kmpc_end_taskgroup,
657   // Call to void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
658   // int proc_bind);
659   OMPRTL__kmpc_push_proc_bind,
660   // Call to kmp_int32 __kmpc_omp_task_with_deps(ident_t *loc_ref, kmp_int32
661   // gtid, kmp_task_t * new_task, kmp_int32 ndeps, kmp_depend_info_t
662   // *dep_list, kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list);
663   OMPRTL__kmpc_omp_task_with_deps,
664   // Call to void __kmpc_omp_wait_deps(ident_t *loc_ref, kmp_int32
665   // gtid, kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
666   // ndeps_noalias, kmp_depend_info_t *noalias_dep_list);
667   OMPRTL__kmpc_omp_wait_deps,
668   // Call to kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
669   // global_tid, kmp_int32 cncl_kind);
670   OMPRTL__kmpc_cancellationpoint,
671   // Call to kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
672   // kmp_int32 cncl_kind);
673   OMPRTL__kmpc_cancel,
674   // Call to void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid,
675   // kmp_int32 num_teams, kmp_int32 thread_limit);
676   OMPRTL__kmpc_push_num_teams,
677   // Call to void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro
678   // microtask, ...);
679   OMPRTL__kmpc_fork_teams,
680   // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
681   // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
682   // sched, kmp_uint64 grainsize, void *task_dup);
683   OMPRTL__kmpc_taskloop,
684   // Call to void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid, kmp_int32
685   // num_dims, struct kmp_dim *dims);
686   OMPRTL__kmpc_doacross_init,
687   // Call to void __kmpc_doacross_fini(ident_t *loc, kmp_int32 gtid);
688   OMPRTL__kmpc_doacross_fini,
689   // Call to void __kmpc_doacross_post(ident_t *loc, kmp_int32 gtid, kmp_int64
690   // *vec);
691   OMPRTL__kmpc_doacross_post,
692   // Call to void __kmpc_doacross_wait(ident_t *loc, kmp_int32 gtid, kmp_int64
693   // *vec);
694   OMPRTL__kmpc_doacross_wait,
695   // Call to void *__kmpc_task_reduction_init(int gtid, int num_data, void
696   // *data);
697   OMPRTL__kmpc_task_reduction_init,
698   // Call to void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
699   // *d);
700   OMPRTL__kmpc_task_reduction_get_th_data,
701   // Call to void *__kmpc_alloc(int gtid, size_t sz, omp_allocator_handle_t al);
702   OMPRTL__kmpc_alloc,
703   // Call to void __kmpc_free(int gtid, void *ptr, omp_allocator_handle_t al);
704   OMPRTL__kmpc_free,
705 
706   //
707   // Offloading related calls
708   //
709   // Call to void __kmpc_push_target_tripcount(int64_t device_id, kmp_uint64
710   // size);
711   OMPRTL__kmpc_push_target_tripcount,
712   // Call to int32_t __tgt_target(int64_t device_id, void *host_ptr, int32_t
713   // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
714   // *arg_types);
715   OMPRTL__tgt_target,
716   // Call to int32_t __tgt_target_nowait(int64_t device_id, void *host_ptr,
717   // int32_t arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
718   // *arg_types);
719   OMPRTL__tgt_target_nowait,
720   // Call to int32_t __tgt_target_teams(int64_t device_id, void *host_ptr,
721   // int32_t arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
722   // *arg_types, int32_t num_teams, int32_t thread_limit);
723   OMPRTL__tgt_target_teams,
724   // Call to int32_t __tgt_target_teams_nowait(int64_t device_id, void
725   // *host_ptr, int32_t arg_num, void** args_base, void **args, int64_t
726   // *arg_sizes, int64_t *arg_types, int32_t num_teams, int32_t thread_limit);
727   OMPRTL__tgt_target_teams_nowait,
728   // Call to void __tgt_register_requires(int64_t flags);
729   OMPRTL__tgt_register_requires,
730   // Call to void __tgt_register_lib(__tgt_bin_desc *desc);
731   OMPRTL__tgt_register_lib,
732   // Call to void __tgt_unregister_lib(__tgt_bin_desc *desc);
733   OMPRTL__tgt_unregister_lib,
734   // Call to void __tgt_target_data_begin(int64_t device_id, int32_t arg_num,
735   // void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types);
736   OMPRTL__tgt_target_data_begin,
737   // Call to void __tgt_target_data_begin_nowait(int64_t device_id, int32_t
738   // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
739   // *arg_types);
740   OMPRTL__tgt_target_data_begin_nowait,
  // Call to void __tgt_target_data_end(int64_t device_id, int32_t arg_num,
  // void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types);
743   OMPRTL__tgt_target_data_end,
744   // Call to void __tgt_target_data_end_nowait(int64_t device_id, int32_t
745   // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
746   // *arg_types);
747   OMPRTL__tgt_target_data_end_nowait,
748   // Call to void __tgt_target_data_update(int64_t device_id, int32_t arg_num,
749   // void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types);
750   OMPRTL__tgt_target_data_update,
751   // Call to void __tgt_target_data_update_nowait(int64_t device_id, int32_t
752   // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
753   // *arg_types);
754   OMPRTL__tgt_target_data_update_nowait,
755   // Call to int64_t __tgt_mapper_num_components(void *rt_mapper_handle);
756   OMPRTL__tgt_mapper_num_components,
757   // Call to void __tgt_push_mapper_component(void *rt_mapper_handle, void
758   // *base, void *begin, int64_t size, int64_t type);
759   OMPRTL__tgt_push_mapper_component,
760 };
761 
762 /// A basic class for pre|post-action for advanced codegen sequence for OpenMP
763 /// region.
764 class CleanupTy final : public EHScopeStack::Cleanup {
765   PrePostActionTy *Action;
766 
767 public:
768   explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {}
769   void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
770     if (!CGF.HaveInsertPoint())
771       return;
772     Action->Exit(CGF);
773   }
774 };
775 
776 } // anonymous namespace
777 
778 void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const {
779   CodeGenFunction::RunCleanupsScope Scope(CGF);
780   if (PrePostAction) {
781     CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction);
782     Callback(CodeGen, CGF, *PrePostAction);
783   } else {
784     PrePostActionTy Action;
785     Callback(CodeGen, CGF, Action);
786   }
787 }
788 
789 /// Check if the combiner is a call to UDR combiner and if it is so return the
790 /// UDR decl used for reduction.
791 static const OMPDeclareReductionDecl *
792 getReductionInit(const Expr *ReductionOp) {
793   if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
794     if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
795       if (const auto *DRE =
796               dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
797         if (const auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl()))
798           return DRD;
799   return nullptr;
800 }
801 
802 static void emitInitWithReductionInitializer(CodeGenFunction &CGF,
803                                              const OMPDeclareReductionDecl *DRD,
804                                              const Expr *InitOp,
805                                              Address Private, Address Original,
806                                              QualType Ty) {
807   if (DRD->getInitializer()) {
808     std::pair<llvm::Function *, llvm::Function *> Reduction =
809         CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
810     const auto *CE = cast<CallExpr>(InitOp);
811     const auto *OVE = cast<OpaqueValueExpr>(CE->getCallee());
812     const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts();
813     const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts();
814     const auto *LHSDRE =
815         cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr());
816     const auto *RHSDRE =
817         cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr());
818     CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
819     PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()),
820                             [=]() { return Private; });
821     PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()),
822                             [=]() { return Original; });
823     (void)PrivateScope.Privatize();
824     RValue Func = RValue::get(Reduction.second);
825     CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
826     CGF.EmitIgnoredExpr(InitOp);
827   } else {
828     llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty);
829     std::string Name = CGF.CGM.getOpenMPRuntime().getName({"init"});
830     auto *GV = new llvm::GlobalVariable(
831         CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true,
832         llvm::GlobalValue::PrivateLinkage, Init, Name);
833     LValue LV = CGF.MakeNaturalAlignAddrLValue(GV, Ty);
834     RValue InitRVal;
835     switch (CGF.getEvaluationKind(Ty)) {
836     case TEK_Scalar:
837       InitRVal = CGF.EmitLoadOfLValue(LV, DRD->getLocation());
838       break;
839     case TEK_Complex:
840       InitRVal =
841           RValue::getComplex(CGF.EmitLoadOfComplex(LV, DRD->getLocation()));
842       break;
843     case TEK_Aggregate:
844       InitRVal = RValue::getAggregate(LV.getAddress());
845       break;
846     }
847     OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_RValue);
848     CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal);
849     CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
850                          /*IsInitializer=*/false);
851   }
852 }
853 
/// Emit element-by-element initialization of an array.
///
/// The emitted code is a loop guarded by an emptiness check: if the
/// destination range is empty control jumps straight to the "done" block,
/// otherwise the body initializes one element, advances the element
/// pointer(s) and repeats until the destination end is reached.
///
/// \param CGF Function in which the loop is emitted.
/// \param DestAddr Address of the array.
/// \param Type Type of array.
/// \param EmitDeclareReductionInit If true, each element is initialized via
/// emitInitWithReductionInitializer; otherwise \p Init is emitted directly
/// into each destination element.
/// \param Init Initial expression of array.
/// \param DRD 'declare reduction' declaration, may be null. The source array
/// is only walked when this is non-null.
/// \param SrcAddr Address of the original array.
static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr,
                                 QualType Type, bool EmitDeclareReductionInit,
                                 const Expr *Init,
                                 const OMPDeclareReductionDecl *DRD,
                                 Address SrcAddr = Address::invalid()) {
  // Perform element-by-element initialization.
  QualType ElementTy;

  // Drill down to the base element type on both arrays.
  // NOTE(review): DestAddr is handed to emitArrayLength and then bit-cast to
  // its own element type; presumably emitArrayLength rewrites DestAddr to
  // address the base element - confirm against its declaration.
  const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr);
  DestAddr =
      CGF.Builder.CreateElementBitCast(DestAddr, DestAddr.getElementType());
  if (DRD)
    SrcAddr =
        CGF.Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType());

  // The source begin pointer stays null when there is no DRD.
  llvm::Value *SrcBegin = nullptr;
  if (DRD)
    SrcBegin = SrcAddr.getPointer();
  llvm::Value *DestBegin = DestAddr.getPointer();
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *DestEnd = CGF.Builder.CreateGEP(DestBegin, NumElements);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arrayinit.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arrayinit.done");
  // Skip the loop entirely when begin == end.
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  // EntryBB is remembered as the first incoming edge of the PHIs below.
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);

  // Current source element; only materialized when a DRD is present.
  llvm::PHINode *SrcElementPHI = nullptr;
  Address SrcElementCurrent = Address::invalid();
  if (DRD) {
    SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2,
                                          "omp.arraycpy.srcElementPast");
    SrcElementPHI->addIncoming(SrcBegin, EntryBB);
    SrcElementCurrent =
        Address(SrcElementPHI,
                SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));
  }
  // Current destination element.
  llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI(
      DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
  DestElementPHI->addIncoming(DestBegin, EntryBB);
  Address DestElementCurrent =
      Address(DestElementPHI,
              DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  // The nested scope makes cleanups created by the per-element
  // initialization run before the pointers are advanced.
  {
    CodeGenFunction::RunCleanupsScope InitScope(CGF);
    if (EmitDeclareReductionInit) {
      emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent,
                                       SrcElementCurrent, ElementTy);
    } else
      CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(),
                           /*IsInitializer=*/false);
  }

  if (DRD) {
    // Shift the address forward by one element.
    // NOTE(review): this source-side GEP carries the name
    // "omp.arraycpy.dest.element", the same as the destination GEP below;
    // looks like a copy-paste name - confirm before renaming, since emitted
    // value names may be matched by tests.
    llvm::Value *SrcElementNext = CGF.Builder.CreateConstGEP1_32(
        SrcElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
    SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock());
  }

  // Shift the address forward by one element.
  llvm::Value *DestElementNext = CGF.Builder.CreateConstGEP1_32(
      DestElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  // The back edge comes from whatever block the body ended in.
  DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}
942 
/// Emits the lvalue of the shared reduction item expression \p E by
/// forwarding to EmitOMPSharedLValue on \p CGF.
LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) {
  return CGF.EmitOMPSharedLValue(E);
}
946 
947 LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF,
948                                             const Expr *E) {
949   if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E))
950     return CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false);
951   return LValue();
952 }
953 
954 void ReductionCodeGen::emitAggregateInitialization(
955     CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal,
956     const OMPDeclareReductionDecl *DRD) {
957   // Emit VarDecl with copy init for arrays.
958   // Get the address of the original variable captured in current
959   // captured region.
960   const auto *PrivateVD =
961       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
962   bool EmitDeclareReductionInit =
963       DRD && (DRD->getInitializer() || !PrivateVD->hasInit());
964   EmitOMPAggregateInit(CGF, PrivateAddr, PrivateVD->getType(),
965                        EmitDeclareReductionInit,
966                        EmitDeclareReductionInit ? ClausesData[N].ReductionOp
967                                                 : PrivateVD->getInit(),
968                        DRD, SharedLVal.getAddress());
969 }
970 
971 ReductionCodeGen::ReductionCodeGen(ArrayRef<const Expr *> Shareds,
972                                    ArrayRef<const Expr *> Privates,
973                                    ArrayRef<const Expr *> ReductionOps) {
974   ClausesData.reserve(Shareds.size());
975   SharedAddresses.reserve(Shareds.size());
976   Sizes.reserve(Shareds.size());
977   BaseDecls.reserve(Shareds.size());
978   auto IPriv = Privates.begin();
979   auto IRed = ReductionOps.begin();
980   for (const Expr *Ref : Shareds) {
981     ClausesData.emplace_back(Ref, *IPriv, *IRed);
982     std::advance(IPriv, 1);
983     std::advance(IRed, 1);
984   }
985 }
986 
987 void ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, unsigned N) {
988   assert(SharedAddresses.size() == N &&
989          "Number of generated lvalues must be exactly N.");
990   LValue First = emitSharedLValue(CGF, ClausesData[N].Ref);
991   LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Ref);
992   SharedAddresses.emplace_back(First, Second);
993 }
994 
995 void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) {
996   const auto *PrivateVD =
997       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
998   QualType PrivateType = PrivateVD->getType();
999   bool AsArraySection = isa<OMPArraySectionExpr>(ClausesData[N].Ref);
1000   if (!PrivateType->isVariablyModifiedType()) {
1001     Sizes.emplace_back(
1002         CGF.getTypeSize(
1003             SharedAddresses[N].first.getType().getNonReferenceType()),
1004         nullptr);
1005     return;
1006   }
1007   llvm::Value *Size;
1008   llvm::Value *SizeInChars;
1009   auto *ElemType =
1010       cast<llvm::PointerType>(SharedAddresses[N].first.getPointer()->getType())
1011           ->getElementType();
1012   auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType);
1013   if (AsArraySection) {
1014     Size = CGF.Builder.CreatePtrDiff(SharedAddresses[N].second.getPointer(),
1015                                      SharedAddresses[N].first.getPointer());
1016     Size = CGF.Builder.CreateNUWAdd(
1017         Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1));
1018     SizeInChars = CGF.Builder.CreateNUWMul(Size, ElemSizeOf);
1019   } else {
1020     SizeInChars = CGF.getTypeSize(
1021         SharedAddresses[N].first.getType().getNonReferenceType());
1022     Size = CGF.Builder.CreateExactUDiv(SizeInChars, ElemSizeOf);
1023   }
1024   Sizes.emplace_back(SizeInChars, Size);
1025   CodeGenFunction::OpaqueValueMapping OpaqueMap(
1026       CGF,
1027       cast<OpaqueValueExpr>(
1028           CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
1029       RValue::get(Size));
1030   CGF.EmitVariablyModifiedType(PrivateType);
1031 }
1032 
1033 void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N,
1034                                          llvm::Value *Size) {
1035   const auto *PrivateVD =
1036       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
1037   QualType PrivateType = PrivateVD->getType();
1038   if (!PrivateType->isVariablyModifiedType()) {
1039     assert(!Size && !Sizes[N].second &&
1040            "Size should be nullptr for non-variably modified reduction "
1041            "items.");
1042     return;
1043   }
1044   CodeGenFunction::OpaqueValueMapping OpaqueMap(
1045       CGF,
1046       cast<OpaqueValueExpr>(
1047           CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
1048       RValue::get(Size));
1049   CGF.EmitVariablyModifiedType(PrivateType);
1050 }
1051 
1052 void ReductionCodeGen::emitInitialization(
1053     CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal,
1054     llvm::function_ref<bool(CodeGenFunction &)> DefaultInit) {
1055   assert(SharedAddresses.size() > N && "No variable was generated");
1056   const auto *PrivateVD =
1057       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
1058   const OMPDeclareReductionDecl *DRD =
1059       getReductionInit(ClausesData[N].ReductionOp);
1060   QualType PrivateType = PrivateVD->getType();
1061   PrivateAddr = CGF.Builder.CreateElementBitCast(
1062       PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
1063   QualType SharedType = SharedAddresses[N].first.getType();
1064   SharedLVal = CGF.MakeAddrLValue(
1065       CGF.Builder.CreateElementBitCast(SharedLVal.getAddress(),
1066                                        CGF.ConvertTypeForMem(SharedType)),
1067       SharedType, SharedAddresses[N].first.getBaseInfo(),
1068       CGF.CGM.getTBAAInfoForSubobject(SharedAddresses[N].first, SharedType));
1069   if (CGF.getContext().getAsArrayType(PrivateVD->getType())) {
1070     emitAggregateInitialization(CGF, N, PrivateAddr, SharedLVal, DRD);
1071   } else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) {
1072     emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp,
1073                                      PrivateAddr, SharedLVal.getAddress(),
1074                                      SharedLVal.getType());
1075   } else if (!DefaultInit(CGF) && PrivateVD->hasInit() &&
1076              !CGF.isTrivialInitializer(PrivateVD->getInit())) {
1077     CGF.EmitAnyExprToMem(PrivateVD->getInit(), PrivateAddr,
1078                          PrivateVD->getType().getQualifiers(),
1079                          /*IsInitializer=*/false);
1080   }
1081 }
1082 
1083 bool ReductionCodeGen::needCleanups(unsigned N) {
1084   const auto *PrivateVD =
1085       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
1086   QualType PrivateType = PrivateVD->getType();
1087   QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
1088   return DTorKind != QualType::DK_none;
1089 }
1090 
1091 void ReductionCodeGen::emitCleanups(CodeGenFunction &CGF, unsigned N,
1092                                     Address PrivateAddr) {
1093   const auto *PrivateVD =
1094       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
1095   QualType PrivateType = PrivateVD->getType();
1096   QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
1097   if (needCleanups(N)) {
1098     PrivateAddr = CGF.Builder.CreateElementBitCast(
1099         PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
1100     CGF.pushDestroy(DTorKind, PrivateAddr, PrivateType);
1101   }
1102 }
1103 
1104 static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
1105                           LValue BaseLV) {
1106   BaseTy = BaseTy.getNonReferenceType();
1107   while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
1108          !CGF.getContext().hasSameType(BaseTy, ElTy)) {
1109     if (const auto *PtrTy = BaseTy->getAs<PointerType>()) {
1110       BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(), PtrTy);
1111     } else {
1112       LValue RefLVal = CGF.MakeAddrLValue(BaseLV.getAddress(), BaseTy);
1113       BaseLV = CGF.EmitLoadOfReferenceLValue(RefLVal);
1114     }
1115     BaseTy = BaseTy->getPointeeType();
1116   }
1117   return CGF.MakeAddrLValue(
1118       CGF.Builder.CreateElementBitCast(BaseLV.getAddress(),
1119                                        CGF.ConvertTypeForMem(ElTy)),
1120       BaseLV.getType(), BaseLV.getBaseInfo(),
1121       CGF.CGM.getTBAAInfoForSubobject(BaseLV, BaseLV.getType()));
1122 }
1123 
/// Wraps \p Addr in a chain of memory temporaries, one per pointer or
/// reference level that \p BaseTy (after stripping a top-level reference) has
/// before it becomes the same type as \p ElTy.
///
/// Every temporary created after the first has its pointer stored into the
/// previously created one, and \p Addr, cast to the element type of the last
/// temporary, is stored into that last temporary. The first temporary is
/// returned.
///
/// If no level has to be wrapped, \p Addr is cast to \p BaseLVType and
/// returned with alignment \p BaseLVAlignment.
static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          llvm::Type *BaseLVType, CharUnits BaseLVAlignment,
                          llvm::Value *Addr) {
  // Tmp: temporary of the level being processed (the last one after the loop).
  // TopTmp: temporary of the previous level.
  // MostTopTmp: the very first temporary, i.e. the value to return.
  Address Tmp = Address::invalid();
  Address TopTmp = Address::invalid();
  Address MostTopTmp = Address::invalid();
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    Tmp = CGF.CreateMemTemp(BaseTy);
    // Link the new temporary into the previous one, or remember it as the
    // head of the chain if it is the first.
    if (TopTmp.isValid())
      CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp);
    else
      MostTopTmp = Tmp;
    TopTmp = Tmp;
    BaseTy = BaseTy->getPointeeType();
  }
  // Cast target: what the last temporary holds, or the base lvalue type when
  // no temporary was needed.
  llvm::Type *Ty = BaseLVType;
  if (Tmp.isValid())
    Ty = Tmp.getElementType();
  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Addr, Ty);
  if (Tmp.isValid()) {
    CGF.Builder.CreateStore(Addr, Tmp);
    return MostTopTmp;
  }
  return Address(Addr, BaseLVAlignment);
}
1151 
1152 static const VarDecl *getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE) {
1153   const VarDecl *OrigVD = nullptr;
1154   if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(Ref)) {
1155     const Expr *Base = OASE->getBase()->IgnoreParenImpCasts();
1156     while (const auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base))
1157       Base = TempOASE->getBase()->IgnoreParenImpCasts();
1158     while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
1159       Base = TempASE->getBase()->IgnoreParenImpCasts();
1160     DE = cast<DeclRefExpr>(Base);
1161     OrigVD = cast<VarDecl>(DE->getDecl());
1162   } else if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Ref)) {
1163     const Expr *Base = ASE->getBase()->IgnoreParenImpCasts();
1164     while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
1165       Base = TempASE->getBase()->IgnoreParenImpCasts();
1166     DE = cast<DeclRefExpr>(Base);
1167     OrigVD = cast<VarDecl>(DE->getDecl());
1168   }
1169   return OrigVD;
1170 }
1171 
1172 Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N,
1173                                                Address PrivateAddr) {
1174   const DeclRefExpr *DE;
1175   if (const VarDecl *OrigVD = ::getBaseDecl(ClausesData[N].Ref, DE)) {
1176     BaseDecls.emplace_back(OrigVD);
1177     LValue OriginalBaseLValue = CGF.EmitLValue(DE);
1178     LValue BaseLValue =
1179         loadToBegin(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(),
1180                     OriginalBaseLValue);
1181     llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff(
1182         BaseLValue.getPointer(), SharedAddresses[N].first.getPointer());
1183     llvm::Value *PrivatePointer =
1184         CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
1185             PrivateAddr.getPointer(),
1186             SharedAddresses[N].first.getAddress().getType());
1187     llvm::Value *Ptr = CGF.Builder.CreateGEP(PrivatePointer, Adjustment);
1188     return castToBase(CGF, OrigVD->getType(),
1189                       SharedAddresses[N].first.getType(),
1190                       OriginalBaseLValue.getAddress().getType(),
1191                       OriginalBaseLValue.getAlignment(), Ptr);
1192   }
1193   BaseDecls.emplace_back(
1194       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Ref)->getDecl()));
1195   return PrivateAddr;
1196 }
1197 
1198 bool ReductionCodeGen::usesReductionInitializer(unsigned N) const {
1199   const OMPDeclareReductionDecl *DRD =
1200       getReductionInit(ClausesData[N].ReductionOp);
1201   return DRD && DRD->getInitializer();
1202 }
1203 
/// Returns the lvalue reached by loading through the thread-id variable: the
/// variable's local address is looked up in \p CGF and its type is treated as
/// a pointer type (castAs<PointerType>), so the result designates the pointee.
LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) {
  return CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(getThreadIDVariable()),
      getThreadIDVariable()->getType()->castAs<PointerType>());
}
1209 
1210 void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt * /*S*/) {
1211   if (!CGF.HaveInsertPoint())
1212     return;
1213   // 1.2.2 OpenMP Language Terminology
1214   // Structured block - An executable statement with a single entry at the
1215   // top and a single exit at the bottom.
1216   // The point of exit cannot be a branch out of the structured block.
1217   // longjmp() and throw() must not violate the entry/exit criteria.
1218   CGF.EHStack.pushTerminate();
1219   CodeGen(CGF);
1220   CGF.EHStack.popTerminate();
1221 }
1222 
/// Returns an lvalue that designates the thread-id variable's local storage
/// itself, typed with the variable's own type and using the declaration as
/// the alignment source. Unlike
/// CGOpenMPRegionInfo::getThreadIDVariableLValue, no load through a pointer
/// is emitted here.
LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue(
    CodeGenFunction &CGF) {
  return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()),
                            getThreadIDVariable()->getType(),
                            AlignmentSource::Decl);
}
1229 
1230 static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC,
1231                                        QualType FieldTy) {
1232   auto *Field = FieldDecl::Create(
1233       C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy,
1234       C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()),
1235       /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit);
1236   Field->setAccess(AS_public);
1237   DC->addDecl(Field);
1238   return Field;
1239 }
1240 
1241 CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM, StringRef FirstSeparator,
1242                                  StringRef Separator)
1243     : CGM(CGM), FirstSeparator(FirstSeparator), Separator(Separator),
1244       OffloadEntriesInfoManager(CGM) {
1245   ASTContext &C = CGM.getContext();
1246   RecordDecl *RD = C.buildImplicitRecord("ident_t");
1247   QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
1248   RD->startDefinition();
1249   // reserved_1
1250   addFieldToRecordDecl(C, RD, KmpInt32Ty);
1251   // flags
1252   addFieldToRecordDecl(C, RD, KmpInt32Ty);
1253   // reserved_2
1254   addFieldToRecordDecl(C, RD, KmpInt32Ty);
1255   // reserved_3
1256   addFieldToRecordDecl(C, RD, KmpInt32Ty);
1257   // psource
1258   addFieldToRecordDecl(C, RD, C.VoidPtrTy);
1259   RD->completeDefinition();
1260   IdentQTy = C.getRecordType(RD);
1261   IdentTy = CGM.getTypes().ConvertRecordDeclType(RD);
1262   KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8);
1263 
1264   loadOffloadInfoMetadata();
1265 }
1266 
1267 void CGOpenMPRuntime::clear() {
1268   InternalVars.clear();
1269   // Clean non-target variable declarations possibly used only in debug info.
1270   for (const auto &Data : EmittedNonTargetVariables) {
1271     if (!Data.getValue().pointsToAliveValue())
1272       continue;
1273     auto *GV = dyn_cast<llvm::GlobalVariable>(Data.getValue());
1274     if (!GV)
1275       continue;
1276     if (!GV->isDeclaration() || GV->getNumUses() > 0)
1277       continue;
1278     GV->eraseFromParent();
1279   }
1280 }
1281 
1282 std::string CGOpenMPRuntime::getName(ArrayRef<StringRef> Parts) const {
1283   SmallString<128> Buffer;
1284   llvm::raw_svector_ostream OS(Buffer);
1285   StringRef Sep = FirstSeparator;
1286   for (StringRef Part : Parts) {
1287     OS << Sep << Part;
1288     Sep = Separator;
1289   }
1290   return OS.str();
1291 }
1292 
/// Emits the helper function for a user-defined reduction: either its
/// combiner or its initializer.
///
/// The function has internal linkage, returns void and takes two restrict
/// pointers to \p Ty; the parameter for \p Out comes first, the one for
/// \p In second. Inside the function \p In and \p Out are mapped to the
/// pointees of the respective parameters.
///
/// \param CGM Module the function is added to.
/// \param Ty Type the reduction operates on.
/// \param CombinerInitializer Expression emitted as the body; may be null, in
/// which case no expression is emitted.
/// \param In Variable mapped to the pointee of the second parameter.
/// \param Out Variable mapped to the pointee of the first parameter.
/// \param IsCombiner Selects the "omp_combiner" name; when false the
/// "omp_initializer" name is used and a non-trivial initializer of \p Out is
/// emitted before \p CombinerInitializer.
/// \returns The emitted function.
static llvm::Function *
emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty,
                          const Expr *CombinerInitializer, const VarDecl *In,
                          const VarDecl *Out, bool IsCombiner) {
  // void .omp_combiner.(Ty *out, Ty *in);
  ASTContext &C = CGM.getContext();
  QualType PtrTy = C.getPointerType(Ty).withRestrict();
  FunctionArgList Args;
  ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(),
                               /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(),
                              /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
  // Parameter order is (out, in).
  Args.push_back(&OmpOutParm);
  Args.push_back(&OmpInParm);
  const CGFunctionInfo &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName(
      {IsCombiner ? "omp_combiner" : "omp_initializer", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  // When optimizing, request that the helper always be inlined.
  if (CGM.getLangOpts().Optimize) {
    Fn->removeFnAttr(llvm::Attribute::NoInline);
    Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
    Fn->addFnAttr(llvm::Attribute::AlwaysInline);
  }
  CodeGenFunction CGF(CGM);
  // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions.
  // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions.
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, In->getLocation(),
                    Out->getLocation());
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm);
  Scope.addPrivate(In, [&CGF, AddrIn, PtrTy]() {
    return CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>())
        .getAddress();
  });
  Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm);
  Scope.addPrivate(Out, [&CGF, AddrOut, PtrTy]() {
    return CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>())
        .getAddress();
  });
  (void)Scope.Privatize();
  // Initializer only: emit the non-trivial initializer of Out into its
  // (remapped) storage before the initializer expression itself.
  if (!IsCombiner && Out->hasInit() &&
      !CGF.isTrivialInitializer(Out->getInit())) {
    CGF.EmitAnyExprToMem(Out->getInit(), CGF.GetAddrOfLocalVar(Out),
                         Out->getType().getQualifiers(),
                         /*IsInitializer=*/true);
  }
  if (CombinerInitializer)
    CGF.EmitIgnoredExpr(CombinerInitializer);
  // Run the scope's cleanups before the function is finished.
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}
1349 
1350 void CGOpenMPRuntime::emitUserDefinedReduction(
1351     CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) {
1352   if (UDRMap.count(D) > 0)
1353     return;
1354   llvm::Function *Combiner = emitCombinerOrInitializer(
1355       CGM, D->getType(), D->getCombiner(),
1356       cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerIn())->getDecl()),
1357       cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerOut())->getDecl()),
1358       /*IsCombiner=*/true);
1359   llvm::Function *Initializer = nullptr;
1360   if (const Expr *Init = D->getInitializer()) {
1361     Initializer = emitCombinerOrInitializer(
1362         CGM, D->getType(),
1363         D->getInitializerKind() == OMPDeclareReductionDecl::CallInit ? Init
1364                                                                      : nullptr,
1365         cast<VarDecl>(cast<DeclRefExpr>(D->getInitOrig())->getDecl()),
1366         cast<VarDecl>(cast<DeclRefExpr>(D->getInitPriv())->getDecl()),
1367         /*IsCombiner=*/false);
1368   }
1369   UDRMap.try_emplace(D, Combiner, Initializer);
1370   if (CGF) {
1371     auto &Decls = FunctionUDRMap.FindAndConstruct(CGF->CurFn);
1372     Decls.second.push_back(D);
1373   }
1374 }
1375 
1376 std::pair<llvm::Function *, llvm::Function *>
1377 CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) {
1378   auto I = UDRMap.find(D);
1379   if (I != UDRMap.end())
1380     return I->second;
1381   emitUserDefinedReduction(/*CGF=*/nullptr, D);
1382   return UDRMap.lookup(D);
1383 }
1384 
1385 static llvm::Function *emitParallelOrTeamsOutlinedFunction(
1386     CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS,
1387     const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
1388     const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen) {
1389   assert(ThreadIDVar->getType()->isPointerType() &&
1390          "thread id variable must be of type kmp_int32 *");
1391   CodeGenFunction CGF(CGM, true);
1392   bool HasCancel = false;
1393   if (const auto *OPD = dyn_cast<OMPParallelDirective>(&D))
1394     HasCancel = OPD->hasCancel();
1395   else if (const auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D))
1396     HasCancel = OPSD->hasCancel();
1397   else if (const auto *OPFD = dyn_cast<OMPParallelForDirective>(&D))
1398     HasCancel = OPFD->hasCancel();
1399   else if (const auto *OPFD = dyn_cast<OMPTargetParallelForDirective>(&D))
1400     HasCancel = OPFD->hasCancel();
1401   else if (const auto *OPFD = dyn_cast<OMPDistributeParallelForDirective>(&D))
1402     HasCancel = OPFD->hasCancel();
1403   else if (const auto *OPFD =
1404                dyn_cast<OMPTeamsDistributeParallelForDirective>(&D))
1405     HasCancel = OPFD->hasCancel();
1406   else if (const auto *OPFD =
1407                dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&D))
1408     HasCancel = OPFD->hasCancel();
1409   CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind,
1410                                     HasCancel, OutlinedHelperName);
1411   CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
1412   return CGF.GenerateOpenMPCapturedStmtFunction(*CS);
1413 }
1414 
1415 llvm::Function *CGOpenMPRuntime::emitParallelOutlinedFunction(
1416     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
1417     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
1418   const CapturedStmt *CS = D.getCapturedStmt(OMPD_parallel);
1419   return emitParallelOrTeamsOutlinedFunction(
1420       CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
1421 }
1422 
1423 llvm::Function *CGOpenMPRuntime::emitTeamsOutlinedFunction(
1424     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
1425     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
1426   const CapturedStmt *CS = D.getCapturedStmt(OMPD_teams);
1427   return emitParallelOrTeamsOutlinedFunction(
1428       CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
1429 }
1430 
1431 llvm::Function *CGOpenMPRuntime::emitTaskOutlinedFunction(
1432     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
1433     const VarDecl *PartIDVar, const VarDecl *TaskTVar,
1434     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
1435     bool Tied, unsigned &NumberOfParts) {
1436   auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF,
1437                                               PrePostActionTy &) {
1438     llvm::Value *ThreadID = getThreadID(CGF, D.getBeginLoc());
1439     llvm::Value *UpLoc = emitUpdateLocation(CGF, D.getBeginLoc());
1440     llvm::Value *TaskArgs[] = {
1441         UpLoc, ThreadID,
1442         CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar),
1443                                     TaskTVar->getType()->castAs<PointerType>())
1444             .getPointer()};
1445     CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task), TaskArgs);
1446   };
1447   CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar,
1448                                                             UntiedCodeGen);
1449   CodeGen.setAction(Action);
1450   assert(!ThreadIDVar->getType()->isPointerType() &&
1451          "thread id variable must be of type kmp_int32 for tasks");
1452   const OpenMPDirectiveKind Region =
1453       isOpenMPTaskLoopDirective(D.getDirectiveKind()) ? OMPD_taskloop
1454                                                       : OMPD_task;
1455   const CapturedStmt *CS = D.getCapturedStmt(Region);
1456   const auto *TD = dyn_cast<OMPTaskDirective>(&D);
1457   CodeGenFunction CGF(CGM, true);
1458   CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen,
1459                                         InnermostKind,
1460                                         TD ? TD->hasCancel() : false, Action);
1461   CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
1462   llvm::Function *Res = CGF.GenerateCapturedStmtFunction(*CS);
1463   if (!Tied)
1464     NumberOfParts = Action.getNumberOfParts();
1465   return Res;
1466 }
1467 
1468 static void buildStructValue(ConstantStructBuilder &Fields, CodeGenModule &CGM,
1469                              const RecordDecl *RD, const CGRecordLayout &RL,
1470                              ArrayRef<llvm::Constant *> Data) {
1471   llvm::StructType *StructTy = RL.getLLVMType();
1472   unsigned PrevIdx = 0;
1473   ConstantInitBuilder CIBuilder(CGM);
1474   auto DI = Data.begin();
1475   for (const FieldDecl *FD : RD->fields()) {
1476     unsigned Idx = RL.getLLVMFieldNo(FD);
1477     // Fill the alignment.
1478     for (unsigned I = PrevIdx; I < Idx; ++I)
1479       Fields.add(llvm::Constant::getNullValue(StructTy->getElementType(I)));
1480     PrevIdx = Idx + 1;
1481     Fields.add(*DI);
1482     ++DI;
1483   }
1484 }
1485 
1486 template <class... As>
1487 static llvm::GlobalVariable *
1488 createGlobalStruct(CodeGenModule &CGM, QualType Ty, bool IsConstant,
1489                    ArrayRef<llvm::Constant *> Data, const Twine &Name,
1490                    As &&... Args) {
1491   const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl());
1492   const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD);
1493   ConstantInitBuilder CIBuilder(CGM);
1494   ConstantStructBuilder Fields = CIBuilder.beginStruct(RL.getLLVMType());
1495   buildStructValue(Fields, CGM, RD, RL, Data);
1496   return Fields.finishAndCreateGlobal(
1497       Name, CGM.getContext().getAlignOfGlobalVarInChars(Ty), IsConstant,
1498       std::forward<As>(Args)...);
1499 }
1500 
1501 template <typename T>
1502 static void
1503 createConstantGlobalStructAndAddToParent(CodeGenModule &CGM, QualType Ty,
1504                                          ArrayRef<llvm::Constant *> Data,
1505                                          T &Parent) {
1506   const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl());
1507   const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD);
1508   ConstantStructBuilder Fields = Parent.beginStruct(RL.getLLVMType());
1509   buildStructValue(Fields, CGM, RD, RL, Data);
1510   Fields.finishAndAddTo(Parent);
1511 }
1512 
1513 Address CGOpenMPRuntime::getOrCreateDefaultLocation(unsigned Flags) {
1514   CharUnits Align = CGM.getContext().getTypeAlignInChars(IdentQTy);
1515   unsigned Reserved2Flags = getDefaultLocationReserved2Flags();
1516   FlagsTy FlagsKey(Flags, Reserved2Flags);
1517   llvm::Value *Entry = OpenMPDefaultLocMap.lookup(FlagsKey);
1518   if (!Entry) {
1519     if (!DefaultOpenMPPSource) {
1520       // Initialize default location for psource field of ident_t structure of
1521       // all ident_t objects. Format is ";file;function;line;column;;".
1522       // Taken from
1523       // https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp_str.cpp
1524       DefaultOpenMPPSource =
1525           CGM.GetAddrOfConstantCString(";unknown;unknown;0;0;;").getPointer();
1526       DefaultOpenMPPSource =
1527           llvm::ConstantExpr::getBitCast(DefaultOpenMPPSource, CGM.Int8PtrTy);
1528     }
1529 
1530     llvm::Constant *Data[] = {
1531         llvm::ConstantInt::getNullValue(CGM.Int32Ty),
1532         llvm::ConstantInt::get(CGM.Int32Ty, Flags),
1533         llvm::ConstantInt::get(CGM.Int32Ty, Reserved2Flags),
1534         llvm::ConstantInt::getNullValue(CGM.Int32Ty), DefaultOpenMPPSource};
1535     llvm::GlobalValue *DefaultOpenMPLocation =
1536         createGlobalStruct(CGM, IdentQTy, isDefaultLocationConstant(), Data, "",
1537                            llvm::GlobalValue::PrivateLinkage);
1538     DefaultOpenMPLocation->setUnnamedAddr(
1539         llvm::GlobalValue::UnnamedAddr::Global);
1540 
1541     OpenMPDefaultLocMap[FlagsKey] = Entry = DefaultOpenMPLocation;
1542   }
1543   return Address(Entry, Align);
1544 }
1545 
1546 void CGOpenMPRuntime::setLocThreadIdInsertPt(CodeGenFunction &CGF,
1547                                              bool AtCurrentPoint) {
1548   auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1549   assert(!Elem.second.ServiceInsertPt && "Insert point is set already.");
1550 
1551   llvm::Value *Undef = llvm::UndefValue::get(CGF.Int32Ty);
1552   if (AtCurrentPoint) {
1553     Elem.second.ServiceInsertPt = new llvm::BitCastInst(
1554         Undef, CGF.Int32Ty, "svcpt", CGF.Builder.GetInsertBlock());
1555   } else {
1556     Elem.second.ServiceInsertPt =
1557         new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt");
1558     Elem.second.ServiceInsertPt->insertAfter(CGF.AllocaInsertPt);
1559   }
1560 }
1561 
1562 void CGOpenMPRuntime::clearLocThreadIdInsertPt(CodeGenFunction &CGF) {
1563   auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1564   if (Elem.second.ServiceInsertPt) {
1565     llvm::Instruction *Ptr = Elem.second.ServiceInsertPt;
1566     Elem.second.ServiceInsertPt = nullptr;
1567     Ptr->eraseFromParent();
1568   }
1569 }
1570 
1571 llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF,
1572                                                  SourceLocation Loc,
1573                                                  unsigned Flags) {
1574   Flags |= OMP_IDENT_KMPC;
1575   // If no debug info is generated - return global default location.
1576   if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo ||
1577       Loc.isInvalid())
1578     return getOrCreateDefaultLocation(Flags).getPointer();
1579 
1580   assert(CGF.CurFn && "No function in current CodeGenFunction.");
1581 
1582   CharUnits Align = CGM.getContext().getTypeAlignInChars(IdentQTy);
1583   Address LocValue = Address::invalid();
1584   auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
1585   if (I != OpenMPLocThreadIDMap.end())
1586     LocValue = Address(I->second.DebugLoc, Align);
1587 
1588   // OpenMPLocThreadIDMap may have null DebugLoc and non-null ThreadID, if
1589   // GetOpenMPThreadID was called before this routine.
1590   if (!LocValue.isValid()) {
1591     // Generate "ident_t .kmpc_loc.addr;"
1592     Address AI = CGF.CreateMemTemp(IdentQTy, ".kmpc_loc.addr");
1593     auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1594     Elem.second.DebugLoc = AI.getPointer();
1595     LocValue = AI;
1596 
1597     if (!Elem.second.ServiceInsertPt)
1598       setLocThreadIdInsertPt(CGF);
1599     CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
1600     CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt);
1601     CGF.Builder.CreateMemCpy(LocValue, getOrCreateDefaultLocation(Flags),
1602                              CGF.getTypeSize(IdentQTy));
1603   }
1604 
1605   // char **psource = &.kmpc_loc_<flags>.addr.psource;
1606   LValue Base = CGF.MakeAddrLValue(LocValue, IdentQTy);
1607   auto Fields = cast<RecordDecl>(IdentQTy->getAsTagDecl())->field_begin();
1608   LValue PSource =
1609       CGF.EmitLValueForField(Base, *std::next(Fields, IdentField_PSource));
1610 
1611   llvm::Value *OMPDebugLoc = OpenMPDebugLocMap.lookup(Loc.getRawEncoding());
1612   if (OMPDebugLoc == nullptr) {
1613     SmallString<128> Buffer2;
1614     llvm::raw_svector_ostream OS2(Buffer2);
1615     // Build debug location
1616     PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
1617     OS2 << ";" << PLoc.getFilename() << ";";
1618     if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
1619       OS2 << FD->getQualifiedNameAsString();
1620     OS2 << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;";
1621     OMPDebugLoc = CGF.Builder.CreateGlobalStringPtr(OS2.str());
1622     OpenMPDebugLocMap[Loc.getRawEncoding()] = OMPDebugLoc;
1623   }
1624   // *psource = ";<File>;<Function>;<Line>;<Column>;;";
1625   CGF.EmitStoreOfScalar(OMPDebugLoc, PSource);
1626 
1627   // Our callers always pass this to a runtime function, so for
1628   // convenience, go ahead and return a naked pointer.
1629   return LocValue.getPointer();
1630 }
1631 
1632 llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF,
1633                                           SourceLocation Loc) {
1634   assert(CGF.CurFn && "No function in current CodeGenFunction.");
1635 
1636   llvm::Value *ThreadID = nullptr;
1637   // Check whether we've already cached a load of the thread id in this
1638   // function.
1639   auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
1640   if (I != OpenMPLocThreadIDMap.end()) {
1641     ThreadID = I->second.ThreadID;
1642     if (ThreadID != nullptr)
1643       return ThreadID;
1644   }
1645   // If exceptions are enabled, do not use parameter to avoid possible crash.
1646   if (!CGF.EHStack.requiresLandingPad() || !CGF.getLangOpts().Exceptions ||
1647       !CGF.getLangOpts().CXXExceptions ||
1648       CGF.Builder.GetInsertBlock() == CGF.AllocaInsertPt->getParent()) {
1649     if (auto *OMPRegionInfo =
1650             dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
1651       if (OMPRegionInfo->getThreadIDVariable()) {
1652         // Check if this an outlined function with thread id passed as argument.
1653         LValue LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF);
1654         ThreadID = CGF.EmitLoadOfScalar(LVal, Loc);
1655         // If value loaded in entry block, cache it and use it everywhere in
1656         // function.
1657         if (CGF.Builder.GetInsertBlock() == CGF.AllocaInsertPt->getParent()) {
1658           auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1659           Elem.second.ThreadID = ThreadID;
1660         }
1661         return ThreadID;
1662       }
1663     }
1664   }
1665 
1666   // This is not an outlined function region - need to call __kmpc_int32
1667   // kmpc_global_thread_num(ident_t *loc).
1668   // Generate thread id value and cache this value for use across the
1669   // function.
1670   auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1671   if (!Elem.second.ServiceInsertPt)
1672     setLocThreadIdInsertPt(CGF);
1673   CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
1674   CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt);
1675   llvm::CallInst *Call = CGF.Builder.CreateCall(
1676       createRuntimeFunction(OMPRTL__kmpc_global_thread_num),
1677       emitUpdateLocation(CGF, Loc));
1678   Call->setCallingConv(CGF.getRuntimeCC());
1679   Elem.second.ThreadID = Call;
1680   return Call;
1681 }
1682 
1683 void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) {
1684   assert(CGF.CurFn && "No function in current CodeGenFunction.");
1685   if (OpenMPLocThreadIDMap.count(CGF.CurFn)) {
1686     clearLocThreadIdInsertPt(CGF);
1687     OpenMPLocThreadIDMap.erase(CGF.CurFn);
1688   }
1689   if (FunctionUDRMap.count(CGF.CurFn) > 0) {
1690     for(auto *D : FunctionUDRMap[CGF.CurFn])
1691       UDRMap.erase(D);
1692     FunctionUDRMap.erase(CGF.CurFn);
1693   }
1694   auto I = FunctionUDMMap.find(CGF.CurFn);
1695   if (I != FunctionUDMMap.end()) {
1696     for(auto *D : I->second)
1697       UDMMap.erase(D);
1698     FunctionUDMMap.erase(I);
1699   }
1700 }
1701 
1702 llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() {
1703   return IdentTy->getPointerTo();
1704 }
1705 
1706 llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() {
1707   if (!Kmpc_MicroTy) {
1708     // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...)
1709     llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty),
1710                                  llvm::PointerType::getUnqual(CGM.Int32Ty)};
1711     Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true);
1712   }
1713   return llvm::PointerType::getUnqual(Kmpc_MicroTy);
1714 }
1715 
1716 llvm::FunctionCallee CGOpenMPRuntime::createRuntimeFunction(unsigned Function) {
1717   llvm::FunctionCallee RTLFn = nullptr;
1718   switch (static_cast<OpenMPRTLFunction>(Function)) {
1719   case OMPRTL__kmpc_fork_call: {
1720     // Build void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro
1721     // microtask, ...);
1722     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1723                                 getKmpc_MicroPointerTy()};
1724     auto *FnTy =
1725         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ true);
1726     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_fork_call");
1727     if (auto *F = dyn_cast<llvm::Function>(RTLFn.getCallee())) {
1728       if (!F->hasMetadata(llvm::LLVMContext::MD_callback)) {
1729         llvm::LLVMContext &Ctx = F->getContext();
1730         llvm::MDBuilder MDB(Ctx);
1731         // Annotate the callback behavior of the __kmpc_fork_call:
1732         //  - The callback callee is argument number 2 (microtask).
1733         //  - The first two arguments of the callback callee are unknown (-1).
1734         //  - All variadic arguments to the __kmpc_fork_call are passed to the
1735         //    callback callee.
1736         F->addMetadata(
1737             llvm::LLVMContext::MD_callback,
1738             *llvm::MDNode::get(Ctx, {MDB.createCallbackEncoding(
1739                                         2, {-1, -1},
1740                                         /* VarArgsArePassed */ true)}));
1741       }
1742     }
1743     break;
1744   }
1745   case OMPRTL__kmpc_global_thread_num: {
1746     // Build kmp_int32 __kmpc_global_thread_num(ident_t *loc);
1747     llvm::Type *TypeParams[] = {getIdentTyPointerTy()};
1748     auto *FnTy =
1749         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
1750     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_global_thread_num");
1751     break;
1752   }
1753   case OMPRTL__kmpc_threadprivate_cached: {
1754     // Build void *__kmpc_threadprivate_cached(ident_t *loc,
1755     // kmp_int32 global_tid, void *data, size_t size, void ***cache);
1756     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1757                                 CGM.VoidPtrTy, CGM.SizeTy,
1758                                 CGM.VoidPtrTy->getPointerTo()->getPointerTo()};
1759     auto *FnTy =
1760         llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg*/ false);
1761     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_cached");
1762     break;
1763   }
1764   case OMPRTL__kmpc_critical: {
1765     // Build void __kmpc_critical(ident_t *loc, kmp_int32 global_tid,
1766     // kmp_critical_name *crit);
1767     llvm::Type *TypeParams[] = {
1768         getIdentTyPointerTy(), CGM.Int32Ty,
1769         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
1770     auto *FnTy =
1771         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1772     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_critical");
1773     break;
1774   }
1775   case OMPRTL__kmpc_critical_with_hint: {
1776     // Build void __kmpc_critical_with_hint(ident_t *loc, kmp_int32 global_tid,
1777     // kmp_critical_name *crit, uintptr_t hint);
1778     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1779                                 llvm::PointerType::getUnqual(KmpCriticalNameTy),
1780                                 CGM.IntPtrTy};
1781     auto *FnTy =
1782         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1783     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_critical_with_hint");
1784     break;
1785   }
1786   case OMPRTL__kmpc_threadprivate_register: {
1787     // Build void __kmpc_threadprivate_register(ident_t *, void *data,
1788     // kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor);
1789     // typedef void *(*kmpc_ctor)(void *);
1790     auto *KmpcCtorTy =
1791         llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
1792                                 /*isVarArg*/ false)->getPointerTo();
1793     // typedef void *(*kmpc_cctor)(void *, void *);
1794     llvm::Type *KmpcCopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
1795     auto *KmpcCopyCtorTy =
1796         llvm::FunctionType::get(CGM.VoidPtrTy, KmpcCopyCtorTyArgs,
1797                                 /*isVarArg*/ false)
1798             ->getPointerTo();
1799     // typedef void (*kmpc_dtor)(void *);
1800     auto *KmpcDtorTy =
1801         llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy, /*isVarArg*/ false)
1802             ->getPointerTo();
1803     llvm::Type *FnTyArgs[] = {getIdentTyPointerTy(), CGM.VoidPtrTy, KmpcCtorTy,
1804                               KmpcCopyCtorTy, KmpcDtorTy};
1805     auto *FnTy = llvm::FunctionType::get(CGM.VoidTy, FnTyArgs,
1806                                         /*isVarArg*/ false);
1807     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_register");
1808     break;
1809   }
1810   case OMPRTL__kmpc_end_critical: {
1811     // Build void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid,
1812     // kmp_critical_name *crit);
1813     llvm::Type *TypeParams[] = {
1814         getIdentTyPointerTy(), CGM.Int32Ty,
1815         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
1816     auto *FnTy =
1817         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1818     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_critical");
1819     break;
1820   }
1821   case OMPRTL__kmpc_cancel_barrier: {
1822     // Build kmp_int32 __kmpc_cancel_barrier(ident_t *loc, kmp_int32
1823     // global_tid);
1824     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1825     auto *FnTy =
1826         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
1827     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_cancel_barrier");
1828     break;
1829   }
1830   case OMPRTL__kmpc_barrier: {
1831     // Build void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid);
1832     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1833     auto *FnTy =
1834         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1835     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_barrier");
1836     break;
1837   }
1838   case OMPRTL__kmpc_for_static_fini: {
1839     // Build void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid);
1840     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1841     auto *FnTy =
1842         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1843     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_for_static_fini");
1844     break;
1845   }
1846   case OMPRTL__kmpc_push_num_threads: {
1847     // Build void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
1848     // kmp_int32 num_threads)
1849     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1850                                 CGM.Int32Ty};
1851     auto *FnTy =
1852         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1853     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_num_threads");
1854     break;
1855   }
1856   case OMPRTL__kmpc_serialized_parallel: {
1857     // Build void __kmpc_serialized_parallel(ident_t *loc, kmp_int32
1858     // global_tid);
1859     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1860     auto *FnTy =
1861         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1862     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_serialized_parallel");
1863     break;
1864   }
1865   case OMPRTL__kmpc_end_serialized_parallel: {
1866     // Build void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32
1867     // global_tid);
1868     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1869     auto *FnTy =
1870         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1871     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_serialized_parallel");
1872     break;
1873   }
1874   case OMPRTL__kmpc_flush: {
1875     // Build void __kmpc_flush(ident_t *loc);
1876     llvm::Type *TypeParams[] = {getIdentTyPointerTy()};
1877     auto *FnTy =
1878         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1879     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_flush");
1880     break;
1881   }
1882   case OMPRTL__kmpc_master: {
1883     // Build kmp_int32 __kmpc_master(ident_t *loc, kmp_int32 global_tid);
1884     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1885     auto *FnTy =
1886         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
1887     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_master");
1888     break;
1889   }
1890   case OMPRTL__kmpc_end_master: {
1891     // Build void __kmpc_end_master(ident_t *loc, kmp_int32 global_tid);
1892     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1893     auto *FnTy =
1894         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1895     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_master");
1896     break;
1897   }
1898   case OMPRTL__kmpc_omp_taskyield: {
1899     // Build kmp_int32 __kmpc_omp_taskyield(ident_t *, kmp_int32 global_tid,
1900     // int end_part);
1901     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
1902     auto *FnTy =
1903         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
1904     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_taskyield");
1905     break;
1906   }
1907   case OMPRTL__kmpc_single: {
1908     // Build kmp_int32 __kmpc_single(ident_t *loc, kmp_int32 global_tid);
1909     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1910     auto *FnTy =
1911         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
1912     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_single");
1913     break;
1914   }
1915   case OMPRTL__kmpc_end_single: {
1916     // Build void __kmpc_end_single(ident_t *loc, kmp_int32 global_tid);
1917     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1918     auto *FnTy =
1919         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1920     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_single");
1921     break;
1922   }
1923   case OMPRTL__kmpc_omp_task_alloc: {
1924     // Build kmp_task_t *__kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
1925     // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
1926     // kmp_routine_entry_t *task_entry);
1927     assert(KmpRoutineEntryPtrTy != nullptr &&
1928            "Type kmp_routine_entry_t must be created.");
1929     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty,
1930                                 CGM.SizeTy, CGM.SizeTy, KmpRoutineEntryPtrTy};
1931     // Return void * and then cast to particular kmp_task_t type.
1932     auto *FnTy =
1933         llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
1934     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_alloc");
1935     break;
1936   }
1937   case OMPRTL__kmpc_omp_target_task_alloc: {
1938     // Build kmp_task_t *__kmpc_omp_target_task_alloc(ident_t *, kmp_int32 gtid,
1939     // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
1940     // kmp_routine_entry_t *task_entry, kmp_int64 device_id);
1941     assert(KmpRoutineEntryPtrTy != nullptr &&
1942            "Type kmp_routine_entry_t must be created.");
1943     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty,
1944                                 CGM.SizeTy, CGM.SizeTy, KmpRoutineEntryPtrTy,
1945                                 CGM.Int64Ty};
1946     // Return void * and then cast to particular kmp_task_t type.
1947     auto *FnTy =
1948         llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
1949     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_target_task_alloc");
1950     break;
1951   }
1952   case OMPRTL__kmpc_omp_task: {
1953     // Build kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t
1954     // *new_task);
1955     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1956                                 CGM.VoidPtrTy};
1957     auto *FnTy =
1958         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
1959     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task");
1960     break;
1961   }
1962   case OMPRTL__kmpc_copyprivate: {
1963     // Build void __kmpc_copyprivate(ident_t *loc, kmp_int32 global_tid,
1964     // size_t cpy_size, void *cpy_data, void(*cpy_func)(void *, void *),
1965     // kmp_int32 didit);
1966     llvm::Type *CpyTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
1967     auto *CpyFnTy =
1968         llvm::FunctionType::get(CGM.VoidTy, CpyTypeParams, /*isVarArg=*/false);
1969     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.SizeTy,
1970                                 CGM.VoidPtrTy, CpyFnTy->getPointerTo(),
1971                                 CGM.Int32Ty};
1972     auto *FnTy =
1973         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1974     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_copyprivate");
1975     break;
1976   }
1977   case OMPRTL__kmpc_reduce: {
1978     // Build kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid,
1979     // kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void
1980     // (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name *lck);
1981     llvm::Type *ReduceTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
1982     auto *ReduceFnTy = llvm::FunctionType::get(CGM.VoidTy, ReduceTypeParams,
1983                                                /*isVarArg=*/false);
1984     llvm::Type *TypeParams[] = {
1985         getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, CGM.SizeTy,
1986         CGM.VoidPtrTy, ReduceFnTy->getPointerTo(),
1987         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
1988     auto *FnTy =
1989         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
1990     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_reduce");
1991     break;
1992   }
1993   case OMPRTL__kmpc_reduce_nowait: {
1994     // Build kmp_int32 __kmpc_reduce_nowait(ident_t *loc, kmp_int32
1995     // global_tid, kmp_int32 num_vars, size_t reduce_size, void *reduce_data,
1996     // void (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name
1997     // *lck);
1998     llvm::Type *ReduceTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
1999     auto *ReduceFnTy = llvm::FunctionType::get(CGM.VoidTy, ReduceTypeParams,
2000                                                /*isVarArg=*/false);
2001     llvm::Type *TypeParams[] = {
2002         getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, CGM.SizeTy,
2003         CGM.VoidPtrTy, ReduceFnTy->getPointerTo(),
2004         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
2005     auto *FnTy =
2006         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
2007     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_reduce_nowait");
2008     break;
2009   }
2010   case OMPRTL__kmpc_end_reduce: {
2011     // Build void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,
2012     // kmp_critical_name *lck);
2013     llvm::Type *TypeParams[] = {
2014         getIdentTyPointerTy(), CGM.Int32Ty,
2015         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
2016     auto *FnTy =
2017         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2018     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_reduce");
2019     break;
2020   }
2021   case OMPRTL__kmpc_end_reduce_nowait: {
2022     // Build __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
2023     // kmp_critical_name *lck);
2024     llvm::Type *TypeParams[] = {
2025         getIdentTyPointerTy(), CGM.Int32Ty,
2026         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
2027     auto *FnTy =
2028         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2029     RTLFn =
2030         CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_reduce_nowait");
2031     break;
2032   }
2033   case OMPRTL__kmpc_omp_task_begin_if0: {
    // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
2036     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
2037                                 CGM.VoidPtrTy};
2038     auto *FnTy =
2039         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2040     RTLFn =
2041         CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_begin_if0");
2042     break;
2043   }
2044   case OMPRTL__kmpc_omp_task_complete_if0: {
    // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
2047     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
2048                                 CGM.VoidPtrTy};
2049     auto *FnTy =
2050         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2051     RTLFn = CGM.CreateRuntimeFunction(FnTy,
2052                                       /*Name=*/"__kmpc_omp_task_complete_if0");
2053     break;
2054   }
2055   case OMPRTL__kmpc_ordered: {
2056     // Build void __kmpc_ordered(ident_t *loc, kmp_int32 global_tid);
2057     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
2058     auto *FnTy =
2059         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2060     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_ordered");
2061     break;
2062   }
2063   case OMPRTL__kmpc_end_ordered: {
2064     // Build void __kmpc_end_ordered(ident_t *loc, kmp_int32 global_tid);
2065     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
2066     auto *FnTy =
2067         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2068     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_ordered");
2069     break;
2070   }
2071   case OMPRTL__kmpc_omp_taskwait: {
2072     // Build kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32 global_tid);
2073     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
2074     auto *FnTy =
2075         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
2076     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_omp_taskwait");
2077     break;
2078   }
2079   case OMPRTL__kmpc_taskgroup: {
2080     // Build void __kmpc_taskgroup(ident_t *loc, kmp_int32 global_tid);
2081     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
2082     auto *FnTy =
2083         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2084     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_taskgroup");
2085     break;
2086   }
2087   case OMPRTL__kmpc_end_taskgroup: {
2088     // Build void __kmpc_end_taskgroup(ident_t *loc, kmp_int32 global_tid);
2089     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
2090     auto *FnTy =
2091         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2092     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_taskgroup");
2093     break;
2094   }
2095   case OMPRTL__kmpc_push_proc_bind: {
2096     // Build void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
2097     // int proc_bind)
2098     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
2099     auto *FnTy =
2100         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2101     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_proc_bind");
2102     break;
2103   }
2104   case OMPRTL__kmpc_omp_task_with_deps: {
2105     // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
2106     // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
2107     // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list);
2108     llvm::Type *TypeParams[] = {
2109         getIdentTyPointerTy(), CGM.Int32Ty, CGM.VoidPtrTy, CGM.Int32Ty,
2110         CGM.VoidPtrTy,         CGM.Int32Ty, CGM.VoidPtrTy};
2111     auto *FnTy =
2112         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
2113     RTLFn =
2114         CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_with_deps");
2115     break;
2116   }
2117   case OMPRTL__kmpc_omp_wait_deps: {
2118     // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
2119     // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 ndeps_noalias,
2120     // kmp_depend_info_t *noalias_dep_list);
2121     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
2122                                 CGM.Int32Ty,           CGM.VoidPtrTy,
2123                                 CGM.Int32Ty,           CGM.VoidPtrTy};
2124     auto *FnTy =
2125         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2126     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_wait_deps");
2127     break;
2128   }
2129   case OMPRTL__kmpc_cancellationpoint: {
2130     // Build kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
2131     // global_tid, kmp_int32 cncl_kind)
2132     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
2133     auto *FnTy =
2134         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2135     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_cancellationpoint");
2136     break;
2137   }
2138   case OMPRTL__kmpc_cancel: {
2139     // Build kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
2140     // kmp_int32 cncl_kind)
2141     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
2142     auto *FnTy =
2143         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2144     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_cancel");
2145     break;
2146   }
2147   case OMPRTL__kmpc_push_num_teams: {
2148     // Build void kmpc_push_num_teams (ident_t loc, kmp_int32 global_tid,
2149     // kmp_int32 num_teams, kmp_int32 num_threads)
2150     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty,
2151         CGM.Int32Ty};
2152     auto *FnTy =
2153         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2154     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_num_teams");
2155     break;
2156   }
2157   case OMPRTL__kmpc_fork_teams: {
2158     // Build void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro
2159     // microtask, ...);
2160     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
2161                                 getKmpc_MicroPointerTy()};
2162     auto *FnTy =
2163         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ true);
2164     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_fork_teams");
2165     if (auto *F = dyn_cast<llvm::Function>(RTLFn.getCallee())) {
2166       if (!F->hasMetadata(llvm::LLVMContext::MD_callback)) {
2167         llvm::LLVMContext &Ctx = F->getContext();
2168         llvm::MDBuilder MDB(Ctx);
2169         // Annotate the callback behavior of the __kmpc_fork_teams:
2170         //  - The callback callee is argument number 2 (microtask).
2171         //  - The first two arguments of the callback callee are unknown (-1).
2172         //  - All variadic arguments to the __kmpc_fork_teams are passed to the
2173         //    callback callee.
2174         F->addMetadata(
2175             llvm::LLVMContext::MD_callback,
2176             *llvm::MDNode::get(Ctx, {MDB.createCallbackEncoding(
2177                                         2, {-1, -1},
2178                                         /* VarArgsArePassed */ true)}));
2179       }
2180     }
2181     break;
2182   }
2183   case OMPRTL__kmpc_taskloop: {
2184     // Build void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
2185     // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
2186     // sched, kmp_uint64 grainsize, void *task_dup);
2187     llvm::Type *TypeParams[] = {getIdentTyPointerTy(),
2188                                 CGM.IntTy,
2189                                 CGM.VoidPtrTy,
2190                                 CGM.IntTy,
2191                                 CGM.Int64Ty->getPointerTo(),
2192                                 CGM.Int64Ty->getPointerTo(),
2193                                 CGM.Int64Ty,
2194                                 CGM.IntTy,
2195                                 CGM.IntTy,
2196                                 CGM.Int64Ty,
2197                                 CGM.VoidPtrTy};
2198     auto *FnTy =
2199         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2200     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_taskloop");
2201     break;
2202   }
2203   case OMPRTL__kmpc_doacross_init: {
2204     // Build void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid, kmp_int32
2205     // num_dims, struct kmp_dim *dims);
2206     llvm::Type *TypeParams[] = {getIdentTyPointerTy(),
2207                                 CGM.Int32Ty,
2208                                 CGM.Int32Ty,
2209                                 CGM.VoidPtrTy};
2210     auto *FnTy =
2211         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2212     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_init");
2213     break;
2214   }
2215   case OMPRTL__kmpc_doacross_fini: {
2216     // Build void __kmpc_doacross_fini(ident_t *loc, kmp_int32 gtid);
2217     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
2218     auto *FnTy =
2219         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2220     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_fini");
2221     break;
2222   }
2223   case OMPRTL__kmpc_doacross_post: {
2224     // Build void __kmpc_doacross_post(ident_t *loc, kmp_int32 gtid, kmp_int64
2225     // *vec);
2226     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
2227                                 CGM.Int64Ty->getPointerTo()};
2228     auto *FnTy =
2229         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2230     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_post");
2231     break;
2232   }
2233   case OMPRTL__kmpc_doacross_wait: {
2234     // Build void __kmpc_doacross_wait(ident_t *loc, kmp_int32 gtid, kmp_int64
2235     // *vec);
2236     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
2237                                 CGM.Int64Ty->getPointerTo()};
2238     auto *FnTy =
2239         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2240     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_wait");
2241     break;
2242   }
2243   case OMPRTL__kmpc_task_reduction_init: {
2244     // Build void *__kmpc_task_reduction_init(int gtid, int num_data, void
2245     // *data);
2246     llvm::Type *TypeParams[] = {CGM.IntTy, CGM.IntTy, CGM.VoidPtrTy};
2247     auto *FnTy =
2248         llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
2249     RTLFn =
2250         CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_task_reduction_init");
2251     break;
2252   }
2253   case OMPRTL__kmpc_task_reduction_get_th_data: {
2254     // Build void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
2255     // *d);
2256     llvm::Type *TypeParams[] = {CGM.IntTy, CGM.VoidPtrTy, CGM.VoidPtrTy};
2257     auto *FnTy =
2258         llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
2259     RTLFn = CGM.CreateRuntimeFunction(
2260         FnTy, /*Name=*/"__kmpc_task_reduction_get_th_data");
2261     break;
2262   }
2263   case OMPRTL__kmpc_alloc: {
2264     // Build to void *__kmpc_alloc(int gtid, size_t sz, omp_allocator_handle_t
2265     // al); omp_allocator_handle_t type is void *.
2266     llvm::Type *TypeParams[] = {CGM.IntTy, CGM.SizeTy, CGM.VoidPtrTy};
2267     auto *FnTy =
2268         llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
2269     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_alloc");
2270     break;
2271   }
2272   case OMPRTL__kmpc_free: {
2273     // Build to void __kmpc_free(int gtid, void *ptr, omp_allocator_handle_t
2274     // al); omp_allocator_handle_t type is void *.
2275     llvm::Type *TypeParams[] = {CGM.IntTy, CGM.VoidPtrTy, CGM.VoidPtrTy};
2276     auto *FnTy =
2277         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2278     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_free");
2279     break;
2280   }
2281   case OMPRTL__kmpc_push_target_tripcount: {
2282     // Build void __kmpc_push_target_tripcount(int64_t device_id, kmp_uint64
2283     // size);
2284     llvm::Type *TypeParams[] = {CGM.Int64Ty, CGM.Int64Ty};
2285     llvm::FunctionType *FnTy =
2286         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2287     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_target_tripcount");
2288     break;
2289   }
2290   case OMPRTL__tgt_target: {
2291     // Build int32_t __tgt_target(int64_t device_id, void *host_ptr, int32_t
2292     // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
2293     // *arg_types);
2294     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2295                                 CGM.VoidPtrTy,
2296                                 CGM.Int32Ty,
2297                                 CGM.VoidPtrPtrTy,
2298                                 CGM.VoidPtrPtrTy,
2299                                 CGM.Int64Ty->getPointerTo(),
2300                                 CGM.Int64Ty->getPointerTo()};
2301     auto *FnTy =
2302         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2303     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target");
2304     break;
2305   }
2306   case OMPRTL__tgt_target_nowait: {
2307     // Build int32_t __tgt_target_nowait(int64_t device_id, void *host_ptr,
2308     // int32_t arg_num, void** args_base, void **args, int64_t *arg_sizes,
2309     // int64_t *arg_types);
2310     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2311                                 CGM.VoidPtrTy,
2312                                 CGM.Int32Ty,
2313                                 CGM.VoidPtrPtrTy,
2314                                 CGM.VoidPtrPtrTy,
2315                                 CGM.Int64Ty->getPointerTo(),
2316                                 CGM.Int64Ty->getPointerTo()};
2317     auto *FnTy =
2318         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2319     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_nowait");
2320     break;
2321   }
2322   case OMPRTL__tgt_target_teams: {
2323     // Build int32_t __tgt_target_teams(int64_t device_id, void *host_ptr,
2324     // int32_t arg_num, void** args_base, void **args, int64_t *arg_sizes,
2325     // int64_t *arg_types, int32_t num_teams, int32_t thread_limit);
2326     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2327                                 CGM.VoidPtrTy,
2328                                 CGM.Int32Ty,
2329                                 CGM.VoidPtrPtrTy,
2330                                 CGM.VoidPtrPtrTy,
2331                                 CGM.Int64Ty->getPointerTo(),
2332                                 CGM.Int64Ty->getPointerTo(),
2333                                 CGM.Int32Ty,
2334                                 CGM.Int32Ty};
2335     auto *FnTy =
2336         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2337     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_teams");
2338     break;
2339   }
2340   case OMPRTL__tgt_target_teams_nowait: {
2341     // Build int32_t __tgt_target_teams_nowait(int64_t device_id, void
2342     // *host_ptr, int32_t arg_num, void** args_base, void **args, int64_t
2343     // *arg_sizes, int64_t *arg_types, int32_t num_teams, int32_t thread_limit);
2344     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2345                                 CGM.VoidPtrTy,
2346                                 CGM.Int32Ty,
2347                                 CGM.VoidPtrPtrTy,
2348                                 CGM.VoidPtrPtrTy,
2349                                 CGM.Int64Ty->getPointerTo(),
2350                                 CGM.Int64Ty->getPointerTo(),
2351                                 CGM.Int32Ty,
2352                                 CGM.Int32Ty};
2353     auto *FnTy =
2354         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2355     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_teams_nowait");
2356     break;
2357   }
2358   case OMPRTL__tgt_register_requires: {
2359     // Build void __tgt_register_requires(int64_t flags);
2360     llvm::Type *TypeParams[] = {CGM.Int64Ty};
2361     auto *FnTy =
2362         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2363     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_register_requires");
2364     break;
2365   }
2366   case OMPRTL__tgt_register_lib: {
2367     // Build void __tgt_register_lib(__tgt_bin_desc *desc);
2368     QualType ParamTy =
2369         CGM.getContext().getPointerType(getTgtBinaryDescriptorQTy());
2370     llvm::Type *TypeParams[] = {CGM.getTypes().ConvertTypeForMem(ParamTy)};
2371     auto *FnTy =
2372         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2373     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_register_lib");
2374     break;
2375   }
2376   case OMPRTL__tgt_unregister_lib: {
2377     // Build void __tgt_unregister_lib(__tgt_bin_desc *desc);
2378     QualType ParamTy =
2379         CGM.getContext().getPointerType(getTgtBinaryDescriptorQTy());
2380     llvm::Type *TypeParams[] = {CGM.getTypes().ConvertTypeForMem(ParamTy)};
2381     auto *FnTy =
2382         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2383     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_unregister_lib");
2384     break;
2385   }
2386   case OMPRTL__tgt_target_data_begin: {
2387     // Build void __tgt_target_data_begin(int64_t device_id, int32_t arg_num,
2388     // void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types);
2389     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2390                                 CGM.Int32Ty,
2391                                 CGM.VoidPtrPtrTy,
2392                                 CGM.VoidPtrPtrTy,
2393                                 CGM.Int64Ty->getPointerTo(),
2394                                 CGM.Int64Ty->getPointerTo()};
2395     auto *FnTy =
2396         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2397     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_begin");
2398     break;
2399   }
2400   case OMPRTL__tgt_target_data_begin_nowait: {
2401     // Build void __tgt_target_data_begin_nowait(int64_t device_id, int32_t
2402     // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
2403     // *arg_types);
2404     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2405                                 CGM.Int32Ty,
2406                                 CGM.VoidPtrPtrTy,
2407                                 CGM.VoidPtrPtrTy,
2408                                 CGM.Int64Ty->getPointerTo(),
2409                                 CGM.Int64Ty->getPointerTo()};
2410     auto *FnTy =
2411         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2412     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_begin_nowait");
2413     break;
2414   }
2415   case OMPRTL__tgt_target_data_end: {
2416     // Build void __tgt_target_data_end(int64_t device_id, int32_t arg_num,
2417     // void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types);
2418     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2419                                 CGM.Int32Ty,
2420                                 CGM.VoidPtrPtrTy,
2421                                 CGM.VoidPtrPtrTy,
2422                                 CGM.Int64Ty->getPointerTo(),
2423                                 CGM.Int64Ty->getPointerTo()};
2424     auto *FnTy =
2425         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2426     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_end");
2427     break;
2428   }
2429   case OMPRTL__tgt_target_data_end_nowait: {
2430     // Build void __tgt_target_data_end_nowait(int64_t device_id, int32_t
2431     // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
2432     // *arg_types);
2433     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2434                                 CGM.Int32Ty,
2435                                 CGM.VoidPtrPtrTy,
2436                                 CGM.VoidPtrPtrTy,
2437                                 CGM.Int64Ty->getPointerTo(),
2438                                 CGM.Int64Ty->getPointerTo()};
2439     auto *FnTy =
2440         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2441     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_end_nowait");
2442     break;
2443   }
2444   case OMPRTL__tgt_target_data_update: {
2445     // Build void __tgt_target_data_update(int64_t device_id, int32_t arg_num,
2446     // void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types);
2447     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2448                                 CGM.Int32Ty,
2449                                 CGM.VoidPtrPtrTy,
2450                                 CGM.VoidPtrPtrTy,
2451                                 CGM.Int64Ty->getPointerTo(),
2452                                 CGM.Int64Ty->getPointerTo()};
2453     auto *FnTy =
2454         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2455     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_update");
2456     break;
2457   }
2458   case OMPRTL__tgt_target_data_update_nowait: {
2459     // Build void __tgt_target_data_update_nowait(int64_t device_id, int32_t
2460     // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
2461     // *arg_types);
2462     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2463                                 CGM.Int32Ty,
2464                                 CGM.VoidPtrPtrTy,
2465                                 CGM.VoidPtrPtrTy,
2466                                 CGM.Int64Ty->getPointerTo(),
2467                                 CGM.Int64Ty->getPointerTo()};
2468     auto *FnTy =
2469         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2470     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_update_nowait");
2471     break;
2472   }
2473   case OMPRTL__tgt_mapper_num_components: {
2474     // Build int64_t __tgt_mapper_num_components(void *rt_mapper_handle);
2475     llvm::Type *TypeParams[] = {CGM.VoidPtrTy};
2476     auto *FnTy =
2477         llvm::FunctionType::get(CGM.Int64Ty, TypeParams, /*isVarArg*/ false);
2478     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_mapper_num_components");
2479     break;
2480   }
2481   case OMPRTL__tgt_push_mapper_component: {
2482     // Build void __tgt_push_mapper_component(void *rt_mapper_handle, void
2483     // *base, void *begin, int64_t size, int64_t type);
2484     llvm::Type *TypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy, CGM.VoidPtrTy,
2485                                 CGM.Int64Ty, CGM.Int64Ty};
2486     auto *FnTy =
2487         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2488     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_push_mapper_component");
2489     break;
2490   }
2491   }
2492   assert(RTLFn && "Unable to find OpenMP runtime function");
2493   return RTLFn;
2494 }
2495 
2496 llvm::FunctionCallee
2497 CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize, bool IVSigned) {
2498   assert((IVSize == 32 || IVSize == 64) &&
2499          "IV size is not compatible with the omp runtime");
2500   StringRef Name = IVSize == 32 ? (IVSigned ? "__kmpc_for_static_init_4"
2501                                             : "__kmpc_for_static_init_4u")
2502                                 : (IVSigned ? "__kmpc_for_static_init_8"
2503                                             : "__kmpc_for_static_init_8u");
2504   llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
2505   auto *PtrTy = llvm::PointerType::getUnqual(ITy);
2506   llvm::Type *TypeParams[] = {
2507     getIdentTyPointerTy(),                     // loc
2508     CGM.Int32Ty,                               // tid
2509     CGM.Int32Ty,                               // schedtype
2510     llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
2511     PtrTy,                                     // p_lower
2512     PtrTy,                                     // p_upper
2513     PtrTy,                                     // p_stride
2514     ITy,                                       // incr
2515     ITy                                        // chunk
2516   };
2517   auto *FnTy =
2518       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2519   return CGM.CreateRuntimeFunction(FnTy, Name);
2520 }
2521 
2522 llvm::FunctionCallee
2523 CGOpenMPRuntime::createDispatchInitFunction(unsigned IVSize, bool IVSigned) {
2524   assert((IVSize == 32 || IVSize == 64) &&
2525          "IV size is not compatible with the omp runtime");
2526   StringRef Name =
2527       IVSize == 32
2528           ? (IVSigned ? "__kmpc_dispatch_init_4" : "__kmpc_dispatch_init_4u")
2529           : (IVSigned ? "__kmpc_dispatch_init_8" : "__kmpc_dispatch_init_8u");
2530   llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
2531   llvm::Type *TypeParams[] = { getIdentTyPointerTy(), // loc
2532                                CGM.Int32Ty,           // tid
2533                                CGM.Int32Ty,           // schedtype
2534                                ITy,                   // lower
2535                                ITy,                   // upper
2536                                ITy,                   // stride
2537                                ITy                    // chunk
2538   };
2539   auto *FnTy =
2540       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2541   return CGM.CreateRuntimeFunction(FnTy, Name);
2542 }
2543 
2544 llvm::FunctionCallee
2545 CGOpenMPRuntime::createDispatchFiniFunction(unsigned IVSize, bool IVSigned) {
2546   assert((IVSize == 32 || IVSize == 64) &&
2547          "IV size is not compatible with the omp runtime");
2548   StringRef Name =
2549       IVSize == 32
2550           ? (IVSigned ? "__kmpc_dispatch_fini_4" : "__kmpc_dispatch_fini_4u")
2551           : (IVSigned ? "__kmpc_dispatch_fini_8" : "__kmpc_dispatch_fini_8u");
2552   llvm::Type *TypeParams[] = {
2553       getIdentTyPointerTy(), // loc
2554       CGM.Int32Ty,           // tid
2555   };
2556   auto *FnTy =
2557       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2558   return CGM.CreateRuntimeFunction(FnTy, Name);
2559 }
2560 
2561 llvm::FunctionCallee
2562 CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize, bool IVSigned) {
2563   assert((IVSize == 32 || IVSize == 64) &&
2564          "IV size is not compatible with the omp runtime");
2565   StringRef Name =
2566       IVSize == 32
2567           ? (IVSigned ? "__kmpc_dispatch_next_4" : "__kmpc_dispatch_next_4u")
2568           : (IVSigned ? "__kmpc_dispatch_next_8" : "__kmpc_dispatch_next_8u");
2569   llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
2570   auto *PtrTy = llvm::PointerType::getUnqual(ITy);
2571   llvm::Type *TypeParams[] = {
2572     getIdentTyPointerTy(),                     // loc
2573     CGM.Int32Ty,                               // tid
2574     llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
2575     PtrTy,                                     // p_lower
2576     PtrTy,                                     // p_upper
2577     PtrTy                                      // p_stride
2578   };
2579   auto *FnTy =
2580       llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2581   return CGM.CreateRuntimeFunction(FnTy, Name);
2582 }
2583 
2584 /// Obtain information that uniquely identifies a target entry. This
2585 /// consists of the file and device IDs as well as line number associated with
2586 /// the relevant entry source location.
2587 static void getTargetEntryUniqueInfo(ASTContext &C, SourceLocation Loc,
2588                                      unsigned &DeviceID, unsigned &FileID,
2589                                      unsigned &LineNum) {
2590   SourceManager &SM = C.getSourceManager();
2591 
2592   // The loc should be always valid and have a file ID (the user cannot use
2593   // #pragma directives in macros)
2594 
2595   assert(Loc.isValid() && "Source location is expected to be always valid.");
2596 
2597   PresumedLoc PLoc = SM.getPresumedLoc(Loc);
2598   assert(PLoc.isValid() && "Source location is expected to be always valid.");
2599 
2600   llvm::sys::fs::UniqueID ID;
2601   if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID))
2602     SM.getDiagnostics().Report(diag::err_cannot_open_file)
2603         << PLoc.getFilename() << EC.message();
2604 
2605   DeviceID = ID.getDevice();
2606   FileID = ID.getFile();
2607   LineNum = PLoc.getLine();
2608 }
2609 
2610 Address CGOpenMPRuntime::getAddrOfDeclareTargetVar(const VarDecl *VD) {
2611   if (CGM.getLangOpts().OpenMPSimd)
2612     return Address::invalid();
2613   llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
2614       OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
2615   if (Res && (*Res == OMPDeclareTargetDeclAttr::MT_Link ||
2616               (*Res == OMPDeclareTargetDeclAttr::MT_To &&
2617                HasRequiresUnifiedSharedMemory))) {
2618     SmallString<64> PtrName;
2619     {
2620       llvm::raw_svector_ostream OS(PtrName);
2621       OS << CGM.getMangledName(GlobalDecl(VD));
2622       if (!VD->isExternallyVisible()) {
2623         unsigned DeviceID, FileID, Line;
2624         getTargetEntryUniqueInfo(CGM.getContext(),
2625                                  VD->getCanonicalDecl()->getBeginLoc(),
2626                                  DeviceID, FileID, Line);
2627         OS << llvm::format("_%x", FileID);
2628       }
2629       OS << "_decl_tgt_ref_ptr";
2630     }
2631     llvm::Value *Ptr = CGM.getModule().getNamedValue(PtrName);
2632     if (!Ptr) {
2633       QualType PtrTy = CGM.getContext().getPointerType(VD->getType());
2634       Ptr = getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(PtrTy),
2635                                         PtrName);
2636 
2637       auto *GV = cast<llvm::GlobalVariable>(Ptr);
2638       GV->setLinkage(llvm::GlobalValue::WeakAnyLinkage);
2639 
2640       if (!CGM.getLangOpts().OpenMPIsDevice)
2641         GV->setInitializer(CGM.GetAddrOfGlobal(VD));
2642       registerTargetGlobalVariable(VD, cast<llvm::Constant>(Ptr));
2643     }
2644     return Address(Ptr, CGM.getContext().getDeclAlign(VD));
2645   }
2646   return Address::invalid();
2647 }
2648 
2649 llvm::Constant *
2650 CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) {
2651   assert(!CGM.getLangOpts().OpenMPUseTLS ||
2652          !CGM.getContext().getTargetInfo().isTLSSupported());
2653   // Lookup the entry, lazily creating it if necessary.
2654   std::string Suffix = getName({"cache", ""});
2655   return getOrCreateInternalVariable(
2656       CGM.Int8PtrPtrTy, Twine(CGM.getMangledName(VD)).concat(Suffix));
2657 }
2658 
2659 Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
2660                                                 const VarDecl *VD,
2661                                                 Address VDAddr,
2662                                                 SourceLocation Loc) {
2663   if (CGM.getLangOpts().OpenMPUseTLS &&
2664       CGM.getContext().getTargetInfo().isTLSSupported())
2665     return VDAddr;
2666 
2667   llvm::Type *VarTy = VDAddr.getElementType();
2668   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2669                          CGF.Builder.CreatePointerCast(VDAddr.getPointer(),
2670                                                        CGM.Int8PtrTy),
2671                          CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)),
2672                          getOrCreateThreadPrivateCache(VD)};
2673   return Address(CGF.EmitRuntimeCall(
2674       createRuntimeFunction(OMPRTL__kmpc_threadprivate_cached), Args),
2675                  VDAddr.getAlignment());
2676 }
2677 
2678 void CGOpenMPRuntime::emitThreadPrivateVarInit(
2679     CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor,
2680     llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) {
2681   // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime
2682   // library.
2683   llvm::Value *OMPLoc = emitUpdateLocation(CGF, Loc);
2684   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_global_thread_num),
2685                       OMPLoc);
2686   // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor)
2687   // to register constructor/destructor for variable.
2688   llvm::Value *Args[] = {
2689       OMPLoc, CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.VoidPtrTy),
2690       Ctor, CopyCtor, Dtor};
2691   CGF.EmitRuntimeCall(
2692       createRuntimeFunction(OMPRTL__kmpc_threadprivate_register), Args);
2693 }
2694 
/// Emits the helper functions and the runtime registration needed for the
/// threadprivate variable \p VD when the runtime-based (non-TLS)
/// implementation is used.
///
/// \param VD          Threadprivate variable; its definition is looked up and
///                    each definition is processed at most once.
/// \param VDAddr      Address of the original variable.
/// \param Loc         Location used for the generated functions and calls.
/// \param PerformInit When true (and compiling C++), a constructor helper that
///                    re-emits the variable's initializer is generated.
/// \param CGF         If non-null, the registration calls are emitted directly
///                    into this function; if null, a separate init function is
///                    created instead.
/// \returns the newly created init function when \p CGF is null and a
/// constructor or destructor helper was generated; nullptr in every other
/// case.
llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition(
    const VarDecl *VD, Address VDAddr, SourceLocation Loc,
    bool PerformInit, CodeGenFunction *CGF) {
  // Nothing to do when native TLS is requested and supported.
  if (CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getContext().getTargetInfo().isTLSSupported())
    return nullptr;

  VD = VD->getDefinition(CGM.getContext());
  // Proceed only if a definition exists and it has not been handled before
  // (tracked by mangled name).
  if (VD && ThreadPrivateWithDefinition.insert(CGM.getMangledName(VD)).second) {
    QualType ASTTy = VD->getType();

    llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr;
    const Expr *Init = VD->getAnyInitializer();
    if (CGM.getLangOpts().CPlusPlus && PerformInit) {
      // Generate function that re-emits the declaration's initializer into the
      // threadprivate copy of the variable VD
      CodeGenFunction CtorCGF(CGM);
      FunctionArgList Args;
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
                            ImplicitParamDecl::Other);
      Args.push_back(&Dst);

      // The helper has the shape: void *ctor(void *dst).
      const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          CGM.getContext().VoidPtrTy, Args);
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      std::string Name = getName({"__kmpc_global_ctor_", ""});
      llvm::Function *Fn =
          CGM.CreateGlobalInitOrDestructFunction(FTy, Name, FI, Loc);
      CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI,
                            Args, Loc, Loc);
      llvm::Value *ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      // View the incoming void* as a pointer to the variable's memory type,
      // using the alignment of the original variable.
      Address Arg = Address(ArgVal, VDAddr.getAlignment());
      Arg = CtorCGF.Builder.CreateElementBitCast(
          Arg, CtorCGF.ConvertTypeForMem(ASTTy));
      CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(),
                               /*IsInitializer=*/true);
      // Reload the argument and store it as the helper's return value.
      ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue);
      CtorCGF.FinishFunction();
      Ctor = Fn;
    }
    if (VD->getType().isDestructedType() != QualType::DK_none) {
      // Generate function that emits destructor call for the threadprivate copy
      // of the variable VD
      CodeGenFunction DtorCGF(CGM);
      FunctionArgList Args;
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
                            ImplicitParamDecl::Other);
      Args.push_back(&Dst);

      // The helper has the shape: void dtor(void *dst).
      const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          CGM.getContext().VoidTy, Args);
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      std::string Name = getName({"__kmpc_global_dtor_", ""});
      llvm::Function *Fn =
          CGM.CreateGlobalInitOrDestructFunction(FTy, Name, FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
      DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args,
                            Loc, Loc);
      // Create a scope with an artificial location for the body of this function.
      auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
      llvm::Value *ArgVal = DtorCGF.EmitLoadOfScalar(
          DtorCGF.GetAddrOfLocalVar(&Dst),
          /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation());
      DtorCGF.emitDestroy(Address(ArgVal, VDAddr.getAlignment()), ASTTy,
                          DtorCGF.getDestroyer(ASTTy.isDestructedType()),
                          DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
      DtorCGF.FinishFunction();
      Dtor = Fn;
    }
    // Do not emit init function if it is not required.
    if (!Ctor && !Dtor)
      return nullptr;

    llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
    auto *CopyCtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs,
                                               /*isVarArg=*/false)
                           ->getPointerTo();
    // Copying constructor for the threadprivate variable.
    // Must be NULL - reserved by runtime, but currently it requires that this
    // parameter is always NULL. Otherwise it fires assertion.
    CopyCtor = llvm::Constant::getNullValue(CopyCtorTy);
    // Whichever of the constructor/destructor helpers was not generated is
    // passed to the runtime as a typed null function pointer.
    if (Ctor == nullptr) {
      auto *CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
                                             /*isVarArg=*/false)
                         ->getPointerTo();
      Ctor = llvm::Constant::getNullValue(CtorTy);
    }
    if (Dtor == nullptr) {
      auto *DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy,
                                             /*isVarArg=*/false)
                         ->getPointerTo();
      Dtor = llvm::Constant::getNullValue(DtorTy);
    }
    // Without a caller-provided function, wrap the registration calls in a
    // dedicated nullary init function and hand it back to the caller.
    if (!CGF) {
      auto *InitFunctionTy =
          llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false);
      std::string Name = getName({"__omp_threadprivate_init_", ""});
      llvm::Function *InitFunction = CGM.CreateGlobalInitOrDestructFunction(
          InitFunctionTy, Name, CGM.getTypes().arrangeNullaryFunction());
      CodeGenFunction InitCGF(CGM);
      FunctionArgList ArgList;
      InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction,
                            CGM.getTypes().arrangeNullaryFunction(), ArgList,
                            Loc, Loc);
      emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
      InitCGF.FinishFunction();
      return InitFunction;
    }
    // Otherwise emit the registration directly into the caller's function.
    emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
  }
  return nullptr;
}
2814 
2815 bool CGOpenMPRuntime::emitDeclareTargetVarDefinition(const VarDecl *VD,
2816                                                      llvm::GlobalVariable *Addr,
2817                                                      bool PerformInit) {
2818   Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
2819       OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
2820   if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
2821       (*Res == OMPDeclareTargetDeclAttr::MT_To &&
2822        HasRequiresUnifiedSharedMemory))
2823     return CGM.getLangOpts().OpenMPIsDevice;
2824   VD = VD->getDefinition(CGM.getContext());
2825   if (VD && !DeclareTargetWithDefinition.insert(CGM.getMangledName(VD)).second)
2826     return CGM.getLangOpts().OpenMPIsDevice;
2827 
2828   QualType ASTTy = VD->getType();
2829 
2830   SourceLocation Loc = VD->getCanonicalDecl()->getBeginLoc();
2831   // Produce the unique prefix to identify the new target regions. We use
2832   // the source location of the variable declaration which we know to not
2833   // conflict with any target region.
2834   unsigned DeviceID;
2835   unsigned FileID;
2836   unsigned Line;
2837   getTargetEntryUniqueInfo(CGM.getContext(), Loc, DeviceID, FileID, Line);
2838   SmallString<128> Buffer, Out;
2839   {
2840     llvm::raw_svector_ostream OS(Buffer);
2841     OS << "__omp_offloading_" << llvm::format("_%x", DeviceID)
2842        << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line;
2843   }
2844 
2845   const Expr *Init = VD->getAnyInitializer();
2846   if (CGM.getLangOpts().CPlusPlus && PerformInit) {
2847     llvm::Constant *Ctor;
2848     llvm::Constant *ID;
2849     if (CGM.getLangOpts().OpenMPIsDevice) {
2850       // Generate function that re-emits the declaration's initializer into
2851       // the threadprivate copy of the variable VD
2852       CodeGenFunction CtorCGF(CGM);
2853 
2854       const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
2855       llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
2856       llvm::Function *Fn = CGM.CreateGlobalInitOrDestructFunction(
2857           FTy, Twine(Buffer, "_ctor"), FI, Loc);
2858       auto NL = ApplyDebugLocation::CreateEmpty(CtorCGF);
2859       CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
2860                             FunctionArgList(), Loc, Loc);
2861       auto AL = ApplyDebugLocation::CreateArtificial(CtorCGF);
2862       CtorCGF.EmitAnyExprToMem(Init,
2863                                Address(Addr, CGM.getContext().getDeclAlign(VD)),
2864                                Init->getType().getQualifiers(),
2865                                /*IsInitializer=*/true);
2866       CtorCGF.FinishFunction();
2867       Ctor = Fn;
2868       ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
2869       CGM.addUsedGlobal(cast<llvm::GlobalValue>(Ctor));
2870     } else {
2871       Ctor = new llvm::GlobalVariable(
2872           CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
2873           llvm::GlobalValue::PrivateLinkage,
2874           llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_ctor"));
2875       ID = Ctor;
2876     }
2877 
2878     // Register the information for the entry associated with the constructor.
2879     Out.clear();
2880     OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
2881         DeviceID, FileID, Twine(Buffer, "_ctor").toStringRef(Out), Line, Ctor,
2882         ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryCtor);
2883   }
2884   if (VD->getType().isDestructedType() != QualType::DK_none) {
2885     llvm::Constant *Dtor;
2886     llvm::Constant *ID;
2887     if (CGM.getLangOpts().OpenMPIsDevice) {
2888       // Generate function that emits destructor call for the threadprivate
2889       // copy of the variable VD
2890       CodeGenFunction DtorCGF(CGM);
2891 
2892       const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
2893       llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
2894       llvm::Function *Fn = CGM.CreateGlobalInitOrDestructFunction(
2895           FTy, Twine(Buffer, "_dtor"), FI, Loc);
2896       auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
2897       DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
2898                             FunctionArgList(), Loc, Loc);
2899       // Create a scope with an artificial location for the body of this
2900       // function.
2901       auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
2902       DtorCGF.emitDestroy(Address(Addr, CGM.getContext().getDeclAlign(VD)),
2903                           ASTTy, DtorCGF.getDestroyer(ASTTy.isDestructedType()),
2904                           DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
2905       DtorCGF.FinishFunction();
2906       Dtor = Fn;
2907       ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
2908       CGM.addUsedGlobal(cast<llvm::GlobalValue>(Dtor));
2909     } else {
2910       Dtor = new llvm::GlobalVariable(
2911           CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
2912           llvm::GlobalValue::PrivateLinkage,
2913           llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_dtor"));
2914       ID = Dtor;
2915     }
2916     // Register the information for the entry associated with the destructor.
2917     Out.clear();
2918     OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
2919         DeviceID, FileID, Twine(Buffer, "_dtor").toStringRef(Out), Line, Dtor,
2920         ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryDtor);
2921   }
2922   return CGM.getLangOpts().OpenMPIsDevice;
2923 }
2924 
2925 Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF,
2926                                                           QualType VarType,
2927                                                           StringRef Name) {
2928   std::string Suffix = getName({"artificial", ""});
2929   std::string CacheSuffix = getName({"cache", ""});
2930   llvm::Type *VarLVType = CGF.ConvertTypeForMem(VarType);
2931   llvm::Value *GAddr =
2932       getOrCreateInternalVariable(VarLVType, Twine(Name).concat(Suffix));
2933   llvm::Value *Args[] = {
2934       emitUpdateLocation(CGF, SourceLocation()),
2935       getThreadID(CGF, SourceLocation()),
2936       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(GAddr, CGM.VoidPtrTy),
2937       CGF.Builder.CreateIntCast(CGF.getTypeSize(VarType), CGM.SizeTy,
2938                                 /*isSigned=*/false),
2939       getOrCreateInternalVariable(
2940           CGM.VoidPtrPtrTy, Twine(Name).concat(Suffix).concat(CacheSuffix))};
2941   return Address(
2942       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2943           CGF.EmitRuntimeCall(
2944               createRuntimeFunction(OMPRTL__kmpc_threadprivate_cached), Args),
2945           VarLVType->getPointerTo(/*AddrSpace=*/0)),
2946       CGM.getPointerAlign());
2947 }
2948 
2949 void CGOpenMPRuntime::emitOMPIfClause(CodeGenFunction &CGF, const Expr *Cond,
2950                                       const RegionCodeGenTy &ThenGen,
2951                                       const RegionCodeGenTy &ElseGen) {
2952   CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange());
2953 
2954   // If the condition constant folds and can be elided, try to avoid emitting
2955   // the condition and the dead arm of the if/else.
2956   bool CondConstant;
2957   if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) {
2958     if (CondConstant)
2959       ThenGen(CGF);
2960     else
2961       ElseGen(CGF);
2962     return;
2963   }
2964 
2965   // Otherwise, the condition did not fold, or we couldn't elide it.  Just
2966   // emit the conditional branch.
2967   llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("omp_if.then");
2968   llvm::BasicBlock *ElseBlock = CGF.createBasicBlock("omp_if.else");
2969   llvm::BasicBlock *ContBlock = CGF.createBasicBlock("omp_if.end");
2970   CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0);
2971 
2972   // Emit the 'then' code.
2973   CGF.EmitBlock(ThenBlock);
2974   ThenGen(CGF);
2975   CGF.EmitBranch(ContBlock);
2976   // Emit the 'else' code if present.
2977   // There is no need to emit line number for unconditional branch.
2978   (void)ApplyDebugLocation::CreateEmpty(CGF);
2979   CGF.EmitBlock(ElseBlock);
2980   ElseGen(CGF);
2981   // There is no need to emit line number for unconditional branch.
2982   (void)ApplyDebugLocation::CreateEmpty(CGF);
2983   CGF.EmitBranch(ContBlock);
2984   // Emit the continuation block for code after the if.
2985   CGF.EmitBlock(ContBlock, /*IsFinished=*/true);
2986 }
2987 
2988 void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
2989                                        llvm::Function *OutlinedFn,
2990                                        ArrayRef<llvm::Value *> CapturedVars,
2991                                        const Expr *IfCond) {
2992   if (!CGF.HaveInsertPoint())
2993     return;
2994   llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
2995   auto &&ThenGen = [OutlinedFn, CapturedVars, RTLoc](CodeGenFunction &CGF,
2996                                                      PrePostActionTy &) {
2997     // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn);
2998     CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
2999     llvm::Value *Args[] = {
3000         RTLoc,
3001         CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
3002         CGF.Builder.CreateBitCast(OutlinedFn, RT.getKmpc_MicroPointerTy())};
3003     llvm::SmallVector<llvm::Value *, 16> RealArgs;
3004     RealArgs.append(std::begin(Args), std::end(Args));
3005     RealArgs.append(CapturedVars.begin(), CapturedVars.end());
3006 
3007     llvm::FunctionCallee RTLFn =
3008         RT.createRuntimeFunction(OMPRTL__kmpc_fork_call);
3009     CGF.EmitRuntimeCall(RTLFn, RealArgs);
3010   };
3011   auto &&ElseGen = [OutlinedFn, CapturedVars, RTLoc, Loc](CodeGenFunction &CGF,
3012                                                           PrePostActionTy &) {
3013     CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
3014     llvm::Value *ThreadID = RT.getThreadID(CGF, Loc);
3015     // Build calls:
3016     // __kmpc_serialized_parallel(&Loc, GTid);
3017     llvm::Value *Args[] = {RTLoc, ThreadID};
3018     CGF.EmitRuntimeCall(
3019         RT.createRuntimeFunction(OMPRTL__kmpc_serialized_parallel), Args);
3020 
3021     // OutlinedFn(&GTid, &zero, CapturedStruct);
3022     Address ZeroAddr = CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty,
3023                                                         /*Name*/ ".zero.addr");
3024     CGF.InitTempAlloca(ZeroAddr, CGF.Builder.getInt32(/*C*/ 0));
3025     llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
3026     // ThreadId for serialized parallels is 0.
3027     OutlinedFnArgs.push_back(ZeroAddr.getPointer());
3028     OutlinedFnArgs.push_back(ZeroAddr.getPointer());
3029     OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());
3030     RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs);
3031 
3032     // __kmpc_end_serialized_parallel(&Loc, GTid);
3033     llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID};
3034     CGF.EmitRuntimeCall(
3035         RT.createRuntimeFunction(OMPRTL__kmpc_end_serialized_parallel),
3036         EndArgs);
3037   };
3038   if (IfCond) {
3039     emitOMPIfClause(CGF, IfCond, ThenGen, ElseGen);
3040   } else {
3041     RegionCodeGenTy ThenRCG(ThenGen);
3042     ThenRCG(CGF);
3043   }
3044 }
3045 
3046 // If we're inside an (outlined) parallel region, use the region info's
3047 // thread-ID variable (it is passed in a first argument of the outlined function
3048 // as "kmp_int32 *gtid"). Otherwise, if we're not inside parallel region, but in
3049 // regular serial code region, get thread ID by calling kmp_int32
3050 // kmpc_global_thread_num(ident_t *loc), stash this thread ID in a temporary and
3051 // return the address of that temp.
3052 Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF,
3053                                              SourceLocation Loc) {
3054   if (auto *OMPRegionInfo =
3055           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
3056     if (OMPRegionInfo->getThreadIDVariable())
3057       return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress();
3058 
3059   llvm::Value *ThreadID = getThreadID(CGF, Loc);
3060   QualType Int32Ty =
3061       CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true);
3062   Address ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp.");
3063   CGF.EmitStoreOfScalar(ThreadID,
3064                         CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty));
3065 
3066   return ThreadIDTemp;
3067 }
3068 
3069 llvm::Constant *CGOpenMPRuntime::getOrCreateInternalVariable(
3070     llvm::Type *Ty, const llvm::Twine &Name, unsigned AddressSpace) {
3071   SmallString<256> Buffer;
3072   llvm::raw_svector_ostream Out(Buffer);
3073   Out << Name;
3074   StringRef RuntimeName = Out.str();
3075   auto &Elem = *InternalVars.try_emplace(RuntimeName, nullptr).first;
3076   if (Elem.second) {
3077     assert(Elem.second->getType()->getPointerElementType() == Ty &&
3078            "OMP internal variable has different type than requested");
3079     return &*Elem.second;
3080   }
3081 
3082   return Elem.second = new llvm::GlobalVariable(
3083              CGM.getModule(), Ty, /*IsConstant*/ false,
3084              llvm::GlobalValue::CommonLinkage, llvm::Constant::getNullValue(Ty),
3085              Elem.first(), /*InsertBefore=*/nullptr,
3086              llvm::GlobalValue::NotThreadLocal, AddressSpace);
3087 }
3088 
3089 llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) {
3090   std::string Prefix = Twine("gomp_critical_user_", CriticalName).str();
3091   std::string Name = getName({Prefix, "var"});
3092   return getOrCreateInternalVariable(KmpCriticalNameTy, Name);
3093 }
3094 
3095 namespace {
3096 /// Common pre(post)-action for different OpenMP constructs.
3097 class CommonActionTy final : public PrePostActionTy {
3098   llvm::FunctionCallee EnterCallee;
3099   ArrayRef<llvm::Value *> EnterArgs;
3100   llvm::FunctionCallee ExitCallee;
3101   ArrayRef<llvm::Value *> ExitArgs;
3102   bool Conditional;
3103   llvm::BasicBlock *ContBlock = nullptr;
3104 
3105 public:
3106   CommonActionTy(llvm::FunctionCallee EnterCallee,
3107                  ArrayRef<llvm::Value *> EnterArgs,
3108                  llvm::FunctionCallee ExitCallee,
3109                  ArrayRef<llvm::Value *> ExitArgs, bool Conditional = false)
3110       : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee),
3111         ExitArgs(ExitArgs), Conditional(Conditional) {}
3112   void Enter(CodeGenFunction &CGF) override {
3113     llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs);
3114     if (Conditional) {
3115       llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes);
3116       auto *ThenBlock = CGF.createBasicBlock("omp_if.then");
3117       ContBlock = CGF.createBasicBlock("omp_if.end");
3118       // Generate the branch (If-stmt)
3119       CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock);
3120       CGF.EmitBlock(ThenBlock);
3121     }
3122   }
3123   void Done(CodeGenFunction &CGF) {
3124     // Emit the rest of blocks/branches
3125     CGF.EmitBranch(ContBlock);
3126     CGF.EmitBlock(ContBlock, true);
3127   }
3128   void Exit(CodeGenFunction &CGF) override {
3129     CGF.EmitRuntimeCall(ExitCallee, ExitArgs);
3130   }
3131 };
3132 } // anonymous namespace
3133 
3134 void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF,
3135                                          StringRef CriticalName,
3136                                          const RegionCodeGenTy &CriticalOpGen,
3137                                          SourceLocation Loc, const Expr *Hint) {
3138   // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]);
3139   // CriticalOpGen();
3140   // __kmpc_end_critical(ident_t *, gtid, Lock);
3141   // Prepare arguments and build a call to __kmpc_critical
3142   if (!CGF.HaveInsertPoint())
3143     return;
3144   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
3145                          getCriticalRegionLock(CriticalName)};
3146   llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args),
3147                                                 std::end(Args));
3148   if (Hint) {
3149     EnterArgs.push_back(CGF.Builder.CreateIntCast(
3150         CGF.EmitScalarExpr(Hint), CGM.IntPtrTy, /*isSigned=*/false));
3151   }
3152   CommonActionTy Action(
3153       createRuntimeFunction(Hint ? OMPRTL__kmpc_critical_with_hint
3154                                  : OMPRTL__kmpc_critical),
3155       EnterArgs, createRuntimeFunction(OMPRTL__kmpc_end_critical), Args);
3156   CriticalOpGen.setAction(Action);
3157   emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen);
3158 }
3159 
3160 void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF,
3161                                        const RegionCodeGenTy &MasterOpGen,
3162                                        SourceLocation Loc) {
3163   if (!CGF.HaveInsertPoint())
3164     return;
3165   // if(__kmpc_master(ident_t *, gtid)) {
3166   //   MasterOpGen();
3167   //   __kmpc_end_master(ident_t *, gtid);
3168   // }
3169   // Prepare arguments and build a call to __kmpc_master
3170   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
3171   CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_master), Args,
3172                         createRuntimeFunction(OMPRTL__kmpc_end_master), Args,
3173                         /*Conditional=*/true);
3174   MasterOpGen.setAction(Action);
3175   emitInlinedDirective(CGF, OMPD_master, MasterOpGen);
3176   Action.Done(CGF);
3177 }
3178 
3179 void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
3180                                         SourceLocation Loc) {
3181   if (!CGF.HaveInsertPoint())
3182     return;
3183   // Build call __kmpc_omp_taskyield(loc, thread_id, 0);
3184   llvm::Value *Args[] = {
3185       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
3186       llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)};
3187   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_taskyield), Args);
3188   if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
3189     Region->emitUntiedSwitch(CGF);
3190 }
3191 
3192 void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF,
3193                                           const RegionCodeGenTy &TaskgroupOpGen,
3194                                           SourceLocation Loc) {
3195   if (!CGF.HaveInsertPoint())
3196     return;
3197   // __kmpc_taskgroup(ident_t *, gtid);
3198   // TaskgroupOpGen();
3199   // __kmpc_end_taskgroup(ident_t *, gtid);
3200   // Prepare arguments and build a call to __kmpc_taskgroup
3201   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
3202   CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_taskgroup), Args,
3203                         createRuntimeFunction(OMPRTL__kmpc_end_taskgroup),
3204                         Args);
3205   TaskgroupOpGen.setAction(Action);
3206   emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen);
3207 }
3208 
3209 /// Given an array of pointers to variables, project the address of a
3210 /// given variable.
3211 static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array,
3212                                       unsigned Index, const VarDecl *Var) {
3213   // Pull out the pointer to the variable.
3214   Address PtrAddr = CGF.Builder.CreateConstArrayGEP(Array, Index);
3215   llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr);
3216 
3217   Address Addr = Address(Ptr, CGF.getContext().getDeclAlign(Var));
3218   Addr = CGF.Builder.CreateElementBitCast(
3219       Addr, CGF.ConvertTypeForMem(Var->getType()));
3220   return Addr;
3221 }
3222 
3223 static llvm::Value *emitCopyprivateCopyFunction(
3224     CodeGenModule &CGM, llvm::Type *ArgsType,
3225     ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs,
3226     ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps,
3227     SourceLocation Loc) {
3228   ASTContext &C = CGM.getContext();
3229   // void copy_func(void *LHSArg, void *RHSArg);
3230   FunctionArgList Args;
3231   ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
3232                            ImplicitParamDecl::Other);
3233   ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
3234                            ImplicitParamDecl::Other);
3235   Args.push_back(&LHSArg);
3236   Args.push_back(&RHSArg);
3237   const auto &CGFI =
3238       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
3239   std::string Name =
3240       CGM.getOpenMPRuntime().getName({"omp", "copyprivate", "copy_func"});
3241   auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
3242                                     llvm::GlobalValue::InternalLinkage, Name,
3243                                     &CGM.getModule());
3244   CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
3245   Fn->setDoesNotRecurse();
3246   CodeGenFunction CGF(CGM);
3247   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
3248   // Dest = (void*[n])(LHSArg);
3249   // Src = (void*[n])(RHSArg);
3250   Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3251       CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
3252       ArgsType), CGF.getPointerAlign());
3253   Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3254       CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
3255       ArgsType), CGF.getPointerAlign());
3256   // *(Type0*)Dst[0] = *(Type0*)Src[0];
3257   // *(Type1*)Dst[1] = *(Type1*)Src[1];
3258   // ...
3259   // *(Typen*)Dst[n] = *(Typen*)Src[n];
3260   for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) {
3261     const auto *DestVar =
3262         cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl());
3263     Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar);
3264 
3265     const auto *SrcVar =
3266         cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl());
3267     Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar);
3268 
3269     const auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl();
3270     QualType Type = VD->getType();
3271     CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]);
3272   }
3273   CGF.FinishFunction();
3274   return Fn;
3275 }
3276 
/// Emit code for an OpenMP 'single' region, including optional copyprivate
/// handling.
///
/// The body produced by \p SingleOpGen is emitted as an inlined 'single'
/// directive guarded by a conditional __kmpc_single / __kmpc_end_single pair.
/// If \p CopyprivateVars is not empty, a 'did_it' flag is maintained around
/// the region and a call to __kmpc_copyprivate is emitted afterwards.
///
/// \param CGF Function to emit into; nothing is emitted when it has no
/// insert point.
/// \param SingleOpGen Code generator for the region body.
/// \param Loc Source location used for the ident_t argument and thread id.
/// \param CopyprivateVars Variables listed in the copyprivate clause(s).
/// \param SrcExprs, DstExprs, AssignmentOps Per-variable expressions that are
/// forwarded to emitCopyprivateCopyFunction; each must have the same number
/// of elements as \p CopyprivateVars.
void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF,
                                       const RegionCodeGenTy &SingleOpGen,
                                       SourceLocation Loc,
                                       ArrayRef<const Expr *> CopyprivateVars,
                                       ArrayRef<const Expr *> SrcExprs,
                                       ArrayRef<const Expr *> DstExprs,
                                       ArrayRef<const Expr *> AssignmentOps) {
  if (!CGF.HaveInsertPoint())
    return;
  // The four lists are parallel arrays indexed by copyprivate variable.
  assert(CopyprivateVars.size() == SrcExprs.size() &&
         CopyprivateVars.size() == DstExprs.size() &&
         CopyprivateVars.size() == AssignmentOps.size());
  ASTContext &C = CGM.getContext();
  // Shape of the generated code:
  // int32 did_it = 0;
  // if(__kmpc_single(ident_t *, gtid)) {
  //   SingleOpGen();
  //   __kmpc_end_single(ident_t *, gtid);
  //   did_it = 1;
  // }
  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
  // <copy_func>, did_it);

  // DidIt stays invalid when there is no copyprivate clause; its validity is
  // used below as the "copyprivate is needed" flag.
  Address DidIt = Address::invalid();
  if (!CopyprivateVars.empty()) {
    // int32 did_it = 0;
    QualType KmpInt32Ty =
        C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
    DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it");
    CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt);
  }
  // Prepare arguments and build a call to __kmpc_single
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  // The same (loc, gtid) pair is passed to both the enter and the exit call.
  CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_single), Args,
                        createRuntimeFunction(OMPRTL__kmpc_end_single), Args,
                        /*Conditional=*/true);
  SingleOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_single, SingleOpGen);
  if (DidIt.isValid()) {
    // did_it = 1;
    // Emitted before Action.Done() so that, per the pseudo-code above, the
    // store ends up inside the conditional part of the region.
    CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt);
  }
  // NOTE(review): presumably closes the conditional block opened for
  // __kmpc_single — confirm against CommonActionTy::Done.
  Action.Done(CGF);
  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
  // <copy_func>, did_it);
  if (DidIt.isValid()) {
    // Array of void* with one slot per copyprivate variable.
    llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size());
    QualType CopyprivateArrayTy =
        C.getConstantArrayType(C.VoidPtrTy, ArraySize, ArrayType::Normal,
                               /*IndexTypeQuals=*/0);
    // Create a list of all private variables for copyprivate.
    Address CopyprivateList =
        CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list");
    for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) {
      // Store the address of the I-th variable, cast to void*, in slot I.
      Address Elem = CGF.Builder.CreateConstArrayGEP(CopyprivateList, I);
      CGF.Builder.CreateStore(
          CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
              CGF.EmitLValue(CopyprivateVars[I]).getPointer(), CGF.VoidPtrTy),
          Elem);
    }
    // Build function that copies private values from single region to all other
    // threads in the corresponding parallel region.
    llvm::Value *CpyFn = emitCopyprivateCopyFunction(
        CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy)->getPointerTo(),
        CopyprivateVars, SrcExprs, DstExprs, AssignmentOps, Loc);
    llvm::Value *BufSize = CGF.getTypeSize(CopyprivateArrayTy);
    // The runtime receives the list as an untyped void* buffer.
    Address CL =
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(CopyprivateList,
                                                      CGF.VoidPtrTy);
    llvm::Value *DidItVal = CGF.Builder.CreateLoad(DidIt);
    // Note: this Args intentionally shadows the (loc, gtid) array above.
    llvm::Value *Args[] = {
        emitUpdateLocation(CGF, Loc), // ident_t *<loc>
        getThreadID(CGF, Loc),        // i32 <gtid>
        BufSize,                      // size_t <buf_size>
        CL.getPointer(),              // void *<copyprivate list>
        CpyFn,                        // void (*) (void *, void *) <copy_func>
        DidItVal                      // i32 did_it
    };
    CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_copyprivate), Args);
  }
}
3357 
3358 void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF,
3359                                         const RegionCodeGenTy &OrderedOpGen,
3360                                         SourceLocation Loc, bool IsThreads) {
3361   if (!CGF.HaveInsertPoint())
3362     return;
3363   // __kmpc_ordered(ident_t *, gtid);
3364   // OrderedOpGen();
3365   // __kmpc_end_ordered(ident_t *, gtid);
3366   // Prepare arguments and build a call to __kmpc_ordered
3367   if (IsThreads) {
3368     llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
3369     CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_ordered), Args,
3370                           createRuntimeFunction(OMPRTL__kmpc_end_ordered),
3371                           Args);
3372     OrderedOpGen.setAction(Action);
3373     emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
3374     return;
3375   }
3376   emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
3377 }
3378 
3379 unsigned CGOpenMPRuntime::getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind) {
3380   unsigned Flags;
3381   if (Kind == OMPD_for)
3382     Flags = OMP_IDENT_BARRIER_IMPL_FOR;
3383   else if (Kind == OMPD_sections)
3384     Flags = OMP_IDENT_BARRIER_IMPL_SECTIONS;
3385   else if (Kind == OMPD_single)
3386     Flags = OMP_IDENT_BARRIER_IMPL_SINGLE;
3387   else if (Kind == OMPD_barrier)
3388     Flags = OMP_IDENT_BARRIER_EXPL;
3389   else
3390     Flags = OMP_IDENT_BARRIER_IMPL;
3391   return Flags;
3392 }
3393 
3394 void CGOpenMPRuntime::getDefaultScheduleAndChunk(
3395     CodeGenFunction &CGF, const OMPLoopDirective &S,
3396     OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const {
3397   // Check if the loop directive is actually a doacross loop directive. In this
3398   // case choose static, 1 schedule.
3399   if (llvm::any_of(
3400           S.getClausesOfKind<OMPOrderedClause>(),
3401           [](const OMPOrderedClause *C) { return C->getNumForLoops(); })) {
3402     ScheduleKind = OMPC_SCHEDULE_static;
3403     // Chunk size is 1 in this case.
3404     llvm::APInt ChunkSize(32, 1);
3405     ChunkExpr = IntegerLiteral::Create(
3406         CGF.getContext(), ChunkSize,
3407         CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/0),
3408         SourceLocation());
3409   }
3410 }
3411 
3412 void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
3413                                       OpenMPDirectiveKind Kind, bool EmitChecks,
3414                                       bool ForceSimpleCall) {
3415   if (!CGF.HaveInsertPoint())
3416     return;
3417   // Build call __kmpc_cancel_barrier(loc, thread_id);
3418   // Build call __kmpc_barrier(loc, thread_id);
3419   unsigned Flags = getDefaultFlagsForBarriers(Kind);
3420   // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc,
3421   // thread_id);
3422   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags),
3423                          getThreadID(CGF, Loc)};
3424   if (auto *OMPRegionInfo =
3425           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
3426     if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) {
3427       llvm::Value *Result = CGF.EmitRuntimeCall(
3428           createRuntimeFunction(OMPRTL__kmpc_cancel_barrier), Args);
3429       if (EmitChecks) {
3430         // if (__kmpc_cancel_barrier()) {
3431         //   exit from construct;
3432         // }
3433         llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
3434         llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
3435         llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
3436         CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
3437         CGF.EmitBlock(ExitBB);
3438         //   exit from construct;
3439         CodeGenFunction::JumpDest CancelDestination =
3440             CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
3441         CGF.EmitBranchThroughCleanup(CancelDestination);
3442         CGF.EmitBlock(ContBB, /*IsFinished=*/true);
3443       }
3444       return;
3445     }
3446   }
3447   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_barrier), Args);
3448 }
3449 
3450 /// Map the OpenMP loop schedule to the runtime enumeration.
3451 static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind,
3452                                           bool Chunked, bool Ordered) {
3453   switch (ScheduleKind) {
3454   case OMPC_SCHEDULE_static:
3455     return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked)
3456                    : (Ordered ? OMP_ord_static : OMP_sch_static);
3457   case OMPC_SCHEDULE_dynamic:
3458     return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked;
3459   case OMPC_SCHEDULE_guided:
3460     return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked;
3461   case OMPC_SCHEDULE_runtime:
3462     return Ordered ? OMP_ord_runtime : OMP_sch_runtime;
3463   case OMPC_SCHEDULE_auto:
3464     return Ordered ? OMP_ord_auto : OMP_sch_auto;
3465   case OMPC_SCHEDULE_unknown:
3466     assert(!Chunked && "chunk was specified but schedule kind not known");
3467     return Ordered ? OMP_ord_static : OMP_sch_static;
3468   }
3469   llvm_unreachable("Unexpected runtime schedule");
3470 }
3471 
3472 /// Map the OpenMP distribute schedule to the runtime enumeration.
3473 static OpenMPSchedType
3474 getRuntimeSchedule(OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) {
3475   // only static is allowed for dist_schedule
3476   return Chunked ? OMP_dist_sch_static_chunked : OMP_dist_sch_static;
3477 }
3478 
3479 bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind,
3480                                          bool Chunked) const {
3481   OpenMPSchedType Schedule =
3482       getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
3483   return Schedule == OMP_sch_static;
3484 }
3485 
3486 bool CGOpenMPRuntime::isStaticNonchunked(
3487     OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
3488   OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
3489   return Schedule == OMP_dist_sch_static;
3490 }
3491 
3492 bool CGOpenMPRuntime::isStaticChunked(OpenMPScheduleClauseKind ScheduleKind,
3493                                       bool Chunked) const {
3494   OpenMPSchedType Schedule =
3495       getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
3496   return Schedule == OMP_sch_static_chunked;
3497 }
3498 
3499 bool CGOpenMPRuntime::isStaticChunked(
3500     OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
3501   OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
3502   return Schedule == OMP_dist_sch_static_chunked;
3503 }
3504 
3505 bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const {
3506   OpenMPSchedType Schedule =
3507       getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false);
3508   assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here");
3509   return Schedule != OMP_sch_static;
3510 }
3511 
3512 static int addMonoNonMonoModifier(CodeGenModule &CGM, OpenMPSchedType Schedule,
3513                                   OpenMPScheduleClauseModifier M1,
3514                                   OpenMPScheduleClauseModifier M2) {
3515   int Modifier = 0;
3516   switch (M1) {
3517   case OMPC_SCHEDULE_MODIFIER_monotonic:
3518     Modifier = OMP_sch_modifier_monotonic;
3519     break;
3520   case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
3521     Modifier = OMP_sch_modifier_nonmonotonic;
3522     break;
3523   case OMPC_SCHEDULE_MODIFIER_simd:
3524     if (Schedule == OMP_sch_static_chunked)
3525       Schedule = OMP_sch_static_balanced_chunked;
3526     break;
3527   case OMPC_SCHEDULE_MODIFIER_last:
3528   case OMPC_SCHEDULE_MODIFIER_unknown:
3529     break;
3530   }
3531   switch (M2) {
3532   case OMPC_SCHEDULE_MODIFIER_monotonic:
3533     Modifier = OMP_sch_modifier_monotonic;
3534     break;
3535   case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
3536     Modifier = OMP_sch_modifier_nonmonotonic;
3537     break;
3538   case OMPC_SCHEDULE_MODIFIER_simd:
3539     if (Schedule == OMP_sch_static_chunked)
3540       Schedule = OMP_sch_static_balanced_chunked;
3541     break;
3542   case OMPC_SCHEDULE_MODIFIER_last:
3543   case OMPC_SCHEDULE_MODIFIER_unknown:
3544     break;
3545   }
3546   // OpenMP 5.0, 2.9.2 Worksharing-Loop Construct, Desription.
3547   // If the static schedule kind is specified or if the ordered clause is
3548   // specified, and if the nonmonotonic modifier is not specified, the effect is
3549   // as if the monotonic modifier is specified. Otherwise, unless the monotonic
3550   // modifier is specified, the effect is as if the nonmonotonic modifier is
3551   // specified.
3552   if (CGM.getLangOpts().OpenMP >= 50 && Modifier == 0) {
3553     if (!(Schedule == OMP_sch_static_chunked || Schedule == OMP_sch_static ||
3554           Schedule == OMP_sch_static_balanced_chunked ||
3555           Schedule == OMP_ord_static_chunked || Schedule == OMP_ord_static))
3556       Modifier = OMP_sch_modifier_nonmonotonic;
3557   }
3558   return Schedule | Modifier;
3559 }
3560 
3561 void CGOpenMPRuntime::emitForDispatchInit(
3562     CodeGenFunction &CGF, SourceLocation Loc,
3563     const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
3564     bool Ordered, const DispatchRTInput &DispatchValues) {
3565   if (!CGF.HaveInsertPoint())
3566     return;
3567   OpenMPSchedType Schedule = getRuntimeSchedule(
3568       ScheduleKind.Schedule, DispatchValues.Chunk != nullptr, Ordered);
3569   assert(Ordered ||
3570          (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked &&
3571           Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked &&
3572           Schedule != OMP_sch_static_balanced_chunked));
3573   // Call __kmpc_dispatch_init(
3574   //          ident_t *loc, kmp_int32 tid, kmp_int32 schedule,
3575   //          kmp_int[32|64] lower, kmp_int[32|64] upper,
3576   //          kmp_int[32|64] stride, kmp_int[32|64] chunk);
3577 
3578   // If the Chunk was not specified in the clause - use default value 1.
3579   llvm::Value *Chunk = DispatchValues.Chunk ? DispatchValues.Chunk
3580                                             : CGF.Builder.getIntN(IVSize, 1);
3581   llvm::Value *Args[] = {
3582       emitUpdateLocation(CGF, Loc),
3583       getThreadID(CGF, Loc),
3584       CGF.Builder.getInt32(addMonoNonMonoModifier(
3585           CGM, Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type
3586       DispatchValues.LB,                                     // Lower
3587       DispatchValues.UB,                                     // Upper
3588       CGF.Builder.getIntN(IVSize, 1),                        // Stride
3589       Chunk                                                  // Chunk
3590   };
3591   CGF.EmitRuntimeCall(createDispatchInitFunction(IVSize, IVSigned), Args);
3592 }
3593 
3594 static void emitForStaticInitCall(
3595     CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId,
3596     llvm::FunctionCallee ForStaticInitFunction, OpenMPSchedType Schedule,
3597     OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2,
3598     const CGOpenMPRuntime::StaticRTInput &Values) {
3599   if (!CGF.HaveInsertPoint())
3600     return;
3601 
3602   assert(!Values.Ordered);
3603   assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked ||
3604          Schedule == OMP_sch_static_balanced_chunked ||
3605          Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked ||
3606          Schedule == OMP_dist_sch_static ||
3607          Schedule == OMP_dist_sch_static_chunked);
3608 
3609   // Call __kmpc_for_static_init(
3610   //          ident_t *loc, kmp_int32 tid, kmp_int32 schedtype,
3611   //          kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower,
3612   //          kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
3613   //          kmp_int[32|64] incr, kmp_int[32|64] chunk);
3614   llvm::Value *Chunk = Values.Chunk;
3615   if (Chunk == nullptr) {
3616     assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static ||
3617             Schedule == OMP_dist_sch_static) &&
3618            "expected static non-chunked schedule");
3619     // If the Chunk was not specified in the clause - use default value 1.
3620     Chunk = CGF.Builder.getIntN(Values.IVSize, 1);
3621   } else {
3622     assert((Schedule == OMP_sch_static_chunked ||
3623             Schedule == OMP_sch_static_balanced_chunked ||
3624             Schedule == OMP_ord_static_chunked ||
3625             Schedule == OMP_dist_sch_static_chunked) &&
3626            "expected static chunked schedule");
3627   }
3628   llvm::Value *Args[] = {
3629       UpdateLocation,
3630       ThreadId,
3631       CGF.Builder.getInt32(addMonoNonMonoModifier(CGF.CGM, Schedule, M1,
3632                                                   M2)), // Schedule type
3633       Values.IL.getPointer(),                           // &isLastIter
3634       Values.LB.getPointer(),                           // &LB
3635       Values.UB.getPointer(),                           // &UB
3636       Values.ST.getPointer(),                           // &Stride
3637       CGF.Builder.getIntN(Values.IVSize, 1),            // Incr
3638       Chunk                                             // Chunk
3639   };
3640   CGF.EmitRuntimeCall(ForStaticInitFunction, Args);
3641 }
3642 
3643 void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF,
3644                                         SourceLocation Loc,
3645                                         OpenMPDirectiveKind DKind,
3646                                         const OpenMPScheduleTy &ScheduleKind,
3647                                         const StaticRTInput &Values) {
3648   OpenMPSchedType ScheduleNum = getRuntimeSchedule(
3649       ScheduleKind.Schedule, Values.Chunk != nullptr, Values.Ordered);
3650   assert(isOpenMPWorksharingDirective(DKind) &&
3651          "Expected loop-based or sections-based directive.");
3652   llvm::Value *UpdatedLocation = emitUpdateLocation(CGF, Loc,
3653                                              isOpenMPLoopDirective(DKind)
3654                                                  ? OMP_IDENT_WORK_LOOP
3655                                                  : OMP_IDENT_WORK_SECTIONS);
3656   llvm::Value *ThreadId = getThreadID(CGF, Loc);
3657   llvm::FunctionCallee StaticInitFunction =
3658       createForStaticInitFunction(Values.IVSize, Values.IVSigned);
3659   emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
3660                         ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, Values);
3661 }
3662 
3663 void CGOpenMPRuntime::emitDistributeStaticInit(
3664     CodeGenFunction &CGF, SourceLocation Loc,
3665     OpenMPDistScheduleClauseKind SchedKind,
3666     const CGOpenMPRuntime::StaticRTInput &Values) {
3667   OpenMPSchedType ScheduleNum =
3668       getRuntimeSchedule(SchedKind, Values.Chunk != nullptr);
3669   llvm::Value *UpdatedLocation =
3670       emitUpdateLocation(CGF, Loc, OMP_IDENT_WORK_DISTRIBUTE);
3671   llvm::Value *ThreadId = getThreadID(CGF, Loc);
3672   llvm::FunctionCallee StaticInitFunction =
3673       createForStaticInitFunction(Values.IVSize, Values.IVSigned);
3674   emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
3675                         ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown,
3676                         OMPC_SCHEDULE_MODIFIER_unknown, Values);
3677 }
3678 
3679 void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF,
3680                                           SourceLocation Loc,
3681                                           OpenMPDirectiveKind DKind) {
3682   if (!CGF.HaveInsertPoint())
3683     return;
3684   // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid);
3685   llvm::Value *Args[] = {
3686       emitUpdateLocation(CGF, Loc,
3687                          isOpenMPDistributeDirective(DKind)
3688                              ? OMP_IDENT_WORK_DISTRIBUTE
3689                              : isOpenMPLoopDirective(DKind)
3690                                    ? OMP_IDENT_WORK_LOOP
3691                                    : OMP_IDENT_WORK_SECTIONS),
3692       getThreadID(CGF, Loc)};
3693   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_for_static_fini),
3694                       Args);
3695 }
3696 
3697 void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
3698                                                  SourceLocation Loc,
3699                                                  unsigned IVSize,
3700                                                  bool IVSigned) {
3701   if (!CGF.HaveInsertPoint())
3702     return;
3703   // Call __kmpc_for_dynamic_fini_(4|8)[u](ident_t *loc, kmp_int32 tid);
3704   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
3705   CGF.EmitRuntimeCall(createDispatchFiniFunction(IVSize, IVSigned), Args);
3706 }
3707 
3708 llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF,
3709                                           SourceLocation Loc, unsigned IVSize,
3710                                           bool IVSigned, Address IL,
3711                                           Address LB, Address UB,
3712                                           Address ST) {
3713   // Call __kmpc_dispatch_next(
3714   //          ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter,
3715   //          kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper,
3716   //          kmp_int[32|64] *p_stride);
3717   llvm::Value *Args[] = {
3718       emitUpdateLocation(CGF, Loc),
3719       getThreadID(CGF, Loc),
3720       IL.getPointer(), // &isLastIter
3721       LB.getPointer(), // &Lower
3722       UB.getPointer(), // &Upper
3723       ST.getPointer()  // &Stride
3724   };
3725   llvm::Value *Call =
3726       CGF.EmitRuntimeCall(createDispatchNextFunction(IVSize, IVSigned), Args);
3727   return CGF.EmitScalarConversion(
3728       Call, CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/1),
3729       CGF.getContext().BoolTy, Loc);
3730 }
3731 
3732 void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
3733                                            llvm::Value *NumThreads,
3734                                            SourceLocation Loc) {
3735   if (!CGF.HaveInsertPoint())
3736     return;
3737   // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads)
3738   llvm::Value *Args[] = {
3739       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
3740       CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)};
3741   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_num_threads),
3742                       Args);
3743 }
3744 
3745 void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF,
3746                                          OpenMPProcBindClauseKind ProcBind,
3747                                          SourceLocation Loc) {
3748   if (!CGF.HaveInsertPoint())
3749     return;
3750   // Constants for proc bind value accepted by the runtime.
3751   enum ProcBindTy {
3752     ProcBindFalse = 0,
3753     ProcBindTrue,
3754     ProcBindMaster,
3755     ProcBindClose,
3756     ProcBindSpread,
3757     ProcBindIntel,
3758     ProcBindDefault
3759   } RuntimeProcBind;
3760   switch (ProcBind) {
3761   case OMPC_PROC_BIND_master:
3762     RuntimeProcBind = ProcBindMaster;
3763     break;
3764   case OMPC_PROC_BIND_close:
3765     RuntimeProcBind = ProcBindClose;
3766     break;
3767   case OMPC_PROC_BIND_spread:
3768     RuntimeProcBind = ProcBindSpread;
3769     break;
3770   case OMPC_PROC_BIND_unknown:
3771     llvm_unreachable("Unsupported proc_bind value.");
3772   }
3773   // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind)
3774   llvm::Value *Args[] = {
3775       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
3776       llvm::ConstantInt::get(CGM.IntTy, RuntimeProcBind, /*isSigned=*/true)};
3777   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_proc_bind), Args);
3778 }
3779 
3780 void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>,
3781                                 SourceLocation Loc) {
3782   if (!CGF.HaveInsertPoint())
3783     return;
3784   // Build call void __kmpc_flush(ident_t *loc)
3785   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_flush),
3786                       emitUpdateLocation(CGF, Loc));
3787 }
3788 
namespace {
/// Indexes of fields for type kmp_task_t.
///
/// The enumerators are implicitly numbered from 0 in declaration order.
/// NOTE(review): presumably this order has to match the order in which the
/// kmp_task_t record fields are created — confirm before reordering.
enum KmpTaskTFields {
  /// List of shared variables.
  KmpTaskTShareds,
  /// Task routine.
  KmpTaskTRoutine,
  /// Partition id for the untied tasks.
  KmpTaskTPartId,
  /// Function with call of destructors for private variables.
  Data1,
  /// Task priority.
  Data2,
  /// (Taskloops only) Lower bound.
  KmpTaskTLowerBound,
  /// (Taskloops only) Upper bound.
  KmpTaskTUpperBound,
  /// (Taskloops only) Stride.
  KmpTaskTStride,
  /// (Taskloops only) Is last iteration flag.
  KmpTaskTLastIter,
  /// (Taskloops only) Reduction data.
  KmpTaskTReductions,
};
} // anonymous namespace
3814 
3815 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::empty() const {
3816   return OffloadEntriesTargetRegion.empty() &&
3817          OffloadEntriesDeviceGlobalVar.empty();
3818 }
3819 
3820 /// Initialize target region entry.
3821 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3822     initializeTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
3823                                     StringRef ParentName, unsigned LineNum,
3824                                     unsigned Order) {
3825   assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
3826                                              "only required for the device "
3827                                              "code generation.");
3828   OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] =
3829       OffloadEntryInfoTargetRegion(Order, /*Addr=*/nullptr, /*ID=*/nullptr,
3830                                    OMPTargetRegionEntryTargetRegion);
3831   ++OffloadingEntriesNum;
3832 }
3833 
3834 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3835     registerTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
3836                                   StringRef ParentName, unsigned LineNum,
3837                                   llvm::Constant *Addr, llvm::Constant *ID,
3838                                   OMPTargetRegionEntryKind Flags) {
3839   // If we are emitting code for a target, the entry is already initialized,
3840   // only has to be registered.
3841   if (CGM.getLangOpts().OpenMPIsDevice) {
3842     if (!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum)) {
3843       unsigned DiagID = CGM.getDiags().getCustomDiagID(
3844           DiagnosticsEngine::Error,
3845           "Unable to find target region on line '%0' in the device code.");
3846       CGM.getDiags().Report(DiagID) << LineNum;
3847       return;
3848     }
3849     auto &Entry =
3850         OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum];
3851     assert(Entry.isValid() && "Entry not initialized!");
3852     Entry.setAddress(Addr);
3853     Entry.setID(ID);
3854     Entry.setFlags(Flags);
3855   } else {
3856     OffloadEntryInfoTargetRegion Entry(OffloadingEntriesNum, Addr, ID, Flags);
3857     OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = Entry;
3858     ++OffloadingEntriesNum;
3859   }
3860 }
3861 
3862 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::hasTargetRegionEntryInfo(
3863     unsigned DeviceID, unsigned FileID, StringRef ParentName,
3864     unsigned LineNum) const {
3865   auto PerDevice = OffloadEntriesTargetRegion.find(DeviceID);
3866   if (PerDevice == OffloadEntriesTargetRegion.end())
3867     return false;
3868   auto PerFile = PerDevice->second.find(FileID);
3869   if (PerFile == PerDevice->second.end())
3870     return false;
3871   auto PerParentName = PerFile->second.find(ParentName);
3872   if (PerParentName == PerFile->second.end())
3873     return false;
3874   auto PerLine = PerParentName->second.find(LineNum);
3875   if (PerLine == PerParentName->second.end())
3876     return false;
3877   // Fail if this entry is already registered.
3878   if (PerLine->second.getAddress() || PerLine->second.getID())
3879     return false;
3880   return true;
3881 }
3882 
3883 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::actOnTargetRegionEntriesInfo(
3884     const OffloadTargetRegionEntryInfoActTy &Action) {
3885   // Scan all target region entries and perform the provided action.
3886   for (const auto &D : OffloadEntriesTargetRegion)
3887     for (const auto &F : D.second)
3888       for (const auto &P : F.second)
3889         for (const auto &L : P.second)
3890           Action(D.first, F.first, P.first(), L.first, L.second);
3891 }
3892 
/// Initialize device global variable entry.
///
/// Records an entry for \p Name constructed from \p Order and \p Flags and
/// bumps the entry counter. Only expected during device code generation.
void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
    initializeDeviceGlobalVarEntryInfo(StringRef Name,
                                       OMPTargetGlobalVarEntryKind Flags,
                                       unsigned Order) {
  assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
                                             "only required for the device "
                                             "code generation.");
  // try_emplace leaves an already existing entry for Name untouched; the
  // counter below is incremented regardless.
  OffloadEntriesDeviceGlobalVar.try_emplace(Name, Order, Flags);
  ++OffloadingEntriesNum;
}
3903 
3904 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3905     registerDeviceGlobalVarEntryInfo(StringRef VarName, llvm::Constant *Addr,
3906                                      CharUnits VarSize,
3907                                      OMPTargetGlobalVarEntryKind Flags,
3908                                      llvm::GlobalValue::LinkageTypes Linkage) {
3909   if (CGM.getLangOpts().OpenMPIsDevice) {
3910     auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
3911     assert(Entry.isValid() && Entry.getFlags() == Flags &&
3912            "Entry not initialized!");
3913     assert((!Entry.getAddress() || Entry.getAddress() == Addr) &&
3914            "Resetting with the new address.");
3915     if (Entry.getAddress() && hasDeviceGlobalVarEntryInfo(VarName)) {
3916       if (Entry.getVarSize().isZero()) {
3917         Entry.setVarSize(VarSize);
3918         Entry.setLinkage(Linkage);
3919       }
3920       return;
3921     }
3922     Entry.setVarSize(VarSize);
3923     Entry.setLinkage(Linkage);
3924     Entry.setAddress(Addr);
3925   } else {
3926     if (hasDeviceGlobalVarEntryInfo(VarName)) {
3927       auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
3928       assert(Entry.isValid() && Entry.getFlags() == Flags &&
3929              "Entry not initialized!");
3930       assert((!Entry.getAddress() || Entry.getAddress() == Addr) &&
3931              "Resetting with the new address.");
3932       if (Entry.getVarSize().isZero()) {
3933         Entry.setVarSize(VarSize);
3934         Entry.setLinkage(Linkage);
3935       }
3936       return;
3937     }
3938     OffloadEntriesDeviceGlobalVar.try_emplace(
3939         VarName, OffloadingEntriesNum, Addr, VarSize, Flags, Linkage);
3940     ++OffloadingEntriesNum;
3941   }
3942 }
3943 
3944 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3945     actOnDeviceGlobalVarEntriesInfo(
3946         const OffloadDeviceGlobalVarEntryInfoActTy &Action) {
3947   // Scan all target region entries and perform the provided action.
3948   for (const auto &E : OffloadEntriesDeviceGlobalVar)
3949     Action(E.getKey(), E.getValue());
3950 }
3951 
/// Creates the offloading binary descriptor for the host module together with
/// the functions that register and unregister it with the offloading runtime.
///
/// For every triple in OMPTargetTriples a device image record is emitted that
/// refers to the image begin/end symbols for that triple and to the bounds of
/// the host entries section. The descriptor global bundles the number of
/// devices, the device image array and the same section bounds. Two functions
/// are then emitted: one that calls OMPRTL__tgt_unregister_lib and one that
/// calls OMPRTL__tgt_register_lib; the latter also registers the former as a
/// global destructor through the C++ ABI.
///
/// \returns the registration function, or nullptr when compiling for the
/// device or when no offload entries have been recorded.
llvm::Function *
CGOpenMPRuntime::createOffloadingBinaryDescriptorRegistration() {
  // If we don't have entries or if we are emitting code for the device, we
  // don't need to do anything.
  if (CGM.getLangOpts().OpenMPIsDevice || OffloadEntriesInfoManager.empty())
    return nullptr;

  llvm::Module &M = CGM.getModule();
  ASTContext &C = CGM.getContext();

  // Get list of devices we care about
  const std::vector<llvm::Triple> &Devices = CGM.getLangOpts().OMPTargetTriples;

  // We should be creating an offloading descriptor only if there are devices
  // specified.
  assert(!Devices.empty() && "No OpenMP offloading devices??");

  // Create the external variables that will point to the begin and end of the
  // host entries section. These will be defined by the linker.
  llvm::Type *OffloadEntryTy =
      CGM.getTypes().ConvertTypeForMem(getTgtOffloadEntryQTy());
  std::string EntriesBeginName = getName({"omp_offloading", "entries_begin"});
  auto *HostEntriesBegin = new llvm::GlobalVariable(
      M, OffloadEntryTy, /*isConstant=*/true,
      llvm::GlobalValue::ExternalLinkage, /*Initializer=*/nullptr,
      EntriesBeginName);
  std::string EntriesEndName = getName({"omp_offloading", "entries_end"});
  auto *HostEntriesEnd =
      new llvm::GlobalVariable(M, OffloadEntryTy, /*isConstant=*/true,
                               llvm::GlobalValue::ExternalLinkage,
                               /*Initializer=*/nullptr, EntriesEndName);

  // Create all device images
  auto *DeviceImageTy = cast<llvm::StructType>(
      CGM.getTypes().ConvertTypeForMem(getTgtDeviceImageQTy()));
  ConstantInitBuilder DeviceImagesBuilder(CGM);
  ConstantArrayBuilder DeviceImagesEntries =
      DeviceImagesBuilder.beginArray(DeviceImageTy);

  for (const llvm::Triple &Device : Devices) {
    // The image bounds are declared (not defined) here with extern_weak
    // linkage; the triple string is appended to the symbol name so that every
    // device gets its own pair of symbols.
    StringRef T = Device.getTriple();
    std::string BeginName = getName({"omp_offloading", "img_start", ""});
    auto *ImgBegin = new llvm::GlobalVariable(
        M, CGM.Int8Ty, /*isConstant=*/true,
        llvm::GlobalValue::ExternalWeakLinkage,
        /*Initializer=*/nullptr, Twine(BeginName).concat(T));
    std::string EndName = getName({"omp_offloading", "img_end", ""});
    auto *ImgEnd = new llvm::GlobalVariable(
        M, CGM.Int8Ty, /*isConstant=*/true,
        llvm::GlobalValue::ExternalWeakLinkage,
        /*Initializer=*/nullptr, Twine(EndName).concat(T));

    // One image record per device: {image begin, image end, host entries
    // begin, host entries end}.
    llvm::Constant *Data[] = {ImgBegin, ImgEnd, HostEntriesBegin,
                              HostEntriesEnd};
    createConstantGlobalStructAndAddToParent(CGM, getTgtDeviceImageQTy(), Data,
                                             DeviceImagesEntries);
  }

  // Create device images global array.
  std::string ImagesName = getName({"omp_offloading", "device_images"});
  llvm::GlobalVariable *DeviceImages =
      DeviceImagesEntries.finishAndCreateGlobal(ImagesName,
                                                CGM.getPointerAlign(),
                                                /*isConstant=*/true);
  DeviceImages->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);

  // This is a Zero array to be used in the creation of the constant expressions
  llvm::Constant *Index[] = {llvm::Constant::getNullValue(CGM.Int32Ty),
                             llvm::Constant::getNullValue(CGM.Int32Ty)};

  // Create the target region descriptor: {number of devices, pointer to the
  // first device image, host entries begin, host entries end}.
  llvm::Constant *Data[] = {
      llvm::ConstantInt::get(CGM.Int32Ty, Devices.size()),
      llvm::ConstantExpr::getGetElementPtr(DeviceImages->getValueType(),
                                           DeviceImages, Index),
      HostEntriesBegin, HostEntriesEnd};
  std::string Descriptor = getName({"omp_offloading", "descriptor"});
  llvm::GlobalVariable *Desc = createGlobalStruct(
      CGM, getTgtBinaryDescriptorQTy(), /*IsConstant=*/true, Data, Descriptor);

  // Emit code to register or unregister the descriptor at execution
  // startup or closing, respectively.

  // Unregistration function: takes a single void * parameter and calls
  // OMPRTL__tgt_unregister_lib on the descriptor.
  llvm::Function *UnRegFn;
  {
    FunctionArgList Args;
    ImplicitParamDecl DummyPtr(C, C.VoidPtrTy, ImplicitParamDecl::Other);
    Args.push_back(&DummyPtr);

    CodeGenFunction CGF(CGM);
    // Disable debug info for global (de-)initializer because they are not part
    // of some particular construct.
    CGF.disableDebugInfo();
    const auto &FI =
        CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
    llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
    std::string UnregName = getName({"omp_offloading", "descriptor_unreg"});
    UnRegFn = CGM.CreateGlobalInitOrDestructFunction(FTy, UnregName, FI);
    CGF.StartFunction(GlobalDecl(), C.VoidTy, UnRegFn, FI, Args);
    CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_unregister_lib),
                        Desc);
    CGF.FinishFunction();
  }
  // Registration function: takes no parameters, calls
  // OMPRTL__tgt_register_lib on the descriptor and arranges for UnRegFn to
  // run as a global destructor.
  llvm::Function *RegFn;
  {
    CodeGenFunction CGF(CGM);
    // Disable debug info for global (de-)initializer because they are not part
    // of some particular construct.
    CGF.disableDebugInfo();
    const auto &FI = CGM.getTypes().arrangeNullaryFunction();
    llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);

    // Encode offload target triples into the registration function name. It
    // will serve as a comdat key for the registration/unregistration code for
    // this particular combination of offloading targets.
    SmallVector<StringRef, 4U> RegFnNameParts(Devices.size() + 2U);
    RegFnNameParts[0] = "omp_offloading";
    RegFnNameParts[1] = "descriptor_reg";
    llvm::transform(Devices, std::next(RegFnNameParts.begin(), 2),
                    [](const llvm::Triple &T) -> const std::string& {
                      return T.getTriple();
                    });
    // Sort only the triple parts so the resulting name does not depend on the
    // order in which the triples were listed.
    llvm::sort(std::next(RegFnNameParts.begin(), 2), RegFnNameParts.end());
    std::string Descriptor = getName(RegFnNameParts);
    RegFn = CGM.CreateGlobalInitOrDestructFunction(FTy, Descriptor, FI);
    CGF.StartFunction(GlobalDecl(), C.VoidTy, RegFn, FI, FunctionArgList());
    CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_register_lib), Desc);
    // Create a variable to drive the registration and unregistration of the
    // descriptor, so we can reuse the logic that emits Ctors and Dtors.
    ImplicitParamDecl RegUnregVar(C, C.getTranslationUnitDecl(),
                                  SourceLocation(), nullptr, C.CharTy,
                                  ImplicitParamDecl::Other);
    CGM.getCXXABI().registerGlobalDtor(CGF, RegUnregVar, UnRegFn, Desc);
    CGF.FinishFunction();
  }
  if (CGM.supportsCOMDAT()) {
    // It is sufficient to call registration function only once, so create a
    // COMDAT group for registration/unregistration functions and associated
    // data. That would reduce startup time and code size. Registration
    // function serves as a COMDAT group key.
    llvm::Comdat *ComdatKey = M.getOrInsertComdat(RegFn->getName());
    RegFn->setLinkage(llvm::GlobalValue::LinkOnceAnyLinkage);
    RegFn->setVisibility(llvm::GlobalValue::HiddenVisibility);
    RegFn->setComdat(ComdatKey);
    UnRegFn->setComdat(ComdatKey);
    DeviceImages->setComdat(ComdatKey);
    Desc->setComdat(ComdatKey);
  }
  return RegFn;
}
4102 
4103 void CGOpenMPRuntime::createOffloadEntry(
4104     llvm::Constant *ID, llvm::Constant *Addr, uint64_t Size, int32_t Flags,
4105     llvm::GlobalValue::LinkageTypes Linkage) {
4106   StringRef Name = Addr->getName();
4107   llvm::Module &M = CGM.getModule();
4108   llvm::LLVMContext &C = M.getContext();
4109 
4110   // Create constant string with the name.
4111   llvm::Constant *StrPtrInit = llvm::ConstantDataArray::getString(C, Name);
4112 
4113   std::string StringName = getName({"omp_offloading", "entry_name"});
4114   auto *Str = new llvm::GlobalVariable(
4115       M, StrPtrInit->getType(), /*isConstant=*/true,
4116       llvm::GlobalValue::InternalLinkage, StrPtrInit, StringName);
4117   Str->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
4118 
4119   llvm::Constant *Data[] = {llvm::ConstantExpr::getBitCast(ID, CGM.VoidPtrTy),
4120                             llvm::ConstantExpr::getBitCast(Str, CGM.Int8PtrTy),
4121                             llvm::ConstantInt::get(CGM.SizeTy, Size),
4122                             llvm::ConstantInt::get(CGM.Int32Ty, Flags),
4123                             llvm::ConstantInt::get(CGM.Int32Ty, 0)};
4124   std::string EntryName = getName({"omp_offloading", "entry", ""});
4125   llvm::GlobalVariable *Entry = createGlobalStruct(
4126       CGM, getTgtOffloadEntryQTy(), /*IsConstant=*/true, Data,
4127       Twine(EntryName).concat(Name), llvm::GlobalValue::WeakAnyLinkage);
4128 
4129   // The entry has to be created in the section the linker expects it to be.
4130   std::string Section = getName({"omp_offloading", "entries"});
4131   Entry->setSection(Section);
4132 }
4133 
/// Emits the "omp_offload.info" named metadata describing every offload entry
/// known to OffloadEntriesInfoManager and then creates the offload entry
/// globals for those entries, visiting them in their creation order.
///
/// Entries that are incomplete are either skipped or reported through the
/// diagnostics engine, as detailed in the loop at the end of the function.
void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() {
  // Emit the offloading entries and metadata so that the device codegen side
  // can easily figure out what to emit. The produced metadata looks like
  // this:
  //
  // !omp_offload.info = !{!1, ...}
  //
  // Metadata is generated both for target regions and for device global
  // variables.

  // If we do not have entries, we don't need to do anything.
  if (OffloadEntriesInfoManager.empty())
    return;

  llvm::Module &M = CGM.getModule();
  llvm::LLVMContext &C = M.getContext();
  // Both arrays are indexed by the order in which an entry was created.
  // ParentFunctions is only filled in for target region entries.
  SmallVector<const OffloadEntriesInfoManagerTy::OffloadEntryInfo *, 16>
      OrderedEntries(OffloadEntriesInfoManager.size());
  llvm::SmallVector<StringRef, 16> ParentFunctions(
      OffloadEntriesInfoManager.size());

  // Auxiliary methods to create metadata values and strings.
  auto &&GetMDInt = [this](unsigned V) {
    return llvm::ConstantAsMetadata::get(
        llvm::ConstantInt::get(CGM.Int32Ty, V));
  };

  auto &&GetMDString = [&C](StringRef V) { return llvm::MDString::get(C, V); };

  // Create the offloading info metadata node.
  llvm::NamedMDNode *MD = M.getOrInsertNamedMetadata("omp_offload.info");

  // Create function that emits metadata for each target region entry;
  auto &&TargetRegionMetadataEmitter =
      [&C, MD, &OrderedEntries, &ParentFunctions, &GetMDInt, &GetMDString](
          unsigned DeviceID, unsigned FileID, StringRef ParentName,
          unsigned Line,
          const OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion &E) {
        // Generate metadata for target regions. Each entry of this metadata
        // contains:
        // - Entry 0 -> Kind of this type of metadata (0).
        // - Entry 1 -> Device ID of the file where the entry was identified.
        // - Entry 2 -> File ID of the file where the entry was identified.
        // - Entry 3 -> Mangled name of the function where the entry was
        // identified.
        // - Entry 4 -> Line in the file where the entry was identified.
        // - Entry 5 -> Order the entry was created.
        // The first element of the metadata node is the kind.
        llvm::Metadata *Ops[] = {GetMDInt(E.getKind()), GetMDInt(DeviceID),
                                 GetMDInt(FileID),      GetMDString(ParentName),
                                 GetMDInt(Line),        GetMDInt(E.getOrder())};

        // Save this entry in the right position of the ordered entries array.
        OrderedEntries[E.getOrder()] = &E;
        ParentFunctions[E.getOrder()] = ParentName;

        // Add metadata to the named metadata node.
        MD->addOperand(llvm::MDNode::get(C, Ops));
      };

  OffloadEntriesInfoManager.actOnTargetRegionEntriesInfo(
      TargetRegionMetadataEmitter);

  // Create function that emits metadata for each device global variable entry;
  auto &&DeviceGlobalVarMetadataEmitter =
      [&C, &OrderedEntries, &GetMDInt, &GetMDString,
       MD](StringRef MangledName,
           const OffloadEntriesInfoManagerTy::OffloadEntryInfoDeviceGlobalVar
               &E) {
        // Generate metadata for global variables. Each entry of this metadata
        // contains:
        // - Entry 0 -> Kind of this type of metadata (1).
        // - Entry 1 -> Mangled name of the variable.
        // - Entry 2 -> Declare target kind.
        // - Entry 3 -> Order the entry was created.
        // The first element of the metadata node is the kind.
        llvm::Metadata *Ops[] = {
            GetMDInt(E.getKind()), GetMDString(MangledName),
            GetMDInt(E.getFlags()), GetMDInt(E.getOrder())};

        // Save this entry in the right position of the ordered entries array.
        OrderedEntries[E.getOrder()] = &E;

        // Add metadata to the named metadata node.
        MD->addOperand(llvm::MDNode::get(C, Ops));
      };

  OffloadEntriesInfoManager.actOnDeviceGlobalVarEntriesInfo(
      DeviceGlobalVarMetadataEmitter);

  // Walk the entries in creation order and emit the actual offload entry for
  // each one that is complete.
  for (const auto *E : OrderedEntries) {
    assert(E && "All ordered entries must exist!");
    if (const auto *CE =
            dyn_cast<OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion>(
                E)) {
      if (!CE->getID() || !CE->getAddress()) {
        // Do not blame the entry if the parent function is not emitted.
        StringRef FnName = ParentFunctions[CE->getOrder()];
        if (!CGM.GetGlobalValue(FnName))
          continue;
        unsigned DiagID = CGM.getDiags().getCustomDiagID(
            DiagnosticsEngine::Error,
            "Offloading entry for target region is incorrect: either the "
            "address or the ID is invalid.");
        CGM.getDiags().Report(DiagID);
        continue;
      }
      // Target region entries are emitted with a size of zero.
      createOffloadEntry(CE->getID(), CE->getAddress(), /*Size=*/0,
                         CE->getFlags(), llvm::GlobalValue::WeakAnyLinkage);
    } else if (const auto *CE =
                   dyn_cast<OffloadEntriesInfoManagerTy::
                                OffloadEntryInfoDeviceGlobalVar>(E)) {
      OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags =
          static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>(
              CE->getFlags());
      switch (Flags) {
      case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo: {
        // No entry is emitted on the device when unified shared memory is
        // required.
        if (CGM.getLangOpts().OpenMPIsDevice &&
            CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())
          continue;
        if (!CE->getAddress()) {
          unsigned DiagID = CGM.getDiags().getCustomDiagID(
              DiagnosticsEngine::Error,
              "Offloading entry for declare target variable is incorrect: the "
              "address is invalid.");
          CGM.getDiags().Report(DiagID);
          continue;
        }
        // The variable has no definition - no need to add the entry.
        if (CE->getVarSize().isZero())
          continue;
        break;
      }
      case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink:
        // The address is expected to be set on the host and unset on the
        // device.
        assert(((CGM.getLangOpts().OpenMPIsDevice && !CE->getAddress()) ||
                (!CGM.getLangOpts().OpenMPIsDevice && CE->getAddress())) &&
               "Declaret target link address is set.");
        // Link entries are only emitted on the host.
        if (CGM.getLangOpts().OpenMPIsDevice)
          continue;
        if (!CE->getAddress()) {
          unsigned DiagID = CGM.getDiags().getCustomDiagID(
              DiagnosticsEngine::Error,
              "Offloading entry for declare target variable is incorrect: the "
              "address is invalid.");
          CGM.getDiags().Report(DiagID);
          continue;
        }
        break;
      }
      // For global variables the address doubles as the entry ID.
      createOffloadEntry(CE->getAddress(), CE->getAddress(),
                         CE->getVarSize().getQuantity(), Flags,
                         CE->getLinkage());
    } else {
      llvm_unreachable("Unsupported entry kind.");
    }
  }
}
4291 
4292 /// Loads all the offload entries information from the host IR
4293 /// metadata.
4294 void CGOpenMPRuntime::loadOffloadInfoMetadata() {
4295   // If we are in target mode, load the metadata from the host IR. This code has
4296   // to match the metadaata creation in createOffloadEntriesAndInfoMetadata().
4297 
4298   if (!CGM.getLangOpts().OpenMPIsDevice)
4299     return;
4300 
4301   if (CGM.getLangOpts().OMPHostIRFile.empty())
4302     return;
4303 
4304   auto Buf = llvm::MemoryBuffer::getFile(CGM.getLangOpts().OMPHostIRFile);
4305   if (auto EC = Buf.getError()) {
4306     CGM.getDiags().Report(diag::err_cannot_open_file)
4307         << CGM.getLangOpts().OMPHostIRFile << EC.message();
4308     return;
4309   }
4310 
4311   llvm::LLVMContext C;
4312   auto ME = expectedToErrorOrAndEmitErrors(
4313       C, llvm::parseBitcodeFile(Buf.get()->getMemBufferRef(), C));
4314 
4315   if (auto EC = ME.getError()) {
4316     unsigned DiagID = CGM.getDiags().getCustomDiagID(
4317         DiagnosticsEngine::Error, "Unable to parse host IR file '%0':'%1'");
4318     CGM.getDiags().Report(DiagID)
4319         << CGM.getLangOpts().OMPHostIRFile << EC.message();
4320     return;
4321   }
4322 
4323   llvm::NamedMDNode *MD = ME.get()->getNamedMetadata("omp_offload.info");
4324   if (!MD)
4325     return;
4326 
4327   for (llvm::MDNode *MN : MD->operands()) {
4328     auto &&GetMDInt = [MN](unsigned Idx) {
4329       auto *V = cast<llvm::ConstantAsMetadata>(MN->getOperand(Idx));
4330       return cast<llvm::ConstantInt>(V->getValue())->getZExtValue();
4331     };
4332 
4333     auto &&GetMDString = [MN](unsigned Idx) {
4334       auto *V = cast<llvm::MDString>(MN->getOperand(Idx));
4335       return V->getString();
4336     };
4337 
4338     switch (GetMDInt(0)) {
4339     default:
4340       llvm_unreachable("Unexpected metadata!");
4341       break;
4342     case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
4343         OffloadingEntryInfoTargetRegion:
4344       OffloadEntriesInfoManager.initializeTargetRegionEntryInfo(
4345           /*DeviceID=*/GetMDInt(1), /*FileID=*/GetMDInt(2),
4346           /*ParentName=*/GetMDString(3), /*Line=*/GetMDInt(4),
4347           /*Order=*/GetMDInt(5));
4348       break;
4349     case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
4350         OffloadingEntryInfoDeviceGlobalVar:
4351       OffloadEntriesInfoManager.initializeDeviceGlobalVarEntryInfo(
4352           /*MangledName=*/GetMDString(1),
4353           static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>(
4354               /*Flags=*/GetMDInt(2)),
4355           /*Order=*/GetMDInt(3));
4356       break;
4357     }
4358   }
4359 }
4360 
4361 void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) {
4362   if (!KmpRoutineEntryPtrTy) {
4363     // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type.
4364     ASTContext &C = CGM.getContext();
4365     QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy};
4366     FunctionProtoType::ExtProtoInfo EPI;
4367     KmpRoutineEntryPtrQTy = C.getPointerType(
4368         C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI));
4369     KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy);
4370   }
4371 }
4372 
4373 QualType CGOpenMPRuntime::getTgtOffloadEntryQTy() {
4374   // Make sure the type of the entry is already created. This is the type we
4375   // have to create:
4376   // struct __tgt_offload_entry{
4377   //   void      *addr;       // Pointer to the offload entry info.
4378   //                          // (function or global)
4379   //   char      *name;       // Name of the function or global.
4380   //   size_t     size;       // Size of the entry info (0 if it a function).
4381   //   int32_t    flags;      // Flags associated with the entry, e.g. 'link'.
4382   //   int32_t    reserved;   // Reserved, to use by the runtime library.
4383   // };
4384   if (TgtOffloadEntryQTy.isNull()) {
4385     ASTContext &C = CGM.getContext();
4386     RecordDecl *RD = C.buildImplicitRecord("__tgt_offload_entry");
4387     RD->startDefinition();
4388     addFieldToRecordDecl(C, RD, C.VoidPtrTy);
4389     addFieldToRecordDecl(C, RD, C.getPointerType(C.CharTy));
4390     addFieldToRecordDecl(C, RD, C.getSizeType());
4391     addFieldToRecordDecl(
4392         C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
4393     addFieldToRecordDecl(
4394         C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
4395     RD->completeDefinition();
4396     RD->addAttr(PackedAttr::CreateImplicit(C));
4397     TgtOffloadEntryQTy = C.getRecordType(RD);
4398   }
4399   return TgtOffloadEntryQTy;
4400 }
4401 
4402 QualType CGOpenMPRuntime::getTgtDeviceImageQTy() {
4403   // These are the types we need to build:
4404   // struct __tgt_device_image{
4405   // void   *ImageStart;       // Pointer to the target code start.
4406   // void   *ImageEnd;         // Pointer to the target code end.
4407   // // We also add the host entries to the device image, as it may be useful
4408   // // for the target runtime to have access to that information.
4409   // __tgt_offload_entry  *EntriesBegin;   // Begin of the table with all
4410   //                                       // the entries.
4411   // __tgt_offload_entry  *EntriesEnd;     // End of the table with all the
4412   //                                       // entries (non inclusive).
4413   // };
4414   if (TgtDeviceImageQTy.isNull()) {
4415     ASTContext &C = CGM.getContext();
4416     RecordDecl *RD = C.buildImplicitRecord("__tgt_device_image");
4417     RD->startDefinition();
4418     addFieldToRecordDecl(C, RD, C.VoidPtrTy);
4419     addFieldToRecordDecl(C, RD, C.VoidPtrTy);
4420     addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy()));
4421     addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy()));
4422     RD->completeDefinition();
4423     TgtDeviceImageQTy = C.getRecordType(RD);
4424   }
4425   return TgtDeviceImageQTy;
4426 }
4427 
4428 QualType CGOpenMPRuntime::getTgtBinaryDescriptorQTy() {
4429   // struct __tgt_bin_desc{
4430   //   int32_t              NumDevices;      // Number of devices supported.
4431   //   __tgt_device_image   *DeviceImages;   // Arrays of device images
4432   //                                         // (one per device).
4433   //   __tgt_offload_entry  *EntriesBegin;   // Begin of the table with all the
4434   //                                         // entries.
4435   //   __tgt_offload_entry  *EntriesEnd;     // End of the table with all the
4436   //                                         // entries (non inclusive).
4437   // };
4438   if (TgtBinaryDescriptorQTy.isNull()) {
4439     ASTContext &C = CGM.getContext();
4440     RecordDecl *RD = C.buildImplicitRecord("__tgt_bin_desc");
4441     RD->startDefinition();
4442     addFieldToRecordDecl(
4443         C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
4444     addFieldToRecordDecl(C, RD, C.getPointerType(getTgtDeviceImageQTy()));
4445     addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy()));
4446     addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy()));
4447     RD->completeDefinition();
4448     TgtBinaryDescriptorQTy = C.getRecordType(RD);
4449   }
4450   return TgtBinaryDescriptorQTy;
4451 }
4452 
4453 namespace {
4454 struct PrivateHelpersTy {
4455   PrivateHelpersTy(const VarDecl *Original, const VarDecl *PrivateCopy,
4456                    const VarDecl *PrivateElemInit)
4457       : Original(Original), PrivateCopy(PrivateCopy),
4458         PrivateElemInit(PrivateElemInit) {}
4459   const VarDecl *Original;
4460   const VarDecl *PrivateCopy;
4461   const VarDecl *PrivateElemInit;
4462 };
4463 typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy;
4464 } // anonymous namespace
4465 
4466 static RecordDecl *
4467 createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) {
4468   if (!Privates.empty()) {
4469     ASTContext &C = CGM.getContext();
4470     // Build struct .kmp_privates_t. {
4471     //         /*  private vars  */
4472     //       };
4473     RecordDecl *RD = C.buildImplicitRecord(".kmp_privates.t");
4474     RD->startDefinition();
4475     for (const auto &Pair : Privates) {
4476       const VarDecl *VD = Pair.second.Original;
4477       QualType Type = VD->getType().getNonReferenceType();
4478       FieldDecl *FD = addFieldToRecordDecl(C, RD, Type);
4479       if (VD->hasAttrs()) {
4480         for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()),
4481              E(VD->getAttrs().end());
4482              I != E; ++I)
4483           FD->addAttr(*I);
4484       }
4485     }
4486     RD->completeDefinition();
4487     return RD;
4488   }
4489   return nullptr;
4490 }
4491 
4492 static RecordDecl *
4493 createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind,
4494                          QualType KmpInt32Ty,
4495                          QualType KmpRoutineEntryPointerQTy) {
4496   ASTContext &C = CGM.getContext();
4497   // Build struct kmp_task_t {
4498   //         void *              shareds;
4499   //         kmp_routine_entry_t routine;
4500   //         kmp_int32           part_id;
4501   //         kmp_cmplrdata_t data1;
4502   //         kmp_cmplrdata_t data2;
4503   // For taskloops additional fields:
4504   //         kmp_uint64          lb;
4505   //         kmp_uint64          ub;
4506   //         kmp_int64           st;
4507   //         kmp_int32           liter;
4508   //         void *              reductions;
4509   //       };
4510   RecordDecl *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TTK_Union);
4511   UD->startDefinition();
4512   addFieldToRecordDecl(C, UD, KmpInt32Ty);
4513   addFieldToRecordDecl(C, UD, KmpRoutineEntryPointerQTy);
4514   UD->completeDefinition();
4515   QualType KmpCmplrdataTy = C.getRecordType(UD);
4516   RecordDecl *RD = C.buildImplicitRecord("kmp_task_t");
4517   RD->startDefinition();
4518   addFieldToRecordDecl(C, RD, C.VoidPtrTy);
4519   addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy);
4520   addFieldToRecordDecl(C, RD, KmpInt32Ty);
4521   addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
4522   addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
4523   if (isOpenMPTaskLoopDirective(Kind)) {
4524     QualType KmpUInt64Ty =
4525         CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
4526     QualType KmpInt64Ty =
4527         CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
4528     addFieldToRecordDecl(C, RD, KmpUInt64Ty);
4529     addFieldToRecordDecl(C, RD, KmpUInt64Ty);
4530     addFieldToRecordDecl(C, RD, KmpInt64Ty);
4531     addFieldToRecordDecl(C, RD, KmpInt32Ty);
4532     addFieldToRecordDecl(C, RD, C.VoidPtrTy);
4533   }
4534   RD->completeDefinition();
4535   return RD;
4536 }
4537 
4538 static RecordDecl *
4539 createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy,
4540                                      ArrayRef<PrivateDataTy> Privates) {
4541   ASTContext &C = CGM.getContext();
4542   // Build struct kmp_task_t_with_privates {
4543   //         kmp_task_t task_data;
4544   //         .kmp_privates_t. privates;
4545   //       };
4546   RecordDecl *RD = C.buildImplicitRecord("kmp_task_t_with_privates");
4547   RD->startDefinition();
4548   addFieldToRecordDecl(C, RD, KmpTaskTQTy);
4549   if (const RecordDecl *PrivateRD = createPrivatesRecordDecl(CGM, Privates))
4550     addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD));
4551   RD->completeDefinition();
4552   return RD;
4553 }
4554 
/// Emit a proxy function which accepts a pointer of type
/// \p KmpTaskTWithPrivatesPtrQTy as the second argument and forwards to
/// \p TaskFunction.
/// \code
/// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t_with_privates *tt) {
///   TaskFunction(gtid, &tt->task_data.part_id, &tt->privates,
///   task_privates_map, tt,
///   For taskloops:
///   tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
///   tt->task_data.reductions,
///   tt->task_data.shareds);
///   return 0;
/// }
/// \endcode
/// A null pointer is passed instead of &tt->privates when the record has no
/// privates field.
///
/// \param Loc Location used for the emitted function and its loads.
/// \param Kind Directive kind; taskloop directives pass the extra arguments.
/// \param KmpInt32Ty Type of the gtid parameter and of the return value.
/// \param KmpTaskTWithPrivatesPtrQTy Type of the second parameter.
/// \param KmpTaskTWithPrivatesQTy Record type the second parameter points to.
/// \param KmpTaskTQTy Record type of the first field of that record.
/// \param SharedsPtrTy Type the loaded shareds pointer is cast to.
/// \param TaskFunction Outlined function that is called by the proxy.
/// \param TaskPrivatesMap Value passed through as the fourth call argument.
/// \returns the emitted proxy function.
static llvm::Function *
emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc,
                      OpenMPDirectiveKind Kind, QualType KmpInt32Ty,
                      QualType KmpTaskTWithPrivatesPtrQTy,
                      QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy,
                      QualType SharedsPtrTy, llvm::Function *TaskFunction,
                      llvm::Value *TaskPrivatesMap) {
  ASTContext &C = CGM.getContext();
  // Declare the two parameters of the proxy: the gtid and the
  // restrict-qualified task pointer.
  FunctionArgList Args;
  ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                                KmpTaskTWithPrivatesPtrQTy.withRestrict(),
                                ImplicitParamDecl::Other);
  Args.push_back(&GtidArg);
  Args.push_back(&TaskTypeArg);
  const auto &TaskEntryFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
  llvm::FunctionType *TaskEntryTy =
      CGM.getTypes().GetFunctionType(TaskEntryFnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_entry", ""});
  // The proxy is an internal function that is marked as non-recursive.
  auto *TaskEntry = llvm::Function::Create(
      TaskEntryTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskEntry, TaskEntryFnInfo);
  TaskEntry->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args,
                    Loc, Loc);

  // TaskFunction(gtid, &tt->task_data.part_id, &tt->privates,
  // task_privates_map, tt,
  // For taskloops:
  // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
  // tt->task_data.reductions,
  // tt->task_data.shareds);
  llvm::Value *GtidParam = CGF.EmitLoadOfScalar(
      CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc);
  // TDBase is the pointed-to record; Base is its first field (the task data).
  LValue TDBase = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&TaskTypeArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  const auto *KmpTaskTWithPrivatesQTyRD =
      cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
  LValue Base =
      CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
  const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
  // part_id is passed by address, not by value.
  auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
  LValue PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI);
  llvm::Value *PartidParam = PartIdLVal.getPointer();

  // shareds is loaded and cast to the caller-provided pointer type.
  auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds);
  LValue SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI);
  llvm::Value *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.EmitLoadOfScalar(SharedsLVal, Loc),
      CGF.ConvertTypeForMem(SharedsPtrTy));

  // The privates field is the second field of the record, if there is one;
  // otherwise a null pointer is passed.
  auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
  llvm::Value *PrivatesParam;
  if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) {
    LValue PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI);
    PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        PrivatesLVal.getPointer(), CGF.VoidPtrTy);
  } else {
    PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }

  // Arguments passed for every directive kind; the last one is the task
  // pointer itself, cast to void *.
  llvm::Value *CommonArgs[] = {GtidParam, PartidParam, PrivatesParam,
                               TaskPrivatesMap,
                               CGF.Builder
                                   .CreatePointerBitCastOrAddrSpaceCast(
                                       TDBase.getAddress(), CGF.VoidPtrTy)
                                   .getPointer()};
  SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs),
                                          std::end(CommonArgs));
  if (isOpenMPTaskLoopDirective(Kind)) {
    // Taskloops additionally pass the loaded lower bound, upper bound,
    // stride, last-iteration flag and reductions pointer.
    auto LBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound);
    LValue LBLVal = CGF.EmitLValueForField(Base, *LBFI);
    llvm::Value *LBParam = CGF.EmitLoadOfScalar(LBLVal, Loc);
    auto UBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound);
    LValue UBLVal = CGF.EmitLValueForField(Base, *UBFI);
    llvm::Value *UBParam = CGF.EmitLoadOfScalar(UBLVal, Loc);
    auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride);
    LValue StLVal = CGF.EmitLValueForField(Base, *StFI);
    llvm::Value *StParam = CGF.EmitLoadOfScalar(StLVal, Loc);
    auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
    LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
    llvm::Value *LIParam = CGF.EmitLoadOfScalar(LILVal, Loc);
    auto RFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTReductions);
    LValue RLVal = CGF.EmitLValueForField(Base, *RFI);
    llvm::Value *RParam = CGF.EmitLoadOfScalar(RLVal, Loc);
    CallArgs.push_back(LBParam);
    CallArgs.push_back(UBParam);
    CallArgs.push_back(StParam);
    CallArgs.push_back(LIParam);
    CallArgs.push_back(RParam);
  }
  // The shareds pointer is always the last argument.
  CallArgs.push_back(SharedsParam);

  CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskFunction,
                                                  CallArgs);
  // The proxy always returns 0.
  CGF.EmitStoreThroughLValue(RValue::get(CGF.Builder.getInt32(/*C=*/0)),
                             CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty));
  CGF.FinishFunction();
  return TaskEntry;
}
4669 
4670 static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM,
4671                                             SourceLocation Loc,
4672                                             QualType KmpInt32Ty,
4673                                             QualType KmpTaskTWithPrivatesPtrQTy,
4674                                             QualType KmpTaskTWithPrivatesQTy) {
4675   ASTContext &C = CGM.getContext();
4676   FunctionArgList Args;
4677   ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
4678                             ImplicitParamDecl::Other);
4679   ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4680                                 KmpTaskTWithPrivatesPtrQTy.withRestrict(),
4681                                 ImplicitParamDecl::Other);
4682   Args.push_back(&GtidArg);
4683   Args.push_back(&TaskTypeArg);
4684   const auto &DestructorFnInfo =
4685       CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
4686   llvm::FunctionType *DestructorFnTy =
4687       CGM.getTypes().GetFunctionType(DestructorFnInfo);
4688   std::string Name =
4689       CGM.getOpenMPRuntime().getName({"omp_task_destructor", ""});
4690   auto *DestructorFn =
4691       llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage,
4692                              Name, &CGM.getModule());
4693   CGM.SetInternalFunctionAttributes(GlobalDecl(), DestructorFn,
4694                                     DestructorFnInfo);
4695   DestructorFn->setDoesNotRecurse();
4696   CodeGenFunction CGF(CGM);
4697   CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo,
4698                     Args, Loc, Loc);
4699 
4700   LValue Base = CGF.EmitLoadOfPointerLValue(
4701       CGF.GetAddrOfLocalVar(&TaskTypeArg),
4702       KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
4703   const auto *KmpTaskTWithPrivatesQTyRD =
4704       cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
4705   auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
4706   Base = CGF.EmitLValueForField(Base, *FI);
4707   for (const auto *Field :
4708        cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) {
4709     if (QualType::DestructionKind DtorKind =
4710             Field->getType().isDestructedType()) {
4711       LValue FieldLValue = CGF.EmitLValueForField(Base, Field);
4712       CGF.pushDestroy(DtorKind, FieldLValue.getAddress(), Field->getType());
4713     }
4714   }
4715   CGF.FinishFunction();
4716   return DestructorFn;
4717 }
4718 
4719 /// Emit a privates mapping function for correct handling of private and
4720 /// firstprivate variables.
4721 /// \code
4722 /// void .omp_task_privates_map.(const .privates. *noalias privs, <ty1>
4723 /// **noalias priv1,...,  <tyn> **noalias privn) {
4724 ///   *priv1 = &.privates.priv1;
4725 ///   ...;
4726 ///   *privn = &.privates.privn;
4727 /// }
4728 /// \endcode
4729 static llvm::Value *
4730 emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc,
4731                                ArrayRef<const Expr *> PrivateVars,
4732                                ArrayRef<const Expr *> FirstprivateVars,
4733                                ArrayRef<const Expr *> LastprivateVars,
4734                                QualType PrivatesQTy,
4735                                ArrayRef<PrivateDataTy> Privates) {
4736   ASTContext &C = CGM.getContext();
4737   FunctionArgList Args;
4738   ImplicitParamDecl TaskPrivatesArg(
4739       C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4740       C.getPointerType(PrivatesQTy).withConst().withRestrict(),
4741       ImplicitParamDecl::Other);
4742   Args.push_back(&TaskPrivatesArg);
4743   llvm::DenseMap<const VarDecl *, unsigned> PrivateVarsPos;
4744   unsigned Counter = 1;
4745   for (const Expr *E : PrivateVars) {
4746     Args.push_back(ImplicitParamDecl::Create(
4747         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4748         C.getPointerType(C.getPointerType(E->getType()))
4749             .withConst()
4750             .withRestrict(),
4751         ImplicitParamDecl::Other));
4752     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4753     PrivateVarsPos[VD] = Counter;
4754     ++Counter;
4755   }
4756   for (const Expr *E : FirstprivateVars) {
4757     Args.push_back(ImplicitParamDecl::Create(
4758         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4759         C.getPointerType(C.getPointerType(E->getType()))
4760             .withConst()
4761             .withRestrict(),
4762         ImplicitParamDecl::Other));
4763     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4764     PrivateVarsPos[VD] = Counter;
4765     ++Counter;
4766   }
4767   for (const Expr *E : LastprivateVars) {
4768     Args.push_back(ImplicitParamDecl::Create(
4769         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4770         C.getPointerType(C.getPointerType(E->getType()))
4771             .withConst()
4772             .withRestrict(),
4773         ImplicitParamDecl::Other));
4774     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4775     PrivateVarsPos[VD] = Counter;
4776     ++Counter;
4777   }
4778   const auto &TaskPrivatesMapFnInfo =
4779       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
4780   llvm::FunctionType *TaskPrivatesMapTy =
4781       CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo);
4782   std::string Name =
4783       CGM.getOpenMPRuntime().getName({"omp_task_privates_map", ""});
4784   auto *TaskPrivatesMap = llvm::Function::Create(
4785       TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage, Name,
4786       &CGM.getModule());
4787   CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskPrivatesMap,
4788                                     TaskPrivatesMapFnInfo);
4789   if (CGM.getLangOpts().Optimize) {
4790     TaskPrivatesMap->removeFnAttr(llvm::Attribute::NoInline);
4791     TaskPrivatesMap->removeFnAttr(llvm::Attribute::OptimizeNone);
4792     TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline);
4793   }
4794   CodeGenFunction CGF(CGM);
4795   CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap,
4796                     TaskPrivatesMapFnInfo, Args, Loc, Loc);
4797 
4798   // *privi = &.privates.privi;
4799   LValue Base = CGF.EmitLoadOfPointerLValue(
4800       CGF.GetAddrOfLocalVar(&TaskPrivatesArg),
4801       TaskPrivatesArg.getType()->castAs<PointerType>());
4802   const auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl());
4803   Counter = 0;
4804   for (const FieldDecl *Field : PrivatesQTyRD->fields()) {
4805     LValue FieldLVal = CGF.EmitLValueForField(Base, Field);
4806     const VarDecl *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]];
4807     LValue RefLVal =
4808         CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType());
4809     LValue RefLoadLVal = CGF.EmitLoadOfPointerLValue(
4810         RefLVal.getAddress(), RefLVal.getType()->castAs<PointerType>());
4811     CGF.EmitStoreOfScalar(FieldLVal.getPointer(), RefLoadLVal);
4812     ++Counter;
4813   }
4814   CGF.FinishFunction();
4815   return TaskPrivatesMap;
4816 }
4817 
4818 /// Emit initialization for private variables in task-based directives.
4819 static void emitPrivatesInit(CodeGenFunction &CGF,
4820                              const OMPExecutableDirective &D,
4821                              Address KmpTaskSharedsPtr, LValue TDBase,
4822                              const RecordDecl *KmpTaskTWithPrivatesQTyRD,
4823                              QualType SharedsTy, QualType SharedsPtrTy,
4824                              const OMPTaskDataTy &Data,
4825                              ArrayRef<PrivateDataTy> Privates, bool ForDup) {
4826   ASTContext &C = CGF.getContext();
4827   auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
4828   LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI);
4829   OpenMPDirectiveKind Kind = isOpenMPTaskLoopDirective(D.getDirectiveKind())
4830                                  ? OMPD_taskloop
4831                                  : OMPD_task;
4832   const CapturedStmt &CS = *D.getCapturedStmt(Kind);
4833   CodeGenFunction::CGCapturedStmtInfo CapturesInfo(CS);
4834   LValue SrcBase;
4835   bool IsTargetTask =
4836       isOpenMPTargetDataManagementDirective(D.getDirectiveKind()) ||
4837       isOpenMPTargetExecutionDirective(D.getDirectiveKind());
4838   // For target-based directives skip 3 firstprivate arrays BasePointersArray,
4839   // PointersArray and SizesArray. The original variables for these arrays are
4840   // not captured and we get their addresses explicitly.
4841   if ((!IsTargetTask && !Data.FirstprivateVars.empty()) ||
4842       (IsTargetTask && KmpTaskSharedsPtr.isValid())) {
4843     SrcBase = CGF.MakeAddrLValue(
4844         CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4845             KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy)),
4846         SharedsTy);
4847   }
4848   FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin();
4849   for (const PrivateDataTy &Pair : Privates) {
4850     const VarDecl *VD = Pair.second.PrivateCopy;
4851     const Expr *Init = VD->getAnyInitializer();
4852     if (Init && (!ForDup || (isa<CXXConstructExpr>(Init) &&
4853                              !CGF.isTrivialInitializer(Init)))) {
4854       LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI);
4855       if (const VarDecl *Elem = Pair.second.PrivateElemInit) {
4856         const VarDecl *OriginalVD = Pair.second.Original;
4857         // Check if the variable is the target-based BasePointersArray,
4858         // PointersArray or SizesArray.
4859         LValue SharedRefLValue;
4860         QualType Type = PrivateLValue.getType();
4861         const FieldDecl *SharedField = CapturesInfo.lookup(OriginalVD);
4862         if (IsTargetTask && !SharedField) {
4863           assert(isa<ImplicitParamDecl>(OriginalVD) &&
4864                  isa<CapturedDecl>(OriginalVD->getDeclContext()) &&
4865                  cast<CapturedDecl>(OriginalVD->getDeclContext())
4866                          ->getNumParams() == 0 &&
4867                  isa<TranslationUnitDecl>(
4868                      cast<CapturedDecl>(OriginalVD->getDeclContext())
4869                          ->getDeclContext()) &&
4870                  "Expected artificial target data variable.");
4871           SharedRefLValue =
4872               CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(OriginalVD), Type);
4873         } else {
4874           SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField);
4875           SharedRefLValue = CGF.MakeAddrLValue(
4876               Address(SharedRefLValue.getPointer(), C.getDeclAlign(OriginalVD)),
4877               SharedRefLValue.getType(), LValueBaseInfo(AlignmentSource::Decl),
4878               SharedRefLValue.getTBAAInfo());
4879         }
4880         if (Type->isArrayType()) {
4881           // Initialize firstprivate array.
4882           if (!isa<CXXConstructExpr>(Init) || CGF.isTrivialInitializer(Init)) {
4883             // Perform simple memcpy.
4884             CGF.EmitAggregateAssign(PrivateLValue, SharedRefLValue, Type);
4885           } else {
4886             // Initialize firstprivate array using element-by-element
4887             // initialization.
4888             CGF.EmitOMPAggregateAssign(
4889                 PrivateLValue.getAddress(), SharedRefLValue.getAddress(), Type,
4890                 [&CGF, Elem, Init, &CapturesInfo](Address DestElement,
4891                                                   Address SrcElement) {
4892                   // Clean up any temporaries needed by the initialization.
4893                   CodeGenFunction::OMPPrivateScope InitScope(CGF);
4894                   InitScope.addPrivate(
4895                       Elem, [SrcElement]() -> Address { return SrcElement; });
4896                   (void)InitScope.Privatize();
4897                   // Emit initialization for single element.
4898                   CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(
4899                       CGF, &CapturesInfo);
4900                   CGF.EmitAnyExprToMem(Init, DestElement,
4901                                        Init->getType().getQualifiers(),
4902                                        /*IsInitializer=*/false);
4903                 });
4904           }
4905         } else {
4906           CodeGenFunction::OMPPrivateScope InitScope(CGF);
4907           InitScope.addPrivate(Elem, [SharedRefLValue]() -> Address {
4908             return SharedRefLValue.getAddress();
4909           });
4910           (void)InitScope.Privatize();
4911           CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo);
4912           CGF.EmitExprAsInit(Init, VD, PrivateLValue,
4913                              /*capturedByInit=*/false);
4914         }
4915       } else {
4916         CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false);
4917       }
4918     }
4919     ++FI;
4920   }
4921 }
4922 
4923 /// Check if duplication function is required for taskloops.
4924 static bool checkInitIsRequired(CodeGenFunction &CGF,
4925                                 ArrayRef<PrivateDataTy> Privates) {
4926   bool InitRequired = false;
4927   for (const PrivateDataTy &Pair : Privates) {
4928     const VarDecl *VD = Pair.second.PrivateCopy;
4929     const Expr *Init = VD->getAnyInitializer();
4930     InitRequired = InitRequired || (Init && isa<CXXConstructExpr>(Init) &&
4931                                     !CGF.isTrivialInitializer(Init));
4932     if (InitRequired)
4933       break;
4934   }
4935   return InitRequired;
4936 }
4937 
4938 
4939 /// Emit task_dup function (for initialization of
4940 /// private/firstprivate/lastprivate vars and last_iter flag)
4941 /// \code
4942 /// void __task_dup_entry(kmp_task_t *task_dst, const kmp_task_t *task_src, int
4943 /// lastpriv) {
4944 /// // setup lastprivate flag
4945 ///    task_dst->last = lastpriv;
4946 /// // could be constructor calls here...
4947 /// }
4948 /// \endcode
4949 static llvm::Value *
4950 emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc,
4951                     const OMPExecutableDirective &D,
4952                     QualType KmpTaskTWithPrivatesPtrQTy,
4953                     const RecordDecl *KmpTaskTWithPrivatesQTyRD,
4954                     const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy,
4955                     QualType SharedsPtrTy, const OMPTaskDataTy &Data,
4956                     ArrayRef<PrivateDataTy> Privates, bool WithLastIter) {
4957   ASTContext &C = CGM.getContext();
4958   FunctionArgList Args;
4959   ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4960                            KmpTaskTWithPrivatesPtrQTy,
4961                            ImplicitParamDecl::Other);
4962   ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4963                            KmpTaskTWithPrivatesPtrQTy,
4964                            ImplicitParamDecl::Other);
4965   ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy,
4966                                 ImplicitParamDecl::Other);
4967   Args.push_back(&DstArg);
4968   Args.push_back(&SrcArg);
4969   Args.push_back(&LastprivArg);
4970   const auto &TaskDupFnInfo =
4971       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
4972   llvm::FunctionType *TaskDupTy = CGM.getTypes().GetFunctionType(TaskDupFnInfo);
4973   std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_dup", ""});
4974   auto *TaskDup = llvm::Function::Create(
4975       TaskDupTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
4976   CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskDup, TaskDupFnInfo);
4977   TaskDup->setDoesNotRecurse();
4978   CodeGenFunction CGF(CGM);
4979   CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskDup, TaskDupFnInfo, Args, Loc,
4980                     Loc);
4981 
4982   LValue TDBase = CGF.EmitLoadOfPointerLValue(
4983       CGF.GetAddrOfLocalVar(&DstArg),
4984       KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
4985   // task_dst->liter = lastpriv;
4986   if (WithLastIter) {
4987     auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
4988     LValue Base = CGF.EmitLValueForField(
4989         TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
4990     LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
4991     llvm::Value *Lastpriv = CGF.EmitLoadOfScalar(
4992         CGF.GetAddrOfLocalVar(&LastprivArg), /*Volatile=*/false, C.IntTy, Loc);
4993     CGF.EmitStoreOfScalar(Lastpriv, LILVal);
4994   }
4995 
4996   // Emit initial values for private copies (if any).
4997   assert(!Privates.empty());
4998   Address KmpTaskSharedsPtr = Address::invalid();
4999   if (!Data.FirstprivateVars.empty()) {
5000     LValue TDBase = CGF.EmitLoadOfPointerLValue(
5001         CGF.GetAddrOfLocalVar(&SrcArg),
5002         KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
5003     LValue Base = CGF.EmitLValueForField(
5004         TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
5005     KmpTaskSharedsPtr = Address(
5006         CGF.EmitLoadOfScalar(CGF.EmitLValueForField(
5007                                  Base, *std::next(KmpTaskTQTyRD->field_begin(),
5008                                                   KmpTaskTShareds)),
5009                              Loc),
5010         CGF.getNaturalTypeAlignment(SharedsTy));
5011   }
5012   emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase, KmpTaskTWithPrivatesQTyRD,
5013                    SharedsTy, SharedsPtrTy, Data, Privates, /*ForDup=*/true);
5014   CGF.FinishFunction();
5015   return TaskDup;
5016 }
5017 
5018 /// Checks if destructor function is required to be generated.
5019 /// \return true if cleanups are required, false otherwise.
5020 static bool
5021 checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD) {
5022   bool NeedsCleanup = false;
5023   auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
5024   const auto *PrivateRD = cast<RecordDecl>(FI->getType()->getAsTagDecl());
5025   for (const FieldDecl *FD : PrivateRD->fields()) {
5026     NeedsCleanup = NeedsCleanup || FD->getType().isDestructedType();
5027     if (NeedsCleanup)
5028       break;
5029   }
5030   return NeedsCleanup;
5031 }
5032 
/// Emit allocation and initialization of the runtime task object for
/// directive \p D.
///
/// The function gathers the private, firstprivate and lastprivate copies from
/// \p Data, builds the per-task record type, emits the helper functions
/// (privates mapping function, proxy task entry and, when needed, the
/// task-dup and destructor functions), emits the task allocation runtime call,
/// copies the shareds, initializes the private copies, and fills in the
/// destructor and priority fields of the task record.
///
/// \param CGF Function in which the allocation code is emitted.
/// \param Loc Source location of the directive.
/// \param D Task, taskloop or target directive being lowered.
/// \param TaskFunction Outlined task function called by the proxy entry.
/// \param SharedsTy Type of the structure holding shared variables.
/// \param Shareds Address of the shareds structure to copy into the task.
/// \param Data Task data (privates, flags, priority, ...).
/// \return Record with the allocated task, the proxy entry, the task pointer
/// cast to the per-task record pointer type, the lvalue of the embedded
/// kmp_task_t, its record declaration and, if emitted, the task-dup function.
CGOpenMPRuntime::TaskResultTy
CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,
                              const OMPExecutableDirective &D,
                              llvm::Function *TaskFunction, QualType SharedsTy,
                              Address Shareds, const OMPTaskDataTy &Data) {
  ASTContext &C = CGM.getContext();
  llvm::SmallVector<PrivateDataTy, 4> Privates;
  // Aggregate privates and sort them by the alignment.
  auto I = Data.PrivateCopies.begin();
  for (const Expr *E : Data.PrivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
                         /*PrivateElemInit=*/nullptr));
    ++I;
  }
  // Firstprivates additionally carry the element-init helper variable.
  I = Data.FirstprivateCopies.begin();
  auto IElemInitRef = Data.FirstprivateInits.begin();
  for (const Expr *E : Data.FirstprivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(
            VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
            cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl())));
    ++I;
    ++IElemInitRef;
  }
  I = Data.LastprivateCopies.begin();
  for (const Expr *E : Data.LastprivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
                         /*PrivateElemInit=*/nullptr));
    ++I;
  }
  // Order by decreasing alignment; the stable sort keeps the relative order
  // of entries with equal alignment.
  llvm::stable_sort(Privates, [](PrivateDataTy L, PrivateDataTy R) {
    return L.first > R.first;
  });
  QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
  // Build type kmp_routine_entry_t (if not built yet).
  emitKmpRoutineEntryT(KmpInt32Ty);
  // Build type kmp_task_t (if not built yet).
  // Taskloop directives and the remaining directives each use their own
  // cached record type.
  if (isOpenMPTaskLoopDirective(D.getDirectiveKind())) {
    if (SavedKmpTaskloopTQTy.isNull()) {
      SavedKmpTaskloopTQTy = C.getRecordType(createKmpTaskTRecordDecl(
          CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
    }
    KmpTaskTQTy = SavedKmpTaskloopTQTy;
  } else {
    assert((D.getDirectiveKind() == OMPD_task ||
            isOpenMPTargetExecutionDirective(D.getDirectiveKind()) ||
            isOpenMPTargetDataManagementDirective(D.getDirectiveKind())) &&
           "Expected taskloop, task or target directive");
    if (SavedKmpTaskTQTy.isNull()) {
      SavedKmpTaskTQTy = C.getRecordType(createKmpTaskTRecordDecl(
          CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
    }
    KmpTaskTQTy = SavedKmpTaskTQTy;
  }
  const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
  // Build particular struct kmp_task_t for the given task.
  const RecordDecl *KmpTaskTWithPrivatesQTyRD =
      createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates);
  QualType KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD);
  QualType KmpTaskTWithPrivatesPtrQTy =
      C.getPointerType(KmpTaskTWithPrivatesQTy);
  llvm::Type *KmpTaskTWithPrivatesTy = CGF.ConvertType(KmpTaskTWithPrivatesQTy);
  llvm::Type *KmpTaskTWithPrivatesPtrTy =
      KmpTaskTWithPrivatesTy->getPointerTo();
  llvm::Value *KmpTaskTWithPrivatesTySize =
      CGF.getTypeSize(KmpTaskTWithPrivatesQTy);
  QualType SharedsPtrTy = C.getPointerType(SharedsTy);

  // Emit initial values for private copies (if any).
  llvm::Value *TaskPrivatesMap = nullptr;
  // The privates mapping function is passed as the fourth parameter of the
  // outlined task function, so take the expected pointer type from there.
  llvm::Type *TaskPrivatesMapTy =
      std::next(TaskFunction->arg_begin(), 3)->getType();
  if (!Privates.empty()) {
    auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
    TaskPrivatesMap = emitTaskPrivateMappingFunction(
        CGM, Loc, Data.PrivateVars, Data.FirstprivateVars, Data.LastprivateVars,
        FI->getType(), Privates);
    TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        TaskPrivatesMap, TaskPrivatesMapTy);
  } else {
    // No privates: pass a null mapping function.
    TaskPrivatesMap = llvm::ConstantPointerNull::get(
        cast<llvm::PointerType>(TaskPrivatesMapTy));
  }
  // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid,
  // kmp_task_t *tt);
  llvm::Function *TaskEntry = emitProxyTaskFunction(
      CGM, Loc, D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
      KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction,
      TaskPrivatesMap);

  // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
  // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
  // kmp_routine_entry_t *task_entry);
  // Task flags. Format is taken from
  // https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h,
  // description of kmp_tasking_flags struct.
  enum {
    TiedFlag = 0x1,
    FinalFlag = 0x2,
    DestructorsFlag = 0x8,
    PriorityFlag = 0x20
  };
  // Flags known at compile time are collected in Flags; the final flag may
  // depend on a runtime value and is merged in below.
  unsigned Flags = Data.Tied ? TiedFlag : 0;
  bool NeedsCleanup = false;
  if (!Privates.empty()) {
    NeedsCleanup = checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD);
    if (NeedsCleanup)
      Flags = Flags | DestructorsFlag;
  }
  if (Data.Priority.getInt())
    Flags = Flags | PriorityFlag;
  // If the final condition is a runtime value, select between FinalFlag and 0
  // at run time; otherwise fold the constant.
  llvm::Value *TaskFlags =
      Data.Final.getPointer()
          ? CGF.Builder.CreateSelect(Data.Final.getPointer(),
                                     CGF.Builder.getInt32(FinalFlag),
                                     CGF.Builder.getInt32(/*C=*/0))
          : CGF.Builder.getInt32(Data.Final.getInt() ? FinalFlag : 0);
  TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags));
  llvm::Value *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy));
  SmallVector<llvm::Value *, 8> AllocArgs = {emitUpdateLocation(CGF, Loc),
      getThreadID(CGF, Loc), TaskFlags, KmpTaskTWithPrivatesTySize,
      SharedsSize, CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          TaskEntry, KmpRoutineEntryPtrTy)};
  llvm::Value *NewTask;
  // Directives with a 'nowait' clause are allocated through
  // __kmpc_omp_target_task_alloc, which takes the device ID as an extra
  // trailing argument.
  if (D.hasClausesOfKind<OMPNowaitClause>()) {
    // Check if we have any device clause associated with the directive.
    const Expr *Device = nullptr;
    if (auto *C = D.getSingleClause<OMPDeviceClause>())
      Device = C->getDevice();
    // Emit device ID if any otherwise use default value.
    llvm::Value *DeviceID;
    if (Device)
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    else
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    AllocArgs.push_back(DeviceID);
    NewTask = CGF.EmitRuntimeCall(
      createRuntimeFunction(OMPRTL__kmpc_omp_target_task_alloc), AllocArgs);
  } else {
    NewTask = CGF.EmitRuntimeCall(
      createRuntimeFunction(OMPRTL__kmpc_omp_task_alloc), AllocArgs);
  }
  // View the allocated task as the per-task record (kmp_task_t followed by
  // the privates); TDBase addresses the embedded kmp_task_t (first field).
  llvm::Value *NewTaskNewTaskTTy =
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          NewTask, KmpTaskTWithPrivatesPtrTy);
  LValue Base = CGF.MakeNaturalAlignAddrLValue(NewTaskNewTaskTTy,
                                               KmpTaskTWithPrivatesQTy);
  LValue TDBase =
      CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin());
  // Fill the data in the resulting kmp_task_t record.
  // Copy shareds if there are any.
  Address KmpTaskSharedsPtr = Address::invalid();
  if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) {
    // Load the shareds pointer stored in the task and copy the caller's
    // shareds structure into the memory it points to.
    KmpTaskSharedsPtr =
        Address(CGF.EmitLoadOfScalar(
                    CGF.EmitLValueForField(
                        TDBase, *std::next(KmpTaskTQTyRD->field_begin(),
                                           KmpTaskTShareds)),
                    Loc),
                CGF.getNaturalTypeAlignment(SharedsTy));
    LValue Dest = CGF.MakeAddrLValue(KmpTaskSharedsPtr, SharedsTy);
    LValue Src = CGF.MakeAddrLValue(Shareds, SharedsTy);
    CGF.EmitAggregateCopy(Dest, Src, SharedsTy, AggValueSlot::DoesNotOverlap);
  }
  // Emit initial values for private copies (if any).
  TaskResultTy Result;
  if (!Privates.empty()) {
    emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, Base, KmpTaskTWithPrivatesQTyRD,
                     SharedsTy, SharedsPtrTy, Data, Privates,
                     /*ForDup=*/false);
    // Taskloops get a task-dup function when there are lastprivates or when
    // some private copy needs non-trivial construction.
    if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) &&
        (!Data.LastprivateVars.empty() || checkInitIsRequired(CGF, Privates))) {
      Result.TaskDupFn = emitTaskDupFunction(
          CGM, Loc, D, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTyRD,
          KmpTaskTQTyRD, SharedsTy, SharedsPtrTy, Data, Privates,
          /*WithLastIter=*/!Data.LastprivateVars.empty());
    }
  }
  // Fields of union "kmp_cmplrdata_t" for destructors and priority.
  enum { Priority = 0, Destructors = 1 };
  // Provide pointer to function with destructors for privates.
  auto FI = std::next(KmpTaskTQTyRD->field_begin(), Data1);
  // NOTE(review): the union declaration is taken from the Data1 field and
  // reused for Data2 below; presumably both fields share the same union type.
  const RecordDecl *KmpCmplrdataUD =
      (*FI)->getType()->getAsUnionType()->getDecl();
  if (NeedsCleanup) {
    llvm::Value *DestructorFn = emitDestructorsFunction(
        CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
        KmpTaskTWithPrivatesQTy);
    LValue Data1LV = CGF.EmitLValueForField(TDBase, *FI);
    LValue DestructorsLV = CGF.EmitLValueForField(
        Data1LV, *std::next(KmpCmplrdataUD->field_begin(), Destructors));
    CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                              DestructorFn, KmpRoutineEntryPtrTy),
                          DestructorsLV);
  }
  // Set priority.
  if (Data.Priority.getInt()) {
    LValue Data2LV = CGF.EmitLValueForField(
        TDBase, *std::next(KmpTaskTQTyRD->field_begin(), Data2));
    LValue PriorityLV = CGF.EmitLValueForField(
        Data2LV, *std::next(KmpCmplrdataUD->field_begin(), Priority));
    CGF.EmitStoreOfScalar(Data.Priority.getPointer(), PriorityLV);
  }
  // Hand everything the callers need back in one record.
  Result.NewTask = NewTask;
  Result.TaskEntry = TaskEntry;
  Result.NewTaskNewTaskTTy = NewTaskNewTaskTTy;
  Result.TDBase = TDBase;
  Result.KmpTaskTQTyRD = KmpTaskTQTyRD;
  return Result;
}
5252 
5253 void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
5254                                    const OMPExecutableDirective &D,
5255                                    llvm::Function *TaskFunction,
5256                                    QualType SharedsTy, Address Shareds,
5257                                    const Expr *IfCond,
5258                                    const OMPTaskDataTy &Data) {
5259   if (!CGF.HaveInsertPoint())
5260     return;
5261 
5262   TaskResultTy Result =
5263       emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
5264   llvm::Value *NewTask = Result.NewTask;
5265   llvm::Function *TaskEntry = Result.TaskEntry;
5266   llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy;
5267   LValue TDBase = Result.TDBase;
5268   const RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD;
5269   ASTContext &C = CGM.getContext();
5270   // Process list of dependences.
5271   Address DependenciesArray = Address::invalid();
5272   unsigned NumDependencies = Data.Dependences.size();
5273   if (NumDependencies) {
5274     // Dependence kind for RTL.
5275     enum RTLDependenceKindTy { DepIn = 0x01, DepInOut = 0x3, DepMutexInOutSet = 0x4 };
5276     enum RTLDependInfoFieldsTy { BaseAddr, Len, Flags };
5277     RecordDecl *KmpDependInfoRD;
5278     QualType FlagsTy =
5279         C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false);
5280     llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
5281     if (KmpDependInfoTy.isNull()) {
5282       KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info");
5283       KmpDependInfoRD->startDefinition();
5284       addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType());
5285       addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType());
5286       addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy);
5287       KmpDependInfoRD->completeDefinition();
5288       KmpDependInfoTy = C.getRecordType(KmpDependInfoRD);
5289     } else {
5290       KmpDependInfoRD = cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
5291     }
5292     // Define type kmp_depend_info[<Dependences.size()>];
5293     QualType KmpDependInfoArrayTy = C.getConstantArrayType(
5294         KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies),
5295         ArrayType::Normal, /*IndexTypeQuals=*/0);
5296     // kmp_depend_info[<Dependences.size()>] deps;
5297     DependenciesArray =
5298         CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr");
5299     for (unsigned I = 0; I < NumDependencies; ++I) {
5300       const Expr *E = Data.Dependences[I].second;
5301       LValue Addr = CGF.EmitLValue(E);
5302       llvm::Value *Size;
5303       QualType Ty = E->getType();
5304       if (const auto *ASE =
5305               dyn_cast<OMPArraySectionExpr>(E->IgnoreParenImpCasts())) {
5306         LValue UpAddrLVal =
5307             CGF.EmitOMPArraySectionExpr(ASE, /*IsLowerBound=*/false);
5308         llvm::Value *UpAddr =
5309             CGF.Builder.CreateConstGEP1_32(UpAddrLVal.getPointer(), /*Idx0=*/1);
5310         llvm::Value *LowIntPtr =
5311             CGF.Builder.CreatePtrToInt(Addr.getPointer(), CGM.SizeTy);
5312         llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGM.SizeTy);
5313         Size = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr);
5314       } else {
5315         Size = CGF.getTypeSize(Ty);
5316       }
5317       LValue Base = CGF.MakeAddrLValue(
5318           CGF.Builder.CreateConstArrayGEP(DependenciesArray, I),
5319           KmpDependInfoTy);
5320       // deps[i].base_addr = &<Dependences[i].second>;
5321       LValue BaseAddrLVal = CGF.EmitLValueForField(
5322           Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
5323       CGF.EmitStoreOfScalar(
5324           CGF.Builder.CreatePtrToInt(Addr.getPointer(), CGF.IntPtrTy),
5325           BaseAddrLVal);
5326       // deps[i].len = sizeof(<Dependences[i].second>);
5327       LValue LenLVal = CGF.EmitLValueForField(
5328           Base, *std::next(KmpDependInfoRD->field_begin(), Len));
5329       CGF.EmitStoreOfScalar(Size, LenLVal);
5330       // deps[i].flags = <Dependences[i].first>;
5331       RTLDependenceKindTy DepKind;
5332       switch (Data.Dependences[I].first) {
5333       case OMPC_DEPEND_in:
5334         DepKind = DepIn;
5335         break;
5336       // Out and InOut dependencies must use the same code.
5337       case OMPC_DEPEND_out:
5338       case OMPC_DEPEND_inout:
5339         DepKind = DepInOut;
5340         break;
5341       case OMPC_DEPEND_mutexinoutset:
5342         DepKind = DepMutexInOutSet;
5343         break;
5344       case OMPC_DEPEND_source:
5345       case OMPC_DEPEND_sink:
5346       case OMPC_DEPEND_unknown:
5347         llvm_unreachable("Unknown task dependence type");
5348       }
5349       LValue FlagsLVal = CGF.EmitLValueForField(
5350           Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
5351       CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
5352                             FlagsLVal);
5353     }
5354     DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5355         CGF.Builder.CreateConstArrayGEP(DependenciesArray, 0), CGF.VoidPtrTy);
5356   }
5357 
5358   // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
5359   // libcall.
5360   // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
5361   // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
5362   // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if dependence
5363   // list is not empty
5364   llvm::Value *ThreadID = getThreadID(CGF, Loc);
5365   llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
5366   llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask };
5367   llvm::Value *DepTaskArgs[7];
5368   if (NumDependencies) {
5369     DepTaskArgs[0] = UpLoc;
5370     DepTaskArgs[1] = ThreadID;
5371     DepTaskArgs[2] = NewTask;
5372     DepTaskArgs[3] = CGF.Builder.getInt32(NumDependencies);
5373     DepTaskArgs[4] = DependenciesArray.getPointer();
5374     DepTaskArgs[5] = CGF.Builder.getInt32(0);
5375     DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
5376   }
5377   auto &&ThenCodeGen = [this, &Data, TDBase, KmpTaskTQTyRD, NumDependencies,
5378                         &TaskArgs,
5379                         &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) {
5380     if (!Data.Tied) {
5381       auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
5382       LValue PartIdLVal = CGF.EmitLValueForField(TDBase, *PartIdFI);
5383       CGF.EmitStoreOfScalar(CGF.Builder.getInt32(0), PartIdLVal);
5384     }
5385     if (NumDependencies) {
5386       CGF.EmitRuntimeCall(
5387           createRuntimeFunction(OMPRTL__kmpc_omp_task_with_deps), DepTaskArgs);
5388     } else {
5389       CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task),
5390                           TaskArgs);
5391     }
5392     // Check if parent region is untied and build return for untied task;
5393     if (auto *Region =
5394             dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
5395       Region->emitUntiedSwitch(CGF);
5396   };
5397 
5398   llvm::Value *DepWaitTaskArgs[6];
5399   if (NumDependencies) {
5400     DepWaitTaskArgs[0] = UpLoc;
5401     DepWaitTaskArgs[1] = ThreadID;
5402     DepWaitTaskArgs[2] = CGF.Builder.getInt32(NumDependencies);
5403     DepWaitTaskArgs[3] = DependenciesArray.getPointer();
5404     DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
5405     DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
5406   }
5407   auto &&ElseCodeGen = [&TaskArgs, ThreadID, NewTaskNewTaskTTy, TaskEntry,
5408                         NumDependencies, &DepWaitTaskArgs,
5409                         Loc](CodeGenFunction &CGF, PrePostActionTy &) {
5410     CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
5411     CodeGenFunction::RunCleanupsScope LocalScope(CGF);
5412     // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
5413     // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
5414     // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info
5415     // is specified.
5416     if (NumDependencies)
5417       CGF.EmitRuntimeCall(RT.createRuntimeFunction(OMPRTL__kmpc_omp_wait_deps),
5418                           DepWaitTaskArgs);
5419     // Call proxy_task_entry(gtid, new_task);
5420     auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy,
5421                       Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
5422       Action.Enter(CGF);
5423       llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy};
5424       CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskEntry,
5425                                                           OutlinedFnArgs);
5426     };
5427 
5428     // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
5429     // kmp_task_t *new_task);
5430     // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
5431     // kmp_task_t *new_task);
5432     RegionCodeGenTy RCG(CodeGen);
5433     CommonActionTy Action(
5434         RT.createRuntimeFunction(OMPRTL__kmpc_omp_task_begin_if0), TaskArgs,
5435         RT.createRuntimeFunction(OMPRTL__kmpc_omp_task_complete_if0), TaskArgs);
5436     RCG.setAction(Action);
5437     RCG(CGF);
5438   };
5439 
5440   if (IfCond) {
5441     emitOMPIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen);
5442   } else {
5443     RegionCodeGenTy ThenRCG(ThenCodeGen);
5444     ThenRCG(CGF);
5445   }
5446 }
5447 
5448 void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc,
5449                                        const OMPLoopDirective &D,
5450                                        llvm::Function *TaskFunction,
5451                                        QualType SharedsTy, Address Shareds,
5452                                        const Expr *IfCond,
5453                                        const OMPTaskDataTy &Data) {
5454   if (!CGF.HaveInsertPoint())
5455     return;
5456   TaskResultTy Result =
5457       emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
5458   // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
5459   // libcall.
5460   // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
5461   // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
5462   // sched, kmp_uint64 grainsize, void *task_dup);
5463   llvm::Value *ThreadID = getThreadID(CGF, Loc);
5464   llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
5465   llvm::Value *IfVal;
5466   if (IfCond) {
5467     IfVal = CGF.Builder.CreateIntCast(CGF.EvaluateExprAsBool(IfCond), CGF.IntTy,
5468                                       /*isSigned=*/true);
5469   } else {
5470     IfVal = llvm::ConstantInt::getSigned(CGF.IntTy, /*V=*/1);
5471   }
5472 
5473   LValue LBLVal = CGF.EmitLValueForField(
5474       Result.TDBase,
5475       *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound));
5476   const auto *LBVar =
5477       cast<VarDecl>(cast<DeclRefExpr>(D.getLowerBoundVariable())->getDecl());
5478   CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(), LBLVal.getQuals(),
5479                        /*IsInitializer=*/true);
5480   LValue UBLVal = CGF.EmitLValueForField(
5481       Result.TDBase,
5482       *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound));
5483   const auto *UBVar =
5484       cast<VarDecl>(cast<DeclRefExpr>(D.getUpperBoundVariable())->getDecl());
5485   CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(), UBLVal.getQuals(),
5486                        /*IsInitializer=*/true);
5487   LValue StLVal = CGF.EmitLValueForField(
5488       Result.TDBase,
5489       *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTStride));
5490   const auto *StVar =
5491       cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl());
5492   CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(), StLVal.getQuals(),
5493                        /*IsInitializer=*/true);
5494   // Store reductions address.
5495   LValue RedLVal = CGF.EmitLValueForField(
5496       Result.TDBase,
5497       *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTReductions));
5498   if (Data.Reductions) {
5499     CGF.EmitStoreOfScalar(Data.Reductions, RedLVal);
5500   } else {
5501     CGF.EmitNullInitialization(RedLVal.getAddress(),
5502                                CGF.getContext().VoidPtrTy);
5503   }
5504   enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 };
5505   llvm::Value *TaskArgs[] = {
5506       UpLoc,
5507       ThreadID,
5508       Result.NewTask,
5509       IfVal,
5510       LBLVal.getPointer(),
5511       UBLVal.getPointer(),
5512       CGF.EmitLoadOfScalar(StLVal, Loc),
5513       llvm::ConstantInt::getSigned(
5514               CGF.IntTy, 1), // Always 1 because taskgroup emitted by the compiler
5515       llvm::ConstantInt::getSigned(
5516           CGF.IntTy, Data.Schedule.getPointer()
5517                          ? Data.Schedule.getInt() ? NumTasks : Grainsize
5518                          : NoSchedule),
5519       Data.Schedule.getPointer()
5520           ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty,
5521                                       /*isSigned=*/false)
5522           : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0),
5523       Result.TaskDupFn ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5524                              Result.TaskDupFn, CGF.VoidPtrTy)
5525                        : llvm::ConstantPointerNull::get(CGF.VoidPtrTy)};
5526   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_taskloop), TaskArgs);
5527 }
5528 
5529 /// Emit reduction operation for each element of array (required for
5530 /// array sections) LHS op = RHS.
5531 /// \param Type Type of array.
5532 /// \param LHSVar Variable on the left side of the reduction operation
5533 /// (references element of array in original variable).
5534 /// \param RHSVar Variable on the right side of the reduction operation
5535 /// (references element of array in original variable).
5536 /// \param RedOpGen Generator of reduction operation with use of LHSVar and
5537 /// RHSVar.
5538 static void EmitOMPAggregateReduction(
5539     CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar,
5540     const VarDecl *RHSVar,
5541     const llvm::function_ref<void(CodeGenFunction &CGF, const Expr *,
5542                                   const Expr *, const Expr *)> &RedOpGen,
5543     const Expr *XExpr = nullptr, const Expr *EExpr = nullptr,
5544     const Expr *UpExpr = nullptr) {
5545   // Perform element-by-element initialization.
5546   QualType ElementTy;
5547   Address LHSAddr = CGF.GetAddrOfLocalVar(LHSVar);
5548   Address RHSAddr = CGF.GetAddrOfLocalVar(RHSVar);
5549 
5550   // Drill down to the base element type on both arrays.
5551   const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
5552   llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, LHSAddr);
5553 
5554   llvm::Value *RHSBegin = RHSAddr.getPointer();
5555   llvm::Value *LHSBegin = LHSAddr.getPointer();
5556   // Cast from pointer to array type to pointer to single element.
5557   llvm::Value *LHSEnd = CGF.Builder.CreateGEP(LHSBegin, NumElements);
5558   // The basic structure here is a while-do loop.
5559   llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arraycpy.body");
5560   llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arraycpy.done");
5561   llvm::Value *IsEmpty =
5562       CGF.Builder.CreateICmpEQ(LHSBegin, LHSEnd, "omp.arraycpy.isempty");
5563   CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
5564 
5565   // Enter the loop body, making that address the current address.
5566   llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
5567   CGF.EmitBlock(BodyBB);
5568 
5569   CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);
5570 
5571   llvm::PHINode *RHSElementPHI = CGF.Builder.CreatePHI(
5572       RHSBegin->getType(), 2, "omp.arraycpy.srcElementPast");
5573   RHSElementPHI->addIncoming(RHSBegin, EntryBB);
5574   Address RHSElementCurrent =
5575       Address(RHSElementPHI,
5576               RHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));
5577 
5578   llvm::PHINode *LHSElementPHI = CGF.Builder.CreatePHI(
5579       LHSBegin->getType(), 2, "omp.arraycpy.destElementPast");
5580   LHSElementPHI->addIncoming(LHSBegin, EntryBB);
5581   Address LHSElementCurrent =
5582       Address(LHSElementPHI,
5583               LHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));
5584 
5585   // Emit copy.
5586   CodeGenFunction::OMPPrivateScope Scope(CGF);
5587   Scope.addPrivate(LHSVar, [=]() { return LHSElementCurrent; });
5588   Scope.addPrivate(RHSVar, [=]() { return RHSElementCurrent; });
5589   Scope.Privatize();
5590   RedOpGen(CGF, XExpr, EExpr, UpExpr);
5591   Scope.ForceCleanup();
5592 
5593   // Shift the address forward by one element.
5594   llvm::Value *LHSElementNext = CGF.Builder.CreateConstGEP1_32(
5595       LHSElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
5596   llvm::Value *RHSElementNext = CGF.Builder.CreateConstGEP1_32(
5597       RHSElementPHI, /*Idx0=*/1, "omp.arraycpy.src.element");
5598   // Check whether we've reached the end.
5599   llvm::Value *Done =
5600       CGF.Builder.CreateICmpEQ(LHSElementNext, LHSEnd, "omp.arraycpy.done");
5601   CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
5602   LHSElementPHI->addIncoming(LHSElementNext, CGF.Builder.GetInsertBlock());
5603   RHSElementPHI->addIncoming(RHSElementNext, CGF.Builder.GetInsertBlock());
5604 
5605   // Done.
5606   CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
5607 }
5608 
5609 /// Emit reduction combiner. If the combiner is a simple expression emit it as
5610 /// is, otherwise consider it as combiner of UDR decl and emit it as a call of
5611 /// UDR combiner function.
5612 static void emitReductionCombiner(CodeGenFunction &CGF,
5613                                   const Expr *ReductionOp) {
5614   if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
5615     if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
5616       if (const auto *DRE =
5617               dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
5618         if (const auto *DRD =
5619                 dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) {
5620           std::pair<llvm::Function *, llvm::Function *> Reduction =
5621               CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
5622           RValue Func = RValue::get(Reduction.first);
5623           CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
5624           CGF.EmitIgnoredExpr(ReductionOp);
5625           return;
5626         }
5627   CGF.EmitIgnoredExpr(ReductionOp);
5628 }
5629 
5630 llvm::Function *CGOpenMPRuntime::emitReductionFunction(
5631     SourceLocation Loc, llvm::Type *ArgsType, ArrayRef<const Expr *> Privates,
5632     ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
5633     ArrayRef<const Expr *> ReductionOps) {
5634   ASTContext &C = CGM.getContext();
5635 
5636   // void reduction_func(void *LHSArg, void *RHSArg);
5637   FunctionArgList Args;
5638   ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
5639                            ImplicitParamDecl::Other);
5640   ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
5641                            ImplicitParamDecl::Other);
5642   Args.push_back(&LHSArg);
5643   Args.push_back(&RHSArg);
5644   const auto &CGFI =
5645       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
5646   std::string Name = getName({"omp", "reduction", "reduction_func"});
5647   auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
5648                                     llvm::GlobalValue::InternalLinkage, Name,
5649                                     &CGM.getModule());
5650   CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
5651   Fn->setDoesNotRecurse();
5652   CodeGenFunction CGF(CGM);
5653   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
5654 
5655   // Dst = (void*[n])(LHSArg);
5656   // Src = (void*[n])(RHSArg);
5657   Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5658       CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
5659       ArgsType), CGF.getPointerAlign());
5660   Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5661       CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
5662       ArgsType), CGF.getPointerAlign());
5663 
5664   //  ...
5665   //  *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]);
5666   //  ...
5667   CodeGenFunction::OMPPrivateScope Scope(CGF);
5668   auto IPriv = Privates.begin();
5669   unsigned Idx = 0;
5670   for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) {
5671     const auto *RHSVar =
5672         cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl());
5673     Scope.addPrivate(RHSVar, [&CGF, RHS, Idx, RHSVar]() {
5674       return emitAddrOfVarFromArray(CGF, RHS, Idx, RHSVar);
5675     });
5676     const auto *LHSVar =
5677         cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl());
5678     Scope.addPrivate(LHSVar, [&CGF, LHS, Idx, LHSVar]() {
5679       return emitAddrOfVarFromArray(CGF, LHS, Idx, LHSVar);
5680     });
5681     QualType PrivTy = (*IPriv)->getType();
5682     if (PrivTy->isVariablyModifiedType()) {
5683       // Get array size and emit VLA type.
5684       ++Idx;
5685       Address Elem = CGF.Builder.CreateConstArrayGEP(LHS, Idx);
5686       llvm::Value *Ptr = CGF.Builder.CreateLoad(Elem);
5687       const VariableArrayType *VLA =
5688           CGF.getContext().getAsVariableArrayType(PrivTy);
5689       const auto *OVE = cast<OpaqueValueExpr>(VLA->getSizeExpr());
5690       CodeGenFunction::OpaqueValueMapping OpaqueMap(
5691           CGF, OVE, RValue::get(CGF.Builder.CreatePtrToInt(Ptr, CGF.SizeTy)));
5692       CGF.EmitVariablyModifiedType(PrivTy);
5693     }
5694   }
5695   Scope.Privatize();
5696   IPriv = Privates.begin();
5697   auto ILHS = LHSExprs.begin();
5698   auto IRHS = RHSExprs.begin();
5699   for (const Expr *E : ReductionOps) {
5700     if ((*IPriv)->getType()->isArrayType()) {
5701       // Emit reduction for array section.
5702       const auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
5703       const auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
5704       EmitOMPAggregateReduction(
5705           CGF, (*IPriv)->getType(), LHSVar, RHSVar,
5706           [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
5707             emitReductionCombiner(CGF, E);
5708           });
5709     } else {
5710       // Emit reduction for array subscript or single variable.
5711       emitReductionCombiner(CGF, E);
5712     }
5713     ++IPriv;
5714     ++ILHS;
5715     ++IRHS;
5716   }
5717   Scope.ForceCleanup();
5718   CGF.FinishFunction();
5719   return Fn;
5720 }
5721 
5722 void CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF,
5723                                                   const Expr *ReductionOp,
5724                                                   const Expr *PrivateRef,
5725                                                   const DeclRefExpr *LHS,
5726                                                   const DeclRefExpr *RHS) {
5727   if (PrivateRef->getType()->isArrayType()) {
5728     // Emit reduction for array section.
5729     const auto *LHSVar = cast<VarDecl>(LHS->getDecl());
5730     const auto *RHSVar = cast<VarDecl>(RHS->getDecl());
5731     EmitOMPAggregateReduction(
5732         CGF, PrivateRef->getType(), LHSVar, RHSVar,
5733         [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
5734           emitReductionCombiner(CGF, ReductionOp);
5735         });
5736   } else {
5737     // Emit reduction for array subscript or single variable.
5738     emitReductionCombiner(CGF, ReductionOp);
5739   }
5740 }
5741 
5742 void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
5743                                     ArrayRef<const Expr *> Privates,
5744                                     ArrayRef<const Expr *> LHSExprs,
5745                                     ArrayRef<const Expr *> RHSExprs,
5746                                     ArrayRef<const Expr *> ReductionOps,
5747                                     ReductionOptionsTy Options) {
5748   if (!CGF.HaveInsertPoint())
5749     return;
5750 
5751   bool WithNowait = Options.WithNowait;
5752   bool SimpleReduction = Options.SimpleReduction;
5753 
5754   // Next code should be emitted for reduction:
5755   //
5756   // static kmp_critical_name lock = { 0 };
5757   //
5758   // void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
5759   //  *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]);
5760   //  ...
5761   //  *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1],
5762   //  *(Type<n>-1*)rhs[<n>-1]);
5763   // }
5764   //
5765   // ...
5766   // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]};
5767   // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
5768   // RedList, reduce_func, &<lock>)) {
5769   // case 1:
5770   //  ...
5771   //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
5772   //  ...
5773   // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
5774   // break;
5775   // case 2:
5776   //  ...
5777   //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
5778   //  ...
5779   // [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);]
5780   // break;
5781   // default:;
5782   // }
5783   //
5784   // if SimpleReduction is true, only the next code is generated:
5785   //  ...
5786   //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
5787   //  ...
5788 
5789   ASTContext &C = CGM.getContext();
5790 
5791   if (SimpleReduction) {
5792     CodeGenFunction::RunCleanupsScope Scope(CGF);
5793     auto IPriv = Privates.begin();
5794     auto ILHS = LHSExprs.begin();
5795     auto IRHS = RHSExprs.begin();
5796     for (const Expr *E : ReductionOps) {
5797       emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
5798                                   cast<DeclRefExpr>(*IRHS));
5799       ++IPriv;
5800       ++ILHS;
5801       ++IRHS;
5802     }
5803     return;
5804   }
5805 
5806   // 1. Build a list of reduction variables.
5807   // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]};
5808   auto Size = RHSExprs.size();
5809   for (const Expr *E : Privates) {
5810     if (E->getType()->isVariablyModifiedType())
5811       // Reserve place for array size.
5812       ++Size;
5813   }
5814   llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size);
5815   QualType ReductionArrayTy =
5816       C.getConstantArrayType(C.VoidPtrTy, ArraySize, ArrayType::Normal,
5817                              /*IndexTypeQuals=*/0);
5818   Address ReductionList =
5819       CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list");
5820   auto IPriv = Privates.begin();
5821   unsigned Idx = 0;
5822   for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) {
5823     Address Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
5824     CGF.Builder.CreateStore(
5825         CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5826             CGF.EmitLValue(RHSExprs[I]).getPointer(), CGF.VoidPtrTy),
5827         Elem);
5828     if ((*IPriv)->getType()->isVariablyModifiedType()) {
5829       // Store array size.
5830       ++Idx;
5831       Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
5832       llvm::Value *Size = CGF.Builder.CreateIntCast(
5833           CGF.getVLASize(
5834                  CGF.getContext().getAsVariableArrayType((*IPriv)->getType()))
5835               .NumElts,
5836           CGF.SizeTy, /*isSigned=*/false);
5837       CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy),
5838                               Elem);
5839     }
5840   }
5841 
5842   // 2. Emit reduce_func().
5843   llvm::Function *ReductionFn = emitReductionFunction(
5844       Loc, CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo(), Privates,
5845       LHSExprs, RHSExprs, ReductionOps);
5846 
5847   // 3. Create static kmp_critical_name lock = { 0 };
5848   std::string Name = getName({"reduction"});
5849   llvm::Value *Lock = getCriticalRegionLock(Name);
5850 
5851   // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
5852   // RedList, reduce_func, &<lock>);
5853   llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE);
5854   llvm::Value *ThreadId = getThreadID(CGF, Loc);
5855   llvm::Value *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy);
5856   llvm::Value *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5857       ReductionList.getPointer(), CGF.VoidPtrTy);
5858   llvm::Value *Args[] = {
5859       IdentTLoc,                             // ident_t *<loc>
5860       ThreadId,                              // i32 <gtid>
5861       CGF.Builder.getInt32(RHSExprs.size()), // i32 <n>
5862       ReductionArrayTySize,                  // size_type sizeof(RedList)
5863       RL,                                    // void *RedList
5864       ReductionFn, // void (*) (void *, void *) <reduce_func>
5865       Lock         // kmp_critical_name *&<lock>
5866   };
5867   llvm::Value *Res = CGF.EmitRuntimeCall(
5868       createRuntimeFunction(WithNowait ? OMPRTL__kmpc_reduce_nowait
5869                                        : OMPRTL__kmpc_reduce),
5870       Args);
5871 
5872   // 5. Build switch(res)
5873   llvm::BasicBlock *DefaultBB = CGF.createBasicBlock(".omp.reduction.default");
5874   llvm::SwitchInst *SwInst =
5875       CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2);
5876 
5877   // 6. Build case 1:
5878   //  ...
5879   //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
5880   //  ...
5881   // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
5882   // break;
5883   llvm::BasicBlock *Case1BB = CGF.createBasicBlock(".omp.reduction.case1");
5884   SwInst->addCase(CGF.Builder.getInt32(1), Case1BB);
5885   CGF.EmitBlock(Case1BB);
5886 
5887   // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
5888   llvm::Value *EndArgs[] = {
5889       IdentTLoc, // ident_t *<loc>
5890       ThreadId,  // i32 <gtid>
5891       Lock       // kmp_critical_name *&<lock>
5892   };
5893   auto &&CodeGen = [Privates, LHSExprs, RHSExprs, ReductionOps](
5894                        CodeGenFunction &CGF, PrePostActionTy &Action) {
5895     CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
5896     auto IPriv = Privates.begin();
5897     auto ILHS = LHSExprs.begin();
5898     auto IRHS = RHSExprs.begin();
5899     for (const Expr *E : ReductionOps) {
5900       RT.emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
5901                                      cast<DeclRefExpr>(*IRHS));
5902       ++IPriv;
5903       ++ILHS;
5904       ++IRHS;
5905     }
5906   };
5907   RegionCodeGenTy RCG(CodeGen);
5908   CommonActionTy Action(
5909       nullptr, llvm::None,
5910       createRuntimeFunction(WithNowait ? OMPRTL__kmpc_end_reduce_nowait
5911                                        : OMPRTL__kmpc_end_reduce),
5912       EndArgs);
5913   RCG.setAction(Action);
5914   RCG(CGF);
5915 
5916   CGF.EmitBranch(DefaultBB);
5917 
5918   // 7. Build case 2:
5919   //  ...
5920   //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
5921   //  ...
5922   // break;
5923   llvm::BasicBlock *Case2BB = CGF.createBasicBlock(".omp.reduction.case2");
5924   SwInst->addCase(CGF.Builder.getInt32(2), Case2BB);
5925   CGF.EmitBlock(Case2BB);
5926 
5927   auto &&AtomicCodeGen = [Loc, Privates, LHSExprs, RHSExprs, ReductionOps](
5928                              CodeGenFunction &CGF, PrePostActionTy &Action) {
5929     auto ILHS = LHSExprs.begin();
5930     auto IRHS = RHSExprs.begin();
5931     auto IPriv = Privates.begin();
5932     for (const Expr *E : ReductionOps) {
5933       const Expr *XExpr = nullptr;
5934       const Expr *EExpr = nullptr;
5935       const Expr *UpExpr = nullptr;
5936       BinaryOperatorKind BO = BO_Comma;
5937       if (const auto *BO = dyn_cast<BinaryOperator>(E)) {
5938         if (BO->getOpcode() == BO_Assign) {
5939           XExpr = BO->getLHS();
5940           UpExpr = BO->getRHS();
5941         }
5942       }
5943       // Try to emit update expression as a simple atomic.
5944       const Expr *RHSExpr = UpExpr;
5945       if (RHSExpr) {
5946         // Analyze RHS part of the whole expression.
5947         if (const auto *ACO = dyn_cast<AbstractConditionalOperator>(
5948                 RHSExpr->IgnoreParenImpCasts())) {
5949           // If this is a conditional operator, analyze its condition for
5950           // min/max reduction operator.
5951           RHSExpr = ACO->getCond();
5952         }
5953         if (const auto *BORHS =
5954                 dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) {
5955           EExpr = BORHS->getRHS();
5956           BO = BORHS->getOpcode();
5957         }
5958       }
5959       if (XExpr) {
5960         const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
5961         auto &&AtomicRedGen = [BO, VD,
5962                                Loc](CodeGenFunction &CGF, const Expr *XExpr,
5963                                     const Expr *EExpr, const Expr *UpExpr) {
5964           LValue X = CGF.EmitLValue(XExpr);
5965           RValue E;
5966           if (EExpr)
5967             E = CGF.EmitAnyExpr(EExpr);
5968           CGF.EmitOMPAtomicSimpleUpdateExpr(
5969               X, E, BO, /*IsXLHSInRHSPart=*/true,
5970               llvm::AtomicOrdering::Monotonic, Loc,
5971               [&CGF, UpExpr, VD, Loc](RValue XRValue) {
5972                 CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
5973                 PrivateScope.addPrivate(
5974                     VD, [&CGF, VD, XRValue, Loc]() {
5975                       Address LHSTemp = CGF.CreateMemTemp(VD->getType());
5976                       CGF.emitOMPSimpleStore(
5977                           CGF.MakeAddrLValue(LHSTemp, VD->getType()), XRValue,
5978                           VD->getType().getNonReferenceType(), Loc);
5979                       return LHSTemp;
5980                     });
5981                 (void)PrivateScope.Privatize();
5982                 return CGF.EmitAnyExpr(UpExpr);
5983               });
5984         };
5985         if ((*IPriv)->getType()->isArrayType()) {
5986           // Emit atomic reduction for array section.
5987           const auto *RHSVar =
5988               cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
5989           EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar,
5990                                     AtomicRedGen, XExpr, EExpr, UpExpr);
5991         } else {
5992           // Emit atomic reduction for array subscript or single variable.
5993           AtomicRedGen(CGF, XExpr, EExpr, UpExpr);
5994         }
5995       } else {
5996         // Emit as a critical region.
5997         auto &&CritRedGen = [E, Loc](CodeGenFunction &CGF, const Expr *,
5998                                            const Expr *, const Expr *) {
5999           CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
6000           std::string Name = RT.getName({"atomic_reduction"});
6001           RT.emitCriticalRegion(
6002               CGF, Name,
6003               [=](CodeGenFunction &CGF, PrePostActionTy &Action) {
6004                 Action.Enter(CGF);
6005                 emitReductionCombiner(CGF, E);
6006               },
6007               Loc);
6008         };
6009         if ((*IPriv)->getType()->isArrayType()) {
6010           const auto *LHSVar =
6011               cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
6012           const auto *RHSVar =
6013               cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
6014           EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar,
6015                                     CritRedGen);
6016         } else {
6017           CritRedGen(CGF, nullptr, nullptr, nullptr);
6018         }
6019       }
6020       ++ILHS;
6021       ++IRHS;
6022       ++IPriv;
6023     }
6024   };
6025   RegionCodeGenTy AtomicRCG(AtomicCodeGen);
6026   if (!WithNowait) {
6027     // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>);
6028     llvm::Value *EndArgs[] = {
6029         IdentTLoc, // ident_t *<loc>
6030         ThreadId,  // i32 <gtid>
6031         Lock       // kmp_critical_name *&<lock>
6032     };
6033     CommonActionTy Action(nullptr, llvm::None,
6034                           createRuntimeFunction(OMPRTL__kmpc_end_reduce),
6035                           EndArgs);
6036     AtomicRCG.setAction(Action);
6037     AtomicRCG(CGF);
6038   } else {
6039     AtomicRCG(CGF);
6040   }
6041 
6042   CGF.EmitBranch(DefaultBB);
6043   CGF.EmitBlock(DefaultBB, /*IsFinished=*/true);
6044 }
6045 
6046 /// Generates unique name for artificial threadprivate variables.
6047 /// Format is: <Prefix> "." <Decl_mangled_name> "_" "<Decl_start_loc_raw_enc>"
6048 static std::string generateUniqueName(CodeGenModule &CGM, StringRef Prefix,
6049                                       const Expr *Ref) {
6050   SmallString<256> Buffer;
6051   llvm::raw_svector_ostream Out(Buffer);
6052   const clang::DeclRefExpr *DE;
6053   const VarDecl *D = ::getBaseDecl(Ref, DE);
6054   if (!D)
6055     D = cast<VarDecl>(cast<DeclRefExpr>(Ref)->getDecl());
6056   D = D->getCanonicalDecl();
6057   std::string Name = CGM.getOpenMPRuntime().getName(
6058       {D->isLocalVarDeclOrParm() ? D->getName() : CGM.getMangledName(D)});
6059   Out << Prefix << Name << "_"
6060       << D->getCanonicalDecl()->getBeginLoc().getRawEncoding();
6061   return Out.str();
6062 }
6063 
6064 /// Emits reduction initializer function:
6065 /// \code
6066 /// void @.red_init(void* %arg) {
6067 /// %0 = bitcast void* %arg to <type>*
6068 /// store <type> <init>, <type>* %0
6069 /// ret void
6070 /// }
6071 /// \endcode
6072 static llvm::Value *emitReduceInitFunction(CodeGenModule &CGM,
6073                                            SourceLocation Loc,
6074                                            ReductionCodeGen &RCG, unsigned N) {
6075   ASTContext &C = CGM.getContext();
6076   FunctionArgList Args;
6077   ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
6078                           ImplicitParamDecl::Other);
6079   Args.emplace_back(&Param);
6080   const auto &FnInfo =
6081       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
6082   llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
6083   std::string Name = CGM.getOpenMPRuntime().getName({"red_init", ""});
6084   auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
6085                                     Name, &CGM.getModule());
6086   CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
6087   Fn->setDoesNotRecurse();
6088   CodeGenFunction CGF(CGM);
6089   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
6090   Address PrivateAddr = CGF.EmitLoadOfPointer(
6091       CGF.GetAddrOfLocalVar(&Param),
6092       C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
6093   llvm::Value *Size = nullptr;
6094   // If the size of the reduction item is non-constant, load it from global
6095   // threadprivate variable.
6096   if (RCG.getSizes(N).second) {
6097     Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
6098         CGF, CGM.getContext().getSizeType(),
6099         generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
6100     Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
6101                                 CGM.getContext().getSizeType(), Loc);
6102   }
6103   RCG.emitAggregateType(CGF, N, Size);
6104   LValue SharedLVal;
6105   // If initializer uses initializer from declare reduction construct, emit a
6106   // pointer to the address of the original reduction item (reuired by reduction
6107   // initializer)
6108   if (RCG.usesReductionInitializer(N)) {
6109     Address SharedAddr =
6110         CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
6111             CGF, CGM.getContext().VoidPtrTy,
6112             generateUniqueName(CGM, "reduction", RCG.getRefExpr(N)));
6113     SharedAddr = CGF.EmitLoadOfPointer(
6114         SharedAddr,
6115         CGM.getContext().VoidPtrTy.castAs<PointerType>()->getTypePtr());
6116     SharedLVal = CGF.MakeAddrLValue(SharedAddr, CGM.getContext().VoidPtrTy);
6117   } else {
6118     SharedLVal = CGF.MakeNaturalAlignAddrLValue(
6119         llvm::ConstantPointerNull::get(CGM.VoidPtrTy),
6120         CGM.getContext().VoidPtrTy);
6121   }
6122   // Emit the initializer:
6123   // %0 = bitcast void* %arg to <type>*
6124   // store <type> <init>, <type>* %0
6125   RCG.emitInitialization(CGF, N, PrivateAddr, SharedLVal,
6126                          [](CodeGenFunction &) { return false; });
6127   CGF.FinishFunction();
6128   return Fn;
6129 }
6130 
6131 /// Emits reduction combiner function:
6132 /// \code
6133 /// void @.red_comb(void* %arg0, void* %arg1) {
6134 /// %lhs = bitcast void* %arg0 to <type>*
6135 /// %rhs = bitcast void* %arg1 to <type>*
6136 /// %2 = <ReductionOp>(<type>* %lhs, <type>* %rhs)
6137 /// store <type> %2, <type>* %lhs
6138 /// ret void
6139 /// }
6140 /// \endcode
6141 static llvm::Value *emitReduceCombFunction(CodeGenModule &CGM,
6142                                            SourceLocation Loc,
6143                                            ReductionCodeGen &RCG, unsigned N,
6144                                            const Expr *ReductionOp,
6145                                            const Expr *LHS, const Expr *RHS,
6146                                            const Expr *PrivateRef) {
6147   ASTContext &C = CGM.getContext();
6148   const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(LHS)->getDecl());
6149   const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(RHS)->getDecl());
6150   FunctionArgList Args;
6151   ImplicitParamDecl ParamInOut(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
6152                                C.VoidPtrTy, ImplicitParamDecl::Other);
6153   ImplicitParamDecl ParamIn(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
6154                             ImplicitParamDecl::Other);
6155   Args.emplace_back(&ParamInOut);
6156   Args.emplace_back(&ParamIn);
6157   const auto &FnInfo =
6158       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
6159   llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
6160   std::string Name = CGM.getOpenMPRuntime().getName({"red_comb", ""});
6161   auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
6162                                     Name, &CGM.getModule());
6163   CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
6164   Fn->setDoesNotRecurse();
6165   CodeGenFunction CGF(CGM);
6166   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
6167   llvm::Value *Size = nullptr;
6168   // If the size of the reduction item is non-constant, load it from global
6169   // threadprivate variable.
6170   if (RCG.getSizes(N).second) {
6171     Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
6172         CGF, CGM.getContext().getSizeType(),
6173         generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
6174     Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
6175                                 CGM.getContext().getSizeType(), Loc);
6176   }
6177   RCG.emitAggregateType(CGF, N, Size);
6178   // Remap lhs and rhs variables to the addresses of the function arguments.
6179   // %lhs = bitcast void* %arg0 to <type>*
6180   // %rhs = bitcast void* %arg1 to <type>*
6181   CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
6182   PrivateScope.addPrivate(LHSVD, [&C, &CGF, &ParamInOut, LHSVD]() {
6183     // Pull out the pointer to the variable.
6184     Address PtrAddr = CGF.EmitLoadOfPointer(
6185         CGF.GetAddrOfLocalVar(&ParamInOut),
6186         C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
6187     return CGF.Builder.CreateElementBitCast(
6188         PtrAddr, CGF.ConvertTypeForMem(LHSVD->getType()));
6189   });
6190   PrivateScope.addPrivate(RHSVD, [&C, &CGF, &ParamIn, RHSVD]() {
6191     // Pull out the pointer to the variable.
6192     Address PtrAddr = CGF.EmitLoadOfPointer(
6193         CGF.GetAddrOfLocalVar(&ParamIn),
6194         C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
6195     return CGF.Builder.CreateElementBitCast(
6196         PtrAddr, CGF.ConvertTypeForMem(RHSVD->getType()));
6197   });
6198   PrivateScope.Privatize();
6199   // Emit the combiner body:
6200   // %2 = <ReductionOp>(<type> *%lhs, <type> *%rhs)
6201   // store <type> %2, <type>* %lhs
6202   CGM.getOpenMPRuntime().emitSingleReductionCombiner(
6203       CGF, ReductionOp, PrivateRef, cast<DeclRefExpr>(LHS),
6204       cast<DeclRefExpr>(RHS));
6205   CGF.FinishFunction();
6206   return Fn;
6207 }
6208 
6209 /// Emits reduction finalizer function:
6210 /// \code
6211 /// void @.red_fini(void* %arg) {
6212 /// %0 = bitcast void* %arg to <type>*
6213 /// <destroy>(<type>* %0)
6214 /// ret void
6215 /// }
6216 /// \endcode
6217 static llvm::Value *emitReduceFiniFunction(CodeGenModule &CGM,
6218                                            SourceLocation Loc,
6219                                            ReductionCodeGen &RCG, unsigned N) {
6220   if (!RCG.needCleanups(N))
6221     return nullptr;
6222   ASTContext &C = CGM.getContext();
6223   FunctionArgList Args;
6224   ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
6225                           ImplicitParamDecl::Other);
6226   Args.emplace_back(&Param);
6227   const auto &FnInfo =
6228       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
6229   llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
6230   std::string Name = CGM.getOpenMPRuntime().getName({"red_fini", ""});
6231   auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
6232                                     Name, &CGM.getModule());
6233   CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
6234   Fn->setDoesNotRecurse();
6235   CodeGenFunction CGF(CGM);
6236   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
6237   Address PrivateAddr = CGF.EmitLoadOfPointer(
6238       CGF.GetAddrOfLocalVar(&Param),
6239       C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
6240   llvm::Value *Size = nullptr;
6241   // If the size of the reduction item is non-constant, load it from global
6242   // threadprivate variable.
6243   if (RCG.getSizes(N).second) {
6244     Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
6245         CGF, CGM.getContext().getSizeType(),
6246         generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
6247     Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
6248                                 CGM.getContext().getSizeType(), Loc);
6249   }
6250   RCG.emitAggregateType(CGF, N, Size);
6251   // Emit the finalizer body:
6252   // <destroy>(<type>* %0)
6253   RCG.emitCleanups(CGF, N, PrivateAddr);
6254   CGF.FinishFunction();
6255   return Fn;
6256 }
6257 
6258 llvm::Value *CGOpenMPRuntime::emitTaskReductionInit(
6259     CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
6260     ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
6261   if (!CGF.HaveInsertPoint() || Data.ReductionVars.empty())
6262     return nullptr;
6263 
6264   // Build typedef struct:
6265   // kmp_task_red_input {
6266   //   void *reduce_shar; // shared reduction item
6267   //   size_t reduce_size; // size of data item
6268   //   void *reduce_init; // data initialization routine
6269   //   void *reduce_fini; // data finalization routine
6270   //   void *reduce_comb; // data combiner routine
6271   //   kmp_task_red_flags_t flags; // flags for additional info from compiler
6272   // } kmp_task_red_input_t;
6273   ASTContext &C = CGM.getContext();
6274   RecordDecl *RD = C.buildImplicitRecord("kmp_task_red_input_t");
6275   RD->startDefinition();
6276   const FieldDecl *SharedFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
6277   const FieldDecl *SizeFD = addFieldToRecordDecl(C, RD, C.getSizeType());
6278   const FieldDecl *InitFD  = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
6279   const FieldDecl *FiniFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
6280   const FieldDecl *CombFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
6281   const FieldDecl *FlagsFD = addFieldToRecordDecl(
6282       C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/false));
6283   RD->completeDefinition();
6284   QualType RDType = C.getRecordType(RD);
6285   unsigned Size = Data.ReductionVars.size();
6286   llvm::APInt ArraySize(/*numBits=*/64, Size);
6287   QualType ArrayRDType = C.getConstantArrayType(
6288       RDType, ArraySize, ArrayType::Normal, /*IndexTypeQuals=*/0);
6289   // kmp_task_red_input_t .rd_input.[Size];
6290   Address TaskRedInput = CGF.CreateMemTemp(ArrayRDType, ".rd_input.");
6291   ReductionCodeGen RCG(Data.ReductionVars, Data.ReductionCopies,
6292                        Data.ReductionOps);
6293   for (unsigned Cnt = 0; Cnt < Size; ++Cnt) {
6294     // kmp_task_red_input_t &ElemLVal = .rd_input.[Cnt];
6295     llvm::Value *Idxs[] = {llvm::ConstantInt::get(CGM.SizeTy, /*V=*/0),
6296                            llvm::ConstantInt::get(CGM.SizeTy, Cnt)};
6297     llvm::Value *GEP = CGF.EmitCheckedInBoundsGEP(
6298         TaskRedInput.getPointer(), Idxs,
6299         /*SignedIndices=*/false, /*IsSubtraction=*/false, Loc,
6300         ".rd_input.gep.");
6301     LValue ElemLVal = CGF.MakeNaturalAlignAddrLValue(GEP, RDType);
6302     // ElemLVal.reduce_shar = &Shareds[Cnt];
6303     LValue SharedLVal = CGF.EmitLValueForField(ElemLVal, SharedFD);
6304     RCG.emitSharedLValue(CGF, Cnt);
6305     llvm::Value *CastedShared =
6306         CGF.EmitCastToVoidPtr(RCG.getSharedLValue(Cnt).getPointer());
6307     CGF.EmitStoreOfScalar(CastedShared, SharedLVal);
6308     RCG.emitAggregateType(CGF, Cnt);
6309     llvm::Value *SizeValInChars;
6310     llvm::Value *SizeVal;
6311     std::tie(SizeValInChars, SizeVal) = RCG.getSizes(Cnt);
6312     // We use delayed creation/initialization for VLAs, array sections and
6313     // custom reduction initializations. It is required because runtime does not
6314     // provide the way to pass the sizes of VLAs/array sections to
6315     // initializer/combiner/finalizer functions and does not pass the pointer to
6316     // original reduction item to the initializer. Instead threadprivate global
6317     // variables are used to store these values and use them in the functions.
6318     bool DelayedCreation = !!SizeVal;
6319     SizeValInChars = CGF.Builder.CreateIntCast(SizeValInChars, CGM.SizeTy,
6320                                                /*isSigned=*/false);
6321     LValue SizeLVal = CGF.EmitLValueForField(ElemLVal, SizeFD);
6322     CGF.EmitStoreOfScalar(SizeValInChars, SizeLVal);
6323     // ElemLVal.reduce_init = init;
6324     LValue InitLVal = CGF.EmitLValueForField(ElemLVal, InitFD);
6325     llvm::Value *InitAddr =
6326         CGF.EmitCastToVoidPtr(emitReduceInitFunction(CGM, Loc, RCG, Cnt));
6327     CGF.EmitStoreOfScalar(InitAddr, InitLVal);
6328     DelayedCreation = DelayedCreation || RCG.usesReductionInitializer(Cnt);
6329     // ElemLVal.reduce_fini = fini;
6330     LValue FiniLVal = CGF.EmitLValueForField(ElemLVal, FiniFD);
6331     llvm::Value *Fini = emitReduceFiniFunction(CGM, Loc, RCG, Cnt);
6332     llvm::Value *FiniAddr = Fini
6333                                 ? CGF.EmitCastToVoidPtr(Fini)
6334                                 : llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
6335     CGF.EmitStoreOfScalar(FiniAddr, FiniLVal);
6336     // ElemLVal.reduce_comb = comb;
6337     LValue CombLVal = CGF.EmitLValueForField(ElemLVal, CombFD);
6338     llvm::Value *CombAddr = CGF.EmitCastToVoidPtr(emitReduceCombFunction(
6339         CGM, Loc, RCG, Cnt, Data.ReductionOps[Cnt], LHSExprs[Cnt],
6340         RHSExprs[Cnt], Data.ReductionCopies[Cnt]));
6341     CGF.EmitStoreOfScalar(CombAddr, CombLVal);
6342     // ElemLVal.flags = 0;
6343     LValue FlagsLVal = CGF.EmitLValueForField(ElemLVal, FlagsFD);
6344     if (DelayedCreation) {
6345       CGF.EmitStoreOfScalar(
6346           llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/1, /*isSigned=*/true),
6347           FlagsLVal);
6348     } else
6349       CGF.EmitNullInitialization(FlagsLVal.getAddress(), FlagsLVal.getType());
6350   }
6351   // Build call void *__kmpc_task_reduction_init(int gtid, int num_data, void
6352   // *data);
6353   llvm::Value *Args[] = {
6354       CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy,
6355                                 /*isSigned=*/true),
6356       llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
6357       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TaskRedInput.getPointer(),
6358                                                       CGM.VoidPtrTy)};
6359   return CGF.EmitRuntimeCall(
6360       createRuntimeFunction(OMPRTL__kmpc_task_reduction_init), Args);
6361 }
6362 
6363 void CGOpenMPRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
6364                                               SourceLocation Loc,
6365                                               ReductionCodeGen &RCG,
6366                                               unsigned N) {
6367   auto Sizes = RCG.getSizes(N);
6368   // Emit threadprivate global variable if the type is non-constant
6369   // (Sizes.second = nullptr).
6370   if (Sizes.second) {
6371     llvm::Value *SizeVal = CGF.Builder.CreateIntCast(Sizes.second, CGM.SizeTy,
6372                                                      /*isSigned=*/false);
6373     Address SizeAddr = getAddrOfArtificialThreadPrivate(
6374         CGF, CGM.getContext().getSizeType(),
6375         generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
6376     CGF.Builder.CreateStore(SizeVal, SizeAddr, /*IsVolatile=*/false);
6377   }
6378   // Store address of the original reduction item if custom initializer is used.
6379   if (RCG.usesReductionInitializer(N)) {
6380     Address SharedAddr = getAddrOfArtificialThreadPrivate(
6381         CGF, CGM.getContext().VoidPtrTy,
6382         generateUniqueName(CGM, "reduction", RCG.getRefExpr(N)));
6383     CGF.Builder.CreateStore(
6384         CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
6385             RCG.getSharedLValue(N).getPointer(), CGM.VoidPtrTy),
6386         SharedAddr, /*IsVolatile=*/false);
6387   }
6388 }
6389 
6390 Address CGOpenMPRuntime::getTaskReductionItem(CodeGenFunction &CGF,
6391                                               SourceLocation Loc,
6392                                               llvm::Value *ReductionsPtr,
6393                                               LValue SharedLVal) {
6394   // Build call void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
6395   // *d);
6396   llvm::Value *Args[] = {
6397       CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy,
6398                                 /*isSigned=*/true),
6399       ReductionsPtr,
6400       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(SharedLVal.getPointer(),
6401                                                       CGM.VoidPtrTy)};
6402   return Address(
6403       CGF.EmitRuntimeCall(
6404           createRuntimeFunction(OMPRTL__kmpc_task_reduction_get_th_data), Args),
6405       SharedLVal.getAlignment());
6406 }
6407 
6408 void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
6409                                        SourceLocation Loc) {
6410   if (!CGF.HaveInsertPoint())
6411     return;
6412   // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
6413   // global_tid);
6414   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
6415   // Ignore return result until untied tasks are supported.
6416   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_taskwait), Args);
6417   if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
6418     Region->emitUntiedSwitch(CGF);
6419 }
6420 
6421 void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF,
6422                                            OpenMPDirectiveKind InnerKind,
6423                                            const RegionCodeGenTy &CodeGen,
6424                                            bool HasCancel) {
6425   if (!CGF.HaveInsertPoint())
6426     return;
6427   InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel);
6428   CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr);
6429 }
6430 
namespace {
/// Cancellation kinds passed as the 'cncl_kind' argument of the
/// __kmpc_cancel and __kmpc_cancellationpoint runtime calls.
enum RTCancelKind {
  /// Default value; not produced for any directive kind in this file.
  CancelNoreq = 0,
  /// Cancellation of a 'parallel' region.
  CancelParallel = 1,
  /// Cancellation of a 'for' region.
  CancelLoop = 2,
  /// Cancellation of a 'sections' region.
  CancelSections = 3,
  /// Cancellation of a 'taskgroup' region.
  CancelTaskgroup = 4,
};
} // anonymous namespace
6440 
6441 static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) {
6442   RTCancelKind CancelKind = CancelNoreq;
6443   if (CancelRegion == OMPD_parallel)
6444     CancelKind = CancelParallel;
6445   else if (CancelRegion == OMPD_for)
6446     CancelKind = CancelLoop;
6447   else if (CancelRegion == OMPD_sections)
6448     CancelKind = CancelSections;
6449   else {
6450     assert(CancelRegion == OMPD_taskgroup);
6451     CancelKind = CancelTaskgroup;
6452   }
6453   return CancelKind;
6454 }
6455 
6456 void CGOpenMPRuntime::emitCancellationPointCall(
6457     CodeGenFunction &CGF, SourceLocation Loc,
6458     OpenMPDirectiveKind CancelRegion) {
6459   if (!CGF.HaveInsertPoint())
6460     return;
6461   // Build call kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
6462   // global_tid, kmp_int32 cncl_kind);
6463   if (auto *OMPRegionInfo =
6464           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
6465     // For 'cancellation point taskgroup', the task region info may not have a
6466     // cancel. This may instead happen in another adjacent task.
6467     if (CancelRegion == OMPD_taskgroup || OMPRegionInfo->hasCancel()) {
6468       llvm::Value *Args[] = {
6469           emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
6470           CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
6471       // Ignore return result until untied tasks are supported.
6472       llvm::Value *Result = CGF.EmitRuntimeCall(
6473           createRuntimeFunction(OMPRTL__kmpc_cancellationpoint), Args);
6474       // if (__kmpc_cancellationpoint()) {
6475       //   exit from construct;
6476       // }
6477       llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
6478       llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
6479       llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
6480       CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
6481       CGF.EmitBlock(ExitBB);
6482       // exit from construct;
6483       CodeGenFunction::JumpDest CancelDest =
6484           CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
6485       CGF.EmitBranchThroughCleanup(CancelDest);
6486       CGF.EmitBlock(ContBB, /*IsFinished=*/true);
6487     }
6488   }
6489 }
6490 
6491 void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc,
6492                                      const Expr *IfCond,
6493                                      OpenMPDirectiveKind CancelRegion) {
6494   if (!CGF.HaveInsertPoint())
6495     return;
6496   // Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
6497   // kmp_int32 cncl_kind);
6498   if (auto *OMPRegionInfo =
6499           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
6500     auto &&ThenGen = [Loc, CancelRegion, OMPRegionInfo](CodeGenFunction &CGF,
6501                                                         PrePostActionTy &) {
6502       CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
6503       llvm::Value *Args[] = {
6504           RT.emitUpdateLocation(CGF, Loc), RT.getThreadID(CGF, Loc),
6505           CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
6506       // Ignore return result until untied tasks are supported.
6507       llvm::Value *Result = CGF.EmitRuntimeCall(
6508           RT.createRuntimeFunction(OMPRTL__kmpc_cancel), Args);
6509       // if (__kmpc_cancel()) {
6510       //   exit from construct;
6511       // }
6512       llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
6513       llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
6514       llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
6515       CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
6516       CGF.EmitBlock(ExitBB);
6517       // exit from construct;
6518       CodeGenFunction::JumpDest CancelDest =
6519           CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
6520       CGF.EmitBranchThroughCleanup(CancelDest);
6521       CGF.EmitBlock(ContBB, /*IsFinished=*/true);
6522     };
6523     if (IfCond) {
6524       emitOMPIfClause(CGF, IfCond, ThenGen,
6525                       [](CodeGenFunction &, PrePostActionTy &) {});
6526     } else {
6527       RegionCodeGenTy ThenRCG(ThenGen);
6528       ThenRCG(CGF);
6529     }
6530   }
6531 }
6532 
6533 void CGOpenMPRuntime::emitTargetOutlinedFunction(
6534     const OMPExecutableDirective &D, StringRef ParentName,
6535     llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
6536     bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
6537   assert(!ParentName.empty() && "Invalid target region parent name!");
6538   HasEmittedTargetRegion = true;
6539   emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID,
6540                                    IsOffloadEntry, CodeGen);
6541 }
6542 
6543 void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper(
6544     const OMPExecutableDirective &D, StringRef ParentName,
6545     llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
6546     bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
6547   // Create a unique name for the entry function using the source location
6548   // information of the current target region. The name will be something like:
6549   //
6550   // __omp_offloading_DD_FFFF_PP_lBB
6551   //
6552   // where DD_FFFF is an ID unique to the file (device and file IDs), PP is the
6553   // mangled name of the function that encloses the target region and BB is the
6554   // line number of the target region.
6555 
6556   unsigned DeviceID;
6557   unsigned FileID;
6558   unsigned Line;
6559   getTargetEntryUniqueInfo(CGM.getContext(), D.getBeginLoc(), DeviceID, FileID,
6560                            Line);
6561   SmallString<64> EntryFnName;
6562   {
6563     llvm::raw_svector_ostream OS(EntryFnName);
6564     OS << "__omp_offloading" << llvm::format("_%x", DeviceID)
6565        << llvm::format("_%x_", FileID) << ParentName << "_l" << Line;
6566   }
6567 
6568   const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
6569 
6570   CodeGenFunction CGF(CGM, true);
6571   CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName);
6572   CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6573 
6574   OutlinedFn = CGF.GenerateOpenMPCapturedStmtFunction(CS);
6575 
6576   // If this target outline function is not an offload entry, we don't need to
6577   // register it.
6578   if (!IsOffloadEntry)
6579     return;
6580 
6581   // The target region ID is used by the runtime library to identify the current
6582   // target region, so it only has to be unique and not necessarily point to
6583   // anything. It could be the pointer to the outlined function that implements
6584   // the target region, but we aren't using that so that the compiler doesn't
6585   // need to keep that, and could therefore inline the host function if proven
6586   // worthwhile during optimization. In the other hand, if emitting code for the
6587   // device, the ID has to be the function address so that it can retrieved from
6588   // the offloading entry and launched by the runtime library. We also mark the
6589   // outlined function to have external linkage in case we are emitting code for
6590   // the device, because these functions will be entry points to the device.
6591 
6592   if (CGM.getLangOpts().OpenMPIsDevice) {
6593     OutlinedFnID = llvm::ConstantExpr::getBitCast(OutlinedFn, CGM.Int8PtrTy);
6594     OutlinedFn->setLinkage(llvm::GlobalValue::WeakAnyLinkage);
6595     OutlinedFn->setDSOLocal(false);
6596   } else {
6597     std::string Name = getName({EntryFnName, "region_id"});
6598     OutlinedFnID = new llvm::GlobalVariable(
6599         CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
6600         llvm::GlobalValue::WeakAnyLinkage,
6601         llvm::Constant::getNullValue(CGM.Int8Ty), Name);
6602   }
6603 
6604   // Register the information for the entry associated with this target region.
6605   OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
6606       DeviceID, FileID, ParentName, Line, OutlinedFn, OutlinedFnID,
6607       OffloadEntriesInfoManagerTy::OMPTargetRegionEntryTargetRegion);
6608 }
6609 
6610 /// Checks if the expression is constant or does not have non-trivial function
6611 /// calls.
6612 static bool isTrivial(ASTContext &Ctx, const Expr * E) {
6613   // We can skip constant expressions.
6614   // We can skip expressions with trivial calls or simple expressions.
6615   return (E->isEvaluatable(Ctx, Expr::SE_AllowUndefinedBehavior) ||
6616           !E->hasNonTrivialCall(Ctx)) &&
6617          !E->HasSideEffects(Ctx, /*IncludePossibleEffects=*/true);
6618 }
6619 
6620 const Stmt *CGOpenMPRuntime::getSingleCompoundChild(ASTContext &Ctx,
6621                                                     const Stmt *Body) {
6622   const Stmt *Child = Body->IgnoreContainers();
6623   while (const auto *C = dyn_cast_or_null<CompoundStmt>(Child)) {
6624     Child = nullptr;
6625     for (const Stmt *S : C->body()) {
6626       if (const auto *E = dyn_cast<Expr>(S)) {
6627         if (isTrivial(Ctx, E))
6628           continue;
6629       }
6630       // Some of the statements can be ignored.
6631       if (isa<AsmStmt>(S) || isa<NullStmt>(S) || isa<OMPFlushDirective>(S) ||
6632           isa<OMPBarrierDirective>(S) || isa<OMPTaskyieldDirective>(S))
6633         continue;
6634       // Analyze declarations.
6635       if (const auto *DS = dyn_cast<DeclStmt>(S)) {
6636         if (llvm::all_of(DS->decls(), [&Ctx](const Decl *D) {
6637               if (isa<EmptyDecl>(D) || isa<DeclContext>(D) ||
6638                   isa<TypeDecl>(D) || isa<PragmaCommentDecl>(D) ||
6639                   isa<PragmaDetectMismatchDecl>(D) || isa<UsingDecl>(D) ||
6640                   isa<UsingDirectiveDecl>(D) ||
6641                   isa<OMPDeclareReductionDecl>(D) ||
6642                   isa<OMPThreadPrivateDecl>(D) || isa<OMPAllocateDecl>(D))
6643                 return true;
6644               const auto *VD = dyn_cast<VarDecl>(D);
6645               if (!VD)
6646                 return false;
6647               return VD->isConstexpr() ||
6648                      ((VD->getType().isTrivialType(Ctx) ||
6649                        VD->getType()->isReferenceType()) &&
6650                       (!VD->hasInit() || isTrivial(Ctx, VD->getInit())));
6651             }))
6652           continue;
6653       }
6654       // Found multiple children - cannot get the one child only.
6655       if (Child)
6656         return nullptr;
6657       Child = S;
6658     }
6659     if (Child)
6660       Child = Child->IgnoreContainers();
6661   }
6662   return Child;
6663 }
6664 
6665 /// Emit the number of teams for a target directive.  Inspect the num_teams
6666 /// clause associated with a teams construct combined or closely nested
6667 /// with the target directive.
6668 ///
6669 /// Emit a team of size one for directives such as 'target parallel' that
6670 /// have no associated teams construct.
6671 ///
6672 /// Otherwise, return nullptr.
6673 static llvm::Value *
6674 emitNumTeamsForTargetDirective(CodeGenFunction &CGF,
6675                                const OMPExecutableDirective &D) {
6676   assert(!CGF.getLangOpts().OpenMPIsDevice &&
6677          "Clauses associated with the teams directive expected to be emitted "
6678          "only for the host!");
6679   OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
6680   assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
6681          "Expected target-based executable directive.");
6682   CGBuilderTy &Bld = CGF.Builder;
6683   switch (DirectiveKind) {
6684   case OMPD_target: {
6685     const auto *CS = D.getInnermostCapturedStmt();
6686     const auto *Body =
6687         CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
6688     const Stmt *ChildStmt =
6689         CGOpenMPRuntime::getSingleCompoundChild(CGF.getContext(), Body);
6690     if (const auto *NestedDir =
6691             dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
6692       if (isOpenMPTeamsDirective(NestedDir->getDirectiveKind())) {
6693         if (NestedDir->hasClausesOfKind<OMPNumTeamsClause>()) {
6694           CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6695           CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6696           const Expr *NumTeams =
6697               NestedDir->getSingleClause<OMPNumTeamsClause>()->getNumTeams();
6698           llvm::Value *NumTeamsVal =
6699               CGF.EmitScalarExpr(NumTeams,
6700                                  /*IgnoreResultAssign*/ true);
6701           return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
6702                                    /*isSigned=*/true);
6703         }
6704         return Bld.getInt32(0);
6705       }
6706       if (isOpenMPParallelDirective(NestedDir->getDirectiveKind()) ||
6707           isOpenMPSimdDirective(NestedDir->getDirectiveKind()))
6708         return Bld.getInt32(1);
6709       return Bld.getInt32(0);
6710     }
6711     return nullptr;
6712   }
6713   case OMPD_target_teams:
6714   case OMPD_target_teams_distribute:
6715   case OMPD_target_teams_distribute_simd:
6716   case OMPD_target_teams_distribute_parallel_for:
6717   case OMPD_target_teams_distribute_parallel_for_simd: {
6718     if (D.hasClausesOfKind<OMPNumTeamsClause>()) {
6719       CodeGenFunction::RunCleanupsScope NumTeamsScope(CGF);
6720       const Expr *NumTeams =
6721           D.getSingleClause<OMPNumTeamsClause>()->getNumTeams();
6722       llvm::Value *NumTeamsVal =
6723           CGF.EmitScalarExpr(NumTeams,
6724                              /*IgnoreResultAssign*/ true);
6725       return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
6726                                /*isSigned=*/true);
6727     }
6728     return Bld.getInt32(0);
6729   }
6730   case OMPD_target_parallel:
6731   case OMPD_target_parallel_for:
6732   case OMPD_target_parallel_for_simd:
6733   case OMPD_target_simd:
6734     return Bld.getInt32(1);
6735   case OMPD_parallel:
6736   case OMPD_for:
6737   case OMPD_parallel_for:
6738   case OMPD_parallel_sections:
6739   case OMPD_for_simd:
6740   case OMPD_parallel_for_simd:
6741   case OMPD_cancel:
6742   case OMPD_cancellation_point:
6743   case OMPD_ordered:
6744   case OMPD_threadprivate:
6745   case OMPD_allocate:
6746   case OMPD_task:
6747   case OMPD_simd:
6748   case OMPD_sections:
6749   case OMPD_section:
6750   case OMPD_single:
6751   case OMPD_master:
6752   case OMPD_critical:
6753   case OMPD_taskyield:
6754   case OMPD_barrier:
6755   case OMPD_taskwait:
6756   case OMPD_taskgroup:
6757   case OMPD_atomic:
6758   case OMPD_flush:
6759   case OMPD_teams:
6760   case OMPD_target_data:
6761   case OMPD_target_exit_data:
6762   case OMPD_target_enter_data:
6763   case OMPD_distribute:
6764   case OMPD_distribute_simd:
6765   case OMPD_distribute_parallel_for:
6766   case OMPD_distribute_parallel_for_simd:
6767   case OMPD_teams_distribute:
6768   case OMPD_teams_distribute_simd:
6769   case OMPD_teams_distribute_parallel_for:
6770   case OMPD_teams_distribute_parallel_for_simd:
6771   case OMPD_target_update:
6772   case OMPD_declare_simd:
6773   case OMPD_declare_target:
6774   case OMPD_end_declare_target:
6775   case OMPD_declare_reduction:
6776   case OMPD_declare_mapper:
6777   case OMPD_taskloop:
6778   case OMPD_taskloop_simd:
6779   case OMPD_requires:
6780   case OMPD_unknown:
6781     break;
6782   }
6783   llvm_unreachable("Unexpected directive kind.");
6784 }
6785 
/// Compute the number of threads implied by the single statement nested in the
/// captured statement \p CS.
///
/// \param CGF Function in which the clause expressions are emitted.
/// \param CS Captured statement whose single child is inspected.
/// \param DefaultThreadLimitVal Fallback value and upper bound for the result;
/// may be null. It is compared against the i32 num_threads value, so it is
/// expected to be an i32 as well.
///
/// \returns
///  - for a nested parallel directive: the value derived from its 'if' and
///    num_threads clauses (see the comments in the body);
///  - for a nested simd directive: the constant 1;
///  - for any other nested directive: \p DefaultThreadLimitVal unchanged,
///    which may be null;
///  - when the single child is not an OpenMP directive (or there is no single
///    child): \p DefaultThreadLimitVal if non-null, otherwise the constant 0.
static llvm::Value *getNumThreads(CodeGenFunction &CGF, const CapturedStmt *CS,
                                  llvm::Value *DefaultThreadLimitVal) {
  const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
      CGF.getContext(), CS->getCapturedStmt());
  if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
    if (isOpenMPParallelDirective(Dir->getDirectiveKind())) {
      llvm::Value *NumThreads = nullptr;
      // Stays null unless the 'if' condition has to be evaluated at run time.
      llvm::Value *CondVal = nullptr;
      // Handle if clause. If if clause present, the number of threads is
      // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
      if (Dir->hasClausesOfKind<OMPIfClause>()) {
        CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
        // Pick the first 'if' clause that has no name modifier or the
        // 'parallel' modifier.
        const OMPIfClause *IfClause = nullptr;
        for (const auto *C : Dir->getClausesOfKind<OMPIfClause>()) {
          if (C->getNameModifier() == OMPD_unknown ||
              C->getNameModifier() == OMPD_parallel) {
            IfClause = C;
            break;
          }
        }
        if (IfClause) {
          const Expr *Cond = IfClause->getCondition();
          bool Result;
          if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
            // Condition folds to a constant: a false condition yields the
            // constant 1; a true condition needs no run-time select.
            if (!Result)
              return CGF.Builder.getInt32(1);
          } else {
            CodeGenFunction::LexicalScope Scope(CGF, Cond->getSourceRange());
            // Emit the clause's pre-init declarations before the condition.
            if (const auto *PreInit =
                    cast_or_null<DeclStmt>(IfClause->getPreInitStmt())) {
              for (const auto *I : PreInit->decls()) {
                if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
                  CGF.EmitVarDecl(cast<VarDecl>(*I));
                } else {
                  // Declarations carrying OMPCaptureNoInitAttr only get their
                  // alloca and cleanups; no initializer is emitted here.
                  CodeGenFunction::AutoVarEmission Emission =
                      CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
                  CGF.EmitAutoVarCleanups(Emission);
                }
              }
            }
            CondVal = CGF.EvaluateExprAsBool(Cond);
          }
        }
      }
      // Check the value of num_threads clause iff if clause was not specified
      // or is not evaluated to false.
      if (Dir->hasClausesOfKind<OMPNumThreadsClause>()) {
        CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
        const auto *NumThreadsClause =
            Dir->getSingleClause<OMPNumThreadsClause>();
        CodeGenFunction::LexicalScope Scope(
            CGF, NumThreadsClause->getNumThreads()->getSourceRange());
        // Emit the clause's pre-init declarations before the expression.
        if (const auto *PreInit =
                cast_or_null<DeclStmt>(NumThreadsClause->getPreInitStmt())) {
          for (const auto *I : PreInit->decls()) {
            if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
              CGF.EmitVarDecl(cast<VarDecl>(*I));
            } else {
              CodeGenFunction::AutoVarEmission Emission =
                  CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
              CGF.EmitAutoVarCleanups(Emission);
            }
          }
        }
        NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads());
        NumThreads = CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty,
                                               /*isSigned=*/false);
        // Clamp to the default thread limit: unsigned minimum of the two.
        if (DefaultThreadLimitVal)
          NumThreads = CGF.Builder.CreateSelect(
              CGF.Builder.CreateICmpULT(DefaultThreadLimitVal, NumThreads),
              DefaultThreadLimitVal, NumThreads);
      } else {
        // No num_threads clause: fall back to the default limit, or 0.
        NumThreads = DefaultThreadLimitVal ? DefaultThreadLimitVal
                                           : CGF.Builder.getInt32(0);
      }
      // Process condition of the if clause.
      if (CondVal) {
        NumThreads = CGF.Builder.CreateSelect(CondVal, NumThreads,
                                              CGF.Builder.getInt32(1));
      }
      return NumThreads;
    }
    if (isOpenMPSimdDirective(Dir->getDirectiveKind()))
      return CGF.Builder.getInt32(1);
    // Some other directive: hand back the default unchanged (possibly null).
    return DefaultThreadLimitVal;
  }
  return DefaultThreadLimitVal ? DefaultThreadLimitVal
                               : CGF.Builder.getInt32(0);
}
6877 
/// Emit the number of threads for a target directive.  Inspect the
/// thread_limit clause associated with a teams construct combined or closely
/// nested with the target directive.
///
/// Emit the num_threads clause for directives such as 'target parallel' that
/// have no associated teams construct.
///
/// Otherwise, return nullptr.
///
/// \param CGF Function in which the clause expressions are emitted; must not
/// be generating device code (asserted).
/// \param D Target execution directive (asserted).
/// \returns The emitted value; clause values are cast to i32 as unsigned, and
/// the constant 0 is used where neither a clause nor a nested directive
/// determines the result. The 'target teams distribute' case forwards the
/// result of getNumThreads() directly.
static llvm::Value *
emitNumThreadsForTargetDirective(CodeGenFunction &CGF,
                                 const OMPExecutableDirective &D) {
  assert(!CGF.getLangOpts().OpenMPIsDevice &&
         "Clauses associated with the teams directive expected to be emitted "
         "only for the host!");
  OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
  assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
         "Expected target-based executable directive.");
  CGBuilderTy &Bld = CGF.Builder;
  // Value of the thread_limit clause, once one has been emitted.
  llvm::Value *ThreadLimitVal = nullptr;
  // Value of the num_threads clause; only set in the combined-parallel case.
  llvm::Value *NumThreadsVal = nullptr;
  switch (DirectiveKind) {
  case OMPD_target: {
    const CapturedStmt *CS = D.getInnermostCapturedStmt();
    // ThreadLimitVal is still null here, so a non-null result means the
    // nested statement alone determined the thread count.
    if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
      return NumThreads;
    const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
        CGF.getContext(), CS->getCapturedStmt());
    if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
      // Emit the nested directive's thread_limit clause, including its
      // pre-init declarations, with the target region's capture info active.
      if (Dir->hasClausesOfKind<OMPThreadLimitClause>()) {
        CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
        const auto *ThreadLimitClause =
            Dir->getSingleClause<OMPThreadLimitClause>();
        CodeGenFunction::LexicalScope Scope(
            CGF, ThreadLimitClause->getThreadLimit()->getSourceRange());
        if (const auto *PreInit =
                cast_or_null<DeclStmt>(ThreadLimitClause->getPreInitStmt())) {
          for (const auto *I : PreInit->decls()) {
            if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
              CGF.EmitVarDecl(cast<VarDecl>(*I));
            } else {
              CodeGenFunction::AutoVarEmission Emission =
                  CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
              CGF.EmitAutoVarCleanups(Emission);
            }
          }
        }
        llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
            ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
        ThreadLimitVal =
            Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
      }
      // For a teams directive that is not also a distribute directive, step
      // down to the single child of the teams region (may become null).
      if (isOpenMPTeamsDirective(Dir->getDirectiveKind()) &&
          !isOpenMPDistributeDirective(Dir->getDirectiveKind())) {
        CS = Dir->getInnermostCapturedStmt();
        const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
            CGF.getContext(), CS->getCapturedStmt());
        Dir = dyn_cast_or_null<OMPExecutableDirective>(Child);
      }
      // For a non-simd distribute directive, look at what is nested inside
      // it, now bounded by the thread limit found above (if any).
      if (Dir && isOpenMPDistributeDirective(Dir->getDirectiveKind()) &&
          !isOpenMPSimdDirective(Dir->getDirectiveKind())) {
        CS = Dir->getInnermostCapturedStmt();
        if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
          return NumThreads;
      }
      if (Dir && isOpenMPSimdDirective(Dir->getDirectiveKind()))
        return Bld.getInt32(1);
    }
    return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0);
  }
  case OMPD_target_teams: {
    // The thread_limit clause, if present, is on the combined directive.
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
    }
    const CapturedStmt *CS = D.getInnermostCapturedStmt();
    if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
      return NumThreads;
    // Nothing determined yet: if the single child is a plain 'distribute',
    // inspect what is nested inside it.
    const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
        CGF.getContext(), CS->getCapturedStmt());
    if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
      if (Dir->getDirectiveKind() == OMPD_distribute) {
        CS = Dir->getInnermostCapturedStmt();
        if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
          return NumThreads;
      }
    }
    return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0);
  }
  case OMPD_target_teams_distribute:
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
    }
    // The result of getNumThreads() is returned as is, without a fallback.
    return getNumThreads(CGF, D.getInnermostCapturedStmt(), ThreadLimitVal);
  case OMPD_target_parallel:
  case OMPD_target_parallel_for:
  case OMPD_target_parallel_for_simd:
  case OMPD_target_teams_distribute_parallel_for:
  case OMPD_target_teams_distribute_parallel_for_simd: {
    // All relevant clauses ('if', thread_limit, num_threads) are on the
    // combined directive itself.
    llvm::Value *CondVal = nullptr;
    // Handle if clause. If if clause present, the number of threads is
    // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
    if (D.hasClausesOfKind<OMPIfClause>()) {
      // Pick the first 'if' clause that has no name modifier or the
      // 'parallel' modifier.
      const OMPIfClause *IfClause = nullptr;
      for (const auto *C : D.getClausesOfKind<OMPIfClause>()) {
        if (C->getNameModifier() == OMPD_unknown ||
            C->getNameModifier() == OMPD_parallel) {
          IfClause = C;
          break;
        }
      }
      if (IfClause) {
        const Expr *Cond = IfClause->getCondition();
        bool Result;
        if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
          // Constant-false condition: the result is the constant 1.
          if (!Result)
            return Bld.getInt32(1);
        } else {
          CodeGenFunction::RunCleanupsScope Scope(CGF);
          CondVal = CGF.EvaluateExprAsBool(Cond);
        }
      }
    }
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
    }
    if (D.hasClausesOfKind<OMPNumThreadsClause>()) {
      CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
      const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>();
      llvm::Value *NumThreads = CGF.EmitScalarExpr(
          NumThreadsClause->getNumThreads(), /*IgnoreResultAssign=*/true);
      NumThreadsVal =
          Bld.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned=*/false);
      // With both clauses present, use the unsigned minimum of the two;
      // otherwise num_threads alone.
      ThreadLimitVal = ThreadLimitVal
                           ? Bld.CreateSelect(Bld.CreateICmpULT(NumThreadsVal,
                                                                ThreadLimitVal),
                                              NumThreadsVal, ThreadLimitVal)
                           : NumThreadsVal;
    }
    if (!ThreadLimitVal)
      ThreadLimitVal = Bld.getInt32(0);
    // A run-time 'if' condition selects between the computed value and 1.
    if (CondVal)
      return Bld.CreateSelect(CondVal, ThreadLimitVal, Bld.getInt32(1));
    return ThreadLimitVal;
  }
  case OMPD_target_teams_distribute_simd:
  case OMPD_target_simd:
    return Bld.getInt32(1);
  // The remaining kinds are not expected here (see the assertion above) and
  // fall through to llvm_unreachable.
  case OMPD_parallel:
  case OMPD_for:
  case OMPD_parallel_for:
  case OMPD_parallel_sections:
  case OMPD_for_simd:
  case OMPD_parallel_for_simd:
  case OMPD_cancel:
  case OMPD_cancellation_point:
  case OMPD_ordered:
  case OMPD_threadprivate:
  case OMPD_allocate:
  case OMPD_task:
  case OMPD_simd:
  case OMPD_sections:
  case OMPD_section:
  case OMPD_single:
  case OMPD_master:
  case OMPD_critical:
  case OMPD_taskyield:
  case OMPD_barrier:
  case OMPD_taskwait:
  case OMPD_taskgroup:
  case OMPD_atomic:
  case OMPD_flush:
  case OMPD_teams:
  case OMPD_target_data:
  case OMPD_target_exit_data:
  case OMPD_target_enter_data:
  case OMPD_distribute:
  case OMPD_distribute_simd:
  case OMPD_distribute_parallel_for:
  case OMPD_distribute_parallel_for_simd:
  case OMPD_teams_distribute:
  case OMPD_teams_distribute_simd:
  case OMPD_teams_distribute_parallel_for:
  case OMPD_teams_distribute_parallel_for_simd:
  case OMPD_target_update:
  case OMPD_declare_simd:
  case OMPD_declare_target:
  case OMPD_end_declare_target:
  case OMPD_declare_reduction:
  case OMPD_declare_mapper:
  case OMPD_taskloop:
  case OMPD_taskloop_simd:
  case OMPD_requires:
  case OMPD_unknown:
    break;
  }
  llvm_unreachable("Unsupported directive kind.");
}
7090 
7091 namespace {
7092 LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
7093 
7094 // Utility to handle information from clauses associated with a given
7095 // construct that use mappable expressions (e.g. 'map' clause, 'to' clause).
7096 // It provides a convenient interface to obtain the information and generate
7097 // code for that information.
7098 class MappableExprsHandler {
7099 public:
  /// Values for bit flags used to specify the mapping type for
  /// offloading. The enumeration is marked as an LLVM bitmask enum, so the
  /// values can be combined with the bitwise operators.
  enum OpenMPOffloadMappingFlags : uint64_t {
    /// No flags
    OMP_MAP_NONE = 0x0,
    /// Allocate memory on the device and move data from host to device.
    OMP_MAP_TO = 0x01,
    /// Allocate memory on the device and move data from device to host.
    OMP_MAP_FROM = 0x02,
    /// Always perform the requested mapping action on the element, even
    /// if it was already mapped before.
    OMP_MAP_ALWAYS = 0x04,
    /// Delete the element from the device environment, ignoring the
    /// current reference count associated with the element.
    OMP_MAP_DELETE = 0x08,
    /// The element being mapped is a pointer-pointee pair; both the
    /// pointer and the pointee should be mapped.
    OMP_MAP_PTR_AND_OBJ = 0x10,
    /// This flag signals that the base address of an entry should be
    /// passed to the target kernel as an argument.
    OMP_MAP_TARGET_PARAM = 0x20,
    /// Signal that the runtime library has to return the device pointer
    /// in the current position for the data being mapped. Used when we have the
    /// use_device_ptr clause.
    OMP_MAP_RETURN_PARAM = 0x40,
    /// This flag signals that the reference being passed is a pointer to
    /// private data.
    OMP_MAP_PRIVATE = 0x80,
    /// Pass the element to the device by value.
    OMP_MAP_LITERAL = 0x100,
    /// The map was not written by the user but added implicitly.
    OMP_MAP_IMPLICIT = 0x200,
    /// Close is a hint to the runtime to allocate memory close to
    /// the target device.
    OMP_MAP_CLOSE = 0x400,
    /// The 16 MSBs of the flags indicate whether the entry is member of some
    /// struct/class.
    OMP_MAP_MEMBER_OF = 0xffff000000000000,
    LLVM_MARK_AS_BITMASK_ENUM(/* LargestFlag = */ OMP_MAP_MEMBER_OF),
  };
7140 
7141   /// Get the offset of the OMP_MAP_MEMBER_OF field.
7142   static unsigned getFlagMemberOffset() {
7143     unsigned Offset = 0;
7144     for (uint64_t Remain = OMP_MAP_MEMBER_OF; !(Remain & 1);
7145          Remain = Remain >> 1)
7146       Offset++;
7147     return Offset;
7148   }
7149 
  /// Class that associates information with a base pointer to be passed to the
  /// runtime library.
  class BasePointerInfo {
    /// The base pointer.
    llvm::Value *Ptr = nullptr;
    /// The base declaration that refers to this device pointer, or null if
    /// there is none.
    const ValueDecl *DevPtrDecl = nullptr;

  public:
    /// Wraps \p Ptr, optionally together with the declaration \p DevPtrDecl.
    /// The constructor is not explicit, so a plain llvm::Value * converts
    /// implicitly to a BasePointerInfo without a declaration.
    BasePointerInfo(llvm::Value *Ptr, const ValueDecl *DevPtrDecl = nullptr)
        : Ptr(Ptr), DevPtrDecl(DevPtrDecl) {}
    /// Returns the wrapped base pointer.
    llvm::Value *operator*() const { return Ptr; }
    /// Returns the associated device pointer declaration, or null.
    const ValueDecl *getDevicePtrDecl() const { return DevPtrDecl; }
    /// Replaces the associated device pointer declaration with \p D.
    void setDevicePtrDecl(const ValueDecl *D) { DevPtrDecl = D; }
  };
7166 
7167   using MapBaseValuesArrayTy = SmallVector<BasePointerInfo, 4>;
7168   using MapValuesArrayTy = SmallVector<llvm::Value *, 4>;
7169   using MapFlagsArrayTy = SmallVector<OpenMPOffloadMappingFlags, 4>;
7170 
  /// Map between a struct and its lowest & highest elements which have been
  /// mapped.
  /// [ValueDecl *] --> {LE(FieldIndex, Pointer),
  ///                    HE(FieldIndex, Pointer)}
  struct StructRangeInfoTy {
    /// Lowest mapped element as (field index, address). Starts out as field
    /// index 0 with an invalid address.
    std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> LowestElem = {
        0, Address::invalid()};
    /// Highest mapped element as (field index, address). Starts out as field
    /// index 0 with an invalid address.
    std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> HighestElem = {
        0, Address::invalid()};
    /// Base address; starts out invalid.
    /// NOTE(review): presumably the address of the enclosing struct - confirm
    /// against the code that fills this in.
    Address Base = Address::invalid();
  };
7182 
7183 private:
  /// Mapping information recorded for one list of mappable-expression
  /// components: the components themselves, the map type and modifiers, and
  /// whether a device pointer has to be returned and whether the map is
  /// implicit.
  struct MapInfo {
    /// Components of the mappable expression.
    OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
    /// Map type of the entry; OMPC_MAP_unknown until set.
    OpenMPMapClauseKind MapType = OMPC_MAP_unknown;
    /// Map-type modifiers attached to the entry.
    ArrayRef<OpenMPMapModifierKind> MapModifiers;
    /// True if a device pointer has to be returned for this entry.
    bool ReturnDevicePointer = false;
    /// True if the map is implicit.
    bool IsImplicit = false;

    MapInfo() = default;
    /// Initializes every field from the argument of the same name.
    MapInfo(
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
        OpenMPMapClauseKind MapType,
        ArrayRef<OpenMPMapModifierKind> MapModifiers,
        bool ReturnDevicePointer, bool IsImplicit)
        : Components(Components), MapType(MapType), MapModifiers(MapModifiers),
          ReturnDevicePointer(ReturnDevicePointer), IsImplicit(IsImplicit) {}
  };
7201 
  /// If use_device_ptr is used on a pointer which is a struct member and there
  /// is no map information about it, then emission of that entry is deferred
  /// until the whole struct has been processed.
  struct DeferredDevicePtrEntryTy {
    /// Expression recorded for the deferred entry.
    /// NOTE(review): presumably the expression naming the struct member -
    /// confirm against the code that creates these entries.
    const Expr *IE = nullptr;
    /// Declaration recorded for the deferred entry.
    /// NOTE(review): presumably the use_device_ptr declaration - confirm.
    const ValueDecl *VD = nullptr;

    /// Stores \p IE and \p VD unchanged.
    DeferredDevicePtrEntryTy(const Expr *IE, const ValueDecl *VD)
        : IE(IE), VD(VD) {}
  };
7212 
  /// The target directive from which the mappable clauses were extracted. It
  /// is either an executable directive or a user-defined mapper directive.
7215   llvm::PointerUnion<const OMPExecutableDirective *,
7216                      const OMPDeclareMapperDecl *>
7217       CurDir;
7218 
7219   /// Function the directive is being generated for.
7220   CodeGenFunction &CGF;
7221 
7222   /// Set of all first private variables in the current directive.
7223   /// bool data is set to true if the variable is implicitly marked as
7224   /// firstprivate, false otherwise.
7225   llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, bool> FirstPrivateDecls;
7226 
7227   /// Map between device pointer declarations and their expression components.
7228   /// The key value for declarations in 'this' is null.
7229   llvm::DenseMap<
7230       const ValueDecl *,
7231       SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>>
7232       DevPointersMap;
7233 
7234   llvm::Value *getExprTypeSize(const Expr *E) const {
7235     QualType ExprTy = E->getType().getCanonicalType();
7236 
7237     // Reference types are ignored for mapping purposes.
7238     if (const auto *RefTy = ExprTy->getAs<ReferenceType>())
7239       ExprTy = RefTy->getPointeeType().getCanonicalType();
7240 
7241     // Given that an array section is considered a built-in type, we need to
7242     // do the calculation based on the length of the section instead of relying
7243     // on CGF.getTypeSize(E->getType()).
7244     if (const auto *OAE = dyn_cast<OMPArraySectionExpr>(E)) {
7245       QualType BaseTy = OMPArraySectionExpr::getBaseOriginalType(
7246                             OAE->getBase()->IgnoreParenImpCasts())
7247                             .getCanonicalType();
7248 
7249       // If there is no length associated with the expression, that means we
7250       // are using the whole length of the base.
7251       if (!OAE->getLength() && OAE->getColonLoc().isValid())
7252         return CGF.getTypeSize(BaseTy);
7253 
7254       llvm::Value *ElemSize;
7255       if (const auto *PTy = BaseTy->getAs<PointerType>()) {
7256         ElemSize = CGF.getTypeSize(PTy->getPointeeType().getCanonicalType());
7257       } else {
7258         const auto *ATy = cast<ArrayType>(BaseTy.getTypePtr());
7259         assert(ATy && "Expecting array type if not a pointer type.");
7260         ElemSize = CGF.getTypeSize(ATy->getElementType().getCanonicalType());
7261       }
7262 
7263       // If we don't have a length at this point, that is because we have an
7264       // array section with a single element.
7265       if (!OAE->getLength())
7266         return ElemSize;
7267 
7268       llvm::Value *LengthVal = CGF.EmitScalarExpr(OAE->getLength());
7269       LengthVal =
7270           CGF.Builder.CreateIntCast(LengthVal, CGF.SizeTy, /*isSigned=*/false);
7271       return CGF.Builder.CreateNUWMul(LengthVal, ElemSize);
7272     }
7273     return CGF.getTypeSize(ExprTy);
7274   }
7275 
7276   /// Return the corresponding bits for a given map clause modifier. Add
7277   /// a flag marking the map as a pointer if requested. Add a flag marking the
7278   /// map as the first one of a series of maps that relate to the same map
7279   /// expression.
7280   OpenMPOffloadMappingFlags getMapTypeBits(
7281       OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
7282       bool IsImplicit, bool AddPtrFlag, bool AddIsTargetParamFlag) const {
7283     OpenMPOffloadMappingFlags Bits =
7284         IsImplicit ? OMP_MAP_IMPLICIT : OMP_MAP_NONE;
7285     switch (MapType) {
7286     case OMPC_MAP_alloc:
7287     case OMPC_MAP_release:
7288       // alloc and release is the default behavior in the runtime library,  i.e.
7289       // if we don't pass any bits alloc/release that is what the runtime is
7290       // going to do. Therefore, we don't need to signal anything for these two
7291       // type modifiers.
7292       break;
7293     case OMPC_MAP_to:
7294       Bits |= OMP_MAP_TO;
7295       break;
7296     case OMPC_MAP_from:
7297       Bits |= OMP_MAP_FROM;
7298       break;
7299     case OMPC_MAP_tofrom:
7300       Bits |= OMP_MAP_TO | OMP_MAP_FROM;
7301       break;
7302     case OMPC_MAP_delete:
7303       Bits |= OMP_MAP_DELETE;
7304       break;
7305     case OMPC_MAP_unknown:
7306       llvm_unreachable("Unexpected map type!");
7307     }
7308     if (AddPtrFlag)
7309       Bits |= OMP_MAP_PTR_AND_OBJ;
7310     if (AddIsTargetParamFlag)
7311       Bits |= OMP_MAP_TARGET_PARAM;
7312     if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_always)
7313         != MapModifiers.end())
7314       Bits |= OMP_MAP_ALWAYS;
7315     if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_close)
7316         != MapModifiers.end())
7317       Bits |= OMP_MAP_CLOSE;
7318     return Bits;
7319   }
7320 
7321   /// Return true if the provided expression is a final array section. A
7322   /// final array section, is one whose length can't be proved to be one.
7323   bool isFinalArraySectionExpression(const Expr *E) const {
7324     const auto *OASE = dyn_cast<OMPArraySectionExpr>(E);
7325 
7326     // It is not an array section and therefore not a unity-size one.
7327     if (!OASE)
7328       return false;
7329 
7330     // An array section with no colon always refer to a single element.
7331     if (OASE->getColonLoc().isInvalid())
7332       return false;
7333 
7334     const Expr *Length = OASE->getLength();
7335 
7336     // If we don't have a length we have to check if the array has size 1
7337     // for this dimension. Also, we should always expect a length if the
7338     // base type is pointer.
7339     if (!Length) {
7340       QualType BaseQTy = OMPArraySectionExpr::getBaseOriginalType(
7341                              OASE->getBase()->IgnoreParenImpCasts())
7342                              .getCanonicalType();
7343       if (const auto *ATy = dyn_cast<ConstantArrayType>(BaseQTy.getTypePtr()))
7344         return ATy->getSize().getSExtValue() != 1;
7345       // If we don't have a constant dimension length, we have to consider
7346       // the current section as having any size, so it is not necessarily
7347       // unitary. If it happen to be unity size, that's user fault.
7348       return true;
7349     }
7350 
7351     // Check if the length evaluates to 1.
7352     Expr::EvalResult Result;
7353     if (!Length->EvaluateAsInt(Result, CGF.getContext()))
7354       return true; // Can have more that size 1.
7355 
7356     llvm::APSInt ConstLength = Result.Val.getInt();
7357     return ConstLength.getSExtValue() != 1;
7358   }
7359 
7360   /// Generate the base pointers, section pointers, sizes and map type
7361   /// bits for the provided map type, map modifier, and expression components.
7362   /// \a IsFirstComponent should be set to true if the provided set of
7363   /// components is the first associated with a capture.
7364   void generateInfoForComponentList(
7365       OpenMPMapClauseKind MapType,
7366       ArrayRef<OpenMPMapModifierKind> MapModifiers,
7367       OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
7368       MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
7369       MapValuesArrayTy &Sizes, MapFlagsArrayTy &Types,
7370       StructRangeInfoTy &PartialStruct, bool IsFirstComponentList,
7371       bool IsImplicit,
7372       ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
7373           OverlappedElements = llvm::None) const {
7374     // The following summarizes what has to be generated for each map and the
7375     // types below. The generated information is expressed in this order:
7376     // base pointer, section pointer, size, flags
7377     // (to add to the ones that come from the map type and modifier).
7378     //
7379     // double d;
7380     // int i[100];
7381     // float *p;
7382     //
7383     // struct S1 {
7384     //   int i;
7385     //   float f[50];
7386     // }
7387     // struct S2 {
7388     //   int i;
7389     //   float f[50];
7390     //   S1 s;
7391     //   double *p;
7392     //   struct S2 *ps;
7393     // }
7394     // S2 s;
7395     // S2 *ps;
7396     //
7397     // map(d)
7398     // &d, &d, sizeof(double), TARGET_PARAM | TO | FROM
7399     //
7400     // map(i)
7401     // &i, &i, 100*sizeof(int), TARGET_PARAM | TO | FROM
7402     //
7403     // map(i[1:23])
7404     // &i(=&i[0]), &i[1], 23*sizeof(int), TARGET_PARAM | TO | FROM
7405     //
7406     // map(p)
7407     // &p, &p, sizeof(float*), TARGET_PARAM | TO | FROM
7408     //
7409     // map(p[1:24])
7410     // p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM
7411     //
7412     // map(s)
7413     // &s, &s, sizeof(S2), TARGET_PARAM | TO | FROM
7414     //
7415     // map(s.i)
7416     // &s, &(s.i), sizeof(int), TARGET_PARAM | TO | FROM
7417     //
7418     // map(s.s.f)
7419     // &s, &(s.s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
7420     //
7421     // map(s.p)
7422     // &s, &(s.p), sizeof(double*), TARGET_PARAM | TO | FROM
7423     //
7424     // map(to: s.p[:22])
7425     // &s, &(s.p), sizeof(double*), TARGET_PARAM (*)
7426     // &s, &(s.p), sizeof(double*), MEMBER_OF(1) (**)
7427     // &(s.p), &(s.p[0]), 22*sizeof(double),
7428     //   MEMBER_OF(1) | PTR_AND_OBJ | TO (***)
7429     // (*) alloc space for struct members, only this is a target parameter
7430     // (**) map the pointer (nothing to be mapped in this example) (the compiler
7431     //      optimizes this entry out, same in the examples below)
7432     // (***) map the pointee (map: to)
7433     //
7434     // map(s.ps)
7435     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM | TO | FROM
7436     //
7437     // map(from: s.ps->s.i)
7438     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7439     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7440     // &(s.ps), &(s.ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ  | FROM
7441     //
7442     // map(to: s.ps->ps)
7443     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7444     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7445     // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ  | TO
7446     //
7447     // map(s.ps->ps->ps)
7448     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7449     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7450     // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7451     // &(s.ps->ps), &(s.ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
7452     //
7453     // map(to: s.ps->ps->s.f[:22])
7454     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7455     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7456     // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7457     // &(s.ps->ps), &(s.ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
7458     //
7459     // map(ps)
7460     // &ps, &ps, sizeof(S2*), TARGET_PARAM | TO | FROM
7461     //
7462     // map(ps->i)
7463     // ps, &(ps->i), sizeof(int), TARGET_PARAM | TO | FROM
7464     //
7465     // map(ps->s.f)
7466     // ps, &(ps->s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
7467     //
7468     // map(from: ps->p)
7469     // ps, &(ps->p), sizeof(double*), TARGET_PARAM | FROM
7470     //
7471     // map(to: ps->p[:22])
7472     // ps, &(ps->p), sizeof(double*), TARGET_PARAM
7473     // ps, &(ps->p), sizeof(double*), MEMBER_OF(1)
7474     // &(ps->p), &(ps->p[0]), 22*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | TO
7475     //
7476     // map(ps->ps)
7477     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM | TO | FROM
7478     //
7479     // map(from: ps->ps->s.i)
7480     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7481     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7482     // &(ps->ps), &(ps->ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7483     //
7484     // map(from: ps->ps->ps)
7485     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7486     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7487     // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7488     //
7489     // map(ps->ps->ps->ps)
7490     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7491     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7492     // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7493     // &(ps->ps->ps), &(ps->ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
7494     //
7495     // map(to: ps->ps->ps->s.f[:22])
7496     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7497     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7498     // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7499     // &(ps->ps->ps), &(ps->ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
7500     //
7501     // map(to: s.f[:22]) map(from: s.p[:33])
7502     // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1) +
7503     //     sizeof(double*) (**), TARGET_PARAM
7504     // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | TO
7505     // &s, &(s.p), sizeof(double*), MEMBER_OF(1)
7506     // &(s.p), &(s.p[0]), 33*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7507     // (*) allocate contiguous space needed to fit all mapped members even if
7508     //     we allocate space for members not mapped (in this example,
7509     //     s.f[22..49] and s.s are not mapped, yet we must allocate space for
7510     //     them as well because they fall between &s.f[0] and &s.p)
7511     //
7512     // map(from: s.f[:22]) map(to: ps->p[:33])
7513     // &s, &(s.f[0]), 22*sizeof(float), TARGET_PARAM | FROM
7514     // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
7515     // ps, &(ps->p), sizeof(double*), MEMBER_OF(2) (*)
7516     // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(2) | PTR_AND_OBJ | TO
7517     // (*) the struct this entry pertains to is the 2nd element in the list of
7518     //     arguments, hence MEMBER_OF(2)
7519     //
7520     // map(from: s.f[:22], s.s) map(to: ps->p[:33])
7521     // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1), TARGET_PARAM
7522     // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | FROM
7523     // &s, &(s.s), sizeof(struct S1), MEMBER_OF(1) | FROM
7524     // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
7525     // ps, &(ps->p), sizeof(double*), MEMBER_OF(4) (*)
7526     // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(4) | PTR_AND_OBJ | TO
7527     // (*) the struct this entry pertains to is the 4th element in the list
7528     //     of arguments, hence MEMBER_OF(4)
7529 
7530     // Track if the map information being generated is the first for a capture.
7531     bool IsCaptureFirstInfo = IsFirstComponentList;
7532     // When the variable is on a declare target link or in a to clause with
7533     // unified memory, a reference is needed to hold the host/device address
7534     // of the variable.
7535     bool RequiresReference = false;
7536 
7537     // Scan the components from the base to the complete expression.
7538     auto CI = Components.rbegin();
7539     auto CE = Components.rend();
7540     auto I = CI;
7541 
7542     // Track if the map information being generated is the first for a list of
7543     // components.
7544     bool IsExpressionFirstInfo = true;
7545     Address BP = Address::invalid();
7546     const Expr *AssocExpr = I->getAssociatedExpression();
7547     const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr);
7548     const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);
7549 
7550     if (isa<MemberExpr>(AssocExpr)) {
7551       // The base is the 'this' pointer. The content of the pointer is going
7552       // to be the base of the field being mapped.
7553       BP = CGF.LoadCXXThisAddress();
7554     } else if ((AE && isa<CXXThisExpr>(AE->getBase()->IgnoreParenImpCasts())) ||
7555                (OASE &&
7556                 isa<CXXThisExpr>(OASE->getBase()->IgnoreParenImpCasts()))) {
7557       BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress();
7558     } else {
7559       // The base is the reference to the variable.
7560       // BP = &Var.
7561       BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress();
7562       if (const auto *VD =
7563               dyn_cast_or_null<VarDecl>(I->getAssociatedDeclaration())) {
7564         if (llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
7565                 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD)) {
7566           if ((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
7567               (*Res == OMPDeclareTargetDeclAttr::MT_To &&
7568                CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())) {
7569             RequiresReference = true;
7570             BP = CGF.CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
7571           }
7572         }
7573       }
7574 
7575       // If the variable is a pointer and is being dereferenced (i.e. is not
7576       // the last component), the base has to be the pointer itself, not its
7577       // reference. References are ignored for mapping purposes.
7578       QualType Ty =
7579           I->getAssociatedDeclaration()->getType().getNonReferenceType();
7580       if (Ty->isAnyPointerType() && std::next(I) != CE) {
7581         BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
7582 
7583         // We do not need to generate individual map information for the
7584         // pointer, it can be associated with the combined storage.
7585         ++I;
7586       }
7587     }
7588 
7589     // Track whether a component of the list should be marked as MEMBER_OF some
7590     // combined entry (for partial structs). Only the first PTR_AND_OBJ entry
7591     // in a component list should be marked as MEMBER_OF, all subsequent entries
7592     // do not belong to the base struct. E.g.
7593     // struct S2 s;
7594     // s.ps->ps->ps->f[:]
7595     //   (1) (2) (3) (4)
7596     // ps(1) is a member pointer, ps(2) is a pointee of ps(1), so it is a
7597     // PTR_AND_OBJ entry; the PTR is ps(1), so MEMBER_OF the base struct. ps(3)
7598     // is the pointee of ps(2) which is not member of struct s, so it should not
7599     // be marked as such (it is still PTR_AND_OBJ).
7600     // The variable is initialized to false so that PTR_AND_OBJ entries which
7601     // are not struct members are not considered (e.g. array of pointers to
7602     // data).
7603     bool ShouldBeMemberOf = false;
7604 
7605     // Variable keeping track of whether or not we have encountered a component
7606     // in the component list which is a member expression. Useful when we have a
7607     // pointer or a final array section, in which case it is the previous
7608     // component in the list which tells us whether we have a member expression.
7609     // E.g. X.f[:]
7610     // While processing the final array section "[:]" it is "f" which tells us
7611     // whether we are dealing with a member of a declared struct.
7612     const MemberExpr *EncounteredME = nullptr;
7613 
7614     for (; I != CE; ++I) {
7615       // If the current component is member of a struct (parent struct) mark it.
7616       if (!EncounteredME) {
7617         EncounteredME = dyn_cast<MemberExpr>(I->getAssociatedExpression());
7618         // If we encounter a PTR_AND_OBJ entry from now on it should be marked
7619         // as MEMBER_OF the parent struct.
7620         if (EncounteredME)
7621           ShouldBeMemberOf = true;
7622       }
7623 
7624       auto Next = std::next(I);
7625 
7626       // We need to generate the addresses and sizes if this is the last
7627       // component, if the component is a pointer or if it is an array section
7628       // whose length can't be proved to be one. If this is a pointer, it
7629       // becomes the base address for the following components.
7630 
7631       // A final array section, is one whose length can't be proved to be one.
7632       bool IsFinalArraySection =
7633           isFinalArraySectionExpression(I->getAssociatedExpression());
7634 
7635       // Get information on whether the element is a pointer. Have to do a
7636       // special treatment for array sections given that they are built-in
7637       // types.
7638       const auto *OASE =
7639           dyn_cast<OMPArraySectionExpr>(I->getAssociatedExpression());
7640       bool IsPointer =
7641           (OASE && OMPArraySectionExpr::getBaseOriginalType(OASE)
7642                        .getCanonicalType()
7643                        ->isAnyPointerType()) ||
7644           I->getAssociatedExpression()->getType()->isAnyPointerType();
7645 
7646       if (Next == CE || IsPointer || IsFinalArraySection) {
7647         // If this is not the last component, we expect the pointer to be
7648         // associated with an array expression or member expression.
7649         assert((Next == CE ||
7650                 isa<MemberExpr>(Next->getAssociatedExpression()) ||
7651                 isa<ArraySubscriptExpr>(Next->getAssociatedExpression()) ||
7652                 isa<OMPArraySectionExpr>(Next->getAssociatedExpression())) &&
7653                "Unexpected expression");
7654 
7655         Address LB =
7656             CGF.EmitOMPSharedLValue(I->getAssociatedExpression()).getAddress();
7657 
7658         // If this component is a pointer inside the base struct then we don't
7659         // need to create any entry for it - it will be combined with the object
7660         // it is pointing to into a single PTR_AND_OBJ entry.
7661         bool IsMemberPointer =
7662             IsPointer && EncounteredME &&
7663             (dyn_cast<MemberExpr>(I->getAssociatedExpression()) ==
7664              EncounteredME);
7665         if (!OverlappedElements.empty()) {
7666           // Handle base element with the info for overlapped elements.
7667           assert(!PartialStruct.Base.isValid() && "The base element is set.");
7668           assert(Next == CE &&
7669                  "Expected last element for the overlapped elements.");
7670           assert(!IsPointer &&
7671                  "Unexpected base element with the pointer type.");
7672           // Mark the whole struct as the struct that requires allocation on the
7673           // device.
7674           PartialStruct.LowestElem = {0, LB};
7675           CharUnits TypeSize = CGF.getContext().getTypeSizeInChars(
7676               I->getAssociatedExpression()->getType());
7677           Address HB = CGF.Builder.CreateConstGEP(
7678               CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(LB,
7679                                                               CGF.VoidPtrTy),
7680               TypeSize.getQuantity() - 1);
7681           PartialStruct.HighestElem = {
7682               std::numeric_limits<decltype(
7683                   PartialStruct.HighestElem.first)>::max(),
7684               HB};
7685           PartialStruct.Base = BP;
7686           // Emit data for non-overlapped data.
7687           OpenMPOffloadMappingFlags Flags =
7688               OMP_MAP_MEMBER_OF |
7689               getMapTypeBits(MapType, MapModifiers, IsImplicit,
7690                              /*AddPtrFlag=*/false,
7691                              /*AddIsTargetParamFlag=*/false);
7692           LB = BP;
7693           llvm::Value *Size = nullptr;
7694           // Do bitcopy of all non-overlapped structure elements.
7695           for (OMPClauseMappableExprCommon::MappableExprComponentListRef
7696                    Component : OverlappedElements) {
7697             Address ComponentLB = Address::invalid();
7698             for (const OMPClauseMappableExprCommon::MappableComponent &MC :
7699                  Component) {
7700               if (MC.getAssociatedDeclaration()) {
7701                 ComponentLB =
7702                     CGF.EmitOMPSharedLValue(MC.getAssociatedExpression())
7703                         .getAddress();
7704                 Size = CGF.Builder.CreatePtrDiff(
7705                     CGF.EmitCastToVoidPtr(ComponentLB.getPointer()),
7706                     CGF.EmitCastToVoidPtr(LB.getPointer()));
7707                 break;
7708               }
7709             }
7710             BasePointers.push_back(BP.getPointer());
7711             Pointers.push_back(LB.getPointer());
7712             Sizes.push_back(CGF.Builder.CreateIntCast(Size, CGF.Int64Ty,
7713                                                       /*isSigned=*/true));
7714             Types.push_back(Flags);
7715             LB = CGF.Builder.CreateConstGEP(ComponentLB, 1);
7716           }
7717           BasePointers.push_back(BP.getPointer());
7718           Pointers.push_back(LB.getPointer());
7719           Size = CGF.Builder.CreatePtrDiff(
7720               CGF.EmitCastToVoidPtr(
7721                   CGF.Builder.CreateConstGEP(HB, 1).getPointer()),
7722               CGF.EmitCastToVoidPtr(LB.getPointer()));
7723           Sizes.push_back(
7724               CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));
7725           Types.push_back(Flags);
7726           break;
7727         }
7728         llvm::Value *Size = getExprTypeSize(I->getAssociatedExpression());
7729         if (!IsMemberPointer) {
7730           BasePointers.push_back(BP.getPointer());
7731           Pointers.push_back(LB.getPointer());
7732           Sizes.push_back(
7733               CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));
7734 
7735           // We need to add a pointer flag for each map that comes from the
7736           // same expression except for the first one. We also need to signal
7737           // this map is the first one that relates with the current capture
7738           // (there is a set of entries for each capture).
7739           OpenMPOffloadMappingFlags Flags = getMapTypeBits(
7740               MapType, MapModifiers, IsImplicit,
7741               !IsExpressionFirstInfo || RequiresReference,
7742               IsCaptureFirstInfo && !RequiresReference);
7743 
7744           if (!IsExpressionFirstInfo) {
7745             // If we have a PTR_AND_OBJ pair where the OBJ is a pointer as well,
7746             // then we reset the TO/FROM/ALWAYS/DELETE/CLOSE flags.
7747             if (IsPointer)
7748               Flags &= ~(OMP_MAP_TO | OMP_MAP_FROM | OMP_MAP_ALWAYS |
7749                          OMP_MAP_DELETE | OMP_MAP_CLOSE);
7750 
7751             if (ShouldBeMemberOf) {
7752               // Set placeholder value MEMBER_OF=FFFF to indicate that the flag
7753               // should be later updated with the correct value of MEMBER_OF.
7754               Flags |= OMP_MAP_MEMBER_OF;
7755               // From now on, all subsequent PTR_AND_OBJ entries should not be
7756               // marked as MEMBER_OF.
7757               ShouldBeMemberOf = false;
7758             }
7759           }
7760 
7761           Types.push_back(Flags);
7762         }
7763 
7764         // If we have encountered a member expression so far, keep track of the
7765         // mapped member. If the parent is "*this", then the value declaration
7766         // is nullptr.
7767         if (EncounteredME) {
7768           const auto *FD = dyn_cast<FieldDecl>(EncounteredME->getMemberDecl());
7769           unsigned FieldIndex = FD->getFieldIndex();
7770 
7771           // Update info about the lowest and highest elements for this struct
7772           if (!PartialStruct.Base.isValid()) {
7773             PartialStruct.LowestElem = {FieldIndex, LB};
7774             PartialStruct.HighestElem = {FieldIndex, LB};
7775             PartialStruct.Base = BP;
7776           } else if (FieldIndex < PartialStruct.LowestElem.first) {
7777             PartialStruct.LowestElem = {FieldIndex, LB};
7778           } else if (FieldIndex > PartialStruct.HighestElem.first) {
7779             PartialStruct.HighestElem = {FieldIndex, LB};
7780           }
7781         }
7782 
7783         // If we have a final array section, we are done with this expression.
7784         if (IsFinalArraySection)
7785           break;
7786 
7787         // The pointer becomes the base for the next element.
7788         if (Next != CE)
7789           BP = LB;
7790 
7791         IsExpressionFirstInfo = false;
7792         IsCaptureFirstInfo = false;
7793       }
7794     }
7795   }
7796 
7797   /// Return the adjusted map modifiers if the declaration a capture refers to
7798   /// appears in a first-private clause. This is expected to be used only with
7799   /// directives that start with 'target'.
7800   MappableExprsHandler::OpenMPOffloadMappingFlags
7801   getMapModifiersForPrivateClauses(const CapturedStmt::Capture &Cap) const {
7802     assert(Cap.capturesVariable() && "Expected capture by reference only!");
7803 
7804     // A first private variable captured by reference will use only the
7805     // 'private ptr' and 'map to' flag. Return the right flags if the captured
7806     // declaration is known as first-private in this handler.
7807     if (FirstPrivateDecls.count(Cap.getCapturedVar())) {
7808       if (Cap.getCapturedVar()->getType().isConstant(CGF.getContext()) &&
7809           Cap.getCaptureKind() == CapturedStmt::VCK_ByRef)
7810         return MappableExprsHandler::OMP_MAP_ALWAYS |
7811                MappableExprsHandler::OMP_MAP_TO;
7812       if (Cap.getCapturedVar()->getType()->isAnyPointerType())
7813         return MappableExprsHandler::OMP_MAP_TO |
7814                MappableExprsHandler::OMP_MAP_PTR_AND_OBJ;
7815       return MappableExprsHandler::OMP_MAP_PRIVATE |
7816              MappableExprsHandler::OMP_MAP_TO;
7817     }
7818     return MappableExprsHandler::OMP_MAP_TO |
7819            MappableExprsHandler::OMP_MAP_FROM;
7820   }
7821 
7822   static OpenMPOffloadMappingFlags getMemberOfFlag(unsigned Position) {
7823     // Rotate by getFlagMemberOffset() bits.
7824     return static_cast<OpenMPOffloadMappingFlags>(((uint64_t)Position + 1)
7825                                                   << getFlagMemberOffset());
7826   }
7827 
7828   static void setCorrectMemberOfFlag(OpenMPOffloadMappingFlags &Flags,
7829                                      OpenMPOffloadMappingFlags MemberOfFlag) {
7830     // If the entry is PTR_AND_OBJ but has not been marked with the special
7831     // placeholder value 0xFFFF in the MEMBER_OF field, then it should not be
7832     // marked as MEMBER_OF.
7833     if ((Flags & OMP_MAP_PTR_AND_OBJ) &&
7834         ((Flags & OMP_MAP_MEMBER_OF) != OMP_MAP_MEMBER_OF))
7835       return;
7836 
7837     // Reset the placeholder value to prepare the flag for the assignment of the
7838     // proper MEMBER_OF value.
7839     Flags &= ~OMP_MAP_MEMBER_OF;
7840     Flags |= MemberOfFlag;
7841   }
7842 
7843   void getPlainLayout(const CXXRecordDecl *RD,
7844                       llvm::SmallVectorImpl<const FieldDecl *> &Layout,
7845                       bool AsBase) const {
7846     const CGRecordLayout &RL = CGF.getTypes().getCGRecordLayout(RD);
7847 
7848     llvm::StructType *St =
7849         AsBase ? RL.getBaseSubobjectLLVMType() : RL.getLLVMType();
7850 
7851     unsigned NumElements = St->getNumElements();
7852     llvm::SmallVector<
7853         llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>, 4>
7854         RecordLayout(NumElements);
7855 
7856     // Fill bases.
7857     for (const auto &I : RD->bases()) {
7858       if (I.isVirtual())
7859         continue;
7860       const auto *Base = I.getType()->getAsCXXRecordDecl();
7861       // Ignore empty bases.
7862       if (Base->isEmpty() || CGF.getContext()
7863                                  .getASTRecordLayout(Base)
7864                                  .getNonVirtualSize()
7865                                  .isZero())
7866         continue;
7867 
7868       unsigned FieldIndex = RL.getNonVirtualBaseLLVMFieldNo(Base);
7869       RecordLayout[FieldIndex] = Base;
7870     }
7871     // Fill in virtual bases.
7872     for (const auto &I : RD->vbases()) {
7873       const auto *Base = I.getType()->getAsCXXRecordDecl();
7874       // Ignore empty bases.
7875       if (Base->isEmpty())
7876         continue;
7877       unsigned FieldIndex = RL.getVirtualBaseIndex(Base);
7878       if (RecordLayout[FieldIndex])
7879         continue;
7880       RecordLayout[FieldIndex] = Base;
7881     }
7882     // Fill in all the fields.
7883     assert(!RD->isUnion() && "Unexpected union.");
7884     for (const auto *Field : RD->fields()) {
7885       // Fill in non-bitfields. (Bitfields always use a zero pattern, which we
7886       // will fill in later.)
7887       if (!Field->isBitField() && !Field->isZeroSize(CGF.getContext())) {
7888         unsigned FieldIndex = RL.getLLVMFieldNo(Field);
7889         RecordLayout[FieldIndex] = Field;
7890       }
7891     }
7892     for (const llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>
7893              &Data : RecordLayout) {
7894       if (Data.isNull())
7895         continue;
7896       if (const auto *Base = Data.dyn_cast<const CXXRecordDecl *>())
7897         getPlainLayout(Base, Layout, /*AsBase=*/true);
7898       else
7899         Layout.push_back(Data.get<const FieldDecl *>());
7900     }
7901   }
7902 
7903 public:
7904   MappableExprsHandler(const OMPExecutableDirective &Dir, CodeGenFunction &CGF)
7905       : CurDir(&Dir), CGF(CGF) {
7906     // Extract firstprivate clause information.
7907     for (const auto *C : Dir.getClausesOfKind<OMPFirstprivateClause>())
7908       for (const auto *D : C->varlists())
7909         FirstPrivateDecls.try_emplace(
7910             cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl()), C->isImplicit());
7911     // Extract device pointer clause information.
7912     for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>())
7913       for (auto L : C->component_lists())
7914         DevPointersMap[L.first].push_back(L.second);
7915   }
7916 
7917   /// Constructor for the declare mapper directive.
7918   MappableExprsHandler(const OMPDeclareMapperDecl &Dir, CodeGenFunction &CGF)
7919       : CurDir(&Dir), CGF(CGF) {}
7920 
7921   /// Generate code for the combined entry if we have a partially mapped struct
7922   /// and take care of the mapping flags of the arguments corresponding to
7923   /// individual struct members.
7924   void emitCombinedEntry(MapBaseValuesArrayTy &BasePointers,
7925                          MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes,
7926                          MapFlagsArrayTy &Types, MapFlagsArrayTy &CurTypes,
7927                          const StructRangeInfoTy &PartialStruct) const {
7928     // Base is the base of the struct
7929     BasePointers.push_back(PartialStruct.Base.getPointer());
7930     // Pointer is the address of the lowest element
7931     llvm::Value *LB = PartialStruct.LowestElem.second.getPointer();
7932     Pointers.push_back(LB);
7933     // Size is (addr of {highest+1} element) - (addr of lowest element)
7934     llvm::Value *HB = PartialStruct.HighestElem.second.getPointer();
7935     llvm::Value *HAddr = CGF.Builder.CreateConstGEP1_32(HB, /*Idx0=*/1);
7936     llvm::Value *CLAddr = CGF.Builder.CreatePointerCast(LB, CGF.VoidPtrTy);
7937     llvm::Value *CHAddr = CGF.Builder.CreatePointerCast(HAddr, CGF.VoidPtrTy);
7938     llvm::Value *Diff = CGF.Builder.CreatePtrDiff(CHAddr, CLAddr);
7939     llvm::Value *Size = CGF.Builder.CreateIntCast(Diff, CGF.Int64Ty,
7940                                                   /*isSigned=*/false);
7941     Sizes.push_back(Size);
7942     // Map type is always TARGET_PARAM
7943     Types.push_back(OMP_MAP_TARGET_PARAM);
7944     // Remove TARGET_PARAM flag from the first element
7945     (*CurTypes.begin()) &= ~OMP_MAP_TARGET_PARAM;
7946 
7947     // All other current entries will be MEMBER_OF the combined entry
7948     // (except for PTR_AND_OBJ entries which do not have a placeholder value
7949     // 0xFFFF in the MEMBER_OF field).
7950     OpenMPOffloadMappingFlags MemberOfFlag =
7951         getMemberOfFlag(BasePointers.size() - 1);
7952     for (auto &M : CurTypes)
7953       setCorrectMemberOfFlag(M, MemberOfFlag);
7954   }
7955 
7956   /// Generate all the base pointers, section pointers, sizes and map
7957   /// types for the extracted mappable expressions. Also, for each item that
7958   /// relates with a device pointer, a pair of the relevant declaration and
7959   /// index where it occurs is appended to the device pointers info array.
7960   void generateAllInfo(MapBaseValuesArrayTy &BasePointers,
7961                        MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes,
7962                        MapFlagsArrayTy &Types) const {
7963     // We have to process the component lists that relate with the same
7964     // declaration in a single chunk so that we can generate the map flags
7965     // correctly. Therefore, we organize all lists in a map.
7966     llvm::MapVector<const ValueDecl *, SmallVector<MapInfo, 8>> Info;
7967 
7968     // Helper function to fill the information map for the different supported
7969     // clauses.
7970     auto &&InfoGen = [&Info](
7971         const ValueDecl *D,
7972         OMPClauseMappableExprCommon::MappableExprComponentListRef L,
7973         OpenMPMapClauseKind MapType,
7974         ArrayRef<OpenMPMapModifierKind> MapModifiers,
7975         bool ReturnDevicePointer, bool IsImplicit) {
7976       const ValueDecl *VD =
7977           D ? cast<ValueDecl>(D->getCanonicalDecl()) : nullptr;
7978       Info[VD].emplace_back(L, MapType, MapModifiers, ReturnDevicePointer,
7979                             IsImplicit);
7980     };
7981 
7982     assert(CurDir.is<const OMPExecutableDirective *>() &&
7983            "Expect a executable directive");
7984     const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
7985     for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>())
7986       for (const auto &L : C->component_lists()) {
7987         InfoGen(L.first, L.second, C->getMapType(), C->getMapTypeModifiers(),
7988             /*ReturnDevicePointer=*/false, C->isImplicit());
7989       }
7990     for (const auto *C : CurExecDir->getClausesOfKind<OMPToClause>())
7991       for (const auto &L : C->component_lists()) {
7992         InfoGen(L.first, L.second, OMPC_MAP_to, llvm::None,
7993             /*ReturnDevicePointer=*/false, C->isImplicit());
7994       }
7995     for (const auto *C : CurExecDir->getClausesOfKind<OMPFromClause>())
7996       for (const auto &L : C->component_lists()) {
7997         InfoGen(L.first, L.second, OMPC_MAP_from, llvm::None,
7998             /*ReturnDevicePointer=*/false, C->isImplicit());
7999       }
8000 
8001     // Look at the use_device_ptr clause information and mark the existing map
8002     // entries as such. If there is no map information for an entry in the
8003     // use_device_ptr list, we create one with map type 'alloc' and zero size
8004     // section. It is the user fault if that was not mapped before. If there is
8005     // no map information and the pointer is a struct member, then we defer the
8006     // emission of that entry until the whole struct has been processed.
8007     llvm::MapVector<const ValueDecl *, SmallVector<DeferredDevicePtrEntryTy, 4>>
8008         DeferredInfo;
8009 
8010     for (const auto *C :
8011          CurExecDir->getClausesOfKind<OMPUseDevicePtrClause>()) {
8012       for (const auto &L : C->component_lists()) {
8013         assert(!L.second.empty() && "Not expecting empty list of components!");
8014         const ValueDecl *VD = L.second.back().getAssociatedDeclaration();
8015         VD = cast<ValueDecl>(VD->getCanonicalDecl());
8016         const Expr *IE = L.second.back().getAssociatedExpression();
8017         // If the first component is a member expression, we have to look into
8018         // 'this', which maps to null in the map of map information. Otherwise
8019         // look directly for the information.
8020         auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);
8021 
8022         // We potentially have map information for this declaration already.
8023         // Look for the first set of components that refer to it.
8024         if (It != Info.end()) {
8025           auto CI = std::find_if(
8026               It->second.begin(), It->second.end(), [VD](const MapInfo &MI) {
8027                 return MI.Components.back().getAssociatedDeclaration() == VD;
8028               });
8029           // If we found a map entry, signal that the pointer has to be returned
8030           // and move on to the next declaration.
8031           if (CI != It->second.end()) {
8032             CI->ReturnDevicePointer = true;
8033             continue;
8034           }
8035         }
8036 
8037         // We didn't find any match in our map information - generate a zero
8038         // size array section - if the pointer is a struct member we defer this
8039         // action until the whole struct has been processed.
8040         if (isa<MemberExpr>(IE)) {
8041           // Insert the pointer into Info to be processed by
8042           // generateInfoForComponentList. Because it is a member pointer
8043           // without a pointee, no entry will be generated for it, therefore
8044           // we need to generate one after the whole struct has been processed.
8045           // Nonetheless, generateInfoForComponentList must be called to take
8046           // the pointer into account for the calculation of the range of the
8047           // partial struct.
8048           InfoGen(nullptr, L.second, OMPC_MAP_unknown, llvm::None,
8049                   /*ReturnDevicePointer=*/false, C->isImplicit());
8050           DeferredInfo[nullptr].emplace_back(IE, VD);
8051         } else {
8052           llvm::Value *Ptr =
8053               CGF.EmitLoadOfScalar(CGF.EmitLValue(IE), IE->getExprLoc());
8054           BasePointers.emplace_back(Ptr, VD);
8055           Pointers.push_back(Ptr);
8056           Sizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty));
8057           Types.push_back(OMP_MAP_RETURN_PARAM | OMP_MAP_TARGET_PARAM);
8058         }
8059       }
8060     }
8061 
8062     for (const auto &M : Info) {
8063       // We need to know when we generate information for the first component
8064       // associated with a capture, because the mapping flags depend on it.
8065       bool IsFirstComponentList = true;
8066 
8067       // Temporary versions of arrays
8068       MapBaseValuesArrayTy CurBasePointers;
8069       MapValuesArrayTy CurPointers;
8070       MapValuesArrayTy CurSizes;
8071       MapFlagsArrayTy CurTypes;
8072       StructRangeInfoTy PartialStruct;
8073 
8074       for (const MapInfo &L : M.second) {
8075         assert(!L.Components.empty() &&
8076                "Not expecting declaration with no component lists.");
8077 
8078         // Remember the current base pointer index.
8079         unsigned CurrentBasePointersIdx = CurBasePointers.size();
8080         generateInfoForComponentList(L.MapType, L.MapModifiers, L.Components,
8081                                      CurBasePointers, CurPointers, CurSizes,
8082                                      CurTypes, PartialStruct,
8083                                      IsFirstComponentList, L.IsImplicit);
8084 
8085         // If this entry relates with a device pointer, set the relevant
8086         // declaration and add the 'return pointer' flag.
8087         if (L.ReturnDevicePointer) {
8088           assert(CurBasePointers.size() > CurrentBasePointersIdx &&
8089                  "Unexpected number of mapped base pointers.");
8090 
8091           const ValueDecl *RelevantVD =
8092               L.Components.back().getAssociatedDeclaration();
8093           assert(RelevantVD &&
8094                  "No relevant declaration related with device pointer??");
8095 
8096           CurBasePointers[CurrentBasePointersIdx].setDevicePtrDecl(RelevantVD);
8097           CurTypes[CurrentBasePointersIdx] |= OMP_MAP_RETURN_PARAM;
8098         }
8099         IsFirstComponentList = false;
8100       }
8101 
8102       // Append any pending zero-length pointers which are struct members and
8103       // used with use_device_ptr.
8104       auto CI = DeferredInfo.find(M.first);
8105       if (CI != DeferredInfo.end()) {
8106         for (const DeferredDevicePtrEntryTy &L : CI->second) {
8107           llvm::Value *BasePtr = this->CGF.EmitLValue(L.IE).getPointer();
8108           llvm::Value *Ptr = this->CGF.EmitLoadOfScalar(
8109               this->CGF.EmitLValue(L.IE), L.IE->getExprLoc());
8110           CurBasePointers.emplace_back(BasePtr, L.VD);
8111           CurPointers.push_back(Ptr);
8112           CurSizes.push_back(llvm::Constant::getNullValue(this->CGF.Int64Ty));
8113           // Entry is PTR_AND_OBJ and RETURN_PARAM. Also, set the placeholder
8114           // value MEMBER_OF=FFFF so that the entry is later updated with the
8115           // correct value of MEMBER_OF.
8116           CurTypes.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_RETURN_PARAM |
8117                              OMP_MAP_MEMBER_OF);
8118         }
8119       }
8120 
8121       // If there is an entry in PartialStruct it means we have a struct with
8122       // individual members mapped. Emit an extra combined entry.
8123       if (PartialStruct.Base.isValid())
8124         emitCombinedEntry(BasePointers, Pointers, Sizes, Types, CurTypes,
8125                           PartialStruct);
8126 
8127       // We need to append the results of this capture to what we already have.
8128       BasePointers.append(CurBasePointers.begin(), CurBasePointers.end());
8129       Pointers.append(CurPointers.begin(), CurPointers.end());
8130       Sizes.append(CurSizes.begin(), CurSizes.end());
8131       Types.append(CurTypes.begin(), CurTypes.end());
8132     }
8133   }
8134 
8135   /// Generate all the base pointers, section pointers, sizes and map types for
8136   /// the extracted map clauses of user-defined mapper.
8137   void generateAllInfoForMapper(MapBaseValuesArrayTy &BasePointers,
8138                                 MapValuesArrayTy &Pointers,
8139                                 MapValuesArrayTy &Sizes,
8140                                 MapFlagsArrayTy &Types) const {
8141     assert(CurDir.is<const OMPDeclareMapperDecl *>() &&
8142            "Expect a declare mapper directive");
8143     const auto *CurMapperDir = CurDir.get<const OMPDeclareMapperDecl *>();
8144     // We have to process the component lists that relate with the same
8145     // declaration in a single chunk so that we can generate the map flags
8146     // correctly. Therefore, we organize all lists in a map.
8147     llvm::MapVector<const ValueDecl *, SmallVector<MapInfo, 8>> Info;
8148 
8149     // Helper function to fill the information map for the different supported
8150     // clauses.
8151     auto &&InfoGen = [&Info](
8152         const ValueDecl *D,
8153         OMPClauseMappableExprCommon::MappableExprComponentListRef L,
8154         OpenMPMapClauseKind MapType,
8155         ArrayRef<OpenMPMapModifierKind> MapModifiers,
8156         bool ReturnDevicePointer, bool IsImplicit) {
8157       const ValueDecl *VD =
8158           D ? cast<ValueDecl>(D->getCanonicalDecl()) : nullptr;
8159       Info[VD].emplace_back(L, MapType, MapModifiers, ReturnDevicePointer,
8160                             IsImplicit);
8161     };
8162 
8163     for (const auto *C : CurMapperDir->clauselists()) {
8164       const auto *MC = cast<OMPMapClause>(C);
8165       for (const auto &L : MC->component_lists()) {
8166         InfoGen(L.first, L.second, MC->getMapType(), MC->getMapTypeModifiers(),
8167                 /*ReturnDevicePointer=*/false, MC->isImplicit());
8168       }
8169     }
8170 
8171     for (const auto &M : Info) {
8172       // We need to know when we generate information for the first component
8173       // associated with a capture, because the mapping flags depend on it.
8174       bool IsFirstComponentList = true;
8175 
8176       // Temporary versions of arrays
8177       MapBaseValuesArrayTy CurBasePointers;
8178       MapValuesArrayTy CurPointers;
8179       MapValuesArrayTy CurSizes;
8180       MapFlagsArrayTy CurTypes;
8181       StructRangeInfoTy PartialStruct;
8182 
8183       for (const MapInfo &L : M.second) {
8184         assert(!L.Components.empty() &&
8185                "Not expecting declaration with no component lists.");
8186         generateInfoForComponentList(L.MapType, L.MapModifiers, L.Components,
8187                                      CurBasePointers, CurPointers, CurSizes,
8188                                      CurTypes, PartialStruct,
8189                                      IsFirstComponentList, L.IsImplicit);
8190         IsFirstComponentList = false;
8191       }
8192 
8193       // If there is an entry in PartialStruct it means we have a struct with
8194       // individual members mapped. Emit an extra combined entry.
8195       if (PartialStruct.Base.isValid())
8196         emitCombinedEntry(BasePointers, Pointers, Sizes, Types, CurTypes,
8197                           PartialStruct);
8198 
8199       // We need to append the results of this capture to what we already have.
8200       BasePointers.append(CurBasePointers.begin(), CurBasePointers.end());
8201       Pointers.append(CurPointers.begin(), CurPointers.end());
8202       Sizes.append(CurSizes.begin(), CurSizes.end());
8203       Types.append(CurTypes.begin(), CurTypes.end());
8204     }
8205   }
8206 
8207   /// Emit capture info for lambdas for variables captured by reference.
8208   void generateInfoForLambdaCaptures(
8209       const ValueDecl *VD, llvm::Value *Arg, MapBaseValuesArrayTy &BasePointers,
8210       MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes,
8211       MapFlagsArrayTy &Types,
8212       llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers) const {
8213     const auto *RD = VD->getType()
8214                          .getCanonicalType()
8215                          .getNonReferenceType()
8216                          ->getAsCXXRecordDecl();
8217     if (!RD || !RD->isLambda())
8218       return;
8219     Address VDAddr = Address(Arg, CGF.getContext().getDeclAlign(VD));
8220     LValue VDLVal = CGF.MakeAddrLValue(
8221         VDAddr, VD->getType().getCanonicalType().getNonReferenceType());
8222     llvm::DenseMap<const VarDecl *, FieldDecl *> Captures;
8223     FieldDecl *ThisCapture = nullptr;
8224     RD->getCaptureFields(Captures, ThisCapture);
8225     if (ThisCapture) {
8226       LValue ThisLVal =
8227           CGF.EmitLValueForFieldInitialization(VDLVal, ThisCapture);
8228       LValue ThisLValVal = CGF.EmitLValueForField(VDLVal, ThisCapture);
8229       LambdaPointers.try_emplace(ThisLVal.getPointer(), VDLVal.getPointer());
8230       BasePointers.push_back(ThisLVal.getPointer());
8231       Pointers.push_back(ThisLValVal.getPointer());
8232       Sizes.push_back(
8233           CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy),
8234                                     CGF.Int64Ty, /*isSigned=*/true));
8235       Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
8236                       OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
8237     }
8238     for (const LambdaCapture &LC : RD->captures()) {
8239       if (!LC.capturesVariable())
8240         continue;
8241       const VarDecl *VD = LC.getCapturedVar();
8242       if (LC.getCaptureKind() != LCK_ByRef && !VD->getType()->isPointerType())
8243         continue;
8244       auto It = Captures.find(VD);
8245       assert(It != Captures.end() && "Found lambda capture without field.");
8246       LValue VarLVal = CGF.EmitLValueForFieldInitialization(VDLVal, It->second);
8247       if (LC.getCaptureKind() == LCK_ByRef) {
8248         LValue VarLValVal = CGF.EmitLValueForField(VDLVal, It->second);
8249         LambdaPointers.try_emplace(VarLVal.getPointer(), VDLVal.getPointer());
8250         BasePointers.push_back(VarLVal.getPointer());
8251         Pointers.push_back(VarLValVal.getPointer());
8252         Sizes.push_back(CGF.Builder.CreateIntCast(
8253             CGF.getTypeSize(
8254                 VD->getType().getCanonicalType().getNonReferenceType()),
8255             CGF.Int64Ty, /*isSigned=*/true));
8256       } else {
8257         RValue VarRVal = CGF.EmitLoadOfLValue(VarLVal, RD->getLocation());
8258         LambdaPointers.try_emplace(VarLVal.getPointer(), VDLVal.getPointer());
8259         BasePointers.push_back(VarLVal.getPointer());
8260         Pointers.push_back(VarRVal.getScalarVal());
8261         Sizes.push_back(llvm::ConstantInt::get(CGF.Int64Ty, 0));
8262       }
8263       Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
8264                       OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
8265     }
8266   }
8267 
8268   /// Set correct indices for lambdas captures.
8269   void adjustMemberOfForLambdaCaptures(
8270       const llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers,
8271       MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
8272       MapFlagsArrayTy &Types) const {
8273     for (unsigned I = 0, E = Types.size(); I < E; ++I) {
8274       // Set correct member_of idx for all implicit lambda captures.
8275       if (Types[I] != (OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
8276                        OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT))
8277         continue;
8278       llvm::Value *BasePtr = LambdaPointers.lookup(*BasePointers[I]);
8279       assert(BasePtr && "Unable to find base lambda address.");
8280       int TgtIdx = -1;
8281       for (unsigned J = I; J > 0; --J) {
8282         unsigned Idx = J - 1;
8283         if (Pointers[Idx] != BasePtr)
8284           continue;
8285         TgtIdx = Idx;
8286         break;
8287       }
8288       assert(TgtIdx != -1 && "Unable to find parent lambda.");
8289       // All other current entries will be MEMBER_OF the combined entry
8290       // (except for PTR_AND_OBJ entries which do not have a placeholder value
8291       // 0xFFFF in the MEMBER_OF field).
8292       OpenMPOffloadMappingFlags MemberOfFlag = getMemberOfFlag(TgtIdx);
8293       setCorrectMemberOfFlag(Types[I], MemberOfFlag);
8294     }
8295   }
8296 
8297   /// Generate the base pointers, section pointers, sizes and map types
8298   /// associated to a given capture.
8299   void generateInfoForCapture(const CapturedStmt::Capture *Cap,
8300                               llvm::Value *Arg,
8301                               MapBaseValuesArrayTy &BasePointers,
8302                               MapValuesArrayTy &Pointers,
8303                               MapValuesArrayTy &Sizes, MapFlagsArrayTy &Types,
8304                               StructRangeInfoTy &PartialStruct) const {
8305     assert(!Cap->capturesVariableArrayType() &&
8306            "Not expecting to generate map info for a variable array type!");
8307 
8308     // We need to know when we generating information for the first component
8309     const ValueDecl *VD = Cap->capturesThis()
8310                               ? nullptr
8311                               : Cap->getCapturedVar()->getCanonicalDecl();
8312 
8313     // If this declaration appears in a is_device_ptr clause we just have to
8314     // pass the pointer by value. If it is a reference to a declaration, we just
8315     // pass its value.
8316     if (DevPointersMap.count(VD)) {
8317       BasePointers.emplace_back(Arg, VD);
8318       Pointers.push_back(Arg);
8319       Sizes.push_back(
8320           CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy),
8321                                     CGF.Int64Ty, /*isSigned=*/true));
8322       Types.push_back(OMP_MAP_LITERAL | OMP_MAP_TARGET_PARAM);
8323       return;
8324     }
8325 
8326     using MapData =
8327         std::tuple<OMPClauseMappableExprCommon::MappableExprComponentListRef,
8328                    OpenMPMapClauseKind, ArrayRef<OpenMPMapModifierKind>, bool>;
8329     SmallVector<MapData, 4> DeclComponentLists;
8330     assert(CurDir.is<const OMPExecutableDirective *>() &&
8331            "Expect a executable directive");
8332     const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
8333     for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) {
8334       for (const auto &L : C->decl_component_lists(VD)) {
8335         assert(L.first == VD &&
8336                "We got information for the wrong declaration??");
8337         assert(!L.second.empty() &&
8338                "Not expecting declaration with no component lists.");
8339         DeclComponentLists.emplace_back(L.second, C->getMapType(),
8340                                         C->getMapTypeModifiers(),
8341                                         C->isImplicit());
8342       }
8343     }
8344 
8345     // Find overlapping elements (including the offset from the base element).
8346     llvm::SmallDenseMap<
8347         const MapData *,
8348         llvm::SmallVector<
8349             OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>,
8350         4>
8351         OverlappedData;
8352     size_t Count = 0;
8353     for (const MapData &L : DeclComponentLists) {
8354       OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
8355       OpenMPMapClauseKind MapType;
8356       ArrayRef<OpenMPMapModifierKind> MapModifiers;
8357       bool IsImplicit;
8358       std::tie(Components, MapType, MapModifiers, IsImplicit) = L;
8359       ++Count;
8360       for (const MapData &L1 : makeArrayRef(DeclComponentLists).slice(Count)) {
8361         OMPClauseMappableExprCommon::MappableExprComponentListRef Components1;
8362         std::tie(Components1, MapType, MapModifiers, IsImplicit) = L1;
8363         auto CI = Components.rbegin();
8364         auto CE = Components.rend();
8365         auto SI = Components1.rbegin();
8366         auto SE = Components1.rend();
8367         for (; CI != CE && SI != SE; ++CI, ++SI) {
8368           if (CI->getAssociatedExpression()->getStmtClass() !=
8369               SI->getAssociatedExpression()->getStmtClass())
8370             break;
8371           // Are we dealing with different variables/fields?
8372           if (CI->getAssociatedDeclaration() != SI->getAssociatedDeclaration())
8373             break;
8374         }
8375         // Found overlapping if, at least for one component, reached the head of
8376         // the components list.
8377         if (CI == CE || SI == SE) {
8378           assert((CI != CE || SI != SE) &&
8379                  "Unexpected full match of the mapping components.");
8380           const MapData &BaseData = CI == CE ? L : L1;
8381           OMPClauseMappableExprCommon::MappableExprComponentListRef SubData =
8382               SI == SE ? Components : Components1;
8383           auto &OverlappedElements = OverlappedData.FindAndConstruct(&BaseData);
8384           OverlappedElements.getSecond().push_back(SubData);
8385         }
8386       }
8387     }
8388     // Sort the overlapped elements for each item.
8389     llvm::SmallVector<const FieldDecl *, 4> Layout;
8390     if (!OverlappedData.empty()) {
8391       if (const auto *CRD =
8392               VD->getType().getCanonicalType()->getAsCXXRecordDecl())
8393         getPlainLayout(CRD, Layout, /*AsBase=*/false);
8394       else {
8395         const auto *RD = VD->getType().getCanonicalType()->getAsRecordDecl();
8396         Layout.append(RD->field_begin(), RD->field_end());
8397       }
8398     }
8399     for (auto &Pair : OverlappedData) {
8400       llvm::sort(
8401           Pair.getSecond(),
8402           [&Layout](
8403               OMPClauseMappableExprCommon::MappableExprComponentListRef First,
8404               OMPClauseMappableExprCommon::MappableExprComponentListRef
8405                   Second) {
8406             auto CI = First.rbegin();
8407             auto CE = First.rend();
8408             auto SI = Second.rbegin();
8409             auto SE = Second.rend();
8410             for (; CI != CE && SI != SE; ++CI, ++SI) {
8411               if (CI->getAssociatedExpression()->getStmtClass() !=
8412                   SI->getAssociatedExpression()->getStmtClass())
8413                 break;
8414               // Are we dealing with different variables/fields?
8415               if (CI->getAssociatedDeclaration() !=
8416                   SI->getAssociatedDeclaration())
8417                 break;
8418             }
8419 
8420             // Lists contain the same elements.
8421             if (CI == CE && SI == SE)
8422               return false;
8423 
8424             // List with less elements is less than list with more elements.
8425             if (CI == CE || SI == SE)
8426               return CI == CE;
8427 
8428             const auto *FD1 = cast<FieldDecl>(CI->getAssociatedDeclaration());
8429             const auto *FD2 = cast<FieldDecl>(SI->getAssociatedDeclaration());
8430             if (FD1->getParent() == FD2->getParent())
8431               return FD1->getFieldIndex() < FD2->getFieldIndex();
8432             const auto It =
8433                 llvm::find_if(Layout, [FD1, FD2](const FieldDecl *FD) {
8434                   return FD == FD1 || FD == FD2;
8435                 });
8436             return *It == FD1;
8437           });
8438     }
8439 
8440     // Associated with a capture, because the mapping flags depend on it.
8441     // Go through all of the elements with the overlapped elements.
8442     for (const auto &Pair : OverlappedData) {
8443       const MapData &L = *Pair.getFirst();
8444       OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
8445       OpenMPMapClauseKind MapType;
8446       ArrayRef<OpenMPMapModifierKind> MapModifiers;
8447       bool IsImplicit;
8448       std::tie(Components, MapType, MapModifiers, IsImplicit) = L;
8449       ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
8450           OverlappedComponents = Pair.getSecond();
8451       bool IsFirstComponentList = true;
8452       generateInfoForComponentList(MapType, MapModifiers, Components,
8453                                    BasePointers, Pointers, Sizes, Types,
8454                                    PartialStruct, IsFirstComponentList,
8455                                    IsImplicit, OverlappedComponents);
8456     }
8457     // Go through other elements without overlapped elements.
8458     bool IsFirstComponentList = OverlappedData.empty();
8459     for (const MapData &L : DeclComponentLists) {
8460       OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
8461       OpenMPMapClauseKind MapType;
8462       ArrayRef<OpenMPMapModifierKind> MapModifiers;
8463       bool IsImplicit;
8464       std::tie(Components, MapType, MapModifiers, IsImplicit) = L;
8465       auto It = OverlappedData.find(&L);
8466       if (It == OverlappedData.end())
8467         generateInfoForComponentList(MapType, MapModifiers, Components,
8468                                      BasePointers, Pointers, Sizes, Types,
8469                                      PartialStruct, IsFirstComponentList,
8470                                      IsImplicit);
8471       IsFirstComponentList = false;
8472     }
8473   }
8474 
8475   /// Generate the base pointers, section pointers, sizes and map types
8476   /// associated with the declare target link variables.
8477   void generateInfoForDeclareTargetLink(MapBaseValuesArrayTy &BasePointers,
8478                                         MapValuesArrayTy &Pointers,
8479                                         MapValuesArrayTy &Sizes,
8480                                         MapFlagsArrayTy &Types) const {
8481     assert(CurDir.is<const OMPExecutableDirective *>() &&
8482            "Expect a executable directive");
8483     const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
8484     // Map other list items in the map clause which are not captured variables
8485     // but "declare target link" global variables.
8486     for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) {
8487       for (const auto &L : C->component_lists()) {
8488         if (!L.first)
8489           continue;
8490         const auto *VD = dyn_cast<VarDecl>(L.first);
8491         if (!VD)
8492           continue;
8493         llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
8494             OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
8495         if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
8496             !Res || *Res != OMPDeclareTargetDeclAttr::MT_Link)
8497           continue;
8498         StructRangeInfoTy PartialStruct;
8499         generateInfoForComponentList(
8500             C->getMapType(), C->getMapTypeModifiers(), L.second, BasePointers,
8501             Pointers, Sizes, Types, PartialStruct,
8502             /*IsFirstComponentList=*/true, C->isImplicit());
8503         assert(!PartialStruct.Base.isValid() &&
8504                "No partial structs for declare target link expected.");
8505       }
8506     }
8507   }
8508 
8509   /// Generate the default map information for a given capture \a CI,
8510   /// record field declaration \a RI and captured value \a CV.
8511   void generateDefaultMapInfo(const CapturedStmt::Capture &CI,
8512                               const FieldDecl &RI, llvm::Value *CV,
8513                               MapBaseValuesArrayTy &CurBasePointers,
8514                               MapValuesArrayTy &CurPointers,
8515                               MapValuesArrayTy &CurSizes,
8516                               MapFlagsArrayTy &CurMapTypes) const {
8517     bool IsImplicit = true;
8518     // Do the default mapping.
8519     if (CI.capturesThis()) {
8520       CurBasePointers.push_back(CV);
8521       CurPointers.push_back(CV);
8522       const auto *PtrTy = cast<PointerType>(RI.getType().getTypePtr());
8523       CurSizes.push_back(
8524           CGF.Builder.CreateIntCast(CGF.getTypeSize(PtrTy->getPointeeType()),
8525                                     CGF.Int64Ty, /*isSigned=*/true));
8526       // Default map type.
8527       CurMapTypes.push_back(OMP_MAP_TO | OMP_MAP_FROM);
8528     } else if (CI.capturesVariableByCopy()) {
8529       CurBasePointers.push_back(CV);
8530       CurPointers.push_back(CV);
8531       if (!RI.getType()->isAnyPointerType()) {
8532         // We have to signal to the runtime captures passed by value that are
8533         // not pointers.
8534         CurMapTypes.push_back(OMP_MAP_LITERAL);
8535         CurSizes.push_back(CGF.Builder.CreateIntCast(
8536             CGF.getTypeSize(RI.getType()), CGF.Int64Ty, /*isSigned=*/true));
8537       } else {
8538         // Pointers are implicitly mapped with a zero size and no flags
8539         // (other than first map that is added for all implicit maps).
8540         CurMapTypes.push_back(OMP_MAP_NONE);
8541         CurSizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty));
8542       }
8543       const VarDecl *VD = CI.getCapturedVar();
8544       auto I = FirstPrivateDecls.find(VD);
8545       if (I != FirstPrivateDecls.end())
8546         IsImplicit = I->getSecond();
8547     } else {
8548       assert(CI.capturesVariable() && "Expected captured reference.");
8549       const auto *PtrTy = cast<ReferenceType>(RI.getType().getTypePtr());
8550       QualType ElementType = PtrTy->getPointeeType();
8551       CurSizes.push_back(CGF.Builder.CreateIntCast(
8552           CGF.getTypeSize(ElementType), CGF.Int64Ty, /*isSigned=*/true));
8553       // The default map type for a scalar/complex type is 'to' because by
8554       // default the value doesn't have to be retrieved. For an aggregate
8555       // type, the default is 'tofrom'.
8556       CurMapTypes.push_back(getMapModifiersForPrivateClauses(CI));
8557       const VarDecl *VD = CI.getCapturedVar();
8558       auto I = FirstPrivateDecls.find(VD);
8559       if (I != FirstPrivateDecls.end() &&
8560           VD->getType().isConstant(CGF.getContext())) {
8561         llvm::Constant *Addr =
8562             CGF.CGM.getOpenMPRuntime().registerTargetFirstprivateCopy(CGF, VD);
8563         // Copy the value of the original variable to the new global copy.
8564         CGF.Builder.CreateMemCpy(
8565             CGF.MakeNaturalAlignAddrLValue(Addr, ElementType).getAddress(),
8566             Address(CV, CGF.getContext().getTypeAlignInChars(ElementType)),
8567             CurSizes.back(), /*IsVolatile=*/false);
8568         // Use new global variable as the base pointers.
8569         CurBasePointers.push_back(Addr);
8570         CurPointers.push_back(Addr);
8571       } else {
8572         CurBasePointers.push_back(CV);
8573         if (I != FirstPrivateDecls.end() && ElementType->isAnyPointerType()) {
8574           Address PtrAddr = CGF.EmitLoadOfReference(CGF.MakeAddrLValue(
8575               CV, ElementType, CGF.getContext().getDeclAlign(VD),
8576               AlignmentSource::Decl));
8577           CurPointers.push_back(PtrAddr.getPointer());
8578         } else {
8579           CurPointers.push_back(CV);
8580         }
8581       }
8582       if (I != FirstPrivateDecls.end())
8583         IsImplicit = I->getSecond();
8584     }
8585     // Every default map produces a single argument which is a target parameter.
8586     CurMapTypes.back() |= OMP_MAP_TARGET_PARAM;
8587 
8588     // Add flag stating this is an implicit map.
8589     if (IsImplicit)
8590       CurMapTypes.back() |= OMP_MAP_IMPLICIT;
8591   }
8592 };
8593 } // anonymous namespace
8594 
8595 /// Emit the arrays used to pass the captures and map information to the
8596 /// offloading runtime library. If there is no map or capture information,
8597 /// return nullptr by reference.
8598 static void
8599 emitOffloadingArrays(CodeGenFunction &CGF,
8600                      MappableExprsHandler::MapBaseValuesArrayTy &BasePointers,
8601                      MappableExprsHandler::MapValuesArrayTy &Pointers,
8602                      MappableExprsHandler::MapValuesArrayTy &Sizes,
8603                      MappableExprsHandler::MapFlagsArrayTy &MapTypes,
8604                      CGOpenMPRuntime::TargetDataInfo &Info) {
8605   CodeGenModule &CGM = CGF.CGM;
8606   ASTContext &Ctx = CGF.getContext();
8607 
8608   // Reset the array information.
8609   Info.clearArrayInfo();
8610   Info.NumberOfPtrs = BasePointers.size();
8611 
8612   if (Info.NumberOfPtrs) {
8613     // Detect if we have any capture size requiring runtime evaluation of the
8614     // size so that a constant array could be eventually used.
8615     bool hasRuntimeEvaluationCaptureSize = false;
8616     for (llvm::Value *S : Sizes)
8617       if (!isa<llvm::Constant>(S)) {
8618         hasRuntimeEvaluationCaptureSize = true;
8619         break;
8620       }
8621 
8622     llvm::APInt PointerNumAP(32, Info.NumberOfPtrs, /*isSigned=*/true);
8623     QualType PointerArrayType =
8624         Ctx.getConstantArrayType(Ctx.VoidPtrTy, PointerNumAP, ArrayType::Normal,
8625                                  /*IndexTypeQuals=*/0);
8626 
8627     Info.BasePointersArray =
8628         CGF.CreateMemTemp(PointerArrayType, ".offload_baseptrs").getPointer();
8629     Info.PointersArray =
8630         CGF.CreateMemTemp(PointerArrayType, ".offload_ptrs").getPointer();
8631 
8632     // If we don't have any VLA types or other types that require runtime
8633     // evaluation, we can use a constant array for the map sizes, otherwise we
8634     // need to fill up the arrays as we do for the pointers.
8635     QualType Int64Ty =
8636         Ctx.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
8637     if (hasRuntimeEvaluationCaptureSize) {
8638       QualType SizeArrayType =
8639           Ctx.getConstantArrayType(Int64Ty, PointerNumAP, ArrayType::Normal,
8640                                    /*IndexTypeQuals=*/0);
8641       Info.SizesArray =
8642           CGF.CreateMemTemp(SizeArrayType, ".offload_sizes").getPointer();
8643     } else {
8644       // We expect all the sizes to be constant, so we collect them to create
8645       // a constant array.
8646       SmallVector<llvm::Constant *, 16> ConstSizes;
8647       for (llvm::Value *S : Sizes)
8648         ConstSizes.push_back(cast<llvm::Constant>(S));
8649 
8650       auto *SizesArrayInit = llvm::ConstantArray::get(
8651           llvm::ArrayType::get(CGM.Int64Ty, ConstSizes.size()), ConstSizes);
8652       std::string Name = CGM.getOpenMPRuntime().getName({"offload_sizes"});
8653       auto *SizesArrayGbl = new llvm::GlobalVariable(
8654           CGM.getModule(), SizesArrayInit->getType(),
8655           /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
8656           SizesArrayInit, Name);
8657       SizesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
8658       Info.SizesArray = SizesArrayGbl;
8659     }
8660 
8661     // The map types are always constant so we don't need to generate code to
8662     // fill arrays. Instead, we create an array constant.
8663     SmallVector<uint64_t, 4> Mapping(MapTypes.size(), 0);
8664     llvm::copy(MapTypes, Mapping.begin());
8665     llvm::Constant *MapTypesArrayInit =
8666         llvm::ConstantDataArray::get(CGF.Builder.getContext(), Mapping);
8667     std::string MaptypesName =
8668         CGM.getOpenMPRuntime().getName({"offload_maptypes"});
8669     auto *MapTypesArrayGbl = new llvm::GlobalVariable(
8670         CGM.getModule(), MapTypesArrayInit->getType(),
8671         /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
8672         MapTypesArrayInit, MaptypesName);
8673     MapTypesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
8674     Info.MapTypesArray = MapTypesArrayGbl;
8675 
8676     for (unsigned I = 0; I < Info.NumberOfPtrs; ++I) {
8677       llvm::Value *BPVal = *BasePointers[I];
8678       llvm::Value *BP = CGF.Builder.CreateConstInBoundsGEP2_32(
8679           llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
8680           Info.BasePointersArray, 0, I);
8681       BP = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
8682           BP, BPVal->getType()->getPointerTo(/*AddrSpace=*/0));
8683       Address BPAddr(BP, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
8684       CGF.Builder.CreateStore(BPVal, BPAddr);
8685 
8686       if (Info.requiresDevicePointerInfo())
8687         if (const ValueDecl *DevVD = BasePointers[I].getDevicePtrDecl())
8688           Info.CaptureDeviceAddrMap.try_emplace(DevVD, BPAddr);
8689 
8690       llvm::Value *PVal = Pointers[I];
8691       llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32(
8692           llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
8693           Info.PointersArray, 0, I);
8694       P = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
8695           P, PVal->getType()->getPointerTo(/*AddrSpace=*/0));
8696       Address PAddr(P, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
8697       CGF.Builder.CreateStore(PVal, PAddr);
8698 
8699       if (hasRuntimeEvaluationCaptureSize) {
8700         llvm::Value *S = CGF.Builder.CreateConstInBoundsGEP2_32(
8701             llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs),
8702             Info.SizesArray,
8703             /*Idx0=*/0,
8704             /*Idx1=*/I);
8705         Address SAddr(S, Ctx.getTypeAlignInChars(Int64Ty));
8706         CGF.Builder.CreateStore(
8707             CGF.Builder.CreateIntCast(Sizes[I], CGM.Int64Ty, /*isSigned=*/true),
8708             SAddr);
8709       }
8710     }
8711   }
8712 }
8713 
8714 /// Emit the arguments to be passed to the runtime library based on the
8715 /// arrays of pointers, sizes and map types.
8716 static void emitOffloadingArraysArgument(
8717     CodeGenFunction &CGF, llvm::Value *&BasePointersArrayArg,
8718     llvm::Value *&PointersArrayArg, llvm::Value *&SizesArrayArg,
8719     llvm::Value *&MapTypesArrayArg, CGOpenMPRuntime::TargetDataInfo &Info) {
8720   CodeGenModule &CGM = CGF.CGM;
8721   if (Info.NumberOfPtrs) {
8722     BasePointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
8723         llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
8724         Info.BasePointersArray,
8725         /*Idx0=*/0, /*Idx1=*/0);
8726     PointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
8727         llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
8728         Info.PointersArray,
8729         /*Idx0=*/0,
8730         /*Idx1=*/0);
8731     SizesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
8732         llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs), Info.SizesArray,
8733         /*Idx0=*/0, /*Idx1=*/0);
8734     MapTypesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
8735         llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs),
8736         Info.MapTypesArray,
8737         /*Idx0=*/0,
8738         /*Idx1=*/0);
8739   } else {
8740     BasePointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
8741     PointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
8742     SizesArrayArg = llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo());
8743     MapTypesArrayArg =
8744         llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo());
8745   }
8746 }
8747 
8748 /// Check for inner distribute directive.
8749 static const OMPExecutableDirective *
8750 getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) {
8751   const auto *CS = D.getInnermostCapturedStmt();
8752   const auto *Body =
8753       CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
8754   const Stmt *ChildStmt =
8755       CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);
8756 
8757   if (const auto *NestedDir =
8758           dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
8759     OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind();
8760     switch (D.getDirectiveKind()) {
8761     case OMPD_target:
8762       if (isOpenMPDistributeDirective(DKind))
8763         return NestedDir;
8764       if (DKind == OMPD_teams) {
8765         Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers(
8766             /*IgnoreCaptured=*/true);
8767         if (!Body)
8768           return nullptr;
8769         ChildStmt = CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);
8770         if (const auto *NND =
8771                 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
8772           DKind = NND->getDirectiveKind();
8773           if (isOpenMPDistributeDirective(DKind))
8774             return NND;
8775         }
8776       }
8777       return nullptr;
8778     case OMPD_target_teams:
8779       if (isOpenMPDistributeDirective(DKind))
8780         return NestedDir;
8781       return nullptr;
8782     case OMPD_target_parallel:
8783     case OMPD_target_simd:
8784     case OMPD_target_parallel_for:
8785     case OMPD_target_parallel_for_simd:
8786       return nullptr;
8787     case OMPD_target_teams_distribute:
8788     case OMPD_target_teams_distribute_simd:
8789     case OMPD_target_teams_distribute_parallel_for:
8790     case OMPD_target_teams_distribute_parallel_for_simd:
8791     case OMPD_parallel:
8792     case OMPD_for:
8793     case OMPD_parallel_for:
8794     case OMPD_parallel_sections:
8795     case OMPD_for_simd:
8796     case OMPD_parallel_for_simd:
8797     case OMPD_cancel:
8798     case OMPD_cancellation_point:
8799     case OMPD_ordered:
8800     case OMPD_threadprivate:
8801     case OMPD_allocate:
8802     case OMPD_task:
8803     case OMPD_simd:
8804     case OMPD_sections:
8805     case OMPD_section:
8806     case OMPD_single:
8807     case OMPD_master:
8808     case OMPD_critical:
8809     case OMPD_taskyield:
8810     case OMPD_barrier:
8811     case OMPD_taskwait:
8812     case OMPD_taskgroup:
8813     case OMPD_atomic:
8814     case OMPD_flush:
8815     case OMPD_teams:
8816     case OMPD_target_data:
8817     case OMPD_target_exit_data:
8818     case OMPD_target_enter_data:
8819     case OMPD_distribute:
8820     case OMPD_distribute_simd:
8821     case OMPD_distribute_parallel_for:
8822     case OMPD_distribute_parallel_for_simd:
8823     case OMPD_teams_distribute:
8824     case OMPD_teams_distribute_simd:
8825     case OMPD_teams_distribute_parallel_for:
8826     case OMPD_teams_distribute_parallel_for_simd:
8827     case OMPD_target_update:
8828     case OMPD_declare_simd:
8829     case OMPD_declare_target:
8830     case OMPD_end_declare_target:
8831     case OMPD_declare_reduction:
8832     case OMPD_declare_mapper:
8833     case OMPD_taskloop:
8834     case OMPD_taskloop_simd:
8835     case OMPD_requires:
8836     case OMPD_unknown:
8837       llvm_unreachable("Unexpected directive.");
8838     }
8839   }
8840 
8841   return nullptr;
8842 }
8843 
8844 /// Emit the user-defined mapper function. The code generation follows the
8845 /// pattern in the example below.
8846 /// \code
8847 /// void .omp_mapper.<type_name>.<mapper_id>.(void *rt_mapper_handle,
8848 ///                                           void *base, void *begin,
8849 ///                                           int64_t size, int64_t type) {
8850 ///   // Allocate space for an array section first.
8851 ///   if (size > 1 && !maptype.IsDelete)
8852 ///     __tgt_push_mapper_component(rt_mapper_handle, base, begin,
8853 ///                                 size*sizeof(Ty), clearToFrom(type));
8854 ///   // Map members.
8855 ///   for (unsigned i = 0; i < size; i++) {
8856 ///     // For each component specified by this mapper:
8857 ///     for (auto c : all_components) {
8858 ///       if (c.hasMapper())
8859 ///         (*c.Mapper())(rt_mapper_handle, c.arg_base, c.arg_begin, c.arg_size,
8860 ///                       c.arg_type);
8861 ///       else
8862 ///         __tgt_push_mapper_component(rt_mapper_handle, c.arg_base,
8863 ///                                     c.arg_begin, c.arg_size, c.arg_type);
8864 ///     }
8865 ///   }
8866 ///   // Delete the array section.
8867 ///   if (size > 1 && maptype.IsDelete)
8868 ///     __tgt_push_mapper_component(rt_mapper_handle, base, begin,
8869 ///                                 size*sizeof(Ty), clearToFrom(type));
8870 /// }
8871 /// \endcode
8872 void CGOpenMPRuntime::emitUserDefinedMapper(const OMPDeclareMapperDecl *D,
8873                                             CodeGenFunction *CGF) {
8874   if (UDMMap.count(D) > 0)
8875     return;
8876   ASTContext &C = CGM.getContext();
8877   QualType Ty = D->getType();
8878   QualType PtrTy = C.getPointerType(Ty).withRestrict();
8879   QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
8880   auto *MapperVarDecl =
8881       cast<VarDecl>(cast<DeclRefExpr>(D->getMapperVarRef())->getDecl());
8882   SourceLocation Loc = D->getLocation();
8883   CharUnits ElementSize = C.getTypeSizeInChars(Ty);
8884 
8885   // Prepare mapper function arguments and attributes.
8886   ImplicitParamDecl HandleArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
8887                               C.VoidPtrTy, ImplicitParamDecl::Other);
8888   ImplicitParamDecl BaseArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
8889                             ImplicitParamDecl::Other);
8890   ImplicitParamDecl BeginArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
8891                              C.VoidPtrTy, ImplicitParamDecl::Other);
8892   ImplicitParamDecl SizeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
8893                             ImplicitParamDecl::Other);
8894   ImplicitParamDecl TypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
8895                             ImplicitParamDecl::Other);
8896   FunctionArgList Args;
8897   Args.push_back(&HandleArg);
8898   Args.push_back(&BaseArg);
8899   Args.push_back(&BeginArg);
8900   Args.push_back(&SizeArg);
8901   Args.push_back(&TypeArg);
8902   const CGFunctionInfo &FnInfo =
8903       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
8904   llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
8905   SmallString<64> TyStr;
8906   llvm::raw_svector_ostream Out(TyStr);
8907   CGM.getCXXABI().getMangleContext().mangleTypeName(Ty, Out);
8908   std::string Name = getName({"omp_mapper", TyStr, D->getName()});
8909   auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
8910                                     Name, &CGM.getModule());
8911   CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
8912   Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
8913   // Start the mapper function code generation.
8914   CodeGenFunction MapperCGF(CGM);
8915   MapperCGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
8916   // Compute the starting and end addreses of array elements.
8917   llvm::Value *Size = MapperCGF.EmitLoadOfScalar(
8918       MapperCGF.GetAddrOfLocalVar(&SizeArg), /*Volatile=*/false,
8919       C.getPointerType(Int64Ty), Loc);
8920   llvm::Value *PtrBegin = MapperCGF.Builder.CreateBitCast(
8921       MapperCGF.GetAddrOfLocalVar(&BeginArg).getPointer(),
8922       CGM.getTypes().ConvertTypeForMem(C.getPointerType(PtrTy)));
8923   llvm::Value *PtrEnd = MapperCGF.Builder.CreateGEP(PtrBegin, Size);
8924   llvm::Value *MapType = MapperCGF.EmitLoadOfScalar(
8925       MapperCGF.GetAddrOfLocalVar(&TypeArg), /*Volatile=*/false,
8926       C.getPointerType(Int64Ty), Loc);
8927   // Prepare common arguments for array initiation and deletion.
8928   llvm::Value *Handle = MapperCGF.EmitLoadOfScalar(
8929       MapperCGF.GetAddrOfLocalVar(&HandleArg),
8930       /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
8931   llvm::Value *BaseIn = MapperCGF.EmitLoadOfScalar(
8932       MapperCGF.GetAddrOfLocalVar(&BaseArg),
8933       /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
8934   llvm::Value *BeginIn = MapperCGF.EmitLoadOfScalar(
8935       MapperCGF.GetAddrOfLocalVar(&BeginArg),
8936       /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
8937 
8938   // Emit array initiation if this is an array section and \p MapType indicates
8939   // that memory allocation is required.
8940   llvm::BasicBlock *HeadBB = MapperCGF.createBasicBlock("omp.arraymap.head");
8941   emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
8942                              ElementSize, HeadBB, /*IsInit=*/true);
8943 
8944   // Emit a for loop to iterate through SizeArg of elements and map all of them.
8945 
8946   // Emit the loop header block.
8947   MapperCGF.EmitBlock(HeadBB);
8948   llvm::BasicBlock *BodyBB = MapperCGF.createBasicBlock("omp.arraymap.body");
8949   llvm::BasicBlock *DoneBB = MapperCGF.createBasicBlock("omp.done");
8950   // Evaluate whether the initial condition is satisfied.
8951   llvm::Value *IsEmpty =
8952       MapperCGF.Builder.CreateICmpEQ(PtrBegin, PtrEnd, "omp.arraymap.isempty");
8953   MapperCGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
8954   llvm::BasicBlock *EntryBB = MapperCGF.Builder.GetInsertBlock();
8955 
8956   // Emit the loop body block.
8957   MapperCGF.EmitBlock(BodyBB);
8958   llvm::PHINode *PtrPHI = MapperCGF.Builder.CreatePHI(
8959       PtrBegin->getType(), 2, "omp.arraymap.ptrcurrent");
8960   PtrPHI->addIncoming(PtrBegin, EntryBB);
8961   Address PtrCurrent =
8962       Address(PtrPHI, MapperCGF.GetAddrOfLocalVar(&BeginArg)
8963                           .getAlignment()
8964                           .alignmentOfArrayElement(ElementSize));
8965   // Privatize the declared variable of mapper to be the current array element.
8966   CodeGenFunction::OMPPrivateScope Scope(MapperCGF);
8967   Scope.addPrivate(MapperVarDecl, [&MapperCGF, PtrCurrent, PtrTy]() {
8968     return MapperCGF
8969         .EmitLoadOfPointerLValue(PtrCurrent, PtrTy->castAs<PointerType>())
8970         .getAddress();
8971   });
8972   (void)Scope.Privatize();
8973 
8974   // Get map clause information. Fill up the arrays with all mapped variables.
8975   MappableExprsHandler::MapBaseValuesArrayTy BasePointers;
8976   MappableExprsHandler::MapValuesArrayTy Pointers;
8977   MappableExprsHandler::MapValuesArrayTy Sizes;
8978   MappableExprsHandler::MapFlagsArrayTy MapTypes;
8979   MappableExprsHandler MEHandler(*D, MapperCGF);
8980   MEHandler.generateAllInfoForMapper(BasePointers, Pointers, Sizes, MapTypes);
8981 
8982   // Call the runtime API __tgt_mapper_num_components to get the number of
8983   // pre-existing components.
8984   llvm::Value *OffloadingArgs[] = {Handle};
8985   llvm::Value *PreviousSize = MapperCGF.EmitRuntimeCall(
8986       createRuntimeFunction(OMPRTL__tgt_mapper_num_components), OffloadingArgs);
8987   llvm::Value *ShiftedPreviousSize = MapperCGF.Builder.CreateShl(
8988       PreviousSize,
8989       MapperCGF.Builder.getInt64(MappableExprsHandler::getFlagMemberOffset()));
8990 
8991   // Fill up the runtime mapper handle for all components.
8992   for (unsigned I = 0; I < BasePointers.size(); ++I) {
8993     llvm::Value *CurBaseArg = MapperCGF.Builder.CreateBitCast(
8994         *BasePointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
8995     llvm::Value *CurBeginArg = MapperCGF.Builder.CreateBitCast(
8996         Pointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
8997     llvm::Value *CurSizeArg = Sizes[I];
8998 
8999     // Extract the MEMBER_OF field from the map type.
9000     llvm::BasicBlock *MemberBB = MapperCGF.createBasicBlock("omp.member");
9001     MapperCGF.EmitBlock(MemberBB);
9002     llvm::Value *OriMapType = MapperCGF.Builder.getInt64(MapTypes[I]);
9003     llvm::Value *Member = MapperCGF.Builder.CreateAnd(
9004         OriMapType,
9005         MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_MEMBER_OF));
9006     llvm::BasicBlock *MemberCombineBB =
9007         MapperCGF.createBasicBlock("omp.member.combine");
9008     llvm::BasicBlock *TypeBB = MapperCGF.createBasicBlock("omp.type");
9009     llvm::Value *IsMember = MapperCGF.Builder.CreateIsNull(Member);
9010     MapperCGF.Builder.CreateCondBr(IsMember, TypeBB, MemberCombineBB);
9011     // Add the number of pre-existing components to the MEMBER_OF field if it
9012     // is valid.
9013     MapperCGF.EmitBlock(MemberCombineBB);
9014     llvm::Value *CombinedMember =
9015         MapperCGF.Builder.CreateNUWAdd(OriMapType, ShiftedPreviousSize);
9016     // Do nothing if it is not a member of previous components.
9017     MapperCGF.EmitBlock(TypeBB);
9018     llvm::PHINode *MemberMapType =
9019         MapperCGF.Builder.CreatePHI(CGM.Int64Ty, 4, "omp.membermaptype");
9020     MemberMapType->addIncoming(OriMapType, MemberBB);
9021     MemberMapType->addIncoming(CombinedMember, MemberCombineBB);
9022 
9023     // Combine the map type inherited from user-defined mapper with that
9024     // specified in the program. According to the OMP_MAP_TO and OMP_MAP_FROM
9025     // bits of the \a MapType, which is the input argument of the mapper
9026     // function, the following code will set the OMP_MAP_TO and OMP_MAP_FROM
9027     // bits of MemberMapType.
9028     // [OpenMP 5.0], 1.2.6. map-type decay.
9029     //        | alloc |  to   | from  | tofrom | release | delete
9030     // ----------------------------------------------------------
9031     // alloc  | alloc | alloc | alloc | alloc  | release | delete
9032     // to     | alloc |  to   | alloc |   to   | release | delete
9033     // from   | alloc | alloc | from  |  from  | release | delete
9034     // tofrom | alloc |  to   | from  | tofrom | release | delete
9035     llvm::Value *LeftToFrom = MapperCGF.Builder.CreateAnd(
9036         MapType,
9037         MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO |
9038                                    MappableExprsHandler::OMP_MAP_FROM));
9039     llvm::BasicBlock *AllocBB = MapperCGF.createBasicBlock("omp.type.alloc");
9040     llvm::BasicBlock *AllocElseBB =
9041         MapperCGF.createBasicBlock("omp.type.alloc.else");
9042     llvm::BasicBlock *ToBB = MapperCGF.createBasicBlock("omp.type.to");
9043     llvm::BasicBlock *ToElseBB = MapperCGF.createBasicBlock("omp.type.to.else");
9044     llvm::BasicBlock *FromBB = MapperCGF.createBasicBlock("omp.type.from");
9045     llvm::BasicBlock *EndBB = MapperCGF.createBasicBlock("omp.type.end");
9046     llvm::Value *IsAlloc = MapperCGF.Builder.CreateIsNull(LeftToFrom);
9047     MapperCGF.Builder.CreateCondBr(IsAlloc, AllocBB, AllocElseBB);
9048     // In case of alloc, clear OMP_MAP_TO and OMP_MAP_FROM.
9049     MapperCGF.EmitBlock(AllocBB);
9050     llvm::Value *AllocMapType = MapperCGF.Builder.CreateAnd(
9051         MemberMapType,
9052         MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO |
9053                                      MappableExprsHandler::OMP_MAP_FROM)));
9054     MapperCGF.Builder.CreateBr(EndBB);
9055     MapperCGF.EmitBlock(AllocElseBB);
9056     llvm::Value *IsTo = MapperCGF.Builder.CreateICmpEQ(
9057         LeftToFrom,
9058         MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO));
9059     MapperCGF.Builder.CreateCondBr(IsTo, ToBB, ToElseBB);
9060     // In case of to, clear OMP_MAP_FROM.
9061     MapperCGF.EmitBlock(ToBB);
9062     llvm::Value *ToMapType = MapperCGF.Builder.CreateAnd(
9063         MemberMapType,
9064         MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_FROM));
9065     MapperCGF.Builder.CreateBr(EndBB);
9066     MapperCGF.EmitBlock(ToElseBB);
9067     llvm::Value *IsFrom = MapperCGF.Builder.CreateICmpEQ(
9068         LeftToFrom,
9069         MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_FROM));
9070     MapperCGF.Builder.CreateCondBr(IsFrom, FromBB, EndBB);
9071     // In case of from, clear OMP_MAP_TO.
9072     MapperCGF.EmitBlock(FromBB);
9073     llvm::Value *FromMapType = MapperCGF.Builder.CreateAnd(
9074         MemberMapType,
9075         MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_TO));
9076     // In case of tofrom, do nothing.
9077     MapperCGF.EmitBlock(EndBB);
9078     llvm::PHINode *CurMapType =
9079         MapperCGF.Builder.CreatePHI(CGM.Int64Ty, 4, "omp.maptype");
9080     CurMapType->addIncoming(AllocMapType, AllocBB);
9081     CurMapType->addIncoming(ToMapType, ToBB);
9082     CurMapType->addIncoming(FromMapType, FromBB);
9083     CurMapType->addIncoming(MemberMapType, ToElseBB);
9084 
9085     // TODO: call the corresponding mapper function if a user-defined mapper is
9086     // associated with this map clause.
9087     // Call the runtime API __tgt_push_mapper_component to fill up the runtime
9088     // data structure.
9089     llvm::Value *OffloadingArgs[] = {Handle, CurBaseArg, CurBeginArg,
9090                                      CurSizeArg, CurMapType};
9091     MapperCGF.EmitRuntimeCall(
9092         createRuntimeFunction(OMPRTL__tgt_push_mapper_component),
9093         OffloadingArgs);
9094   }
9095 
9096   // Update the pointer to point to the next element that needs to be mapped,
9097   // and check whether we have mapped all elements.
9098   llvm::Value *PtrNext = MapperCGF.Builder.CreateConstGEP1_32(
9099       PtrPHI, /*Idx0=*/1, "omp.arraymap.next");
9100   PtrPHI->addIncoming(PtrNext, BodyBB);
9101   llvm::Value *IsDone =
9102       MapperCGF.Builder.CreateICmpEQ(PtrNext, PtrEnd, "omp.arraymap.isdone");
9103   llvm::BasicBlock *ExitBB = MapperCGF.createBasicBlock("omp.arraymap.exit");
9104   MapperCGF.Builder.CreateCondBr(IsDone, ExitBB, BodyBB);
9105 
9106   MapperCGF.EmitBlock(ExitBB);
9107   // Emit array deletion if this is an array section and \p MapType indicates
9108   // that deletion is required.
9109   emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
9110                              ElementSize, DoneBB, /*IsInit=*/false);
9111 
9112   // Emit the function exit block.
9113   MapperCGF.EmitBlock(DoneBB, /*IsFinished=*/true);
9114   MapperCGF.FinishFunction();
9115   UDMMap.try_emplace(D, Fn);
9116   if (CGF) {
9117     auto &Decls = FunctionUDMMap.FindAndConstruct(CGF->CurFn);
9118     Decls.second.push_back(D);
9119   }
9120 }
9121 
9122 /// Emit the array initialization or deletion portion for user-defined mapper
9123 /// code generation. First, it evaluates whether an array section is mapped and
9124 /// whether the \a MapType instructs to delete this section. If \a IsInit is
9125 /// true, and \a MapType indicates to not delete this array, array
9126 /// initialization code is generated. If \a IsInit is false, and \a MapType
9127 /// indicates to not this array, array deletion code is generated.
9128 void CGOpenMPRuntime::emitUDMapperArrayInitOrDel(
9129     CodeGenFunction &MapperCGF, llvm::Value *Handle, llvm::Value *Base,
9130     llvm::Value *Begin, llvm::Value *Size, llvm::Value *MapType,
9131     CharUnits ElementSize, llvm::BasicBlock *ExitBB, bool IsInit) {
9132   StringRef Prefix = IsInit ? ".init" : ".del";
9133 
9134   // Evaluate if this is an array section.
9135   llvm::BasicBlock *IsDeleteBB =
9136       MapperCGF.createBasicBlock("omp.array" + Prefix + ".evaldelete");
9137   llvm::BasicBlock *BodyBB = MapperCGF.createBasicBlock("omp.array" + Prefix);
9138   llvm::Value *IsArray = MapperCGF.Builder.CreateICmpSGE(
9139       Size, MapperCGF.Builder.getInt64(1), "omp.arrayinit.isarray");
9140   MapperCGF.Builder.CreateCondBr(IsArray, IsDeleteBB, ExitBB);
9141 
9142   // Evaluate if we are going to delete this section.
9143   MapperCGF.EmitBlock(IsDeleteBB);
9144   llvm::Value *DeleteBit = MapperCGF.Builder.CreateAnd(
9145       MapType,
9146       MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_DELETE));
9147   llvm::Value *DeleteCond;
9148   if (IsInit) {
9149     DeleteCond = MapperCGF.Builder.CreateIsNull(
9150         DeleteBit, "omp.array" + Prefix + ".delete");
9151   } else {
9152     DeleteCond = MapperCGF.Builder.CreateIsNotNull(
9153         DeleteBit, "omp.array" + Prefix + ".delete");
9154   }
9155   MapperCGF.Builder.CreateCondBr(DeleteCond, BodyBB, ExitBB);
9156 
9157   MapperCGF.EmitBlock(BodyBB);
9158   // Get the array size by multiplying element size and element number (i.e., \p
9159   // Size).
9160   llvm::Value *ArraySize = MapperCGF.Builder.CreateNUWMul(
9161       Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity()));
9162   // Remove OMP_MAP_TO and OMP_MAP_FROM from the map type, so that it achieves
9163   // memory allocation/deletion purpose only.
9164   llvm::Value *MapTypeArg = MapperCGF.Builder.CreateAnd(
9165       MapType,
9166       MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO |
9167                                    MappableExprsHandler::OMP_MAP_FROM)));
9168   // Call the runtime API __tgt_push_mapper_component to fill up the runtime
9169   // data structure.
9170   llvm::Value *OffloadingArgs[] = {Handle, Base, Begin, ArraySize, MapTypeArg};
9171   MapperCGF.EmitRuntimeCall(
9172       createRuntimeFunction(OMPRTL__tgt_push_mapper_component), OffloadingArgs);
9173 }
9174 
9175 void CGOpenMPRuntime::emitTargetNumIterationsCall(
9176     CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *Device,
9177     const llvm::function_ref<llvm::Value *(
9178         CodeGenFunction &CGF, const OMPLoopDirective &D)> &SizeEmitter) {
9179   OpenMPDirectiveKind Kind = D.getDirectiveKind();
9180   const OMPExecutableDirective *TD = &D;
9181   // Get nested teams distribute kind directive, if any.
9182   if (!isOpenMPDistributeDirective(Kind) || !isOpenMPTeamsDirective(Kind))
9183     TD = getNestedDistributeDirective(CGM.getContext(), D);
9184   if (!TD)
9185     return;
9186   const auto *LD = cast<OMPLoopDirective>(TD);
9187   auto &&CodeGen = [LD, &Device, &SizeEmitter, this](CodeGenFunction &CGF,
9188                                                      PrePostActionTy &) {
9189     llvm::Value *NumIterations = SizeEmitter(CGF, *LD);
9190 
9191     // Emit device ID if any.
9192     llvm::Value *DeviceID;
9193     if (Device)
9194       DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
9195                                            CGF.Int64Ty, /*isSigned=*/true);
9196     else
9197       DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
9198 
9199     llvm::Value *Args[] = {DeviceID, NumIterations};
9200     CGF.EmitRuntimeCall(
9201         createRuntimeFunction(OMPRTL__kmpc_push_target_tripcount), Args);
9202   };
9203   emitInlinedDirective(CGF, OMPD_unknown, CodeGen);
9204 }
9205 
/// Emit the host-side code that launches the target region of directive \p D.
///
/// \param CGF          Function in which the code is emitted. Nothing is
///                     emitted when it has no insertion point.
/// \param D            The target directive being lowered.
/// \param OutlinedFn   Host version of the target region; must be non-null.
/// \param OutlinedFnID Identifier of the target region passed to the
///                     offloading runtime. When null, only the host version
///                     is called.
/// \param IfCond       Optional 'if' clause condition selecting between the
///                     offloading path and the host-only path.
/// \param Device       Optional device expression; when null the constant
///                     OMP_DEVICEID_UNDEF is passed to the runtime.
///
/// When the directive has a 'depend' clause the generated code is routed
/// through CodeGenFunction::EmitOMPTargetTaskBasedDirective and the captured
/// variables are regenerated inside that region.
void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF,
                                     const OMPExecutableDirective &D,
                                     llvm::Function *OutlinedFn,
                                     llvm::Value *OutlinedFnID,
                                     const Expr *IfCond, const Expr *Device) {
  if (!CGF.HaveInsertPoint())
    return;

  assert(OutlinedFn && "Invalid outlined function!");

  // A 'depend' clause means the target region is emitted via the task-based
  // path (see TargetThenGen/TargetElseGen below).
  const bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>();
  llvm::SmallVector<llvm::Value *, 16> CapturedVars;
  const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
  // Collect the values of the captured variables; they are the arguments of
  // the host version of the outlined function.
  auto &&ArgsCodegen = [&CS, &CapturedVars](CodeGenFunction &CGF,
                                            PrePostActionTy &) {
    CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
  };
  emitInlinedDirective(CGF, OMPD_unknown, ArgsCodegen);

  // InputInfo and MapTypesArray are filled in by TargetThenGen and consumed by
  // ThenGen.
  CodeGenFunction::OMPTargetDataInfo InputInfo;
  llvm::Value *MapTypesArray = nullptr;
  // Fill up the pointer arrays and transfer execution to the device.
  auto &&ThenGen = [this, Device, OutlinedFn, OutlinedFnID, &D, &InputInfo,
                    &MapTypesArray, &CS, RequiresOuterTask,
                    &CapturedVars](CodeGenFunction &CGF, PrePostActionTy &) {
    // On top of the arrays that were filled up, the target offloading call
    // takes as arguments the device id as well as the host pointer. The host
    // pointer is used by the runtime library to identify the current target
    // region, so it only has to be unique and not necessarily point to
    // anything. It could be the pointer to the outlined function that
    // implements the target region, but we aren't using that so that the
    // compiler doesn't need to keep that, and could therefore inline the host
    // function if proven worthwhile during optimization.

    // From this point on, we need to have an ID of the target region defined.
    assert(OutlinedFnID && "Invalid outlined function ID!");

    // Emit device ID if any.
    llvm::Value *DeviceID;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Value *PointerNum =
        CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);

    // Return value of the runtime offloading call.
    llvm::Value *Return;

    llvm::Value *NumTeams = emitNumTeamsForTargetDirective(CGF, D);
    llvm::Value *NumThreads = emitNumThreadsForTargetDirective(CGF, D);

    // A 'nowait' clause selects the *_nowait variants of the runtime entries.
    bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
    // The target region is an outlined function launched by the runtime
    // via calls __tgt_target() or __tgt_target_teams().
    //
    // __tgt_target() launches a target region with one team and one thread,
    // executing a serial region.  This master thread may in turn launch
    // more threads within its team upon encountering a parallel region,
    // however, no additional teams can be launched on the device.
    //
    // __tgt_target_teams() launches a target region with one or more teams,
    // each with one or more threads.  This call is required for target
    // constructs such as:
    //  'target teams'
    //  'target' / 'teams'
    //  'target teams distribute parallel for'
    //  'target parallel'
    // and so on.
    //
    // Note that on the host and CPU targets, the runtime implementation of
    // these calls simply call the outlined function without forking threads.
    // The outlined functions themselves have runtime calls to
    // __kmpc_fork_teams() and __kmpc_fork() for this purpose, codegen'd by
    // the compiler in emitTeamsCall() and emitParallelCall().
    //
    // In contrast, on the NVPTX target, the implementation of
    // __tgt_target_teams() launches a GPU kernel with the requested number
    // of teams and threads so no additional calls to the runtime are required.
    if (NumTeams) {
      // If we have NumTeams defined this means that we have an enclosed teams
      // region. Therefore we also expect to have NumThreads defined. These two
      // values should be defined in the presence of a teams directive,
      // regardless of having any clauses associated. If the user is using teams
      // but no clauses, these two values will be the default that should be
      // passed to the runtime library - a 32-bit integer with the value zero.
      assert(NumThreads && "Thread limit expression should be available along "
                           "with number of teams.");
      llvm::Value *OffloadingArgs[] = {DeviceID,
                                       OutlinedFnID,
                                       PointerNum,
                                       InputInfo.BasePointersArray.getPointer(),
                                       InputInfo.PointersArray.getPointer(),
                                       InputInfo.SizesArray.getPointer(),
                                       MapTypesArray,
                                       NumTeams,
                                       NumThreads};
      Return = CGF.EmitRuntimeCall(
          createRuntimeFunction(HasNowait ? OMPRTL__tgt_target_teams_nowait
                                          : OMPRTL__tgt_target_teams),
          OffloadingArgs);
    } else {
      // No teams information: use the entry without the NumTeams/NumThreads
      // arguments.
      llvm::Value *OffloadingArgs[] = {DeviceID,
                                       OutlinedFnID,
                                       PointerNum,
                                       InputInfo.BasePointersArray.getPointer(),
                                       InputInfo.PointersArray.getPointer(),
                                       InputInfo.SizesArray.getPointer(),
                                       MapTypesArray};
      Return = CGF.EmitRuntimeCall(
          createRuntimeFunction(HasNowait ? OMPRTL__tgt_target_nowait
                                          : OMPRTL__tgt_target),
          OffloadingArgs);
    }

    // Check the error code and execute the host version if required.
    llvm::BasicBlock *OffloadFailedBlock =
        CGF.createBasicBlock("omp_offload.failed");
    llvm::BasicBlock *OffloadContBlock =
        CGF.createBasicBlock("omp_offload.cont");
    // A non-null (non-zero) return value is treated as an offloading failure.
    llvm::Value *Failed = CGF.Builder.CreateIsNotNull(Return);
    CGF.Builder.CreateCondBr(Failed, OffloadFailedBlock, OffloadContBlock);

    CGF.EmitBlock(OffloadFailedBlock);
    // On the task-based path the captured variables are regenerated in the
    // current function before the host call.
    if (RequiresOuterTask) {
      CapturedVars.clear();
      CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
    }
    emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
    CGF.EmitBranch(OffloadContBlock);

    CGF.EmitBlock(OffloadContBlock, /*IsFinished=*/true);
  };

  // Notify that the host version must be executed.
  auto &&ElseGen = [this, &D, OutlinedFn, &CS, &CapturedVars,
                    RequiresOuterTask](CodeGenFunction &CGF,
                                       PrePostActionTy &) {
    if (RequiresOuterTask) {
      CapturedVars.clear();
      CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
    }
    emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
  };

  // Build the offloading arrays from the captures and map clauses, publish
  // them through InputInfo/MapTypesArray and then run ThenGen.
  auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray,
                          &CapturedVars, RequiresOuterTask,
                          &CS](CodeGenFunction &CGF, PrePostActionTy &) {
    // Fill up the arrays with all the captured variables.
    MappableExprsHandler::MapBaseValuesArrayTy BasePointers;
    MappableExprsHandler::MapValuesArrayTy Pointers;
    MappableExprsHandler::MapValuesArrayTy Sizes;
    MappableExprsHandler::MapFlagsArrayTy MapTypes;

    // Get mappable expression information.
    MappableExprsHandler MEHandler(D, CGF);
    llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers;

    // Walk the captures, the fields of the captured record and the captured
    // values in lock step.
    auto RI = CS.getCapturedRecordDecl()->field_begin();
    auto CV = CapturedVars.begin();
    for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(),
                                              CE = CS.capture_end();
         CI != CE; ++CI, ++RI, ++CV) {
      MappableExprsHandler::MapBaseValuesArrayTy CurBasePointers;
      MappableExprsHandler::MapValuesArrayTy CurPointers;
      MappableExprsHandler::MapValuesArrayTy CurSizes;
      MappableExprsHandler::MapFlagsArrayTy CurMapTypes;
      MappableExprsHandler::StructRangeInfoTy PartialStruct;

      // VLA sizes are passed to the outlined region by copy and do not have map
      // information associated.
      if (CI->capturesVariableArrayType()) {
        CurBasePointers.push_back(*CV);
        CurPointers.push_back(*CV);
        CurSizes.push_back(CGF.Builder.CreateIntCast(
            CGF.getTypeSize(RI->getType()), CGF.Int64Ty, /*isSigned=*/true));
        // Copy to the device as an argument. No need to retrieve it.
        CurMapTypes.push_back(MappableExprsHandler::OMP_MAP_LITERAL |
                              MappableExprsHandler::OMP_MAP_TARGET_PARAM |
                              MappableExprsHandler::OMP_MAP_IMPLICIT);
      } else {
        // If we have any information in the map clause, we use it, otherwise we
        // just do a default mapping.
        MEHandler.generateInfoForCapture(CI, *CV, CurBasePointers, CurPointers,
                                         CurSizes, CurMapTypes, PartialStruct);
        if (CurBasePointers.empty())
          MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurBasePointers,
                                           CurPointers, CurSizes, CurMapTypes);
        // Generate correct mapping for variables captured by reference in
        // lambdas.
        if (CI->capturesVariable())
          MEHandler.generateInfoForLambdaCaptures(
              CI->getCapturedVar(), *CV, CurBasePointers, CurPointers, CurSizes,
              CurMapTypes, LambdaPointers);
      }
      // We expect to have at least an element of information for this capture.
      assert(!CurBasePointers.empty() &&
             "Non-existing map pointer for capture!");
      assert(CurBasePointers.size() == CurPointers.size() &&
             CurBasePointers.size() == CurSizes.size() &&
             CurBasePointers.size() == CurMapTypes.size() &&
             "Inconsistent map information sizes!");

      // If there is an entry in PartialStruct it means we have a struct with
      // individual members mapped. Emit an extra combined entry.
      if (PartialStruct.Base.isValid())
        MEHandler.emitCombinedEntry(BasePointers, Pointers, Sizes, MapTypes,
                                    CurMapTypes, PartialStruct);

      // We need to append the results of this capture to what we already have.
      BasePointers.append(CurBasePointers.begin(), CurBasePointers.end());
      Pointers.append(CurPointers.begin(), CurPointers.end());
      Sizes.append(CurSizes.begin(), CurSizes.end());
      MapTypes.append(CurMapTypes.begin(), CurMapTypes.end());
    }
    // Adjust MEMBER_OF flags for the lambdas captures.
    MEHandler.adjustMemberOfForLambdaCaptures(LambdaPointers, BasePointers,
                                              Pointers, MapTypes);
    // Map other list items in the map clause which are not captured variables
    // but "declare target link" global variables.
    MEHandler.generateInfoForDeclareTargetLink(BasePointers, Pointers, Sizes,
                                               MapTypes);

    TargetDataInfo Info;
    // Fill up the arrays and create the arguments.
    emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info);
    emitOffloadingArraysArgument(CGF, Info.BasePointersArray,
                                 Info.PointersArray, Info.SizesArray,
                                 Info.MapTypesArray, Info);
    // Hand the emitted arrays over to ThenGen.
    InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
    InputInfo.BasePointersArray =
        Address(Info.BasePointersArray, CGM.getPointerAlign());
    InputInfo.PointersArray =
        Address(Info.PointersArray, CGM.getPointerAlign());
    InputInfo.SizesArray = Address(Info.SizesArray, CGM.getPointerAlign());
    MapTypesArray = Info.MapTypesArray;
    if (RequiresOuterTask)
      CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
    else
      emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
  };

  // Host-only path: run ElseGen either inside the task-based region (with an
  // empty OMPTargetDataInfo) or as an inlined directive.
  auto &&TargetElseGen = [this, &ElseGen, &D, RequiresOuterTask](
                             CodeGenFunction &CGF, PrePostActionTy &) {
    if (RequiresOuterTask) {
      CodeGenFunction::OMPTargetDataInfo InputInfo;
      CGF.EmitOMPTargetTaskBasedDirective(D, ElseGen, InputInfo);
    } else {
      emitInlinedDirective(CGF, D.getDirectiveKind(), ElseGen);
    }
  };

  // If we have a target function ID it means that we need to support
  // offloading, otherwise, just execute on the host. We need to execute on host
  // regardless of the conditional in the if clause if, e.g., the user does not
  // specify target triples.
  if (OutlinedFnID) {
    if (IfCond) {
      emitOMPIfClause(CGF, IfCond, TargetThenGen, TargetElseGen);
    } else {
      RegionCodeGenTy ThenRCG(TargetThenGen);
      ThenRCG(CGF);
    }
  } else {
    RegionCodeGenTy ElseRCG(TargetElseGen);
    ElseRCG(CGF);
  }
}
9478 
9479 void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S,
9480                                                     StringRef ParentName) {
9481   if (!S)
9482     return;
9483 
9484   // Codegen OMP target directives that offload compute to the device.
9485   bool RequiresDeviceCodegen =
9486       isa<OMPExecutableDirective>(S) &&
9487       isOpenMPTargetExecutionDirective(
9488           cast<OMPExecutableDirective>(S)->getDirectiveKind());
9489 
9490   if (RequiresDeviceCodegen) {
9491     const auto &E = *cast<OMPExecutableDirective>(S);
9492     unsigned DeviceID;
9493     unsigned FileID;
9494     unsigned Line;
9495     getTargetEntryUniqueInfo(CGM.getContext(), E.getBeginLoc(), DeviceID,
9496                              FileID, Line);
9497 
9498     // Is this a target region that should not be emitted as an entry point? If
9499     // so just signal we are done with this target region.
9500     if (!OffloadEntriesInfoManager.hasTargetRegionEntryInfo(DeviceID, FileID,
9501                                                             ParentName, Line))
9502       return;
9503 
9504     switch (E.getDirectiveKind()) {
9505     case OMPD_target:
9506       CodeGenFunction::EmitOMPTargetDeviceFunction(CGM, ParentName,
9507                                                    cast<OMPTargetDirective>(E));
9508       break;
9509     case OMPD_target_parallel:
9510       CodeGenFunction::EmitOMPTargetParallelDeviceFunction(
9511           CGM, ParentName, cast<OMPTargetParallelDirective>(E));
9512       break;
9513     case OMPD_target_teams:
9514       CodeGenFunction::EmitOMPTargetTeamsDeviceFunction(
9515           CGM, ParentName, cast<OMPTargetTeamsDirective>(E));
9516       break;
9517     case OMPD_target_teams_distribute:
9518       CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction(
9519           CGM, ParentName, cast<OMPTargetTeamsDistributeDirective>(E));
9520       break;
9521     case OMPD_target_teams_distribute_simd:
9522       CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction(
9523           CGM, ParentName, cast<OMPTargetTeamsDistributeSimdDirective>(E));
9524       break;
9525     case OMPD_target_parallel_for:
9526       CodeGenFunction::EmitOMPTargetParallelForDeviceFunction(
9527           CGM, ParentName, cast<OMPTargetParallelForDirective>(E));
9528       break;
9529     case OMPD_target_parallel_for_simd:
9530       CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction(
9531           CGM, ParentName, cast<OMPTargetParallelForSimdDirective>(E));
9532       break;
9533     case OMPD_target_simd:
9534       CodeGenFunction::EmitOMPTargetSimdDeviceFunction(
9535           CGM, ParentName, cast<OMPTargetSimdDirective>(E));
9536       break;
9537     case OMPD_target_teams_distribute_parallel_for:
9538       CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction(
9539           CGM, ParentName,
9540           cast<OMPTargetTeamsDistributeParallelForDirective>(E));
9541       break;
9542     case OMPD_target_teams_distribute_parallel_for_simd:
9543       CodeGenFunction::
9544           EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction(
9545               CGM, ParentName,
9546               cast<OMPTargetTeamsDistributeParallelForSimdDirective>(E));
9547       break;
9548     case OMPD_parallel:
9549     case OMPD_for:
9550     case OMPD_parallel_for:
9551     case OMPD_parallel_sections:
9552     case OMPD_for_simd:
9553     case OMPD_parallel_for_simd:
9554     case OMPD_cancel:
9555     case OMPD_cancellation_point:
9556     case OMPD_ordered:
9557     case OMPD_threadprivate:
9558     case OMPD_allocate:
9559     case OMPD_task:
9560     case OMPD_simd:
9561     case OMPD_sections:
9562     case OMPD_section:
9563     case OMPD_single:
9564     case OMPD_master:
9565     case OMPD_critical:
9566     case OMPD_taskyield:
9567     case OMPD_barrier:
9568     case OMPD_taskwait:
9569     case OMPD_taskgroup:
9570     case OMPD_atomic:
9571     case OMPD_flush:
9572     case OMPD_teams:
9573     case OMPD_target_data:
9574     case OMPD_target_exit_data:
9575     case OMPD_target_enter_data:
9576     case OMPD_distribute:
9577     case OMPD_distribute_simd:
9578     case OMPD_distribute_parallel_for:
9579     case OMPD_distribute_parallel_for_simd:
9580     case OMPD_teams_distribute:
9581     case OMPD_teams_distribute_simd:
9582     case OMPD_teams_distribute_parallel_for:
9583     case OMPD_teams_distribute_parallel_for_simd:
9584     case OMPD_target_update:
9585     case OMPD_declare_simd:
9586     case OMPD_declare_target:
9587     case OMPD_end_declare_target:
9588     case OMPD_declare_reduction:
9589     case OMPD_declare_mapper:
9590     case OMPD_taskloop:
9591     case OMPD_taskloop_simd:
9592     case OMPD_requires:
9593     case OMPD_unknown:
9594       llvm_unreachable("Unknown target directive for OpenMP device codegen.");
9595     }
9596     return;
9597   }
9598 
9599   if (const auto *E = dyn_cast<OMPExecutableDirective>(S)) {
9600     if (!E->hasAssociatedStmt() || !E->getAssociatedStmt())
9601       return;
9602 
9603     scanForTargetRegionsFunctions(
9604         E->getInnermostCapturedStmt()->getCapturedStmt(), ParentName);
9605     return;
9606   }
9607 
9608   // If this is a lambda function, look into its body.
9609   if (const auto *L = dyn_cast<LambdaExpr>(S))
9610     S = L->getBody();
9611 
9612   // Keep looking for target regions recursively.
9613   for (const Stmt *II : S->children())
9614     scanForTargetRegionsFunctions(II, ParentName);
9615 }
9616 
9617 bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) {
9618   // If emitting code for the host, we do not process FD here. Instead we do
9619   // the normal code generation.
9620   if (!CGM.getLangOpts().OpenMPIsDevice) {
9621     if (const auto *FD = dyn_cast<FunctionDecl>(GD.getDecl())) {
9622       Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
9623           OMPDeclareTargetDeclAttr::getDeviceType(FD);
9624       // Do not emit device_type(nohost) functions for the host.
9625       if (DevTy && *DevTy == OMPDeclareTargetDeclAttr::DT_NoHost)
9626         return true;
9627     }
9628     return false;
9629   }
9630 
9631   const ValueDecl *VD = cast<ValueDecl>(GD.getDecl());
9632   StringRef Name = CGM.getMangledName(GD);
9633   // Try to detect target regions in the function.
9634   if (const auto *FD = dyn_cast<FunctionDecl>(VD)) {
9635     scanForTargetRegionsFunctions(FD->getBody(), Name);
9636     Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
9637         OMPDeclareTargetDeclAttr::getDeviceType(FD);
9638     // Do not emit device_type(nohost) functions for the host.
9639     if (DevTy && *DevTy == OMPDeclareTargetDeclAttr::DT_Host)
9640       return true;
9641   }
9642 
9643   // Do not to emit function if it is not marked as declare target.
9644   return !OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD) &&
9645          AlreadyEmittedTargetFunctions.count(Name) == 0;
9646 }
9647 
9648 bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
9649   if (!CGM.getLangOpts().OpenMPIsDevice)
9650     return false;
9651 
9652   // Check if there are Ctors/Dtors in this declaration and look for target
9653   // regions in it. We use the complete variant to produce the kernel name
9654   // mangling.
9655   QualType RDTy = cast<VarDecl>(GD.getDecl())->getType();
9656   if (const auto *RD = RDTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl()) {
9657     for (const CXXConstructorDecl *Ctor : RD->ctors()) {
9658       StringRef ParentName =
9659           CGM.getMangledName(GlobalDecl(Ctor, Ctor_Complete));
9660       scanForTargetRegionsFunctions(Ctor->getBody(), ParentName);
9661     }
9662     if (const CXXDestructorDecl *Dtor = RD->getDestructor()) {
9663       StringRef ParentName =
9664           CGM.getMangledName(GlobalDecl(Dtor, Dtor_Complete));
9665       scanForTargetRegionsFunctions(Dtor->getBody(), ParentName);
9666     }
9667   }
9668 
9669   // Do not to emit variable if it is not marked as declare target.
9670   llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
9671       OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(
9672           cast<VarDecl>(GD.getDecl()));
9673   if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
9674       (*Res == OMPDeclareTargetDeclAttr::MT_To &&
9675        HasRequiresUnifiedSharedMemory)) {
9676     DeferredGlobalVariables.insert(cast<VarDecl>(GD.getDecl()));
9677     return true;
9678   }
9679   return false;
9680 }
9681 
9682 llvm::Constant *
9683 CGOpenMPRuntime::registerTargetFirstprivateCopy(CodeGenFunction &CGF,
9684                                                 const VarDecl *VD) {
9685   assert(VD->getType().isConstant(CGM.getContext()) &&
9686          "Expected constant variable.");
9687   StringRef VarName;
9688   llvm::Constant *Addr;
9689   llvm::GlobalValue::LinkageTypes Linkage;
9690   QualType Ty = VD->getType();
9691   SmallString<128> Buffer;
9692   {
9693     unsigned DeviceID;
9694     unsigned FileID;
9695     unsigned Line;
9696     getTargetEntryUniqueInfo(CGM.getContext(), VD->getLocation(), DeviceID,
9697                              FileID, Line);
9698     llvm::raw_svector_ostream OS(Buffer);
9699     OS << "__omp_offloading_firstprivate_" << llvm::format("_%x", DeviceID)
9700        << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line;
9701     VarName = OS.str();
9702   }
9703   Linkage = llvm::GlobalValue::InternalLinkage;
9704   Addr =
9705       getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(Ty), VarName,
9706                                   getDefaultFirstprivateAddressSpace());
9707   cast<llvm::GlobalValue>(Addr)->setLinkage(Linkage);
9708   CharUnits VarSize = CGM.getContext().getTypeSizeInChars(Ty);
9709   CGM.addCompilerUsedGlobal(cast<llvm::GlobalValue>(Addr));
9710   OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo(
9711       VarName, Addr, VarSize,
9712       OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo, Linkage);
9713   return Addr;
9714 }
9715 
9716 void CGOpenMPRuntime::registerTargetGlobalVariable(const VarDecl *VD,
9717                                                    llvm::Constant *Addr) {
9718   llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
9719       OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
9720   if (!Res) {
9721     if (CGM.getLangOpts().OpenMPIsDevice) {
9722       // Register non-target variables being emitted in device code (debug info
9723       // may cause this).
9724       StringRef VarName = CGM.getMangledName(VD);
9725       EmittedNonTargetVariables.try_emplace(VarName, Addr);
9726     }
9727     return;
9728   }
9729   // Register declare target variables.
9730   OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags;
9731   StringRef VarName;
9732   CharUnits VarSize;
9733   llvm::GlobalValue::LinkageTypes Linkage;
9734 
9735   if (*Res == OMPDeclareTargetDeclAttr::MT_To &&
9736       !HasRequiresUnifiedSharedMemory) {
9737     Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo;
9738     VarName = CGM.getMangledName(VD);
9739     if (VD->hasDefinition(CGM.getContext()) != VarDecl::DeclarationOnly) {
9740       VarSize = CGM.getContext().getTypeSizeInChars(VD->getType());
9741       assert(!VarSize.isZero() && "Expected non-zero size of the variable");
9742     } else {
9743       VarSize = CharUnits::Zero();
9744     }
9745     Linkage = CGM.getLLVMLinkageVarDefinition(VD, /*IsConstant=*/false);
9746     // Temp solution to prevent optimizations of the internal variables.
9747     if (CGM.getLangOpts().OpenMPIsDevice && !VD->isExternallyVisible()) {
9748       std::string RefName = getName({VarName, "ref"});
9749       if (!CGM.GetGlobalValue(RefName)) {
9750         llvm::Constant *AddrRef =
9751             getOrCreateInternalVariable(Addr->getType(), RefName);
9752         auto *GVAddrRef = cast<llvm::GlobalVariable>(AddrRef);
9753         GVAddrRef->setConstant(/*Val=*/true);
9754         GVAddrRef->setLinkage(llvm::GlobalValue::InternalLinkage);
9755         GVAddrRef->setInitializer(Addr);
9756         CGM.addCompilerUsedGlobal(GVAddrRef);
9757       }
9758     }
9759   } else {
9760     assert(((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
9761             (*Res == OMPDeclareTargetDeclAttr::MT_To &&
9762              HasRequiresUnifiedSharedMemory)) &&
9763            "Declare target attribute must link or to with unified memory.");
9764     if (*Res == OMPDeclareTargetDeclAttr::MT_Link)
9765       Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink;
9766     else
9767       Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo;
9768 
9769     if (CGM.getLangOpts().OpenMPIsDevice) {
9770       VarName = Addr->getName();
9771       Addr = nullptr;
9772     } else {
9773       VarName = getAddrOfDeclareTargetVar(VD).getName();
9774       Addr = cast<llvm::Constant>(getAddrOfDeclareTargetVar(VD).getPointer());
9775     }
9776     VarSize = CGM.getPointerSize();
9777     Linkage = llvm::GlobalValue::WeakAnyLinkage;
9778   }
9779 
9780   OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo(
9781       VarName, Addr, VarSize, Flags, Linkage);
9782 }
9783 
9784 bool CGOpenMPRuntime::emitTargetGlobal(GlobalDecl GD) {
9785   if (isa<FunctionDecl>(GD.getDecl()) ||
9786       isa<OMPDeclareReductionDecl>(GD.getDecl()))
9787     return emitTargetFunctions(GD);
9788 
9789   return emitTargetGlobalVariable(GD);
9790 }
9791 
9792 void CGOpenMPRuntime::emitDeferredTargetDecls() const {
9793   for (const VarDecl *VD : DeferredGlobalVariables) {
9794     llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
9795         OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
9796     if (!Res)
9797       continue;
9798     if (*Res == OMPDeclareTargetDeclAttr::MT_To &&
9799         !HasRequiresUnifiedSharedMemory) {
9800       CGM.EmitGlobal(VD);
9801     } else {
9802       assert((*Res == OMPDeclareTargetDeclAttr::MT_Link ||
9803               (*Res == OMPDeclareTargetDeclAttr::MT_To &&
9804                HasRequiresUnifiedSharedMemory)) &&
9805              "Expected link clause or to clause with unified memory.");
9806       (void)CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
9807     }
9808   }
9809 }
9810 
/// Base implementation: performs no adjustment and only checks, in builds
/// with assertions enabled, that \p D is a target execution directive.
///
/// \param CGF Not used by this implementation.
/// \param D   Directive expected to be a target execution directive.
void CGOpenMPRuntime::adjustTargetSpecificDataForLambdas(
    CodeGenFunction &CGF, const OMPExecutableDirective &D) const {
  assert(isOpenMPTargetExecutionDirective(D.getDirectiveKind()) &&
         " Expected target-based directive.");
}
9816 
9817 void CGOpenMPRuntime::checkArchForUnifiedAddressing(
9818     const OMPRequiresDecl *D) {
9819   for (const OMPClause *Clause : D->clauselists()) {
9820     if (Clause->getClauseKind() == OMPC_unified_shared_memory) {
9821       HasRequiresUnifiedSharedMemory = true;
9822       break;
9823     }
9824   }
9825 }
9826 
9827 bool CGOpenMPRuntime::hasAllocateAttributeForGlobalVar(const VarDecl *VD,
9828                                                        LangAS &AS) {
9829   if (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())
9830     return false;
9831   const auto *A = VD->getAttr<OMPAllocateDeclAttr>();
9832   switch(A->getAllocatorType()) {
9833   case OMPAllocateDeclAttr::OMPDefaultMemAlloc:
9834   // Not supported, fallback to the default mem space.
9835   case OMPAllocateDeclAttr::OMPLargeCapMemAlloc:
9836   case OMPAllocateDeclAttr::OMPCGroupMemAlloc:
9837   case OMPAllocateDeclAttr::OMPHighBWMemAlloc:
9838   case OMPAllocateDeclAttr::OMPLowLatMemAlloc:
9839   case OMPAllocateDeclAttr::OMPThreadMemAlloc:
9840   case OMPAllocateDeclAttr::OMPConstMemAlloc:
9841   case OMPAllocateDeclAttr::OMPPTeamMemAlloc:
9842     AS = LangAS::Default;
9843     return true;
9844   case OMPAllocateDeclAttr::OMPUserDefinedMemAlloc:
9845     llvm_unreachable("Expected predefined allocator for the variables with the "
9846                      "static storage.");
9847   }
9848   return false;
9849 }
9850 
/// Return the current value of the HasRequiresUnifiedSharedMemory flag, which
/// checkArchForUnifiedAddressing() sets when it sees a unified_shared_memory
/// clause.
bool CGOpenMPRuntime::hasRequiresUnifiedSharedMemory() const {
  return HasRequiresUnifiedSharedMemory;
}
9854 
9855 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::DisableAutoDeclareTargetRAII(
9856     CodeGenModule &CGM)
9857     : CGM(CGM) {
9858   if (CGM.getLangOpts().OpenMPIsDevice) {
9859     SavedShouldMarkAsGlobal = CGM.getOpenMPRuntime().ShouldMarkAsGlobal;
9860     CGM.getOpenMPRuntime().ShouldMarkAsGlobal = false;
9861   }
9862 }
9863 
9864 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::~DisableAutoDeclareTargetRAII() {
9865   if (CGM.getLangOpts().OpenMPIsDevice)
9866     CGM.getOpenMPRuntime().ShouldMarkAsGlobal = SavedShouldMarkAsGlobal;
9867 }
9868 
9869 bool CGOpenMPRuntime::markAsGlobalTarget(GlobalDecl GD) {
9870   if (!CGM.getLangOpts().OpenMPIsDevice || !ShouldMarkAsGlobal)
9871     return true;
9872 
9873   StringRef Name = CGM.getMangledName(GD);
9874   const auto *D = cast<FunctionDecl>(GD.getDecl());
9875   // Do not to emit function if it is marked as declare target as it was already
9876   // emitted.
9877   if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(D)) {
9878     if (D->hasBody() && AlreadyEmittedTargetFunctions.count(Name) == 0) {
9879       if (auto *F = dyn_cast_or_null<llvm::Function>(CGM.GetGlobalValue(Name)))
9880         return !F->isDeclaration();
9881       return false;
9882     }
9883     return true;
9884   }
9885 
9886   return !AlreadyEmittedTargetFunctions.insert(Name).second;
9887 }
9888 
9889 llvm::Function *CGOpenMPRuntime::emitRequiresDirectiveRegFun() {
9890   // If we don't have entries or if we are emitting code for the device, we
9891   // don't need to do anything.
9892   if (CGM.getLangOpts().OMPTargetTriples.empty() ||
9893       CGM.getLangOpts().OpenMPSimd || CGM.getLangOpts().OpenMPIsDevice ||
9894       (OffloadEntriesInfoManager.empty() &&
9895        !HasEmittedDeclareTargetRegion &&
9896        !HasEmittedTargetRegion))
9897     return nullptr;
9898 
9899   // Create and register the function that handles the requires directives.
9900   ASTContext &C = CGM.getContext();
9901 
9902   llvm::Function *RequiresRegFn;
9903   {
9904     CodeGenFunction CGF(CGM);
9905     const auto &FI = CGM.getTypes().arrangeNullaryFunction();
9906     llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
9907     std::string ReqName = getName({"omp_offloading", "requires_reg"});
9908     RequiresRegFn = CGM.CreateGlobalInitOrDestructFunction(FTy, ReqName, FI);
9909     CGF.StartFunction(GlobalDecl(), C.VoidTy, RequiresRegFn, FI, {});
9910     OpenMPOffloadingRequiresDirFlags Flags = OMP_REQ_NONE;
9911     // TODO: check for other requires clauses.
9912     // The requires directive takes effect only when a target region is
9913     // present in the compilation unit. Otherwise it is ignored and not
9914     // passed to the runtime. This avoids the runtime from throwing an error
9915     // for mismatching requires clauses across compilation units that don't
9916     // contain at least 1 target region.
9917     assert((HasEmittedTargetRegion ||
9918             HasEmittedDeclareTargetRegion ||
9919             !OffloadEntriesInfoManager.empty()) &&
9920            "Target or declare target region expected.");
9921     if (HasRequiresUnifiedSharedMemory)
9922       Flags = OMP_REQ_UNIFIED_SHARED_MEMORY;
9923     CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_register_requires),
9924         llvm::ConstantInt::get(CGM.Int64Ty, Flags));
9925     CGF.FinishFunction();
9926   }
9927   return RequiresRegFn;
9928 }
9929 
9930 llvm::Function *CGOpenMPRuntime::emitRegistrationFunction() {
9931   // If we have offloading in the current module, we need to emit the entries
9932   // now and register the offloading descriptor.
9933   createOffloadEntriesAndInfoMetadata();
9934 
9935   // Create and register the offloading binary descriptors. This is the main
9936   // entity that captures all the information about offloading in the current
9937   // compilation unit.
9938   return createOffloadingBinaryDescriptorRegistration();
9939 }
9940 
9941 void CGOpenMPRuntime::emitTeamsCall(CodeGenFunction &CGF,
9942                                     const OMPExecutableDirective &D,
9943                                     SourceLocation Loc,
9944                                     llvm::Function *OutlinedFn,
9945                                     ArrayRef<llvm::Value *> CapturedVars) {
9946   if (!CGF.HaveInsertPoint())
9947     return;
9948 
9949   llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
9950   CodeGenFunction::RunCleanupsScope Scope(CGF);
9951 
9952   // Build call __kmpc_fork_teams(loc, n, microtask, var1, .., varn);
9953   llvm::Value *Args[] = {
9954       RTLoc,
9955       CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
9956       CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy())};
9957   llvm::SmallVector<llvm::Value *, 16> RealArgs;
9958   RealArgs.append(std::begin(Args), std::end(Args));
9959   RealArgs.append(CapturedVars.begin(), CapturedVars.end());
9960 
9961   llvm::FunctionCallee RTLFn = createRuntimeFunction(OMPRTL__kmpc_fork_teams);
9962   CGF.EmitRuntimeCall(RTLFn, RealArgs);
9963 }
9964 
9965 void CGOpenMPRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
9966                                          const Expr *NumTeams,
9967                                          const Expr *ThreadLimit,
9968                                          SourceLocation Loc) {
9969   if (!CGF.HaveInsertPoint())
9970     return;
9971 
9972   llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
9973 
9974   llvm::Value *NumTeamsVal =
9975       NumTeams
9976           ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(NumTeams),
9977                                       CGF.CGM.Int32Ty, /* isSigned = */ true)
9978           : CGF.Builder.getInt32(0);
9979 
9980   llvm::Value *ThreadLimitVal =
9981       ThreadLimit
9982           ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit),
9983                                       CGF.CGM.Int32Ty, /* isSigned = */ true)
9984           : CGF.Builder.getInt32(0);
9985 
9986   // Build call __kmpc_push_num_teamss(&loc, global_tid, num_teams, thread_limit)
9987   llvm::Value *PushNumTeamsArgs[] = {RTLoc, getThreadID(CGF, Loc), NumTeamsVal,
9988                                      ThreadLimitVal};
9989   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_num_teams),
9990                       PushNumTeamsArgs);
9991 }
9992 
9993 void CGOpenMPRuntime::emitTargetDataCalls(
9994     CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
9995     const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) {
9996   if (!CGF.HaveInsertPoint())
9997     return;
9998 
9999   // Action used to replace the default codegen action and turn privatization
10000   // off.
10001   PrePostActionTy NoPrivAction;
10002 
10003   // Generate the code for the opening of the data environment. Capture all the
10004   // arguments of the runtime call by reference because they are used in the
10005   // closing of the region.
10006   auto &&BeginThenGen = [this, &D, Device, &Info,
10007                          &CodeGen](CodeGenFunction &CGF, PrePostActionTy &) {
10008     // Fill up the arrays with all the mapped variables.
10009     MappableExprsHandler::MapBaseValuesArrayTy BasePointers;
10010     MappableExprsHandler::MapValuesArrayTy Pointers;
10011     MappableExprsHandler::MapValuesArrayTy Sizes;
10012     MappableExprsHandler::MapFlagsArrayTy MapTypes;
10013 
10014     // Get map clause information.
10015     MappableExprsHandler MCHandler(D, CGF);
10016     MCHandler.generateAllInfo(BasePointers, Pointers, Sizes, MapTypes);
10017 
10018     // Fill up the arrays and create the arguments.
10019     emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info);
10020 
10021     llvm::Value *BasePointersArrayArg = nullptr;
10022     llvm::Value *PointersArrayArg = nullptr;
10023     llvm::Value *SizesArrayArg = nullptr;
10024     llvm::Value *MapTypesArrayArg = nullptr;
10025     emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
10026                                  SizesArrayArg, MapTypesArrayArg, Info);
10027 
10028     // Emit device ID if any.
10029     llvm::Value *DeviceID = nullptr;
10030     if (Device) {
10031       DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
10032                                            CGF.Int64Ty, /*isSigned=*/true);
10033     } else {
10034       DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
10035     }
10036 
10037     // Emit the number of elements in the offloading arrays.
10038     llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);
10039 
10040     llvm::Value *OffloadingArgs[] = {
10041         DeviceID,         PointerNum,    BasePointersArrayArg,
10042         PointersArrayArg, SizesArrayArg, MapTypesArrayArg};
10043     CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_target_data_begin),
10044                         OffloadingArgs);
10045 
10046     // If device pointer privatization is required, emit the body of the region
10047     // here. It will have to be duplicated: with and without privatization.
10048     if (!Info.CaptureDeviceAddrMap.empty())
10049       CodeGen(CGF);
10050   };
10051 
10052   // Generate code for the closing of the data region.
10053   auto &&EndThenGen = [this, Device, &Info](CodeGenFunction &CGF,
10054                                             PrePostActionTy &) {
10055     assert(Info.isValid() && "Invalid data environment closing arguments.");
10056 
10057     llvm::Value *BasePointersArrayArg = nullptr;
10058     llvm::Value *PointersArrayArg = nullptr;
10059     llvm::Value *SizesArrayArg = nullptr;
10060     llvm::Value *MapTypesArrayArg = nullptr;
10061     emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
10062                                  SizesArrayArg, MapTypesArrayArg, Info);
10063 
10064     // Emit device ID if any.
10065     llvm::Value *DeviceID = nullptr;
10066     if (Device) {
10067       DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
10068                                            CGF.Int64Ty, /*isSigned=*/true);
10069     } else {
10070       DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
10071     }
10072 
10073     // Emit the number of elements in the offloading arrays.
10074     llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);
10075 
10076     llvm::Value *OffloadingArgs[] = {
10077         DeviceID,         PointerNum,    BasePointersArrayArg,
10078         PointersArrayArg, SizesArrayArg, MapTypesArrayArg};
10079     CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_target_data_end),
10080                         OffloadingArgs);
10081   };
10082 
10083   // If we need device pointer privatization, we need to emit the body of the
10084   // region with no privatization in the 'else' branch of the conditional.
10085   // Otherwise, we don't have to do anything.
10086   auto &&BeginElseGen = [&Info, &CodeGen, &NoPrivAction](CodeGenFunction &CGF,
10087                                                          PrePostActionTy &) {
10088     if (!Info.CaptureDeviceAddrMap.empty()) {
10089       CodeGen.setAction(NoPrivAction);
10090       CodeGen(CGF);
10091     }
10092   };
10093 
10094   // We don't have to do anything to close the region if the if clause evaluates
10095   // to false.
10096   auto &&EndElseGen = [](CodeGenFunction &CGF, PrePostActionTy &) {};
10097 
10098   if (IfCond) {
10099     emitOMPIfClause(CGF, IfCond, BeginThenGen, BeginElseGen);
10100   } else {
10101     RegionCodeGenTy RCG(BeginThenGen);
10102     RCG(CGF);
10103   }
10104 
10105   // If we don't require privatization of device pointers, we emit the body in
10106   // between the runtime calls. This avoids duplicating the body code.
10107   if (Info.CaptureDeviceAddrMap.empty()) {
10108     CodeGen.setAction(NoPrivAction);
10109     CodeGen(CGF);
10110   }
10111 
10112   if (IfCond) {
10113     emitOMPIfClause(CGF, IfCond, EndThenGen, EndElseGen);
10114   } else {
10115     RegionCodeGenTy RCG(EndThenGen);
10116     RCG(CGF);
10117   }
10118 }
10119 
10120 void CGOpenMPRuntime::emitTargetDataStandAloneCall(
10121     CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
10122     const Expr *Device) {
10123   if (!CGF.HaveInsertPoint())
10124     return;
10125 
10126   assert((isa<OMPTargetEnterDataDirective>(D) ||
10127           isa<OMPTargetExitDataDirective>(D) ||
10128           isa<OMPTargetUpdateDirective>(D)) &&
10129          "Expecting either target enter, exit data, or update directives.");
10130 
10131   CodeGenFunction::OMPTargetDataInfo InputInfo;
10132   llvm::Value *MapTypesArray = nullptr;
10133   // Generate the code for the opening of the data environment.
10134   auto &&ThenGen = [this, &D, Device, &InputInfo,
10135                     &MapTypesArray](CodeGenFunction &CGF, PrePostActionTy &) {
10136     // Emit device ID if any.
10137     llvm::Value *DeviceID = nullptr;
10138     if (Device) {
10139       DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
10140                                            CGF.Int64Ty, /*isSigned=*/true);
10141     } else {
10142       DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
10143     }
10144 
10145     // Emit the number of elements in the offloading arrays.
10146     llvm::Constant *PointerNum =
10147         CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);
10148 
10149     llvm::Value *OffloadingArgs[] = {DeviceID,
10150                                      PointerNum,
10151                                      InputInfo.BasePointersArray.getPointer(),
10152                                      InputInfo.PointersArray.getPointer(),
10153                                      InputInfo.SizesArray.getPointer(),
10154                                      MapTypesArray};
10155 
10156     // Select the right runtime function call for each expected standalone
10157     // directive.
10158     const bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
10159     OpenMPRTLFunction RTLFn;
10160     switch (D.getDirectiveKind()) {
10161     case OMPD_target_enter_data:
10162       RTLFn = HasNowait ? OMPRTL__tgt_target_data_begin_nowait
10163                         : OMPRTL__tgt_target_data_begin;
10164       break;
10165     case OMPD_target_exit_data:
10166       RTLFn = HasNowait ? OMPRTL__tgt_target_data_end_nowait
10167                         : OMPRTL__tgt_target_data_end;
10168       break;
10169     case OMPD_target_update:
10170       RTLFn = HasNowait ? OMPRTL__tgt_target_data_update_nowait
10171                         : OMPRTL__tgt_target_data_update;
10172       break;
10173     case OMPD_parallel:
10174     case OMPD_for:
10175     case OMPD_parallel_for:
10176     case OMPD_parallel_sections:
10177     case OMPD_for_simd:
10178     case OMPD_parallel_for_simd:
10179     case OMPD_cancel:
10180     case OMPD_cancellation_point:
10181     case OMPD_ordered:
10182     case OMPD_threadprivate:
10183     case OMPD_allocate:
10184     case OMPD_task:
10185     case OMPD_simd:
10186     case OMPD_sections:
10187     case OMPD_section:
10188     case OMPD_single:
10189     case OMPD_master:
10190     case OMPD_critical:
10191     case OMPD_taskyield:
10192     case OMPD_barrier:
10193     case OMPD_taskwait:
10194     case OMPD_taskgroup:
10195     case OMPD_atomic:
10196     case OMPD_flush:
10197     case OMPD_teams:
10198     case OMPD_target_data:
10199     case OMPD_distribute:
10200     case OMPD_distribute_simd:
10201     case OMPD_distribute_parallel_for:
10202     case OMPD_distribute_parallel_for_simd:
10203     case OMPD_teams_distribute:
10204     case OMPD_teams_distribute_simd:
10205     case OMPD_teams_distribute_parallel_for:
10206     case OMPD_teams_distribute_parallel_for_simd:
10207     case OMPD_declare_simd:
10208     case OMPD_declare_target:
10209     case OMPD_end_declare_target:
10210     case OMPD_declare_reduction:
10211     case OMPD_declare_mapper:
10212     case OMPD_taskloop:
10213     case OMPD_taskloop_simd:
10214     case OMPD_target:
10215     case OMPD_target_simd:
10216     case OMPD_target_teams_distribute:
10217     case OMPD_target_teams_distribute_simd:
10218     case OMPD_target_teams_distribute_parallel_for:
10219     case OMPD_target_teams_distribute_parallel_for_simd:
10220     case OMPD_target_teams:
10221     case OMPD_target_parallel:
10222     case OMPD_target_parallel_for:
10223     case OMPD_target_parallel_for_simd:
10224     case OMPD_requires:
10225     case OMPD_unknown:
10226       llvm_unreachable("Unexpected standalone target data directive.");
10227       break;
10228     }
10229     CGF.EmitRuntimeCall(createRuntimeFunction(RTLFn), OffloadingArgs);
10230   };
10231 
10232   auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray](
10233                              CodeGenFunction &CGF, PrePostActionTy &) {
10234     // Fill up the arrays with all the mapped variables.
10235     MappableExprsHandler::MapBaseValuesArrayTy BasePointers;
10236     MappableExprsHandler::MapValuesArrayTy Pointers;
10237     MappableExprsHandler::MapValuesArrayTy Sizes;
10238     MappableExprsHandler::MapFlagsArrayTy MapTypes;
10239 
10240     // Get map clause information.
10241     MappableExprsHandler MEHandler(D, CGF);
10242     MEHandler.generateAllInfo(BasePointers, Pointers, Sizes, MapTypes);
10243 
10244     TargetDataInfo Info;
10245     // Fill up the arrays and create the arguments.
10246     emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info);
10247     emitOffloadingArraysArgument(CGF, Info.BasePointersArray,
10248                                  Info.PointersArray, Info.SizesArray,
10249                                  Info.MapTypesArray, Info);
10250     InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
10251     InputInfo.BasePointersArray =
10252         Address(Info.BasePointersArray, CGM.getPointerAlign());
10253     InputInfo.PointersArray =
10254         Address(Info.PointersArray, CGM.getPointerAlign());
10255     InputInfo.SizesArray =
10256         Address(Info.SizesArray, CGM.getPointerAlign());
10257     MapTypesArray = Info.MapTypesArray;
10258     if (D.hasClausesOfKind<OMPDependClause>())
10259       CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
10260     else
10261       emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
10262   };
10263 
10264   if (IfCond) {
10265     emitOMPIfClause(CGF, IfCond, TargetThenGen,
10266                     [](CodeGenFunction &CGF, PrePostActionTy &) {});
10267   } else {
10268     RegionCodeGenTy ThenRCG(TargetThenGen);
10269     ThenRCG(CGF);
10270   }
10271 }
10272 
10273 namespace {
10274   /// Kind of parameter in a function with 'declare simd' directive.
10275   enum ParamKindTy { LinearWithVarStride, Linear, Uniform, Vector };
10276   /// Attribute set of the parameter.
10277   struct ParamAttrTy {
10278     ParamKindTy Kind = Vector;
10279     llvm::APSInt StrideOrArg;
10280     llvm::APSInt Alignment;
10281   };
10282 } // namespace
10283 
10284 static unsigned evaluateCDTSize(const FunctionDecl *FD,
10285                                 ArrayRef<ParamAttrTy> ParamAttrs) {
10286   // Every vector variant of a SIMD-enabled function has a vector length (VLEN).
10287   // If OpenMP clause "simdlen" is used, the VLEN is the value of the argument
10288   // of that clause. The VLEN value must be power of 2.
10289   // In other case the notion of the function`s "characteristic data type" (CDT)
10290   // is used to compute the vector length.
10291   // CDT is defined in the following order:
10292   //   a) For non-void function, the CDT is the return type.
10293   //   b) If the function has any non-uniform, non-linear parameters, then the
10294   //   CDT is the type of the first such parameter.
10295   //   c) If the CDT determined by a) or b) above is struct, union, or class
10296   //   type which is pass-by-value (except for the type that maps to the
10297   //   built-in complex data type), the characteristic data type is int.
10298   //   d) If none of the above three cases is applicable, the CDT is int.
10299   // The VLEN is then determined based on the CDT and the size of vector
10300   // register of that ISA for which current vector version is generated. The
10301   // VLEN is computed using the formula below:
10302   //   VLEN  = sizeof(vector_register) / sizeof(CDT),
10303   // where vector register size specified in section 3.2.1 Registers and the
10304   // Stack Frame of original AMD64 ABI document.
10305   QualType RetType = FD->getReturnType();
10306   if (RetType.isNull())
10307     return 0;
10308   ASTContext &C = FD->getASTContext();
10309   QualType CDT;
10310   if (!RetType.isNull() && !RetType->isVoidType()) {
10311     CDT = RetType;
10312   } else {
10313     unsigned Offset = 0;
10314     if (const auto *MD = dyn_cast<CXXMethodDecl>(FD)) {
10315       if (ParamAttrs[Offset].Kind == Vector)
10316         CDT = C.getPointerType(C.getRecordType(MD->getParent()));
10317       ++Offset;
10318     }
10319     if (CDT.isNull()) {
10320       for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
10321         if (ParamAttrs[I + Offset].Kind == Vector) {
10322           CDT = FD->getParamDecl(I)->getType();
10323           break;
10324         }
10325       }
10326     }
10327   }
10328   if (CDT.isNull())
10329     CDT = C.IntTy;
10330   CDT = CDT->getCanonicalTypeUnqualified();
10331   if (CDT->isRecordType() || CDT->isUnionType())
10332     CDT = C.IntTy;
10333   return C.getTypeSize(CDT);
10334 }
10335 
10336 static void
10337 emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn,
10338                            const llvm::APSInt &VLENVal,
10339                            ArrayRef<ParamAttrTy> ParamAttrs,
10340                            OMPDeclareSimdDeclAttr::BranchStateTy State) {
10341   struct ISADataTy {
10342     char ISA;
10343     unsigned VecRegSize;
10344   };
10345   ISADataTy ISAData[] = {
10346       {
10347           'b', 128
10348       }, // SSE
10349       {
10350           'c', 256
10351       }, // AVX
10352       {
10353           'd', 256
10354       }, // AVX2
10355       {
10356           'e', 512
10357       }, // AVX512
10358   };
10359   llvm::SmallVector<char, 2> Masked;
10360   switch (State) {
10361   case OMPDeclareSimdDeclAttr::BS_Undefined:
10362     Masked.push_back('N');
10363     Masked.push_back('M');
10364     break;
10365   case OMPDeclareSimdDeclAttr::BS_Notinbranch:
10366     Masked.push_back('N');
10367     break;
10368   case OMPDeclareSimdDeclAttr::BS_Inbranch:
10369     Masked.push_back('M');
10370     break;
10371   }
10372   for (char Mask : Masked) {
10373     for (const ISADataTy &Data : ISAData) {
10374       SmallString<256> Buffer;
10375       llvm::raw_svector_ostream Out(Buffer);
10376       Out << "_ZGV" << Data.ISA << Mask;
10377       if (!VLENVal) {
10378         unsigned NumElts = evaluateCDTSize(FD, ParamAttrs);
10379         assert(NumElts && "Non-zero simdlen/cdtsize expected");
10380         Out << llvm::APSInt::getUnsigned(Data.VecRegSize / NumElts);
10381       } else {
10382         Out << VLENVal;
10383       }
10384       for (const ParamAttrTy &ParamAttr : ParamAttrs) {
10385         switch (ParamAttr.Kind){
10386         case LinearWithVarStride:
10387           Out << 's' << ParamAttr.StrideOrArg;
10388           break;
10389         case Linear:
10390           Out << 'l';
10391           if (!!ParamAttr.StrideOrArg)
10392             Out << ParamAttr.StrideOrArg;
10393           break;
10394         case Uniform:
10395           Out << 'u';
10396           break;
10397         case Vector:
10398           Out << 'v';
10399           break;
10400         }
10401         if (!!ParamAttr.Alignment)
10402           Out << 'a' << ParamAttr.Alignment;
10403       }
10404       Out << '_' << Fn->getName();
10405       Fn->addFnAttr(Out.str());
10406     }
10407   }
10408 }
10409 
// These are the functions needed to mangle the names of the vector
// functions generated by the compiler, according to the rules
// defined in the "Vector Function ABI specifications for AArch64",
// available at
// https://developer.arm.com/products/software-development-tools/hpc/arm-compiler-for-hpc/vector-function-abi.
10415 
10416 /// Maps To Vector (MTV), as defined in 3.1.1 of the AAVFABI.
10417 ///
10418 /// TODO: Need to implement the behavior for reference marked with a
10419 /// var or no linear modifiers (1.b in the section). For this, we
10420 /// need to extend ParamKindTy to support the linear modifiers.
10421 static bool getAArch64MTV(QualType QT, ParamKindTy Kind) {
10422   QT = QT.getCanonicalType();
10423 
10424   if (QT->isVoidType())
10425     return false;
10426 
10427   if (Kind == ParamKindTy::Uniform)
10428     return false;
10429 
10430   if (Kind == ParamKindTy::Linear)
10431     return false;
10432 
10433   // TODO: Handle linear references with modifiers
10434 
10435   if (Kind == ParamKindTy::LinearWithVarStride)
10436     return false;
10437 
10438   return true;
10439 }
10440 
10441 /// Pass By Value (PBV), as defined in 3.1.2 of the AAVFABI.
10442 static bool getAArch64PBV(QualType QT, ASTContext &C) {
10443   QT = QT.getCanonicalType();
10444   unsigned Size = C.getTypeSize(QT);
10445 
10446   // Only scalars and complex within 16 bytes wide set PVB to true.
10447   if (Size != 8 && Size != 16 && Size != 32 && Size != 64 && Size != 128)
10448     return false;
10449 
10450   if (QT->isFloatingType())
10451     return true;
10452 
10453   if (QT->isIntegerType())
10454     return true;
10455 
10456   if (QT->isPointerType())
10457     return true;
10458 
10459   // TODO: Add support for complex types (section 3.1.2, item 2).
10460 
10461   return false;
10462 }
10463 
10464 /// Computes the lane size (LS) of a return type or of an input parameter,
10465 /// as defined by `LS(P)` in 3.2.1 of the AAVFABI.
10466 /// TODO: Add support for references, section 3.2.1, item 1.
10467 static unsigned getAArch64LS(QualType QT, ParamKindTy Kind, ASTContext &C) {
10468   if (getAArch64MTV(QT, Kind) && QT.getCanonicalType()->isPointerType()) {
10469     QualType PTy = QT.getCanonicalType()->getPointeeType();
10470     if (getAArch64PBV(PTy, C))
10471       return C.getTypeSize(PTy);
10472   }
10473   if (getAArch64PBV(QT, C))
10474     return C.getTypeSize(QT);
10475 
10476   return C.getTypeSize(C.getUIntPtrType());
10477 }
10478 
10479 // Get Narrowest Data Size (NDS) and Widest Data Size (WDS) from the
10480 // signature of the scalar function, as defined in 3.2.2 of the
10481 // AAVFABI.
10482 static std::tuple<unsigned, unsigned, bool>
10483 getNDSWDS(const FunctionDecl *FD, ArrayRef<ParamAttrTy> ParamAttrs) {
10484   QualType RetType = FD->getReturnType().getCanonicalType();
10485 
10486   ASTContext &C = FD->getASTContext();
10487 
10488   bool OutputBecomesInput = false;
10489 
10490   llvm::SmallVector<unsigned, 8> Sizes;
10491   if (!RetType->isVoidType()) {
10492     Sizes.push_back(getAArch64LS(RetType, ParamKindTy::Vector, C));
10493     if (!getAArch64PBV(RetType, C) && getAArch64MTV(RetType, {}))
10494       OutputBecomesInput = true;
10495   }
10496   for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
10497     QualType QT = FD->getParamDecl(I)->getType().getCanonicalType();
10498     Sizes.push_back(getAArch64LS(QT, ParamAttrs[I].Kind, C));
10499   }
10500 
10501   assert(!Sizes.empty() && "Unable to determine NDS and WDS.");
10502   // The LS of a function parameter / return value can only be a power
10503   // of 2, starting from 8 bits, up to 128.
10504   assert(std::all_of(Sizes.begin(), Sizes.end(),
10505                      [](unsigned Size) {
10506                        return Size == 8 || Size == 16 || Size == 32 ||
10507                               Size == 64 || Size == 128;
10508                      }) &&
10509          "Invalid size");
10510 
10511   return std::make_tuple(*std::min_element(std::begin(Sizes), std::end(Sizes)),
10512                          *std::max_element(std::begin(Sizes), std::end(Sizes)),
10513                          OutputBecomesInput);
10514 }
10515 
10516 /// Mangle the parameter part of the vector function name according to
10517 /// their OpenMP classification. The mangling function is defined in
10518 /// section 3.5 of the AAVFABI.
10519 static std::string mangleVectorParameters(ArrayRef<ParamAttrTy> ParamAttrs) {
10520   SmallString<256> Buffer;
10521   llvm::raw_svector_ostream Out(Buffer);
10522   for (const auto &ParamAttr : ParamAttrs) {
10523     switch (ParamAttr.Kind) {
10524     case LinearWithVarStride:
10525       Out << "ls" << ParamAttr.StrideOrArg;
10526       break;
10527     case Linear:
10528       Out << 'l';
10529       // Don't print the step value if it is not present or if it is
10530       // equal to 1.
10531       if (!!ParamAttr.StrideOrArg && ParamAttr.StrideOrArg != 1)
10532         Out << ParamAttr.StrideOrArg;
10533       break;
10534     case Uniform:
10535       Out << 'u';
10536       break;
10537     case Vector:
10538       Out << 'v';
10539       break;
10540     }
10541 
10542     if (!!ParamAttr.Alignment)
10543       Out << 'a' << ParamAttr.Alignment;
10544   }
10545 
10546   return Out.str();
10547 }
10548 
10549 // Function used to add the attribute. The parameter `VLEN` is
10550 // templated to allow the use of "x" when targeting scalable functions
10551 // for SVE.
10552 template <typename T>
10553 static void addAArch64VectorName(T VLEN, StringRef LMask, StringRef Prefix,
10554                                  char ISA, StringRef ParSeq,
10555                                  StringRef MangledName, bool OutputBecomesInput,
10556                                  llvm::Function *Fn) {
10557   SmallString<256> Buffer;
10558   llvm::raw_svector_ostream Out(Buffer);
10559   Out << Prefix << ISA << LMask << VLEN;
10560   if (OutputBecomesInput)
10561     Out << "v";
10562   Out << ParSeq << "_" << MangledName;
10563   Fn->addFnAttr(Out.str());
10564 }
10565 
10566 // Helper function to generate the Advanced SIMD names depending on
10567 // the value of the NDS when simdlen is not present.
10568 static void addAArch64AdvSIMDNDSNames(unsigned NDS, StringRef Mask,
10569                                       StringRef Prefix, char ISA,
10570                                       StringRef ParSeq, StringRef MangledName,
10571                                       bool OutputBecomesInput,
10572                                       llvm::Function *Fn) {
10573   switch (NDS) {
10574   case 8:
10575     addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
10576                          OutputBecomesInput, Fn);
10577     addAArch64VectorName(16, Mask, Prefix, ISA, ParSeq, MangledName,
10578                          OutputBecomesInput, Fn);
10579     break;
10580   case 16:
10581     addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
10582                          OutputBecomesInput, Fn);
10583     addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
10584                          OutputBecomesInput, Fn);
10585     break;
10586   case 32:
10587     addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
10588                          OutputBecomesInput, Fn);
10589     addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
10590                          OutputBecomesInput, Fn);
10591     break;
10592   case 64:
10593   case 128:
10594     addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
10595                          OutputBecomesInput, Fn);
10596     break;
10597   default:
10598     llvm_unreachable("Scalar type is too wide.");
10599   }
10600 }
10601 
10602 /// Emit vector function attributes for AArch64, as defined in the AAVFABI.
10603 static void emitAArch64DeclareSimdFunction(
10604     CodeGenModule &CGM, const FunctionDecl *FD, unsigned UserVLEN,
10605     ArrayRef<ParamAttrTy> ParamAttrs,
10606     OMPDeclareSimdDeclAttr::BranchStateTy State, StringRef MangledName,
10607     char ISA, unsigned VecRegSize, llvm::Function *Fn, SourceLocation SLoc) {
10608 
10609   // Get basic data for building the vector signature.
10610   const auto Data = getNDSWDS(FD, ParamAttrs);
10611   const unsigned NDS = std::get<0>(Data);
10612   const unsigned WDS = std::get<1>(Data);
10613   const bool OutputBecomesInput = std::get<2>(Data);
10614 
10615   // Check the values provided via `simdlen` by the user.
10616   // 1. A `simdlen(1)` doesn't produce vector signatures,
10617   if (UserVLEN == 1) {
10618     unsigned DiagID = CGM.getDiags().getCustomDiagID(
10619         DiagnosticsEngine::Warning,
10620         "The clause simdlen(1) has no effect when targeting aarch64.");
10621     CGM.getDiags().Report(SLoc, DiagID);
10622     return;
10623   }
10624 
10625   // 2. Section 3.3.1, item 1: user input must be a power of 2 for
10626   // Advanced SIMD output.
10627   if (ISA == 'n' && UserVLEN && !llvm::isPowerOf2_32(UserVLEN)) {
10628     unsigned DiagID = CGM.getDiags().getCustomDiagID(
10629         DiagnosticsEngine::Warning, "The value specified in simdlen must be a "
10630                                     "power of 2 when targeting Advanced SIMD.");
10631     CGM.getDiags().Report(SLoc, DiagID);
10632     return;
10633   }
10634 
10635   // 3. Section 3.4.1. SVE fixed lengh must obey the architectural
10636   // limits.
10637   if (ISA == 's' && UserVLEN != 0) {
10638     if ((UserVLEN * WDS > 2048) || (UserVLEN * WDS % 128 != 0)) {
10639       unsigned DiagID = CGM.getDiags().getCustomDiagID(
10640           DiagnosticsEngine::Warning, "The clause simdlen must fit the %0-bit "
10641                                       "lanes in the architectural constraints "
10642                                       "for SVE (min is 128-bit, max is "
10643                                       "2048-bit, by steps of 128-bit)");
10644       CGM.getDiags().Report(SLoc, DiagID) << WDS;
10645       return;
10646     }
10647   }
10648 
10649   // Sort out parameter sequence.
10650   const std::string ParSeq = mangleVectorParameters(ParamAttrs);
10651   StringRef Prefix = "_ZGV";
10652   // Generate simdlen from user input (if any).
10653   if (UserVLEN) {
10654     if (ISA == 's') {
10655       // SVE generates only a masked function.
10656       addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
10657                            OutputBecomesInput, Fn);
10658     } else {
10659       assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
10660       // Advanced SIMD generates one or two functions, depending on
10661       // the `[not]inbranch` clause.
10662       switch (State) {
10663       case OMPDeclareSimdDeclAttr::BS_Undefined:
10664         addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
10665                              OutputBecomesInput, Fn);
10666         addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
10667                              OutputBecomesInput, Fn);
10668         break;
10669       case OMPDeclareSimdDeclAttr::BS_Notinbranch:
10670         addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
10671                              OutputBecomesInput, Fn);
10672         break;
10673       case OMPDeclareSimdDeclAttr::BS_Inbranch:
10674         addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
10675                              OutputBecomesInput, Fn);
10676         break;
10677       }
10678     }
10679   } else {
10680     // If no user simdlen is provided, follow the AAVFABI rules for
10681     // generating the vector length.
10682     if (ISA == 's') {
10683       // SVE, section 3.4.1, item 1.
10684       addAArch64VectorName("x", "M", Prefix, ISA, ParSeq, MangledName,
10685                            OutputBecomesInput, Fn);
10686     } else {
10687       assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
10688       // Advanced SIMD, Section 3.3.1 of the AAVFABI, generates one or
10689       // two vector names depending on the use of the clause
10690       // `[not]inbranch`.
10691       switch (State) {
10692       case OMPDeclareSimdDeclAttr::BS_Undefined:
10693         addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
10694                                   OutputBecomesInput, Fn);
10695         addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
10696                                   OutputBecomesInput, Fn);
10697         break;
10698       case OMPDeclareSimdDeclAttr::BS_Notinbranch:
10699         addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
10700                                   OutputBecomesInput, Fn);
10701         break;
10702       case OMPDeclareSimdDeclAttr::BS_Inbranch:
10703         addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
10704                                   OutputBecomesInput, Fn);
10705         break;
10706       }
10707     }
10708   }
10709 }
10710 
10711 void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD,
10712                                               llvm::Function *Fn) {
10713   ASTContext &C = CGM.getContext();
10714   FD = FD->getMostRecentDecl();
10715   // Map params to their positions in function decl.
10716   llvm::DenseMap<const Decl *, unsigned> ParamPositions;
10717   if (isa<CXXMethodDecl>(FD))
10718     ParamPositions.try_emplace(FD, 0);
10719   unsigned ParamPos = ParamPositions.size();
10720   for (const ParmVarDecl *P : FD->parameters()) {
10721     ParamPositions.try_emplace(P->getCanonicalDecl(), ParamPos);
10722     ++ParamPos;
10723   }
10724   while (FD) {
10725     for (const auto *Attr : FD->specific_attrs<OMPDeclareSimdDeclAttr>()) {
10726       llvm::SmallVector<ParamAttrTy, 8> ParamAttrs(ParamPositions.size());
10727       // Mark uniform parameters.
10728       for (const Expr *E : Attr->uniforms()) {
10729         E = E->IgnoreParenImpCasts();
10730         unsigned Pos;
10731         if (isa<CXXThisExpr>(E)) {
10732           Pos = ParamPositions[FD];
10733         } else {
10734           const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
10735                                 ->getCanonicalDecl();
10736           Pos = ParamPositions[PVD];
10737         }
10738         ParamAttrs[Pos].Kind = Uniform;
10739       }
10740       // Get alignment info.
10741       auto NI = Attr->alignments_begin();
10742       for (const Expr *E : Attr->aligneds()) {
10743         E = E->IgnoreParenImpCasts();
10744         unsigned Pos;
10745         QualType ParmTy;
10746         if (isa<CXXThisExpr>(E)) {
10747           Pos = ParamPositions[FD];
10748           ParmTy = E->getType();
10749         } else {
10750           const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
10751                                 ->getCanonicalDecl();
10752           Pos = ParamPositions[PVD];
10753           ParmTy = PVD->getType();
10754         }
10755         ParamAttrs[Pos].Alignment =
10756             (*NI)
10757                 ? (*NI)->EvaluateKnownConstInt(C)
10758                 : llvm::APSInt::getUnsigned(
10759                       C.toCharUnitsFromBits(C.getOpenMPDefaultSimdAlign(ParmTy))
10760                           .getQuantity());
10761         ++NI;
10762       }
10763       // Mark linear parameters.
10764       auto SI = Attr->steps_begin();
10765       auto MI = Attr->modifiers_begin();
10766       for (const Expr *E : Attr->linears()) {
10767         E = E->IgnoreParenImpCasts();
10768         unsigned Pos;
10769         if (isa<CXXThisExpr>(E)) {
10770           Pos = ParamPositions[FD];
10771         } else {
10772           const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
10773                                 ->getCanonicalDecl();
10774           Pos = ParamPositions[PVD];
10775         }
10776         ParamAttrTy &ParamAttr = ParamAttrs[Pos];
10777         ParamAttr.Kind = Linear;
10778         if (*SI) {
10779           Expr::EvalResult Result;
10780           if (!(*SI)->EvaluateAsInt(Result, C, Expr::SE_AllowSideEffects)) {
10781             if (const auto *DRE =
10782                     cast<DeclRefExpr>((*SI)->IgnoreParenImpCasts())) {
10783               if (const auto *StridePVD = cast<ParmVarDecl>(DRE->getDecl())) {
10784                 ParamAttr.Kind = LinearWithVarStride;
10785                 ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(
10786                     ParamPositions[StridePVD->getCanonicalDecl()]);
10787               }
10788             }
10789           } else {
10790             ParamAttr.StrideOrArg = Result.Val.getInt();
10791           }
10792         }
10793         ++SI;
10794         ++MI;
10795       }
10796       llvm::APSInt VLENVal;
10797       SourceLocation ExprLoc;
10798       const Expr *VLENExpr = Attr->getSimdlen();
10799       if (VLENExpr) {
10800         VLENVal = VLENExpr->EvaluateKnownConstInt(C);
10801         ExprLoc = VLENExpr->getExprLoc();
10802       }
10803       OMPDeclareSimdDeclAttr::BranchStateTy State = Attr->getBranchState();
10804       if (CGM.getTriple().getArch() == llvm::Triple::x86 ||
10805           CGM.getTriple().getArch() == llvm::Triple::x86_64) {
10806         emitX86DeclareSimdFunction(FD, Fn, VLENVal, ParamAttrs, State);
10807       } else if (CGM.getTriple().getArch() == llvm::Triple::aarch64) {
10808         unsigned VLEN = VLENVal.getExtValue();
10809         StringRef MangledName = Fn->getName();
10810         if (CGM.getTarget().hasFeature("sve"))
10811           emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
10812                                          MangledName, 's', 128, Fn, ExprLoc);
10813         if (CGM.getTarget().hasFeature("neon"))
10814           emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
10815                                          MangledName, 'n', 128, Fn, ExprLoc);
10816       }
10817     }
10818     FD = FD->getPreviousDecl();
10819   }
10820 }
10821 
10822 namespace {
10823 /// Cleanup action for doacross support.
10824 class DoacrossCleanupTy final : public EHScopeStack::Cleanup {
10825 public:
10826   static const int DoacrossFinArgs = 2;
10827 
10828 private:
10829   llvm::FunctionCallee RTLFn;
10830   llvm::Value *Args[DoacrossFinArgs];
10831 
10832 public:
10833   DoacrossCleanupTy(llvm::FunctionCallee RTLFn,
10834                     ArrayRef<llvm::Value *> CallArgs)
10835       : RTLFn(RTLFn) {
10836     assert(CallArgs.size() == DoacrossFinArgs);
10837     std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args));
10838   }
10839   void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
10840     if (!CGF.HaveInsertPoint())
10841       return;
10842     CGF.EmitRuntimeCall(RTLFn, Args);
10843   }
10844 };
10845 } // namespace
10846 
10847 void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF,
10848                                        const OMPLoopDirective &D,
10849                                        ArrayRef<Expr *> NumIterations) {
10850   if (!CGF.HaveInsertPoint())
10851     return;
10852 
10853   ASTContext &C = CGM.getContext();
10854   QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
10855   RecordDecl *RD;
10856   if (KmpDimTy.isNull()) {
10857     // Build struct kmp_dim {  // loop bounds info casted to kmp_int64
10858     //  kmp_int64 lo; // lower
10859     //  kmp_int64 up; // upper
10860     //  kmp_int64 st; // stride
10861     // };
10862     RD = C.buildImplicitRecord("kmp_dim");
10863     RD->startDefinition();
10864     addFieldToRecordDecl(C, RD, Int64Ty);
10865     addFieldToRecordDecl(C, RD, Int64Ty);
10866     addFieldToRecordDecl(C, RD, Int64Ty);
10867     RD->completeDefinition();
10868     KmpDimTy = C.getRecordType(RD);
10869   } else {
10870     RD = cast<RecordDecl>(KmpDimTy->getAsTagDecl());
10871   }
10872   llvm::APInt Size(/*numBits=*/32, NumIterations.size());
10873   QualType ArrayTy =
10874       C.getConstantArrayType(KmpDimTy, Size, ArrayType::Normal, 0);
10875 
10876   Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims");
10877   CGF.EmitNullInitialization(DimsAddr, ArrayTy);
10878   enum { LowerFD = 0, UpperFD, StrideFD };
10879   // Fill dims with data.
10880   for (unsigned I = 0, E = NumIterations.size(); I < E; ++I) {
10881     LValue DimsLVal = CGF.MakeAddrLValue(
10882         CGF.Builder.CreateConstArrayGEP(DimsAddr, I), KmpDimTy);
10883     // dims.upper = num_iterations;
10884     LValue UpperLVal = CGF.EmitLValueForField(
10885         DimsLVal, *std::next(RD->field_begin(), UpperFD));
10886     llvm::Value *NumIterVal =
10887         CGF.EmitScalarConversion(CGF.EmitScalarExpr(NumIterations[I]),
10888                                  D.getNumIterations()->getType(), Int64Ty,
10889                                  D.getNumIterations()->getExprLoc());
10890     CGF.EmitStoreOfScalar(NumIterVal, UpperLVal);
10891     // dims.stride = 1;
10892     LValue StrideLVal = CGF.EmitLValueForField(
10893         DimsLVal, *std::next(RD->field_begin(), StrideFD));
10894     CGF.EmitStoreOfScalar(llvm::ConstantInt::getSigned(CGM.Int64Ty, /*V=*/1),
10895                           StrideLVal);
10896   }
10897 
10898   // Build call void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid,
10899   // kmp_int32 num_dims, struct kmp_dim * dims);
10900   llvm::Value *Args[] = {
10901       emitUpdateLocation(CGF, D.getBeginLoc()),
10902       getThreadID(CGF, D.getBeginLoc()),
10903       llvm::ConstantInt::getSigned(CGM.Int32Ty, NumIterations.size()),
10904       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
10905           CGF.Builder.CreateConstArrayGEP(DimsAddr, 0).getPointer(),
10906           CGM.VoidPtrTy)};
10907 
10908   llvm::FunctionCallee RTLFn =
10909       createRuntimeFunction(OMPRTL__kmpc_doacross_init);
10910   CGF.EmitRuntimeCall(RTLFn, Args);
10911   llvm::Value *FiniArgs[DoacrossCleanupTy::DoacrossFinArgs] = {
10912       emitUpdateLocation(CGF, D.getEndLoc()), getThreadID(CGF, D.getEndLoc())};
10913   llvm::FunctionCallee FiniRTLFn =
10914       createRuntimeFunction(OMPRTL__kmpc_doacross_fini);
10915   CGF.EHStack.pushCleanup<DoacrossCleanupTy>(NormalAndEHCleanup, FiniRTLFn,
10916                                              llvm::makeArrayRef(FiniArgs));
10917 }
10918 
10919 void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
10920                                           const OMPDependClause *C) {
10921   QualType Int64Ty =
10922       CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
10923   llvm::APInt Size(/*numBits=*/32, C->getNumLoops());
10924   QualType ArrayTy = CGM.getContext().getConstantArrayType(
10925       Int64Ty, Size, ArrayType::Normal, 0);
10926   Address CntAddr = CGF.CreateMemTemp(ArrayTy, ".cnt.addr");
10927   for (unsigned I = 0, E = C->getNumLoops(); I < E; ++I) {
10928     const Expr *CounterVal = C->getLoopData(I);
10929     assert(CounterVal);
10930     llvm::Value *CntVal = CGF.EmitScalarConversion(
10931         CGF.EmitScalarExpr(CounterVal), CounterVal->getType(), Int64Ty,
10932         CounterVal->getExprLoc());
10933     CGF.EmitStoreOfScalar(CntVal, CGF.Builder.CreateConstArrayGEP(CntAddr, I),
10934                           /*Volatile=*/false, Int64Ty);
10935   }
10936   llvm::Value *Args[] = {
10937       emitUpdateLocation(CGF, C->getBeginLoc()),
10938       getThreadID(CGF, C->getBeginLoc()),
10939       CGF.Builder.CreateConstArrayGEP(CntAddr, 0).getPointer()};
10940   llvm::FunctionCallee RTLFn;
10941   if (C->getDependencyKind() == OMPC_DEPEND_source) {
10942     RTLFn = createRuntimeFunction(OMPRTL__kmpc_doacross_post);
10943   } else {
10944     assert(C->getDependencyKind() == OMPC_DEPEND_sink);
10945     RTLFn = createRuntimeFunction(OMPRTL__kmpc_doacross_wait);
10946   }
10947   CGF.EmitRuntimeCall(RTLFn, Args);
10948 }
10949 
10950 void CGOpenMPRuntime::emitCall(CodeGenFunction &CGF, SourceLocation Loc,
10951                                llvm::FunctionCallee Callee,
10952                                ArrayRef<llvm::Value *> Args) const {
10953   assert(Loc.isValid() && "Outlined function call location must be valid.");
10954   auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
10955 
10956   if (auto *Fn = dyn_cast<llvm::Function>(Callee.getCallee())) {
10957     if (Fn->doesNotThrow()) {
10958       CGF.EmitNounwindRuntimeCall(Fn, Args);
10959       return;
10960     }
10961   }
10962   CGF.EmitRuntimeCall(Callee, Args);
10963 }
10964 
/// Emit a call to the outlined function \p OutlinedFn with \p Args at
/// \p Loc. This implementation simply forwards to emitCall.
void CGOpenMPRuntime::emitOutlinedFunctionCall(
    CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn,
    ArrayRef<llvm::Value *> Args) const {
  emitCall(CGF, Loc, OutlinedFn, Args);
}
10970 
10971 void CGOpenMPRuntime::emitFunctionProlog(CodeGenFunction &CGF, const Decl *D) {
10972   if (const auto *FD = dyn_cast<FunctionDecl>(D))
10973     if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(FD))
10974       HasEmittedDeclareTargetRegion = true;
10975 }
10976 
/// Return the address of the local variable \p NativeParam in \p CGF.
/// \p TargetParam is not used by this implementation.
Address CGOpenMPRuntime::getParameterAddress(CodeGenFunction &CGF,
                                             const VarDecl *NativeParam,
                                             const VarDecl *TargetParam) const {
  return CGF.GetAddrOfLocalVar(NativeParam);
}
10982 
10983 namespace {
10984 /// Cleanup action for allocate support.
10985 class OMPAllocateCleanupTy final : public EHScopeStack::Cleanup {
10986 public:
10987   static const int CleanupArgs = 3;
10988 
10989 private:
10990   llvm::FunctionCallee RTLFn;
10991   llvm::Value *Args[CleanupArgs];
10992 
10993 public:
10994   OMPAllocateCleanupTy(llvm::FunctionCallee RTLFn,
10995                        ArrayRef<llvm::Value *> CallArgs)
10996       : RTLFn(RTLFn) {
10997     assert(CallArgs.size() == CleanupArgs &&
10998            "Size of arguments does not match.");
10999     std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args));
11000   }
11001   void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
11002     if (!CGF.HaveInsertPoint())
11003       return;
11004     CGF.EmitRuntimeCall(RTLFn, Args);
11005   }
11006 };
11007 } // namespace
11008 
11009 Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF,
11010                                                    const VarDecl *VD) {
11011   if (!VD)
11012     return Address::invalid();
11013   const VarDecl *CVD = VD->getCanonicalDecl();
11014   if (!CVD->hasAttr<OMPAllocateDeclAttr>())
11015     return Address::invalid();
11016   const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
11017   // Use the default allocation.
11018   if (AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc &&
11019       !AA->getAllocator())
11020     return Address::invalid();
11021   llvm::Value *Size;
11022   CharUnits Align = CGM.getContext().getDeclAlign(CVD);
11023   if (CVD->getType()->isVariablyModifiedType()) {
11024     Size = CGF.getTypeSize(CVD->getType());
11025     // Align the size: ((size + align - 1) / align) * align
11026     Size = CGF.Builder.CreateNUWAdd(
11027         Size, CGM.getSize(Align - CharUnits::fromQuantity(1)));
11028     Size = CGF.Builder.CreateUDiv(Size, CGM.getSize(Align));
11029     Size = CGF.Builder.CreateNUWMul(Size, CGM.getSize(Align));
11030   } else {
11031     CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType());
11032     Size = CGM.getSize(Sz.alignTo(Align));
11033   }
11034   llvm::Value *ThreadID = getThreadID(CGF, CVD->getBeginLoc());
11035   assert(AA->getAllocator() &&
11036          "Expected allocator expression for non-default allocator.");
11037   llvm::Value *Allocator = CGF.EmitScalarExpr(AA->getAllocator());
11038   // According to the standard, the original allocator type is a enum (integer).
11039   // Convert to pointer type, if required.
11040   if (Allocator->getType()->isIntegerTy())
11041     Allocator = CGF.Builder.CreateIntToPtr(Allocator, CGM.VoidPtrTy);
11042   else if (Allocator->getType()->isPointerTy())
11043     Allocator = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Allocator,
11044                                                                 CGM.VoidPtrTy);
11045   llvm::Value *Args[] = {ThreadID, Size, Allocator};
11046 
11047   llvm::Value *Addr =
11048       CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_alloc), Args,
11049                           CVD->getName() + ".void.addr");
11050   llvm::Value *FiniArgs[OMPAllocateCleanupTy::CleanupArgs] = {ThreadID, Addr,
11051                                                               Allocator};
11052   llvm::FunctionCallee FiniRTLFn = createRuntimeFunction(OMPRTL__kmpc_free);
11053 
11054   CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(NormalAndEHCleanup, FiniRTLFn,
11055                                                 llvm::makeArrayRef(FiniArgs));
11056   Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
11057       Addr,
11058       CGF.ConvertTypeForMem(CGM.getContext().getPointerType(CVD->getType())),
11059       CVD->getName() + ".addr");
11060   return Address(Addr, Align);
11061 }
11062 
/// SIMD-only mode stub for emitParallelOutlinedFunction: the body is a bare
/// llvm_unreachable, so no function is ever returned.
llvm::Function *CGOpenMPSIMDRuntime::emitParallelOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11068 
/// SIMD-only mode stub for emitTeamsOutlinedFunction: the body is a bare
/// llvm_unreachable, so no function is ever returned.
llvm::Function *CGOpenMPSIMDRuntime::emitTeamsOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11074 
/// SIMD-only mode stub for emitTaskOutlinedFunction: the body is a bare
/// llvm_unreachable; \p NumberOfParts is not written.
llvm::Function *CGOpenMPSIMDRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11082 
/// SIMD-only mode stub for emitParallelCall: nothing is emitted, the body is
/// a bare llvm_unreachable.
void CGOpenMPSIMDRuntime::emitParallelCall(CodeGenFunction &CGF,
                                           SourceLocation Loc,
                                           llvm::Function *OutlinedFn,
                                           ArrayRef<llvm::Value *> CapturedVars,
                                           const Expr *IfCond) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11090 
/// SIMD-only mode stub for emitCriticalRegion: nothing is emitted, the body
/// is a bare llvm_unreachable.
void CGOpenMPSIMDRuntime::emitCriticalRegion(
    CodeGenFunction &CGF, StringRef CriticalName,
    const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc,
    const Expr *Hint) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11097 
/// SIMD-only mode stub for emitMasterRegion: nothing is emitted, the body is
/// a bare llvm_unreachable.
void CGOpenMPSIMDRuntime::emitMasterRegion(CodeGenFunction &CGF,
                                           const RegionCodeGenTy &MasterOpGen,
                                           SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11103 
/// SIMD-only mode stub for emitTaskyieldCall: nothing is emitted, the body is
/// a bare llvm_unreachable.
void CGOpenMPSIMDRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
                                            SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11108 
/// SIMD-only mode stub for emitTaskgroupRegion: nothing is emitted, the body
/// is a bare llvm_unreachable.
void CGOpenMPSIMDRuntime::emitTaskgroupRegion(
    CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen,
    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11114 
/// SIMD-only mode stub for emitSingleRegion: nothing is emitted, the body is
/// a bare llvm_unreachable.
void CGOpenMPSIMDRuntime::emitSingleRegion(
    CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen,
    SourceLocation Loc, ArrayRef<const Expr *> CopyprivateVars,
    ArrayRef<const Expr *> DestExprs, ArrayRef<const Expr *> SrcExprs,
    ArrayRef<const Expr *> AssignmentOps) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11122 
/// SIMD-only mode stub for emitOrderedRegion: nothing is emitted, the body is
/// a bare llvm_unreachable.
void CGOpenMPSIMDRuntime::emitOrderedRegion(CodeGenFunction &CGF,
                                            const RegionCodeGenTy &OrderedOpGen,
                                            SourceLocation Loc,
                                            bool IsThreads) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11129 
/// SIMD-only mode stub for emitBarrierCall: nothing is emitted, the body is a
/// bare llvm_unreachable.
void CGOpenMPSIMDRuntime::emitBarrierCall(CodeGenFunction &CGF,
                                          SourceLocation Loc,
                                          OpenMPDirectiveKind Kind,
                                          bool EmitChecks,
                                          bool ForceSimpleCall) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11137 
/// SIMD-only mode stub for emitForDispatchInit: nothing is emitted, the body
/// is a bare llvm_unreachable.
void CGOpenMPSIMDRuntime::emitForDispatchInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
    bool Ordered, const DispatchRTInput &DispatchValues) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11144 
/// SIMD-only mode stub for emitForStaticInit: nothing is emitted, the body is
/// a bare llvm_unreachable.
void CGOpenMPSIMDRuntime::emitForStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind,
    const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11150 
/// SIMD-only mode stub for emitDistributeStaticInit: nothing is emitted, the
/// body is a bare llvm_unreachable.
void CGOpenMPSIMDRuntime::emitDistributeStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDistScheduleClauseKind SchedKind, const StaticRTInput &Values) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11156 
/// SIMD-only mode stub for emitForOrderedIterationEnd: nothing is emitted,
/// the body is a bare llvm_unreachable.
void CGOpenMPSIMDRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
                                                     SourceLocation Loc,
                                                     unsigned IVSize,
                                                     bool IVSigned) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11163 
/// SIMD-only mode stub for emitForStaticFinish: nothing is emitted, the body
/// is a bare llvm_unreachable.
void CGOpenMPSIMDRuntime::emitForStaticFinish(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              OpenMPDirectiveKind DKind) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11169 
/// SIMD-only mode stub for emitForNext: the body is a bare llvm_unreachable,
/// so no value is ever returned.
llvm::Value *CGOpenMPSIMDRuntime::emitForNext(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              unsigned IVSize, bool IVSigned,
                                              Address IL, Address LB,
                                              Address UB, Address ST) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11177 
/// SIMD-only mode stub for emitNumThreadsClause: nothing is emitted, the body
/// is a bare llvm_unreachable.
void CGOpenMPSIMDRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
                                               llvm::Value *NumThreads,
                                               SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11183 
/// SIMD-only mode stub for emitProcBindClause: nothing is emitted, the body
/// is a bare llvm_unreachable.
void CGOpenMPSIMDRuntime::emitProcBindClause(CodeGenFunction &CGF,
                                             OpenMPProcBindClauseKind ProcBind,
                                             SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11189 
/// SIMD-only mode stub for getAddrOfThreadPrivate: the body is a bare
/// llvm_unreachable, so no address is ever returned.
Address CGOpenMPSIMDRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
                                                    const VarDecl *VD,
                                                    Address VDAddr,
                                                    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11196 
/// SIMD-only mode stub for emitThreadPrivateVarDefinition: the body is a bare
/// llvm_unreachable, so no function is ever returned.
llvm::Function *CGOpenMPSIMDRuntime::emitThreadPrivateVarDefinition(
    const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit,
    CodeGenFunction *CGF) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11202 
/// SIMD-only mode stub for getAddrOfArtificialThreadPrivate: the body is a
/// bare llvm_unreachable, so no address is ever returned.
Address CGOpenMPSIMDRuntime::getAddrOfArtificialThreadPrivate(
    CodeGenFunction &CGF, QualType VarType, StringRef Name) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11207 
/// SIMD-only mode stub for emitFlush: nothing is emitted, the body is a bare
/// llvm_unreachable.
void CGOpenMPSIMDRuntime::emitFlush(CodeGenFunction &CGF,
                                    ArrayRef<const Expr *> Vars,
                                    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11213 
/// SIMD-only mode stub for emitTaskCall: nothing is emitted, the body is a
/// bare llvm_unreachable.
void CGOpenMPSIMDRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPExecutableDirective &D,
                                       llvm::Function *TaskFunction,
                                       QualType SharedsTy, Address Shareds,
                                       const Expr *IfCond,
                                       const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11222 
/// SIMD-only mode stub for emitTaskLoopCall: nothing is emitted, the body is
/// a bare llvm_unreachable.
void CGOpenMPSIMDRuntime::emitTaskLoopCall(
    CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D,
    llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds,
    const Expr *IfCond, const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11229 
/// Emit a reduction in SIMD-only mode. Only simple reductions are expected
/// (asserted on Options.SimpleReduction); the work itself is delegated to
/// CGOpenMPRuntime::emitReduction with the arguments passed through unchanged.
void CGOpenMPSIMDRuntime::emitReduction(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> Privates,
    ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
    ArrayRef<const Expr *> ReductionOps, ReductionOptionsTy Options) {
  assert(Options.SimpleReduction && "Only simple reduction is expected.");
  CGOpenMPRuntime::emitReduction(CGF, Loc, Privates, LHSExprs, RHSExprs,
                                 ReductionOps, Options);
}
11238 
/// SIMD-only mode stub for emitTaskReductionInit: the body is a bare
/// llvm_unreachable, so no value is ever returned.
llvm::Value *CGOpenMPSIMDRuntime::emitTaskReductionInit(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
    ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11244 
/// SIMD-only mode stub for emitTaskReductionFixups: nothing is emitted, the
/// body is a bare llvm_unreachable.
void CGOpenMPSIMDRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  ReductionCodeGen &RCG,
                                                  unsigned N) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11251 
/// SIMD-only mode stub for getTaskReductionItem: the body is a bare
/// llvm_unreachable, so no address is ever returned.
Address CGOpenMPSIMDRuntime::getTaskReductionItem(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  llvm::Value *ReductionsPtr,
                                                  LValue SharedLVal) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11258 
/// SIMD-only mode stub for emitTaskwaitCall: nothing is emitted, the body is
/// a bare llvm_unreachable.
void CGOpenMPSIMDRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
                                           SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11263 
/// SIMD-only mode stub for emitCancellationPointCall: nothing is emitted, the
/// body is a bare llvm_unreachable.
void CGOpenMPSIMDRuntime::emitCancellationPointCall(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDirectiveKind CancelRegion) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11269 
/// SIMD-only mode stub for emitCancelCall: nothing is emitted, the body is a
/// bare llvm_unreachable.
void CGOpenMPSIMDRuntime::emitCancelCall(CodeGenFunction &CGF,
                                         SourceLocation Loc, const Expr *IfCond,
                                         OpenMPDirectiveKind CancelRegion) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11275 
/// SIMD-only mode stub for emitTargetOutlinedFunction: the body is a bare
/// llvm_unreachable; \p OutlinedFn and \p OutlinedFnID are not written.
void CGOpenMPSIMDRuntime::emitTargetOutlinedFunction(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11282 
/// SIMD-only mode stub for emitTargetCall: nothing is emitted, the body is a
/// bare llvm_unreachable.
void CGOpenMPSIMDRuntime::emitTargetCall(CodeGenFunction &CGF,
                                         const OMPExecutableDirective &D,
                                         llvm::Function *OutlinedFn,
                                         llvm::Value *OutlinedFnID,
                                         const Expr *IfCond,
                                         const Expr *Device) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11291 
/// SIMD-only mode stub for emitTargetFunctions: the body is a bare
/// llvm_unreachable, so no result is ever returned.
bool CGOpenMPSIMDRuntime::emitTargetFunctions(GlobalDecl GD) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11295 
/// SIMD-only mode stub for emitTargetGlobalVariable: the body is a bare
/// llvm_unreachable, so no result is ever returned.
bool CGOpenMPSIMDRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11299 
/// SIMD-only mode implementation of emitTargetGlobal: \p GD is ignored and
/// the result is always false.
bool CGOpenMPSIMDRuntime::emitTargetGlobal(GlobalDecl GD) {
  return false;
}
11303 
/// SIMD-only mode implementation of emitRegistrationFunction: no function is
/// created and the result is always nullptr.
llvm::Function *CGOpenMPSIMDRuntime::emitRegistrationFunction() {
  return nullptr;
}
11307 
/// SIMD-only mode stub for emitTeamsCall: nothing is emitted, the body is a
/// bare llvm_unreachable.
void CGOpenMPSIMDRuntime::emitTeamsCall(CodeGenFunction &CGF,
                                        const OMPExecutableDirective &D,
                                        SourceLocation Loc,
                                        llvm::Function *OutlinedFn,
                                        ArrayRef<llvm::Value *> CapturedVars) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11315 
11316 void CGOpenMPSIMDRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
11317                                              const Expr *NumTeams,
11318                                              const Expr *ThreadLimit,
11319                                              SourceLocation Loc) {
11320   llvm_unreachable("Not supported in SIMD-only mode");
11321 }
11322 
11323 void CGOpenMPSIMDRuntime::emitTargetDataCalls(
11324     CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
11325     const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) {
11326   llvm_unreachable("Not supported in SIMD-only mode");
11327 }
11328 
11329 void CGOpenMPSIMDRuntime::emitTargetDataStandAloneCall(
11330     CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
11331     const Expr *Device) {
11332   llvm_unreachable("Not supported in SIMD-only mode");
11333 }
11334 
11335 void CGOpenMPSIMDRuntime::emitDoacrossInit(CodeGenFunction &CGF,
11336                                            const OMPLoopDirective &D,
11337                                            ArrayRef<Expr *> NumIterations) {
11338   llvm_unreachable("Not supported in SIMD-only mode");
11339 }
11340 
11341 void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
11342                                               const OMPDependClause *C) {
11343   llvm_unreachable("Not supported in SIMD-only mode");
11344 }
11345 
11346 const VarDecl *
11347 CGOpenMPSIMDRuntime::translateParameter(const FieldDecl *FD,
11348                                         const VarDecl *NativeParam) const {
11349   llvm_unreachable("Not supported in SIMD-only mode");
11350 }
11351 
11352 Address
11353 CGOpenMPSIMDRuntime::getParameterAddress(CodeGenFunction &CGF,
11354                                          const VarDecl *NativeParam,
11355                                          const VarDecl *TargetParam) const {
11356   llvm_unreachable("Not supported in SIMD-only mode");
11357 }
11358