1 //===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This provides a class for OpenMP runtime code generation.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "CGCXXABI.h"
14 #include "CGCleanup.h"
15 #include "CGOpenMPRuntime.h"
16 #include "CGRecordLayout.h"
17 #include "CodeGenFunction.h"
18 #include "clang/CodeGen/ConstantInitBuilder.h"
19 #include "clang/AST/Decl.h"
20 #include "clang/AST/StmtOpenMP.h"
21 #include "clang/Basic/BitmaskEnum.h"
22 #include "llvm/ADT/ArrayRef.h"
23 #include "llvm/Bitcode/BitcodeReader.h"
24 #include "llvm/IR/DerivedTypes.h"
25 #include "llvm/IR/GlobalValue.h"
26 #include "llvm/IR/Value.h"
27 #include "llvm/Support/Format.h"
28 #include "llvm/Support/raw_ostream.h"
29 #include <cassert>
30 
31 using namespace clang;
32 using namespace CodeGen;
33 
34 namespace {
/// Base class for handling code generation inside OpenMP regions.
/// Records the kind of region being emitted, the code generation callback
/// for the region body, the OpenMP directive kind, and whether the region
/// may be cancelled (for 'cancel'/'cancellation point' support).
class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
public:
  /// Kinds of OpenMP regions used in codegen.
  enum CGOpenMPRegionKind {
    /// Region with outlined function for standalone 'parallel'
    /// directive.
    ParallelOutlinedRegion,
    /// Region with outlined function for standalone 'task' directive.
    TaskOutlinedRegion,
    /// Region for constructs that do not require function outlining,
    /// like 'for', 'sections', 'atomic' etc. directives.
    InlinedRegion,
    /// Region with outlined function for standalone 'target' directive.
    TargetRegion,
  };

  /// Build region info for an outlined region backed by a captured statement.
  CGOpenMPRegionInfo(const CapturedStmt &CS,
                     const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind),
        CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {}

  /// Build region info without a captured statement (used for inlined
  /// regions that reuse the enclosing function's captures).
  CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen),
        Kind(Kind), HasCancel(HasCancel) {}

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  virtual const VarDecl *getThreadIDVariable() const = 0;

  /// Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override;

  /// Get an LValue for the current ThreadID variable.
  /// \return LValue for thread id variable. This LValue always has type int32*.
  virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF);

  /// Emit a task switching point for untied tasks; no-op by default,
  /// overridden by task-outlined (and delegating inlined) regions.
  virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {}

  CGOpenMPRegionKind getRegionKind() const { return RegionKind; }

  OpenMPDirectiveKind getDirectiveKind() const { return Kind; }

  /// True if the associated directive has a 'cancel' clause/region.
  bool hasCancel() const { return HasCancel; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return Info->getKind() == CR_OpenMP;
  }

  ~CGOpenMPRegionInfo() override = default;

protected:
  /// Which of the CGOpenMPRegionKind variants this region is.
  CGOpenMPRegionKind RegionKind;
  /// Callback that emits the code for the region body.
  RegionCodeGenTy CodeGen;
  /// The OpenMP directive this region was created for.
  OpenMPDirectiveKind Kind;
  /// Whether the region may be cancelled.
  bool HasCancel;
};
96 
/// API for captured statement code generation in OpenMP constructs that are
/// outlined into a separate helper function (ParallelOutlinedRegion kind).
class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  /// \param ThreadIDVar Variable/parameter holding the global thread id;
  /// must not be null for outlined regions.
  /// \param HelperName Name used for the generated outlined helper function.
  CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar,
                             const RegionCodeGenTy &CodeGen,
                             OpenMPDirectiveKind Kind, bool HasCancel,
                             StringRef HelperName)
      : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind,
                           HasCancel),
        ThreadIDVar(ThreadIDVar), HelperName(HelperName) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return HelperName; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               ParallelOutlinedRegion;
  }

private:
  /// A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  /// Name of the generated outlined helper function.
  StringRef HelperName;
};
129 
/// API for captured statement code generation in OpenMP task-outlined
/// constructs (TaskOutlinedRegion kind).
class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  /// Pre/post action that emits the dispatch machinery for untied tasks.
  /// The body of an untied task is split into parts; a switch on the part id
  /// (reached through PartIDVar) jumps to the resume block recorded for the
  /// part that should execute next.
  class UntiedTaskActionTy final : public PrePostActionTy {
    /// True if the task is untied (note: the constructor takes 'Tied').
    bool Untied;
    /// Parameter holding a pointer to the task part id.
    const VarDecl *PartIDVar;
    /// Code generation sequence run at every task switching point.
    const RegionCodeGenTy UntiedCodeGen;
    /// Dispatch switch; gains one case per emitted task part.
    llvm::SwitchInst *UntiedSwitch = nullptr;

  public:
    UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar,
                       const RegionCodeGenTy &UntiedCodeGen)
        : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {}
    void Enter(CodeGenFunction &CGF) override {
      if (Untied) {
        // Emit task switching point.
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        llvm::Value *Res =
            CGF.EmitLoadOfScalar(PartIdLVal, PartIDVar->getLocation());
        llvm::BasicBlock *DoneBB = CGF.createBasicBlock(".untied.done.");
        UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB);
        CGF.EmitBlock(DoneBB);
        CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        // Part id 0 is the task entry: resume at the block just emitted.
        UntiedSwitch->addCase(CGF.Builder.getInt32(0),
                              CGF.Builder.GetInsertBlock());
        emitUntiedSwitch(CGF);
      }
    }
    /// Record a new task part: store the id of the next part through
    /// PartIDVar, run the user-provided switching codegen, return from the
    /// current part, and register the resume block as a new switch case.
    void emitUntiedSwitch(CodeGenFunction &CGF) const {
      if (Untied) {
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        // getNumCases() is the id the case added below will dispatch on.
        CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              PartIdLVal);
        UntiedCodeGen(CGF);
        CodeGenFunction::JumpDest CurPoint =
            CGF.getJumpDestInCurrentScope(".untied.next.");
        CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              CGF.Builder.GetInsertBlock());
        CGF.EmitBranchThroughCleanup(CurPoint);
        CGF.EmitBlock(CurPoint.getBlock());
      }
    }
    /// Number of task parts emitted so far (== number of switch cases).
    unsigned getNumberOfParts() const { return UntiedSwitch->getNumCases(); }
  };
  CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS,
                                 const VarDecl *ThreadIDVar,
                                 const RegionCodeGenTy &CodeGen,
                                 OpenMPDirectiveKind Kind, bool HasCancel,
                                 const UntiedTaskActionTy &Action)
      : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel),
        ThreadIDVar(ThreadIDVar), Action(Action) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// Get an LValue for the current ThreadID variable.
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override;

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return ".omp_outlined."; }

  /// Delegate task switching to the stored untied-task action.
  void emitUntiedSwitch(CodeGenFunction &CGF) override {
    Action.emitUntiedSwitch(CGF);
  }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               TaskOutlinedRegion;
  }

private:
  /// A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  /// Action for emitting code for untied tasks.
  const UntiedTaskActionTy &Action;
};
218 
/// API for inlined captured statement code generation in OpenMP
/// constructs. Most queries delegate to the enclosing region's info (if the
/// previous CGCapturedStmtInfo was itself an OpenMP region).
class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo {
public:
  /// \param OldCSI The CGCapturedStmtInfo that was installed before this
  /// inlined region; restored by InlinedOpenMPRegionRAII on destruction.
  CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI,
                            const RegionCodeGenTy &CodeGen,
                            OpenMPDirectiveKind Kind, bool HasCancel)
      : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel),
        OldCSI(OldCSI),
        OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {}

  // Retrieve the value of the context parameter.
  llvm::Value *getContextValue() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getContextValue();
    llvm_unreachable("No context value for inlined OpenMP region");
  }

  void setContextValue(llvm::Value *V) override {
    if (OuterRegionInfo) {
      OuterRegionInfo->setContextValue(V);
      return;
    }
    llvm_unreachable("No context value for inlined OpenMP region");
  }

  /// Lookup the captured field decl for a variable.
  const FieldDecl *lookup(const VarDecl *VD) const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->lookup(VD);
    // If there is no outer outlined region, no need to lookup in a list of
    // captured variables, we can use the original one.
    return nullptr;
  }

  FieldDecl *getThisFieldDecl() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThisFieldDecl();
    return nullptr;
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThreadIDVariable();
    return nullptr;
  }

  /// Get an LValue for the current ThreadID variable.
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThreadIDVariableLValue(CGF);
    llvm_unreachable("No LValue for inlined OpenMP construct");
  }

  /// Get the name of the capture helper.
  /// Note: unlike the other delegating overrides, this consults getOldCSI()
  /// directly, so it also works when the previous CGCapturedStmtInfo is not
  /// an OpenMP region.
  StringRef getHelperName() const override {
    if (auto *OuterRegionInfo = getOldCSI())
      return OuterRegionInfo->getHelperName();
    llvm_unreachable("No helper name for inlined OpenMP construct");
  }

  void emitUntiedSwitch(CodeGenFunction &CGF) override {
    if (OuterRegionInfo)
      OuterRegionInfo->emitUntiedSwitch(CGF);
  }

  /// The CGCapturedStmtInfo that was active before this inlined region.
  CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion;
  }

  ~CGOpenMPInlinedRegionInfo() override = default;

private:
  /// CodeGen info about outer OpenMP region.
  CodeGenFunction::CGCapturedStmtInfo *OldCSI;
  /// OldCSI downcast to an OpenMP region info, or null if it is not one.
  CGOpenMPRegionInfo *OuterRegionInfo;
};
301 
302 /// API for captured statement code generation in OpenMP target
303 /// constructs. For this captures, implicit parameters are used instead of the
304 /// captured fields. The name of the target region has to be unique in a given
305 /// application so it is provided by the client, because only the client has
306 /// the information to generate that.
307 class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo {
308 public:
309   CGOpenMPTargetRegionInfo(const CapturedStmt &CS,
310                            const RegionCodeGenTy &CodeGen, StringRef HelperName)
311       : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target,
312                            /*HasCancel=*/false),
313         HelperName(HelperName) {}
314 
315   /// This is unused for target regions because each starts executing
316   /// with a single thread.
317   const VarDecl *getThreadIDVariable() const override { return nullptr; }
318 
319   /// Get the name of the capture helper.
320   StringRef getHelperName() const override { return HelperName; }
321 
322   static bool classof(const CGCapturedStmtInfo *Info) {
323     return CGOpenMPRegionInfo::classof(Info) &&
324            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion;
325   }
326 
327 private:
328   StringRef HelperName;
329 };
330 
/// Placeholder RegionCodeGenTy callback for regions that only capture
/// expressions; must never actually be invoked.
static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) {
  llvm_unreachable("No codegen for expressions");
}
/// API for generation of expressions captured in a innermost OpenMP
/// region. Only lookup/privatization is meaningful; body emission, thread
/// id, and helper-name queries are unreachable by construction.
class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo {
public:
  CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS)
      : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen,
                                  OMPD_unknown,
                                  /*HasCancel=*/false),
        PrivScope(CGF) {
    // Make sure the globals captured in the provided statement are local by
    // using the privatization logic. We assume the same variable is not
    // captured more than once.
    for (const auto &C : CS.captures()) {
      if (!C.capturesVariable() && !C.capturesVariableByCopy())
        continue;

      const VarDecl *VD = C.getCapturedVar();
      // Locals and parameters are already local; only globals need the
      // privatization treatment.
      if (VD->isLocalVarDeclOrParm())
        continue;

      DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD),
                      /*RefersToEnclosingVariableOrCapture=*/false,
                      VD->getType().getNonReferenceType(), VK_LValue,
                      C.getLocation());
      PrivScope.addPrivate(
          VD, [&CGF, &DRE]() { return CGF.EmitLValue(&DRE).getAddress(); });
    }
    (void)PrivScope.Privatize();
  }

  /// Lookup the captured field decl for a variable. Falls back to null
  /// (meaning "use the original declaration") when the outer chain has no
  /// capture for \p VD.
  const FieldDecl *lookup(const VarDecl *VD) const override {
    if (const FieldDecl *FD = CGOpenMPInlinedRegionInfo::lookup(VD))
      return FD;
    return nullptr;
  }

  /// Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override {
    llvm_unreachable("No body for expressions");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override {
    llvm_unreachable("No thread id for expressions");
  }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override {
    llvm_unreachable("No helper name for expressions");
  }

  // Never identified via classof; instances are used only locally.
  static bool classof(const CGCapturedStmtInfo *Info) { return false; }

private:
  /// Private scope to capture global variables.
  CodeGenFunction::OMPPrivateScope PrivScope;
};
393 
/// RAII for emitting code of OpenMP constructs.
/// On construction installs a CGOpenMPInlinedRegionInfo as the current
/// CapturedStmtInfo and stashes the function's lambda/block capture state;
/// the destructor deletes the info and restores everything.
class InlinedOpenMPRegionRAII {
  CodeGenFunction &CGF;
  /// Saved lambda capture map, swapped back in on destruction.
  llvm::DenseMap<const VarDecl *, FieldDecl *> LambdaCaptureFields;
  /// Saved lambda 'this' capture, restored on destruction.
  FieldDecl *LambdaThisCaptureField = nullptr;
  /// Saved block info, restored on destruction.
  const CodeGen::CGBlockInfo *BlockInfo = nullptr;

public:
  /// Constructs region for combined constructs.
  /// \param CodeGen Code generation sequence for combined directives. Includes
  /// a list of functions used for code generation of implicitly inlined
  /// regions.
  InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen,
                          OpenMPDirectiveKind Kind, bool HasCancel)
      : CGF(CGF) {
    // Start emission for the construct.
    // NOTE: raw 'new' here is paired with 'delete' in the destructor below.
    CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo(
        CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel);
    std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
    LambdaThisCaptureField = CGF.LambdaThisCaptureField;
    CGF.LambdaThisCaptureField = nullptr;
    BlockInfo = CGF.BlockInfo;
    CGF.BlockInfo = nullptr;
  }

  ~InlinedOpenMPRegionRAII() {
    // Restore original CapturedStmtInfo only if we're done with code emission.
    auto *OldCSI =
        cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI();
    delete CGF.CapturedStmtInfo;
    CGF.CapturedStmtInfo = OldCSI;
    std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
    CGF.LambdaThisCaptureField = LambdaThisCaptureField;
    CGF.BlockInfo = BlockInfo;
  }
};
430 
/// Values for bit flags used in the ident_t to describe the fields.
/// All enumeration elements are named and described in accordance with the
/// code from
/// https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h
enum OpenMPLocationFlags : unsigned {
  /// Use trampoline for internal microtask.
  OMP_IDENT_IMD = 0x01,
  /// Use c-style ident structure.
  OMP_IDENT_KMPC = 0x02,
  /// Atomic reduction option for kmpc_reduce.
  OMP_ATOMIC_REDUCE = 0x10,
  /// Explicit 'barrier' directive.
  OMP_IDENT_BARRIER_EXPL = 0x20,
  /// Implicit barrier in code.
  OMP_IDENT_BARRIER_IMPL = 0x40,
  /// Implicit barrier in 'for' directive (same value as
  /// OMP_IDENT_BARRIER_IMPL, mirroring kmp.h).
  OMP_IDENT_BARRIER_IMPL_FOR = 0x40,
  /// Implicit barrier in 'sections' directive.
  OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0,
  /// Implicit barrier in 'single' directive.
  OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140,
  /// Call of __kmp_for_static_init for static loop.
  OMP_IDENT_WORK_LOOP = 0x200,
  /// Call of __kmp_for_static_init for sections.
  OMP_IDENT_WORK_SECTIONS = 0x400,
  /// Call of __kmp_for_static_init for distribute.
  OMP_IDENT_WORK_DISTRIBUTE = 0x800,
  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE)
};
459 
/// Describes ident structure that describes a source location.
/// All descriptions are taken from
/// https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h
/// Original structure:
/// typedef struct ident {
///    kmp_int32 reserved_1;   /**<  might be used in Fortran;
///                                  see above  */
///    kmp_int32 flags;        /**<  also f.flags; KMP_IDENT_xxx flags;
///                                  KMP_IDENT_KMPC identifies this union
///                                  member  */
///    kmp_int32 reserved_2;   /**<  not really used in Fortran any more;
///                                  see above */
///#if USE_ITT_BUILD
///                            /*  but currently used for storing
///                                region-specific ITT */
///                            /*  contextual information. */
///#endif /* USE_ITT_BUILD */
///    kmp_int32 reserved_3;   /**< source[4] in Fortran, do not use for
///                                 C++  */
///    char const *psource;    /**< String describing the source location.
///                            The string is composed of semi-colon separated
///                            fields which describe the source file,
///                            the function and a pair of line numbers that
///                            delimit the construct.
///                             */
/// } ident_t;
enum IdentFieldIndex {
  /// might be used in Fortran
  IdentField_Reserved_1,
  /// OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member.
  IdentField_Flags,
  /// Not really used in Fortran any more
  IdentField_Reserved_2,
  /// Source[4] in Fortran, do not use for C++
  IdentField_Reserved_3,
  /// String describing the source location. The string is composed of
  /// semi-colon separated fields which describe the source file, the function
  /// and a pair of line numbers that delimit the construct.
  IdentField_PSource
};
500 
/// Schedule types for 'omp for' loops (these enumerators are taken from
/// the enum sched_type in kmp.h).
enum OpenMPSchedType {
  /// Lower bound for default (unordered) versions.
  OMP_sch_lower = 32,
  OMP_sch_static_chunked = 33,
  OMP_sch_static = 34,
  OMP_sch_dynamic_chunked = 35,
  OMP_sch_guided_chunked = 36,
  OMP_sch_runtime = 37,
  OMP_sch_auto = 38,
  /// static with chunk adjustment (e.g., simd)
  OMP_sch_static_balanced_chunked = 45,
  /// Lower bound for 'ordered' versions.
  OMP_ord_lower = 64,
  OMP_ord_static_chunked = 65,
  OMP_ord_static = 66,
  OMP_ord_dynamic_chunked = 67,
  OMP_ord_guided_chunked = 68,
  OMP_ord_runtime = 69,
  OMP_ord_auto = 70,
  OMP_sch_default = OMP_sch_static,
  /// dist_schedule types
  OMP_dist_sch_static_chunked = 91,
  OMP_dist_sch_static = 92,
  /// Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers.
  /// Set if the monotonic schedule modifier was present.
  /// These modifier bits are OR-ed into one of the base schedule values.
  OMP_sch_modifier_monotonic = (1 << 29),
  /// Set if the nonmonotonic schedule modifier was present.
  OMP_sch_modifier_nonmonotonic = (1 << 30),
};
532 
/// Identifiers of the OpenMP/offloading runtime library entry points emitted
/// by this runtime; each enumerator documents the C signature of the call.
enum OpenMPRTLFunction {
  /// Call to void __kmpc_fork_call(ident_t *loc, kmp_int32 argc,
  /// kmpc_micro microtask, ...);
  OMPRTL__kmpc_fork_call,
  /// Call to void *__kmpc_threadprivate_cached(ident_t *loc,
  /// kmp_int32 global_tid, void *data, size_t size, void ***cache);
  OMPRTL__kmpc_threadprivate_cached,
  /// Call to void __kmpc_threadprivate_register( ident_t *,
  /// void *data, kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor);
  OMPRTL__kmpc_threadprivate_register,
  // Call to __kmpc_int32 kmpc_global_thread_num(ident_t *loc);
  OMPRTL__kmpc_global_thread_num,
  // Call to void __kmpc_critical(ident_t *loc, kmp_int32 global_tid,
  // kmp_critical_name *crit);
  OMPRTL__kmpc_critical,
  // Call to void __kmpc_critical_with_hint(ident_t *loc, kmp_int32
  // global_tid, kmp_critical_name *crit, uintptr_t hint);
  OMPRTL__kmpc_critical_with_hint,
  // Call to void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid,
  // kmp_critical_name *crit);
  OMPRTL__kmpc_end_critical,
  // Call to kmp_int32 __kmpc_cancel_barrier(ident_t *loc, kmp_int32
  // global_tid);
  OMPRTL__kmpc_cancel_barrier,
  // Call to void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_barrier,
  // Call to void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_for_static_fini,
  // Call to void __kmpc_serialized_parallel(ident_t *loc, kmp_int32
  // global_tid);
  OMPRTL__kmpc_serialized_parallel,
  // Call to void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32
  // global_tid);
  OMPRTL__kmpc_end_serialized_parallel,
  // Call to void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
  // kmp_int32 num_threads);
  OMPRTL__kmpc_push_num_threads,
  // Call to void __kmpc_flush(ident_t *loc);
  OMPRTL__kmpc_flush,
  // Call to kmp_int32 __kmpc_master(ident_t *, kmp_int32 global_tid);
  OMPRTL__kmpc_master,
  // Call to void __kmpc_end_master(ident_t *, kmp_int32 global_tid);
  OMPRTL__kmpc_end_master,
  // Call to kmp_int32 __kmpc_omp_taskyield(ident_t *, kmp_int32 global_tid,
  // int end_part);
  OMPRTL__kmpc_omp_taskyield,
  // Call to kmp_int32 __kmpc_single(ident_t *, kmp_int32 global_tid);
  OMPRTL__kmpc_single,
  // Call to void __kmpc_end_single(ident_t *, kmp_int32 global_tid);
  OMPRTL__kmpc_end_single,
  // Call to kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
  // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
  // kmp_routine_entry_t *task_entry);
  OMPRTL__kmpc_omp_task_alloc,
  // Call to kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t *
  // new_task);
  OMPRTL__kmpc_omp_task,
  // Call to void __kmpc_copyprivate(ident_t *loc, kmp_int32 global_tid,
  // size_t cpy_size, void *cpy_data, void(*cpy_func)(void *, void *),
  // kmp_int32 didit);
  OMPRTL__kmpc_copyprivate,
  // Call to kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid,
  // kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void
  // (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name *lck);
  OMPRTL__kmpc_reduce,
  // Call to kmp_int32 __kmpc_reduce_nowait(ident_t *loc, kmp_int32
  // global_tid, kmp_int32 num_vars, size_t reduce_size, void *reduce_data,
  // void (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name
  // *lck);
  OMPRTL__kmpc_reduce_nowait,
  // Call to void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,
  // kmp_critical_name *lck);
  OMPRTL__kmpc_end_reduce,
  // Call to void __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
  // kmp_critical_name *lck);
  OMPRTL__kmpc_end_reduce_nowait,
  // Call to void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
  // kmp_task_t * new_task);
  OMPRTL__kmpc_omp_task_begin_if0,
  // Call to void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
  // kmp_task_t * new_task);
  OMPRTL__kmpc_omp_task_complete_if0,
  // Call to void __kmpc_ordered(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_ordered,
  // Call to void __kmpc_end_ordered(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_end_ordered,
  // Call to kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
  // global_tid);
  OMPRTL__kmpc_omp_taskwait,
  // Call to void __kmpc_taskgroup(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_taskgroup,
  // Call to void __kmpc_end_taskgroup(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_end_taskgroup,
  // Call to void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
  // int proc_bind);
  OMPRTL__kmpc_push_proc_bind,
  // Call to kmp_int32 __kmpc_omp_task_with_deps(ident_t *loc_ref, kmp_int32
  // gtid, kmp_task_t * new_task, kmp_int32 ndeps, kmp_depend_info_t
  // *dep_list, kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list);
  OMPRTL__kmpc_omp_task_with_deps,
  // Call to void __kmpc_omp_wait_deps(ident_t *loc_ref, kmp_int32
  // gtid, kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
  // ndeps_noalias, kmp_depend_info_t *noalias_dep_list);
  OMPRTL__kmpc_omp_wait_deps,
  // Call to kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
  // global_tid, kmp_int32 cncl_kind);
  OMPRTL__kmpc_cancellationpoint,
  // Call to kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
  // kmp_int32 cncl_kind);
  OMPRTL__kmpc_cancel,
  // Call to void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid,
  // kmp_int32 num_teams, kmp_int32 thread_limit);
  OMPRTL__kmpc_push_num_teams,
  // Call to void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro
  // microtask, ...);
  OMPRTL__kmpc_fork_teams,
  // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
  // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
  // sched, kmp_uint64 grainsize, void *task_dup);
  OMPRTL__kmpc_taskloop,
  // Call to void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid, kmp_int32
  // num_dims, struct kmp_dim *dims);
  OMPRTL__kmpc_doacross_init,
  // Call to void __kmpc_doacross_fini(ident_t *loc, kmp_int32 gtid);
  OMPRTL__kmpc_doacross_fini,
  // Call to void __kmpc_doacross_post(ident_t *loc, kmp_int32 gtid, kmp_int64
  // *vec);
  OMPRTL__kmpc_doacross_post,
  // Call to void __kmpc_doacross_wait(ident_t *loc, kmp_int32 gtid, kmp_int64
  // *vec);
  OMPRTL__kmpc_doacross_wait,
  // Call to void *__kmpc_task_reduction_init(int gtid, int num_data, void
  // *data);
  OMPRTL__kmpc_task_reduction_init,
  // Call to void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
  // *d);
  OMPRTL__kmpc_task_reduction_get_th_data,
  // Call to void *__kmpc_alloc(int gtid, size_t sz, omp_allocator_handle_t al);
  OMPRTL__kmpc_alloc,
  // Call to void __kmpc_free(int gtid, void *ptr, omp_allocator_handle_t al);
  OMPRTL__kmpc_free,

  //
  // Offloading related calls
  //
  // Call to void __kmpc_push_target_tripcount(int64_t device_id, kmp_uint64
  // size);
  OMPRTL__kmpc_push_target_tripcount,
  // Call to int32_t __tgt_target(int64_t device_id, void *host_ptr, int32_t
  // arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t
  // *arg_types);
  OMPRTL__tgt_target,
  // Call to int32_t __tgt_target_nowait(int64_t device_id, void *host_ptr,
  // int32_t arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t
  // *arg_types);
  OMPRTL__tgt_target_nowait,
  // Call to int32_t __tgt_target_teams(int64_t device_id, void *host_ptr,
  // int32_t arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t
  // *arg_types, int32_t num_teams, int32_t thread_limit);
  OMPRTL__tgt_target_teams,
  // Call to int32_t __tgt_target_teams_nowait(int64_t device_id, void
  // *host_ptr, int32_t arg_num, void** args_base, void **args, size_t
  // *arg_sizes, int64_t *arg_types, int32_t num_teams, int32_t thread_limit);
  OMPRTL__tgt_target_teams_nowait,
  // Call to void __tgt_register_lib(__tgt_bin_desc *desc);
  OMPRTL__tgt_register_lib,
  // Call to void __tgt_unregister_lib(__tgt_bin_desc *desc);
  OMPRTL__tgt_unregister_lib,
  // Call to void __tgt_target_data_begin(int64_t device_id, int32_t arg_num,
  // void** args_base, void **args, size_t *arg_sizes, int64_t *arg_types);
  OMPRTL__tgt_target_data_begin,
  // Call to void __tgt_target_data_begin_nowait(int64_t device_id, int32_t
  // arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t
  // *arg_types);
  OMPRTL__tgt_target_data_begin_nowait,
  // Call to void __tgt_target_data_end(int64_t device_id, int32_t arg_num,
  // void** args_base, void **args, size_t *arg_sizes, int64_t *arg_types);
  OMPRTL__tgt_target_data_end,
  // Call to void __tgt_target_data_end_nowait(int64_t device_id, int32_t
  // arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t
  // *arg_types);
  OMPRTL__tgt_target_data_end_nowait,
  // Call to void __tgt_target_data_update(int64_t device_id, int32_t arg_num,
  // void** args_base, void **args, size_t *arg_sizes, int64_t *arg_types);
  OMPRTL__tgt_target_data_update,
  // Call to void __tgt_target_data_update_nowait(int64_t device_id, int32_t
  // arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t
  // *arg_types);
  OMPRTL__tgt_target_data_update_nowait,
};
723 
724 /// A basic class for pre|post-action for advanced codegen sequence for OpenMP
725 /// region.
726 class CleanupTy final : public EHScopeStack::Cleanup {
727   PrePostActionTy *Action;
728 
729 public:
730   explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {}
731   void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
732     if (!CGF.HaveInsertPoint())
733       return;
734     Action->Exit(CGF);
735   }
736 };
737 
738 } // anonymous namespace
739 
void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const {
  CodeGenFunction::RunCleanupsScope Scope(CGF);
  if (PrePostAction) {
    // Register the action's Exit() as a cleanup so it runs when Scope
    // unwinds, on both the normal and the exceptional path.
    CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction);
    Callback(CodeGen, CGF, *PrePostAction);
  } else {
    // No user-supplied action: hand the callback a locally constructed one.
    PrePostActionTy Action;
    Callback(CodeGen, CGF, Action);
  }
}
750 
751 /// Check if the combiner is a call to UDR combiner and if it is so return the
752 /// UDR decl used for reduction.
753 static const OMPDeclareReductionDecl *
754 getReductionInit(const Expr *ReductionOp) {
755   if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
756     if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
757       if (const auto *DRE =
758               dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
759         if (const auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl()))
760           return DRD;
761   return nullptr;
762 }
763 
/// Emit the initializer for a private reduction copy.
/// If \p DRD has an explicit initializer, emit \p InitOp (a call through an
/// OpaqueValueExpr callee) with the UDR initializer function substituted for
/// the callee and the call's variables redirected to \p Private and
/// \p Original; otherwise store the null constant of \p Ty into \p Private.
static void emitInitWithReductionInitializer(CodeGenFunction &CGF,
                                             const OMPDeclareReductionDecl *DRD,
                                             const Expr *InitOp,
                                             Address Private, Address Original,
                                             QualType Ty) {
  if (DRD->getInitializer()) {
    // Reduction.second is the initializer function emitted for this UDR.
    std::pair<llvm::Function *, llvm::Function *> Reduction =
        CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
    const auto *CE = cast<CallExpr>(InitOp);
    const auto *OVE = cast<OpaqueValueExpr>(CE->getCallee());
    // The two call arguments are address-of expressions over DeclRefExprs;
    // strip down to the referenced variables.
    const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts();
    const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts();
    const auto *LHSDRE =
        cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr());
    const auto *RHSDRE =
        cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr());
    // Redirect the first argument's variable to the private copy and the
    // second one's to the original variable.
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()),
                            [=]() { return Private; });
    PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()),
                            [=]() { return Original; });
    (void)PrivateScope.Privatize();
    RValue Func = RValue::get(Reduction.second);
    CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
    CGF.EmitIgnoredExpr(InitOp);
  } else {
    // No explicit initializer: materialize the type's null constant in a
    // private constant global and copy it into the private variable.
    llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty);
    std::string Name = CGF.CGM.getOpenMPRuntime().getName({"init"});
    auto *GV = new llvm::GlobalVariable(
        CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true,
        llvm::GlobalValue::PrivateLinkage, Init, Name);
    LValue LV = CGF.MakeNaturalAlignAddrLValue(GV, Ty);
    RValue InitRVal;
    // Load the constant with the evaluation strategy matching Ty.
    switch (CGF.getEvaluationKind(Ty)) {
    case TEK_Scalar:
      InitRVal = CGF.EmitLoadOfLValue(LV, DRD->getLocation());
      break;
    case TEK_Complex:
      InitRVal =
          RValue::getComplex(CGF.EmitLoadOfComplex(LV, DRD->getLocation()));
      break;
    case TEK_Aggregate:
      InitRVal = RValue::getAggregate(LV.getAddress());
      break;
    }
    // Funnel the loaded value through an OpaqueValueExpr so the generic
    // expression emitter can store it into Private.
    OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_RValue);
    CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal);
    CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
                         /*IsInitializer=*/false);
  }
}
815 
/// Emit element-by-element initialization of an array of non-trivial types.
/// \param DestAddr Address of the (private) destination array.
/// \param Type Type of the array.
/// \param EmitDeclareReductionInit True when elements must be initialized
/// via the user-defined reduction initializer.
/// \param Init Per-element initializer: the reduction op expression when
/// \p EmitDeclareReductionInit is set, otherwise the private variable's own
/// init expression.
/// \param DRD Declare-reduction declaration (may be null); when non-null the
/// matching source element of \p SrcAddr is tracked through the loop.
/// \param SrcAddr Address of the original array; used only when \p DRD is
/// non-null.
static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr,
                                 QualType Type, bool EmitDeclareReductionInit,
                                 const Expr *Init,
                                 const OMPDeclareReductionDecl *DRD,
                                 Address SrcAddr = Address::invalid()) {
  // Perform element-by-element initialization.
  QualType ElementTy;

  // Drill down to the base element type on both arrays.
  const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr);
  DestAddr =
      CGF.Builder.CreateElementBitCast(DestAddr, DestAddr.getElementType());
  if (DRD)
    SrcAddr =
        CGF.Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType());

  llvm::Value *SrcBegin = nullptr;
  if (DRD)
    SrcBegin = SrcAddr.getPointer();
  llvm::Value *DestBegin = DestAddr.getPointer();
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *DestEnd = CGF.Builder.CreateGEP(DestBegin, NumElements);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arrayinit.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arrayinit.done");
  // Skip the loop entirely for zero-length arrays.
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);

  // PHIs track the current source/destination element across iterations.
  llvm::PHINode *SrcElementPHI = nullptr;
  Address SrcElementCurrent = Address::invalid();
  if (DRD) {
    SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2,
                                          "omp.arraycpy.srcElementPast");
    SrcElementPHI->addIncoming(SrcBegin, EntryBB);
    SrcElementCurrent =
        Address(SrcElementPHI,
                SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));
  }
  llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI(
      DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
  DestElementPHI->addIncoming(DestBegin, EntryBB);
  Address DestElementCurrent =
      Address(DestElementPHI,
              DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  {
    // Scope per element so element cleanups run before the next iteration.
    CodeGenFunction::RunCleanupsScope InitScope(CGF);
    if (EmitDeclareReductionInit) {
      emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent,
                                       SrcElementCurrent, ElementTy);
    } else
      CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(),
                           /*IsInitializer=*/false);
  }

  if (DRD) {
    // Shift the address forward by one element.
    // NOTE(review): the IR value name "omp.arraycpy.dest.element" appears
    // copy-pasted from the dest increment below; purely cosmetic.
    llvm::Value *SrcElementNext = CGF.Builder.CreateConstGEP1_32(
        SrcElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
    SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock());
  }

  // Shift the address forward by one element.
  llvm::Value *DestElementNext = CGF.Builder.CreateConstGEP1_32(
      DestElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}
904 
/// Emit the lvalue for the shared (original) copy of a reduction item.
LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) {
  return CGF.EmitOMPSharedLValue(E);
}
908 
909 LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF,
910                                             const Expr *E) {
911   if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E))
912     return CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false);
913   return LValue();
914 }
915 
/// Emit element-wise initialization of the private array copy for reduction
/// item \p N.
void ReductionCodeGen::emitAggregateInitialization(
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal,
    const OMPDeclareReductionDecl *DRD) {
  // Emit VarDecl with copy init for arrays.
  // Get the address of the original variable captured in current
  // captured region.
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  // Use the UDR initializer when one exists, or when the private copy has no
  // default initializer of its own.
  bool EmitDeclareReductionInit =
      DRD && (DRD->getInitializer() || !PrivateVD->hasInit());
  EmitOMPAggregateInit(CGF, PrivateAddr, PrivateVD->getType(),
                       EmitDeclareReductionInit,
                       // Per-element init: the reduction op for UDR init,
                       // otherwise the private variable's own initializer.
                       EmitDeclareReductionInit ? ClausesData[N].ReductionOp
                                                : PrivateVD->getInit(),
                       DRD, SharedLVal.getAddress());
}
932 
933 ReductionCodeGen::ReductionCodeGen(ArrayRef<const Expr *> Shareds,
934                                    ArrayRef<const Expr *> Privates,
935                                    ArrayRef<const Expr *> ReductionOps) {
936   ClausesData.reserve(Shareds.size());
937   SharedAddresses.reserve(Shareds.size());
938   Sizes.reserve(Shareds.size());
939   BaseDecls.reserve(Shareds.size());
940   auto IPriv = Privates.begin();
941   auto IRed = ReductionOps.begin();
942   for (const Expr *Ref : Shareds) {
943     ClausesData.emplace_back(Ref, *IPriv, *IRed);
944     std::advance(IPriv, 1);
945     std::advance(IRed, 1);
946   }
947 }
948 
/// Emit and record the shared lvalue pair for reduction item \p N.
void ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, unsigned N) {
  // Items must be emitted in clause order; N is the next free slot.
  assert(SharedAddresses.size() == N &&
         "Number of generated lvalues must be exactly N.");
  // First: lvalue of the shared item itself; Second: lvalue of the section
  // upper bound (invalid LValue for non-section items).
  LValue First = emitSharedLValue(CGF, ClausesData[N].Ref);
  LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Ref);
  SharedAddresses.emplace_back(First, Second);
}
956 
/// Compute and record the size (in chars and in elements) of reduction item
/// \p N; for variably-modified private types this also emits the VLA size
/// computation for the private copy's type.
void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) {
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  QualType PrivateType = PrivateVD->getType();
  bool AsArraySection = isa<OMPArraySectionExpr>(ClausesData[N].Ref);
  if (!PrivateType->isVariablyModifiedType()) {
    // Constant-sized item: record only the size in chars; no dynamic element
    // count is needed.
    Sizes.emplace_back(
        CGF.getTypeSize(
            SharedAddresses[N].first.getType().getNonReferenceType()),
        nullptr);
    return;
  }
  llvm::Value *Size;
  llvm::Value *SizeInChars;
  auto *ElemType =
      cast<llvm::PointerType>(SharedAddresses[N].first.getPointer()->getType())
          ->getElementType();
  auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType);
  if (AsArraySection) {
    // Elements = (UB - LB) + 1; bytes = elements * sizeof(element).
    Size = CGF.Builder.CreatePtrDiff(SharedAddresses[N].second.getPointer(),
                                     SharedAddresses[N].first.getPointer());
    Size = CGF.Builder.CreateNUWAdd(
        Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1));
    SizeInChars = CGF.Builder.CreateNUWMul(Size, ElemSizeOf);
  } else {
    // Whole variably-modified item: take the full type size and derive the
    // element count from it.
    SizeInChars = CGF.getTypeSize(
        SharedAddresses[N].first.getType().getNonReferenceType());
    Size = CGF.Builder.CreateExactUDiv(SizeInChars, ElemSizeOf);
  }
  Sizes.emplace_back(SizeInChars, Size);
  // Bind the computed element count to the VLA size expression so emitting
  // the variably-modified private type picks it up.
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}
994 
/// Emit the variably-modified private type of reduction item \p N using an
/// externally supplied element count \p Size; no-op (with a checking assert)
/// for constant-sized items.
void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N,
                                         llvm::Value *Size) {
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  QualType PrivateType = PrivateVD->getType();
  if (!PrivateType->isVariablyModifiedType()) {
    assert(!Size && !Sizes[N].second &&
           "Size should be nullptr for non-variably modified reduction "
           "items.");
    return;
  }
  // Bind Size to the VLA size expression and emit the private type.
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}
1013 
/// Emit the initializer for the private copy of reduction item \p N.
/// \param DefaultInit Caller-supplied fallback; if it returns true the
/// default initialization is considered already handled.
void ReductionCodeGen::emitInitialization(
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal,
    llvm::function_ref<bool(CodeGenFunction &)> DefaultInit) {
  assert(SharedAddresses.size() > N && "No variable was generated");
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  const OMPDeclareReductionDecl *DRD =
      getReductionInit(ClausesData[N].ReductionOp);
  QualType PrivateType = PrivateVD->getType();
  // Recast both addresses to the memory types of the private/shared items.
  PrivateAddr = CGF.Builder.CreateElementBitCast(
      PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
  QualType SharedType = SharedAddresses[N].first.getType();
  SharedLVal = CGF.MakeAddrLValue(
      CGF.Builder.CreateElementBitCast(SharedLVal.getAddress(),
                                       CGF.ConvertTypeForMem(SharedType)),
      SharedType, SharedAddresses[N].first.getBaseInfo(),
      CGF.CGM.getTBAAInfoForSubobject(SharedAddresses[N].first, SharedType));
  if (CGF.getContext().getAsArrayType(PrivateVD->getType())) {
    // Arrays are initialized element by element.
    emitAggregateInitialization(CGF, N, PrivateAddr, SharedLVal, DRD);
  } else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) {
    // Scalar with a UDR initializer (or no default init of its own).
    emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp,
                                     PrivateAddr, SharedLVal.getAddress(),
                                     SharedLVal.getType());
  } else if (!DefaultInit(CGF) && PrivateVD->hasInit() &&
             !CGF.isTrivialInitializer(PrivateVD->getInit())) {
    // Fall back to the private variable's own non-trivial initializer when
    // the caller did not handle it via DefaultInit.
    CGF.EmitAnyExprToMem(PrivateVD->getInit(), PrivateAddr,
                         PrivateVD->getType().getQualifiers(),
                         /*IsInitializer=*/false);
  }
}
1044 
1045 bool ReductionCodeGen::needCleanups(unsigned N) {
1046   const auto *PrivateVD =
1047       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
1048   QualType PrivateType = PrivateVD->getType();
1049   QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
1050   return DTorKind != QualType::DK_none;
1051 }
1052 
1053 void ReductionCodeGen::emitCleanups(CodeGenFunction &CGF, unsigned N,
1054                                     Address PrivateAddr) {
1055   const auto *PrivateVD =
1056       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
1057   QualType PrivateType = PrivateVD->getType();
1058   QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
1059   if (needCleanups(N)) {
1060     PrivateAddr = CGF.Builder.CreateElementBitCast(
1061         PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
1062     CGF.pushDestroy(DTorKind, PrivateAddr, PrivateType);
1063   }
1064 }
1065 
/// Walk down the pointer/reference levels of \p BaseLV (loading at each
/// level) until its type matches \p ElTy, then return the resulting lvalue
/// element-bitcast to \p ElTy's memory representation.
static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          LValue BaseLV) {
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    if (const auto *PtrTy = BaseTy->getAs<PointerType>()) {
      BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(), PtrTy);
    } else {
      // Reference level: load through an lvalue of the reference itself.
      LValue RefLVal = CGF.MakeAddrLValue(BaseLV.getAddress(), BaseTy);
      BaseLV = CGF.EmitLoadOfReferenceLValue(RefLVal);
    }
    BaseTy = BaseTy->getPointeeType();
  }
  return CGF.MakeAddrLValue(
      CGF.Builder.CreateElementBitCast(BaseLV.getAddress(),
                                       CGF.ConvertTypeForMem(ElTy)),
      BaseLV.getType(), BaseLV.getBaseInfo(),
      CGF.CGM.getTBAAInfoForSubobject(BaseLV, BaseLV.getType()));
}
1085 
/// Rebuild the pointer/reference indirection chain between \p BaseTy and
/// \p ElTy around the raw pointer \p Addr: one memory temporary is created
/// per level, each storing the pointer to the next; the outermost temporary
/// is returned. With no intervening level, \p Addr is returned wrapped with
/// \p BaseLVAlignment.
static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          llvm::Type *BaseLVType, CharUnits BaseLVAlignment,
                          llvm::Value *Addr) {
  Address Tmp = Address::invalid();
  Address TopTmp = Address::invalid();
  Address MostTopTmp = Address::invalid();
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    Tmp = CGF.CreateMemTemp(BaseTy);
    if (TopTmp.isValid())
      // Link the previous level's temporary to this one.
      CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp);
    else
      // Remember the outermost temporary; it is the function's result.
      MostTopTmp = Tmp;
    TopTmp = Tmp;
    BaseTy = BaseTy->getPointeeType();
  }
  llvm::Type *Ty = BaseLVType;
  if (Tmp.isValid())
    Ty = Tmp.getElementType();
  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Addr, Ty);
  if (Tmp.isValid()) {
    // Store the adjusted pointer into the innermost temporary.
    CGF.Builder.CreateStore(Addr, Tmp);
    return MostTopTmp;
  }
  return Address(Addr, BaseLVAlignment);
}
1113 
1114 static const VarDecl *getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE) {
1115   const VarDecl *OrigVD = nullptr;
1116   if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(Ref)) {
1117     const Expr *Base = OASE->getBase()->IgnoreParenImpCasts();
1118     while (const auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base))
1119       Base = TempOASE->getBase()->IgnoreParenImpCasts();
1120     while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
1121       Base = TempASE->getBase()->IgnoreParenImpCasts();
1122     DE = cast<DeclRefExpr>(Base);
1123     OrigVD = cast<VarDecl>(DE->getDecl());
1124   } else if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Ref)) {
1125     const Expr *Base = ASE->getBase()->IgnoreParenImpCasts();
1126     while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
1127       Base = TempASE->getBase()->IgnoreParenImpCasts();
1128     DE = cast<DeclRefExpr>(Base);
1129     OrigVD = cast<VarDecl>(DE->getDecl());
1130   }
1131   return OrigVD;
1132 }
1133 
Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N,
                                               Address PrivateAddr) {
  // For array-section/subscript reduction items the shared lvalue points at
  // the first reduced element, not at the base variable. Offset the private
  // pointer by the distance from the first shared element back to the base
  // so that indexing through the original base expression reaches the
  // private storage, and rebuild any pointer indirection of the base type.
  const DeclRefExpr *DE;
  if (const VarDecl *OrigVD = ::getBaseDecl(ClausesData[N].Ref, DE)) {
    BaseDecls.emplace_back(OrigVD);
    LValue OriginalBaseLValue = CGF.EmitLValue(DE);
    // Dereference down to the level matching the shared item's type.
    LValue BaseLValue =
        loadToBegin(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(),
                    OriginalBaseLValue);
    // Distance (in elements) between the base and the first shared element.
    llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff(
        BaseLValue.getPointer(), SharedAddresses[N].first.getPointer());
    llvm::Value *PrivatePointer =
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            PrivateAddr.getPointer(),
            SharedAddresses[N].first.getAddress().getType());
    llvm::Value *Ptr = CGF.Builder.CreateGEP(PrivatePointer, Adjustment);
    return castToBase(CGF, OrigVD->getType(),
                      SharedAddresses[N].first.getType(),
                      OriginalBaseLValue.getAddress().getType(),
                      OriginalBaseLValue.getAlignment(), Ptr);
  }
  // Plain variable reference: no adjustment needed.
  BaseDecls.emplace_back(
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Ref)->getDecl()));
  return PrivateAddr;
}
1159 
1160 bool ReductionCodeGen::usesReductionInitializer(unsigned N) const {
1161   const OMPDeclareReductionDecl *DRD =
1162       getReductionInit(ClausesData[N].ReductionOp);
1163   return DRD && DRD->getInitializer();
1164 }
1165 
LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) {
  // The thread-id variable is captured as a pointer (kmp_int32 * — see the
  // assert in emitParallelOrTeamsOutlinedFunction); load the pointer and
  // form an lvalue for the pointee.
  return CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(getThreadIDVariable()),
      getThreadIDVariable()->getType()->castAs<PointerType>());
}
1171 
void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt * /*S*/) {
  if (!CGF.HaveInsertPoint())
    return;
  // 1.2.2 OpenMP Language Terminology
  // Structured block - An executable statement with a single entry at the
  // top and a single exit at the bottom.
  // The point of exit cannot be a branch out of the structured block.
  // longjmp() and throw() must not violate the entry/exit criteria.
  // Enforce this by emitting the region body inside a terminate scope, so an
  // exception escaping the structured block hits the terminate handler.
  CGF.EHStack.pushTerminate();
  CodeGen(CGF);
  CGF.EHStack.popTerminate();
}
1184 
LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue(
    CodeGenFunction &CGF) {
  // For tasks the thread-id variable is not a pointer (see the assert in
  // emitTaskOutlinedFunction), so its local address is usable directly.
  return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()),
                            getThreadIDVariable()->getType(),
                            AlignmentSource::Decl);
}
1191 
1192 static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC,
1193                                        QualType FieldTy) {
1194   auto *Field = FieldDecl::Create(
1195       C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy,
1196       C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()),
1197       /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit);
1198   Field->setAccess(AS_public);
1199   DC->addDecl(Field);
1200   return Field;
1201 }
1202 
CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM, StringRef FirstSeparator,
                                 StringRef Separator)
    : CGM(CGM), FirstSeparator(FirstSeparator), Separator(Separator),
      OffloadEntriesInfoManager(CGM) {
  // Build the implicit "ident_t" record: four kmp_int32 fields followed by a
  // void* source-location string field.
  ASTContext &C = CGM.getContext();
  RecordDecl *RD = C.buildImplicitRecord("ident_t");
  QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
  RD->startDefinition();
  // reserved_1
  addFieldToRecordDecl(C, RD, KmpInt32Ty);
  // flags
  addFieldToRecordDecl(C, RD, KmpInt32Ty);
  // reserved_2
  addFieldToRecordDecl(C, RD, KmpInt32Ty);
  // reserved_3
  addFieldToRecordDecl(C, RD, KmpInt32Ty);
  // psource
  addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  RD->completeDefinition();
  IdentQTy = C.getRecordType(RD);
  IdentTy = CGM.getTypes().ConvertRecordDeclType(RD);
  // Critical-section name type: an array of 8 x i32.
  KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8);

  // Load previously emitted offload metadata, if any.
  loadOffloadInfoMetadata();
}
1228 
1229 void CGOpenMPRuntime::clear() {
1230   InternalVars.clear();
1231   // Clean non-target variable declarations possibly used only in debug info.
1232   for (const auto &Data : EmittedNonTargetVariables) {
1233     if (!Data.getValue().pointsToAliveValue())
1234       continue;
1235     auto *GV = dyn_cast<llvm::GlobalVariable>(Data.getValue());
1236     if (!GV)
1237       continue;
1238     if (!GV->isDeclaration() || GV->getNumUses() > 0)
1239       continue;
1240     GV->eraseFromParent();
1241   }
1242 }
1243 
1244 std::string CGOpenMPRuntime::getName(ArrayRef<StringRef> Parts) const {
1245   SmallString<128> Buffer;
1246   llvm::raw_svector_ostream OS(Buffer);
1247   StringRef Sep = FirstSeparator;
1248   for (StringRef Part : Parts) {
1249     OS << Sep << Part;
1250     Sep = Separator;
1251   }
1252   return OS.str();
1253 }
1254 
/// Emit a helper function ".omp_combiner." (or ".omp_initializer.") for a
/// declare-reduction construct and return it. \p In and \p Out are the
/// construct's in/out (or orig/priv) variables; inside the helper they are
/// mapped onto the two pointer parameters. \p CombinerInitializer is the
/// expression emitted in the body; it may be null for initializers, in which
/// case only \p Out's own initializer (if any) is emitted.
static llvm::Function *
emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty,
                          const Expr *CombinerInitializer, const VarDecl *In,
                          const VarDecl *Out, bool IsCombiner) {
  // void .omp_combiner.(Ty *in, Ty *out);
  ASTContext &C = CGM.getContext();
  QualType PtrTy = C.getPointerType(Ty).withRestrict();
  FunctionArgList Args;
  ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(),
                               /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(),
                              /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
  Args.push_back(&OmpOutParm);
  Args.push_back(&OmpInParm);
  const CGFunctionInfo &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName(
      {IsCombiner ? "omp_combiner" : "omp_initializer", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  // Force the helper to be inlined into its users.
  Fn->removeFnAttr(llvm::Attribute::NoInline);
  Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
  Fn->addFnAttr(llvm::Attribute::AlwaysInline);
  CodeGenFunction CGF(CGM);
  // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions.
  // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions.
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, In->getLocation(),
                    Out->getLocation());
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm);
  Scope.addPrivate(In, [&CGF, AddrIn, PtrTy]() {
    return CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>())
        .getAddress();
  });
  Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm);
  Scope.addPrivate(Out, [&CGF, AddrOut, PtrTy]() {
    return CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>())
        .getAddress();
  });
  (void)Scope.Privatize();
  // For an initializer without an explicit init expression, emit Out's own
  // non-trivial initializer into its (privatized) storage.
  if (!IsCombiner && Out->hasInit() &&
      !CGF.isTrivialInitializer(Out->getInit())) {
    CGF.EmitAnyExprToMem(Out->getInit(), CGF.GetAddrOfLocalVar(Out),
                         Out->getType().getQualifiers(),
                         /*IsInitializer=*/true);
  }
  if (CombinerInitializer)
    CGF.EmitIgnoredExpr(CombinerInitializer);
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}
1309 
/// Emit (once per declaration) the combiner and optional initializer
/// functions for a declare-reduction construct and cache them in UDRMap.
void CGOpenMPRuntime::emitUserDefinedReduction(
    CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) {
  // Already emitted for this declaration.
  if (UDRMap.count(D) > 0)
    return;
  llvm::Function *Combiner = emitCombinerOrInitializer(
      CGM, D->getType(), D->getCombiner(),
      cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerIn())->getDecl()),
      cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerOut())->getDecl()),
      /*IsCombiner=*/true);
  llvm::Function *Initializer = nullptr;
  if (const Expr *Init = D->getInitializer()) {
    // For non-call initializers no expression is passed down; the helper
    // then emits the priv variable's own initializer instead.
    Initializer = emitCombinerOrInitializer(
        CGM, D->getType(),
        D->getInitializerKind() == OMPDeclareReductionDecl::CallInit ? Init
                                                                     : nullptr,
        cast<VarDecl>(cast<DeclRefExpr>(D->getInitOrig())->getDecl()),
        cast<VarDecl>(cast<DeclRefExpr>(D->getInitPriv())->getDecl()),
        /*IsCombiner=*/false);
  }
  UDRMap.try_emplace(D, Combiner, Initializer);
  if (CGF) {
    // Record which UDRs were emitted while generating CGF->CurFn.
    auto &Decls = FunctionUDRMap.FindAndConstruct(CGF->CurFn);
    Decls.second.push_back(D);
  }
}
1335 
1336 std::pair<llvm::Function *, llvm::Function *>
1337 CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) {
1338   auto I = UDRMap.find(D);
1339   if (I != UDRMap.end())
1340     return I->second;
1341   emitUserDefinedReduction(/*CGF=*/nullptr, D);
1342   return UDRMap.lookup(D);
1343 }
1344 
1345 static llvm::Function *emitParallelOrTeamsOutlinedFunction(
1346     CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS,
1347     const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
1348     const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen) {
1349   assert(ThreadIDVar->getType()->isPointerType() &&
1350          "thread id variable must be of type kmp_int32 *");
1351   CodeGenFunction CGF(CGM, true);
1352   bool HasCancel = false;
1353   if (const auto *OPD = dyn_cast<OMPParallelDirective>(&D))
1354     HasCancel = OPD->hasCancel();
1355   else if (const auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D))
1356     HasCancel = OPSD->hasCancel();
1357   else if (const auto *OPFD = dyn_cast<OMPParallelForDirective>(&D))
1358     HasCancel = OPFD->hasCancel();
1359   else if (const auto *OPFD = dyn_cast<OMPTargetParallelForDirective>(&D))
1360     HasCancel = OPFD->hasCancel();
1361   else if (const auto *OPFD = dyn_cast<OMPDistributeParallelForDirective>(&D))
1362     HasCancel = OPFD->hasCancel();
1363   else if (const auto *OPFD =
1364                dyn_cast<OMPTeamsDistributeParallelForDirective>(&D))
1365     HasCancel = OPFD->hasCancel();
1366   else if (const auto *OPFD =
1367                dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&D))
1368     HasCancel = OPFD->hasCancel();
1369   CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind,
1370                                     HasCancel, OutlinedHelperName);
1371   CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
1372   return CGF.GenerateOpenMPCapturedStmtFunction(*CS);
1373 }
1374 
1375 llvm::Function *CGOpenMPRuntime::emitParallelOutlinedFunction(
1376     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
1377     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
1378   const CapturedStmt *CS = D.getCapturedStmt(OMPD_parallel);
1379   return emitParallelOrTeamsOutlinedFunction(
1380       CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
1381 }
1382 
1383 llvm::Function *CGOpenMPRuntime::emitTeamsOutlinedFunction(
1384     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
1385     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
1386   const CapturedStmt *CS = D.getCapturedStmt(OMPD_teams);
1387   return emitParallelOrTeamsOutlinedFunction(
1388       CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
1389 }
1390 
/// Outline the task/taskloop region of \p D into a helper function.
/// \param NumberOfParts Out-parameter set only for untied tasks.
llvm::Function *CGOpenMPRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  // Action for untied tasks: emit a __kmpc_omp_task call with the current
  // task descriptor loaded from TaskTVar.
  auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF,
                                              PrePostActionTy &) {
    llvm::Value *ThreadID = getThreadID(CGF, D.getBeginLoc());
    llvm::Value *UpLoc = emitUpdateLocation(CGF, D.getBeginLoc());
    llvm::Value *TaskArgs[] = {
        UpLoc, ThreadID,
        CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar),
                                    TaskTVar->getType()->castAs<PointerType>())
            .getPointer()};
    CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task), TaskArgs);
  };
  CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar,
                                                            UntiedCodeGen);
  CodeGen.setAction(Action);
  assert(!ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 for tasks");
  // Taskloop-class directives capture their statement under OMPD_taskloop.
  const OpenMPDirectiveKind Region =
      isOpenMPTaskLoopDirective(D.getDirectiveKind()) ? OMPD_taskloop
                                                      : OMPD_task;
  const CapturedStmt *CS = D.getCapturedStmt(Region);
  // hasCancel is only queried for plain 'task' directives here.
  const auto *TD = dyn_cast<OMPTaskDirective>(&D);
  CodeGenFunction CGF(CGM, true);
  CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen,
                                        InnermostKind,
                                        TD ? TD->hasCancel() : false, Action);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  llvm::Function *Res = CGF.GenerateCapturedStmtFunction(*CS);
  if (!Tied)
    // Part count is only meaningful for untied tasks.
    NumberOfParts = Action.getNumberOfParts();
  return Res;
}
1427 
1428 static void buildStructValue(ConstantStructBuilder &Fields, CodeGenModule &CGM,
1429                              const RecordDecl *RD, const CGRecordLayout &RL,
1430                              ArrayRef<llvm::Constant *> Data) {
1431   llvm::StructType *StructTy = RL.getLLVMType();
1432   unsigned PrevIdx = 0;
1433   ConstantInitBuilder CIBuilder(CGM);
1434   auto DI = Data.begin();
1435   for (const FieldDecl *FD : RD->fields()) {
1436     unsigned Idx = RL.getLLVMFieldNo(FD);
1437     // Fill the alignment.
1438     for (unsigned I = PrevIdx; I < Idx; ++I)
1439       Fields.add(llvm::Constant::getNullValue(StructTy->getElementType(I)));
1440     PrevIdx = Idx + 1;
1441     Fields.add(*DI);
1442     ++DI;
1443   }
1444 }
1445 
1446 template <class... As>
1447 static llvm::GlobalVariable *
1448 createGlobalStruct(CodeGenModule &CGM, QualType Ty, bool IsConstant,
1449                    ArrayRef<llvm::Constant *> Data, const Twine &Name,
1450                    As &&... Args) {
1451   const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl());
1452   const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD);
1453   ConstantInitBuilder CIBuilder(CGM);
1454   ConstantStructBuilder Fields = CIBuilder.beginStruct(RL.getLLVMType());
1455   buildStructValue(Fields, CGM, RD, RL, Data);
1456   return Fields.finishAndCreateGlobal(
1457       Name, CGM.getContext().getAlignOfGlobalVarInChars(Ty), IsConstant,
1458       std::forward<As>(Args)...);
1459 }
1460 
1461 template <typename T>
1462 static void
1463 createConstantGlobalStructAndAddToParent(CodeGenModule &CGM, QualType Ty,
1464                                          ArrayRef<llvm::Constant *> Data,
1465                                          T &Parent) {
1466   const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl());
1467   const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD);
1468   ConstantStructBuilder Fields = Parent.beginStruct(RL.getLLVMType());
1469   buildStructValue(Fields, CGM, RD, RL, Data);
1470   Fields.finishAndAddTo(Parent);
1471 }
1472 
1473 Address CGOpenMPRuntime::getOrCreateDefaultLocation(unsigned Flags) {
1474   CharUnits Align = CGM.getContext().getTypeAlignInChars(IdentQTy);
1475   unsigned Reserved2Flags = getDefaultLocationReserved2Flags();
1476   FlagsTy FlagsKey(Flags, Reserved2Flags);
1477   llvm::Value *Entry = OpenMPDefaultLocMap.lookup(FlagsKey);
1478   if (!Entry) {
1479     if (!DefaultOpenMPPSource) {
1480       // Initialize default location for psource field of ident_t structure of
1481       // all ident_t objects. Format is ";file;function;line;column;;".
1482       // Taken from
1483       // https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp_str.cpp
1484       DefaultOpenMPPSource =
1485           CGM.GetAddrOfConstantCString(";unknown;unknown;0;0;;").getPointer();
1486       DefaultOpenMPPSource =
1487           llvm::ConstantExpr::getBitCast(DefaultOpenMPPSource, CGM.Int8PtrTy);
1488     }
1489 
1490     llvm::Constant *Data[] = {
1491         llvm::ConstantInt::getNullValue(CGM.Int32Ty),
1492         llvm::ConstantInt::get(CGM.Int32Ty, Flags),
1493         llvm::ConstantInt::get(CGM.Int32Ty, Reserved2Flags),
1494         llvm::ConstantInt::getNullValue(CGM.Int32Ty), DefaultOpenMPPSource};
1495     llvm::GlobalValue *DefaultOpenMPLocation =
1496         createGlobalStruct(CGM, IdentQTy, isDefaultLocationConstant(), Data, "",
1497                            llvm::GlobalValue::PrivateLinkage);
1498     DefaultOpenMPLocation->setUnnamedAddr(
1499         llvm::GlobalValue::UnnamedAddr::Global);
1500 
1501     OpenMPDefaultLocMap[FlagsKey] = Entry = DefaultOpenMPLocation;
1502   }
1503   return Address(Entry, Align);
1504 }
1505 
1506 void CGOpenMPRuntime::setLocThreadIdInsertPt(CodeGenFunction &CGF,
1507                                              bool AtCurrentPoint) {
1508   auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1509   assert(!Elem.second.ServiceInsertPt && "Insert point is set already.");
1510 
1511   llvm::Value *Undef = llvm::UndefValue::get(CGF.Int32Ty);
1512   if (AtCurrentPoint) {
1513     Elem.second.ServiceInsertPt = new llvm::BitCastInst(
1514         Undef, CGF.Int32Ty, "svcpt", CGF.Builder.GetInsertBlock());
1515   } else {
1516     Elem.second.ServiceInsertPt =
1517         new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt");
1518     Elem.second.ServiceInsertPt->insertAfter(CGF.AllocaInsertPt);
1519   }
1520 }
1521 
1522 void CGOpenMPRuntime::clearLocThreadIdInsertPt(CodeGenFunction &CGF) {
1523   auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1524   if (Elem.second.ServiceInsertPt) {
1525     llvm::Instruction *Ptr = Elem.second.ServiceInsertPt;
1526     Elem.second.ServiceInsertPt = nullptr;
1527     Ptr->eraseFromParent();
1528   }
1529 }
1530 
llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF,
                                                 SourceLocation Loc,
                                                 unsigned Flags) {
  // All locations emitted here are KMPC-style ident_t objects.
  Flags |= OMP_IDENT_KMPC;
  // If no debug info is generated - return global default location.
  if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo ||
      Loc.isInvalid())
    return getOrCreateDefaultLocation(Flags).getPointer();

  assert(CGF.CurFn && "No function in current CodeGenFunction.");

  CharUnits Align = CGM.getContext().getTypeAlignInChars(IdentQTy);
  Address LocValue = Address::invalid();
  // Reuse the per-function ident_t alloca if one was already created.
  auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
  if (I != OpenMPLocThreadIDMap.end())
    LocValue = Address(I->second.DebugLoc, Align);

  // OpenMPLocThreadIDMap may have null DebugLoc and non-null ThreadID, if
  // GetOpenMPThreadID was called before this routine.
  if (!LocValue.isValid()) {
    // Generate "ident_t .kmpc_loc.addr;"
    Address AI = CGF.CreateMemTemp(IdentQTy, ".kmpc_loc.addr");
    auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
    Elem.second.DebugLoc = AI.getPointer();
    LocValue = AI;

    // Copy the default ident_t into the alloca at the service insertion
    // point so the template is initialized once, near function entry.
    if (!Elem.second.ServiceInsertPt)
      setLocThreadIdInsertPt(CGF);
    CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
    CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt);
    CGF.Builder.CreateMemCpy(LocValue, getOrCreateDefaultLocation(Flags),
                             CGF.getTypeSize(IdentQTy));
  }

  // char **psource = &.kmpc_loc_<flags>.addr.psource;
  LValue Base = CGF.MakeAddrLValue(LocValue, IdentQTy);
  auto Fields = cast<RecordDecl>(IdentQTy->getAsTagDecl())->field_begin();
  LValue PSource =
      CGF.EmitLValueForField(Base, *std::next(Fields, IdentField_PSource));

  // Cache the formatted source-location string per raw SourceLocation so it
  // is emitted only once per translation unit.
  llvm::Value *OMPDebugLoc = OpenMPDebugLocMap.lookup(Loc.getRawEncoding());
  if (OMPDebugLoc == nullptr) {
    SmallString<128> Buffer2;
    llvm::raw_svector_ostream OS2(Buffer2);
    // Build debug location
    PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
    OS2 << ";" << PLoc.getFilename() << ";";
    if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
      OS2 << FD->getQualifiedNameAsString();
    OS2 << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;";
    OMPDebugLoc = CGF.Builder.CreateGlobalStringPtr(OS2.str());
    OpenMPDebugLocMap[Loc.getRawEncoding()] = OMPDebugLoc;
  }
  // *psource = ";<File>;<Function>;<Line>;<Column>;;";
  CGF.EmitStoreOfScalar(OMPDebugLoc, PSource);

  // Our callers always pass this to a runtime function, so for
  // convenience, go ahead and return a naked pointer.
  return LocValue.getPointer();
}
1591 
llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF,
                                          SourceLocation Loc) {
  assert(CGF.CurFn && "No function in current CodeGenFunction.");

  llvm::Value *ThreadID = nullptr;
  // Check whether we've already cached a load of the thread id in this
  // function.
  auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
  if (I != OpenMPLocThreadIDMap.end()) {
    ThreadID = I->second.ThreadID;
    if (ThreadID != nullptr)
      return ThreadID;
  }
  // If exceptions are enabled, do not use parameter to avoid possible crash.
  // Loading the thread-id parameter is only safe either when no landing pad
  // is required or when we are still in the entry block.
  if (!CGF.EHStack.requiresLandingPad() || !CGF.getLangOpts().Exceptions ||
      !CGF.getLangOpts().CXXExceptions ||
      CGF.Builder.GetInsertBlock() == CGF.AllocaInsertPt->getParent()) {
    if (auto *OMPRegionInfo =
            dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
      if (OMPRegionInfo->getThreadIDVariable()) {
        // Check if this an outlined function with thread id passed as argument.
        LValue LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF);
        ThreadID = CGF.EmitLoadOfScalar(LVal, Loc);
        // If value loaded in entry block, cache it and use it everywhere in
        // function.
        if (CGF.Builder.GetInsertBlock() == CGF.AllocaInsertPt->getParent()) {
          auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
          Elem.second.ThreadID = ThreadID;
        }
        return ThreadID;
      }
    }
  }

  // This is not an outlined function region - need to call __kmpc_int32
  // kmpc_global_thread_num(ident_t *loc).
  // Generate thread id value and cache this value for use across the
  // function.
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
  if (!Elem.second.ServiceInsertPt)
    setLocThreadIdInsertPt(CGF);
  // Emit the runtime call at the service insertion point (near function
  // entry) so the result dominates all uses, then restore the builder.
  CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
  CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt);
  llvm::CallInst *Call = CGF.Builder.CreateCall(
      createRuntimeFunction(OMPRTL__kmpc_global_thread_num),
      emitUpdateLocation(CGF, Loc));
  Call->setCallingConv(CGF.getRuntimeCC());
  Elem.second.ThreadID = Call;
  return Call;
}
1642 
1643 void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) {
1644   assert(CGF.CurFn && "No function in current CodeGenFunction.");
1645   if (OpenMPLocThreadIDMap.count(CGF.CurFn)) {
1646     clearLocThreadIdInsertPt(CGF);
1647     OpenMPLocThreadIDMap.erase(CGF.CurFn);
1648   }
1649   if (FunctionUDRMap.count(CGF.CurFn) > 0) {
1650     for(auto *D : FunctionUDRMap[CGF.CurFn])
1651       UDRMap.erase(D);
1652     FunctionUDRMap.erase(CGF.CurFn);
1653   }
1654 }
1655 
1656 llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() {
1657   return IdentTy->getPointerTo();
1658 }
1659 
1660 llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() {
1661   if (!Kmpc_MicroTy) {
1662     // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...)
1663     llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty),
1664                                  llvm::PointerType::getUnqual(CGM.Int32Ty)};
1665     Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true);
1666   }
1667   return llvm::PointerType::getUnqual(Kmpc_MicroTy);
1668 }
1669 
1670 llvm::FunctionCallee CGOpenMPRuntime::createRuntimeFunction(unsigned Function) {
1671   llvm::FunctionCallee RTLFn = nullptr;
1672   switch (static_cast<OpenMPRTLFunction>(Function)) {
1673   case OMPRTL__kmpc_fork_call: {
1674     // Build void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro
1675     // microtask, ...);
1676     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1677                                 getKmpc_MicroPointerTy()};
1678     auto *FnTy =
1679         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ true);
1680     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_fork_call");
1681     if (auto *F = dyn_cast<llvm::Function>(RTLFn.getCallee())) {
1682       if (!F->hasMetadata(llvm::LLVMContext::MD_callback)) {
1683         llvm::LLVMContext &Ctx = F->getContext();
1684         llvm::MDBuilder MDB(Ctx);
1685         // Annotate the callback behavior of the __kmpc_fork_call:
1686         //  - The callback callee is argument number 2 (microtask).
1687         //  - The first two arguments of the callback callee are unknown (-1).
1688         //  - All variadic arguments to the __kmpc_fork_call are passed to the
1689         //    callback callee.
1690         F->addMetadata(
1691             llvm::LLVMContext::MD_callback,
1692             *llvm::MDNode::get(Ctx, {MDB.createCallbackEncoding(
1693                                         2, {-1, -1},
1694                                         /* VarArgsArePassed */ true)}));
1695       }
1696     }
1697     break;
1698   }
1699   case OMPRTL__kmpc_global_thread_num: {
1700     // Build kmp_int32 __kmpc_global_thread_num(ident_t *loc);
1701     llvm::Type *TypeParams[] = {getIdentTyPointerTy()};
1702     auto *FnTy =
1703         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
1704     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_global_thread_num");
1705     break;
1706   }
1707   case OMPRTL__kmpc_threadprivate_cached: {
1708     // Build void *__kmpc_threadprivate_cached(ident_t *loc,
1709     // kmp_int32 global_tid, void *data, size_t size, void ***cache);
1710     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1711                                 CGM.VoidPtrTy, CGM.SizeTy,
1712                                 CGM.VoidPtrTy->getPointerTo()->getPointerTo()};
1713     auto *FnTy =
1714         llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg*/ false);
1715     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_cached");
1716     break;
1717   }
1718   case OMPRTL__kmpc_critical: {
1719     // Build void __kmpc_critical(ident_t *loc, kmp_int32 global_tid,
1720     // kmp_critical_name *crit);
1721     llvm::Type *TypeParams[] = {
1722         getIdentTyPointerTy(), CGM.Int32Ty,
1723         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
1724     auto *FnTy =
1725         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1726     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_critical");
1727     break;
1728   }
1729   case OMPRTL__kmpc_critical_with_hint: {
1730     // Build void __kmpc_critical_with_hint(ident_t *loc, kmp_int32 global_tid,
1731     // kmp_critical_name *crit, uintptr_t hint);
1732     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1733                                 llvm::PointerType::getUnqual(KmpCriticalNameTy),
1734                                 CGM.IntPtrTy};
1735     auto *FnTy =
1736         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1737     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_critical_with_hint");
1738     break;
1739   }
1740   case OMPRTL__kmpc_threadprivate_register: {
1741     // Build void __kmpc_threadprivate_register(ident_t *, void *data,
1742     // kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor);
1743     // typedef void *(*kmpc_ctor)(void *);
1744     auto *KmpcCtorTy =
1745         llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
1746                                 /*isVarArg*/ false)->getPointerTo();
1747     // typedef void *(*kmpc_cctor)(void *, void *);
1748     llvm::Type *KmpcCopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
1749     auto *KmpcCopyCtorTy =
1750         llvm::FunctionType::get(CGM.VoidPtrTy, KmpcCopyCtorTyArgs,
1751                                 /*isVarArg*/ false)
1752             ->getPointerTo();
1753     // typedef void (*kmpc_dtor)(void *);
1754     auto *KmpcDtorTy =
1755         llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy, /*isVarArg*/ false)
1756             ->getPointerTo();
1757     llvm::Type *FnTyArgs[] = {getIdentTyPointerTy(), CGM.VoidPtrTy, KmpcCtorTy,
1758                               KmpcCopyCtorTy, KmpcDtorTy};
1759     auto *FnTy = llvm::FunctionType::get(CGM.VoidTy, FnTyArgs,
1760                                         /*isVarArg*/ false);
1761     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_register");
1762     break;
1763   }
1764   case OMPRTL__kmpc_end_critical: {
1765     // Build void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid,
1766     // kmp_critical_name *crit);
1767     llvm::Type *TypeParams[] = {
1768         getIdentTyPointerTy(), CGM.Int32Ty,
1769         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
1770     auto *FnTy =
1771         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1772     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_critical");
1773     break;
1774   }
1775   case OMPRTL__kmpc_cancel_barrier: {
1776     // Build kmp_int32 __kmpc_cancel_barrier(ident_t *loc, kmp_int32
1777     // global_tid);
1778     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1779     auto *FnTy =
1780         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
1781     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_cancel_barrier");
1782     break;
1783   }
1784   case OMPRTL__kmpc_barrier: {
1785     // Build void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid);
1786     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1787     auto *FnTy =
1788         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1789     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_barrier");
1790     break;
1791   }
1792   case OMPRTL__kmpc_for_static_fini: {
1793     // Build void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid);
1794     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1795     auto *FnTy =
1796         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1797     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_for_static_fini");
1798     break;
1799   }
1800   case OMPRTL__kmpc_push_num_threads: {
1801     // Build void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
1802     // kmp_int32 num_threads)
1803     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1804                                 CGM.Int32Ty};
1805     auto *FnTy =
1806         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1807     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_num_threads");
1808     break;
1809   }
1810   case OMPRTL__kmpc_serialized_parallel: {
1811     // Build void __kmpc_serialized_parallel(ident_t *loc, kmp_int32
1812     // global_tid);
1813     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1814     auto *FnTy =
1815         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1816     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_serialized_parallel");
1817     break;
1818   }
1819   case OMPRTL__kmpc_end_serialized_parallel: {
1820     // Build void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32
1821     // global_tid);
1822     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1823     auto *FnTy =
1824         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1825     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_serialized_parallel");
1826     break;
1827   }
1828   case OMPRTL__kmpc_flush: {
1829     // Build void __kmpc_flush(ident_t *loc);
1830     llvm::Type *TypeParams[] = {getIdentTyPointerTy()};
1831     auto *FnTy =
1832         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1833     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_flush");
1834     break;
1835   }
1836   case OMPRTL__kmpc_master: {
1837     // Build kmp_int32 __kmpc_master(ident_t *loc, kmp_int32 global_tid);
1838     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1839     auto *FnTy =
1840         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
1841     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_master");
1842     break;
1843   }
1844   case OMPRTL__kmpc_end_master: {
1845     // Build void __kmpc_end_master(ident_t *loc, kmp_int32 global_tid);
1846     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1847     auto *FnTy =
1848         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1849     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_master");
1850     break;
1851   }
1852   case OMPRTL__kmpc_omp_taskyield: {
1853     // Build kmp_int32 __kmpc_omp_taskyield(ident_t *, kmp_int32 global_tid,
1854     // int end_part);
1855     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
1856     auto *FnTy =
1857         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
1858     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_taskyield");
1859     break;
1860   }
1861   case OMPRTL__kmpc_single: {
1862     // Build kmp_int32 __kmpc_single(ident_t *loc, kmp_int32 global_tid);
1863     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1864     auto *FnTy =
1865         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
1866     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_single");
1867     break;
1868   }
1869   case OMPRTL__kmpc_end_single: {
1870     // Build void __kmpc_end_single(ident_t *loc, kmp_int32 global_tid);
1871     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1872     auto *FnTy =
1873         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1874     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_single");
1875     break;
1876   }
1877   case OMPRTL__kmpc_omp_task_alloc: {
1878     // Build kmp_task_t *__kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
1879     // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
1880     // kmp_routine_entry_t *task_entry);
1881     assert(KmpRoutineEntryPtrTy != nullptr &&
1882            "Type kmp_routine_entry_t must be created.");
1883     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty,
1884                                 CGM.SizeTy, CGM.SizeTy, KmpRoutineEntryPtrTy};
1885     // Return void * and then cast to particular kmp_task_t type.
1886     auto *FnTy =
1887         llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
1888     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_alloc");
1889     break;
1890   }
1891   case OMPRTL__kmpc_omp_task: {
1892     // Build kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t
1893     // *new_task);
1894     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1895                                 CGM.VoidPtrTy};
1896     auto *FnTy =
1897         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
1898     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task");
1899     break;
1900   }
1901   case OMPRTL__kmpc_copyprivate: {
1902     // Build void __kmpc_copyprivate(ident_t *loc, kmp_int32 global_tid,
1903     // size_t cpy_size, void *cpy_data, void(*cpy_func)(void *, void *),
1904     // kmp_int32 didit);
1905     llvm::Type *CpyTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
1906     auto *CpyFnTy =
1907         llvm::FunctionType::get(CGM.VoidTy, CpyTypeParams, /*isVarArg=*/false);
1908     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.SizeTy,
1909                                 CGM.VoidPtrTy, CpyFnTy->getPointerTo(),
1910                                 CGM.Int32Ty};
1911     auto *FnTy =
1912         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1913     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_copyprivate");
1914     break;
1915   }
1916   case OMPRTL__kmpc_reduce: {
1917     // Build kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid,
1918     // kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void
1919     // (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name *lck);
1920     llvm::Type *ReduceTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
1921     auto *ReduceFnTy = llvm::FunctionType::get(CGM.VoidTy, ReduceTypeParams,
1922                                                /*isVarArg=*/false);
1923     llvm::Type *TypeParams[] = {
1924         getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, CGM.SizeTy,
1925         CGM.VoidPtrTy, ReduceFnTy->getPointerTo(),
1926         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
1927     auto *FnTy =
1928         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
1929     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_reduce");
1930     break;
1931   }
1932   case OMPRTL__kmpc_reduce_nowait: {
1933     // Build kmp_int32 __kmpc_reduce_nowait(ident_t *loc, kmp_int32
1934     // global_tid, kmp_int32 num_vars, size_t reduce_size, void *reduce_data,
1935     // void (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name
1936     // *lck);
1937     llvm::Type *ReduceTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
1938     auto *ReduceFnTy = llvm::FunctionType::get(CGM.VoidTy, ReduceTypeParams,
1939                                                /*isVarArg=*/false);
1940     llvm::Type *TypeParams[] = {
1941         getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, CGM.SizeTy,
1942         CGM.VoidPtrTy, ReduceFnTy->getPointerTo(),
1943         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
1944     auto *FnTy =
1945         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
1946     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_reduce_nowait");
1947     break;
1948   }
1949   case OMPRTL__kmpc_end_reduce: {
1950     // Build void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,
1951     // kmp_critical_name *lck);
1952     llvm::Type *TypeParams[] = {
1953         getIdentTyPointerTy(), CGM.Int32Ty,
1954         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
1955     auto *FnTy =
1956         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1957     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_reduce");
1958     break;
1959   }
1960   case OMPRTL__kmpc_end_reduce_nowait: {
1961     // Build __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
1962     // kmp_critical_name *lck);
1963     llvm::Type *TypeParams[] = {
1964         getIdentTyPointerTy(), CGM.Int32Ty,
1965         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
1966     auto *FnTy =
1967         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1968     RTLFn =
1969         CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_reduce_nowait");
1970     break;
1971   }
1972   case OMPRTL__kmpc_omp_task_begin_if0: {
1973     // Build void __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t
1974     // *new_task);
1975     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1976                                 CGM.VoidPtrTy};
1977     auto *FnTy =
1978         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1979     RTLFn =
1980         CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_begin_if0");
1981     break;
1982   }
1983   case OMPRTL__kmpc_omp_task_complete_if0: {
1984     // Build void __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t
1985     // *new_task);
1986     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1987                                 CGM.VoidPtrTy};
1988     auto *FnTy =
1989         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1990     RTLFn = CGM.CreateRuntimeFunction(FnTy,
1991                                       /*Name=*/"__kmpc_omp_task_complete_if0");
1992     break;
1993   }
1994   case OMPRTL__kmpc_ordered: {
1995     // Build void __kmpc_ordered(ident_t *loc, kmp_int32 global_tid);
1996     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1997     auto *FnTy =
1998         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1999     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_ordered");
2000     break;
2001   }
2002   case OMPRTL__kmpc_end_ordered: {
2003     // Build void __kmpc_end_ordered(ident_t *loc, kmp_int32 global_tid);
2004     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
2005     auto *FnTy =
2006         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2007     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_ordered");
2008     break;
2009   }
2010   case OMPRTL__kmpc_omp_taskwait: {
2011     // Build kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32 global_tid);
2012     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
2013     auto *FnTy =
2014         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
2015     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_omp_taskwait");
2016     break;
2017   }
2018   case OMPRTL__kmpc_taskgroup: {
2019     // Build void __kmpc_taskgroup(ident_t *loc, kmp_int32 global_tid);
2020     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
2021     auto *FnTy =
2022         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2023     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_taskgroup");
2024     break;
2025   }
2026   case OMPRTL__kmpc_end_taskgroup: {
2027     // Build void __kmpc_end_taskgroup(ident_t *loc, kmp_int32 global_tid);
2028     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
2029     auto *FnTy =
2030         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2031     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_taskgroup");
2032     break;
2033   }
2034   case OMPRTL__kmpc_push_proc_bind: {
2035     // Build void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
2036     // int proc_bind)
2037     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
2038     auto *FnTy =
2039         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2040     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_proc_bind");
2041     break;
2042   }
2043   case OMPRTL__kmpc_omp_task_with_deps: {
2044     // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
2045     // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
2046     // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list);
2047     llvm::Type *TypeParams[] = {
2048         getIdentTyPointerTy(), CGM.Int32Ty, CGM.VoidPtrTy, CGM.Int32Ty,
2049         CGM.VoidPtrTy,         CGM.Int32Ty, CGM.VoidPtrTy};
2050     auto *FnTy =
2051         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
2052     RTLFn =
2053         CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_with_deps");
2054     break;
2055   }
2056   case OMPRTL__kmpc_omp_wait_deps: {
2057     // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
2058     // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 ndeps_noalias,
2059     // kmp_depend_info_t *noalias_dep_list);
2060     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
2061                                 CGM.Int32Ty,           CGM.VoidPtrTy,
2062                                 CGM.Int32Ty,           CGM.VoidPtrTy};
2063     auto *FnTy =
2064         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2065     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_wait_deps");
2066     break;
2067   }
2068   case OMPRTL__kmpc_cancellationpoint: {
2069     // Build kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
2070     // global_tid, kmp_int32 cncl_kind)
2071     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
2072     auto *FnTy =
2073         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2074     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_cancellationpoint");
2075     break;
2076   }
2077   case OMPRTL__kmpc_cancel: {
2078     // Build kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
2079     // kmp_int32 cncl_kind)
2080     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
2081     auto *FnTy =
2082         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2083     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_cancel");
2084     break;
2085   }
2086   case OMPRTL__kmpc_push_num_teams: {
2087     // Build void kmpc_push_num_teams (ident_t loc, kmp_int32 global_tid,
2088     // kmp_int32 num_teams, kmp_int32 num_threads)
2089     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty,
2090         CGM.Int32Ty};
2091     auto *FnTy =
2092         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2093     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_num_teams");
2094     break;
2095   }
2096   case OMPRTL__kmpc_fork_teams: {
2097     // Build void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro
2098     // microtask, ...);
2099     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
2100                                 getKmpc_MicroPointerTy()};
2101     auto *FnTy =
2102         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ true);
2103     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_fork_teams");
2104     if (auto *F = dyn_cast<llvm::Function>(RTLFn.getCallee())) {
2105       if (!F->hasMetadata(llvm::LLVMContext::MD_callback)) {
2106         llvm::LLVMContext &Ctx = F->getContext();
2107         llvm::MDBuilder MDB(Ctx);
2108         // Annotate the callback behavior of the __kmpc_fork_teams:
2109         //  - The callback callee is argument number 2 (microtask).
2110         //  - The first two arguments of the callback callee are unknown (-1).
2111         //  - All variadic arguments to the __kmpc_fork_teams are passed to the
2112         //    callback callee.
2113         F->addMetadata(
2114             llvm::LLVMContext::MD_callback,
2115             *llvm::MDNode::get(Ctx, {MDB.createCallbackEncoding(
2116                                         2, {-1, -1},
2117                                         /* VarArgsArePassed */ true)}));
2118       }
2119     }
2120     break;
2121   }
2122   case OMPRTL__kmpc_taskloop: {
2123     // Build void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
2124     // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
2125     // sched, kmp_uint64 grainsize, void *task_dup);
2126     llvm::Type *TypeParams[] = {getIdentTyPointerTy(),
2127                                 CGM.IntTy,
2128                                 CGM.VoidPtrTy,
2129                                 CGM.IntTy,
2130                                 CGM.Int64Ty->getPointerTo(),
2131                                 CGM.Int64Ty->getPointerTo(),
2132                                 CGM.Int64Ty,
2133                                 CGM.IntTy,
2134                                 CGM.IntTy,
2135                                 CGM.Int64Ty,
2136                                 CGM.VoidPtrTy};
2137     auto *FnTy =
2138         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2139     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_taskloop");
2140     break;
2141   }
2142   case OMPRTL__kmpc_doacross_init: {
2143     // Build void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid, kmp_int32
2144     // num_dims, struct kmp_dim *dims);
2145     llvm::Type *TypeParams[] = {getIdentTyPointerTy(),
2146                                 CGM.Int32Ty,
2147                                 CGM.Int32Ty,
2148                                 CGM.VoidPtrTy};
2149     auto *FnTy =
2150         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2151     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_init");
2152     break;
2153   }
2154   case OMPRTL__kmpc_doacross_fini: {
2155     // Build void __kmpc_doacross_fini(ident_t *loc, kmp_int32 gtid);
2156     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
2157     auto *FnTy =
2158         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2159     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_fini");
2160     break;
2161   }
2162   case OMPRTL__kmpc_doacross_post: {
2163     // Build void __kmpc_doacross_post(ident_t *loc, kmp_int32 gtid, kmp_int64
2164     // *vec);
2165     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
2166                                 CGM.Int64Ty->getPointerTo()};
2167     auto *FnTy =
2168         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2169     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_post");
2170     break;
2171   }
2172   case OMPRTL__kmpc_doacross_wait: {
2173     // Build void __kmpc_doacross_wait(ident_t *loc, kmp_int32 gtid, kmp_int64
2174     // *vec);
2175     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
2176                                 CGM.Int64Ty->getPointerTo()};
2177     auto *FnTy =
2178         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2179     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_wait");
2180     break;
2181   }
2182   case OMPRTL__kmpc_task_reduction_init: {
2183     // Build void *__kmpc_task_reduction_init(int gtid, int num_data, void
2184     // *data);
2185     llvm::Type *TypeParams[] = {CGM.IntTy, CGM.IntTy, CGM.VoidPtrTy};
2186     auto *FnTy =
2187         llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
2188     RTLFn =
2189         CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_task_reduction_init");
2190     break;
2191   }
2192   case OMPRTL__kmpc_task_reduction_get_th_data: {
2193     // Build void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
2194     // *d);
2195     llvm::Type *TypeParams[] = {CGM.IntTy, CGM.VoidPtrTy, CGM.VoidPtrTy};
2196     auto *FnTy =
2197         llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
2198     RTLFn = CGM.CreateRuntimeFunction(
2199         FnTy, /*Name=*/"__kmpc_task_reduction_get_th_data");
2200     break;
2201   }
2202   case OMPRTL__kmpc_alloc: {
2203     // Build to void *__kmpc_alloc(int gtid, size_t sz, omp_allocator_handle_t
2204     // al); omp_allocator_handle_t type is void *.
2205     llvm::Type *TypeParams[] = {CGM.IntTy, CGM.SizeTy, CGM.VoidPtrTy};
2206     auto *FnTy =
2207         llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
2208     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_alloc");
2209     break;
2210   }
2211   case OMPRTL__kmpc_free: {
2212     // Build to void __kmpc_free(int gtid, void *ptr, omp_allocator_handle_t
2213     // al); omp_allocator_handle_t type is void *.
2214     llvm::Type *TypeParams[] = {CGM.IntTy, CGM.VoidPtrTy, CGM.VoidPtrTy};
2215     auto *FnTy =
2216         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2217     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_free");
2218     break;
2219   }
2220   case OMPRTL__kmpc_push_target_tripcount: {
2221     // Build void __kmpc_push_target_tripcount(int64_t device_id, kmp_uint64
2222     // size);
2223     llvm::Type *TypeParams[] = {CGM.Int64Ty, CGM.Int64Ty};
2224     llvm::FunctionType *FnTy =
2225         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2226     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_target_tripcount");
2227     break;
2228   }
2229   case OMPRTL__tgt_target: {
2230     // Build int32_t __tgt_target(int64_t device_id, void *host_ptr, int32_t
2231     // arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t
2232     // *arg_types);
2233     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2234                                 CGM.VoidPtrTy,
2235                                 CGM.Int32Ty,
2236                                 CGM.VoidPtrPtrTy,
2237                                 CGM.VoidPtrPtrTy,
2238                                 CGM.SizeTy->getPointerTo(),
2239                                 CGM.Int64Ty->getPointerTo()};
2240     auto *FnTy =
2241         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2242     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target");
2243     break;
2244   }
2245   case OMPRTL__tgt_target_nowait: {
2246     // Build int32_t __tgt_target_nowait(int64_t device_id, void *host_ptr,
2247     // int32_t arg_num, void** args_base, void **args, size_t *arg_sizes,
2248     // int64_t *arg_types);
2249     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2250                                 CGM.VoidPtrTy,
2251                                 CGM.Int32Ty,
2252                                 CGM.VoidPtrPtrTy,
2253                                 CGM.VoidPtrPtrTy,
2254                                 CGM.SizeTy->getPointerTo(),
2255                                 CGM.Int64Ty->getPointerTo()};
2256     auto *FnTy =
2257         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2258     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_nowait");
2259     break;
2260   }
2261   case OMPRTL__tgt_target_teams: {
2262     // Build int32_t __tgt_target_teams(int64_t device_id, void *host_ptr,
2263     // int32_t arg_num, void** args_base, void **args, size_t *arg_sizes,
2264     // int64_t *arg_types, int32_t num_teams, int32_t thread_limit);
2265     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2266                                 CGM.VoidPtrTy,
2267                                 CGM.Int32Ty,
2268                                 CGM.VoidPtrPtrTy,
2269                                 CGM.VoidPtrPtrTy,
2270                                 CGM.SizeTy->getPointerTo(),
2271                                 CGM.Int64Ty->getPointerTo(),
2272                                 CGM.Int32Ty,
2273                                 CGM.Int32Ty};
2274     auto *FnTy =
2275         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2276     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_teams");
2277     break;
2278   }
2279   case OMPRTL__tgt_target_teams_nowait: {
2280     // Build int32_t __tgt_target_teams_nowait(int64_t device_id, void
2281     // *host_ptr, int32_t arg_num, void** args_base, void **args, size_t
2282     // *arg_sizes, int64_t *arg_types, int32_t num_teams, int32_t thread_limit);
2283     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2284                                 CGM.VoidPtrTy,
2285                                 CGM.Int32Ty,
2286                                 CGM.VoidPtrPtrTy,
2287                                 CGM.VoidPtrPtrTy,
2288                                 CGM.SizeTy->getPointerTo(),
2289                                 CGM.Int64Ty->getPointerTo(),
2290                                 CGM.Int32Ty,
2291                                 CGM.Int32Ty};
2292     auto *FnTy =
2293         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2294     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_teams_nowait");
2295     break;
2296   }
2297   case OMPRTL__tgt_register_lib: {
2298     // Build void __tgt_register_lib(__tgt_bin_desc *desc);
2299     QualType ParamTy =
2300         CGM.getContext().getPointerType(getTgtBinaryDescriptorQTy());
2301     llvm::Type *TypeParams[] = {CGM.getTypes().ConvertTypeForMem(ParamTy)};
2302     auto *FnTy =
2303         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2304     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_register_lib");
2305     break;
2306   }
2307   case OMPRTL__tgt_unregister_lib: {
2308     // Build void __tgt_unregister_lib(__tgt_bin_desc *desc);
2309     QualType ParamTy =
2310         CGM.getContext().getPointerType(getTgtBinaryDescriptorQTy());
2311     llvm::Type *TypeParams[] = {CGM.getTypes().ConvertTypeForMem(ParamTy)};
2312     auto *FnTy =
2313         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2314     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_unregister_lib");
2315     break;
2316   }
2317   case OMPRTL__tgt_target_data_begin: {
2318     // Build void __tgt_target_data_begin(int64_t device_id, int32_t arg_num,
2319     // void** args_base, void **args, size_t *arg_sizes, int64_t *arg_types);
2320     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2321                                 CGM.Int32Ty,
2322                                 CGM.VoidPtrPtrTy,
2323                                 CGM.VoidPtrPtrTy,
2324                                 CGM.SizeTy->getPointerTo(),
2325                                 CGM.Int64Ty->getPointerTo()};
2326     auto *FnTy =
2327         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2328     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_begin");
2329     break;
2330   }
2331   case OMPRTL__tgt_target_data_begin_nowait: {
2332     // Build void __tgt_target_data_begin_nowait(int64_t device_id, int32_t
2333     // arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t
2334     // *arg_types);
2335     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2336                                 CGM.Int32Ty,
2337                                 CGM.VoidPtrPtrTy,
2338                                 CGM.VoidPtrPtrTy,
2339                                 CGM.SizeTy->getPointerTo(),
2340                                 CGM.Int64Ty->getPointerTo()};
2341     auto *FnTy =
2342         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2343     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_begin_nowait");
2344     break;
2345   }
2346   case OMPRTL__tgt_target_data_end: {
2347     // Build void __tgt_target_data_end(int64_t device_id, int32_t arg_num,
2348     // void** args_base, void **args, size_t *arg_sizes, int64_t *arg_types);
2349     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2350                                 CGM.Int32Ty,
2351                                 CGM.VoidPtrPtrTy,
2352                                 CGM.VoidPtrPtrTy,
2353                                 CGM.SizeTy->getPointerTo(),
2354                                 CGM.Int64Ty->getPointerTo()};
2355     auto *FnTy =
2356         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2357     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_end");
2358     break;
2359   }
2360   case OMPRTL__tgt_target_data_end_nowait: {
2361     // Build void __tgt_target_data_end_nowait(int64_t device_id, int32_t
2362     // arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t
2363     // *arg_types);
2364     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2365                                 CGM.Int32Ty,
2366                                 CGM.VoidPtrPtrTy,
2367                                 CGM.VoidPtrPtrTy,
2368                                 CGM.SizeTy->getPointerTo(),
2369                                 CGM.Int64Ty->getPointerTo()};
2370     auto *FnTy =
2371         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2372     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_end_nowait");
2373     break;
2374   }
2375   case OMPRTL__tgt_target_data_update: {
2376     // Build void __tgt_target_data_update(int64_t device_id, int32_t arg_num,
2377     // void** args_base, void **args, size_t *arg_sizes, int64_t *arg_types);
2378     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2379                                 CGM.Int32Ty,
2380                                 CGM.VoidPtrPtrTy,
2381                                 CGM.VoidPtrPtrTy,
2382                                 CGM.SizeTy->getPointerTo(),
2383                                 CGM.Int64Ty->getPointerTo()};
2384     auto *FnTy =
2385         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2386     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_update");
2387     break;
2388   }
2389   case OMPRTL__tgt_target_data_update_nowait: {
2390     // Build void __tgt_target_data_update_nowait(int64_t device_id, int32_t
2391     // arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t
2392     // *arg_types);
2393     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2394                                 CGM.Int32Ty,
2395                                 CGM.VoidPtrPtrTy,
2396                                 CGM.VoidPtrPtrTy,
2397                                 CGM.SizeTy->getPointerTo(),
2398                                 CGM.Int64Ty->getPointerTo()};
2399     auto *FnTy =
2400         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2401     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_update_nowait");
2402     break;
2403   }
2404   }
2405   assert(RTLFn && "Unable to find OpenMP runtime function");
2406   return RTLFn;
2407 }
2408 
2409 llvm::FunctionCallee
2410 CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize, bool IVSigned) {
2411   assert((IVSize == 32 || IVSize == 64) &&
2412          "IV size is not compatible with the omp runtime");
2413   StringRef Name = IVSize == 32 ? (IVSigned ? "__kmpc_for_static_init_4"
2414                                             : "__kmpc_for_static_init_4u")
2415                                 : (IVSigned ? "__kmpc_for_static_init_8"
2416                                             : "__kmpc_for_static_init_8u");
2417   llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
2418   auto *PtrTy = llvm::PointerType::getUnqual(ITy);
2419   llvm::Type *TypeParams[] = {
2420     getIdentTyPointerTy(),                     // loc
2421     CGM.Int32Ty,                               // tid
2422     CGM.Int32Ty,                               // schedtype
2423     llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
2424     PtrTy,                                     // p_lower
2425     PtrTy,                                     // p_upper
2426     PtrTy,                                     // p_stride
2427     ITy,                                       // incr
2428     ITy                                        // chunk
2429   };
2430   auto *FnTy =
2431       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2432   return CGM.CreateRuntimeFunction(FnTy, Name);
2433 }
2434 
2435 llvm::FunctionCallee
2436 CGOpenMPRuntime::createDispatchInitFunction(unsigned IVSize, bool IVSigned) {
2437   assert((IVSize == 32 || IVSize == 64) &&
2438          "IV size is not compatible with the omp runtime");
2439   StringRef Name =
2440       IVSize == 32
2441           ? (IVSigned ? "__kmpc_dispatch_init_4" : "__kmpc_dispatch_init_4u")
2442           : (IVSigned ? "__kmpc_dispatch_init_8" : "__kmpc_dispatch_init_8u");
2443   llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
2444   llvm::Type *TypeParams[] = { getIdentTyPointerTy(), // loc
2445                                CGM.Int32Ty,           // tid
2446                                CGM.Int32Ty,           // schedtype
2447                                ITy,                   // lower
2448                                ITy,                   // upper
2449                                ITy,                   // stride
2450                                ITy                    // chunk
2451   };
2452   auto *FnTy =
2453       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2454   return CGM.CreateRuntimeFunction(FnTy, Name);
2455 }
2456 
2457 llvm::FunctionCallee
2458 CGOpenMPRuntime::createDispatchFiniFunction(unsigned IVSize, bool IVSigned) {
2459   assert((IVSize == 32 || IVSize == 64) &&
2460          "IV size is not compatible with the omp runtime");
2461   StringRef Name =
2462       IVSize == 32
2463           ? (IVSigned ? "__kmpc_dispatch_fini_4" : "__kmpc_dispatch_fini_4u")
2464           : (IVSigned ? "__kmpc_dispatch_fini_8" : "__kmpc_dispatch_fini_8u");
2465   llvm::Type *TypeParams[] = {
2466       getIdentTyPointerTy(), // loc
2467       CGM.Int32Ty,           // tid
2468   };
2469   auto *FnTy =
2470       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2471   return CGM.CreateRuntimeFunction(FnTy, Name);
2472 }
2473 
2474 llvm::FunctionCallee
2475 CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize, bool IVSigned) {
2476   assert((IVSize == 32 || IVSize == 64) &&
2477          "IV size is not compatible with the omp runtime");
2478   StringRef Name =
2479       IVSize == 32
2480           ? (IVSigned ? "__kmpc_dispatch_next_4" : "__kmpc_dispatch_next_4u")
2481           : (IVSigned ? "__kmpc_dispatch_next_8" : "__kmpc_dispatch_next_8u");
2482   llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
2483   auto *PtrTy = llvm::PointerType::getUnqual(ITy);
2484   llvm::Type *TypeParams[] = {
2485     getIdentTyPointerTy(),                     // loc
2486     CGM.Int32Ty,                               // tid
2487     llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
2488     PtrTy,                                     // p_lower
2489     PtrTy,                                     // p_upper
2490     PtrTy                                      // p_stride
2491   };
2492   auto *FnTy =
2493       llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2494   return CGM.CreateRuntimeFunction(FnTy, Name);
2495 }
2496 
// Returns the address of the "<mangled name>_decl_tgt_link_ptr" pointer used
// to access a 'declare target link' variable, creating that pointer on first
// use. Returns an invalid Address when the variable has no 'declare target
// link' attribute, or under -fopenmp-simd.
Address CGOpenMPRuntime::getAddrOfDeclareTargetLink(const VarDecl *VD) {
  if (CGM.getLangOpts().OpenMPSimd)
    return Address::invalid();
  llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
  if (Res && *Res == OMPDeclareTargetDeclAttr::MT_Link) {
    // Build the name of the pointer: "<mangled name>_decl_tgt_link_ptr".
    SmallString<64> PtrName;
    {
      llvm::raw_svector_ostream OS(PtrName);
      OS << CGM.getMangledName(GlobalDecl(VD)) << "_decl_tgt_link_ptr";
    }
    // Reuse an already emitted pointer if present; otherwise create it.
    llvm::Value *Ptr = CGM.getModule().getNamedValue(PtrName);
    if (!Ptr) {
      QualType PtrTy = CGM.getContext().getPointerType(VD->getType());
      Ptr = getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(PtrTy),
                                        PtrName);
      // On the host the pointer is made externally visible and initialized
      // with the address of the original variable. NOTE(review): the
      // device-side initialization is not visible here - presumably resolved
      // by the offloading runtime; confirm against the registration code.
      if (!CGM.getLangOpts().OpenMPIsDevice) {
        auto *GV = cast<llvm::GlobalVariable>(Ptr);
        GV->setLinkage(llvm::GlobalValue::ExternalLinkage);
        GV->setInitializer(CGM.GetAddrOfGlobal(VD));
      }
      // Keep the pointer alive (llvm.used) and register it as a target global
      // variable so it gets an offload entry.
      CGM.addUsedGlobal(cast<llvm::GlobalValue>(Ptr));
      registerTargetGlobalVariable(VD, cast<llvm::Constant>(Ptr));
    }
    // The pointer is accessed with the alignment of the underlying variable.
    return Address(Ptr, CGM.getContext().getDeclAlign(VD));
  }
  return Address::invalid();
}
2525 
2526 llvm::Constant *
2527 CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) {
2528   assert(!CGM.getLangOpts().OpenMPUseTLS ||
2529          !CGM.getContext().getTargetInfo().isTLSSupported());
2530   // Lookup the entry, lazily creating it if necessary.
2531   std::string Suffix = getName({"cache", ""});
2532   return getOrCreateInternalVariable(
2533       CGM.Int8PtrPtrTy, Twine(CGM.getMangledName(VD)).concat(Suffix));
2534 }
2535 
2536 Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
2537                                                 const VarDecl *VD,
2538                                                 Address VDAddr,
2539                                                 SourceLocation Loc) {
2540   if (CGM.getLangOpts().OpenMPUseTLS &&
2541       CGM.getContext().getTargetInfo().isTLSSupported())
2542     return VDAddr;
2543 
2544   llvm::Type *VarTy = VDAddr.getElementType();
2545   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2546                          CGF.Builder.CreatePointerCast(VDAddr.getPointer(),
2547                                                        CGM.Int8PtrTy),
2548                          CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)),
2549                          getOrCreateThreadPrivateCache(VD)};
2550   return Address(CGF.EmitRuntimeCall(
2551       createRuntimeFunction(OMPRTL__kmpc_threadprivate_cached), Args),
2552                  VDAddr.getAlignment());
2553 }
2554 
2555 void CGOpenMPRuntime::emitThreadPrivateVarInit(
2556     CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor,
2557     llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) {
2558   // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime
2559   // library.
2560   llvm::Value *OMPLoc = emitUpdateLocation(CGF, Loc);
2561   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_global_thread_num),
2562                       OMPLoc);
2563   // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor)
2564   // to register constructor/destructor for variable.
2565   llvm::Value *Args[] = {
2566       OMPLoc, CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.VoidPtrTy),
2567       Ctor, CopyCtor, Dtor};
2568   CGF.EmitRuntimeCall(
2569       createRuntimeFunction(OMPRTL__kmpc_threadprivate_register), Args);
2570 }
2571 
// Emits, at most once per variable (tracked in ThreadPrivateWithDefinition by
// mangled name), the constructor/copy-constructor/destructor registration for
// a threadprivate variable. When no CodeGenFunction is given, the
// registration is wrapped in a freshly created "__omp_threadprivate_init_"
// function which is returned so the caller can schedule it as a global
// initializer; otherwise the registration is emitted into *CGF and nullptr is
// returned. Also returns nullptr when TLS is usable (no runtime registration
// needed) or when nothing has to be emitted.
llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition(
    const VarDecl *VD, Address VDAddr, SourceLocation Loc,
    bool PerformInit, CodeGenFunction *CGF) {
  // With TLS support the variable is an ordinary TLS global; no registration.
  if (CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getContext().getTargetInfo().isTLSSupported())
    return nullptr;

  VD = VD->getDefinition(CGM.getContext());
  // insert(...).second is true only the first time this definition is seen.
  if (VD && ThreadPrivateWithDefinition.insert(CGM.getMangledName(VD)).second) {
    QualType ASTTy = VD->getType();

    llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr;
    const Expr *Init = VD->getAnyInitializer();
    if (CGM.getLangOpts().CPlusPlus && PerformInit) {
      // Generate function that re-emits the declaration's initializer into the
      // threadprivate copy of the variable VD
      CodeGenFunction CtorCGF(CGM);
      FunctionArgList Args;
      // The ctor takes a single void* argument: the destination address of
      // the thread's copy.
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
                            ImplicitParamDecl::Other);
      Args.push_back(&Dst);

      const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          CGM.getContext().VoidPtrTy, Args);
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      std::string Name = getName({"__kmpc_global_ctor_", ""});
      llvm::Function *Fn =
          CGM.CreateGlobalInitOrDestructFunction(FTy, Name, FI, Loc);
      CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI,
                            Args, Loc, Loc);
      // Load the destination address argument and retype it to the variable's
      // memory type before emitting the initializer into it.
      llvm::Value *ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      Address Arg = Address(ArgVal, VDAddr.getAlignment());
      Arg = CtorCGF.Builder.CreateElementBitCast(
          Arg, CtorCGF.ConvertTypeForMem(ASTTy));
      CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(),
                               /*IsInitializer=*/true);
      // The ctor returns the destination pointer it was given.
      ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue);
      CtorCGF.FinishFunction();
      Ctor = Fn;
    }
    if (VD->getType().isDestructedType() != QualType::DK_none) {
      // Generate function that emits destructor call for the threadprivate copy
      // of the variable VD
      CodeGenFunction DtorCGF(CGM);
      FunctionArgList Args;
      // The dtor takes a single void* argument: the address of the thread's
      // copy to destroy.
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
                            ImplicitParamDecl::Other);
      Args.push_back(&Dst);

      const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          CGM.getContext().VoidTy, Args);
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      std::string Name = getName({"__kmpc_global_dtor_", ""});
      llvm::Function *Fn =
          CGM.CreateGlobalInitOrDestructFunction(FTy, Name, FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
      DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args,
                            Loc, Loc);
      // Create a scope with an artificial location for the body of this function.
      auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
      llvm::Value *ArgVal = DtorCGF.EmitLoadOfScalar(
          DtorCGF.GetAddrOfLocalVar(&Dst),
          /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation());
      DtorCGF.emitDestroy(Address(ArgVal, VDAddr.getAlignment()), ASTTy,
                          DtorCGF.getDestroyer(ASTTy.isDestructedType()),
                          DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
      DtorCGF.FinishFunction();
      Dtor = Fn;
    }
    // Do not emit init function if it is not required.
    if (!Ctor && !Dtor)
      return nullptr;

    llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
    auto *CopyCtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs,
                                               /*isVarArg=*/false)
                           ->getPointerTo();
    // Copying constructor for the threadprivate variable.
    // Must be NULL - reserved by runtime, but currently it requires that this
    // parameter is always NULL. Otherwise it fires assertion.
    CopyCtor = llvm::Constant::getNullValue(CopyCtorTy);
    // Missing ctor/dtor slots are passed to the runtime as typed null
    // function pointers.
    if (Ctor == nullptr) {
      auto *CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
                                             /*isVarArg=*/false)
                         ->getPointerTo();
      Ctor = llvm::Constant::getNullValue(CtorTy);
    }
    if (Dtor == nullptr) {
      auto *DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy,
                                             /*isVarArg=*/false)
                         ->getPointerTo();
      Dtor = llvm::Constant::getNullValue(DtorTy);
    }
    if (!CGF) {
      // No enclosing function: wrap the registration in a standalone
      // "__omp_threadprivate_init_" function and hand it back to the caller.
      auto *InitFunctionTy =
          llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false);
      std::string Name = getName({"__omp_threadprivate_init_", ""});
      llvm::Function *InitFunction = CGM.CreateGlobalInitOrDestructFunction(
          InitFunctionTy, Name, CGM.getTypes().arrangeNullaryFunction());
      CodeGenFunction InitCGF(CGM);
      FunctionArgList ArgList;
      InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction,
                            CGM.getTypes().arrangeNullaryFunction(), ArgList,
                            Loc, Loc);
      emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
      InitCGF.FinishFunction();
      return InitFunction;
    }
    // Emit the registration directly into the provided function.
    emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
  }
  return nullptr;
}
2691 
2692 /// Obtain information that uniquely identifies a target entry. This
2693 /// consists of the file and device IDs as well as line number associated with
2694 /// the relevant entry source location.
2695 static void getTargetEntryUniqueInfo(ASTContext &C, SourceLocation Loc,
2696                                      unsigned &DeviceID, unsigned &FileID,
2697                                      unsigned &LineNum) {
2698   SourceManager &SM = C.getSourceManager();
2699 
2700   // The loc should be always valid and have a file ID (the user cannot use
2701   // #pragma directives in macros)
2702 
2703   assert(Loc.isValid() && "Source location is expected to be always valid.");
2704 
2705   PresumedLoc PLoc = SM.getPresumedLoc(Loc);
2706   assert(PLoc.isValid() && "Source location is expected to be always valid.");
2707 
2708   llvm::sys::fs::UniqueID ID;
2709   if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID))
2710     SM.getDiagnostics().Report(diag::err_cannot_open_file)
2711         << PLoc.getFilename() << EC.message();
2712 
2713   DeviceID = ID.getDevice();
2714   FileID = ID.getFile();
2715   LineNum = PLoc.getLine();
2716 }
2717 
bool CGOpenMPRuntime::emitDeclareTargetVarDefinition(const VarDecl *VD,
                                                     llvm::GlobalVariable *Addr,
                                                     bool PerformInit) {
  // Emits (and registers in the offload entry table) the ctor/dtor "target
  // region" functions for a 'declare target' global variable.  Returns true
  // iff we are compiling for the device, which tells the caller that the
  // regular host-side emission must be suppressed.
  Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
  // No ctor/dtor entries are needed for non-declare-target variables or for
  // the 'link' map type.
  if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link)
    return CGM.getLangOpts().OpenMPIsDevice;
  VD = VD->getDefinition(CGM.getContext());
  // Emit the entries only once per mangled definition.
  if (VD && !DeclareTargetWithDefinition.insert(CGM.getMangledName(VD)).second)
    return CGM.getLangOpts().OpenMPIsDevice;

  QualType ASTTy = VD->getType();

  SourceLocation Loc = VD->getCanonicalDecl()->getBeginLoc();
  // Produce the unique prefix to identify the new target regions. We use
  // the source location of the variable declaration which we know to not
  // conflict with any target region.
  unsigned DeviceID;
  unsigned FileID;
  unsigned Line;
  getTargetEntryUniqueInfo(CGM.getContext(), Loc, DeviceID, FileID, Line);
  SmallString<128> Buffer, Out;
  {
    llvm::raw_svector_ostream OS(Buffer);
    OS << "__omp_offloading_" << llvm::format("_%x", DeviceID)
       << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line;
  }

  const Expr *Init = VD->getAnyInitializer();
  if (CGM.getLangOpts().CPlusPlus && PerformInit) {
    llvm::Constant *Ctor;
    llvm::Constant *ID;
    if (CGM.getLangOpts().OpenMPIsDevice) {
      // Generate function that re-emits the declaration's initializer into
      // the threadprivate copy of the variable VD
      CodeGenFunction CtorCGF(CGM);

      const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      llvm::Function *Fn = CGM.CreateGlobalInitOrDestructFunction(
          FTy, Twine(Buffer, "_ctor"), FI, Loc);
      // Start with an empty debug location, then use an artificial one for
      // the function body.
      auto NL = ApplyDebugLocation::CreateEmpty(CtorCGF);
      CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
                            FunctionArgList(), Loc, Loc);
      auto AL = ApplyDebugLocation::CreateArtificial(CtorCGF);
      CtorCGF.EmitAnyExprToMem(Init,
                               Address(Addr, CGM.getContext().getDeclAlign(VD)),
                               Init->getType().getQualifiers(),
                               /*IsInitializer=*/true);
      CtorCGF.FinishFunction();
      Ctor = Fn;
      ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
      // Keep the ctor alive: it is referenced only through the offload
      // tables, not by regular IR uses.
      CGM.addUsedGlobal(cast<llvm::GlobalValue>(Ctor));
    } else {
      // On the host the entry only needs a unique address to pair with the
      // device-side entry, so emit a private dummy global.
      Ctor = new llvm::GlobalVariable(
          CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
          llvm::GlobalValue::PrivateLinkage,
          llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_ctor"));
      ID = Ctor;
    }

    // Register the information for the entry associated with the constructor.
    Out.clear();
    OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
        DeviceID, FileID, Twine(Buffer, "_ctor").toStringRef(Out), Line, Ctor,
        ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryCtor);
  }
  if (VD->getType().isDestructedType() != QualType::DK_none) {
    llvm::Constant *Dtor;
    llvm::Constant *ID;
    if (CGM.getLangOpts().OpenMPIsDevice) {
      // Generate function that emits destructor call for the threadprivate
      // copy of the variable VD
      CodeGenFunction DtorCGF(CGM);

      const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      llvm::Function *Fn = CGM.CreateGlobalInitOrDestructFunction(
          FTy, Twine(Buffer, "_dtor"), FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
      DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
                            FunctionArgList(), Loc, Loc);
      // Create a scope with an artificial location for the body of this
      // function.
      auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
      DtorCGF.emitDestroy(Address(Addr, CGM.getContext().getDeclAlign(VD)),
                          ASTTy, DtorCGF.getDestroyer(ASTTy.isDestructedType()),
                          DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
      DtorCGF.FinishFunction();
      Dtor = Fn;
      ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
      // As for the ctor, keep the dtor alive for the offload tables.
      CGM.addUsedGlobal(cast<llvm::GlobalValue>(Dtor));
    } else {
      // Host side: a private dummy global provides the unique entry address.
      Dtor = new llvm::GlobalVariable(
          CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
          llvm::GlobalValue::PrivateLinkage,
          llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_dtor"));
      ID = Dtor;
    }
    // Register the information for the entry associated with the destructor.
    Out.clear();
    OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
        DeviceID, FileID, Twine(Buffer, "_dtor").toStringRef(Out), Line, Dtor,
        ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryDtor);
  }
  return CGM.getLangOpts().OpenMPIsDevice;
}
2825 
2826 Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF,
2827                                                           QualType VarType,
2828                                                           StringRef Name) {
2829   std::string Suffix = getName({"artificial", ""});
2830   std::string CacheSuffix = getName({"cache", ""});
2831   llvm::Type *VarLVType = CGF.ConvertTypeForMem(VarType);
2832   llvm::Value *GAddr =
2833       getOrCreateInternalVariable(VarLVType, Twine(Name).concat(Suffix));
2834   llvm::Value *Args[] = {
2835       emitUpdateLocation(CGF, SourceLocation()),
2836       getThreadID(CGF, SourceLocation()),
2837       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(GAddr, CGM.VoidPtrTy),
2838       CGF.Builder.CreateIntCast(CGF.getTypeSize(VarType), CGM.SizeTy,
2839                                 /*IsSigned=*/false),
2840       getOrCreateInternalVariable(
2841           CGM.VoidPtrPtrTy, Twine(Name).concat(Suffix).concat(CacheSuffix))};
2842   return Address(
2843       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2844           CGF.EmitRuntimeCall(
2845               createRuntimeFunction(OMPRTL__kmpc_threadprivate_cached), Args),
2846           VarLVType->getPointerTo(/*AddrSpace=*/0)),
2847       CGM.getPointerAlign());
2848 }
2849 
2850 void CGOpenMPRuntime::emitOMPIfClause(CodeGenFunction &CGF, const Expr *Cond,
2851                                       const RegionCodeGenTy &ThenGen,
2852                                       const RegionCodeGenTy &ElseGen) {
2853   CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange());
2854 
2855   // If the condition constant folds and can be elided, try to avoid emitting
2856   // the condition and the dead arm of the if/else.
2857   bool CondConstant;
2858   if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) {
2859     if (CondConstant)
2860       ThenGen(CGF);
2861     else
2862       ElseGen(CGF);
2863     return;
2864   }
2865 
2866   // Otherwise, the condition did not fold, or we couldn't elide it.  Just
2867   // emit the conditional branch.
2868   llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("omp_if.then");
2869   llvm::BasicBlock *ElseBlock = CGF.createBasicBlock("omp_if.else");
2870   llvm::BasicBlock *ContBlock = CGF.createBasicBlock("omp_if.end");
2871   CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0);
2872 
2873   // Emit the 'then' code.
2874   CGF.EmitBlock(ThenBlock);
2875   ThenGen(CGF);
2876   CGF.EmitBranch(ContBlock);
2877   // Emit the 'else' code if present.
2878   // There is no need to emit line number for unconditional branch.
2879   (void)ApplyDebugLocation::CreateEmpty(CGF);
2880   CGF.EmitBlock(ElseBlock);
2881   ElseGen(CGF);
2882   // There is no need to emit line number for unconditional branch.
2883   (void)ApplyDebugLocation::CreateEmpty(CGF);
2884   CGF.EmitBranch(ContBlock);
2885   // Emit the continuation block for code after the if.
2886   CGF.EmitBlock(ContBlock, /*IsFinished=*/true);
2887 }
2888 
void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       llvm::Function *OutlinedFn,
                                       ArrayRef<llvm::Value *> CapturedVars,
                                       const Expr *IfCond) {
  // Emits the runtime invocation of the outlined parallel region.  Without
  // an if-clause (IfCond == nullptr) this is a plain __kmpc_fork_call;
  // otherwise the else-arm runs the region serialized on the current thread.
  if (!CGF.HaveInsertPoint())
    return;
  llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
  auto &&ThenGen = [OutlinedFn, CapturedVars, RTLoc](CodeGenFunction &CGF,
                                                     PrePostActionTy &) {
    // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn);
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    llvm::Value *Args[] = {
        RTLoc,
        CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
        CGF.Builder.CreateBitCast(OutlinedFn, RT.getKmpc_MicroPointerTy())};
    llvm::SmallVector<llvm::Value *, 16> RealArgs;
    RealArgs.append(std::begin(Args), std::end(Args));
    RealArgs.append(CapturedVars.begin(), CapturedVars.end());

    llvm::FunctionCallee RTLFn =
        RT.createRuntimeFunction(OMPRTL__kmpc_fork_call);
    CGF.EmitRuntimeCall(RTLFn, RealArgs);
  };
  auto &&ElseGen = [OutlinedFn, CapturedVars, RTLoc, Loc](CodeGenFunction &CGF,
                                                          PrePostActionTy &) {
    // Serialized variant: run the outlined function on the current thread,
    // bracketed by (end_)serialized_parallel runtime calls.
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    llvm::Value *ThreadID = RT.getThreadID(CGF, Loc);
    // Build calls:
    // __kmpc_serialized_parallel(&Loc, GTid);
    llvm::Value *Args[] = {RTLoc, ThreadID};
    CGF.EmitRuntimeCall(
        RT.createRuntimeFunction(OMPRTL__kmpc_serialized_parallel), Args);

    // OutlinedFn(&GTid, &zero, CapturedStruct);
    Address ZeroAddr = CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty,
                                                        /*Name*/ ".zero.addr");
    CGF.InitTempAlloca(ZeroAddr, CGF.Builder.getInt32(/*C*/ 0));
    llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
    // ThreadId for serialized parallels is 0.
    OutlinedFnArgs.push_back(ZeroAddr.getPointer());
    OutlinedFnArgs.push_back(ZeroAddr.getPointer());
    OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());
    RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs);

    // __kmpc_end_serialized_parallel(&Loc, GTid);
    llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID};
    CGF.EmitRuntimeCall(
        RT.createRuntimeFunction(OMPRTL__kmpc_end_serialized_parallel),
        EndArgs);
  };
  if (IfCond) {
    // Emit both arms guarded by the if-clause condition.
    emitOMPIfClause(CGF, IfCond, ThenGen, ElseGen);
  } else {
    // No if-clause: always fork.
    RegionCodeGenTy ThenRCG(ThenGen);
    ThenRCG(CGF);
  }
}
2946 
2947 // If we're inside an (outlined) parallel region, use the region info's
2948 // thread-ID variable (it is passed in a first argument of the outlined function
2949 // as "kmp_int32 *gtid"). Otherwise, if we're not inside parallel region, but in
2950 // regular serial code region, get thread ID by calling kmp_int32
2951 // kmpc_global_thread_num(ident_t *loc), stash this thread ID in a temporary and
2952 // return the address of that temp.
2953 Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF,
2954                                              SourceLocation Loc) {
2955   if (auto *OMPRegionInfo =
2956           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
2957     if (OMPRegionInfo->getThreadIDVariable())
2958       return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress();
2959 
2960   llvm::Value *ThreadID = getThreadID(CGF, Loc);
2961   QualType Int32Ty =
2962       CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true);
2963   Address ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp.");
2964   CGF.EmitStoreOfScalar(ThreadID,
2965                         CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty));
2966 
2967   return ThreadIDTemp;
2968 }
2969 
2970 llvm::Constant *CGOpenMPRuntime::getOrCreateInternalVariable(
2971     llvm::Type *Ty, const llvm::Twine &Name, unsigned AddressSpace) {
2972   SmallString<256> Buffer;
2973   llvm::raw_svector_ostream Out(Buffer);
2974   Out << Name;
2975   StringRef RuntimeName = Out.str();
2976   auto &Elem = *InternalVars.try_emplace(RuntimeName, nullptr).first;
2977   if (Elem.second) {
2978     assert(Elem.second->getType()->getPointerElementType() == Ty &&
2979            "OMP internal variable has different type than requested");
2980     return &*Elem.second;
2981   }
2982 
2983   return Elem.second = new llvm::GlobalVariable(
2984              CGM.getModule(), Ty, /*IsConstant*/ false,
2985              llvm::GlobalValue::CommonLinkage, llvm::Constant::getNullValue(Ty),
2986              Elem.first(), /*InsertBefore=*/nullptr,
2987              llvm::GlobalValue::NotThreadLocal, AddressSpace);
2988 }
2989 
2990 llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) {
2991   std::string Prefix = Twine("gomp_critical_user_", CriticalName).str();
2992   std::string Name = getName({Prefix, "var"});
2993   return getOrCreateInternalVariable(KmpCriticalNameTy, Name);
2994 }
2995 
namespace {
/// Common pre(post)-action for different OpenMP constructs.
///
/// Emits an "enter" runtime call before the construct body and an "exit"
/// runtime call after it.  When \p Conditional is set, the result of the
/// enter call guards the body: the body runs only if the call returned
/// non-zero, and the user must call Done() afterwards to emit the
/// continuation block.
class CommonActionTy final : public PrePostActionTy {
  /// Runtime function called before the region body.
  llvm::FunctionCallee EnterCallee;
  ArrayRef<llvm::Value *> EnterArgs;
  /// Runtime function called after the region body.
  llvm::FunctionCallee ExitCallee;
  ArrayRef<llvm::Value *> ExitArgs;
  /// True if the enter call's return value conditionally guards the body.
  bool Conditional;
  /// Continuation block; set by Enter() only when Conditional is true.
  llvm::BasicBlock *ContBlock = nullptr;

public:
  CommonActionTy(llvm::FunctionCallee EnterCallee,
                 ArrayRef<llvm::Value *> EnterArgs,
                 llvm::FunctionCallee ExitCallee,
                 ArrayRef<llvm::Value *> ExitArgs, bool Conditional = false)
      : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee),
        ExitArgs(ExitArgs), Conditional(Conditional) {}
  void Enter(CodeGenFunction &CGF) override {
    llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs);
    if (Conditional) {
      llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes);
      auto *ThenBlock = CGF.createBasicBlock("omp_if.then");
      ContBlock = CGF.createBasicBlock("omp_if.end");
      // Generate the branch (If-stmt)
      CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock);
      CGF.EmitBlock(ThenBlock);
    }
  }
  // NOTE(review): callers only invoke Done() for Conditional actions; with
  // Conditional=false ContBlock stays null and Done() must not be called.
  void Done(CodeGenFunction &CGF) {
    // Emit the rest of blocks/branches
    CGF.EmitBranch(ContBlock);
    CGF.EmitBlock(ContBlock, true);
  }
  void Exit(CodeGenFunction &CGF) override {
    CGF.EmitRuntimeCall(ExitCallee, ExitArgs);
  }
};
} // anonymous namespace
3034 
3035 void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF,
3036                                          StringRef CriticalName,
3037                                          const RegionCodeGenTy &CriticalOpGen,
3038                                          SourceLocation Loc, const Expr *Hint) {
3039   // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]);
3040   // CriticalOpGen();
3041   // __kmpc_end_critical(ident_t *, gtid, Lock);
3042   // Prepare arguments and build a call to __kmpc_critical
3043   if (!CGF.HaveInsertPoint())
3044     return;
3045   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
3046                          getCriticalRegionLock(CriticalName)};
3047   llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args),
3048                                                 std::end(Args));
3049   if (Hint) {
3050     EnterArgs.push_back(CGF.Builder.CreateIntCast(
3051         CGF.EmitScalarExpr(Hint), CGM.IntPtrTy, /*isSigned=*/false));
3052   }
3053   CommonActionTy Action(
3054       createRuntimeFunction(Hint ? OMPRTL__kmpc_critical_with_hint
3055                                  : OMPRTL__kmpc_critical),
3056       EnterArgs, createRuntimeFunction(OMPRTL__kmpc_end_critical), Args);
3057   CriticalOpGen.setAction(Action);
3058   emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen);
3059 }
3060 
3061 void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF,
3062                                        const RegionCodeGenTy &MasterOpGen,
3063                                        SourceLocation Loc) {
3064   if (!CGF.HaveInsertPoint())
3065     return;
3066   // if(__kmpc_master(ident_t *, gtid)) {
3067   //   MasterOpGen();
3068   //   __kmpc_end_master(ident_t *, gtid);
3069   // }
3070   // Prepare arguments and build a call to __kmpc_master
3071   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
3072   CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_master), Args,
3073                         createRuntimeFunction(OMPRTL__kmpc_end_master), Args,
3074                         /*Conditional=*/true);
3075   MasterOpGen.setAction(Action);
3076   emitInlinedDirective(CGF, OMPD_master, MasterOpGen);
3077   Action.Done(CGF);
3078 }
3079 
3080 void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
3081                                         SourceLocation Loc) {
3082   if (!CGF.HaveInsertPoint())
3083     return;
3084   // Build call __kmpc_omp_taskyield(loc, thread_id, 0);
3085   llvm::Value *Args[] = {
3086       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
3087       llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)};
3088   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_taskyield), Args);
3089   if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
3090     Region->emitUntiedSwitch(CGF);
3091 }
3092 
3093 void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF,
3094                                           const RegionCodeGenTy &TaskgroupOpGen,
3095                                           SourceLocation Loc) {
3096   if (!CGF.HaveInsertPoint())
3097     return;
3098   // __kmpc_taskgroup(ident_t *, gtid);
3099   // TaskgroupOpGen();
3100   // __kmpc_end_taskgroup(ident_t *, gtid);
3101   // Prepare arguments and build a call to __kmpc_taskgroup
3102   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
3103   CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_taskgroup), Args,
3104                         createRuntimeFunction(OMPRTL__kmpc_end_taskgroup),
3105                         Args);
3106   TaskgroupOpGen.setAction(Action);
3107   emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen);
3108 }
3109 
3110 /// Given an array of pointers to variables, project the address of a
3111 /// given variable.
3112 static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array,
3113                                       unsigned Index, const VarDecl *Var) {
3114   // Pull out the pointer to the variable.
3115   Address PtrAddr = CGF.Builder.CreateConstArrayGEP(Array, Index);
3116   llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr);
3117 
3118   Address Addr = Address(Ptr, CGF.getContext().getDeclAlign(Var));
3119   Addr = CGF.Builder.CreateElementBitCast(
3120       Addr, CGF.ConvertTypeForMem(Var->getType()));
3121   return Addr;
3122 }
3123 
/// Emit an internal function "void copy_func(void *LHSArg, void *RHSArg)"
/// that copies each copyprivate variable from the source pointer array to
/// the destination pointer array via the given assignment expressions.
/// NOTE(review): the sole caller passes its SrcExprs as \p DestExprs and
/// DstExprs as \p SrcExprs — the naming here mirrors the copy direction,
/// not the caller's; confirm against emitSingleRegion before reordering.
static llvm::Value *emitCopyprivateCopyFunction(
    CodeGenModule &CGM, llvm::Type *ArgsType,
    ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs,
    ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps,
    SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  // void copy_func(void *LHSArg, void *RHSArg);
  FunctionArgList Args;
  ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  Args.push_back(&LHSArg);
  Args.push_back(&RHSArg);
  const auto &CGFI =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  std::string Name =
      CGM.getOpenMPRuntime().getName({"omp", "copyprivate", "copy_func"});
  auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
                                    llvm::GlobalValue::InternalLinkage, Name,
                                    &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
  // Reinterpret the void* arguments as arrays of void* element addresses.
  // Dest = (void*[n])(LHSArg);
  // Src = (void*[n])(RHSArg);
  Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
      ArgsType), CGF.getPointerAlign());
  Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
      ArgsType), CGF.getPointerAlign());
  // Emit one element-wise copy per copyprivate variable:
  // *(Type0*)Dst[0] = *(Type0*)Src[0];
  // *(Type1*)Dst[1] = *(Type1*)Src[1];
  // ...
  // *(Typen*)Dst[n] = *(Typen*)Src[n];
  for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) {
    const auto *DestVar =
        cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl());
    Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar);

    const auto *SrcVar =
        cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl());
    Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar);

    // Copy using the assignment operation recorded for this variable.
    const auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl();
    QualType Type = VD->getType();
    CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]);
  }
  CGF.FinishFunction();
  return Fn;
}
3177 
void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF,
                                       const RegionCodeGenTy &SingleOpGen,
                                       SourceLocation Loc,
                                       ArrayRef<const Expr *> CopyprivateVars,
                                       ArrayRef<const Expr *> SrcExprs,
                                       ArrayRef<const Expr *> DstExprs,
                                       ArrayRef<const Expr *> AssignmentOps) {
  if (!CGF.HaveInsertPoint())
    return;
  // The four copyprivate arrays are parallel: one src/dst/assignment per var.
  assert(CopyprivateVars.size() == SrcExprs.size() &&
         CopyprivateVars.size() == DstExprs.size() &&
         CopyprivateVars.size() == AssignmentOps.size());
  ASTContext &C = CGM.getContext();
  // Emits:
  // int32 did_it = 0;
  // if(__kmpc_single(ident_t *, gtid)) {
  //   SingleOpGen();
  //   __kmpc_end_single(ident_t *, gtid);
  //   did_it = 1;
  // }
  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
  // <copy_func>, did_it);

  // did_it records whether this thread executed the single region, so the
  // copyprivate broadcast knows which thread provides the values.
  Address DidIt = Address::invalid();
  if (!CopyprivateVars.empty()) {
    // int32 did_it = 0;
    QualType KmpInt32Ty =
        C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
    DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it");
    CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt);
  }
  // Prepare arguments and build a call to __kmpc_single
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_single), Args,
                        createRuntimeFunction(OMPRTL__kmpc_end_single), Args,
                        /*Conditional=*/true);
  SingleOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_single, SingleOpGen);
  if (DidIt.isValid()) {
    // did_it = 1;  (still inside the conditional region, i.e. only on the
    // thread that executed the single)
    CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt);
  }
  Action.Done(CGF);
  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
  // <copy_func>, did_it);
  if (DidIt.isValid()) {
    llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size());
    QualType CopyprivateArrayTy =
        C.getConstantArrayType(C.VoidPtrTy, ArraySize, ArrayType::Normal,
                               /*IndexTypeQuals=*/0);
    // Create a list of all private variables for copyprivate.
    Address CopyprivateList =
        CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list");
    for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) {
      Address Elem = CGF.Builder.CreateConstArrayGEP(CopyprivateList, I);
      CGF.Builder.CreateStore(
          CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
              CGF.EmitLValue(CopyprivateVars[I]).getPointer(), CGF.VoidPtrTy),
          Elem);
    }
    // Build function that copies private values from single region to all other
    // threads in the corresponding parallel region.
    llvm::Value *CpyFn = emitCopyprivateCopyFunction(
        CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy)->getPointerTo(),
        CopyprivateVars, SrcExprs, DstExprs, AssignmentOps, Loc);
    llvm::Value *BufSize = CGF.getTypeSize(CopyprivateArrayTy);
    Address CL =
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(CopyprivateList,
                                                      CGF.VoidPtrTy);
    llvm::Value *DidItVal = CGF.Builder.CreateLoad(DidIt);
    llvm::Value *Args[] = {
        emitUpdateLocation(CGF, Loc), // ident_t *<loc>
        getThreadID(CGF, Loc),        // i32 <gtid>
        BufSize,                      // size_t <buf_size>
        CL.getPointer(),              // void *<copyprivate list>
        CpyFn,                        // void (*) (void *, void *) <copy_func>
        DidItVal                      // i32 did_it
    };
    CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_copyprivate), Args);
  }
}
3258 
3259 void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF,
3260                                         const RegionCodeGenTy &OrderedOpGen,
3261                                         SourceLocation Loc, bool IsThreads) {
3262   if (!CGF.HaveInsertPoint())
3263     return;
3264   // __kmpc_ordered(ident_t *, gtid);
3265   // OrderedOpGen();
3266   // __kmpc_end_ordered(ident_t *, gtid);
3267   // Prepare arguments and build a call to __kmpc_ordered
3268   if (IsThreads) {
3269     llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
3270     CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_ordered), Args,
3271                           createRuntimeFunction(OMPRTL__kmpc_end_ordered),
3272                           Args);
3273     OrderedOpGen.setAction(Action);
3274     emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
3275     return;
3276   }
3277   emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
3278 }
3279 
3280 unsigned CGOpenMPRuntime::getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind) {
3281   unsigned Flags;
3282   if (Kind == OMPD_for)
3283     Flags = OMP_IDENT_BARRIER_IMPL_FOR;
3284   else if (Kind == OMPD_sections)
3285     Flags = OMP_IDENT_BARRIER_IMPL_SECTIONS;
3286   else if (Kind == OMPD_single)
3287     Flags = OMP_IDENT_BARRIER_IMPL_SINGLE;
3288   else if (Kind == OMPD_barrier)
3289     Flags = OMP_IDENT_BARRIER_EXPL;
3290   else
3291     Flags = OMP_IDENT_BARRIER_IMPL;
3292   return Flags;
3293 }
3294 
3295 void CGOpenMPRuntime::getDefaultScheduleAndChunk(
3296     CodeGenFunction &CGF, const OMPLoopDirective &S,
3297     OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const {
3298   // Check if the loop directive is actually a doacross loop directive. In this
3299   // case choose static, 1 schedule.
3300   if (llvm::any_of(
3301           S.getClausesOfKind<OMPOrderedClause>(),
3302           [](const OMPOrderedClause *C) { return C->getNumForLoops(); })) {
3303     ScheduleKind = OMPC_SCHEDULE_static;
3304     // Chunk size is 1 in this case.
3305     llvm::APInt ChunkSize(32, 1);
3306     ChunkExpr = IntegerLiteral::Create(
3307         CGF.getContext(), ChunkSize,
3308         CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/0),
3309         SourceLocation());
3310   }
3311 }
3312 
/// Emit an OpenMP barrier: either __kmpc_cancel_barrier (when the enclosing
/// region supports cancellation and a simple call was not forced) or the
/// plain __kmpc_barrier.
void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
                                      OpenMPDirectiveKind Kind, bool EmitChecks,
                                      bool ForceSimpleCall) {
  if (!CGF.HaveInsertPoint())
    return;
  // Select the barrier flags (implicit/explicit; for/sections/single) that
  // are encoded into the ident_t location argument.
  unsigned Flags = getDefaultFlagsForBarriers(Kind);
  // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc,
  // thread_id);
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags),
                         getThreadID(CGF, Loc)};
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    // Inside a cancellable region the barrier may observe a pending
    // cancellation, so use the cancellation-aware runtime entry point.
    if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) {
      llvm::Value *Result = CGF.EmitRuntimeCall(
          createRuntimeFunction(OMPRTL__kmpc_cancel_barrier), Args);
      if (EmitChecks) {
        // if (__kmpc_cancel_barrier()) {
        //   exit from construct;
        // }
        llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
        llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
        llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
        CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
        CGF.EmitBlock(ExitBB);
        //   exit from construct;
        CodeGenFunction::JumpDest CancelDestination =
            CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
        CGF.EmitBranchThroughCleanup(CancelDestination);
        CGF.EmitBlock(ContBB, /*IsFinished=*/true);
      }
      return;
    }
  }
  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_barrier), Args);
}
3350 
3351 /// Map the OpenMP loop schedule to the runtime enumeration.
3352 static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind,
3353                                           bool Chunked, bool Ordered) {
3354   switch (ScheduleKind) {
3355   case OMPC_SCHEDULE_static:
3356     return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked)
3357                    : (Ordered ? OMP_ord_static : OMP_sch_static);
3358   case OMPC_SCHEDULE_dynamic:
3359     return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked;
3360   case OMPC_SCHEDULE_guided:
3361     return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked;
3362   case OMPC_SCHEDULE_runtime:
3363     return Ordered ? OMP_ord_runtime : OMP_sch_runtime;
3364   case OMPC_SCHEDULE_auto:
3365     return Ordered ? OMP_ord_auto : OMP_sch_auto;
3366   case OMPC_SCHEDULE_unknown:
3367     assert(!Chunked && "chunk was specified but schedule kind not known");
3368     return Ordered ? OMP_ord_static : OMP_sch_static;
3369   }
3370   llvm_unreachable("Unexpected runtime schedule");
3371 }
3372 
3373 /// Map the OpenMP distribute schedule to the runtime enumeration.
3374 static OpenMPSchedType
3375 getRuntimeSchedule(OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) {
3376   // only static is allowed for dist_schedule
3377   return Chunked ? OMP_dist_sch_static_chunked : OMP_dist_sch_static;
3378 }
3379 
3380 bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind,
3381                                          bool Chunked) const {
3382   OpenMPSchedType Schedule =
3383       getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
3384   return Schedule == OMP_sch_static;
3385 }
3386 
3387 bool CGOpenMPRuntime::isStaticNonchunked(
3388     OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
3389   OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
3390   return Schedule == OMP_dist_sch_static;
3391 }
3392 
3393 bool CGOpenMPRuntime::isStaticChunked(OpenMPScheduleClauseKind ScheduleKind,
3394                                       bool Chunked) const {
3395   OpenMPSchedType Schedule =
3396       getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
3397   return Schedule == OMP_sch_static_chunked;
3398 }
3399 
3400 bool CGOpenMPRuntime::isStaticChunked(
3401     OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
3402   OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
3403   return Schedule == OMP_dist_sch_static_chunked;
3404 }
3405 
3406 bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const {
3407   OpenMPSchedType Schedule =
3408       getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false);
3409   assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here");
3410   return Schedule != OMP_sch_static;
3411 }
3412 
3413 static int addMonoNonMonoModifier(OpenMPSchedType Schedule,
3414                                   OpenMPScheduleClauseModifier M1,
3415                                   OpenMPScheduleClauseModifier M2) {
3416   int Modifier = 0;
3417   switch (M1) {
3418   case OMPC_SCHEDULE_MODIFIER_monotonic:
3419     Modifier = OMP_sch_modifier_monotonic;
3420     break;
3421   case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
3422     Modifier = OMP_sch_modifier_nonmonotonic;
3423     break;
3424   case OMPC_SCHEDULE_MODIFIER_simd:
3425     if (Schedule == OMP_sch_static_chunked)
3426       Schedule = OMP_sch_static_balanced_chunked;
3427     break;
3428   case OMPC_SCHEDULE_MODIFIER_last:
3429   case OMPC_SCHEDULE_MODIFIER_unknown:
3430     break;
3431   }
3432   switch (M2) {
3433   case OMPC_SCHEDULE_MODIFIER_monotonic:
3434     Modifier = OMP_sch_modifier_monotonic;
3435     break;
3436   case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
3437     Modifier = OMP_sch_modifier_nonmonotonic;
3438     break;
3439   case OMPC_SCHEDULE_MODIFIER_simd:
3440     if (Schedule == OMP_sch_static_chunked)
3441       Schedule = OMP_sch_static_balanced_chunked;
3442     break;
3443   case OMPC_SCHEDULE_MODIFIER_last:
3444   case OMPC_SCHEDULE_MODIFIER_unknown:
3445     break;
3446   }
3447   return Schedule | Modifier;
3448 }
3449 
/// Emit the __kmpc_dispatch_init_* call that starts a dynamically scheduled
/// (or ordered) worksharing loop.
void CGOpenMPRuntime::emitForDispatchInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
    bool Ordered, const DispatchRTInput &DispatchValues) {
  if (!CGF.HaveInsertPoint())
    return;
  // Translate the clause schedule (plus chunkedness/orderedness) into the
  // runtime schedule enumeration.
  OpenMPSchedType Schedule = getRuntimeSchedule(
      ScheduleKind.Schedule, DispatchValues.Chunk != nullptr, Ordered);
  // Static schedules must not reach the dispatch path unless ordered.
  assert(Ordered ||
         (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked &&
          Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked &&
          Schedule != OMP_sch_static_balanced_chunked));
  // Call __kmpc_dispatch_init(
  //          ident_t *loc, kmp_int32 tid, kmp_int32 schedule,
  //          kmp_int[32|64] lower, kmp_int[32|64] upper,
  //          kmp_int[32|64] stride, kmp_int[32|64] chunk);

  // If the Chunk was not specified in the clause - use default value 1.
  llvm::Value *Chunk = DispatchValues.Chunk ? DispatchValues.Chunk
                                            : CGF.Builder.getIntN(IVSize, 1);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
      CGF.Builder.getInt32(addMonoNonMonoModifier(
          Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type
      DispatchValues.LB,                                // Lower
      DispatchValues.UB,                                // Upper
      CGF.Builder.getIntN(IVSize, 1),                   // Stride
      Chunk                                             // Chunk
  };
  CGF.EmitRuntimeCall(createDispatchInitFunction(IVSize, IVSigned), Args);
}
3481 
/// Emit the call to the __kmpc_for_static_init_* runtime function for a
/// statically scheduled loop/sections/distribute region.
/// \param UpdateLocation ident_t* describing the source location and flags.
/// \param ThreadId Global thread id of the current thread.
/// \param ForStaticInitFunction Runtime callee matching IV size/signedness.
/// \param Schedule Runtime schedule kind; must be one of the static kinds
///        (asserted below).
/// \param M1 First schedule modifier from the schedule clause.
/// \param M2 Second schedule modifier from the schedule clause.
/// \param Values Addresses of the is-last-iter/lower/upper/stride slots the
///        runtime fills in, plus IV size/signedness and optional chunk value.
static void emitForStaticInitCall(
    CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId,
    llvm::FunctionCallee ForStaticInitFunction, OpenMPSchedType Schedule,
    OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2,
    const CGOpenMPRuntime::StaticRTInput &Values) {
  if (!CGF.HaveInsertPoint())
    return;

  // Ordered loops take the dispatch path instead (emitForDispatchInit).
  assert(!Values.Ordered);
  assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked ||
         Schedule == OMP_sch_static_balanced_chunked ||
         Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked ||
         Schedule == OMP_dist_sch_static ||
         Schedule == OMP_dist_sch_static_chunked);

  // Call __kmpc_for_static_init(
  //          ident_t *loc, kmp_int32 tid, kmp_int32 schedtype,
  //          kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower,
  //          kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
  //          kmp_int[32|64] incr, kmp_int[32|64] chunk);
  llvm::Value *Chunk = Values.Chunk;
  if (Chunk == nullptr) {
    // A missing chunk is only consistent with the non-chunked schedules.
    assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static ||
            Schedule == OMP_dist_sch_static) &&
           "expected static non-chunked schedule");
    // If the Chunk was not specified in the clause - use default value 1.
    Chunk = CGF.Builder.getIntN(Values.IVSize, 1);
  } else {
    assert((Schedule == OMP_sch_static_chunked ||
            Schedule == OMP_sch_static_balanced_chunked ||
            Schedule == OMP_ord_static_chunked ||
            Schedule == OMP_dist_sch_static_chunked) &&
           "expected static chunked schedule");
  }
  llvm::Value *Args[] = {
      UpdateLocation,
      ThreadId,
      CGF.Builder.getInt32(addMonoNonMonoModifier(Schedule, M1,
                                                  M2)), // Schedule type
      Values.IL.getPointer(),                           // &isLastIter
      Values.LB.getPointer(),                           // &LB
      Values.UB.getPointer(),                           // &UB
      Values.ST.getPointer(),                           // &Stride
      CGF.Builder.getIntN(Values.IVSize, 1),            // Incr
      Chunk                                             // Chunk
  };
  CGF.EmitRuntimeCall(ForStaticInitFunction, Args);
}
3530 
3531 void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF,
3532                                         SourceLocation Loc,
3533                                         OpenMPDirectiveKind DKind,
3534                                         const OpenMPScheduleTy &ScheduleKind,
3535                                         const StaticRTInput &Values) {
3536   OpenMPSchedType ScheduleNum = getRuntimeSchedule(
3537       ScheduleKind.Schedule, Values.Chunk != nullptr, Values.Ordered);
3538   assert(isOpenMPWorksharingDirective(DKind) &&
3539          "Expected loop-based or sections-based directive.");
3540   llvm::Value *UpdatedLocation = emitUpdateLocation(CGF, Loc,
3541                                              isOpenMPLoopDirective(DKind)
3542                                                  ? OMP_IDENT_WORK_LOOP
3543                                                  : OMP_IDENT_WORK_SECTIONS);
3544   llvm::Value *ThreadId = getThreadID(CGF, Loc);
3545   llvm::FunctionCallee StaticInitFunction =
3546       createForStaticInitFunction(Values.IVSize, Values.IVSigned);
3547   emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
3548                         ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, Values);
3549 }
3550 
3551 void CGOpenMPRuntime::emitDistributeStaticInit(
3552     CodeGenFunction &CGF, SourceLocation Loc,
3553     OpenMPDistScheduleClauseKind SchedKind,
3554     const CGOpenMPRuntime::StaticRTInput &Values) {
3555   OpenMPSchedType ScheduleNum =
3556       getRuntimeSchedule(SchedKind, Values.Chunk != nullptr);
3557   llvm::Value *UpdatedLocation =
3558       emitUpdateLocation(CGF, Loc, OMP_IDENT_WORK_DISTRIBUTE);
3559   llvm::Value *ThreadId = getThreadID(CGF, Loc);
3560   llvm::FunctionCallee StaticInitFunction =
3561       createForStaticInitFunction(Values.IVSize, Values.IVSigned);
3562   emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
3563                         ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown,
3564                         OMPC_SCHEDULE_MODIFIER_unknown, Values);
3565 }
3566 
3567 void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF,
3568                                           SourceLocation Loc,
3569                                           OpenMPDirectiveKind DKind) {
3570   if (!CGF.HaveInsertPoint())
3571     return;
3572   // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid);
3573   llvm::Value *Args[] = {
3574       emitUpdateLocation(CGF, Loc,
3575                          isOpenMPDistributeDirective(DKind)
3576                              ? OMP_IDENT_WORK_DISTRIBUTE
3577                              : isOpenMPLoopDirective(DKind)
3578                                    ? OMP_IDENT_WORK_LOOP
3579                                    : OMP_IDENT_WORK_SECTIONS),
3580       getThreadID(CGF, Loc)};
3581   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_for_static_fini),
3582                       Args);
3583 }
3584 
3585 void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
3586                                                  SourceLocation Loc,
3587                                                  unsigned IVSize,
3588                                                  bool IVSigned) {
3589   if (!CGF.HaveInsertPoint())
3590     return;
3591   // Call __kmpc_for_dynamic_fini_(4|8)[u](ident_t *loc, kmp_int32 tid);
3592   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
3593   CGF.EmitRuntimeCall(createDispatchFiniFunction(IVSize, IVSigned), Args);
3594 }
3595 
3596 llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF,
3597                                           SourceLocation Loc, unsigned IVSize,
3598                                           bool IVSigned, Address IL,
3599                                           Address LB, Address UB,
3600                                           Address ST) {
3601   // Call __kmpc_dispatch_next(
3602   //          ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter,
3603   //          kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper,
3604   //          kmp_int[32|64] *p_stride);
3605   llvm::Value *Args[] = {
3606       emitUpdateLocation(CGF, Loc),
3607       getThreadID(CGF, Loc),
3608       IL.getPointer(), // &isLastIter
3609       LB.getPointer(), // &Lower
3610       UB.getPointer(), // &Upper
3611       ST.getPointer()  // &Stride
3612   };
3613   llvm::Value *Call =
3614       CGF.EmitRuntimeCall(createDispatchNextFunction(IVSize, IVSigned), Args);
3615   return CGF.EmitScalarConversion(
3616       Call, CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/1),
3617       CGF.getContext().BoolTy, Loc);
3618 }
3619 
3620 void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
3621                                            llvm::Value *NumThreads,
3622                                            SourceLocation Loc) {
3623   if (!CGF.HaveInsertPoint())
3624     return;
3625   // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads)
3626   llvm::Value *Args[] = {
3627       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
3628       CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)};
3629   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_num_threads),
3630                       Args);
3631 }
3632 
3633 void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF,
3634                                          OpenMPProcBindClauseKind ProcBind,
3635                                          SourceLocation Loc) {
3636   if (!CGF.HaveInsertPoint())
3637     return;
3638   // Constants for proc bind value accepted by the runtime.
3639   enum ProcBindTy {
3640     ProcBindFalse = 0,
3641     ProcBindTrue,
3642     ProcBindMaster,
3643     ProcBindClose,
3644     ProcBindSpread,
3645     ProcBindIntel,
3646     ProcBindDefault
3647   } RuntimeProcBind;
3648   switch (ProcBind) {
3649   case OMPC_PROC_BIND_master:
3650     RuntimeProcBind = ProcBindMaster;
3651     break;
3652   case OMPC_PROC_BIND_close:
3653     RuntimeProcBind = ProcBindClose;
3654     break;
3655   case OMPC_PROC_BIND_spread:
3656     RuntimeProcBind = ProcBindSpread;
3657     break;
3658   case OMPC_PROC_BIND_unknown:
3659     llvm_unreachable("Unsupported proc_bind value.");
3660   }
3661   // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind)
3662   llvm::Value *Args[] = {
3663       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
3664       llvm::ConstantInt::get(CGM.IntTy, RuntimeProcBind, /*isSigned=*/true)};
3665   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_proc_bind), Args);
3666 }
3667 
3668 void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>,
3669                                 SourceLocation Loc) {
3670   if (!CGF.HaveInsertPoint())
3671     return;
3672   // Build call void __kmpc_flush(ident_t *loc)
3673   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_flush),
3674                       emitUpdateLocation(CGF, Loc));
3675 }
3676 
namespace {
/// Indexes of fields for type kmp_task_t.
/// NOTE: these enumerators are used as field indexes, so their order is
/// significant.
enum KmpTaskTFields {
  /// List of shared variables.
  KmpTaskTShareds,
  /// Task routine.
  KmpTaskTRoutine,
  /// Partition id for the untied tasks.
  KmpTaskTPartId,
  /// Function with call of destructors for private variables.
  Data1,
  /// Task priority.
  Data2,
  /// (Taskloops only) Lower bound.
  KmpTaskTLowerBound,
  /// (Taskloops only) Upper bound.
  KmpTaskTUpperBound,
  /// (Taskloops only) Stride.
  KmpTaskTStride,
  /// (Taskloops only) Is last iteration flag.
  KmpTaskTLastIter,
  /// (Taskloops only) Reduction data.
  KmpTaskTReductions,
};
} // anonymous namespace
3702 
3703 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::empty() const {
3704   return OffloadEntriesTargetRegion.empty() &&
3705          OffloadEntriesDeviceGlobalVar.empty();
3706 }
3707 
/// Initialize target region entry.
/// Reserves a slot keyed by device/file/parent-function/line with the given
/// emission order; address, ID and flags are filled in later by
/// registerTargetRegionEntryInfo. Device-compilation only (asserted).
void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
    initializeTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
                                    StringRef ParentName, unsigned LineNum,
                                    unsigned Order) {
  assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
                                             "only required for the device "
                                             "code generation.");
  OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] =
      OffloadEntryInfoTargetRegion(Order, /*Addr=*/nullptr, /*ID=*/nullptr,
                                   OMPTargetRegionEntryTargetRegion);
  ++OffloadingEntriesNum;
}
3721 
/// Register a target region entry: attach the outlined function address, the
/// unique entry ID and the entry kind flags. On the device the entry must
/// have been initialized up front (an error is diagnosed otherwise); on the
/// host a new entry is created with the next emission order.
void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
    registerTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
                                  StringRef ParentName, unsigned LineNum,
                                  llvm::Constant *Addr, llvm::Constant *ID,
                                  OMPTargetRegionEntryKind Flags) {
  // If we are emitting code for a target, the entry is already initialized,
  // only has to be registered.
  if (CGM.getLangOpts().OpenMPIsDevice) {
    if (!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum)) {
      unsigned DiagID = CGM.getDiags().getCustomDiagID(
          DiagnosticsEngine::Error,
          "Unable to find target region on line '%0' in the device code.");
      CGM.getDiags().Report(DiagID) << LineNum;
      return;
    }
    auto &Entry =
        OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum];
    assert(Entry.isValid() && "Entry not initialized!");
    Entry.setAddress(Addr);
    Entry.setID(ID);
    Entry.setFlags(Flags);
  } else {
    // Host compilation: create the entry on first registration.
    OffloadEntryInfoTargetRegion Entry(OffloadingEntriesNum, Addr, ID, Flags);
    OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = Entry;
    ++OffloadingEntriesNum;
  }
}
3749 
3750 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::hasTargetRegionEntryInfo(
3751     unsigned DeviceID, unsigned FileID, StringRef ParentName,
3752     unsigned LineNum) const {
3753   auto PerDevice = OffloadEntriesTargetRegion.find(DeviceID);
3754   if (PerDevice == OffloadEntriesTargetRegion.end())
3755     return false;
3756   auto PerFile = PerDevice->second.find(FileID);
3757   if (PerFile == PerDevice->second.end())
3758     return false;
3759   auto PerParentName = PerFile->second.find(ParentName);
3760   if (PerParentName == PerFile->second.end())
3761     return false;
3762   auto PerLine = PerParentName->second.find(LineNum);
3763   if (PerLine == PerParentName->second.end())
3764     return false;
3765   // Fail if this entry is already registered.
3766   if (PerLine->second.getAddress() || PerLine->second.getID())
3767     return false;
3768   return true;
3769 }
3770 
3771 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::actOnTargetRegionEntriesInfo(
3772     const OffloadTargetRegionEntryInfoActTy &Action) {
3773   // Scan all target region entries and perform the provided action.
3774   for (const auto &D : OffloadEntriesTargetRegion)
3775     for (const auto &F : D.second)
3776       for (const auto &P : F.second)
3777         for (const auto &L : P.second)
3778           Action(D.first, F.first, P.first(), L.first, L.second);
3779 }
3780 
/// Initialize a device global variable entry with the given name, flags and
/// emission order; address, size and linkage are filled in later by
/// registerDeviceGlobalVarEntryInfo. Device-compilation only (asserted).
void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
    initializeDeviceGlobalVarEntryInfo(StringRef Name,
                                       OMPTargetGlobalVarEntryKind Flags,
                                       unsigned Order) {
  assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
                                             "only required for the device "
                                             "code generation.");
  OffloadEntriesDeviceGlobalVar.try_emplace(Name, Order, Flags);
  ++OffloadingEntriesNum;
}
3791 
/// Register a device global variable entry (address, size, linkage).
/// On the device the entry must already have been initialized; on the host a
/// new entry is created on first registration. In both modes an entry seen
/// again only has its size and linkage updated, and only when the recorded
/// size is still zero.
void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
    registerDeviceGlobalVarEntryInfo(StringRef VarName, llvm::Constant *Addr,
                                     CharUnits VarSize,
                                     OMPTargetGlobalVarEntryKind Flags,
                                     llvm::GlobalValue::LinkageTypes Linkage) {
  if (CGM.getLangOpts().OpenMPIsDevice) {
    auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
    assert(Entry.isValid() && Entry.getFlags() == Flags &&
           "Entry not initialized!");
    assert((!Entry.getAddress() || Entry.getAddress() == Addr) &&
           "Resetting with the new address.");
    if (Entry.getAddress() && hasDeviceGlobalVarEntryInfo(VarName)) {
      // Already registered: only fill in a previously unknown (zero) size.
      if (Entry.getVarSize().isZero()) {
        Entry.setVarSize(VarSize);
        Entry.setLinkage(Linkage);
      }
      return;
    }
    Entry.setVarSize(VarSize);
    Entry.setLinkage(Linkage);
    Entry.setAddress(Addr);
  } else {
    if (hasDeviceGlobalVarEntryInfo(VarName)) {
      auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
      assert(Entry.isValid() && Entry.getFlags() == Flags &&
             "Entry not initialized!");
      assert((!Entry.getAddress() || Entry.getAddress() == Addr) &&
             "Resetting with the new address.");
      // Already registered: only fill in a previously unknown (zero) size.
      if (Entry.getVarSize().isZero()) {
        Entry.setVarSize(VarSize);
        Entry.setLinkage(Linkage);
      }
      return;
    }
    // First registration on the host: create the entry with the next order.
    OffloadEntriesDeviceGlobalVar.try_emplace(
        VarName, OffloadingEntriesNum, Addr, VarSize, Flags, Linkage);
    ++OffloadingEntriesNum;
  }
}
3831 
3832 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3833     actOnDeviceGlobalVarEntriesInfo(
3834         const OffloadDeviceGlobalVarEntryInfoActTy &Action) {
3835   // Scan all target region entries and perform the provided action.
3836   for (const auto &E : OffloadEntriesDeviceGlobalVar)
3837     Action(E.getKey(), E.getValue());
3838 }
3839 
/// Create the function that registers the offloading binary descriptor with
/// the runtime (__tgt_register_lib) and arranges for its unregistration
/// (__tgt_unregister_lib) at program shutdown. Returns nullptr when compiling
/// for the device or when there are no offload entries.
llvm::Function *
CGOpenMPRuntime::createOffloadingBinaryDescriptorRegistration() {
  // If we don't have entries or if we are emitting code for the device, we
  // don't need to do anything.
  if (CGM.getLangOpts().OpenMPIsDevice || OffloadEntriesInfoManager.empty())
    return nullptr;

  llvm::Module &M = CGM.getModule();
  ASTContext &C = CGM.getContext();

  // Get list of devices we care about
  const std::vector<llvm::Triple> &Devices = CGM.getLangOpts().OMPTargetTriples;

  // We should be creating an offloading descriptor only if there are devices
  // specified.
  assert(!Devices.empty() && "No OpenMP offloading devices??");

  // Create the external variables that will point to the begin and end of the
  // host entries section. These will be defined by the linker.
  llvm::Type *OffloadEntryTy =
      CGM.getTypes().ConvertTypeForMem(getTgtOffloadEntryQTy());
  std::string EntriesBeginName = getName({"omp_offloading", "entries_begin"});
  auto *HostEntriesBegin = new llvm::GlobalVariable(
      M, OffloadEntryTy, /*isConstant=*/true,
      llvm::GlobalValue::ExternalLinkage, /*Initializer=*/nullptr,
      EntriesBeginName);
  std::string EntriesEndName = getName({"omp_offloading", "entries_end"});
  auto *HostEntriesEnd =
      new llvm::GlobalVariable(M, OffloadEntryTy, /*isConstant=*/true,
                               llvm::GlobalValue::ExternalLinkage,
                               /*Initializer=*/nullptr, EntriesEndName);

  // Create all device images
  auto *DeviceImageTy = cast<llvm::StructType>(
      CGM.getTypes().ConvertTypeForMem(getTgtDeviceImageQTy()));
  ConstantInitBuilder DeviceImagesBuilder(CGM);
  ConstantArrayBuilder DeviceImagesEntries =
      DeviceImagesBuilder.beginArray(DeviceImageTy);

  for (const llvm::Triple &Device : Devices) {
    StringRef T = Device.getTriple();
    // Image begin/end markers are external weak symbols; the target triple is
    // appended to keep them unique per device.
    std::string BeginName = getName({"omp_offloading", "img_start", ""});
    auto *ImgBegin = new llvm::GlobalVariable(
        M, CGM.Int8Ty, /*isConstant=*/true,
        llvm::GlobalValue::ExternalWeakLinkage,
        /*Initializer=*/nullptr, Twine(BeginName).concat(T));
    std::string EndName = getName({"omp_offloading", "img_end", ""});
    auto *ImgEnd = new llvm::GlobalVariable(
        M, CGM.Int8Ty, /*isConstant=*/true,
        llvm::GlobalValue::ExternalWeakLinkage,
        /*Initializer=*/nullptr, Twine(EndName).concat(T));

    llvm::Constant *Data[] = {ImgBegin, ImgEnd, HostEntriesBegin,
                              HostEntriesEnd};
    createConstantGlobalStructAndAddToParent(CGM, getTgtDeviceImageQTy(), Data,
                                             DeviceImagesEntries);
  }

  // Create device images global array.
  std::string ImagesName = getName({"omp_offloading", "device_images"});
  llvm::GlobalVariable *DeviceImages =
      DeviceImagesEntries.finishAndCreateGlobal(ImagesName,
                                                CGM.getPointerAlign(),
                                                /*isConstant=*/true);
  DeviceImages->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);

  // This is a Zero array to be used in the creation of the constant expressions
  llvm::Constant *Index[] = {llvm::Constant::getNullValue(CGM.Int32Ty),
                             llvm::Constant::getNullValue(CGM.Int32Ty)};

  // Create the target region descriptor.
  llvm::Constant *Data[] = {
      llvm::ConstantInt::get(CGM.Int32Ty, Devices.size()),
      llvm::ConstantExpr::getGetElementPtr(DeviceImages->getValueType(),
                                           DeviceImages, Index),
      HostEntriesBegin, HostEntriesEnd};
  std::string Descriptor = getName({"omp_offloading", "descriptor"});
  llvm::GlobalVariable *Desc = createGlobalStruct(
      CGM, getTgtBinaryDescriptorQTy(), /*IsConstant=*/true, Data, Descriptor);

  // Emit code to register or unregister the descriptor at execution
  // startup or closing, respectively.

  // Unregistration function. It takes a dummy void* argument so it can be
  // passed to registerGlobalDtor below.
  llvm::Function *UnRegFn;
  {
    FunctionArgList Args;
    ImplicitParamDecl DummyPtr(C, C.VoidPtrTy, ImplicitParamDecl::Other);
    Args.push_back(&DummyPtr);

    CodeGenFunction CGF(CGM);
    // Disable debug info for global (de-)initializer because they are not part
    // of some particular construct.
    CGF.disableDebugInfo();
    const auto &FI =
        CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
    llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
    std::string UnregName = getName({"omp_offloading", "descriptor_unreg"});
    UnRegFn = CGM.CreateGlobalInitOrDestructFunction(FTy, UnregName, FI);
    CGF.StartFunction(GlobalDecl(), C.VoidTy, UnRegFn, FI, Args);
    CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_unregister_lib),
                        Desc);
    CGF.FinishFunction();
  }
  // Registration function. It calls __tgt_register_lib and also schedules
  // UnRegFn as a global destructor for the descriptor.
  llvm::Function *RegFn;
  {
    CodeGenFunction CGF(CGM);
    // Disable debug info for global (de-)initializer because they are not part
    // of some particular construct.
    CGF.disableDebugInfo();
    const auto &FI = CGM.getTypes().arrangeNullaryFunction();
    llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);

    // Encode offload target triples into the registration function name. It
    // will serve as a comdat key for the registration/unregistration code for
    // this particular combination of offloading targets.
    SmallVector<StringRef, 4U> RegFnNameParts(Devices.size() + 2U);
    RegFnNameParts[0] = "omp_offloading";
    RegFnNameParts[1] = "descriptor_reg";
    llvm::transform(Devices, std::next(RegFnNameParts.begin(), 2),
                    [](const llvm::Triple &T) -> const std::string& {
                      return T.getTriple();
                    });
    // Sort the triples so the comdat key does not depend on the order in
    // which the offload targets were specified.
    llvm::sort(std::next(RegFnNameParts.begin(), 2), RegFnNameParts.end());
    std::string Descriptor = getName(RegFnNameParts);
    RegFn = CGM.CreateGlobalInitOrDestructFunction(FTy, Descriptor, FI);
    CGF.StartFunction(GlobalDecl(), C.VoidTy, RegFn, FI, FunctionArgList());
    CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_register_lib), Desc);
    // Create a variable to drive the registration and unregistration of the
    // descriptor, so we can reuse the logic that emits Ctors and Dtors.
    ImplicitParamDecl RegUnregVar(C, C.getTranslationUnitDecl(),
                                  SourceLocation(), nullptr, C.CharTy,
                                  ImplicitParamDecl::Other);
    CGM.getCXXABI().registerGlobalDtor(CGF, RegUnregVar, UnRegFn, Desc);
    CGF.FinishFunction();
  }
  if (CGM.supportsCOMDAT()) {
    // It is sufficient to call registration function only once, so create a
    // COMDAT group for registration/unregistration functions and associated
    // data. That would reduce startup time and code size. Registration
    // function serves as a COMDAT group key.
    llvm::Comdat *ComdatKey = M.getOrInsertComdat(RegFn->getName());
    RegFn->setLinkage(llvm::GlobalValue::LinkOnceAnyLinkage);
    RegFn->setVisibility(llvm::GlobalValue::HiddenVisibility);
    RegFn->setComdat(ComdatKey);
    UnRegFn->setComdat(ComdatKey);
    DeviceImages->setComdat(ComdatKey);
    Desc->setComdat(ComdatKey);
  }
  return RegFn;
}
3990 
3991 void CGOpenMPRuntime::createOffloadEntry(
3992     llvm::Constant *ID, llvm::Constant *Addr, uint64_t Size, int32_t Flags,
3993     llvm::GlobalValue::LinkageTypes Linkage) {
3994   StringRef Name = Addr->getName();
3995   llvm::Module &M = CGM.getModule();
3996   llvm::LLVMContext &C = M.getContext();
3997 
3998   // Create constant string with the name.
3999   llvm::Constant *StrPtrInit = llvm::ConstantDataArray::getString(C, Name);
4000 
4001   std::string StringName = getName({"omp_offloading", "entry_name"});
4002   auto *Str = new llvm::GlobalVariable(
4003       M, StrPtrInit->getType(), /*isConstant=*/true,
4004       llvm::GlobalValue::InternalLinkage, StrPtrInit, StringName);
4005   Str->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
4006 
4007   llvm::Constant *Data[] = {llvm::ConstantExpr::getBitCast(ID, CGM.VoidPtrTy),
4008                             llvm::ConstantExpr::getBitCast(Str, CGM.Int8PtrTy),
4009                             llvm::ConstantInt::get(CGM.SizeTy, Size),
4010                             llvm::ConstantInt::get(CGM.Int32Ty, Flags),
4011                             llvm::ConstantInt::get(CGM.Int32Ty, 0)};
4012   std::string EntryName = getName({"omp_offloading", "entry", ""});
4013   llvm::GlobalVariable *Entry = createGlobalStruct(
4014       CGM, getTgtOffloadEntryQTy(), /*IsConstant=*/true, Data,
4015       Twine(EntryName).concat(Name), llvm::GlobalValue::WeakAnyLinkage);
4016 
4017   // The entry has to be created in the section the linker expects it to be.
4018   std::string Section = getName({"omp_offloading", "entries"});
4019   Entry->setSection(Section);
4020 }
4021 
void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() {
  // Emit the offloading entries and metadata so that the device codegen side
  // can easily figure out what to emit. The produced metadata looks like
  // this:
  //
  // !omp_offload.info = !{!1, ...}
  //
  // Right now we only generate metadata for functions that contain target
  // regions.

  // If we do not have entries, we don't need to do anything.
  if (OffloadEntriesInfoManager.empty())
    return;

  llvm::Module &M = CGM.getModule();
  llvm::LLVMContext &C = M.getContext();
  // Entries are collected here indexed by their creation order; the emitter
  // lambdas below fill both arrays in.
  SmallVector<const OffloadEntriesInfoManagerTy::OffloadEntryInfo *, 16>
      OrderedEntries(OffloadEntriesInfoManager.size());
  llvm::SmallVector<StringRef, 16> ParentFunctions(
      OffloadEntriesInfoManager.size());

  // Auxiliary methods to create metadata values and strings.
  auto &&GetMDInt = [this](unsigned V) {
    return llvm::ConstantAsMetadata::get(
        llvm::ConstantInt::get(CGM.Int32Ty, V));
  };

  auto &&GetMDString = [&C](StringRef V) { return llvm::MDString::get(C, V); };

  // Create the offloading info metadata node.
  llvm::NamedMDNode *MD = M.getOrInsertNamedMetadata("omp_offload.info");

  // Create function that emits metadata for each target region entry;
  auto &&TargetRegionMetadataEmitter =
      [&C, MD, &OrderedEntries, &ParentFunctions, &GetMDInt, &GetMDString](
          unsigned DeviceID, unsigned FileID, StringRef ParentName,
          unsigned Line,
          const OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion &E) {
        // Generate metadata for target regions. Each entry of this metadata
        // contains:
        // - Entry 0 -> Kind of this type of metadata (0).
        // - Entry 1 -> Device ID of the file where the entry was identified.
        // - Entry 2 -> File ID of the file where the entry was identified.
        // - Entry 3 -> Mangled name of the function where the entry was
        // identified.
        // - Entry 4 -> Line in the file where the entry was identified.
        // - Entry 5 -> Order the entry was created.
        // The first element of the metadata node is the kind.
        llvm::Metadata *Ops[] = {GetMDInt(E.getKind()), GetMDInt(DeviceID),
                                 GetMDInt(FileID),      GetMDString(ParentName),
                                 GetMDInt(Line),        GetMDInt(E.getOrder())};

        // Save this entry in the right position of the ordered entries array.
        OrderedEntries[E.getOrder()] = &E;
        ParentFunctions[E.getOrder()] = ParentName;

        // Add metadata to the named metadata node.
        MD->addOperand(llvm::MDNode::get(C, Ops));
      };

  OffloadEntriesInfoManager.actOnTargetRegionEntriesInfo(
      TargetRegionMetadataEmitter);

  // Create function that emits metadata for each device global variable entry;
  auto &&DeviceGlobalVarMetadataEmitter =
      [&C, &OrderedEntries, &GetMDInt, &GetMDString,
       MD](StringRef MangledName,
           const OffloadEntriesInfoManagerTy::OffloadEntryInfoDeviceGlobalVar
               &E) {
        // Generate metadata for global variables. Each entry of this metadata
        // contains:
        // - Entry 0 -> Kind of this type of metadata (1).
        // - Entry 1 -> Mangled name of the variable.
        // - Entry 2 -> Declare target kind.
        // - Entry 3 -> Order the entry was created.
        // The first element of the metadata node is the kind.
        llvm::Metadata *Ops[] = {
            GetMDInt(E.getKind()), GetMDString(MangledName),
            GetMDInt(E.getFlags()), GetMDInt(E.getOrder())};

        // Save this entry in the right position of the ordered entries array.
        OrderedEntries[E.getOrder()] = &E;

        // Add metadata to the named metadata node.
        MD->addOperand(llvm::MDNode::get(C, Ops));
      };

  OffloadEntriesInfoManager.actOnDeviceGlobalVarEntriesInfo(
      DeviceGlobalVarMetadataEmitter);

  // Now emit the actual host-side offload entries, in creation order, and
  // diagnose entries that were registered but never got an address emitted.
  for (const auto *E : OrderedEntries) {
    assert(E && "All ordered entries must exist!");
    if (const auto *CE =
            dyn_cast<OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion>(
                E)) {
      if (!CE->getID() || !CE->getAddress()) {
        // Do not blame the entry if the parent function is not emitted.
        StringRef FnName = ParentFunctions[CE->getOrder()];
        if (!CGM.GetGlobalValue(FnName))
          continue;
        unsigned DiagID = CGM.getDiags().getCustomDiagID(
            DiagnosticsEngine::Error,
            "Offloading entry for target region is incorrect: either the "
            "address or the ID is invalid.");
        CGM.getDiags().Report(DiagID);
        continue;
      }
      createOffloadEntry(CE->getID(), CE->getAddress(), /*Size=*/0,
                         CE->getFlags(), llvm::GlobalValue::WeakAnyLinkage);
    } else if (const auto *CE =
                   dyn_cast<OffloadEntriesInfoManagerTy::
                                OffloadEntryInfoDeviceGlobalVar>(E)) {
      OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags =
          static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>(
              CE->getFlags());
      switch (Flags) {
      case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo: {
        if (!CE->getAddress()) {
          unsigned DiagID = CGM.getDiags().getCustomDiagID(
              DiagnosticsEngine::Error,
              "Offloading entry for declare target variable is incorrect: the "
              "address is invalid.");
          CGM.getDiags().Report(DiagID);
          continue;
        }
        // The variable has no definition - no need to add the entry.
        if (CE->getVarSize().isZero())
          continue;
        break;
      }
      case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink:
        // 'declare target link' variables only get a host-side address; on the
        // device the address must stay unset.
        assert(((CGM.getLangOpts().OpenMPIsDevice && !CE->getAddress()) ||
                (!CGM.getLangOpts().OpenMPIsDevice && CE->getAddress())) &&
               "Declaret target link address is set.");
        if (CGM.getLangOpts().OpenMPIsDevice)
          continue;
        if (!CE->getAddress()) {
          unsigned DiagID = CGM.getDiags().getCustomDiagID(
              DiagnosticsEngine::Error,
              "Offloading entry for declare target variable is incorrect: the "
              "address is invalid.");
          CGM.getDiags().Report(DiagID);
          continue;
        }
        break;
      }
      // For global variables the entry's ID and address coincide.
      createOffloadEntry(CE->getAddress(), CE->getAddress(),
                         CE->getVarSize().getQuantity(), Flags,
                         CE->getLinkage());
    } else {
      llvm_unreachable("Unsupported entry kind.");
    }
  }
}
4176 
/// Loads all the offload entries information from the host IR
/// metadata.
void CGOpenMPRuntime::loadOffloadInfoMetadata() {
  // If we are in target mode, load the metadata from the host IR. This code has
  // to match the metadata creation in createOffloadEntriesAndInfoMetadata().

  if (!CGM.getLangOpts().OpenMPIsDevice)
    return;

  if (CGM.getLangOpts().OMPHostIRFile.empty())
    return;

  auto Buf = llvm::MemoryBuffer::getFile(CGM.getLangOpts().OMPHostIRFile);
  if (auto EC = Buf.getError()) {
    CGM.getDiags().Report(diag::err_cannot_open_file)
        << CGM.getLangOpts().OMPHostIRFile << EC.message();
    return;
  }

  // Parse the host IR into a throwaway context; only the named metadata is
  // needed, not the module itself.
  llvm::LLVMContext C;
  auto ME = expectedToErrorOrAndEmitErrors(
      C, llvm::parseBitcodeFile(Buf.get()->getMemBufferRef(), C));

  if (auto EC = ME.getError()) {
    unsigned DiagID = CGM.getDiags().getCustomDiagID(
        DiagnosticsEngine::Error, "Unable to parse host IR file '%0':'%1'");
    CGM.getDiags().Report(DiagID)
        << CGM.getLangOpts().OMPHostIRFile << EC.message();
    return;
  }

  llvm::NamedMDNode *MD = ME.get()->getNamedMetadata("omp_offload.info");
  if (!MD)
    return;

  for (llvm::MDNode *MN : MD->operands()) {
    // Helpers to read integer/string operands of the current metadata node.
    auto &&GetMDInt = [MN](unsigned Idx) {
      auto *V = cast<llvm::ConstantAsMetadata>(MN->getOperand(Idx));
      return cast<llvm::ConstantInt>(V->getValue())->getZExtValue();
    };

    auto &&GetMDString = [MN](unsigned Idx) {
      auto *V = cast<llvm::MDString>(MN->getOperand(Idx));
      return V->getString();
    };

    // Operand 0 encodes the entry kind; the remaining operand layout matches
    // the emitters in createOffloadEntriesAndInfoMetadata().
    switch (GetMDInt(0)) {
    default:
      llvm_unreachable("Unexpected metadata!");
      break;
    case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
        OffloadingEntryInfoTargetRegion:
      OffloadEntriesInfoManager.initializeTargetRegionEntryInfo(
          /*DeviceID=*/GetMDInt(1), /*FileID=*/GetMDInt(2),
          /*ParentName=*/GetMDString(3), /*Line=*/GetMDInt(4),
          /*Order=*/GetMDInt(5));
      break;
    case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
        OffloadingEntryInfoDeviceGlobalVar:
      OffloadEntriesInfoManager.initializeDeviceGlobalVarEntryInfo(
          /*MangledName=*/GetMDString(1),
          static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>(
              /*Flags=*/GetMDInt(2)),
          /*Order=*/GetMDInt(3));
      break;
    }
  }
}
4245 
4246 void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) {
4247   if (!KmpRoutineEntryPtrTy) {
4248     // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type.
4249     ASTContext &C = CGM.getContext();
4250     QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy};
4251     FunctionProtoType::ExtProtoInfo EPI;
4252     KmpRoutineEntryPtrQTy = C.getPointerType(
4253         C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI));
4254     KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy);
4255   }
4256 }
4257 
4258 QualType CGOpenMPRuntime::getTgtOffloadEntryQTy() {
4259   // Make sure the type of the entry is already created. This is the type we
4260   // have to create:
4261   // struct __tgt_offload_entry{
4262   //   void      *addr;       // Pointer to the offload entry info.
4263   //                          // (function or global)
4264   //   char      *name;       // Name of the function or global.
4265   //   size_t     size;       // Size of the entry info (0 if it a function).
4266   //   int32_t    flags;      // Flags associated with the entry, e.g. 'link'.
4267   //   int32_t    reserved;   // Reserved, to use by the runtime library.
4268   // };
4269   if (TgtOffloadEntryQTy.isNull()) {
4270     ASTContext &C = CGM.getContext();
4271     RecordDecl *RD = C.buildImplicitRecord("__tgt_offload_entry");
4272     RD->startDefinition();
4273     addFieldToRecordDecl(C, RD, C.VoidPtrTy);
4274     addFieldToRecordDecl(C, RD, C.getPointerType(C.CharTy));
4275     addFieldToRecordDecl(C, RD, C.getSizeType());
4276     addFieldToRecordDecl(
4277         C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
4278     addFieldToRecordDecl(
4279         C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
4280     RD->completeDefinition();
4281     RD->addAttr(PackedAttr::CreateImplicit(C));
4282     TgtOffloadEntryQTy = C.getRecordType(RD);
4283   }
4284   return TgtOffloadEntryQTy;
4285 }
4286 
4287 QualType CGOpenMPRuntime::getTgtDeviceImageQTy() {
4288   // These are the types we need to build:
4289   // struct __tgt_device_image{
4290   // void   *ImageStart;       // Pointer to the target code start.
4291   // void   *ImageEnd;         // Pointer to the target code end.
4292   // // We also add the host entries to the device image, as it may be useful
4293   // // for the target runtime to have access to that information.
4294   // __tgt_offload_entry  *EntriesBegin;   // Begin of the table with all
4295   //                                       // the entries.
4296   // __tgt_offload_entry  *EntriesEnd;     // End of the table with all the
4297   //                                       // entries (non inclusive).
4298   // };
4299   if (TgtDeviceImageQTy.isNull()) {
4300     ASTContext &C = CGM.getContext();
4301     RecordDecl *RD = C.buildImplicitRecord("__tgt_device_image");
4302     RD->startDefinition();
4303     addFieldToRecordDecl(C, RD, C.VoidPtrTy);
4304     addFieldToRecordDecl(C, RD, C.VoidPtrTy);
4305     addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy()));
4306     addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy()));
4307     RD->completeDefinition();
4308     TgtDeviceImageQTy = C.getRecordType(RD);
4309   }
4310   return TgtDeviceImageQTy;
4311 }
4312 
4313 QualType CGOpenMPRuntime::getTgtBinaryDescriptorQTy() {
4314   // struct __tgt_bin_desc{
4315   //   int32_t              NumDevices;      // Number of devices supported.
4316   //   __tgt_device_image   *DeviceImages;   // Arrays of device images
4317   //                                         // (one per device).
4318   //   __tgt_offload_entry  *EntriesBegin;   // Begin of the table with all the
4319   //                                         // entries.
4320   //   __tgt_offload_entry  *EntriesEnd;     // End of the table with all the
4321   //                                         // entries (non inclusive).
4322   // };
4323   if (TgtBinaryDescriptorQTy.isNull()) {
4324     ASTContext &C = CGM.getContext();
4325     RecordDecl *RD = C.buildImplicitRecord("__tgt_bin_desc");
4326     RD->startDefinition();
4327     addFieldToRecordDecl(
4328         C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
4329     addFieldToRecordDecl(C, RD, C.getPointerType(getTgtDeviceImageQTy()));
4330     addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy()));
4331     addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy()));
4332     RD->completeDefinition();
4333     TgtBinaryDescriptorQTy = C.getRecordType(RD);
4334   }
4335   return TgtBinaryDescriptorQTy;
4336 }
4337 
namespace {
/// Helper tying together the three declarations involved in privatizing a
/// variable for a task: the variable as seen by the user, its private copy,
/// and (for firstprivate) the initializer element.
struct PrivateHelpersTy {
  PrivateHelpersTy(const VarDecl *Original, const VarDecl *PrivateCopy,
                   const VarDecl *PrivateElemInit)
      : Original(Original), PrivateCopy(PrivateCopy),
        PrivateElemInit(PrivateElemInit) {}
  // The original (shared) variable declaration.
  const VarDecl *Original;
  // The task-local private copy of the variable.
  const VarDecl *PrivateCopy;
  // Initializer element for firstprivate copies; may be null otherwise.
  const VarDecl *PrivateElemInit;
};
/// Private data paired with the required alignment of the original variable.
typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy;
} // anonymous namespace
4350 
4351 static RecordDecl *
4352 createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) {
4353   if (!Privates.empty()) {
4354     ASTContext &C = CGM.getContext();
4355     // Build struct .kmp_privates_t. {
4356     //         /*  private vars  */
4357     //       };
4358     RecordDecl *RD = C.buildImplicitRecord(".kmp_privates.t");
4359     RD->startDefinition();
4360     for (const auto &Pair : Privates) {
4361       const VarDecl *VD = Pair.second.Original;
4362       QualType Type = VD->getType().getNonReferenceType();
4363       FieldDecl *FD = addFieldToRecordDecl(C, RD, Type);
4364       if (VD->hasAttrs()) {
4365         for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()),
4366              E(VD->getAttrs().end());
4367              I != E; ++I)
4368           FD->addAttr(*I);
4369       }
4370     }
4371     RD->completeDefinition();
4372     return RD;
4373   }
4374   return nullptr;
4375 }
4376 
4377 static RecordDecl *
4378 createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind,
4379                          QualType KmpInt32Ty,
4380                          QualType KmpRoutineEntryPointerQTy) {
4381   ASTContext &C = CGM.getContext();
4382   // Build struct kmp_task_t {
4383   //         void *              shareds;
4384   //         kmp_routine_entry_t routine;
4385   //         kmp_int32           part_id;
4386   //         kmp_cmplrdata_t data1;
4387   //         kmp_cmplrdata_t data2;
4388   // For taskloops additional fields:
4389   //         kmp_uint64          lb;
4390   //         kmp_uint64          ub;
4391   //         kmp_int64           st;
4392   //         kmp_int32           liter;
4393   //         void *              reductions;
4394   //       };
4395   RecordDecl *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TTK_Union);
4396   UD->startDefinition();
4397   addFieldToRecordDecl(C, UD, KmpInt32Ty);
4398   addFieldToRecordDecl(C, UD, KmpRoutineEntryPointerQTy);
4399   UD->completeDefinition();
4400   QualType KmpCmplrdataTy = C.getRecordType(UD);
4401   RecordDecl *RD = C.buildImplicitRecord("kmp_task_t");
4402   RD->startDefinition();
4403   addFieldToRecordDecl(C, RD, C.VoidPtrTy);
4404   addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy);
4405   addFieldToRecordDecl(C, RD, KmpInt32Ty);
4406   addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
4407   addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
4408   if (isOpenMPTaskLoopDirective(Kind)) {
4409     QualType KmpUInt64Ty =
4410         CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
4411     QualType KmpInt64Ty =
4412         CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
4413     addFieldToRecordDecl(C, RD, KmpUInt64Ty);
4414     addFieldToRecordDecl(C, RD, KmpUInt64Ty);
4415     addFieldToRecordDecl(C, RD, KmpInt64Ty);
4416     addFieldToRecordDecl(C, RD, KmpInt32Ty);
4417     addFieldToRecordDecl(C, RD, C.VoidPtrTy);
4418   }
4419   RD->completeDefinition();
4420   return RD;
4421 }
4422 
4423 static RecordDecl *
4424 createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy,
4425                                      ArrayRef<PrivateDataTy> Privates) {
4426   ASTContext &C = CGM.getContext();
4427   // Build struct kmp_task_t_with_privates {
4428   //         kmp_task_t task_data;
4429   //         .kmp_privates_t. privates;
4430   //       };
4431   RecordDecl *RD = C.buildImplicitRecord("kmp_task_t_with_privates");
4432   RD->startDefinition();
4433   addFieldToRecordDecl(C, RD, KmpTaskTQTy);
4434   if (const RecordDecl *PrivateRD = createPrivatesRecordDecl(CGM, Privates))
4435     addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD));
4436   RD->completeDefinition();
4437   return RD;
4438 }
4439 
/// Emit a proxy function which accepts kmp_task_t as the second
/// argument.
/// \code
/// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
///   TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, tt,
///   For taskloops:
///   tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
///   tt->reductions, tt->shareds);
///   return 0;
/// }
/// \endcode
static llvm::Function *
emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc,
                      OpenMPDirectiveKind Kind, QualType KmpInt32Ty,
                      QualType KmpTaskTWithPrivatesPtrQTy,
                      QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy,
                      QualType SharedsPtrTy, llvm::Function *TaskFunction,
                      llvm::Value *TaskPrivatesMap) {
  ASTContext &C = CGM.getContext();
  // Build the proxy's signature: kmp_int32 (kmp_int32 gtid,
  // kmp_task_t_with_privates *restrict tt).
  FunctionArgList Args;
  ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                                KmpTaskTWithPrivatesPtrQTy.withRestrict(),
                                ImplicitParamDecl::Other);
  Args.push_back(&GtidArg);
  Args.push_back(&TaskTypeArg);
  const auto &TaskEntryFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
  llvm::FunctionType *TaskEntryTy =
      CGM.getTypes().GetFunctionType(TaskEntryFnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_entry", ""});
  auto *TaskEntry = llvm::Function::Create(
      TaskEntryTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskEntry, TaskEntryFnInfo);
  TaskEntry->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args,
                    Loc, Loc);

  // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map,
  // tt,
  // For taskloops:
  // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
  // tt->task_data.shareds);
  llvm::Value *GtidParam = CGF.EmitLoadOfScalar(
      CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc);
  // TDBase points at the whole kmp_task_t_with_privates; Base at the embedded
  // kmp_task_t (its first field).
  LValue TDBase = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&TaskTypeArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  const auto *KmpTaskTWithPrivatesQTyRD =
      cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
  LValue Base =
      CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
  const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
  // part_id is passed by address so the task can update it.
  auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
  LValue PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI);
  llvm::Value *PartidParam = PartIdLVal.getPointer();

  auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds);
  LValue SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI);
  llvm::Value *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.EmitLoadOfScalar(SharedsLVal, Loc),
      CGF.ConvertTypeForMem(SharedsPtrTy));

  // The privates field only exists when there are privatized variables;
  // otherwise pass a null pointer.
  auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
  llvm::Value *PrivatesParam;
  if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) {
    LValue PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI);
    PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        PrivatesLVal.getPointer(), CGF.VoidPtrTy);
  } else {
    PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }

  llvm::Value *CommonArgs[] = {GtidParam, PartidParam, PrivatesParam,
                               TaskPrivatesMap,
                               CGF.Builder
                                   .CreatePointerBitCastOrAddrSpaceCast(
                                       TDBase.getAddress(), CGF.VoidPtrTy)
                                   .getPointer()};
  SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs),
                                          std::end(CommonArgs));
  if (isOpenMPTaskLoopDirective(Kind)) {
    // Taskloops additionally pass lb, ub, st, liter and reductions loaded from
    // the kmp_task_t fields.
    auto LBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound);
    LValue LBLVal = CGF.EmitLValueForField(Base, *LBFI);
    llvm::Value *LBParam = CGF.EmitLoadOfScalar(LBLVal, Loc);
    auto UBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound);
    LValue UBLVal = CGF.EmitLValueForField(Base, *UBFI);
    llvm::Value *UBParam = CGF.EmitLoadOfScalar(UBLVal, Loc);
    auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride);
    LValue StLVal = CGF.EmitLValueForField(Base, *StFI);
    llvm::Value *StParam = CGF.EmitLoadOfScalar(StLVal, Loc);
    auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
    LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
    llvm::Value *LIParam = CGF.EmitLoadOfScalar(LILVal, Loc);
    auto RFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTReductions);
    LValue RLVal = CGF.EmitLValueForField(Base, *RFI);
    llvm::Value *RParam = CGF.EmitLoadOfScalar(RLVal, Loc);
    CallArgs.push_back(LBParam);
    CallArgs.push_back(UBParam);
    CallArgs.push_back(StParam);
    CallArgs.push_back(LIParam);
    CallArgs.push_back(RParam);
  }
  CallArgs.push_back(SharedsParam);

  CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskFunction,
                                                  CallArgs);
  // The runtime expects the proxy to return 0.
  CGF.EmitStoreThroughLValue(RValue::get(CGF.Builder.getInt32(/*C=*/0)),
                             CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty));
  CGF.FinishFunction();
  return TaskEntry;
}
4554 
/// Emit the task destructor function, which runs the destructor of every
/// privatized field with a non-trivial destruction kind:
/// \code
/// kmp_int32 .omp_task_destructor.(kmp_int32 gtid, kmp_task_t *tt) {
///   ~privates(tt->privates);
/// }
/// \endcode
static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM,
                                            SourceLocation Loc,
                                            QualType KmpInt32Ty,
                                            QualType KmpTaskTWithPrivatesPtrQTy,
                                            QualType KmpTaskTWithPrivatesQTy) {
  ASTContext &C = CGM.getContext();
  // Same signature as the task entry: kmp_int32 (kmp_int32 gtid,
  // kmp_task_t_with_privates *restrict tt).
  FunctionArgList Args;
  ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                                KmpTaskTWithPrivatesPtrQTy.withRestrict(),
                                ImplicitParamDecl::Other);
  Args.push_back(&GtidArg);
  Args.push_back(&TaskTypeArg);
  const auto &DestructorFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
  llvm::FunctionType *DestructorFnTy =
      CGM.getTypes().GetFunctionType(DestructorFnInfo);
  std::string Name =
      CGM.getOpenMPRuntime().getName({"omp_task_destructor", ""});
  auto *DestructorFn =
      llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage,
                             Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), DestructorFn,
                                    DestructorFnInfo);
  DestructorFn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo,
                    Args, Loc, Loc);

  LValue Base = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&TaskTypeArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  const auto *KmpTaskTWithPrivatesQTyRD =
      cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
  // The privates struct is the second field of kmp_task_t_with_privates.
  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
  Base = CGF.EmitLValueForField(Base, *FI);
  // Push a destroy cleanup for each private field that needs destruction;
  // FinishFunction emits them in reverse order.
  for (const auto *Field :
       cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) {
    if (QualType::DestructionKind DtorKind =
            Field->getType().isDestructedType()) {
      LValue FieldLValue = CGF.EmitLValueForField(Base, Field);
      CGF.pushDestroy(DtorKind, FieldLValue.getAddress(), Field->getType());
    }
  }
  CGF.FinishFunction();
  return DestructorFn;
}
4603 
4604 /// Emit a privates mapping function for correct handling of private and
4605 /// firstprivate variables.
4606 /// \code
4607 /// void .omp_task_privates_map.(const .privates. *noalias privs, <ty1>
4608 /// **noalias priv1,...,  <tyn> **noalias privn) {
4609 ///   *priv1 = &.privates.priv1;
4610 ///   ...;
4611 ///   *privn = &.privates.privn;
4612 /// }
4613 /// \endcode
4614 static llvm::Value *
4615 emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc,
4616                                ArrayRef<const Expr *> PrivateVars,
4617                                ArrayRef<const Expr *> FirstprivateVars,
4618                                ArrayRef<const Expr *> LastprivateVars,
4619                                QualType PrivatesQTy,
4620                                ArrayRef<PrivateDataTy> Privates) {
4621   ASTContext &C = CGM.getContext();
4622   FunctionArgList Args;
4623   ImplicitParamDecl TaskPrivatesArg(
4624       C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4625       C.getPointerType(PrivatesQTy).withConst().withRestrict(),
4626       ImplicitParamDecl::Other);
4627   Args.push_back(&TaskPrivatesArg);
4628   llvm::DenseMap<const VarDecl *, unsigned> PrivateVarsPos;
4629   unsigned Counter = 1;
4630   for (const Expr *E : PrivateVars) {
4631     Args.push_back(ImplicitParamDecl::Create(
4632         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4633         C.getPointerType(C.getPointerType(E->getType()))
4634             .withConst()
4635             .withRestrict(),
4636         ImplicitParamDecl::Other));
4637     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4638     PrivateVarsPos[VD] = Counter;
4639     ++Counter;
4640   }
4641   for (const Expr *E : FirstprivateVars) {
4642     Args.push_back(ImplicitParamDecl::Create(
4643         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4644         C.getPointerType(C.getPointerType(E->getType()))
4645             .withConst()
4646             .withRestrict(),
4647         ImplicitParamDecl::Other));
4648     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4649     PrivateVarsPos[VD] = Counter;
4650     ++Counter;
4651   }
4652   for (const Expr *E : LastprivateVars) {
4653     Args.push_back(ImplicitParamDecl::Create(
4654         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4655         C.getPointerType(C.getPointerType(E->getType()))
4656             .withConst()
4657             .withRestrict(),
4658         ImplicitParamDecl::Other));
4659     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4660     PrivateVarsPos[VD] = Counter;
4661     ++Counter;
4662   }
4663   const auto &TaskPrivatesMapFnInfo =
4664       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
4665   llvm::FunctionType *TaskPrivatesMapTy =
4666       CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo);
4667   std::string Name =
4668       CGM.getOpenMPRuntime().getName({"omp_task_privates_map", ""});
4669   auto *TaskPrivatesMap = llvm::Function::Create(
4670       TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage, Name,
4671       &CGM.getModule());
4672   CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskPrivatesMap,
4673                                     TaskPrivatesMapFnInfo);
4674   TaskPrivatesMap->removeFnAttr(llvm::Attribute::NoInline);
4675   TaskPrivatesMap->removeFnAttr(llvm::Attribute::OptimizeNone);
4676   TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline);
4677   CodeGenFunction CGF(CGM);
4678   CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap,
4679                     TaskPrivatesMapFnInfo, Args, Loc, Loc);
4680 
4681   // *privi = &.privates.privi;
4682   LValue Base = CGF.EmitLoadOfPointerLValue(
4683       CGF.GetAddrOfLocalVar(&TaskPrivatesArg),
4684       TaskPrivatesArg.getType()->castAs<PointerType>());
4685   const auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl());
4686   Counter = 0;
4687   for (const FieldDecl *Field : PrivatesQTyRD->fields()) {
4688     LValue FieldLVal = CGF.EmitLValueForField(Base, Field);
4689     const VarDecl *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]];
4690     LValue RefLVal =
4691         CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType());
4692     LValue RefLoadLVal = CGF.EmitLoadOfPointerLValue(
4693         RefLVal.getAddress(), RefLVal.getType()->castAs<PointerType>());
4694     CGF.EmitStoreOfScalar(FieldLVal.getPointer(), RefLoadLVal);
4695     ++Counter;
4696   }
4697   CGF.FinishFunction();
4698   return TaskPrivatesMap;
4699 }
4700 
4701 static bool stable_sort_comparator(const PrivateDataTy P1,
4702                                    const PrivateDataTy P2) {
4703   return P1.first > P2.first;
4704 }
4705 
/// Emit initialization for private variables in task-based directives.
///
/// Iterates the fields of the .privates. record (second field of
/// kmp_task_t_with_privates) in lockstep with \p Privates and emits each
/// copy's initializer, if it has one.
///
/// \param KmpTaskSharedsPtr Pointer to the captured shareds block; source for
///        firstprivate initialization (may be Address::invalid()).
/// \param TDBase LValue of the kmp_task_t_with_privates object being filled.
/// \param KmpTaskTWithPrivatesQTyRD Record whose second field is .privates..
/// \param Data Clause data; only FirstprivateVars is inspected here.
/// \param Privates (alignment, helpers) pairs in .privates. field order.
/// \param ForDup true when called from the task duplication function
///        (taskloop case); there only non-trivial constructor
///        initializations are re-emitted.
static void emitPrivatesInit(CodeGenFunction &CGF,
                             const OMPExecutableDirective &D,
                             Address KmpTaskSharedsPtr, LValue TDBase,
                             const RecordDecl *KmpTaskTWithPrivatesQTyRD,
                             QualType SharedsTy, QualType SharedsPtrTy,
                             const OMPTaskDataTy &Data,
                             ArrayRef<PrivateDataTy> Privates, bool ForDup) {
  ASTContext &C = CGF.getContext();
  // .privates. is the field right after the kmp_task_t header.
  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
  LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI);
  OpenMPDirectiveKind Kind = isOpenMPTaskLoopDirective(D.getDirectiveKind())
                                 ? OMPD_taskloop
                                 : OMPD_task;
  const CapturedStmt &CS = *D.getCapturedStmt(Kind);
  // Needed to look up the shareds-record field of each captured original.
  CodeGenFunction::CGCapturedStmtInfo CapturesInfo(CS);
  LValue SrcBase;
  bool IsTargetTask =
      isOpenMPTargetDataManagementDirective(D.getDirectiveKind()) ||
      isOpenMPTargetExecutionDirective(D.getDirectiveKind());
  // For target-based directives skip 3 firstprivate arrays BasePointersArray,
  // PointersArray and SizesArray. The original variables for these arrays are
  // not captured and we get their addresses explicitly.
  if ((!IsTargetTask && !Data.FirstprivateVars.empty()) ||
      (IsTargetTask && KmpTaskSharedsPtr.isValid())) {
    SrcBase = CGF.MakeAddrLValue(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy)),
        SharedsTy);
  }
  // Walk the .privates. record fields in parallel with Privates.
  FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin();
  for (const PrivateDataTy &Pair : Privates) {
    const VarDecl *VD = Pair.second.PrivateCopy;
    const Expr *Init = VD->getAnyInitializer();
    // In the dup function only non-trivial constructions are re-run; the
    // rest of the task payload was already duplicated bytewise.
    if (Init && (!ForDup || (isa<CXXConstructExpr>(Init) &&
                             !CGF.isTrivialInitializer(Init)))) {
      LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI);
      if (const VarDecl *Elem = Pair.second.PrivateElemInit) {
        // Firstprivate: initialize the copy from the captured original.
        const VarDecl *OriginalVD = Pair.second.Original;
        // Check if the variable is the target-based BasePointersArray,
        // PointersArray or SizesArray.
        LValue SharedRefLValue;
        QualType Type = PrivateLValue.getType();
        const FieldDecl *SharedField = CapturesInfo.lookup(OriginalVD);
        if (IsTargetTask && !SharedField) {
          assert(isa<ImplicitParamDecl>(OriginalVD) &&
                 isa<CapturedDecl>(OriginalVD->getDeclContext()) &&
                 cast<CapturedDecl>(OriginalVD->getDeclContext())
                         ->getNumParams() == 0 &&
                 isa<TranslationUnitDecl>(
                     cast<CapturedDecl>(OriginalVD->getDeclContext())
                         ->getDeclContext()) &&
                 "Expected artificial target data variable.");
          SharedRefLValue =
              CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(OriginalVD), Type);
        } else {
          // Regular captured variable: address it through the shareds
          // record, but with the alignment declared on the original decl.
          SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField);
          SharedRefLValue = CGF.MakeAddrLValue(
              Address(SharedRefLValue.getPointer(), C.getDeclAlign(OriginalVD)),
              SharedRefLValue.getType(), LValueBaseInfo(AlignmentSource::Decl),
              SharedRefLValue.getTBAAInfo());
        }
        if (Type->isArrayType()) {
          // Initialize firstprivate array.
          if (!isa<CXXConstructExpr>(Init) || CGF.isTrivialInitializer(Init)) {
            // Perform simple memcpy.
            CGF.EmitAggregateAssign(PrivateLValue, SharedRefLValue, Type);
          } else {
            // Initialize firstprivate array using element-by-element
            // initialization.
            CGF.EmitOMPAggregateAssign(
                PrivateLValue.getAddress(), SharedRefLValue.getAddress(), Type,
                [&CGF, Elem, Init, &CapturesInfo](Address DestElement,
                                                  Address SrcElement) {
                  // Clean up any temporaries needed by the initialization.
                  CodeGenFunction::OMPPrivateScope InitScope(CGF);
                  // Bind the init helper variable to the current source
                  // element so Init reads from it.
                  InitScope.addPrivate(
                      Elem, [SrcElement]() -> Address { return SrcElement; });
                  (void)InitScope.Privatize();
                  // Emit initialization for single element.
                  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(
                      CGF, &CapturesInfo);
                  CGF.EmitAnyExprToMem(Init, DestElement,
                                       Init->getType().getQualifiers(),
                                       /*IsInitializer=*/false);
                });
          }
        } else {
          // Non-array firstprivate: bind the init helper to the shared
          // original and run the copy-initializer into the private field.
          CodeGenFunction::OMPPrivateScope InitScope(CGF);
          InitScope.addPrivate(Elem, [SharedRefLValue]() -> Address {
            return SharedRefLValue.getAddress();
          });
          (void)InitScope.Privatize();
          CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo);
          CGF.EmitExprAsInit(Init, VD, PrivateLValue,
                             /*capturedByInit=*/false);
        }
      } else {
        // Private/lastprivate copy: emit its (default) initializer.
        CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false);
      }
    }
    ++FI;
  }
}
4810 
4811 /// Check if duplication function is required for taskloops.
4812 static bool checkInitIsRequired(CodeGenFunction &CGF,
4813                                 ArrayRef<PrivateDataTy> Privates) {
4814   bool InitRequired = false;
4815   for (const PrivateDataTy &Pair : Privates) {
4816     const VarDecl *VD = Pair.second.PrivateCopy;
4817     const Expr *Init = VD->getAnyInitializer();
4818     InitRequired = InitRequired || (Init && isa<CXXConstructExpr>(Init) &&
4819                                     !CGF.isTrivialInitializer(Init));
4820     if (InitRequired)
4821       break;
4822   }
4823   return InitRequired;
4824 }
4825 
4826 
4827 /// Emit task_dup function (for initialization of
4828 /// private/firstprivate/lastprivate vars and last_iter flag)
4829 /// \code
4830 /// void __task_dup_entry(kmp_task_t *task_dst, const kmp_task_t *task_src, int
4831 /// lastpriv) {
4832 /// // setup lastprivate flag
4833 ///    task_dst->last = lastpriv;
4834 /// // could be constructor calls here...
4835 /// }
4836 /// \endcode
static llvm::Value *
emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc,
                    const OMPExecutableDirective &D,
                    QualType KmpTaskTWithPrivatesPtrQTy,
                    const RecordDecl *KmpTaskTWithPrivatesQTyRD,
                    const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy,
                    QualType SharedsPtrTy, const OMPTaskDataTy &Data,
                    ArrayRef<PrivateDataTy> Privates, bool WithLastIter) {
  ASTContext &C = CGM.getContext();
  // Signature: void .omp_task_dup.(kmp_task_t *dst, kmp_task_t *src,
  //                                int lastpriv);
  FunctionArgList Args;
  ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                           KmpTaskTWithPrivatesPtrQTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                           KmpTaskTWithPrivatesPtrQTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy,
                                ImplicitParamDecl::Other);
  Args.push_back(&DstArg);
  Args.push_back(&SrcArg);
  Args.push_back(&LastprivArg);
  const auto &TaskDupFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *TaskDupTy = CGM.getTypes().GetFunctionType(TaskDupFnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_dup", ""});
  auto *TaskDup = llvm::Function::Create(
      TaskDupTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskDup, TaskDupFnInfo);
  TaskDup->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskDup, TaskDupFnInfo, Args, Loc,
                    Loc);

  // Base of the destination kmp_task_t_with_privates object.
  LValue TDBase = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&DstArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  // task_dst->liter = lastpriv;
  if (WithLastIter) {
    auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
    LValue Base = CGF.EmitLValueForField(
        TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
    LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
    llvm::Value *Lastpriv = CGF.EmitLoadOfScalar(
        CGF.GetAddrOfLocalVar(&LastprivArg), /*Volatile=*/false, C.IntTy, Loc);
    CGF.EmitStoreOfScalar(Lastpriv, LILVal);
  }

  // Emit initial values for private copies (if any).
  assert(!Privates.empty());
  Address KmpTaskSharedsPtr = Address::invalid();
  // Firstprivates are initialized from the SOURCE task's shareds pointer.
  if (!Data.FirstprivateVars.empty()) {
    LValue TDBase = CGF.EmitLoadOfPointerLValue(
        CGF.GetAddrOfLocalVar(&SrcArg),
        KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
    LValue Base = CGF.EmitLValueForField(
        TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
    KmpTaskSharedsPtr = Address(
        CGF.EmitLoadOfScalar(CGF.EmitLValueForField(
                                 Base, *std::next(KmpTaskTQTyRD->field_begin(),
                                                  KmpTaskTShareds)),
                             Loc),
        CGF.getNaturalTypeAlignment(SharedsTy));
  }
  // Re-run the (non-trivial) private initializers in the destination task.
  emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase, KmpTaskTWithPrivatesQTyRD,
                   SharedsTy, SharedsPtrTy, Data, Privates, /*ForDup=*/true);
  CGF.FinishFunction();
  return TaskDup;
}
4905 
4906 /// Checks if destructor function is required to be generated.
4907 /// \return true if cleanups are required, false otherwise.
4908 static bool
4909 checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD) {
4910   bool NeedsCleanup = false;
4911   auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
4912   const auto *PrivateRD = cast<RecordDecl>(FI->getType()->getAsTagDecl());
4913   for (const FieldDecl *FD : PrivateRD->fields()) {
4914     NeedsCleanup = NeedsCleanup || FD->getType().isDestructedType();
4915     if (NeedsCleanup)
4916       break;
4917   }
4918   return NeedsCleanup;
4919 }
4920 
// Allocates and initializes a kmp_task_t object for a task-generating
// directive: aggregates the private copies, builds the task record types and
// helper functions (privates map, proxy entry, dup, destructors), calls
// __kmpc_omp_task_alloc, copies the shareds and emits private
// initialization. Returns the pieces callers need to actually enqueue the
// task (new task pointer, proxy entry, typed task base, kmp_task_t record).
CGOpenMPRuntime::TaskResultTy
CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,
                              const OMPExecutableDirective &D,
                              llvm::Function *TaskFunction, QualType SharedsTy,
                              Address Shareds, const OMPTaskDataTy &Data) {
  ASTContext &C = CGM.getContext();
  llvm::SmallVector<PrivateDataTy, 4> Privates;
  // Aggregate privates and sort them by the alignment.
  // private clause: no element initializer.
  auto I = Data.PrivateCopies.begin();
  for (const Expr *E : Data.PrivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
                         /*PrivateElemInit=*/nullptr));
    ++I;
  }
  // firstprivate clause: carries the element-init helper variable.
  I = Data.FirstprivateCopies.begin();
  auto IElemInitRef = Data.FirstprivateInits.begin();
  for (const Expr *E : Data.FirstprivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(
            VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
            cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl())));
    ++I;
    ++IElemInitRef;
  }
  // lastprivate clause: no element initializer.
  I = Data.LastprivateCopies.begin();
  for (const Expr *E : Data.LastprivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
                         /*PrivateElemInit=*/nullptr));
    ++I;
  }
  // Sort by decreasing alignment (stable, so clause order is kept among
  // equally-aligned privates) to avoid padding in the privates record.
  std::stable_sort(Privates.begin(), Privates.end(), stable_sort_comparator);
  QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
  // Build type kmp_routine_entry_t (if not built yet).
  emitKmpRoutineEntryT(KmpInt32Ty);
  // Build type kmp_task_t (if not built yet). Taskloop directives use an
  // extended record, cached separately from the plain task record.
  if (isOpenMPTaskLoopDirective(D.getDirectiveKind())) {
    if (SavedKmpTaskloopTQTy.isNull()) {
      SavedKmpTaskloopTQTy = C.getRecordType(createKmpTaskTRecordDecl(
          CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
    }
    KmpTaskTQTy = SavedKmpTaskloopTQTy;
  } else {
    assert((D.getDirectiveKind() == OMPD_task ||
            isOpenMPTargetExecutionDirective(D.getDirectiveKind()) ||
            isOpenMPTargetDataManagementDirective(D.getDirectiveKind())) &&
           "Expected taskloop, task or target directive");
    if (SavedKmpTaskTQTy.isNull()) {
      SavedKmpTaskTQTy = C.getRecordType(createKmpTaskTRecordDecl(
          CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
    }
    KmpTaskTQTy = SavedKmpTaskTQTy;
  }
  const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
  // Build particular struct kmp_task_t for the given task.
  const RecordDecl *KmpTaskTWithPrivatesQTyRD =
      createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates);
  QualType KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD);
  QualType KmpTaskTWithPrivatesPtrQTy =
      C.getPointerType(KmpTaskTWithPrivatesQTy);
  llvm::Type *KmpTaskTWithPrivatesTy = CGF.ConvertType(KmpTaskTWithPrivatesQTy);
  llvm::Type *KmpTaskTWithPrivatesPtrTy =
      KmpTaskTWithPrivatesTy->getPointerTo();
  llvm::Value *KmpTaskTWithPrivatesTySize =
      CGF.getTypeSize(KmpTaskTWithPrivatesQTy);
  QualType SharedsPtrTy = C.getPointerType(SharedsTy);

  // Emit initial values for private copies (if any).
  llvm::Value *TaskPrivatesMap = nullptr;
  // The privates-map parameter is the 4th argument of the outlined task
  // function; take its LLVM type from there.
  llvm::Type *TaskPrivatesMapTy =
      std::next(TaskFunction->arg_begin(), 3)->getType();
  if (!Privates.empty()) {
    auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
    TaskPrivatesMap = emitTaskPrivateMappingFunction(
        CGM, Loc, Data.PrivateVars, Data.FirstprivateVars, Data.LastprivateVars,
        FI->getType(), Privates);
    TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        TaskPrivatesMap, TaskPrivatesMapTy);
  } else {
    // No privates: pass a null map pointer.
    TaskPrivatesMap = llvm::ConstantPointerNull::get(
        cast<llvm::PointerType>(TaskPrivatesMapTy));
  }
  // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid,
  // kmp_task_t *tt);
  llvm::Function *TaskEntry = emitProxyTaskFunction(
      CGM, Loc, D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
      KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction,
      TaskPrivatesMap);

  // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
  // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
  // kmp_routine_entry_t *task_entry);
  // Task flags. Format is taken from
  // https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h,
  // description of kmp_tasking_flags struct.
  enum {
    TiedFlag = 0x1,
    FinalFlag = 0x2,
    DestructorsFlag = 0x8,
    PriorityFlag = 0x20
  };
  unsigned Flags = Data.Tied ? TiedFlag : 0;
  bool NeedsCleanup = false;
  if (!Privates.empty()) {
    NeedsCleanup = checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD);
    if (NeedsCleanup)
      Flags = Flags | DestructorsFlag;
  }
  if (Data.Priority.getInt())
    Flags = Flags | PriorityFlag;
  // final clause: a non-null pointer means the condition is dynamic and must
  // be selected at run time; otherwise the int carries the constant result.
  llvm::Value *TaskFlags =
      Data.Final.getPointer()
          ? CGF.Builder.CreateSelect(Data.Final.getPointer(),
                                     CGF.Builder.getInt32(FinalFlag),
                                     CGF.Builder.getInt32(/*C=*/0))
          : CGF.Builder.getInt32(Data.Final.getInt() ? FinalFlag : 0);
  TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags));
  llvm::Value *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy));
  llvm::Value *AllocArgs[] = {emitUpdateLocation(CGF, Loc),
                              getThreadID(CGF, Loc), TaskFlags,
                              KmpTaskTWithPrivatesTySize, SharedsSize,
                              CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                                  TaskEntry, KmpRoutineEntryPtrTy)};
  llvm::Value *NewTask = CGF.EmitRuntimeCall(
      createRuntimeFunction(OMPRTL__kmpc_omp_task_alloc), AllocArgs);
  llvm::Value *NewTaskNewTaskTTy =
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          NewTask, KmpTaskTWithPrivatesPtrTy);
  LValue Base = CGF.MakeNaturalAlignAddrLValue(NewTaskNewTaskTTy,
                                               KmpTaskTWithPrivatesQTy);
  LValue TDBase =
      CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin());
  // Fill the data in the resulting kmp_task_t record.
  // Copy shareds if there are any.
  Address KmpTaskSharedsPtr = Address::invalid();
  if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) {
    KmpTaskSharedsPtr =
        Address(CGF.EmitLoadOfScalar(
                    CGF.EmitLValueForField(
                        TDBase, *std::next(KmpTaskTQTyRD->field_begin(),
                                           KmpTaskTShareds)),
                    Loc),
                CGF.getNaturalTypeAlignment(SharedsTy));
    LValue Dest = CGF.MakeAddrLValue(KmpTaskSharedsPtr, SharedsTy);
    LValue Src = CGF.MakeAddrLValue(Shareds, SharedsTy);
    CGF.EmitAggregateCopy(Dest, Src, SharedsTy, AggValueSlot::DoesNotOverlap);
  }
  // Emit initial values for private copies (if any).
  TaskResultTy Result;
  if (!Privates.empty()) {
    emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, Base, KmpTaskTWithPrivatesQTyRD,
                     SharedsTy, SharedsPtrTy, Data, Privates,
                     /*ForDup=*/false);
    // Taskloops may duplicate tasks; emit the dup function when lastprivates
    // or non-trivial private initializers require it.
    if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) &&
        (!Data.LastprivateVars.empty() || checkInitIsRequired(CGF, Privates))) {
      Result.TaskDupFn = emitTaskDupFunction(
          CGM, Loc, D, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTyRD,
          KmpTaskTQTyRD, SharedsTy, SharedsPtrTy, Data, Privates,
          /*WithLastIter=*/!Data.LastprivateVars.empty());
    }
  }
  // Fields of union "kmp_cmplrdata_t" for destructors and priority.
  enum { Priority = 0, Destructors = 1 };
  // Provide pointer to function with destructors for privates.
  auto FI = std::next(KmpTaskTQTyRD->field_begin(), Data1);
  const RecordDecl *KmpCmplrdataUD =
      (*FI)->getType()->getAsUnionType()->getDecl();
  if (NeedsCleanup) {
    llvm::Value *DestructorFn = emitDestructorsFunction(
        CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
        KmpTaskTWithPrivatesQTy);
    LValue Data1LV = CGF.EmitLValueForField(TDBase, *FI);
    LValue DestructorsLV = CGF.EmitLValueForField(
        Data1LV, *std::next(KmpCmplrdataUD->field_begin(), Destructors));
    CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                              DestructorFn, KmpRoutineEntryPtrTy),
                          DestructorsLV);
  }
  // Set priority.
  if (Data.Priority.getInt()) {
    LValue Data2LV = CGF.EmitLValueForField(
        TDBase, *std::next(KmpTaskTQTyRD->field_begin(), Data2));
    LValue PriorityLV = CGF.EmitLValueForField(
        Data2LV, *std::next(KmpCmplrdataUD->field_begin(), Priority));
    CGF.EmitStoreOfScalar(Data.Priority.getPointer(), PriorityLV);
  }
  Result.NewTask = NewTask;
  Result.TaskEntry = TaskEntry;
  Result.NewTaskNewTaskTTy = NewTaskNewTaskTTy;
  Result.TDBase = TDBase;
  Result.KmpTaskTQTyRD = KmpTaskTQTyRD;
  return Result;
}
5121 
// Emits the runtime calls that actually schedule a task created by
// emitTaskInit: builds the kmp_depend_info array for any depend clauses,
// then either enqueues the task (__kmpc_omp_task[_with_deps]) or — for a
// false 'if' clause — waits on dependences and runs the task body serially
// between __kmpc_omp_task_begin_if0/__kmpc_omp_task_complete_if0.
void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
                                   const OMPExecutableDirective &D,
                                   llvm::Function *TaskFunction,
                                   QualType SharedsTy, Address Shareds,
                                   const Expr *IfCond,
                                   const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint())
    return;

  TaskResultTy Result =
      emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
  llvm::Value *NewTask = Result.NewTask;
  llvm::Function *TaskEntry = Result.TaskEntry;
  llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy;
  LValue TDBase = Result.TDBase;
  const RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD;
  ASTContext &C = CGM.getContext();
  // Process list of dependences.
  Address DependenciesArray = Address::invalid();
  unsigned NumDependencies = Data.Dependences.size();
  if (NumDependencies) {
    // Dependence kind for RTL.
    enum RTLDependenceKindTy { DepIn = 0x01, DepInOut = 0x3, DepMutexInOutSet = 0x4 };
    enum RTLDependInfoFieldsTy { BaseAddr, Len, Flags };
    RecordDecl *KmpDependInfoRD;
    QualType FlagsTy =
        C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false);
    llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
    // Build (once) the implicit record matching the runtime's
    // kmp_depend_info: { intptr_t base_addr; size_t len; flags }.
    if (KmpDependInfoTy.isNull()) {
      KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info");
      KmpDependInfoRD->startDefinition();
      addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType());
      addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType());
      addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy);
      KmpDependInfoRD->completeDefinition();
      KmpDependInfoTy = C.getRecordType(KmpDependInfoRD);
    } else {
      KmpDependInfoRD = cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
    }
    // Define type kmp_depend_info[<Dependences.size()>];
    QualType KmpDependInfoArrayTy = C.getConstantArrayType(
        KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies),
        ArrayType::Normal, /*IndexTypeQuals=*/0);
    // kmp_depend_info[<Dependences.size()>] deps;
    DependenciesArray =
        CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr");
    for (unsigned I = 0; I < NumDependencies; ++I) {
      const Expr *E = Data.Dependences[I].second;
      LValue Addr = CGF.EmitLValue(E);
      llvm::Value *Size;
      QualType Ty = E->getType();
      if (const auto *ASE =
              dyn_cast<OMPArraySectionExpr>(E->IgnoreParenImpCasts())) {
        // Array section: length = (&upper_bound + 1) - &lower_bound, in
        // bytes, computed from the two element addresses.
        LValue UpAddrLVal =
            CGF.EmitOMPArraySectionExpr(ASE, /*LowerBound=*/false);
        llvm::Value *UpAddr =
            CGF.Builder.CreateConstGEP1_32(UpAddrLVal.getPointer(), /*Idx0=*/1);
        llvm::Value *LowIntPtr =
            CGF.Builder.CreatePtrToInt(Addr.getPointer(), CGM.SizeTy);
        llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGM.SizeTy);
        Size = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr);
      } else {
        Size = CGF.getTypeSize(Ty);
      }
      LValue Base = CGF.MakeAddrLValue(
          CGF.Builder.CreateConstArrayGEP(DependenciesArray, I),
          KmpDependInfoTy);
      // deps[i].base_addr = &<Dependences[i].second>;
      LValue BaseAddrLVal = CGF.EmitLValueForField(
          Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
      CGF.EmitStoreOfScalar(
          CGF.Builder.CreatePtrToInt(Addr.getPointer(), CGF.IntPtrTy),
          BaseAddrLVal);
      // deps[i].len = sizeof(<Dependences[i].second>);
      LValue LenLVal = CGF.EmitLValueForField(
          Base, *std::next(KmpDependInfoRD->field_begin(), Len));
      CGF.EmitStoreOfScalar(Size, LenLVal);
      // deps[i].flags = <Dependences[i].first>;
      RTLDependenceKindTy DepKind;
      switch (Data.Dependences[I].first) {
      case OMPC_DEPEND_in:
        DepKind = DepIn;
        break;
      // Out and InOut dependencies must use the same code.
      case OMPC_DEPEND_out:
      case OMPC_DEPEND_inout:
        DepKind = DepInOut;
        break;
      case OMPC_DEPEND_mutexinoutset:
        DepKind = DepMutexInOutSet;
        break;
      case OMPC_DEPEND_source:
      case OMPC_DEPEND_sink:
      case OMPC_DEPEND_unknown:
        llvm_unreachable("Unknown task dependence type");
      }
      LValue FlagsLVal = CGF.EmitLValueForField(
          Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
      CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
                            FlagsLVal);
    }
    // Decay the array to a void* pointer to its first element for the
    // runtime calls below.
    DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        CGF.Builder.CreateConstArrayGEP(DependenciesArray, 0), CGF.VoidPtrTy);
  }

  // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
  // libcall.
  // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
  // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
  // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if dependence
  // list is not empty
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
  llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask };
  llvm::Value *DepTaskArgs[7];
  if (NumDependencies) {
    DepTaskArgs[0] = UpLoc;
    DepTaskArgs[1] = ThreadID;
    DepTaskArgs[2] = NewTask;
    DepTaskArgs[3] = CGF.Builder.getInt32(NumDependencies);
    DepTaskArgs[4] = DependenciesArray.getPointer();
    DepTaskArgs[5] = CGF.Builder.getInt32(0);
    DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }
  // 'if' clause true (or absent): enqueue the task with the runtime.
  auto &&ThenCodeGen = [this, &Data, TDBase, KmpTaskTQTyRD, NumDependencies,
                        &TaskArgs,
                        &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) {
    // Untied tasks start at part id 0.
    if (!Data.Tied) {
      auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
      LValue PartIdLVal = CGF.EmitLValueForField(TDBase, *PartIdFI);
      CGF.EmitStoreOfScalar(CGF.Builder.getInt32(0), PartIdLVal);
    }
    if (NumDependencies) {
      CGF.EmitRuntimeCall(
          createRuntimeFunction(OMPRTL__kmpc_omp_task_with_deps), DepTaskArgs);
    } else {
      CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task),
                          TaskArgs);
    }
    // Check if parent region is untied and build return for untied task;
    if (auto *Region =
            dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
      Region->emitUntiedSwitch(CGF);
  };

  llvm::Value *DepWaitTaskArgs[6];
  if (NumDependencies) {
    DepWaitTaskArgs[0] = UpLoc;
    DepWaitTaskArgs[1] = ThreadID;
    DepWaitTaskArgs[2] = CGF.Builder.getInt32(NumDependencies);
    DepWaitTaskArgs[3] = DependenciesArray.getPointer();
    DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
    DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }
  // 'if' clause false: execute the task body serially on this thread.
  auto &&ElseCodeGen = [&TaskArgs, ThreadID, NewTaskNewTaskTTy, TaskEntry,
                        NumDependencies, &DepWaitTaskArgs,
                        Loc](CodeGenFunction &CGF, PrePostActionTy &) {
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    CodeGenFunction::RunCleanupsScope LocalScope(CGF);
    // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
    // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
    // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info
    // is specified.
    if (NumDependencies)
      CGF.EmitRuntimeCall(RT.createRuntimeFunction(OMPRTL__kmpc_omp_wait_deps),
                          DepWaitTaskArgs);
    // Call proxy_task_entry(gtid, new_task);
    auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy,
                      Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
      Action.Enter(CGF);
      llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy};
      CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskEntry,
                                                          OutlinedFnArgs);
    };

    // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
    // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
    RegionCodeGenTy RCG(CodeGen);
    CommonActionTy Action(
        RT.createRuntimeFunction(OMPRTL__kmpc_omp_task_begin_if0), TaskArgs,
        RT.createRuntimeFunction(OMPRTL__kmpc_omp_task_complete_if0), TaskArgs);
    RCG.setAction(Action);
    RCG(CGF);
  };

  if (IfCond) {
    emitOMPIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen);
  } else {
    RegionCodeGenTy ThenRCG(ThenCodeGen);
    ThenRCG(CGF);
  }
}
5316 
/// Emit code for the 'taskloop' directive: allocate/initialize the task
/// object via emitTaskInit, fill the loop-bound, stride and reduction fields
/// of the generated kmp_task_t, and emit the __kmpc_taskloop runtime call.
/// Note: routine and part_id fields are initialized by the
/// __kmpc_omp_task_alloc() libcall itself, so they are not touched here.
void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPLoopDirective &D,
                                       llvm::Function *TaskFunction,
                                       QualType SharedsTy, Address Shareds,
                                       const Expr *IfCond,
                                       const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint())
    return;
  TaskResultTy Result =
      emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
  // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
  // libcall.
  // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
  // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
  // sched, kmp_uint64 grainsize, void *task_dup);
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
  // The if clause is lowered to the runtime's if_val argument rather than to a
  // then/else split as in emitTaskCall: the library handles the serialization.
  llvm::Value *IfVal;
  if (IfCond) {
    IfVal = CGF.Builder.CreateIntCast(CGF.EvaluateExprAsBool(IfCond), CGF.IntTy,
                                      /*isSigned=*/true);
  } else {
    IfVal = llvm::ConstantInt::getSigned(CGF.IntTy, /*V=*/1);
  }

  // Store the initial lower bound into the lb field of the task descriptor.
  LValue LBLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound));
  const auto *LBVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getLowerBoundVariable())->getDecl());
  CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(), LBLVal.getQuals(),
                       /*IsInitializer=*/true);
  // Store the initial upper bound into the ub field.
  LValue UBLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound));
  const auto *UBVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getUpperBoundVariable())->getDecl());
  CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(), UBLVal.getQuals(),
                       /*IsInitializer=*/true);
  // Store the stride into the st field.
  LValue StLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTStride));
  const auto *StVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl());
  CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(), StLVal.getQuals(),
                       /*IsInitializer=*/true);
  // Store reductions address.
  LValue RedLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTReductions));
  if (Data.Reductions) {
    CGF.EmitStoreOfScalar(Data.Reductions, RedLVal);
  } else {
    CGF.EmitNullInitialization(RedLVal.getAddress(),
                               CGF.getContext().VoidPtrTy);
  }
  // Encoding of the 'sched' argument expected by __kmpc_taskloop.
  enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 };
  llvm::Value *TaskArgs[] = {
      UpLoc,
      ThreadID,
      Result.NewTask,
      IfVal,
      LBLVal.getPointer(),
      UBLVal.getPointer(),
      CGF.EmitLoadOfScalar(StLVal, Loc),
      llvm::ConstantInt::getSigned(
              CGF.IntTy, 1), // Always 1 because taskgroup emitted by the compiler
      // Data.Schedule's int bit distinguishes num_tasks from grainsize.
      llvm::ConstantInt::getSigned(
          CGF.IntTy, Data.Schedule.getPointer()
                         ? Data.Schedule.getInt() ? NumTasks : Grainsize
                         : NoSchedule),
      Data.Schedule.getPointer()
          ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty,
                                      /*isSigned=*/false)
          : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0),
      Result.TaskDupFn ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                             Result.TaskDupFn, CGF.VoidPtrTy)
                       : llvm::ConstantPointerNull::get(CGF.VoidPtrTy)};
  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_taskloop), TaskArgs);
}
5397 
/// Emit reduction operation for each element of array (required for
/// array sections) LHS op = RHS.
/// Generates a while-do style loop that walks both arrays in lockstep,
/// privatizing LHSVar/RHSVar to the current element on each iteration.
/// \param Type Type of array.
/// \param LHSVar Variable on the left side of the reduction operation
/// (references element of array in original variable).
/// \param RHSVar Variable on the right side of the reduction operation
/// (references element of array in original variable).
/// \param RedOpGen Generator of reduction operation with use of LHSVar and
/// RHSVar.
/// \param XExpr, EExpr, UpExpr Optional expressions forwarded to RedOpGen
/// (used by the atomic-reduction path).
static void EmitOMPAggregateReduction(
    CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar,
    const VarDecl *RHSVar,
    const llvm::function_ref<void(CodeGenFunction &CGF, const Expr *,
                                  const Expr *, const Expr *)> &RedOpGen,
    const Expr *XExpr = nullptr, const Expr *EExpr = nullptr,
    const Expr *UpExpr = nullptr) {
  // Perform element-by-element initialization.
  QualType ElementTy;
  Address LHSAddr = CGF.GetAddrOfLocalVar(LHSVar);
  Address RHSAddr = CGF.GetAddrOfLocalVar(RHSVar);

  // Drill down to the base element type on both arrays.
  const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, LHSAddr);

  llvm::Value *RHSBegin = RHSAddr.getPointer();
  llvm::Value *LHSBegin = LHSAddr.getPointer();
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *LHSEnd = CGF.Builder.CreateGEP(LHSBegin, NumElements);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arraycpy.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arraycpy.done");
  // Skip the loop entirely for zero-length arrays.
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(LHSBegin, LHSEnd, "omp.arraycpy.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);

  // PHIs track the current source/destination element across iterations; the
  // back-edge incoming values are added after the increment below.
  llvm::PHINode *RHSElementPHI = CGF.Builder.CreatePHI(
      RHSBegin->getType(), 2, "omp.arraycpy.srcElementPast");
  RHSElementPHI->addIncoming(RHSBegin, EntryBB);
  Address RHSElementCurrent =
      Address(RHSElementPHI,
              RHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  llvm::PHINode *LHSElementPHI = CGF.Builder.CreatePHI(
      LHSBegin->getType(), 2, "omp.arraycpy.destElementPast");
  LHSElementPHI->addIncoming(LHSBegin, EntryBB);
  Address LHSElementCurrent =
      Address(LHSElementPHI,
              LHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  // Privatize LHSVar/RHSVar to the current elements so RedOpGen's expressions,
  // which reference the variables, operate on the element addresses.
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Scope.addPrivate(LHSVar, [=]() { return LHSElementCurrent; });
  Scope.addPrivate(RHSVar, [=]() { return RHSElementCurrent; });
  Scope.Privatize();
  RedOpGen(CGF, XExpr, EExpr, UpExpr);
  Scope.ForceCleanup();

  // Shift the address forward by one element.
  llvm::Value *LHSElementNext = CGF.Builder.CreateConstGEP1_32(
      LHSElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
  llvm::Value *RHSElementNext = CGF.Builder.CreateConstGEP1_32(
      RHSElementPHI, /*Idx0=*/1, "omp.arraycpy.src.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      CGF.Builder.CreateICmpEQ(LHSElementNext, LHSEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  LHSElementPHI->addIncoming(LHSElementNext, CGF.Builder.GetInsertBlock());
  RHSElementPHI->addIncoming(RHSElementNext, CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}
5477 
5478 /// Emit reduction combiner. If the combiner is a simple expression emit it as
5479 /// is, otherwise consider it as combiner of UDR decl and emit it as a call of
5480 /// UDR combiner function.
5481 static void emitReductionCombiner(CodeGenFunction &CGF,
5482                                   const Expr *ReductionOp) {
5483   if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
5484     if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
5485       if (const auto *DRE =
5486               dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
5487         if (const auto *DRD =
5488                 dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) {
5489           std::pair<llvm::Function *, llvm::Function *> Reduction =
5490               CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
5491           RValue Func = RValue::get(Reduction.first);
5492           CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
5493           CGF.EmitIgnoredExpr(ReductionOp);
5494           return;
5495         }
5496   CGF.EmitIgnoredExpr(ReductionOp);
5497 }
5498 
/// Emits the internal reduce_func(void *LHSArg, void *RHSArg) used by
/// __kmpc_reduce{_nowait}: both arguments are arrays of void* element
/// pointers; for each reduction item the function applies the corresponding
/// combiner, storing the result through the LHS pointer.
llvm::Function *CGOpenMPRuntime::emitReductionFunction(
    SourceLocation Loc, llvm::Type *ArgsType, ArrayRef<const Expr *> Privates,
    ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
    ArrayRef<const Expr *> ReductionOps) {
  ASTContext &C = CGM.getContext();

  // void reduction_func(void *LHSArg, void *RHSArg);
  FunctionArgList Args;
  ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  Args.push_back(&LHSArg);
  Args.push_back(&RHSArg);
  const auto &CGFI =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  std::string Name = getName({"omp", "reduction", "reduction_func"});
  auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
                                    llvm::GlobalValue::InternalLinkage, Name,
                                    &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);

  // Dst = (void*[n])(LHSArg);
  // Src = (void*[n])(RHSArg);
  Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
      ArgsType), CGF.getPointerAlign());
  Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
      ArgsType), CGF.getPointerAlign());

  //  ...
  //  *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]);
  //  ...
  // Map each LHS/RHS variable to its slot in the argument arrays. Idx can run
  // ahead of I because variably-modified items occupy an extra slot holding
  // the array size (see below).
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  auto IPriv = Privates.begin();
  unsigned Idx = 0;
  for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) {
    const auto *RHSVar =
        cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl());
    Scope.addPrivate(RHSVar, [&CGF, RHS, Idx, RHSVar]() {
      return emitAddrOfVarFromArray(CGF, RHS, Idx, RHSVar);
    });
    const auto *LHSVar =
        cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl());
    Scope.addPrivate(LHSVar, [&CGF, LHS, Idx, LHSVar]() {
      return emitAddrOfVarFromArray(CGF, LHS, Idx, LHSVar);
    });
    QualType PrivTy = (*IPriv)->getType();
    if (PrivTy->isVariablyModifiedType()) {
      // Get array size and emit VLA type.
      ++Idx;
      Address Elem = CGF.Builder.CreateConstArrayGEP(LHS, Idx);
      llvm::Value *Ptr = CGF.Builder.CreateLoad(Elem);
      const VariableArrayType *VLA =
          CGF.getContext().getAsVariableArrayType(PrivTy);
      const auto *OVE = cast<OpaqueValueExpr>(VLA->getSizeExpr());
      // The VLA size expression is opaque; bind it to the loaded size so
      // EmitVariablyModifiedType can materialize the runtime type.
      CodeGenFunction::OpaqueValueMapping OpaqueMap(
          CGF, OVE, RValue::get(CGF.Builder.CreatePtrToInt(Ptr, CGF.SizeTy)));
      CGF.EmitVariablyModifiedType(PrivTy);
    }
  }
  Scope.Privatize();
  // Emit one combiner per reduction item; array-typed items get an
  // element-by-element loop.
  IPriv = Privates.begin();
  auto ILHS = LHSExprs.begin();
  auto IRHS = RHSExprs.begin();
  for (const Expr *E : ReductionOps) {
    if ((*IPriv)->getType()->isArrayType()) {
      // Emit reduction for array section.
      const auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
      const auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
      EmitOMPAggregateReduction(
          CGF, (*IPriv)->getType(), LHSVar, RHSVar,
          [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
            emitReductionCombiner(CGF, E);
          });
    } else {
      // Emit reduction for array subscript or single variable.
      emitReductionCombiner(CGF, E);
    }
    ++IPriv;
    ++ILHS;
    ++IRHS;
  }
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}
5590 
5591 void CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF,
5592                                                   const Expr *ReductionOp,
5593                                                   const Expr *PrivateRef,
5594                                                   const DeclRefExpr *LHS,
5595                                                   const DeclRefExpr *RHS) {
5596   if (PrivateRef->getType()->isArrayType()) {
5597     // Emit reduction for array section.
5598     const auto *LHSVar = cast<VarDecl>(LHS->getDecl());
5599     const auto *RHSVar = cast<VarDecl>(RHS->getDecl());
5600     EmitOMPAggregateReduction(
5601         CGF, PrivateRef->getType(), LHSVar, RHSVar,
5602         [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
5603           emitReductionCombiner(CGF, ReductionOp);
5604         });
5605   } else {
5606     // Emit reduction for array subscript or single variable.
5607     emitReductionCombiner(CGF, ReductionOp);
5608   }
5609 }
5610 
/// Emits code for an OpenMP 'reduction' clause: packs the private copies into
/// a void* list, calls __kmpc_reduce{_nowait}, and emits the tree-reduce
/// (case 1) and atomic (case 2) combine paths selected by the runtime.
void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
                                    ArrayRef<const Expr *> Privates,
                                    ArrayRef<const Expr *> LHSExprs,
                                    ArrayRef<const Expr *> RHSExprs,
                                    ArrayRef<const Expr *> ReductionOps,
                                    ReductionOptionsTy Options) {
  if (!CGF.HaveInsertPoint())
    return;

  bool WithNowait = Options.WithNowait;
  bool SimpleReduction = Options.SimpleReduction;

  // Next code should be emitted for reduction:
  //
  // static kmp_critical_name lock = { 0 };
  //
  // void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
  //  *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]);
  //  ...
  //  *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1],
  //  *(Type<n>-1*)rhs[<n>-1]);
  // }
  //
  // ...
  // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]};
  // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
  // RedList, reduce_func, &<lock>)) {
  // case 1:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...
  // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  // break;
  // case 2:
  //  ...
  //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
  //  ...
  // [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);]
  // break;
  // default:;
  // }
  //
  // if SimpleReduction is true, only the next code is generated:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...

  ASTContext &C = CGM.getContext();

  if (SimpleReduction) {
    // No runtime coordination needed (e.g. serialized region): just apply
    // each combiner directly.
    CodeGenFunction::RunCleanupsScope Scope(CGF);
    auto IPriv = Privates.begin();
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    for (const Expr *E : ReductionOps) {
      emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
                                  cast<DeclRefExpr>(*IRHS));
      ++IPriv;
      ++ILHS;
      ++IRHS;
    }
    return;
  }

  // 1. Build a list of reduction variables.
  // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]};
  auto Size = RHSExprs.size();
  for (const Expr *E : Privates) {
    if (E->getType()->isVariablyModifiedType())
      // Reserve place for array size.
      ++Size;
  }
  llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size);
  QualType ReductionArrayTy =
      C.getConstantArrayType(C.VoidPtrTy, ArraySize, ArrayType::Normal,
                             /*IndexTypeQuals=*/0);
  Address ReductionList =
      CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list");
  auto IPriv = Privates.begin();
  unsigned Idx = 0;
  for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) {
    Address Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
    CGF.Builder.CreateStore(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            CGF.EmitLValue(RHSExprs[I]).getPointer(), CGF.VoidPtrTy),
        Elem);
    if ((*IPriv)->getType()->isVariablyModifiedType()) {
      // Store array size.
      ++Idx;
      Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
      llvm::Value *Size = CGF.Builder.CreateIntCast(
          CGF.getVLASize(
                 CGF.getContext().getAsVariableArrayType((*IPriv)->getType()))
              .NumElts,
          CGF.SizeTy, /*isSigned=*/false);
      // The size is smuggled through the void* slot as an inttoptr value;
      // reduce_func converts it back with ptrtoint.
      CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy),
                              Elem);
    }
  }

  // 2. Emit reduce_func().
  llvm::Function *ReductionFn = emitReductionFunction(
      Loc, CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo(), Privates,
      LHSExprs, RHSExprs, ReductionOps);

  // 3. Create static kmp_critical_name lock = { 0 };
  std::string Name = getName({"reduction"});
  llvm::Value *Lock = getCriticalRegionLock(Name);

  // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
  // RedList, reduce_func, &<lock>);
  llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE);
  llvm::Value *ThreadId = getThreadID(CGF, Loc);
  llvm::Value *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy);
  llvm::Value *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      ReductionList.getPointer(), CGF.VoidPtrTy);
  llvm::Value *Args[] = {
      IdentTLoc,                             // ident_t *<loc>
      ThreadId,                              // i32 <gtid>
      CGF.Builder.getInt32(RHSExprs.size()), // i32 <n>
      ReductionArrayTySize,                  // size_type sizeof(RedList)
      RL,                                    // void *RedList
      ReductionFn, // void (*) (void *, void *) <reduce_func>
      Lock         // kmp_critical_name *&<lock>
  };
  llvm::Value *Res = CGF.EmitRuntimeCall(
      createRuntimeFunction(WithNowait ? OMPRTL__kmpc_reduce_nowait
                                       : OMPRTL__kmpc_reduce),
      Args);

  // 5. Build switch(res)
  llvm::BasicBlock *DefaultBB = CGF.createBasicBlock(".omp.reduction.default");
  llvm::SwitchInst *SwInst =
      CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2);

  // 6. Build case 1:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...
  // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  // break;
  llvm::BasicBlock *Case1BB = CGF.createBasicBlock(".omp.reduction.case1");
  SwInst->addCase(CGF.Builder.getInt32(1), Case1BB);
  CGF.EmitBlock(Case1BB);

  // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  llvm::Value *EndArgs[] = {
      IdentTLoc, // ident_t *<loc>
      ThreadId,  // i32 <gtid>
      Lock       // kmp_critical_name *&<lock>
  };
  auto &&CodeGen = [Privates, LHSExprs, RHSExprs, ReductionOps](
                       CodeGenFunction &CGF, PrePostActionTy &Action) {
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    auto IPriv = Privates.begin();
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    for (const Expr *E : ReductionOps) {
      RT.emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
                                     cast<DeclRefExpr>(*IRHS));
      ++IPriv;
      ++ILHS;
      ++IRHS;
    }
  };
  // The CommonActionTy wraps the combiner body with the end_reduce exit call.
  RegionCodeGenTy RCG(CodeGen);
  CommonActionTy Action(
      nullptr, llvm::None,
      createRuntimeFunction(WithNowait ? OMPRTL__kmpc_end_reduce_nowait
                                       : OMPRTL__kmpc_end_reduce),
      EndArgs);
  RCG.setAction(Action);
  RCG(CGF);

  CGF.EmitBranch(DefaultBB);

  // 7. Build case 2:
  //  ...
  //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
  //  ...
  // break;
  llvm::BasicBlock *Case2BB = CGF.createBasicBlock(".omp.reduction.case2");
  SwInst->addCase(CGF.Builder.getInt32(2), Case2BB);
  CGF.EmitBlock(Case2BB);

  auto &&AtomicCodeGen = [Loc, Privates, LHSExprs, RHSExprs, ReductionOps](
                             CodeGenFunction &CGF, PrePostActionTy &Action) {
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    auto IPriv = Privates.begin();
    for (const Expr *E : ReductionOps) {
      const Expr *XExpr = nullptr;
      const Expr *EExpr = nullptr;
      const Expr *UpExpr = nullptr;
      BinaryOperatorKind BO = BO_Comma;
      // Decompose 'x = <update expr>' so it can be emitted as a simple atomic
      // update; anything else falls back to a critical region below.
      if (const auto *BO = dyn_cast<BinaryOperator>(E)) {
        if (BO->getOpcode() == BO_Assign) {
          XExpr = BO->getLHS();
          UpExpr = BO->getRHS();
        }
      }
      // Try to emit update expression as a simple atomic.
      const Expr *RHSExpr = UpExpr;
      if (RHSExpr) {
        // Analyze RHS part of the whole expression.
        if (const auto *ACO = dyn_cast<AbstractConditionalOperator>(
                RHSExpr->IgnoreParenImpCasts())) {
          // If this is a conditional operator, analyze its condition for
          // min/max reduction operator.
          RHSExpr = ACO->getCond();
        }
        if (const auto *BORHS =
                dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) {
          EExpr = BORHS->getRHS();
          BO = BORHS->getOpcode();
        }
      }
      if (XExpr) {
        const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
        auto &&AtomicRedGen = [BO, VD,
                               Loc](CodeGenFunction &CGF, const Expr *XExpr,
                                    const Expr *EExpr, const Expr *UpExpr) {
          LValue X = CGF.EmitLValue(XExpr);
          RValue E;
          if (EExpr)
            E = CGF.EmitAnyExpr(EExpr);
          CGF.EmitOMPAtomicSimpleUpdateExpr(
              X, E, BO, /*IsXLHSInRHSPart=*/true,
              llvm::AtomicOrdering::Monotonic, Loc,
              [&CGF, UpExpr, VD, Loc](RValue XRValue) {
                // The update expression reads VD; temporarily redirect VD to
                // a temp holding the previously-loaded value of X.
                CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
                PrivateScope.addPrivate(
                    VD, [&CGF, VD, XRValue, Loc]() {
                      Address LHSTemp = CGF.CreateMemTemp(VD->getType());
                      CGF.emitOMPSimpleStore(
                          CGF.MakeAddrLValue(LHSTemp, VD->getType()), XRValue,
                          VD->getType().getNonReferenceType(), Loc);
                      return LHSTemp;
                    });
                (void)PrivateScope.Privatize();
                return CGF.EmitAnyExpr(UpExpr);
              });
        };
        if ((*IPriv)->getType()->isArrayType()) {
          // Emit atomic reduction for array section.
          const auto *RHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
          EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar,
                                    AtomicRedGen, XExpr, EExpr, UpExpr);
        } else {
          // Emit atomic reduction for array subscript or single variable.
          AtomicRedGen(CGF, XExpr, EExpr, UpExpr);
        }
      } else {
        // Emit as a critical region.
        auto &&CritRedGen = [E, Loc](CodeGenFunction &CGF, const Expr *,
                                           const Expr *, const Expr *) {
          CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
          std::string Name = RT.getName({"atomic_reduction"});
          RT.emitCriticalRegion(
              CGF, Name,
              [=](CodeGenFunction &CGF, PrePostActionTy &Action) {
                Action.Enter(CGF);
                emitReductionCombiner(CGF, E);
              },
              Loc);
        };
        if ((*IPriv)->getType()->isArrayType()) {
          const auto *LHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
          const auto *RHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
          EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar,
                                    CritRedGen);
        } else {
          CritRedGen(CGF, nullptr, nullptr, nullptr);
        }
      }
      ++ILHS;
      ++IRHS;
      ++IPriv;
    }
  };
  RegionCodeGenTy AtomicRCG(AtomicCodeGen);
  if (!WithNowait) {
    // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>);
    llvm::Value *EndArgs[] = {
        IdentTLoc, // ident_t *<loc>
        ThreadId,  // i32 <gtid>
        Lock       // kmp_critical_name *&<lock>
    };
    CommonActionTy Action(nullptr, llvm::None,
                          createRuntimeFunction(OMPRTL__kmpc_end_reduce),
                          EndArgs);
    AtomicRCG.setAction(Action);
    AtomicRCG(CGF);
  } else {
    AtomicRCG(CGF);
  }

  CGF.EmitBranch(DefaultBB);
  CGF.EmitBlock(DefaultBB, /*IsFinished=*/true);
}
5914 
5915 /// Generates unique name for artificial threadprivate variables.
5916 /// Format is: <Prefix> "." <Decl_mangled_name> "_" "<Decl_start_loc_raw_enc>"
5917 static std::string generateUniqueName(CodeGenModule &CGM, StringRef Prefix,
5918                                       const Expr *Ref) {
5919   SmallString<256> Buffer;
5920   llvm::raw_svector_ostream Out(Buffer);
5921   const clang::DeclRefExpr *DE;
5922   const VarDecl *D = ::getBaseDecl(Ref, DE);
5923   if (!D)
5924     D = cast<VarDecl>(cast<DeclRefExpr>(Ref)->getDecl());
5925   D = D->getCanonicalDecl();
5926   std::string Name = CGM.getOpenMPRuntime().getName(
5927       {D->isLocalVarDeclOrParm() ? D->getName() : CGM.getMangledName(D)});
5928   Out << Prefix << Name << "_"
5929       << D->getCanonicalDecl()->getBeginLoc().getRawEncoding();
5930   return Out.str();
5931 }
5932 
/// Emits reduction initializer function:
/// \code
/// void @.red_init(void* %arg) {
/// %0 = bitcast void* %arg to <type>*
/// store <type> <init>, <type>* %0
/// ret void
/// }
/// \endcode
static llvm::Value *emitReduceInitFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N) {
  ASTContext &C = CGM.getContext();
  // Build the function: void .red_init(void *arg), internal linkage.
  FunctionArgList Args;
  ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                          ImplicitParamDecl::Other);
  Args.emplace_back(&Param);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_init", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  // %arg points at the private copy to initialize.
  Address PrivateAddr = CGF.EmitLoadOfPointer(
      CGF.GetAddrOfLocalVar(&Param),
      C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  LValue SharedLVal;
  // If initializer uses initializer from declare reduction construct, emit a
  // pointer to the address of the original reduction item (required by
  // reduction initializer)
  if (RCG.usesReductionInitializer(N)) {
    Address SharedAddr =
        CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
            CGF, CGM.getContext().VoidPtrTy,
            generateUniqueName(CGM, "reduction", RCG.getRefExpr(N)));
    SharedAddr = CGF.EmitLoadOfPointer(
        SharedAddr,
        CGM.getContext().VoidPtrTy.castAs<PointerType>()->getTypePtr());
    SharedLVal = CGF.MakeAddrLValue(SharedAddr, CGM.getContext().VoidPtrTy);
  } else {
    // No declare-reduction initializer: the shared lvalue is unused, so pass
    // a null-based placeholder.
    SharedLVal = CGF.MakeNaturalAlignAddrLValue(
        llvm::ConstantPointerNull::get(CGM.VoidPtrTy),
        CGM.getContext().VoidPtrTy);
  }
  // Emit the initializer:
  // %0 = bitcast void* %arg to <type>*
  // store <type> <init>, <type>* %0
  RCG.emitInitialization(CGF, N, PrivateAddr, SharedLVal,
                         [](CodeGenFunction &) { return false; });
  CGF.FinishFunction();
  return Fn;
}
5999 
/// Emits reduction combiner function:
/// \code
/// void @.red_comb(void* %arg0, void* %arg1) {
/// %lhs = bitcast void* %arg0 to <type>*
/// %rhs = bitcast void* %arg1 to <type>*
/// %2 = <ReductionOp>(<type>* %lhs, <type>* %rhs)
/// store <type> %2, <type>* %lhs
/// ret void
/// }
/// \endcode
///
/// \param ReductionOp Combiner expression from the reduction clause.
/// \param LHS DeclRefExpr for the in/out (accumulator) operand.
/// \param RHS DeclRefExpr for the in operand.
/// \param PrivateRef Reference expression for the private copy of the item.
/// \returns The generated combiner function.
static llvm::Value *emitReduceCombFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N,
                                           const Expr *ReductionOp,
                                           const Expr *LHS, const Expr *RHS,
                                           const Expr *PrivateRef) {
  ASTContext &C = CGM.getContext();
  const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(LHS)->getDecl());
  const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(RHS)->getDecl());
  // Two void* arguments: the in/out item and the in item.
  FunctionArgList Args;
  ImplicitParamDecl ParamInOut(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                               C.VoidPtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl ParamIn(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                            ImplicitParamDecl::Other);
  Args.emplace_back(&ParamInOut);
  Args.emplace_back(&ParamIn);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_comb", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  // Remap lhs and rhs variables to the addresses of the function arguments.
  // %lhs = bitcast void* %arg0 to <type>*
  // %rhs = bitcast void* %arg1 to <type>*
  CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
  PrivateScope.addPrivate(LHSVD, [&C, &CGF, &ParamInOut, LHSVD]() {
    // Pull out the pointer to the variable.
    Address PtrAddr = CGF.EmitLoadOfPointer(
        CGF.GetAddrOfLocalVar(&ParamInOut),
        C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
    return CGF.Builder.CreateElementBitCast(
        PtrAddr, CGF.ConvertTypeForMem(LHSVD->getType()));
  });
  PrivateScope.addPrivate(RHSVD, [&C, &CGF, &ParamIn, RHSVD]() {
    // Pull out the pointer to the variable.
    Address PtrAddr = CGF.EmitLoadOfPointer(
        CGF.GetAddrOfLocalVar(&ParamIn),
        C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
    return CGF.Builder.CreateElementBitCast(
        PtrAddr, CGF.ConvertTypeForMem(RHSVD->getType()));
  });
  PrivateScope.Privatize();
  // Emit the combiner body:
  // %2 = <ReductionOp>(<type> *%lhs, <type> *%rhs)
  // store <type> %2, <type>* %lhs
  CGM.getOpenMPRuntime().emitSingleReductionCombiner(
      CGF, ReductionOp, PrivateRef, cast<DeclRefExpr>(LHS),
      cast<DeclRefExpr>(RHS));
  CGF.FinishFunction();
  return Fn;
}
6077 
6078 /// Emits reduction finalizer function:
6079 /// \code
6080 /// void @.red_fini(void* %arg) {
6081 /// %0 = bitcast void* %arg to <type>*
6082 /// <destroy>(<type>* %0)
6083 /// ret void
6084 /// }
6085 /// \endcode
6086 static llvm::Value *emitReduceFiniFunction(CodeGenModule &CGM,
6087                                            SourceLocation Loc,
6088                                            ReductionCodeGen &RCG, unsigned N) {
6089   if (!RCG.needCleanups(N))
6090     return nullptr;
6091   ASTContext &C = CGM.getContext();
6092   FunctionArgList Args;
6093   ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
6094                           ImplicitParamDecl::Other);
6095   Args.emplace_back(&Param);
6096   const auto &FnInfo =
6097       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
6098   llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
6099   std::string Name = CGM.getOpenMPRuntime().getName({"red_fini", ""});
6100   auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
6101                                     Name, &CGM.getModule());
6102   CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
6103   Fn->setDoesNotRecurse();
6104   CodeGenFunction CGF(CGM);
6105   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
6106   Address PrivateAddr = CGF.EmitLoadOfPointer(
6107       CGF.GetAddrOfLocalVar(&Param),
6108       C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
6109   llvm::Value *Size = nullptr;
6110   // If the size of the reduction item is non-constant, load it from global
6111   // threadprivate variable.
6112   if (RCG.getSizes(N).second) {
6113     Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
6114         CGF, CGM.getContext().getSizeType(),
6115         generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
6116     Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
6117                                 CGM.getContext().getSizeType(), Loc);
6118   }
6119   RCG.emitAggregateType(CGF, N, Size);
6120   // Emit the finalizer body:
6121   // <destroy>(<type>* %0)
6122   RCG.emitCleanups(CGF, N, PrivateAddr);
6123   CGF.FinishFunction();
6124   return Fn;
6125 }
6126 
llvm::Value *CGOpenMPRuntime::emitTaskReductionInit(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
    ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
  // Emits the runtime initialization for task reductions: builds an array of
  // kmp_task_red_input_t descriptors (one per reduction item) and passes it
  // to __kmpc_task_reduction_init. Returns the taskgroup reduction descriptor
  // produced by the runtime, or nullptr if there is nothing to do.
  if (!CGF.HaveInsertPoint() || Data.ReductionVars.empty())
    return nullptr;

  // Build typedef struct:
  // kmp_task_red_input {
  //   void *reduce_shar; // shared reduction item
  //   size_t reduce_size; // size of data item
  //   void *reduce_init; // data initialization routine
  //   void *reduce_fini; // data finalization routine
  //   void *reduce_comb; // data combiner routine
  //   kmp_task_red_flags_t flags; // flags for additional info from compiler
  // } kmp_task_red_input_t;
  ASTContext &C = CGM.getContext();
  RecordDecl *RD = C.buildImplicitRecord("kmp_task_red_input_t");
  RD->startDefinition();
  const FieldDecl *SharedFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *SizeFD = addFieldToRecordDecl(C, RD, C.getSizeType());
  const FieldDecl *InitFD  = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *FiniFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *CombFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *FlagsFD = addFieldToRecordDecl(
      C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/false));
  RD->completeDefinition();
  QualType RDType = C.getRecordType(RD);
  unsigned Size = Data.ReductionVars.size();
  llvm::APInt ArraySize(/*numBits=*/64, Size);
  QualType ArrayRDType = C.getConstantArrayType(
      RDType, ArraySize, ArrayType::Normal, /*IndexTypeQuals=*/0);
  // kmp_task_red_input_t .rd_input.[Size];
  Address TaskRedInput = CGF.CreateMemTemp(ArrayRDType, ".rd_input.");
  ReductionCodeGen RCG(Data.ReductionVars, Data.ReductionCopies,
                       Data.ReductionOps);
  // Fill in one descriptor per reduction item.
  for (unsigned Cnt = 0; Cnt < Size; ++Cnt) {
    // kmp_task_red_input_t &ElemLVal = .rd_input.[Cnt];
    llvm::Value *Idxs[] = {llvm::ConstantInt::get(CGM.SizeTy, /*V=*/0),
                           llvm::ConstantInt::get(CGM.SizeTy, Cnt)};
    llvm::Value *GEP = CGF.EmitCheckedInBoundsGEP(
        TaskRedInput.getPointer(), Idxs,
        /*SignedIndices=*/false, /*IsSubtraction=*/false, Loc,
        ".rd_input.gep.");
    LValue ElemLVal = CGF.MakeNaturalAlignAddrLValue(GEP, RDType);
    // ElemLVal.reduce_shar = &Shareds[Cnt];
    LValue SharedLVal = CGF.EmitLValueForField(ElemLVal, SharedFD);
    RCG.emitSharedLValue(CGF, Cnt);
    llvm::Value *CastedShared =
        CGF.EmitCastToVoidPtr(RCG.getSharedLValue(Cnt).getPointer());
    CGF.EmitStoreOfScalar(CastedShared, SharedLVal);
    RCG.emitAggregateType(CGF, Cnt);
    llvm::Value *SizeValInChars;
    llvm::Value *SizeVal;
    std::tie(SizeValInChars, SizeVal) = RCG.getSizes(Cnt);
    // We use delayed creation/initialization for VLAs, array sections and
    // custom reduction initializations. It is required because runtime does not
    // provide the way to pass the sizes of VLAs/array sections to
    // initializer/combiner/finalizer functions and does not pass the pointer to
    // original reduction item to the initializer. Instead threadprivate global
    // variables are used to store these values and use them in the functions.
    bool DelayedCreation = !!SizeVal;
    SizeValInChars = CGF.Builder.CreateIntCast(SizeValInChars, CGM.SizeTy,
                                               /*isSigned=*/false);
    // ElemLVal.reduce_size = size;
    LValue SizeLVal = CGF.EmitLValueForField(ElemLVal, SizeFD);
    CGF.EmitStoreOfScalar(SizeValInChars, SizeLVal);
    // ElemLVal.reduce_init = init;
    LValue InitLVal = CGF.EmitLValueForField(ElemLVal, InitFD);
    llvm::Value *InitAddr =
        CGF.EmitCastToVoidPtr(emitReduceInitFunction(CGM, Loc, RCG, Cnt));
    CGF.EmitStoreOfScalar(InitAddr, InitLVal);
    DelayedCreation = DelayedCreation || RCG.usesReductionInitializer(Cnt);
    // ElemLVal.reduce_fini = fini; null if no cleanup is required.
    LValue FiniLVal = CGF.EmitLValueForField(ElemLVal, FiniFD);
    llvm::Value *Fini = emitReduceFiniFunction(CGM, Loc, RCG, Cnt);
    llvm::Value *FiniAddr = Fini
                                ? CGF.EmitCastToVoidPtr(Fini)
                                : llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
    CGF.EmitStoreOfScalar(FiniAddr, FiniLVal);
    // ElemLVal.reduce_comb = comb;
    LValue CombLVal = CGF.EmitLValueForField(ElemLVal, CombFD);
    llvm::Value *CombAddr = CGF.EmitCastToVoidPtr(emitReduceCombFunction(
        CGM, Loc, RCG, Cnt, Data.ReductionOps[Cnt], LHSExprs[Cnt],
        RHSExprs[Cnt], Data.ReductionCopies[Cnt]));
    CGF.EmitStoreOfScalar(CombAddr, CombLVal);
    // ElemLVal.flags = 0; flag 1 requests lazy (delayed) object creation.
    LValue FlagsLVal = CGF.EmitLValueForField(ElemLVal, FlagsFD);
    if (DelayedCreation) {
      CGF.EmitStoreOfScalar(
          llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/1, /*IsSigned=*/true),
          FlagsLVal);
    } else
      CGF.EmitNullInitialization(FlagsLVal.getAddress(), FlagsLVal.getType());
  }
  // Build call void *__kmpc_task_reduction_init(int gtid, int num_data, void
  // *data);
  llvm::Value *Args[] = {
      CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy,
                                /*isSigned=*/true),
      llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TaskRedInput.getPointer(),
                                                      CGM.VoidPtrTy)};
  return CGF.EmitRuntimeCall(
      createRuntimeFunction(OMPRTL__kmpc_task_reduction_init), Args);
}
6231 
6232 void CGOpenMPRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
6233                                               SourceLocation Loc,
6234                                               ReductionCodeGen &RCG,
6235                                               unsigned N) {
6236   auto Sizes = RCG.getSizes(N);
6237   // Emit threadprivate global variable if the type is non-constant
6238   // (Sizes.second = nullptr).
6239   if (Sizes.second) {
6240     llvm::Value *SizeVal = CGF.Builder.CreateIntCast(Sizes.second, CGM.SizeTy,
6241                                                      /*isSigned=*/false);
6242     Address SizeAddr = getAddrOfArtificialThreadPrivate(
6243         CGF, CGM.getContext().getSizeType(),
6244         generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
6245     CGF.Builder.CreateStore(SizeVal, SizeAddr, /*IsVolatile=*/false);
6246   }
6247   // Store address of the original reduction item if custom initializer is used.
6248   if (RCG.usesReductionInitializer(N)) {
6249     Address SharedAddr = getAddrOfArtificialThreadPrivate(
6250         CGF, CGM.getContext().VoidPtrTy,
6251         generateUniqueName(CGM, "reduction", RCG.getRefExpr(N)));
6252     CGF.Builder.CreateStore(
6253         CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
6254             RCG.getSharedLValue(N).getPointer(), CGM.VoidPtrTy),
6255         SharedAddr, /*IsVolatile=*/false);
6256   }
6257 }
6258 
6259 Address CGOpenMPRuntime::getTaskReductionItem(CodeGenFunction &CGF,
6260                                               SourceLocation Loc,
6261                                               llvm::Value *ReductionsPtr,
6262                                               LValue SharedLVal) {
6263   // Build call void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
6264   // *d);
6265   llvm::Value *Args[] = {
6266       CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy,
6267                                 /*isSigned=*/true),
6268       ReductionsPtr,
6269       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(SharedLVal.getPointer(),
6270                                                       CGM.VoidPtrTy)};
6271   return Address(
6272       CGF.EmitRuntimeCall(
6273           createRuntimeFunction(OMPRTL__kmpc_task_reduction_get_th_data), Args),
6274       SharedLVal.getAlignment());
6275 }
6276 
6277 void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
6278                                        SourceLocation Loc) {
6279   if (!CGF.HaveInsertPoint())
6280     return;
6281   // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
6282   // global_tid);
6283   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
6284   // Ignore return result until untied tasks are supported.
6285   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_taskwait), Args);
6286   if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
6287     Region->emitUntiedSwitch(CGF);
6288 }
6289 
6290 void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF,
6291                                            OpenMPDirectiveKind InnerKind,
6292                                            const RegionCodeGenTy &CodeGen,
6293                                            bool HasCancel) {
6294   if (!CGF.HaveInsertPoint())
6295     return;
6296   InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel);
6297   CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr);
6298 }
6299 
namespace {
/// Cancellation kinds passed to the __kmpc_cancel and
/// __kmpc_cancellationpoint runtime calls as the kmp_int32 cncl_kind
/// argument.
enum RTCancelKind {
  CancelNoreq = 0,
  CancelParallel = 1,
  CancelLoop = 2,
  CancelSections = 3,
  CancelTaskgroup = 4
};
} // anonymous namespace
6309 
6310 static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) {
6311   RTCancelKind CancelKind = CancelNoreq;
6312   if (CancelRegion == OMPD_parallel)
6313     CancelKind = CancelParallel;
6314   else if (CancelRegion == OMPD_for)
6315     CancelKind = CancelLoop;
6316   else if (CancelRegion == OMPD_sections)
6317     CancelKind = CancelSections;
6318   else {
6319     assert(CancelRegion == OMPD_taskgroup);
6320     CancelKind = CancelTaskgroup;
6321   }
6322   return CancelKind;
6323 }
6324 
void CGOpenMPRuntime::emitCancellationPointCall(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDirectiveKind CancelRegion) {
  // Emits a '#pragma omp cancellation point' for the given region kind: calls
  // the runtime and, if cancellation was requested, branches out of the
  // construct through the region's cancel destination.
  if (!CGF.HaveInsertPoint())
    return;
  // Build call kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
  // global_tid, kmp_int32 cncl_kind);
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    // For 'cancellation point taskgroup', the task region info may not have a
    // cancel. This may instead happen in another adjacent task.
    if (CancelRegion == OMPD_taskgroup || OMPRegionInfo->hasCancel()) {
      llvm::Value *Args[] = {
          emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
          CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
      // Ignore return result until untied tasks are supported.
      llvm::Value *Result = CGF.EmitRuntimeCall(
          createRuntimeFunction(OMPRTL__kmpc_cancellationpoint), Args);
      // if (__kmpc_cancellationpoint()) {
      //   exit from construct;
      // }
      llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
      llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
      llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
      CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
      CGF.EmitBlock(ExitBB);
      // exit from construct;
      CodeGenFunction::JumpDest CancelDest =
          CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
      CGF.EmitBranchThroughCleanup(CancelDest);
      CGF.EmitBlock(ContBB, /*IsFinished=*/true);
    }
  }
}
6359 
void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc,
                                     const Expr *IfCond,
                                     OpenMPDirectiveKind CancelRegion) {
  // Emits a '#pragma omp cancel' for the given region kind, optionally
  // guarded by an 'if' clause condition: calls the runtime and, if
  // cancellation was activated, branches out of the construct.
  if (!CGF.HaveInsertPoint())
    return;
  // Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
  // kmp_int32 cncl_kind);
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    // Codegen for the cancel itself; shared between the guarded and the
    // unconditional forms below.
    auto &&ThenGen = [Loc, CancelRegion, OMPRegionInfo](CodeGenFunction &CGF,
                                                        PrePostActionTy &) {
      CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
      llvm::Value *Args[] = {
          RT.emitUpdateLocation(CGF, Loc), RT.getThreadID(CGF, Loc),
          CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
      // Ignore return result until untied tasks are supported.
      llvm::Value *Result = CGF.EmitRuntimeCall(
          RT.createRuntimeFunction(OMPRTL__kmpc_cancel), Args);
      // if (__kmpc_cancel()) {
      //   exit from construct;
      // }
      llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
      llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
      llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
      CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
      CGF.EmitBlock(ExitBB);
      // exit from construct;
      CodeGenFunction::JumpDest CancelDest =
          CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
      CGF.EmitBranchThroughCleanup(CancelDest);
      CGF.EmitBlock(ContBB, /*IsFinished=*/true);
    };
    if (IfCond) {
      // 'if' clause present: cancel only when the condition evaluates true.
      emitOMPIfClause(CGF, IfCond, ThenGen,
                      [](CodeGenFunction &, PrePostActionTy &) {});
    } else {
      RegionCodeGenTy ThenRCG(ThenGen);
      ThenRCG(CGF);
    }
  }
}
6401 
void CGOpenMPRuntime::emitTargetOutlinedFunction(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  assert(!ParentName.empty() && "Invalid target region parent name!");
  // Delegate to the helper that performs the actual outlining and, when
  // IsOffloadEntry is set, registers the offload entry.
  emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID,
                                   IsOffloadEntry, CodeGen);
}
6410 
void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  // Outlines the body of a target directive into a function (returned in
  // OutlinedFn), computes the region ID used by the offloading runtime
  // (returned in OutlinedFnID), and registers the offload entry if requested.
  //
  // Create a unique name for the entry function using the source location
  // information of the current target region. The name will be something like:
  //
  // __omp_offloading_DD_FFFF_PP_lBB
  //
  // where DD_FFFF is an ID unique to the file (device and file IDs), PP is the
  // mangled name of the function that encloses the target region and BB is the
  // line number of the target region.

  unsigned DeviceID;
  unsigned FileID;
  unsigned Line;
  getTargetEntryUniqueInfo(CGM.getContext(), D.getBeginLoc(), DeviceID, FileID,
                           Line);
  SmallString<64> EntryFnName;
  {
    llvm::raw_svector_ostream OS(EntryFnName);
    OS << "__omp_offloading" << llvm::format("_%x", DeviceID)
       << llvm::format("_%x_", FileID) << ParentName << "_l" << Line;
  }

  const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);

  // Outline the captured statement of the target region under the name built
  // above.
  CodeGenFunction CGF(CGM, true);
  CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);

  OutlinedFn = CGF.GenerateOpenMPCapturedStmtFunction(CS);

  // If this target outline function is not an offload entry, we don't need to
  // register it.
  if (!IsOffloadEntry)
    return;

  // The target region ID is used by the runtime library to identify the current
  // target region, so it only has to be unique and not necessarily point to
  // anything. It could be the pointer to the outlined function that implements
  // the target region, but we aren't using that so that the compiler doesn't
  // need to keep that, and could therefore inline the host function if proven
  // worthwhile during optimization. In the other hand, if emitting code for the
  // device, the ID has to be the function address so that it can retrieved from
  // the offloading entry and launched by the runtime library. We also mark the
  // outlined function to have external linkage in case we are emitting code for
  // the device, because these functions will be entry points to the device.

  if (CGM.getLangOpts().OpenMPIsDevice) {
    OutlinedFnID = llvm::ConstantExpr::getBitCast(OutlinedFn, CGM.Int8PtrTy);
    OutlinedFn->setLinkage(llvm::GlobalValue::WeakAnyLinkage);
    OutlinedFn->setDSOLocal(false);
  } else {
    // On the host the ID is an anonymous constant byte whose address is
    // unique per region.
    std::string Name = getName({EntryFnName, "region_id"});
    OutlinedFnID = new llvm::GlobalVariable(
        CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
        llvm::GlobalValue::WeakAnyLinkage,
        llvm::Constant::getNullValue(CGM.Int8Ty), Name);
  }

  // Register the information for the entry associated with this target region.
  OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
      DeviceID, FileID, ParentName, Line, OutlinedFn, OutlinedFnID,
      OffloadEntriesInfoManagerTy::OMPTargetRegionEntryTargetRegion);
}
6477 
6478 /// Checks if the expression is constant or does not have non-trivial function
6479 /// calls.
6480 static bool isTrivial(ASTContext &Ctx, const Expr * E) {
6481   // We can skip constant expressions.
6482   // We can skip expressions with trivial calls or simple expressions.
6483   return (E->isEvaluatable(Ctx, Expr::SE_AllowUndefinedBehavior) ||
6484           !E->hasNonTrivialCall(Ctx)) &&
6485          !E->HasSideEffects(Ctx, /*IncludePossibleEffects=*/true);
6486 }
6487 
const Stmt *CGOpenMPRuntime::getSingleCompoundChild(ASTContext &Ctx,
                                                    const Stmt *Body) {
  // Descends through compound statements, discarding statements that cannot
  // matter (trivial expressions, barriers/flushes, trivial declarations),
  // and returns the single remaining significant child, or nullptr if there
  // is more than one.
  const Stmt *Child = Body->IgnoreContainers();
  while (const auto *C = dyn_cast_or_null<CompoundStmt>(Child)) {
    Child = nullptr;
    for (const Stmt *S : C->body()) {
      if (const auto *E = dyn_cast<Expr>(S)) {
        // Trivial expressions (constant, or side-effect-free without
        // non-trivial calls) can be ignored.
        if (isTrivial(Ctx, E))
          continue;
      }
      // Some of the statements can be ignored.
      if (isa<AsmStmt>(S) || isa<NullStmt>(S) || isa<OMPFlushDirective>(S) ||
          isa<OMPBarrierDirective>(S) || isa<OMPTaskyieldDirective>(S))
        continue;
      // Analyze declarations: a DeclStmt is ignorable when every declaration
      // in it is either a non-variable declaration or a variable whose
      // initialization is trivial.
      if (const auto *DS = dyn_cast<DeclStmt>(S)) {
        if (llvm::all_of(DS->decls(), [&Ctx](const Decl *D) {
              if (isa<EmptyDecl>(D) || isa<DeclContext>(D) ||
                  isa<TypeDecl>(D) || isa<PragmaCommentDecl>(D) ||
                  isa<PragmaDetectMismatchDecl>(D) || isa<UsingDecl>(D) ||
                  isa<UsingDirectiveDecl>(D) ||
                  isa<OMPDeclareReductionDecl>(D) ||
                  isa<OMPThreadPrivateDecl>(D) || isa<OMPAllocateDecl>(D))
                return true;
              const auto *VD = dyn_cast<VarDecl>(D);
              if (!VD)
                return false;
              return VD->isConstexpr() ||
                     ((VD->getType().isTrivialType(Ctx) ||
                       VD->getType()->isReferenceType()) &&
                      (!VD->hasInit() || isTrivial(Ctx, VD->getInit())));
            }))
          continue;
      }
      // Found multiple children - cannot get the one child only.
      if (Child)
        return nullptr;
      Child = S;
    }
    // Strip containers off the surviving child and keep descending if it is
    // itself a compound statement.
    if (Child)
      Child = Child->IgnoreContainers();
  }
  return Child;
}
6532 
/// Emit the number of teams for a target directive.  Inspect the num_teams
/// clause associated with a teams construct combined or closely nested
/// with the target directive.
///
/// Emit a team of size one for directives such as 'target parallel' that
/// have no associated teams construct.
///
/// Otherwise, return nullptr.
///
/// A returned value of 0 means "let the runtime choose the number of teams".
static llvm::Value *
emitNumTeamsForTargetDirective(CodeGenFunction &CGF,
                               const OMPExecutableDirective &D) {
  assert(!CGF.getLangOpts().OpenMPIsDevice &&
         "Clauses associated with the teams directive expected to be emitted "
         "only for the host!");
  OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
  assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
         "Expected target-based executable directive.");
  CGBuilderTy &Bld = CGF.Builder;
  switch (DirectiveKind) {
  case OMPD_target: {
    // Plain 'target': look through the captured body for a closely nested
    // teams (or parallel/simd) directive to decide the team count.
    const auto *CS = D.getInnermostCapturedStmt();
    const auto *Body =
        CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
    const Stmt *ChildStmt =
        CGOpenMPRuntime::getSingleCompoundChild(CGF.getContext(), Body);
    if (const auto *NestedDir =
            dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
      if (isOpenMPTeamsDirective(NestedDir->getDirectiveKind())) {
        // Nested teams: use its num_teams clause if present, otherwise let
        // the runtime choose (0).
        if (NestedDir->hasClausesOfKind<OMPNumTeamsClause>()) {
          CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
          CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
          const Expr *NumTeams =
              NestedDir->getSingleClause<OMPNumTeamsClause>()->getNumTeams();
          llvm::Value *NumTeamsVal =
              CGF.EmitScalarExpr(NumTeams,
                                 /*IgnoreResultAssign*/ true);
          return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
                                   /*IsSigned=*/true);
        }
        return Bld.getInt32(0);
      }
      // Nested parallel/simd without teams: a single team.
      if (isOpenMPParallelDirective(NestedDir->getDirectiveKind()) ||
          isOpenMPSimdDirective(NestedDir->getDirectiveKind()))
        return Bld.getInt32(1);
      return Bld.getInt32(0);
    }
    // No single nested directive could be identified.
    return nullptr;
  }
  case OMPD_target_teams:
  case OMPD_target_teams_distribute:
  case OMPD_target_teams_distribute_simd:
  case OMPD_target_teams_distribute_parallel_for:
  case OMPD_target_teams_distribute_parallel_for_simd: {
    // Combined target+teams: the num_teams clause, if any, is on D itself.
    if (D.hasClausesOfKind<OMPNumTeamsClause>()) {
      CodeGenFunction::RunCleanupsScope NumTeamsScope(CGF);
      const Expr *NumTeams =
          D.getSingleClause<OMPNumTeamsClause>()->getNumTeams();
      llvm::Value *NumTeamsVal =
          CGF.EmitScalarExpr(NumTeams,
                             /*IgnoreResultAssign*/ true);
      return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
                               /*IsSigned=*/true);
    }
    return Bld.getInt32(0);
  }
  case OMPD_target_parallel:
  case OMPD_target_parallel_for:
  case OMPD_target_parallel_for_simd:
  case OMPD_target_simd:
    // Target directives with no teams construct get exactly one team.
    return Bld.getInt32(1);
  case OMPD_parallel:
  case OMPD_for:
  case OMPD_parallel_for:
  case OMPD_parallel_sections:
  case OMPD_for_simd:
  case OMPD_parallel_for_simd:
  case OMPD_cancel:
  case OMPD_cancellation_point:
  case OMPD_ordered:
  case OMPD_threadprivate:
  case OMPD_allocate:
  case OMPD_task:
  case OMPD_simd:
  case OMPD_sections:
  case OMPD_section:
  case OMPD_single:
  case OMPD_master:
  case OMPD_critical:
  case OMPD_taskyield:
  case OMPD_barrier:
  case OMPD_taskwait:
  case OMPD_taskgroup:
  case OMPD_atomic:
  case OMPD_flush:
  case OMPD_teams:
  case OMPD_target_data:
  case OMPD_target_exit_data:
  case OMPD_target_enter_data:
  case OMPD_distribute:
  case OMPD_distribute_simd:
  case OMPD_distribute_parallel_for:
  case OMPD_distribute_parallel_for_simd:
  case OMPD_teams_distribute:
  case OMPD_teams_distribute_simd:
  case OMPD_teams_distribute_parallel_for:
  case OMPD_teams_distribute_parallel_for_simd:
  case OMPD_target_update:
  case OMPD_declare_simd:
  case OMPD_declare_target:
  case OMPD_end_declare_target:
  case OMPD_declare_reduction:
  case OMPD_declare_mapper:
  case OMPD_taskloop:
  case OMPD_taskloop_simd:
  case OMPD_requires:
  case OMPD_unknown:
    // Non-target directives are rejected by the assert above.
    break;
  }
  llvm_unreachable("Unexpected directive kind.");
}
6653 
/// Compute the number of threads for a target region from the single child of
/// the captured statement \p CS.
///
/// If the child is a parallel directive, the result honors its 'if' and
/// 'num_threads' clauses (<cond> ? (<numthreads> ? <numthreads> : 0) : 1) and
/// is clamped to \p DefaultThreadLimitVal when that is non-null. A simd child
/// yields 1. Otherwise \p DefaultThreadLimitVal is returned unchanged
/// (possibly null), or 0 when the child is not an executable directive and no
/// default was provided.
static llvm::Value *getNumThreads(CodeGenFunction &CGF, const CapturedStmt *CS,
                                  llvm::Value *DefaultThreadLimitVal) {
  const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
      CGF.getContext(), CS->getCapturedStmt());
  if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
    if (isOpenMPParallelDirective(Dir->getDirectiveKind())) {
      llvm::Value *NumThreads = nullptr;
      llvm::Value *CondVal = nullptr;
      // Handle if clause. If if clause present, the number of threads is
      // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
      if (Dir->hasClausesOfKind<OMPIfClause>()) {
        CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
        const OMPIfClause *IfClause = nullptr;
        // Select the 'if' clause that applies to 'parallel': either an
        // unqualified clause or one with the 'parallel' name modifier.
        for (const auto *C : Dir->getClausesOfKind<OMPIfClause>()) {
          if (C->getNameModifier() == OMPD_unknown ||
              C->getNameModifier() == OMPD_parallel) {
            IfClause = C;
            break;
          }
        }
        if (IfClause) {
          const Expr *Cond = IfClause->getCondition();
          bool Result;
          if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
            // Condition folds to false at compile time: the parallel region
            // runs with exactly one thread.
            if (!Result)
              return CGF.Builder.getInt32(1);
          } else {
            // Runtime condition: emit the clause's pre-init declarations
            // first, then evaluate the condition.
            CodeGenFunction::LexicalScope Scope(CGF, Cond->getSourceRange());
            if (const auto *PreInit =
                    cast_or_null<DeclStmt>(IfClause->getPreInitStmt())) {
              for (const auto *I : PreInit->decls()) {
                if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
                  CGF.EmitVarDecl(cast<VarDecl>(*I));
                } else {
                  // Variable is captured without initialization: allocate it
                  // but skip running the initializer.
                  CodeGenFunction::AutoVarEmission Emission =
                      CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
                  CGF.EmitAutoVarCleanups(Emission);
                }
              }
            }
            CondVal = CGF.EvaluateExprAsBool(Cond);
          }
        }
      }
      // Check the value of num_threads clause iff if clause was not specified
      // or is not evaluated to false.
      if (Dir->hasClausesOfKind<OMPNumThreadsClause>()) {
        CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
        const auto *NumThreadsClause =
            Dir->getSingleClause<OMPNumThreadsClause>();
        CodeGenFunction::LexicalScope Scope(
            CGF, NumThreadsClause->getNumThreads()->getSourceRange());
        // Emit any pre-init declarations the clause captured before
        // evaluating the num_threads expression.
        if (const auto *PreInit =
                cast_or_null<DeclStmt>(NumThreadsClause->getPreInitStmt())) {
          for (const auto *I : PreInit->decls()) {
            if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
              CGF.EmitVarDecl(cast<VarDecl>(*I));
            } else {
              CodeGenFunction::AutoVarEmission Emission =
                  CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
              CGF.EmitAutoVarCleanups(Emission);
            }
          }
        }
        NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads());
        NumThreads = CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty,
                                               /*IsSigned=*/false);
        // Clamp to the enclosing thread_limit: take the (unsigned) minimum of
        // the two values.
        if (DefaultThreadLimitVal)
          NumThreads = CGF.Builder.CreateSelect(
              CGF.Builder.CreateICmpULT(DefaultThreadLimitVal, NumThreads),
              DefaultThreadLimitVal, NumThreads);
      } else {
        // No num_threads clause: fall back to the thread_limit, or 0 when
        // none was provided.
        NumThreads = DefaultThreadLimitVal ? DefaultThreadLimitVal
                                           : CGF.Builder.getInt32(0);
      }
      // Process condition of the if clause.
      if (CondVal) {
        NumThreads = CGF.Builder.CreateSelect(CondVal, NumThreads,
                                              CGF.Builder.getInt32(1));
      }
      return NumThreads;
    }
    // A simd region is executed by a single thread.
    if (isOpenMPSimdDirective(Dir->getDirectiveKind()))
      return CGF.Builder.getInt32(1);
    return DefaultThreadLimitVal;
  }
  return DefaultThreadLimitVal ? DefaultThreadLimitVal
                               : CGF.Builder.getInt32(0);
}
6745 
6746 /// Emit the number of threads for a target directive.  Inspect the
6747 /// thread_limit clause associated with a teams construct combined or closely
6748 /// nested with the target directive.
6749 ///
6750 /// Emit the num_threads clause for directives such as 'target parallel' that
6751 /// have no associated teams construct.
6752 ///
6753 /// Otherwise, return nullptr.
static llvm::Value *
emitNumThreadsForTargetDirective(CodeGenFunction &CGF,
                                 const OMPExecutableDirective &D) {
  assert(!CGF.getLangOpts().OpenMPIsDevice &&
         "Clauses associated with the teams directive expected to be emitted "
         "only for the host!");
  OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
  assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
         "Expected target-based executable directive.");
  CGBuilderTy &Bld = CGF.Builder;
  llvm::Value *ThreadLimitVal = nullptr;
  llvm::Value *NumThreadsVal = nullptr;
  switch (DirectiveKind) {
  case OMPD_target: {
    // Plain 'target': inspect the captured region for a parallel/simd child
    // or a nested teams directive carrying a thread_limit clause.
    const CapturedStmt *CS = D.getInnermostCapturedStmt();
    if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
      return NumThreads;
    const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
        CGF.getContext(), CS->getCapturedStmt());
    if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
      if (Dir->hasClausesOfKind<OMPThreadLimitClause>()) {
        CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
        const auto *ThreadLimitClause =
            Dir->getSingleClause<OMPThreadLimitClause>();
        CodeGenFunction::LexicalScope Scope(
            CGF, ThreadLimitClause->getThreadLimit()->getSourceRange());
        // Emit the clause's pre-init declarations before evaluating the
        // thread_limit expression.
        if (const auto *PreInit =
                cast_or_null<DeclStmt>(ThreadLimitClause->getPreInitStmt())) {
          for (const auto *I : PreInit->decls()) {
            if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
              CGF.EmitVarDecl(cast<VarDecl>(*I));
            } else {
              // Allocate the variable without running its initializer.
              CodeGenFunction::AutoVarEmission Emission =
                  CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
              CGF.EmitAutoVarCleanups(Emission);
            }
          }
        }
        llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
            ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
        ThreadLimitVal =
            Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*IsSigned=*/false);
      }
      // For a non-combined teams directive, descend into its captured region
      // to analyze the directive nested inside it.
      if (isOpenMPTeamsDirective(Dir->getDirectiveKind()) &&
          !isOpenMPDistributeDirective(Dir->getDirectiveKind())) {
        CS = Dir->getInnermostCapturedStmt();
        const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
            CGF.getContext(), CS->getCapturedStmt());
        Dir = dyn_cast_or_null<OMPExecutableDirective>(Child);
      }
      // A (non-simd) distribute region: derive the thread count from its
      // body.
      if (Dir && isOpenMPDistributeDirective(Dir->getDirectiveKind()) &&
          !isOpenMPSimdDirective(Dir->getDirectiveKind())) {
        CS = Dir->getInnermostCapturedStmt();
        if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
          return NumThreads;
      }
      // simd regions are executed by a single thread.
      if (Dir && isOpenMPSimdDirective(Dir->getDirectiveKind()))
        return Bld.getInt32(1);
    }
    // Fall back to the thread_limit, or 0 when none was specified.
    return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0);
  }
  case OMPD_target_teams: {
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*IsSigned=*/false);
    }
    const CapturedStmt *CS = D.getInnermostCapturedStmt();
    if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
      return NumThreads;
    const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
        CGF.getContext(), CS->getCapturedStmt());
    if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
      // A nested 'distribute' region: derive the count from its body.
      if (Dir->getDirectiveKind() == OMPD_distribute) {
        CS = Dir->getInnermostCapturedStmt();
        if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
          return NumThreads;
      }
    }
    // Fall back to the thread_limit, or 0 when none was specified.
    return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0);
  }
  case OMPD_target_teams_distribute:
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*IsSigned=*/false);
    }
    return getNumThreads(CGF, D.getInnermostCapturedStmt(), ThreadLimitVal);
  case OMPD_target_parallel:
  case OMPD_target_parallel_for:
  case OMPD_target_parallel_for_simd:
  case OMPD_target_teams_distribute_parallel_for:
  case OMPD_target_teams_distribute_parallel_for_simd: {
    llvm::Value *CondVal = nullptr;
    // Handle if clause. If if clause present, the number of threads is
    // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
    if (D.hasClausesOfKind<OMPIfClause>()) {
      const OMPIfClause *IfClause = nullptr;
      // Select the 'if' clause that applies to 'parallel'.
      for (const auto *C : D.getClausesOfKind<OMPIfClause>()) {
        if (C->getNameModifier() == OMPD_unknown ||
            C->getNameModifier() == OMPD_parallel) {
          IfClause = C;
          break;
        }
      }
      if (IfClause) {
        const Expr *Cond = IfClause->getCondition();
        bool Result;
        if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
          // Constant-false condition: run with a single thread.
          if (!Result)
            return Bld.getInt32(1);
        } else {
          CodeGenFunction::RunCleanupsScope Scope(CGF);
          CondVal = CGF.EvaluateExprAsBool(Cond);
        }
      }
    }
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*IsSigned=*/false);
    }
    if (D.hasClausesOfKind<OMPNumThreadsClause>()) {
      CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
      const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>();
      llvm::Value *NumThreads = CGF.EmitScalarExpr(
          NumThreadsClause->getNumThreads(), /*IgnoreResultAssign=*/true);
      NumThreadsVal =
          Bld.CreateIntCast(NumThreads, CGF.Int32Ty, /*IsSigned=*/false);
      // Combine num_threads with thread_limit by taking the (unsigned)
      // minimum of the two.
      ThreadLimitVal = ThreadLimitVal
                           ? Bld.CreateSelect(Bld.CreateICmpULT(NumThreadsVal,
                                                                ThreadLimitVal),
                                              NumThreadsVal, ThreadLimitVal)
                           : NumThreadsVal;
    }
    if (!ThreadLimitVal)
      ThreadLimitVal = Bld.getInt32(0);
    // If the 'if' condition is dynamically false, run with a single thread.
    if (CondVal)
      return Bld.CreateSelect(CondVal, ThreadLimitVal, Bld.getInt32(1));
    return ThreadLimitVal;
  }
  case OMPD_target_teams_distribute_simd:
  case OMPD_target_simd:
    // simd regions are executed by a single thread.
    return Bld.getInt32(1);
  case OMPD_parallel:
  case OMPD_for:
  case OMPD_parallel_for:
  case OMPD_parallel_sections:
  case OMPD_for_simd:
  case OMPD_parallel_for_simd:
  case OMPD_cancel:
  case OMPD_cancellation_point:
  case OMPD_ordered:
  case OMPD_threadprivate:
  case OMPD_allocate:
  case OMPD_task:
  case OMPD_simd:
  case OMPD_sections:
  case OMPD_section:
  case OMPD_single:
  case OMPD_master:
  case OMPD_critical:
  case OMPD_taskyield:
  case OMPD_barrier:
  case OMPD_taskwait:
  case OMPD_taskgroup:
  case OMPD_atomic:
  case OMPD_flush:
  case OMPD_teams:
  case OMPD_target_data:
  case OMPD_target_exit_data:
  case OMPD_target_enter_data:
  case OMPD_distribute:
  case OMPD_distribute_simd:
  case OMPD_distribute_parallel_for:
  case OMPD_distribute_parallel_for_simd:
  case OMPD_teams_distribute:
  case OMPD_teams_distribute_simd:
  case OMPD_teams_distribute_parallel_for:
  case OMPD_teams_distribute_parallel_for_simd:
  case OMPD_target_update:
  case OMPD_declare_simd:
  case OMPD_declare_target:
  case OMPD_end_declare_target:
  case OMPD_declare_reduction:
  case OMPD_declare_mapper:
  case OMPD_taskloop:
  case OMPD_taskloop_simd:
  case OMPD_requires:
  case OMPD_unknown:
    break;
  }
  llvm_unreachable("Unsupported directive kind.");
}
6958 
6959 namespace {
6960 LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
6961 
6962 // Utility to handle information from clauses associated with a given
6963 // construct that use mappable expressions (e.g. 'map' clause, 'to' clause).
6964 // It provides a convenient interface to obtain the information and generate
6965 // code for that information.
6966 class MappableExprsHandler {
6967 public:
  /// Values for bit flags used to specify the mapping type for
  /// offloading. These flags are passed to the offload runtime alongside each
  /// base-pointer/pointer/size entry.
  enum OpenMPOffloadMappingFlags : uint64_t {
    /// No flags
    OMP_MAP_NONE = 0x0,
    /// Allocate memory on the device and move data from host to device.
    OMP_MAP_TO = 0x01,
    /// Allocate memory on the device and move data from device to host.
    OMP_MAP_FROM = 0x02,
    /// Always perform the requested mapping action on the element, even
    /// if it was already mapped before.
    OMP_MAP_ALWAYS = 0x04,
    /// Delete the element from the device environment, ignoring the
    /// current reference count associated with the element.
    OMP_MAP_DELETE = 0x08,
    /// The element being mapped is a pointer-pointee pair; both the
    /// pointer and the pointee should be mapped.
    OMP_MAP_PTR_AND_OBJ = 0x10,
    /// This flags signals that the base address of an entry should be
    /// passed to the target kernel as an argument.
    OMP_MAP_TARGET_PARAM = 0x20,
    /// Signal that the runtime library has to return the device pointer
    /// in the current position for the data being mapped. Used when we have the
    /// use_device_ptr clause.
    OMP_MAP_RETURN_PARAM = 0x40,
    /// This flag signals that the reference being passed is a pointer to
    /// private data.
    OMP_MAP_PRIVATE = 0x80,
    /// Pass the element to the device by value.
    OMP_MAP_LITERAL = 0x100,
    /// Implicit map
    OMP_MAP_IMPLICIT = 0x200,
    /// The 16 MSBs of the flags indicate whether the entry is member of some
    /// struct/class. When set they carry the position of the parent struct in
    /// the argument list (see the MEMBER_OF(n) examples in the comment block
    /// of generateInfoForComponentList, where n is the 1-based position).
    OMP_MAP_MEMBER_OF = 0xffff000000000000,
    LLVM_MARK_AS_BITMASK_ENUM(/* LargestFlag = */ OMP_MAP_MEMBER_OF),
  };
7005 
7006   /// Class that associates information with a base pointer to be passed to the
7007   /// runtime library.
7008   class BasePointerInfo {
7009     /// The base pointer.
7010     llvm::Value *Ptr = nullptr;
7011     /// The base declaration that refers to this device pointer, or null if
7012     /// there is none.
7013     const ValueDecl *DevPtrDecl = nullptr;
7014 
7015   public:
7016     BasePointerInfo(llvm::Value *Ptr, const ValueDecl *DevPtrDecl = nullptr)
7017         : Ptr(Ptr), DevPtrDecl(DevPtrDecl) {}
7018     llvm::Value *operator*() const { return Ptr; }
7019     const ValueDecl *getDevicePtrDecl() const { return DevPtrDecl; }
7020     void setDevicePtrDecl(const ValueDecl *D) { DevPtrDecl = D; }
7021   };
7022 
7023   using MapBaseValuesArrayTy = SmallVector<BasePointerInfo, 4>;
7024   using MapValuesArrayTy = SmallVector<llvm::Value *, 4>;
7025   using MapFlagsArrayTy = SmallVector<OpenMPOffloadMappingFlags, 4>;
7026 
  /// Map between a struct and its lowest & highest elements which have been
  /// mapped.
  /// [ValueDecl *] --> {LE(FieldIndex, Pointer),
  ///                    HE(FieldIndex, Pointer)}
  struct StructRangeInfoTy {
    // (field index, address) of the lowest mapped element of the struct.
    std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> LowestElem = {
        0, Address::invalid()};
    // (field index, address) of the highest mapped element of the struct.
    std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> HighestElem = {
        0, Address::invalid()};
    // Address of the struct itself.
    Address Base = Address::invalid();
  };
7038 
7039 private:
  /// Information extracted from a map clause (or generated implicitly): the
  /// mappable-expression components together with the map type, its
  /// modifiers, and whether the runtime must return a device pointer for the
  /// entry.
  /// NOTE(review): the previous comment here ("Kind that defines how a device
  /// pointer has to be returned.") did not describe this struct.
  struct MapInfo {
    OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
    OpenMPMapClauseKind MapType = OMPC_MAP_unknown;
    ArrayRef<OpenMPMapModifierKind> MapModifiers;
    // True if the runtime must hand back the device pointer for this entry
    // (use_device_ptr handling).
    bool ReturnDevicePointer = false;
    // True if this map was generated implicitly rather than written by the
    // user.
    bool IsImplicit = false;

    MapInfo() = default;
    MapInfo(
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
        OpenMPMapClauseKind MapType,
        ArrayRef<OpenMPMapModifierKind> MapModifiers,
        bool ReturnDevicePointer, bool IsImplicit)
        : Components(Components), MapType(MapType), MapModifiers(MapModifiers),
          ReturnDevicePointer(ReturnDevicePointer), IsImplicit(IsImplicit) {}
  };
7057 
  /// If use_device_ptr is used on a pointer which is a struct member and there
  /// is no map information about it, then emission of that entry is deferred
  /// until the whole struct has been processed.
  struct DeferredDevicePtrEntryTy {
    // Expression for the deferred entry (presumably the member-access
    // expression of the component list — confirm at use sites).
    const Expr *IE = nullptr;
    // Declaration named by the use_device_ptr clause.
    const ValueDecl *VD = nullptr;

    DeferredDevicePtrEntryTy(const Expr *IE, const ValueDecl *VD)
        : IE(IE), VD(VD) {}
  };
7068 
7069   /// Directive from where the map clauses were extracted.
7070   const OMPExecutableDirective &CurDir;
7071 
7072   /// Function the directive is being generated for.
7073   CodeGenFunction &CGF;
7074 
7075   /// Set of all first private variables in the current directive.
7076   llvm::SmallPtrSet<const VarDecl *, 8> FirstPrivateDecls;
7077 
7078   /// Map between device pointer declarations and their expression components.
7079   /// The key value for declarations in 'this' is null.
7080   llvm::DenseMap<
7081       const ValueDecl *,
7082       SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>>
7083       DevPointersMap;
7084 
7085   llvm::Value *getExprTypeSize(const Expr *E) const {
7086     QualType ExprTy = E->getType().getCanonicalType();
7087 
7088     // Reference types are ignored for mapping purposes.
7089     if (const auto *RefTy = ExprTy->getAs<ReferenceType>())
7090       ExprTy = RefTy->getPointeeType().getCanonicalType();
7091 
7092     // Given that an array section is considered a built-in type, we need to
7093     // do the calculation based on the length of the section instead of relying
7094     // on CGF.getTypeSize(E->getType()).
7095     if (const auto *OAE = dyn_cast<OMPArraySectionExpr>(E)) {
7096       QualType BaseTy = OMPArraySectionExpr::getBaseOriginalType(
7097                             OAE->getBase()->IgnoreParenImpCasts())
7098                             .getCanonicalType();
7099 
7100       // If there is no length associated with the expression, that means we
7101       // are using the whole length of the base.
7102       if (!OAE->getLength() && OAE->getColonLoc().isValid())
7103         return CGF.getTypeSize(BaseTy);
7104 
7105       llvm::Value *ElemSize;
7106       if (const auto *PTy = BaseTy->getAs<PointerType>()) {
7107         ElemSize = CGF.getTypeSize(PTy->getPointeeType().getCanonicalType());
7108       } else {
7109         const auto *ATy = cast<ArrayType>(BaseTy.getTypePtr());
7110         assert(ATy && "Expecting array type if not a pointer type.");
7111         ElemSize = CGF.getTypeSize(ATy->getElementType().getCanonicalType());
7112       }
7113 
7114       // If we don't have a length at this point, that is because we have an
7115       // array section with a single element.
7116       if (!OAE->getLength())
7117         return ElemSize;
7118 
7119       llvm::Value *LengthVal = CGF.EmitScalarExpr(OAE->getLength());
7120       LengthVal =
7121           CGF.Builder.CreateIntCast(LengthVal, CGF.SizeTy, /*isSigned=*/false);
7122       return CGF.Builder.CreateNUWMul(LengthVal, ElemSize);
7123     }
7124     return CGF.getTypeSize(ExprTy);
7125   }
7126 
7127   /// Return the corresponding bits for a given map clause modifier. Add
7128   /// a flag marking the map as a pointer if requested. Add a flag marking the
7129   /// map as the first one of a series of maps that relate to the same map
7130   /// expression.
7131   OpenMPOffloadMappingFlags getMapTypeBits(
7132       OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
7133       bool IsImplicit, bool AddPtrFlag, bool AddIsTargetParamFlag) const {
7134     OpenMPOffloadMappingFlags Bits =
7135         IsImplicit ? OMP_MAP_IMPLICIT : OMP_MAP_NONE;
7136     switch (MapType) {
7137     case OMPC_MAP_alloc:
7138     case OMPC_MAP_release:
7139       // alloc and release is the default behavior in the runtime library,  i.e.
7140       // if we don't pass any bits alloc/release that is what the runtime is
7141       // going to do. Therefore, we don't need to signal anything for these two
7142       // type modifiers.
7143       break;
7144     case OMPC_MAP_to:
7145       Bits |= OMP_MAP_TO;
7146       break;
7147     case OMPC_MAP_from:
7148       Bits |= OMP_MAP_FROM;
7149       break;
7150     case OMPC_MAP_tofrom:
7151       Bits |= OMP_MAP_TO | OMP_MAP_FROM;
7152       break;
7153     case OMPC_MAP_delete:
7154       Bits |= OMP_MAP_DELETE;
7155       break;
7156     case OMPC_MAP_unknown:
7157       llvm_unreachable("Unexpected map type!");
7158     }
7159     if (AddPtrFlag)
7160       Bits |= OMP_MAP_PTR_AND_OBJ;
7161     if (AddIsTargetParamFlag)
7162       Bits |= OMP_MAP_TARGET_PARAM;
7163     if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_always)
7164         != MapModifiers.end())
7165       Bits |= OMP_MAP_ALWAYS;
7166     return Bits;
7167   }
7168 
7169   /// Return true if the provided expression is a final array section. A
7170   /// final array section, is one whose length can't be proved to be one.
7171   bool isFinalArraySectionExpression(const Expr *E) const {
7172     const auto *OASE = dyn_cast<OMPArraySectionExpr>(E);
7173 
7174     // It is not an array section and therefore not a unity-size one.
7175     if (!OASE)
7176       return false;
7177 
7178     // An array section with no colon always refer to a single element.
7179     if (OASE->getColonLoc().isInvalid())
7180       return false;
7181 
7182     const Expr *Length = OASE->getLength();
7183 
7184     // If we don't have a length we have to check if the array has size 1
7185     // for this dimension. Also, we should always expect a length if the
7186     // base type is pointer.
7187     if (!Length) {
7188       QualType BaseQTy = OMPArraySectionExpr::getBaseOriginalType(
7189                              OASE->getBase()->IgnoreParenImpCasts())
7190                              .getCanonicalType();
7191       if (const auto *ATy = dyn_cast<ConstantArrayType>(BaseQTy.getTypePtr()))
7192         return ATy->getSize().getSExtValue() != 1;
7193       // If we don't have a constant dimension length, we have to consider
7194       // the current section as having any size, so it is not necessarily
7195       // unitary. If it happen to be unity size, that's user fault.
7196       return true;
7197     }
7198 
7199     // Check if the length evaluates to 1.
7200     Expr::EvalResult Result;
7201     if (!Length->EvaluateAsInt(Result, CGF.getContext()))
7202       return true; // Can have more that size 1.
7203 
7204     llvm::APSInt ConstLength = Result.Val.getInt();
7205     return ConstLength.getSExtValue() != 1;
7206   }
7207 
7208   /// Generate the base pointers, section pointers, sizes and map type
7209   /// bits for the provided map type, map modifier, and expression components.
7210   /// \a IsFirstComponent should be set to true if the provided set of
7211   /// components is the first associated with a capture.
7212   void generateInfoForComponentList(
7213       OpenMPMapClauseKind MapType,
7214       ArrayRef<OpenMPMapModifierKind> MapModifiers,
7215       OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
7216       MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
7217       MapValuesArrayTy &Sizes, MapFlagsArrayTy &Types,
7218       StructRangeInfoTy &PartialStruct, bool IsFirstComponentList,
7219       bool IsImplicit,
7220       ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
7221           OverlappedElements = llvm::None) const {
7222     // The following summarizes what has to be generated for each map and the
7223     // types below. The generated information is expressed in this order:
7224     // base pointer, section pointer, size, flags
7225     // (to add to the ones that come from the map type and modifier).
7226     //
7227     // double d;
7228     // int i[100];
7229     // float *p;
7230     //
7231     // struct S1 {
7232     //   int i;
7233     //   float f[50];
7234     // }
7235     // struct S2 {
7236     //   int i;
7237     //   float f[50];
7238     //   S1 s;
7239     //   double *p;
7240     //   struct S2 *ps;
7241     // }
7242     // S2 s;
7243     // S2 *ps;
7244     //
7245     // map(d)
7246     // &d, &d, sizeof(double), TARGET_PARAM | TO | FROM
7247     //
7248     // map(i)
7249     // &i, &i, 100*sizeof(int), TARGET_PARAM | TO | FROM
7250     //
7251     // map(i[1:23])
7252     // &i(=&i[0]), &i[1], 23*sizeof(int), TARGET_PARAM | TO | FROM
7253     //
7254     // map(p)
7255     // &p, &p, sizeof(float*), TARGET_PARAM | TO | FROM
7256     //
7257     // map(p[1:24])
7258     // p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM
7259     //
7260     // map(s)
7261     // &s, &s, sizeof(S2), TARGET_PARAM | TO | FROM
7262     //
7263     // map(s.i)
7264     // &s, &(s.i), sizeof(int), TARGET_PARAM | TO | FROM
7265     //
7266     // map(s.s.f)
7267     // &s, &(s.s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
7268     //
7269     // map(s.p)
7270     // &s, &(s.p), sizeof(double*), TARGET_PARAM | TO | FROM
7271     //
7272     // map(to: s.p[:22])
7273     // &s, &(s.p), sizeof(double*), TARGET_PARAM (*)
7274     // &s, &(s.p), sizeof(double*), MEMBER_OF(1) (**)
7275     // &(s.p), &(s.p[0]), 22*sizeof(double),
7276     //   MEMBER_OF(1) | PTR_AND_OBJ | TO (***)
7277     // (*) alloc space for struct members, only this is a target parameter
7278     // (**) map the pointer (nothing to be mapped in this example) (the compiler
7279     //      optimizes this entry out, same in the examples below)
7280     // (***) map the pointee (map: to)
7281     //
7282     // map(s.ps)
7283     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM | TO | FROM
7284     //
7285     // map(from: s.ps->s.i)
7286     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7287     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7288     // &(s.ps), &(s.ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ  | FROM
7289     //
7290     // map(to: s.ps->ps)
7291     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7292     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7293     // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ  | TO
7294     //
7295     // map(s.ps->ps->ps)
7296     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7297     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7298     // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7299     // &(s.ps->ps), &(s.ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
7300     //
7301     // map(to: s.ps->ps->s.f[:22])
7302     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7303     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7304     // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7305     // &(s.ps->ps), &(s.ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
7306     //
7307     // map(ps)
7308     // &ps, &ps, sizeof(S2*), TARGET_PARAM | TO | FROM
7309     //
7310     // map(ps->i)
7311     // ps, &(ps->i), sizeof(int), TARGET_PARAM | TO | FROM
7312     //
7313     // map(ps->s.f)
7314     // ps, &(ps->s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
7315     //
7316     // map(from: ps->p)
7317     // ps, &(ps->p), sizeof(double*), TARGET_PARAM | FROM
7318     //
7319     // map(to: ps->p[:22])
7320     // ps, &(ps->p), sizeof(double*), TARGET_PARAM
7321     // ps, &(ps->p), sizeof(double*), MEMBER_OF(1)
7322     // &(ps->p), &(ps->p[0]), 22*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | TO
7323     //
7324     // map(ps->ps)
7325     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM | TO | FROM
7326     //
7327     // map(from: ps->ps->s.i)
7328     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7329     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7330     // &(ps->ps), &(ps->ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7331     //
7332     // map(from: ps->ps->ps)
7333     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7334     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7335     // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7336     //
7337     // map(ps->ps->ps->ps)
7338     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7339     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7340     // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7341     // &(ps->ps->ps), &(ps->ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
7342     //
7343     // map(to: ps->ps->ps->s.f[:22])
7344     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7345     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7346     // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7347     // &(ps->ps->ps), &(ps->ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
7348     //
7349     // map(to: s.f[:22]) map(from: s.p[:33])
7350     // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1) +
7351     //     sizeof(double*) (**), TARGET_PARAM
7352     // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | TO
7353     // &s, &(s.p), sizeof(double*), MEMBER_OF(1)
7354     // &(s.p), &(s.p[0]), 33*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7355     // (*) allocate contiguous space needed to fit all mapped members even if
7356     //     we allocate space for members not mapped (in this example,
7357     //     s.f[22..49] and s.s are not mapped, yet we must allocate space for
7358     //     them as well because they fall between &s.f[0] and &s.p)
7359     //
7360     // map(from: s.f[:22]) map(to: ps->p[:33])
7361     // &s, &(s.f[0]), 22*sizeof(float), TARGET_PARAM | FROM
7362     // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
7363     // ps, &(ps->p), sizeof(double*), MEMBER_OF(2) (*)
7364     // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(2) | PTR_AND_OBJ | TO
7365     // (*) the struct this entry pertains to is the 2nd element in the list of
7366     //     arguments, hence MEMBER_OF(2)
7367     //
7368     // map(from: s.f[:22], s.s) map(to: ps->p[:33])
7369     // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1), TARGET_PARAM
7370     // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | FROM
7371     // &s, &(s.s), sizeof(struct S1), MEMBER_OF(1) | FROM
7372     // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
7373     // ps, &(ps->p), sizeof(double*), MEMBER_OF(4) (*)
7374     // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(4) | PTR_AND_OBJ | TO
7375     // (*) the struct this entry pertains to is the 4th element in the list
7376     //     of arguments, hence MEMBER_OF(4)
7377 
7378     // Track if the map information being generated is the first for a capture.
7379     bool IsCaptureFirstInfo = IsFirstComponentList;
7380     bool IsLink = false; // Is this variable a "declare target link"?
7381 
7382     // Scan the components from the base to the complete expression.
7383     auto CI = Components.rbegin();
7384     auto CE = Components.rend();
7385     auto I = CI;
7386 
7387     // Track if the map information being generated is the first for a list of
7388     // components.
7389     bool IsExpressionFirstInfo = true;
7390     Address BP = Address::invalid();
7391     const Expr *AssocExpr = I->getAssociatedExpression();
7392     const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr);
7393     const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);
7394 
7395     if (isa<MemberExpr>(AssocExpr)) {
7396       // The base is the 'this' pointer. The content of the pointer is going
7397       // to be the base of the field being mapped.
7398       BP = CGF.LoadCXXThisAddress();
7399     } else if ((AE && isa<CXXThisExpr>(AE->getBase()->IgnoreParenImpCasts())) ||
7400                (OASE &&
7401                 isa<CXXThisExpr>(OASE->getBase()->IgnoreParenImpCasts()))) {
7402       BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress();
7403     } else {
7404       // The base is the reference to the variable.
7405       // BP = &Var.
7406       BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress();
7407       if (const auto *VD =
7408               dyn_cast_or_null<VarDecl>(I->getAssociatedDeclaration())) {
7409         if (llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
7410                 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD))
7411           if (*Res == OMPDeclareTargetDeclAttr::MT_Link) {
7412             IsLink = true;
7413             BP = CGF.CGM.getOpenMPRuntime().getAddrOfDeclareTargetLink(VD);
7414           }
7415       }
7416 
7417       // If the variable is a pointer and is being dereferenced (i.e. is not
7418       // the last component), the base has to be the pointer itself, not its
7419       // reference. References are ignored for mapping purposes.
7420       QualType Ty =
7421           I->getAssociatedDeclaration()->getType().getNonReferenceType();
7422       if (Ty->isAnyPointerType() && std::next(I) != CE) {
7423         BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
7424 
7425         // We do not need to generate individual map information for the
7426         // pointer, it can be associated with the combined storage.
7427         ++I;
7428       }
7429     }
7430 
7431     // Track whether a component of the list should be marked as MEMBER_OF some
7432     // combined entry (for partial structs). Only the first PTR_AND_OBJ entry
7433     // in a component list should be marked as MEMBER_OF, all subsequent entries
7434     // do not belong to the base struct. E.g.
7435     // struct S2 s;
7436     // s.ps->ps->ps->f[:]
7437     //   (1) (2) (3) (4)
7438     // ps(1) is a member pointer, ps(2) is a pointee of ps(1), so it is a
7439     // PTR_AND_OBJ entry; the PTR is ps(1), so MEMBER_OF the base struct. ps(3)
7440     // is the pointee of ps(2) which is not member of struct s, so it should not
7441     // be marked as such (it is still PTR_AND_OBJ).
7442     // The variable is initialized to false so that PTR_AND_OBJ entries which
7443     // are not struct members are not considered (e.g. array of pointers to
7444     // data).
7445     bool ShouldBeMemberOf = false;
7446 
7447     // Variable keeping track of whether or not we have encountered a component
7448     // in the component list which is a member expression. Useful when we have a
7449     // pointer or a final array section, in which case it is the previous
7450     // component in the list which tells us whether we have a member expression.
7451     // E.g. X.f[:]
7452     // While processing the final array section "[:]" it is "f" which tells us
7453     // whether we are dealing with a member of a declared struct.
7454     const MemberExpr *EncounteredME = nullptr;
7455 
7456     for (; I != CE; ++I) {
7457       // If the current component is member of a struct (parent struct) mark it.
7458       if (!EncounteredME) {
7459         EncounteredME = dyn_cast<MemberExpr>(I->getAssociatedExpression());
7460         // If we encounter a PTR_AND_OBJ entry from now on it should be marked
7461         // as MEMBER_OF the parent struct.
7462         if (EncounteredME)
7463           ShouldBeMemberOf = true;
7464       }
7465 
7466       auto Next = std::next(I);
7467 
7468       // We need to generate the addresses and sizes if this is the last
7469       // component, if the component is a pointer or if it is an array section
7470       // whose length can't be proved to be one. If this is a pointer, it
7471       // becomes the base address for the following components.
7472 
7473       // A final array section, is one whose length can't be proved to be one.
7474       bool IsFinalArraySection =
7475           isFinalArraySectionExpression(I->getAssociatedExpression());
7476 
7477       // Get information on whether the element is a pointer. Have to do a
7478       // special treatment for array sections given that they are built-in
7479       // types.
7480       const auto *OASE =
7481           dyn_cast<OMPArraySectionExpr>(I->getAssociatedExpression());
7482       bool IsPointer =
7483           (OASE && OMPArraySectionExpr::getBaseOriginalType(OASE)
7484                        .getCanonicalType()
7485                        ->isAnyPointerType()) ||
7486           I->getAssociatedExpression()->getType()->isAnyPointerType();
7487 
7488       if (Next == CE || IsPointer || IsFinalArraySection) {
7489         // If this is not the last component, we expect the pointer to be
7490         // associated with an array expression or member expression.
7491         assert((Next == CE ||
7492                 isa<MemberExpr>(Next->getAssociatedExpression()) ||
7493                 isa<ArraySubscriptExpr>(Next->getAssociatedExpression()) ||
7494                 isa<OMPArraySectionExpr>(Next->getAssociatedExpression())) &&
7495                "Unexpected expression");
7496 
7497         Address LB =
7498             CGF.EmitOMPSharedLValue(I->getAssociatedExpression()).getAddress();
7499 
7500         // If this component is a pointer inside the base struct then we don't
7501         // need to create any entry for it - it will be combined with the object
7502         // it is pointing to into a single PTR_AND_OBJ entry.
7503         bool IsMemberPointer =
7504             IsPointer && EncounteredME &&
7505             (dyn_cast<MemberExpr>(I->getAssociatedExpression()) ==
7506              EncounteredME);
7507         if (!OverlappedElements.empty()) {
7508           // Handle base element with the info for overlapped elements.
7509           assert(!PartialStruct.Base.isValid() && "The base element is set.");
7510           assert(Next == CE &&
7511                  "Expected last element for the overlapped elements.");
7512           assert(!IsPointer &&
7513                  "Unexpected base element with the pointer type.");
7514           // Mark the whole struct as the struct that requires allocation on the
7515           // device.
7516           PartialStruct.LowestElem = {0, LB};
7517           CharUnits TypeSize = CGF.getContext().getTypeSizeInChars(
7518               I->getAssociatedExpression()->getType());
7519           Address HB = CGF.Builder.CreateConstGEP(
7520               CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(LB,
7521                                                               CGF.VoidPtrTy),
7522               TypeSize.getQuantity() - 1);
7523           PartialStruct.HighestElem = {
7524               std::numeric_limits<decltype(
7525                   PartialStruct.HighestElem.first)>::max(),
7526               HB};
7527           PartialStruct.Base = BP;
7528           // Emit data for non-overlapped data.
7529           OpenMPOffloadMappingFlags Flags =
7530               OMP_MAP_MEMBER_OF |
7531               getMapTypeBits(MapType, MapModifiers, IsImplicit,
7532                              /*AddPtrFlag=*/false,
7533                              /*AddIsTargetParamFlag=*/false);
7534           LB = BP;
7535           llvm::Value *Size = nullptr;
7536           // Do bitcopy of all non-overlapped structure elements.
7537           for (OMPClauseMappableExprCommon::MappableExprComponentListRef
7538                    Component : OverlappedElements) {
7539             Address ComponentLB = Address::invalid();
7540             for (const OMPClauseMappableExprCommon::MappableComponent &MC :
7541                  Component) {
7542               if (MC.getAssociatedDeclaration()) {
7543                 ComponentLB =
7544                     CGF.EmitOMPSharedLValue(MC.getAssociatedExpression())
7545                         .getAddress();
7546                 Size = CGF.Builder.CreatePtrDiff(
7547                     CGF.EmitCastToVoidPtr(ComponentLB.getPointer()),
7548                     CGF.EmitCastToVoidPtr(LB.getPointer()));
7549                 break;
7550               }
7551             }
7552             BasePointers.push_back(BP.getPointer());
7553             Pointers.push_back(LB.getPointer());
7554             Sizes.push_back(Size);
7555             Types.push_back(Flags);
7556             LB = CGF.Builder.CreateConstGEP(ComponentLB, 1);
7557           }
7558           BasePointers.push_back(BP.getPointer());
7559           Pointers.push_back(LB.getPointer());
7560           Size = CGF.Builder.CreatePtrDiff(
7561               CGF.EmitCastToVoidPtr(
7562                   CGF.Builder.CreateConstGEP(HB, 1).getPointer()),
7563               CGF.EmitCastToVoidPtr(LB.getPointer()));
7564           Sizes.push_back(Size);
7565           Types.push_back(Flags);
7566           break;
7567         }
7568         llvm::Value *Size = getExprTypeSize(I->getAssociatedExpression());
7569         if (!IsMemberPointer) {
7570           BasePointers.push_back(BP.getPointer());
7571           Pointers.push_back(LB.getPointer());
7572           Sizes.push_back(Size);
7573 
7574           // We need to add a pointer flag for each map that comes from the
7575           // same expression except for the first one. We also need to signal
7576           // this map is the first one that relates with the current capture
7577           // (there is a set of entries for each capture).
7578           OpenMPOffloadMappingFlags Flags = getMapTypeBits(
7579               MapType, MapModifiers, IsImplicit,
7580               !IsExpressionFirstInfo || IsLink, IsCaptureFirstInfo && !IsLink);
7581 
7582           if (!IsExpressionFirstInfo) {
7583             // If we have a PTR_AND_OBJ pair where the OBJ is a pointer as well,
7584             // then we reset the TO/FROM/ALWAYS/DELETE flags.
7585             if (IsPointer)
7586               Flags &= ~(OMP_MAP_TO | OMP_MAP_FROM | OMP_MAP_ALWAYS |
7587                          OMP_MAP_DELETE);
7588 
7589             if (ShouldBeMemberOf) {
7590               // Set placeholder value MEMBER_OF=FFFF to indicate that the flag
7591               // should be later updated with the correct value of MEMBER_OF.
7592               Flags |= OMP_MAP_MEMBER_OF;
7593               // From now on, all subsequent PTR_AND_OBJ entries should not be
7594               // marked as MEMBER_OF.
7595               ShouldBeMemberOf = false;
7596             }
7597           }
7598 
7599           Types.push_back(Flags);
7600         }
7601 
7602         // If we have encountered a member expression so far, keep track of the
7603         // mapped member. If the parent is "*this", then the value declaration
7604         // is nullptr.
7605         if (EncounteredME) {
7606           const auto *FD = dyn_cast<FieldDecl>(EncounteredME->getMemberDecl());
7607           unsigned FieldIndex = FD->getFieldIndex();
7608 
7609           // Update info about the lowest and highest elements for this struct
7610           if (!PartialStruct.Base.isValid()) {
7611             PartialStruct.LowestElem = {FieldIndex, LB};
7612             PartialStruct.HighestElem = {FieldIndex, LB};
7613             PartialStruct.Base = BP;
7614           } else if (FieldIndex < PartialStruct.LowestElem.first) {
7615             PartialStruct.LowestElem = {FieldIndex, LB};
7616           } else if (FieldIndex > PartialStruct.HighestElem.first) {
7617             PartialStruct.HighestElem = {FieldIndex, LB};
7618           }
7619         }
7620 
7621         // If we have a final array section, we are done with this expression.
7622         if (IsFinalArraySection)
7623           break;
7624 
7625         // The pointer becomes the base for the next element.
7626         if (Next != CE)
7627           BP = LB;
7628 
7629         IsExpressionFirstInfo = false;
7630         IsCaptureFirstInfo = false;
7631       }
7632     }
7633   }
7634 
7635   /// Return the adjusted map modifiers if the declaration a capture refers to
7636   /// appears in a first-private clause. This is expected to be used only with
7637   /// directives that start with 'target'.
7638   MappableExprsHandler::OpenMPOffloadMappingFlags
7639   getMapModifiersForPrivateClauses(const CapturedStmt::Capture &Cap) const {
7640     assert(Cap.capturesVariable() && "Expected capture by reference only!");
7641 
7642     // A first private variable captured by reference will use only the
7643     // 'private ptr' and 'map to' flag. Return the right flags if the captured
7644     // declaration is known as first-private in this handler.
7645     if (FirstPrivateDecls.count(Cap.getCapturedVar())) {
7646       if (Cap.getCapturedVar()->getType().isConstant(CGF.getContext()) &&
7647           Cap.getCaptureKind() == CapturedStmt::VCK_ByRef)
7648         return MappableExprsHandler::OMP_MAP_ALWAYS |
7649                MappableExprsHandler::OMP_MAP_TO;
7650       if (Cap.getCapturedVar()->getType()->isAnyPointerType())
7651         return MappableExprsHandler::OMP_MAP_TO |
7652                MappableExprsHandler::OMP_MAP_PTR_AND_OBJ;
7653       return MappableExprsHandler::OMP_MAP_PRIVATE |
7654              MappableExprsHandler::OMP_MAP_TO;
7655     }
7656     return MappableExprsHandler::OMP_MAP_TO |
7657            MappableExprsHandler::OMP_MAP_FROM;
7658   }
7659 
7660   static OpenMPOffloadMappingFlags getMemberOfFlag(unsigned Position) {
7661     // Member of is given by the 16 MSB of the flag, so rotate by 48 bits.
7662     return static_cast<OpenMPOffloadMappingFlags>(((uint64_t)Position + 1)
7663                                                   << 48);
7664   }
7665 
7666   static void setCorrectMemberOfFlag(OpenMPOffloadMappingFlags &Flags,
7667                                      OpenMPOffloadMappingFlags MemberOfFlag) {
7668     // If the entry is PTR_AND_OBJ but has not been marked with the special
7669     // placeholder value 0xFFFF in the MEMBER_OF field, then it should not be
7670     // marked as MEMBER_OF.
7671     if ((Flags & OMP_MAP_PTR_AND_OBJ) &&
7672         ((Flags & OMP_MAP_MEMBER_OF) != OMP_MAP_MEMBER_OF))
7673       return;
7674 
7675     // Reset the placeholder value to prepare the flag for the assignment of the
7676     // proper MEMBER_OF value.
7677     Flags &= ~OMP_MAP_MEMBER_OF;
7678     Flags |= MemberOfFlag;
7679   }
7680 
7681   void getPlainLayout(const CXXRecordDecl *RD,
7682                       llvm::SmallVectorImpl<const FieldDecl *> &Layout,
7683                       bool AsBase) const {
7684     const CGRecordLayout &RL = CGF.getTypes().getCGRecordLayout(RD);
7685 
7686     llvm::StructType *St =
7687         AsBase ? RL.getBaseSubobjectLLVMType() : RL.getLLVMType();
7688 
7689     unsigned NumElements = St->getNumElements();
7690     llvm::SmallVector<
7691         llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>, 4>
7692         RecordLayout(NumElements);
7693 
7694     // Fill bases.
7695     for (const auto &I : RD->bases()) {
7696       if (I.isVirtual())
7697         continue;
7698       const auto *Base = I.getType()->getAsCXXRecordDecl();
7699       // Ignore empty bases.
7700       if (Base->isEmpty() || CGF.getContext()
7701                                  .getASTRecordLayout(Base)
7702                                  .getNonVirtualSize()
7703                                  .isZero())
7704         continue;
7705 
7706       unsigned FieldIndex = RL.getNonVirtualBaseLLVMFieldNo(Base);
7707       RecordLayout[FieldIndex] = Base;
7708     }
7709     // Fill in virtual bases.
7710     for (const auto &I : RD->vbases()) {
7711       const auto *Base = I.getType()->getAsCXXRecordDecl();
7712       // Ignore empty bases.
7713       if (Base->isEmpty())
7714         continue;
7715       unsigned FieldIndex = RL.getVirtualBaseIndex(Base);
7716       if (RecordLayout[FieldIndex])
7717         continue;
7718       RecordLayout[FieldIndex] = Base;
7719     }
7720     // Fill in all the fields.
7721     assert(!RD->isUnion() && "Unexpected union.");
7722     for (const auto *Field : RD->fields()) {
7723       // Fill in non-bitfields. (Bitfields always use a zero pattern, which we
7724       // will fill in later.)
7725       if (!Field->isBitField()) {
7726         unsigned FieldIndex = RL.getLLVMFieldNo(Field);
7727         RecordLayout[FieldIndex] = Field;
7728       }
7729     }
7730     for (const llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>
7731              &Data : RecordLayout) {
7732       if (Data.isNull())
7733         continue;
7734       if (const auto *Base = Data.dyn_cast<const CXXRecordDecl *>())
7735         getPlainLayout(Base, Layout, /*AsBase=*/true);
7736       else
7737         Layout.push_back(Data.get<const FieldDecl *>());
7738     }
7739   }
7740 
7741 public:
7742   MappableExprsHandler(const OMPExecutableDirective &Dir, CodeGenFunction &CGF)
7743       : CurDir(Dir), CGF(CGF) {
7744     // Extract firstprivate clause information.
7745     for (const auto *C : Dir.getClausesOfKind<OMPFirstprivateClause>())
7746       for (const auto *D : C->varlists())
7747         FirstPrivateDecls.insert(
7748             cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl())->getCanonicalDecl());
7749     // Extract device pointer clause information.
7750     for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>())
7751       for (auto L : C->component_lists())
7752         DevPointersMap[L.first].push_back(L.second);
7753   }
7754 
7755   /// Generate code for the combined entry if we have a partially mapped struct
7756   /// and take care of the mapping flags of the arguments corresponding to
7757   /// individual struct members.
7758   void emitCombinedEntry(MapBaseValuesArrayTy &BasePointers,
7759                          MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes,
7760                          MapFlagsArrayTy &Types, MapFlagsArrayTy &CurTypes,
7761                          const StructRangeInfoTy &PartialStruct) const {
7762     // Base is the base of the struct
7763     BasePointers.push_back(PartialStruct.Base.getPointer());
7764     // Pointer is the address of the lowest element
7765     llvm::Value *LB = PartialStruct.LowestElem.second.getPointer();
7766     Pointers.push_back(LB);
7767     // Size is (addr of {highest+1} element) - (addr of lowest element)
7768     llvm::Value *HB = PartialStruct.HighestElem.second.getPointer();
7769     llvm::Value *HAddr = CGF.Builder.CreateConstGEP1_32(HB, /*Idx0=*/1);
7770     llvm::Value *CLAddr = CGF.Builder.CreatePointerCast(LB, CGF.VoidPtrTy);
7771     llvm::Value *CHAddr = CGF.Builder.CreatePointerCast(HAddr, CGF.VoidPtrTy);
7772     llvm::Value *Diff = CGF.Builder.CreatePtrDiff(CHAddr, CLAddr);
7773     llvm::Value *Size = CGF.Builder.CreateIntCast(Diff, CGF.SizeTy,
7774                                                   /*isSinged=*/false);
7775     Sizes.push_back(Size);
7776     // Map type is always TARGET_PARAM
7777     Types.push_back(OMP_MAP_TARGET_PARAM);
7778     // Remove TARGET_PARAM flag from the first element
7779     (*CurTypes.begin()) &= ~OMP_MAP_TARGET_PARAM;
7780 
7781     // All other current entries will be MEMBER_OF the combined entry
7782     // (except for PTR_AND_OBJ entries which do not have a placeholder value
7783     // 0xFFFF in the MEMBER_OF field).
7784     OpenMPOffloadMappingFlags MemberOfFlag =
7785         getMemberOfFlag(BasePointers.size() - 1);
7786     for (auto &M : CurTypes)
7787       setCorrectMemberOfFlag(M, MemberOfFlag);
7788   }
7789 
  /// Generate all the base pointers, section pointers, sizes and map
  /// types for the extracted mappable expressions. Also, for each item that
  /// relates with a device pointer, a pair of the relevant declaration and
  /// index where it occurs is appended to the device pointers info array.
  void generateAllInfo(MapBaseValuesArrayTy &BasePointers,
                       MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes,
                       MapFlagsArrayTy &Types) const {
    // We have to process the component lists that relate with the same
    // declaration in a single chunk so that we can generate the map flags
    // correctly. Therefore, we organize all lists in a map. MapVector keeps
    // insertion order, so entries are emitted deterministically.
    llvm::MapVector<const ValueDecl *, SmallVector<MapInfo, 8>> Info;

    // Helper function to fill the information map for the different supported
    // clauses. Component lists are keyed by the canonical declaration of the
    // mapped variable; lists rooted at 'this' are keyed by nullptr.
    auto &&InfoGen = [&Info](
        const ValueDecl *D,
        OMPClauseMappableExprCommon::MappableExprComponentListRef L,
        OpenMPMapClauseKind MapType,
        ArrayRef<OpenMPMapModifierKind> MapModifiers,
        bool ReturnDevicePointer, bool IsImplicit) {
      const ValueDecl *VD =
          D ? cast<ValueDecl>(D->getCanonicalDecl()) : nullptr;
      Info[VD].emplace_back(L, MapType, MapModifiers, ReturnDevicePointer,
                            IsImplicit);
    };

    // Collect component lists from map/to/from clauses. 'to'/'from' clauses
    // (target update) carry no explicit modifiers, hence llvm::None.
    // FIXME: MSVC 2013 seems to require this-> to find member CurDir.
    for (const auto *C : this->CurDir.getClausesOfKind<OMPMapClause>())
      for (const auto &L : C->component_lists()) {
        InfoGen(L.first, L.second, C->getMapType(), C->getMapTypeModifiers(),
            /*ReturnDevicePointer=*/false, C->isImplicit());
      }
    for (const auto *C : this->CurDir.getClausesOfKind<OMPToClause>())
      for (const auto &L : C->component_lists()) {
        InfoGen(L.first, L.second, OMPC_MAP_to, llvm::None,
            /*ReturnDevicePointer=*/false, C->isImplicit());
      }
    for (const auto *C : this->CurDir.getClausesOfKind<OMPFromClause>())
      for (const auto &L : C->component_lists()) {
        InfoGen(L.first, L.second, OMPC_MAP_from, llvm::None,
            /*ReturnDevicePointer=*/false, C->isImplicit());
      }

    // Look at the use_device_ptr clause information and mark the existing map
    // entries as such. If there is no map information for an entry in the
    // use_device_ptr list, we create one with map type 'alloc' and zero size
    // section. It is the user fault if that was not mapped before. If there is
    // no map information and the pointer is a struct member, then we defer the
    // emission of that entry until the whole struct has been processed.
    llvm::MapVector<const ValueDecl *, SmallVector<DeferredDevicePtrEntryTy, 4>>
        DeferredInfo;

    // FIXME: MSVC 2013 seems to require this-> to find member CurDir.
    for (const auto *C :
        this->CurDir.getClausesOfKind<OMPUseDevicePtrClause>()) {
      for (const auto &L : C->component_lists()) {
        assert(!L.second.empty() && "Not expecting empty list of components!");
        const ValueDecl *VD = L.second.back().getAssociatedDeclaration();
        VD = cast<ValueDecl>(VD->getCanonicalDecl());
        const Expr *IE = L.second.back().getAssociatedExpression();
        // If the first component is a member expression, we have to look into
        // 'this', which maps to null in the map of map information. Otherwise
        // look directly for the information.
        auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);

        // We potentially have map information for this declaration already.
        // Look for the first set of components that refer to it.
        if (It != Info.end()) {
          auto CI = std::find_if(
              It->second.begin(), It->second.end(), [VD](const MapInfo &MI) {
                return MI.Components.back().getAssociatedDeclaration() == VD;
              });
          // If we found a map entry, signal that the pointer has to be returned
          // and move on to the next declaration.
          if (CI != It->second.end()) {
            CI->ReturnDevicePointer = true;
            continue;
          }
        }

        // We didn't find any match in our map information - generate a zero
        // size array section - if the pointer is a struct member we defer this
        // action until the whole struct has been processed.
        // FIXME: MSVC 2013 seems to require this-> to find member CGF.
        if (isa<MemberExpr>(IE)) {
          // Insert the pointer into Info to be processed by
          // generateInfoForComponentList. Because it is a member pointer
          // without a pointee, no entry will be generated for it, therefore
          // we need to generate one after the whole struct has been processed.
          // Nonetheless, generateInfoForComponentList must be called to take
          // the pointer into account for the calculation of the range of the
          // partial struct.
          InfoGen(nullptr, L.second, OMPC_MAP_unknown, llvm::None,
                  /*ReturnDevicePointer=*/false, C->isImplicit());
          DeferredInfo[nullptr].emplace_back(IE, VD);
        } else {
          // Not a struct member: emit a RETURN_PARAM entry directly with a
          // zero size, so the runtime only returns the device address.
          llvm::Value *Ptr = this->CGF.EmitLoadOfScalar(
              this->CGF.EmitLValue(IE), IE->getExprLoc());
          BasePointers.emplace_back(Ptr, VD);
          Pointers.push_back(Ptr);
          Sizes.push_back(llvm::Constant::getNullValue(this->CGF.SizeTy));
          Types.push_back(OMP_MAP_RETURN_PARAM | OMP_MAP_TARGET_PARAM);
        }
      }
    }

    // Emit the entries, one declaration (capture) at a time.
    for (const auto &M : Info) {
      // We need to know when we generate information for the first component
      // associated with a capture, because the mapping flags depend on it.
      bool IsFirstComponentList = true;

      // Temporary versions of arrays. Kept separate from the output arrays so
      // that a combined struct entry (emitted below) can precede them.
      MapBaseValuesArrayTy CurBasePointers;
      MapValuesArrayTy CurPointers;
      MapValuesArrayTy CurSizes;
      MapFlagsArrayTy CurTypes;
      StructRangeInfoTy PartialStruct;

      for (const MapInfo &L : M.second) {
        assert(!L.Components.empty() &&
               "Not expecting declaration with no component lists.");

        // Remember the current base pointer index.
        unsigned CurrentBasePointersIdx = CurBasePointers.size();
        // FIXME: MSVC 2013 seems to require this-> to find the member method.
        this->generateInfoForComponentList(
            L.MapType, L.MapModifiers, L.Components, CurBasePointers,
            CurPointers, CurSizes, CurTypes, PartialStruct,
            IsFirstComponentList, L.IsImplicit);

        // If this entry relates with a device pointer, set the relevant
        // declaration and add the 'return pointer' flag.
        if (L.ReturnDevicePointer) {
          assert(CurBasePointers.size() > CurrentBasePointersIdx &&
                 "Unexpected number of mapped base pointers.");

          const ValueDecl *RelevantVD =
              L.Components.back().getAssociatedDeclaration();
          assert(RelevantVD &&
                 "No relevant declaration related with device pointer??");

          CurBasePointers[CurrentBasePointersIdx].setDevicePtrDecl(RelevantVD);
          CurTypes[CurrentBasePointersIdx] |= OMP_MAP_RETURN_PARAM;
        }
        IsFirstComponentList = false;
      }

      // Append any pending zero-length pointers which are struct members and
      // used with use_device_ptr. These were deferred above so they are
      // emitted only after the whole struct has been processed.
      auto CI = DeferredInfo.find(M.first);
      if (CI != DeferredInfo.end()) {
        for (const DeferredDevicePtrEntryTy &L : CI->second) {
          llvm::Value *BasePtr = this->CGF.EmitLValue(L.IE).getPointer();
          llvm::Value *Ptr = this->CGF.EmitLoadOfScalar(
              this->CGF.EmitLValue(L.IE), L.IE->getExprLoc());
          CurBasePointers.emplace_back(BasePtr, L.VD);
          CurPointers.push_back(Ptr);
          CurSizes.push_back(llvm::Constant::getNullValue(this->CGF.SizeTy));
          // Entry is PTR_AND_OBJ and RETURN_PARAM. Also, set the placeholder
          // value MEMBER_OF=FFFF so that the entry is later updated with the
          // correct value of MEMBER_OF.
          CurTypes.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_RETURN_PARAM |
                             OMP_MAP_MEMBER_OF);
        }
      }

      // If there is an entry in PartialStruct it means we have a struct with
      // individual members mapped. Emit an extra combined entry. Note this
      // must happen before the temporary arrays are appended, so the combined
      // entry precedes the member entries it refers to.
      if (PartialStruct.Base.isValid())
        emitCombinedEntry(BasePointers, Pointers, Sizes, Types, CurTypes,
                          PartialStruct);

      // We need to append the results of this capture to what we already have.
      BasePointers.append(CurBasePointers.begin(), CurBasePointers.end());
      Pointers.append(CurPointers.begin(), CurPointers.end());
      Sizes.append(CurSizes.begin(), CurSizes.end());
      Types.append(CurTypes.begin(), CurTypes.end());
    }
  }
7969 
  /// Emit capture info for lambdas for variables captured by reference.
  ///
  /// If \a VD is (a reference to) a lambda closure object, emit one map entry
  /// for the captured 'this' pointer (if any) and one per variable captured
  /// by reference, each flagged PTR_AND_OBJ|LITERAL|MEMBER_OF|IMPLICIT with a
  /// placeholder MEMBER_OF index.
  /// \param VD  Declaration of the (potential) lambda object.
  /// \param Arg Host address of the lambda object.
  /// \param LambdaPointers Filled with field-address -> lambda-address pairs
  ///        so adjustMemberOfForLambdaCaptures() can later patch each entry's
  ///        MEMBER_OF index to point at the parent lambda entry.
  void generateInfoForLambdaCaptures(
      const ValueDecl *VD, llvm::Value *Arg, MapBaseValuesArrayTy &BasePointers,
      MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes,
      MapFlagsArrayTy &Types,
      llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers) const {
    // Nothing to do unless VD is a lambda closure type.
    const auto *RD = VD->getType()
                         .getCanonicalType()
                         .getNonReferenceType()
                         ->getAsCXXRecordDecl();
    if (!RD || !RD->isLambda())
      return;
    Address VDAddr = Address(Arg, CGF.getContext().getDeclAlign(VD));
    LValue VDLVal = CGF.MakeAddrLValue(
        VDAddr, VD->getType().getCanonicalType().getNonReferenceType());
    // Collect the closure's capture fields.
    llvm::DenseMap<const VarDecl *, FieldDecl *> Captures;
    FieldDecl *ThisCapture = nullptr;
    RD->getCaptureFields(Captures, ThisCapture);
    // Map the captured 'this' pointer, if present.
    if (ThisCapture) {
      LValue ThisLVal =
          CGF.EmitLValueForFieldInitialization(VDLVal, ThisCapture);
      LValue ThisLValVal = CGF.EmitLValueForField(VDLVal, ThisCapture);
      LambdaPointers.try_emplace(ThisLVal.getPointer(), VDLVal.getPointer());
      BasePointers.push_back(ThisLVal.getPointer());
      Pointers.push_back(ThisLValVal.getPointer());
      Sizes.push_back(CGF.getTypeSize(CGF.getContext().VoidPtrTy));
      Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
                      OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
    }
    // Map each variable the lambda captured by reference.
    for (const LambdaCapture &LC : RD->captures()) {
      if (LC.getCaptureKind() != LCK_ByRef)
        continue;
      const VarDecl *VD = LC.getCapturedVar();
      auto It = Captures.find(VD);
      assert(It != Captures.end() && "Found lambda capture without field.");
      LValue VarLVal = CGF.EmitLValueForFieldInitialization(VDLVal, It->second);
      LValue VarLValVal = CGF.EmitLValueForField(VDLVal, It->second);
      LambdaPointers.try_emplace(VarLVal.getPointer(), VDLVal.getPointer());
      BasePointers.push_back(VarLVal.getPointer());
      Pointers.push_back(VarLValVal.getPointer());
      // The mapped size is that of the referenced variable itself.
      Sizes.push_back(CGF.getTypeSize(
          VD->getType().getCanonicalType().getNonReferenceType()));
      Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
                      OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
    }
  }
8016 
8017   /// Set correct indices for lambdas captures.
8018   void adjustMemberOfForLambdaCaptures(
8019       const llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers,
8020       MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
8021       MapFlagsArrayTy &Types) const {
8022     for (unsigned I = 0, E = Types.size(); I < E; ++I) {
8023       // Set correct member_of idx for all implicit lambda captures.
8024       if (Types[I] != (OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
8025                        OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT))
8026         continue;
8027       llvm::Value *BasePtr = LambdaPointers.lookup(*BasePointers[I]);
8028       assert(BasePtr && "Unable to find base lambda address.");
8029       int TgtIdx = -1;
8030       for (unsigned J = I; J > 0; --J) {
8031         unsigned Idx = J - 1;
8032         if (Pointers[Idx] != BasePtr)
8033           continue;
8034         TgtIdx = Idx;
8035         break;
8036       }
8037       assert(TgtIdx != -1 && "Unable to find parent lambda.");
8038       // All other current entries will be MEMBER_OF the combined entry
8039       // (except for PTR_AND_OBJ entries which do not have a placeholder value
8040       // 0xFFFF in the MEMBER_OF field).
8041       OpenMPOffloadMappingFlags MemberOfFlag = getMemberOfFlag(TgtIdx);
8042       setCorrectMemberOfFlag(Types[I], MemberOfFlag);
8043     }
8044   }
8045 
  /// Generate the base pointers, section pointers, sizes and map types
  /// associated to a given capture.
  ///
  /// Gathers every map-clause component list that refers to the captured
  /// declaration, detects component lists that overlap (map the same object
  /// through a common prefix), and emits their map entries, passing the
  /// overlap information along so overlapped regions are not mapped twice.
  void generateInfoForCapture(const CapturedStmt::Capture *Cap,
                              llvm::Value *Arg,
                              MapBaseValuesArrayTy &BasePointers,
                              MapValuesArrayTy &Pointers,
                              MapValuesArrayTy &Sizes, MapFlagsArrayTy &Types,
                              StructRangeInfoTy &PartialStruct) const {
    assert(!Cap->capturesVariableArrayType() &&
           "Not expecting to generate map info for a variable array type!");

    // A 'this' capture is represented by a null declaration; otherwise use
    // the canonical declaration of the captured variable.
    const ValueDecl *VD = Cap->capturesThis()
                              ? nullptr
                              : Cap->getCapturedVar()->getCanonicalDecl();

    // If this declaration appears in a is_device_ptr clause we just have to
    // pass the pointer by value. If it is a reference to a declaration, we just
    // pass its value.
    if (DevPointersMap.count(VD)) {
      BasePointers.emplace_back(Arg, VD);
      Pointers.push_back(Arg);
      Sizes.push_back(CGF.getTypeSize(CGF.getContext().VoidPtrTy));
      Types.push_back(OMP_MAP_LITERAL | OMP_MAP_TARGET_PARAM);
      return;
    }

    // Collect all map-clause component lists that mention this declaration,
    // together with their map type, modifiers and implicitness.
    using MapData =
        std::tuple<OMPClauseMappableExprCommon::MappableExprComponentListRef,
                   OpenMPMapClauseKind, ArrayRef<OpenMPMapModifierKind>, bool>;
    SmallVector<MapData, 4> DeclComponentLists;
    // FIXME: MSVC 2013 seems to require this-> to find member CurDir.
    for (const auto *C : this->CurDir.getClausesOfKind<OMPMapClause>()) {
      for (const auto &L : C->decl_component_lists(VD)) {
        assert(L.first == VD &&
               "We got information for the wrong declaration??");
        assert(!L.second.empty() &&
               "Not expecting declaration with no component lists.");
        DeclComponentLists.emplace_back(L.second, C->getMapType(),
                                        C->getMapTypeModifiers(),
                                        C->isImplicit());
      }
    }

    // Find overlapping elements (including the offset from the base element).
    // For each pair of component lists, walk both from the base outwards; if
    // one list is exhausted while matching, the shorter list maps an object
    // that contains the other's, so record the longer list under the shorter.
    llvm::SmallDenseMap<
        const MapData *,
        llvm::SmallVector<
            OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>,
        4>
        OverlappedData;
    size_t Count = 0;
    for (const MapData &L : DeclComponentLists) {
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      std::tie(Components, MapType, MapModifiers, IsImplicit) = L;
      ++Count;
      for (const MapData &L1 : makeArrayRef(DeclComponentLists).slice(Count)) {
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components1;
        // Only Components1 is needed here; the other three outputs are
        // scratch and are re-initialized on the next outer iteration.
        std::tie(Components1, MapType, MapModifiers, IsImplicit) = L1;
        auto CI = Components.rbegin();
        auto CE = Components.rend();
        auto SI = Components1.rbegin();
        auto SE = Components1.rend();
        for (; CI != CE && SI != SE; ++CI, ++SI) {
          if (CI->getAssociatedExpression()->getStmtClass() !=
              SI->getAssociatedExpression()->getStmtClass())
            break;
          // Are we dealing with different variables/fields?
          if (CI->getAssociatedDeclaration() != SI->getAssociatedDeclaration())
            break;
        }
        // Found overlapping if, at least for one component, reached the head of
        // the components list.
        if (CI == CE || SI == SE) {
          assert((CI != CE || SI != SE) &&
                 "Unexpected full match of the mapping components.");
          const MapData &BaseData = CI == CE ? L : L1;
          OMPClauseMappableExprCommon::MappableExprComponentListRef SubData =
              SI == SE ? Components : Components1;
          auto &OverlappedElements = OverlappedData.FindAndConstruct(&BaseData);
          OverlappedElements.getSecond().push_back(SubData);
        }
      }
    }
    // Sort the overlapped elements for each item.
    // Layout holds the record's fields in declaration/layout order and is
    // used to order fields that belong to different (base) classes.
    llvm::SmallVector<const FieldDecl *, 4> Layout;
    if (!OverlappedData.empty()) {
      if (const auto *CRD =
              VD->getType().getCanonicalType()->getAsCXXRecordDecl())
        getPlainLayout(CRD, Layout, /*AsBase=*/false);
      else {
        const auto *RD = VD->getType().getCanonicalType()->getAsRecordDecl();
        Layout.append(RD->field_begin(), RD->field_end());
      }
    }
    for (auto &Pair : OverlappedData) {
      llvm::sort(
          Pair.getSecond(),
          [&Layout](
              OMPClauseMappableExprCommon::MappableExprComponentListRef First,
              OMPClauseMappableExprCommon::MappableExprComponentListRef
                  Second) {
            // Skip the common prefix of the two component lists.
            auto CI = First.rbegin();
            auto CE = First.rend();
            auto SI = Second.rbegin();
            auto SE = Second.rend();
            for (; CI != CE && SI != SE; ++CI, ++SI) {
              if (CI->getAssociatedExpression()->getStmtClass() !=
                  SI->getAssociatedExpression()->getStmtClass())
                break;
              // Are we dealing with different variables/fields?
              if (CI->getAssociatedDeclaration() !=
                  SI->getAssociatedDeclaration())
                break;
            }

            // Lists contain the same elements.
            if (CI == CE && SI == SE)
              return false;

            // List with less elements is less than list with more elements.
            if (CI == CE || SI == SE)
              return CI == CE;

            // Otherwise order by the position of the first differing field:
            // same parent record -> field index; different parents -> the one
            // that appears first in the plain layout.
            const auto *FD1 = cast<FieldDecl>(CI->getAssociatedDeclaration());
            const auto *FD2 = cast<FieldDecl>(SI->getAssociatedDeclaration());
            if (FD1->getParent() == FD2->getParent())
              return FD1->getFieldIndex() < FD2->getFieldIndex();
            const auto It =
                llvm::find_if(Layout, [FD1, FD2](const FieldDecl *FD) {
                  return FD == FD1 || FD == FD2;
                });
            return *It == FD1;
          });
    }

    // Associated with a capture, because the mapping flags depend on it.
    // Go through all of the elements with the overlapped elements.
    for (const auto &Pair : OverlappedData) {
      const MapData &L = *Pair.getFirst();
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      std::tie(Components, MapType, MapModifiers, IsImplicit) = L;
      ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
          OverlappedComponents = Pair.getSecond();
      bool IsFirstComponentList = true;
      generateInfoForComponentList(MapType, MapModifiers, Components,
                                   BasePointers, Pointers, Sizes, Types,
                                   PartialStruct, IsFirstComponentList,
                                   IsImplicit, OverlappedComponents);
    }
    // Go through other elements without overlapped elements.
    bool IsFirstComponentList = OverlappedData.empty();
    for (const MapData &L : DeclComponentLists) {
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      std::tie(Components, MapType, MapModifiers, IsImplicit) = L;
      auto It = OverlappedData.find(&L);
      if (It == OverlappedData.end())
        generateInfoForComponentList(MapType, MapModifiers, Components,
                                     BasePointers, Pointers, Sizes, Types,
                                     PartialStruct, IsFirstComponentList,
                                     IsImplicit);
      IsFirstComponentList = false;
    }
  }
8219 
8220   /// Generate the base pointers, section pointers, sizes and map types
8221   /// associated with the declare target link variables.
8222   void generateInfoForDeclareTargetLink(MapBaseValuesArrayTy &BasePointers,
8223                                         MapValuesArrayTy &Pointers,
8224                                         MapValuesArrayTy &Sizes,
8225                                         MapFlagsArrayTy &Types) const {
8226     // Map other list items in the map clause which are not captured variables
8227     // but "declare target link" global variables.,
8228     for (const auto *C : this->CurDir.getClausesOfKind<OMPMapClause>()) {
8229       for (const auto &L : C->component_lists()) {
8230         if (!L.first)
8231           continue;
8232         const auto *VD = dyn_cast<VarDecl>(L.first);
8233         if (!VD)
8234           continue;
8235         llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
8236             OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
8237         if (!Res || *Res != OMPDeclareTargetDeclAttr::MT_Link)
8238           continue;
8239         StructRangeInfoTy PartialStruct;
8240         generateInfoForComponentList(
8241             C->getMapType(), C->getMapTypeModifiers(), L.second, BasePointers,
8242             Pointers, Sizes, Types, PartialStruct,
8243             /*IsFirstComponentList=*/true, C->isImplicit());
8244         assert(!PartialStruct.Base.isValid() &&
8245                "No partial structs for declare target link expected.");
8246       }
8247     }
8248   }
8249 
  /// Generate the default map information for a given capture \a CI,
  /// record field declaration \a RI and captured value \a CV.
  ///
  /// Used when the captured entity has no explicit map clause: emits exactly
  /// one entry whose flags depend on how the entity was captured ('this',
  /// by-copy, or by-reference), always marked TARGET_PARAM | IMPLICIT.
  void generateDefaultMapInfo(const CapturedStmt::Capture &CI,
                              const FieldDecl &RI, llvm::Value *CV,
                              MapBaseValuesArrayTy &CurBasePointers,
                              MapValuesArrayTy &CurPointers,
                              MapValuesArrayTy &CurSizes,
                              MapFlagsArrayTy &CurMapTypes) const {
    // Do the default mapping.
    if (CI.capturesThis()) {
      // 'this': map the pointed-to object with size of the pointee type.
      CurBasePointers.push_back(CV);
      CurPointers.push_back(CV);
      const auto *PtrTy = cast<PointerType>(RI.getType().getTypePtr());
      CurSizes.push_back(CGF.getTypeSize(PtrTy->getPointeeType()));
      // Default map type.
      CurMapTypes.push_back(OMP_MAP_TO | OMP_MAP_FROM);
    } else if (CI.capturesVariableByCopy()) {
      CurBasePointers.push_back(CV);
      CurPointers.push_back(CV);
      if (!RI.getType()->isAnyPointerType()) {
        // We have to signal to the runtime captures passed by value that are
        // not pointers.
        CurMapTypes.push_back(OMP_MAP_LITERAL);
        CurSizes.push_back(CGF.getTypeSize(RI.getType()));
      } else {
        // Pointers are implicitly mapped with a zero size and no flags
        // (other than first map that is added for all implicit maps).
        CurMapTypes.push_back(OMP_MAP_NONE);
        CurSizes.push_back(llvm::Constant::getNullValue(CGF.SizeTy));
      }
    } else {
      assert(CI.capturesVariable() && "Expected captured reference.");
      const auto *PtrTy = cast<ReferenceType>(RI.getType().getTypePtr());
      QualType ElementType = PtrTy->getPointeeType();
      CurSizes.push_back(CGF.getTypeSize(ElementType));
      // The default map type for a scalar/complex type is 'to' because by
      // default the value doesn't have to be retrieved. For an aggregate
      // type, the default is 'tofrom'.
      CurMapTypes.push_back(getMapModifiersForPrivateClauses(CI));
      const VarDecl *VD = CI.getCapturedVar();
      // Constant firstprivate variables get a global copy so the runtime can
      // reuse it across target invocations.
      if (FirstPrivateDecls.count(VD) &&
          VD->getType().isConstant(CGF.getContext())) {
        llvm::Constant *Addr =
            CGF.CGM.getOpenMPRuntime().registerTargetFirstprivateCopy(CGF, VD);
        // Copy the value of the original variable to the new global copy.
        CGF.Builder.CreateMemCpy(
            CGF.MakeNaturalAlignAddrLValue(Addr, ElementType).getAddress(),
            Address(CV, CGF.getContext().getTypeAlignInChars(ElementType)),
            CurSizes.back(), /*isVolatile=*/false);
        // Use new global variable as the base pointers.
        CurBasePointers.push_back(Addr);
        CurPointers.push_back(Addr);
      } else {
        CurBasePointers.push_back(CV);
        if (FirstPrivateDecls.count(VD) && ElementType->isAnyPointerType()) {
          // Firstprivate pointers: map what the pointer points to, so load
          // the pointee address through the reference.
          Address PtrAddr = CGF.EmitLoadOfReference(CGF.MakeAddrLValue(
              CV, ElementType, CGF.getContext().getDeclAlign(VD),
              AlignmentSource::Decl));
          CurPointers.push_back(PtrAddr.getPointer());
        } else {
          CurPointers.push_back(CV);
        }
      }
    }
    // Every default map produces a single argument which is a target parameter.
    CurMapTypes.back() |= OMP_MAP_TARGET_PARAM;

    // Add flag stating this is an implicit map.
    CurMapTypes.back() |= OMP_MAP_IMPLICIT;
  }
8320 };
8321 
/// Reserved device IDs understood by the offloading runtime library.
enum OpenMPOffloadingReservedDeviceIDs {
  /// Device ID if the device was not defined, runtime should get it
  /// from environment variables in the spec.
  OMP_DEVICEID_UNDEF = -1,
};
8327 } // anonymous namespace
8328 
/// Emit the arrays used to pass the captures and map information to the
/// offloading runtime library. If there is no map or capture information,
/// return nullptr by reference.
///
/// Allocates the .offload_baseptrs/.offload_ptrs stack arrays (and either a
/// stack array or a constant global for the sizes), creates the constant
/// global holding the map types, and stores each base pointer, pointer and
/// (runtime-evaluated) size into its slot. Results are reported via \p Info.
static void
emitOffloadingArrays(CodeGenFunction &CGF,
                     MappableExprsHandler::MapBaseValuesArrayTy &BasePointers,
                     MappableExprsHandler::MapValuesArrayTy &Pointers,
                     MappableExprsHandler::MapValuesArrayTy &Sizes,
                     MappableExprsHandler::MapFlagsArrayTy &MapTypes,
                     CGOpenMPRuntime::TargetDataInfo &Info) {
  CodeGenModule &CGM = CGF.CGM;
  ASTContext &Ctx = CGF.getContext();

  // Reset the array information.
  Info.clearArrayInfo();
  Info.NumberOfPtrs = BasePointers.size();

  if (Info.NumberOfPtrs) {
    // Detect if we have any capture size requiring runtime evaluation of the
    // size so that a constant array could be eventually used.
    bool hasRuntimeEvaluationCaptureSize = false;
    for (llvm::Value *S : Sizes)
      if (!isa<llvm::Constant>(S)) {
        hasRuntimeEvaluationCaptureSize = true;
        break;
      }

    // Type of the base-pointer and pointer arrays: void *[NumberOfPtrs].
    llvm::APInt PointerNumAP(32, Info.NumberOfPtrs, /*isSigned=*/true);
    QualType PointerArrayType =
        Ctx.getConstantArrayType(Ctx.VoidPtrTy, PointerNumAP, ArrayType::Normal,
                                 /*IndexTypeQuals=*/0);

    Info.BasePointersArray =
        CGF.CreateMemTemp(PointerArrayType, ".offload_baseptrs").getPointer();
    Info.PointersArray =
        CGF.CreateMemTemp(PointerArrayType, ".offload_ptrs").getPointer();

    // If we don't have any VLA types or other types that require runtime
    // evaluation, we can use a constant array for the map sizes, otherwise we
    // need to fill up the arrays as we do for the pointers.
    if (hasRuntimeEvaluationCaptureSize) {
      QualType SizeArrayType = Ctx.getConstantArrayType(
          Ctx.getSizeType(), PointerNumAP, ArrayType::Normal,
          /*IndexTypeQuals=*/0);
      Info.SizesArray =
          CGF.CreateMemTemp(SizeArrayType, ".offload_sizes").getPointer();
    } else {
      // We expect all the sizes to be constant, so we collect them to create
      // a constant array.
      SmallVector<llvm::Constant *, 16> ConstSizes;
      for (llvm::Value *S : Sizes)
        ConstSizes.push_back(cast<llvm::Constant>(S));

      auto *SizesArrayInit = llvm::ConstantArray::get(
          llvm::ArrayType::get(CGM.SizeTy, ConstSizes.size()), ConstSizes);
      std::string Name = CGM.getOpenMPRuntime().getName({"offload_sizes"});
      auto *SizesArrayGbl = new llvm::GlobalVariable(
          CGM.getModule(), SizesArrayInit->getType(),
          /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
          SizesArrayInit, Name);
      SizesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
      Info.SizesArray = SizesArrayGbl;
    }

    // The map types are always constant so we don't need to generate code to
    // fill arrays. Instead, we create an array constant.
    SmallVector<uint64_t, 4> Mapping(MapTypes.size(), 0);
    llvm::copy(MapTypes, Mapping.begin());
    llvm::Constant *MapTypesArrayInit =
        llvm::ConstantDataArray::get(CGF.Builder.getContext(), Mapping);
    std::string MaptypesName =
        CGM.getOpenMPRuntime().getName({"offload_maptypes"});
    auto *MapTypesArrayGbl = new llvm::GlobalVariable(
        CGM.getModule(), MapTypesArrayInit->getType(),
        /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
        MapTypesArrayInit, MaptypesName);
    MapTypesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
    Info.MapTypesArray = MapTypesArrayGbl;

    // Fill the I-th slot of each array.
    for (unsigned I = 0; I < Info.NumberOfPtrs; ++I) {
      // Store the base pointer.
      llvm::Value *BPVal = *BasePointers[I];
      llvm::Value *BP = CGF.Builder.CreateConstInBoundsGEP2_32(
          llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
          Info.BasePointersArray, 0, I);
      BP = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          BP, BPVal->getType()->getPointerTo(/*AddrSpace=*/0));
      Address BPAddr(BP, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
      CGF.Builder.CreateStore(BPVal, BPAddr);

      // Record where use_device_ptr declarations can find their address.
      if (Info.requiresDevicePointerInfo())
        if (const ValueDecl *DevVD = BasePointers[I].getDevicePtrDecl())
          Info.CaptureDeviceAddrMap.try_emplace(DevVD, BPAddr);

      // Store the section pointer.
      llvm::Value *PVal = Pointers[I];
      llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32(
          llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
          Info.PointersArray, 0, I);
      P = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          P, PVal->getType()->getPointerTo(/*AddrSpace=*/0));
      Address PAddr(P, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
      CGF.Builder.CreateStore(PVal, PAddr);

      // Store the size only when the sizes array lives on the stack; the
      // constant-global variant was fully initialized above.
      if (hasRuntimeEvaluationCaptureSize) {
        llvm::Value *S = CGF.Builder.CreateConstInBoundsGEP2_32(
            llvm::ArrayType::get(CGM.SizeTy, Info.NumberOfPtrs),
            Info.SizesArray,
            /*Idx0=*/0,
            /*Idx1=*/I);
        Address SAddr(S, Ctx.getTypeAlignInChars(Ctx.getSizeType()));
        CGF.Builder.CreateStore(
            CGF.Builder.CreateIntCast(Sizes[I], CGM.SizeTy, /*isSigned=*/true),
            SAddr);
      }
    }
  }
}
8445 /// Emit the arguments to be passed to the runtime library based on the
8446 /// arrays of pointers, sizes and map types.
8447 static void emitOffloadingArraysArgument(
8448     CodeGenFunction &CGF, llvm::Value *&BasePointersArrayArg,
8449     llvm::Value *&PointersArrayArg, llvm::Value *&SizesArrayArg,
8450     llvm::Value *&MapTypesArrayArg, CGOpenMPRuntime::TargetDataInfo &Info) {
8451   CodeGenModule &CGM = CGF.CGM;
8452   if (Info.NumberOfPtrs) {
8453     BasePointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
8454         llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
8455         Info.BasePointersArray,
8456         /*Idx0=*/0, /*Idx1=*/0);
8457     PointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
8458         llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
8459         Info.PointersArray,
8460         /*Idx0=*/0,
8461         /*Idx1=*/0);
8462     SizesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
8463         llvm::ArrayType::get(CGM.SizeTy, Info.NumberOfPtrs), Info.SizesArray,
8464         /*Idx0=*/0, /*Idx1=*/0);
8465     MapTypesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
8466         llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs),
8467         Info.MapTypesArray,
8468         /*Idx0=*/0,
8469         /*Idx1=*/0);
8470   } else {
8471     BasePointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
8472     PointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
8473     SizesArrayArg = llvm::ConstantPointerNull::get(CGM.SizeTy->getPointerTo());
8474     MapTypesArrayArg =
8475         llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo());
8476   }
8477 }
8478 
/// Check for inner distribute directive.
///
/// Returns the distribute directive nested inside \p D (directly, or one
/// level below a nested 'teams' for a plain 'target'), or nullptr if none
/// is present.
static const OMPExecutableDirective *
getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) {
  const auto *CS = D.getInnermostCapturedStmt();
  const auto *Body =
      CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
  const Stmt *ChildStmt =
      CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);

  if (const auto *NestedDir =
          dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
    OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind();
    switch (D.getDirectiveKind()) {
    case OMPD_target:
      // 'target' may contain 'distribute' directly or via a nested 'teams'.
      if (isOpenMPDistributeDirective(DKind))
        return NestedDir;
      if (DKind == OMPD_teams) {
        // Look one level deeper, inside the 'teams' region.
        Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers(
            /*IgnoreCaptured=*/true);
        if (!Body)
          return nullptr;
        ChildStmt = CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);
        if (const auto *NND =
                dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
          DKind = NND->getDirectiveKind();
          if (isOpenMPDistributeDirective(DKind))
            return NND;
        }
      }
      return nullptr;
    case OMPD_target_teams:
      if (isOpenMPDistributeDirective(DKind))
        return NestedDir;
      return nullptr;
    // Target forms that cannot contain a nested distribute of interest.
    case OMPD_target_parallel:
    case OMPD_target_simd:
    case OMPD_target_parallel_for:
    case OMPD_target_parallel_for_simd:
      return nullptr;
    // All remaining directive kinds are not expected here: \p D must be a
    // target directive without its own distribute component.
    case OMPD_target_teams_distribute:
    case OMPD_target_teams_distribute_simd:
    case OMPD_target_teams_distribute_parallel_for:
    case OMPD_target_teams_distribute_parallel_for_simd:
    case OMPD_parallel:
    case OMPD_for:
    case OMPD_parallel_for:
    case OMPD_parallel_sections:
    case OMPD_for_simd:
    case OMPD_parallel_for_simd:
    case OMPD_cancel:
    case OMPD_cancellation_point:
    case OMPD_ordered:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_task:
    case OMPD_simd:
    case OMPD_sections:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_taskyield:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_atomic:
    case OMPD_flush:
    case OMPD_teams:
    case OMPD_target_data:
    case OMPD_target_exit_data:
    case OMPD_target_enter_data:
    case OMPD_distribute:
    case OMPD_distribute_simd:
    case OMPD_distribute_parallel_for:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_target_update:
    case OMPD_declare_simd:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_requires:
    case OMPD_unknown:
      llvm_unreachable("Unexpected directive.");
    }
  }

  return nullptr;
}
8574 
8575 void CGOpenMPRuntime::emitTargetNumIterationsCall(
8576     CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *Device,
8577     const llvm::function_ref<llvm::Value *(
8578         CodeGenFunction &CGF, const OMPLoopDirective &D)> &SizeEmitter) {
8579   OpenMPDirectiveKind Kind = D.getDirectiveKind();
8580   const OMPExecutableDirective *TD = &D;
8581   // Get nested teams distribute kind directive, if any.
8582   if (!isOpenMPDistributeDirective(Kind) || !isOpenMPTeamsDirective(Kind))
8583     TD = getNestedDistributeDirective(CGM.getContext(), D);
8584   if (!TD)
8585     return;
8586   const auto *LD = cast<OMPLoopDirective>(TD);
8587   auto &&CodeGen = [LD, &Device, &SizeEmitter, this](CodeGenFunction &CGF,
8588                                                      PrePostActionTy &) {
8589     llvm::Value *NumIterations = SizeEmitter(CGF, *LD);
8590 
8591     // Emit device ID if any.
8592     llvm::Value *DeviceID;
8593     if (Device)
8594       DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
8595                                            CGF.Int64Ty, /*isSigned=*/true);
8596     else
8597       DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
8598 
8599     llvm::Value *Args[] = {DeviceID, NumIterations};
8600     CGF.EmitRuntimeCall(
8601         createRuntimeFunction(OMPRTL__kmpc_push_target_tripcount), Args);
8602   };
8603   emitInlinedDirective(CGF, OMPD_unknown, CodeGen);
8604 }
8605 
/// Emit host-side code for a '#pragma omp target' (or combined target)
/// directive: fill the offloading argument arrays, attempt to launch the
/// region on a device via the __tgt_target* runtime entry points, and execute
/// the host version of the outlined function when no device code exists, the
/// 'if' clause evaluates to false, or the offload attempt fails at run time.
///
/// \param D            The target directive being emitted.
/// \param OutlinedFn   Host version of the outlined target region.
/// \param OutlinedFnID Unique ID the runtime uses to locate the device entry;
///        null when no device binary is expected for this region.
/// \param IfCond       Expression of the 'if' clause, or null.
/// \param Device       Expression of the 'device' clause, or null.
void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF,
                                     const OMPExecutableDirective &D,
                                     llvm::Function *OutlinedFn,
                                     llvm::Value *OutlinedFnID,
                                     const Expr *IfCond, const Expr *Device) {
  if (!CGF.HaveInsertPoint())
    return;

  assert(OutlinedFn && "Invalid outlined function!");

  // A 'depend' clause forces the target region into an outer task; in that
  // case the captured variables must be re-captured inside the task region
  // (see the RequiresOuterTask checks in the lambdas below).
  const bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>();
  llvm::SmallVector<llvm::Value *, 16> CapturedVars;
  const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
  // Materialize the values captured by the target region; they become the
  // arguments of the outlined function.
  auto &&ArgsCodegen = [&CS, &CapturedVars](CodeGenFunction &CGF,
                                            PrePostActionTy &) {
    CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
  };
  emitInlinedDirective(CGF, OMPD_unknown, ArgsCodegen);

  // InputInfo and MapTypesArray are captured by reference in ThenGen below
  // and are filled in by TargetThenGen before ThenGen is actually executed.
  CodeGenFunction::OMPTargetDataInfo InputInfo;
  llvm::Value *MapTypesArray = nullptr;
  // Fill up the pointer arrays and transfer execution to the device.
  auto &&ThenGen = [this, Device, OutlinedFn, OutlinedFnID, &D, &InputInfo,
                    &MapTypesArray, &CS, RequiresOuterTask,
                    &CapturedVars](CodeGenFunction &CGF, PrePostActionTy &) {
    // On top of the arrays that were filled up, the target offloading call
    // takes as arguments the device id as well as the host pointer. The host
    // pointer is used by the runtime library to identify the current target
    // region, so it only has to be unique and not necessarily point to
    // anything. It could be the pointer to the outlined function that
    // implements the target region, but we aren't using that so that the
    // compiler doesn't need to keep that, and could therefore inline the host
    // function if proven worthwhile during optimization.

    // From this point on, we need to have an ID of the target region defined.
    assert(OutlinedFnID && "Invalid outlined function ID!");

    // Emit device ID if any.
    llvm::Value *DeviceID;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Value *PointerNum =
        CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);

    // Return value of the runtime offloading call.
    llvm::Value *Return;

    llvm::Value *NumTeams = emitNumTeamsForTargetDirective(CGF, D);
    llvm::Value *NumThreads = emitNumThreadsForTargetDirective(CGF, D);

    bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
    // The target region is an outlined function launched by the runtime
    // via calls __tgt_target() or __tgt_target_teams().
    //
    // __tgt_target() launches a target region with one team and one thread,
    // executing a serial region.  This master thread may in turn launch
    // more threads within its team upon encountering a parallel region,
    // however, no additional teams can be launched on the device.
    //
    // __tgt_target_teams() launches a target region with one or more teams,
    // each with one or more threads.  This call is required for target
    // constructs such as:
    //  'target teams'
    //  'target' / 'teams'
    //  'target teams distribute parallel for'
    //  'target parallel'
    // and so on.
    //
    // Note that on the host and CPU targets, the runtime implementation of
    // these calls simply call the outlined function without forking threads.
    // The outlined functions themselves have runtime calls to
    // __kmpc_fork_teams() and __kmpc_fork() for this purpose, codegen'd by
    // the compiler in emitTeamsCall() and emitParallelCall().
    //
    // In contrast, on the NVPTX target, the implementation of
    // __tgt_target_teams() launches a GPU kernel with the requested number
    // of teams and threads so no additional calls to the runtime are required.
    if (NumTeams) {
      // If we have NumTeams defined this means that we have an enclosed teams
      // region. Therefore we also expect to have NumThreads defined. These two
      // values should be defined in the presence of a teams directive,
      // regardless of having any clauses associated. If the user is using teams
      // but no clauses, these two values will be the default that should be
      // passed to the runtime library - a 32-bit integer with the value zero.
      assert(NumThreads && "Thread limit expression should be available along "
                           "with number of teams.");
      llvm::Value *OffloadingArgs[] = {DeviceID,
                                       OutlinedFnID,
                                       PointerNum,
                                       InputInfo.BasePointersArray.getPointer(),
                                       InputInfo.PointersArray.getPointer(),
                                       InputInfo.SizesArray.getPointer(),
                                       MapTypesArray,
                                       NumTeams,
                                       NumThreads};
      Return = CGF.EmitRuntimeCall(
          createRuntimeFunction(HasNowait ? OMPRTL__tgt_target_teams_nowait
                                          : OMPRTL__tgt_target_teams),
          OffloadingArgs);
    } else {
      llvm::Value *OffloadingArgs[] = {DeviceID,
                                       OutlinedFnID,
                                       PointerNum,
                                       InputInfo.BasePointersArray.getPointer(),
                                       InputInfo.PointersArray.getPointer(),
                                       InputInfo.SizesArray.getPointer(),
                                       MapTypesArray};
      Return = CGF.EmitRuntimeCall(
          createRuntimeFunction(HasNowait ? OMPRTL__tgt_target_nowait
                                          : OMPRTL__tgt_target),
          OffloadingArgs);
    }

    // Check the error code and execute the host version if required. A
    // non-zero return value from the offloading call means the region could
    // not be launched on the device.
    llvm::BasicBlock *OffloadFailedBlock =
        CGF.createBasicBlock("omp_offload.failed");
    llvm::BasicBlock *OffloadContBlock =
        CGF.createBasicBlock("omp_offload.cont");
    llvm::Value *Failed = CGF.Builder.CreateIsNotNull(Return);
    CGF.Builder.CreateCondBr(Failed, OffloadFailedBlock, OffloadContBlock);

    CGF.EmitBlock(OffloadFailedBlock);
    if (RequiresOuterTask) {
      // Inside the outer task the captures must be regenerated in the task's
      // own context.
      CapturedVars.clear();
      CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
    }
    emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
    CGF.EmitBranch(OffloadContBlock);

    CGF.EmitBlock(OffloadContBlock, /*IsFinished=*/true);
  };

  // Notify that the host version must be executed.
  auto &&ElseGen = [this, &D, OutlinedFn, &CS, &CapturedVars,
                    RequiresOuterTask](CodeGenFunction &CGF,
                                       PrePostActionTy &) {
    if (RequiresOuterTask) {
      // Re-capture within the outer task's context.
      CapturedVars.clear();
      CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
    }
    emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
  };

  // Build the offloading argument arrays (base pointers, pointers, sizes, map
  // types) for every capture, then run ThenGen (directly or wrapped in a
  // task) with InputInfo/MapTypesArray filled in.
  auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray,
                          &CapturedVars, RequiresOuterTask,
                          &CS](CodeGenFunction &CGF, PrePostActionTy &) {
    // Fill up the arrays with all the captured variables.
    MappableExprsHandler::MapBaseValuesArrayTy BasePointers;
    MappableExprsHandler::MapValuesArrayTy Pointers;
    MappableExprsHandler::MapValuesArrayTy Sizes;
    MappableExprsHandler::MapFlagsArrayTy MapTypes;

    // Get mappable expression information.
    MappableExprsHandler MEHandler(D, CGF);
    llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers;

    // Walk the captures, the captured-record fields, and the captured values
    // in lockstep.
    auto RI = CS.getCapturedRecordDecl()->field_begin();
    auto CV = CapturedVars.begin();
    for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(),
                                              CE = CS.capture_end();
         CI != CE; ++CI, ++RI, ++CV) {
      MappableExprsHandler::MapBaseValuesArrayTy CurBasePointers;
      MappableExprsHandler::MapValuesArrayTy CurPointers;
      MappableExprsHandler::MapValuesArrayTy CurSizes;
      MappableExprsHandler::MapFlagsArrayTy CurMapTypes;
      MappableExprsHandler::StructRangeInfoTy PartialStruct;

      // VLA sizes are passed to the outlined region by copy and do not have map
      // information associated.
      if (CI->capturesVariableArrayType()) {
        CurBasePointers.push_back(*CV);
        CurPointers.push_back(*CV);
        CurSizes.push_back(CGF.getTypeSize(RI->getType()));
        // Copy to the device as an argument. No need to retrieve it.
        CurMapTypes.push_back(MappableExprsHandler::OMP_MAP_LITERAL |
                              MappableExprsHandler::OMP_MAP_TARGET_PARAM);
      } else {
        // If we have any information in the map clause, we use it, otherwise we
        // just do a default mapping.
        MEHandler.generateInfoForCapture(CI, *CV, CurBasePointers, CurPointers,
                                         CurSizes, CurMapTypes, PartialStruct);
        if (CurBasePointers.empty())
          MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurBasePointers,
                                           CurPointers, CurSizes, CurMapTypes);
        // Generate correct mapping for variables captured by reference in
        // lambdas.
        if (CI->capturesVariable())
          MEHandler.generateInfoForLambdaCaptures(
              CI->getCapturedVar(), *CV, CurBasePointers, CurPointers, CurSizes,
              CurMapTypes, LambdaPointers);
      }
      // We expect to have at least an element of information for this capture.
      assert(!CurBasePointers.empty() &&
             "Non-existing map pointer for capture!");
      assert(CurBasePointers.size() == CurPointers.size() &&
             CurBasePointers.size() == CurSizes.size() &&
             CurBasePointers.size() == CurMapTypes.size() &&
             "Inconsistent map information sizes!");

      // If there is an entry in PartialStruct it means we have a struct with
      // individual members mapped. Emit an extra combined entry.
      if (PartialStruct.Base.isValid())
        MEHandler.emitCombinedEntry(BasePointers, Pointers, Sizes, MapTypes,
                                    CurMapTypes, PartialStruct);

      // We need to append the results of this capture to what we already have.
      BasePointers.append(CurBasePointers.begin(), CurBasePointers.end());
      Pointers.append(CurPointers.begin(), CurPointers.end());
      Sizes.append(CurSizes.begin(), CurSizes.end());
      MapTypes.append(CurMapTypes.begin(), CurMapTypes.end());
    }
    // Adjust MEMBER_OF flags for the lambdas captures.
    MEHandler.adjustMemberOfForLambdaCaptures(LambdaPointers, BasePointers,
                                              Pointers, MapTypes);
    // Map other list items in the map clause which are not captured variables
    // but "declare target link" global variables.
    MEHandler.generateInfoForDeclareTargetLink(BasePointers, Pointers, Sizes,
                                               MapTypes);

    TargetDataInfo Info;
    // Fill up the arrays and create the arguments.
    emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info);
    emitOffloadingArraysArgument(CGF, Info.BasePointersArray,
                                 Info.PointersArray, Info.SizesArray,
                                 Info.MapTypesArray, Info);
    // Publish the results into the variables ThenGen captured by reference.
    InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
    InputInfo.BasePointersArray =
        Address(Info.BasePointersArray, CGM.getPointerAlign());
    InputInfo.PointersArray =
        Address(Info.PointersArray, CGM.getPointerAlign());
    InputInfo.SizesArray = Address(Info.SizesArray, CGM.getPointerAlign());
    MapTypesArray = Info.MapTypesArray;
    if (RequiresOuterTask)
      CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
    else
      emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
  };

  // Host fallback, wrapped in a task when a 'depend' clause is present.
  auto &&TargetElseGen = [this, &ElseGen, &D, RequiresOuterTask](
                             CodeGenFunction &CGF, PrePostActionTy &) {
    if (RequiresOuterTask) {
      CodeGenFunction::OMPTargetDataInfo InputInfo;
      CGF.EmitOMPTargetTaskBasedDirective(D, ElseGen, InputInfo);
    } else {
      emitInlinedDirective(CGF, D.getDirectiveKind(), ElseGen);
    }
  };

  // If we have a target function ID it means that we need to support
  // offloading, otherwise, just execute on the host. We need to execute on host
  // regardless of the conditional in the if clause if, e.g., the user do not
  // specify target triples.
  if (OutlinedFnID) {
    if (IfCond) {
      emitOMPIfClause(CGF, IfCond, TargetThenGen, TargetElseGen);
    } else {
      RegionCodeGenTy ThenRCG(TargetThenGen);
      ThenRCG(CGF);
    }
  } else {
    RegionCodeGenTy ElseRCG(TargetElseGen);
    ElseRCG(CGF);
  }
}
8876 
/// Recursively scan \p S for OpenMP target directives and emit the device
/// function for each target region found, using \p ParentName as the base of
/// the target-entry name mangling.
void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S,
                                                    StringRef ParentName) {
  if (!S)
    return;

  // Codegen OMP target directives that offload compute to the device.
  bool RequiresDeviceCodegen =
      isa<OMPExecutableDirective>(S) &&
      isOpenMPTargetExecutionDirective(
          cast<OMPExecutableDirective>(S)->getDirectiveKind());

  if (RequiresDeviceCodegen) {
    const auto &E = *cast<OMPExecutableDirective>(S);
    // The unique entry info (device/file/line of the directive) identifies
    // this target region in the offload entries table.
    unsigned DeviceID;
    unsigned FileID;
    unsigned Line;
    getTargetEntryUniqueInfo(CGM.getContext(), E.getBeginLoc(), DeviceID,
                             FileID, Line);

    // Is this a target region that should not be emitted as an entry point? If
    // so just signal we are done with this target region.
    if (!OffloadEntriesInfoManager.hasTargetRegionEntryInfo(DeviceID, FileID,
                                                            ParentName, Line))
      return;

    // Dispatch to the device-function emitter matching the directive kind.
    switch (E.getDirectiveKind()) {
    case OMPD_target:
      CodeGenFunction::EmitOMPTargetDeviceFunction(CGM, ParentName,
                                                   cast<OMPTargetDirective>(E));
      break;
    case OMPD_target_parallel:
      CodeGenFunction::EmitOMPTargetParallelDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelDirective>(E));
      break;
    case OMPD_target_teams:
      CodeGenFunction::EmitOMPTargetTeamsDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDirective>(E));
      break;
    case OMPD_target_teams_distribute:
      CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDistributeDirective>(E));
      break;
    case OMPD_target_teams_distribute_simd:
      CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDistributeSimdDirective>(E));
      break;
    case OMPD_target_parallel_for:
      CodeGenFunction::EmitOMPTargetParallelForDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelForDirective>(E));
      break;
    case OMPD_target_parallel_for_simd:
      CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelForSimdDirective>(E));
      break;
    case OMPD_target_simd:
      CodeGenFunction::EmitOMPTargetSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetSimdDirective>(E));
      break;
    case OMPD_target_teams_distribute_parallel_for:
      CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction(
          CGM, ParentName,
          cast<OMPTargetTeamsDistributeParallelForDirective>(E));
      break;
    case OMPD_target_teams_distribute_parallel_for_simd:
      CodeGenFunction::
          EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction(
              CGM, ParentName,
              cast<OMPTargetTeamsDistributeParallelForSimdDirective>(E));
      break;
    // Non-target directive kinds are impossible here: RequiresDeviceCodegen
    // already established this is a target execution directive.
    case OMPD_parallel:
    case OMPD_for:
    case OMPD_parallel_for:
    case OMPD_parallel_sections:
    case OMPD_for_simd:
    case OMPD_parallel_for_simd:
    case OMPD_cancel:
    case OMPD_cancellation_point:
    case OMPD_ordered:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_task:
    case OMPD_simd:
    case OMPD_sections:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_taskyield:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_atomic:
    case OMPD_flush:
    case OMPD_teams:
    case OMPD_target_data:
    case OMPD_target_exit_data:
    case OMPD_target_enter_data:
    case OMPD_distribute:
    case OMPD_distribute_simd:
    case OMPD_distribute_parallel_for:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_target_update:
    case OMPD_declare_simd:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_requires:
    case OMPD_unknown:
      llvm_unreachable("Unknown target directive for OpenMP device codegen.");
    }
    return;
  }

  // For any other OpenMP directive, descend into its captured statement only;
  // target regions cannot appear in its clauses.
  if (const auto *E = dyn_cast<OMPExecutableDirective>(S)) {
    if (!E->hasAssociatedStmt() || !E->getAssociatedStmt())
      return;

    scanForTargetRegionsFunctions(
        E->getInnermostCapturedStmt()->getCapturedStmt(), ParentName);
    return;
  }

  // If this is a lambda function, look into its body.
  if (const auto *L = dyn_cast<LambdaExpr>(S))
    S = L->getBody();

  // Keep looking for target regions recursively.
  for (const Stmt *II : S->children())
    scanForTargetRegionsFunctions(II, ParentName);
}
9014 
9015 bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) {
9016   // If emitting code for the host, we do not process FD here. Instead we do
9017   // the normal code generation.
9018   if (!CGM.getLangOpts().OpenMPIsDevice)
9019     return false;
9020 
9021   const ValueDecl *VD = cast<ValueDecl>(GD.getDecl());
9022   StringRef Name = CGM.getMangledName(GD);
9023   // Try to detect target regions in the function.
9024   if (const auto *FD = dyn_cast<FunctionDecl>(VD))
9025     scanForTargetRegionsFunctions(FD->getBody(), Name);
9026 
9027   // Do not to emit function if it is not marked as declare target.
9028   return !OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD) &&
9029          AlreadyEmittedTargetFunctions.count(Name) == 0;
9030 }
9031 
9032 bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
9033   if (!CGM.getLangOpts().OpenMPIsDevice)
9034     return false;
9035 
9036   // Check if there are Ctors/Dtors in this declaration and look for target
9037   // regions in it. We use the complete variant to produce the kernel name
9038   // mangling.
9039   QualType RDTy = cast<VarDecl>(GD.getDecl())->getType();
9040   if (const auto *RD = RDTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl()) {
9041     for (const CXXConstructorDecl *Ctor : RD->ctors()) {
9042       StringRef ParentName =
9043           CGM.getMangledName(GlobalDecl(Ctor, Ctor_Complete));
9044       scanForTargetRegionsFunctions(Ctor->getBody(), ParentName);
9045     }
9046     if (const CXXDestructorDecl *Dtor = RD->getDestructor()) {
9047       StringRef ParentName =
9048           CGM.getMangledName(GlobalDecl(Dtor, Dtor_Complete));
9049       scanForTargetRegionsFunctions(Dtor->getBody(), ParentName);
9050     }
9051   }
9052 
9053   // Do not to emit variable if it is not marked as declare target.
9054   llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
9055       OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(
9056           cast<VarDecl>(GD.getDecl()));
9057   if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link) {
9058     DeferredGlobalVariables.insert(cast<VarDecl>(GD.getDecl()));
9059     return true;
9060   }
9061   return false;
9062 }
9063 
9064 llvm::Constant *
9065 CGOpenMPRuntime::registerTargetFirstprivateCopy(CodeGenFunction &CGF,
9066                                                 const VarDecl *VD) {
9067   assert(VD->getType().isConstant(CGM.getContext()) &&
9068          "Expected constant variable.");
9069   StringRef VarName;
9070   llvm::Constant *Addr;
9071   llvm::GlobalValue::LinkageTypes Linkage;
9072   QualType Ty = VD->getType();
9073   SmallString<128> Buffer;
9074   {
9075     unsigned DeviceID;
9076     unsigned FileID;
9077     unsigned Line;
9078     getTargetEntryUniqueInfo(CGM.getContext(), VD->getLocation(), DeviceID,
9079                              FileID, Line);
9080     llvm::raw_svector_ostream OS(Buffer);
9081     OS << "__omp_offloading_firstprivate_" << llvm::format("_%x", DeviceID)
9082        << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line;
9083     VarName = OS.str();
9084   }
9085   Linkage = llvm::GlobalValue::InternalLinkage;
9086   Addr =
9087       getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(Ty), VarName,
9088                                   getDefaultFirstprivateAddressSpace());
9089   cast<llvm::GlobalValue>(Addr)->setLinkage(Linkage);
9090   CharUnits VarSize = CGM.getContext().getTypeSizeInChars(Ty);
9091   CGM.addCompilerUsedGlobal(cast<llvm::GlobalValue>(Addr));
9092   OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo(
9093       VarName, Addr, VarSize,
9094       OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo, Linkage);
9095   return Addr;
9096 }
9097 
9098 void CGOpenMPRuntime::registerTargetGlobalVariable(const VarDecl *VD,
9099                                                    llvm::Constant *Addr) {
9100   llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
9101       OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
9102   if (!Res) {
9103     if (CGM.getLangOpts().OpenMPIsDevice) {
9104       // Register non-target variables being emitted in device code (debug info
9105       // may cause this).
9106       StringRef VarName = CGM.getMangledName(VD);
9107       EmittedNonTargetVariables.try_emplace(VarName, Addr);
9108     }
9109     return;
9110   }
9111   // Register declare target variables.
9112   OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags;
9113   StringRef VarName;
9114   CharUnits VarSize;
9115   llvm::GlobalValue::LinkageTypes Linkage;
9116   switch (*Res) {
9117   case OMPDeclareTargetDeclAttr::MT_To:
9118     Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo;
9119     VarName = CGM.getMangledName(VD);
9120     if (VD->hasDefinition(CGM.getContext()) != VarDecl::DeclarationOnly) {
9121       VarSize = CGM.getContext().getTypeSizeInChars(VD->getType());
9122       assert(!VarSize.isZero() && "Expected non-zero size of the variable");
9123     } else {
9124       VarSize = CharUnits::Zero();
9125     }
9126     Linkage = CGM.getLLVMLinkageVarDefinition(VD, /*IsConstant=*/false);
9127     // Temp solution to prevent optimizations of the internal variables.
9128     if (CGM.getLangOpts().OpenMPIsDevice && !VD->isExternallyVisible()) {
9129       std::string RefName = getName({VarName, "ref"});
9130       if (!CGM.GetGlobalValue(RefName)) {
9131         llvm::Constant *AddrRef =
9132             getOrCreateInternalVariable(Addr->getType(), RefName);
9133         auto *GVAddrRef = cast<llvm::GlobalVariable>(AddrRef);
9134         GVAddrRef->setConstant(/*Val=*/true);
9135         GVAddrRef->setLinkage(llvm::GlobalValue::InternalLinkage);
9136         GVAddrRef->setInitializer(Addr);
9137         CGM.addCompilerUsedGlobal(GVAddrRef);
9138       }
9139     }
9140     break;
9141   case OMPDeclareTargetDeclAttr::MT_Link:
9142     Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink;
9143     if (CGM.getLangOpts().OpenMPIsDevice) {
9144       VarName = Addr->getName();
9145       Addr = nullptr;
9146     } else {
9147       VarName = getAddrOfDeclareTargetLink(VD).getName();
9148       Addr = cast<llvm::Constant>(getAddrOfDeclareTargetLink(VD).getPointer());
9149     }
9150     VarSize = CGM.getPointerSize();
9151     Linkage = llvm::GlobalValue::WeakAnyLinkage;
9152     break;
9153   }
9154   OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo(
9155       VarName, Addr, VarSize, Flags, Linkage);
9156 }
9157 
9158 bool CGOpenMPRuntime::emitTargetGlobal(GlobalDecl GD) {
9159   if (isa<FunctionDecl>(GD.getDecl()) ||
9160       isa<OMPDeclareReductionDecl>(GD.getDecl()))
9161     return emitTargetFunctions(GD);
9162 
9163   return emitTargetGlobalVariable(GD);
9164 }
9165 
9166 void CGOpenMPRuntime::emitDeferredTargetDecls() const {
9167   for (const VarDecl *VD : DeferredGlobalVariables) {
9168     llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
9169         OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
9170     if (!Res)
9171       continue;
9172     if (*Res == OMPDeclareTargetDeclAttr::MT_To) {
9173       CGM.EmitGlobal(VD);
9174     } else {
9175       assert(*Res == OMPDeclareTargetDeclAttr::MT_Link &&
9176              "Expected to or link clauses.");
9177       (void)CGM.getOpenMPRuntime().getAddrOfDeclareTargetLink(VD);
9178     }
9179   }
9180 }
9181 
9182 void CGOpenMPRuntime::adjustTargetSpecificDataForLambdas(
9183     CodeGenFunction &CGF, const OMPExecutableDirective &D) const {
9184   assert(isOpenMPTargetExecutionDirective(D.getDirectiveKind()) &&
9185          " Expected target-based directive.");
9186 }
9187 
9188 bool CGOpenMPRuntime::hasAllocateAttributeForGlobalVar(const VarDecl *VD,
9189                                                        LangAS &AS) {
9190   if (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())
9191     return false;
9192   const auto *A = VD->getAttr<OMPAllocateDeclAttr>();
9193   switch(A->getAllocatorType()) {
9194   case OMPAllocateDeclAttr::OMPDefaultMemAlloc:
9195   // Not supported, fallback to the default mem space.
9196   case OMPAllocateDeclAttr::OMPLargeCapMemAlloc:
9197   case OMPAllocateDeclAttr::OMPCGroupMemAlloc:
9198   case OMPAllocateDeclAttr::OMPHighBWMemAlloc:
9199   case OMPAllocateDeclAttr::OMPLowLatMemAlloc:
9200   case OMPAllocateDeclAttr::OMPThreadMemAlloc:
9201   case OMPAllocateDeclAttr::OMPConstMemAlloc:
9202   case OMPAllocateDeclAttr::OMPPTeamMemAlloc:
9203     AS = LangAS::Default;
9204     return true;
9205   case OMPAllocateDeclAttr::OMPUserDefinedMemAlloc:
9206     llvm_unreachable("Expected predefined allocator for the variables with the "
9207                      "static storage.");
9208   }
9209   return false;
9210 }
9211 
9212 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::DisableAutoDeclareTargetRAII(
9213     CodeGenModule &CGM)
9214     : CGM(CGM) {
9215   if (CGM.getLangOpts().OpenMPIsDevice) {
9216     SavedShouldMarkAsGlobal = CGM.getOpenMPRuntime().ShouldMarkAsGlobal;
9217     CGM.getOpenMPRuntime().ShouldMarkAsGlobal = false;
9218   }
9219 }
9220 
9221 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::~DisableAutoDeclareTargetRAII() {
9222   if (CGM.getLangOpts().OpenMPIsDevice)
9223     CGM.getOpenMPRuntime().ShouldMarkAsGlobal = SavedShouldMarkAsGlobal;
9224 }
9225 
9226 bool CGOpenMPRuntime::markAsGlobalTarget(GlobalDecl GD) {
9227   if (!CGM.getLangOpts().OpenMPIsDevice || !ShouldMarkAsGlobal)
9228     return true;
9229 
9230   StringRef Name = CGM.getMangledName(GD);
9231   const auto *D = cast<FunctionDecl>(GD.getDecl());
9232   // Do not to emit function if it is marked as declare target as it was already
9233   // emitted.
9234   if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(D)) {
9235     if (D->hasBody() && AlreadyEmittedTargetFunctions.count(Name) == 0) {
9236       if (auto *F = dyn_cast_or_null<llvm::Function>(CGM.GetGlobalValue(Name)))
9237         return !F->isDeclaration();
9238       return false;
9239     }
9240     return true;
9241   }
9242 
9243   return !AlreadyEmittedTargetFunctions.insert(Name).second;
9244 }
9245 
9246 llvm::Function *CGOpenMPRuntime::emitRegistrationFunction() {
9247   // If we have offloading in the current module, we need to emit the entries
9248   // now and register the offloading descriptor.
9249   createOffloadEntriesAndInfoMetadata();
9250 
9251   // Create and register the offloading binary descriptors. This is the main
9252   // entity that captures all the information about offloading in the current
9253   // compilation unit.
9254   return createOffloadingBinaryDescriptorRegistration();
9255 }
9256 
9257 void CGOpenMPRuntime::emitTeamsCall(CodeGenFunction &CGF,
9258                                     const OMPExecutableDirective &D,
9259                                     SourceLocation Loc,
9260                                     llvm::Function *OutlinedFn,
9261                                     ArrayRef<llvm::Value *> CapturedVars) {
9262   if (!CGF.HaveInsertPoint())
9263     return;
9264 
9265   llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
9266   CodeGenFunction::RunCleanupsScope Scope(CGF);
9267 
9268   // Build call __kmpc_fork_teams(loc, n, microtask, var1, .., varn);
9269   llvm::Value *Args[] = {
9270       RTLoc,
9271       CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
9272       CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy())};
9273   llvm::SmallVector<llvm::Value *, 16> RealArgs;
9274   RealArgs.append(std::begin(Args), std::end(Args));
9275   RealArgs.append(CapturedVars.begin(), CapturedVars.end());
9276 
9277   llvm::FunctionCallee RTLFn = createRuntimeFunction(OMPRTL__kmpc_fork_teams);
9278   CGF.EmitRuntimeCall(RTLFn, RealArgs);
9279 }
9280 
9281 void CGOpenMPRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
9282                                          const Expr *NumTeams,
9283                                          const Expr *ThreadLimit,
9284                                          SourceLocation Loc) {
9285   if (!CGF.HaveInsertPoint())
9286     return;
9287 
9288   llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
9289 
9290   llvm::Value *NumTeamsVal =
9291       NumTeams
9292           ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(NumTeams),
9293                                       CGF.CGM.Int32Ty, /* isSigned = */ true)
9294           : CGF.Builder.getInt32(0);
9295 
9296   llvm::Value *ThreadLimitVal =
9297       ThreadLimit
9298           ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit),
9299                                       CGF.CGM.Int32Ty, /* isSigned = */ true)
9300           : CGF.Builder.getInt32(0);
9301 
9302   // Build call __kmpc_push_num_teamss(&loc, global_tid, num_teams, thread_limit)
9303   llvm::Value *PushNumTeamsArgs[] = {RTLoc, getThreadID(CGF, Loc), NumTeamsVal,
9304                                      ThreadLimitVal};
9305   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_num_teams),
9306                       PushNumTeamsArgs);
9307 }
9308 
void CGOpenMPRuntime::emitTargetDataCalls(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) {
  // Emit a 'target data' region: a __tgt_target_data_begin call, the region
  // body, and a matching __tgt_target_data_end call, honoring the 'if'
  // (IfCond) and 'device' (Device) clauses. The offloading arrays created for
  // the begin call are recorded in \p Info so the end call can reuse them.
  if (!CGF.HaveInsertPoint())
    return;

  // Action used to replace the default codegen action and turn privatization
  // off.
  PrePostActionTy NoPrivAction;

  // Generate the code for the opening of the data environment. Capture all the
  // arguments of the runtime call by reference because they are used in the
  // closing of the region.
  auto &&BeginThenGen = [this, &D, Device, &Info,
                         &CodeGen](CodeGenFunction &CGF, PrePostActionTy &) {
    // Fill up the arrays with all the mapped variables.
    MappableExprsHandler::MapBaseValuesArrayTy BasePointers;
    MappableExprsHandler::MapValuesArrayTy Pointers;
    MappableExprsHandler::MapValuesArrayTy Sizes;
    MappableExprsHandler::MapFlagsArrayTy MapTypes;

    // Get map clause information from the directive.
    MappableExprsHandler MCHandler(D, CGF);
    MCHandler.generateAllInfo(BasePointers, Pointers, Sizes, MapTypes);

    // Fill up the arrays and create the arguments. This also populates Info,
    // which is shared with the closing lambda below.
    emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info);

    llvm::Value *BasePointersArrayArg = nullptr;
    llvm::Value *PointersArrayArg = nullptr;
    llvm::Value *SizesArrayArg = nullptr;
    llvm::Value *MapTypesArrayArg = nullptr;
    emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
                                 SizesArrayArg, MapTypesArrayArg, Info);

    // Emit device ID if any; OMP_DEVICEID_UNDEF is used when no 'device'
    // clause is present.
    llvm::Value *DeviceID = nullptr;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);

    llvm::Value *OffloadingArgs[] = {
        DeviceID,         PointerNum,    BasePointersArrayArg,
        PointersArrayArg, SizesArrayArg, MapTypesArrayArg};
    CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_target_data_begin),
                        OffloadingArgs);

    // If device pointer privatization is required, emit the body of the region
    // here. It will have to be duplicated: with and without privatization.
    if (!Info.CaptureDeviceAddrMap.empty())
      CodeGen(CGF);
  };

  // Generate code for the closing of the data region.
  auto &&EndThenGen = [this, Device, &Info](CodeGenFunction &CGF,
                                            PrePostActionTy &) {
    assert(Info.isValid() && "Invalid data environment closing arguments.");

    llvm::Value *BasePointersArrayArg = nullptr;
    llvm::Value *PointersArrayArg = nullptr;
    llvm::Value *SizesArrayArg = nullptr;
    llvm::Value *MapTypesArrayArg = nullptr;
    // Reuse the arrays previously recorded in Info by BeginThenGen.
    emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
                                 SizesArrayArg, MapTypesArrayArg, Info);

    // Emit device ID if any; OMP_DEVICEID_UNDEF is used when no 'device'
    // clause is present.
    llvm::Value *DeviceID = nullptr;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);

    llvm::Value *OffloadingArgs[] = {
        DeviceID,         PointerNum,    BasePointersArrayArg,
        PointersArrayArg, SizesArrayArg, MapTypesArrayArg};
    CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_target_data_end),
                        OffloadingArgs);
  };

  // If we need device pointer privatization, we need to emit the body of the
  // region with no privatization in the 'else' branch of the conditional.
  // Otherwise, we don't have to do anything.
  auto &&BeginElseGen = [&Info, &CodeGen, &NoPrivAction](CodeGenFunction &CGF,
                                                         PrePostActionTy &) {
    if (!Info.CaptureDeviceAddrMap.empty()) {
      CodeGen.setAction(NoPrivAction);
      CodeGen(CGF);
    }
  };

  // We don't have to do anything to close the region if the if clause evaluates
  // to false.
  auto &&EndElseGen = [](CodeGenFunction &CGF, PrePostActionTy &) {};

  if (IfCond) {
    emitOMPIfClause(CGF, IfCond, BeginThenGen, BeginElseGen);
  } else {
    RegionCodeGenTy RCG(BeginThenGen);
    RCG(CGF);
  }

  // If we don't require privatization of device pointers, we emit the body in
  // between the runtime calls. This avoids duplicating the body code.
  if (Info.CaptureDeviceAddrMap.empty()) {
    CodeGen.setAction(NoPrivAction);
    CodeGen(CGF);
  }

  if (IfCond) {
    emitOMPIfClause(CGF, IfCond, EndThenGen, EndElseGen);
  } else {
    RegionCodeGenTy RCG(EndThenGen);
    RCG(CGF);
  }
}
9435 
void CGOpenMPRuntime::emitTargetDataStandAloneCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device) {
  // Emit the single runtime call backing a standalone data-mapping directive
  // ('target enter data', 'target exit data' or 'target update'), honoring
  // the 'if', 'device', 'nowait' and 'depend' clauses.
  if (!CGF.HaveInsertPoint())
    return;

  assert((isa<OMPTargetEnterDataDirective>(D) ||
          isa<OMPTargetExitDataDirective>(D) ||
          isa<OMPTargetUpdateDirective>(D)) &&
         "Expecting either target enter, exit data, or update directives.");

  // Captured by reference in the lambdas below: TargetThenGen fills them in,
  // ThenGen consumes them when it is eventually invoked.
  CodeGenFunction::OMPTargetDataInfo InputInfo;
  llvm::Value *MapTypesArray = nullptr;
  // Generate the actual runtime call using the offloading information
  // previously recorded in InputInfo/MapTypesArray.
  auto &&ThenGen = [this, &D, Device, &InputInfo,
                    &MapTypesArray](CodeGenFunction &CGF, PrePostActionTy &) {
    // Emit device ID if any; OMP_DEVICEID_UNDEF is used when no 'device'
    // clause is present.
    llvm::Value *DeviceID = nullptr;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Constant *PointerNum =
        CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);

    llvm::Value *OffloadingArgs[] = {DeviceID,
                                     PointerNum,
                                     InputInfo.BasePointersArray.getPointer(),
                                     InputInfo.PointersArray.getPointer(),
                                     InputInfo.SizesArray.getPointer(),
                                     MapTypesArray};

    // Select the right runtime function call for each expected standalone
    // directive; the '_nowait' variants are used when a 'nowait' clause is
    // present.
    const bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
    OpenMPRTLFunction RTLFn;
    switch (D.getDirectiveKind()) {
    case OMPD_target_enter_data:
      RTLFn = HasNowait ? OMPRTL__tgt_target_data_begin_nowait
                        : OMPRTL__tgt_target_data_begin;
      break;
    case OMPD_target_exit_data:
      RTLFn = HasNowait ? OMPRTL__tgt_target_data_end_nowait
                        : OMPRTL__tgt_target_data_end;
      break;
    case OMPD_target_update:
      RTLFn = HasNowait ? OMPRTL__tgt_target_data_update_nowait
                        : OMPRTL__tgt_target_data_update;
      break;
    // All other directive kinds are ruled out by the assertion above; listing
    // them explicitly keeps the switch exhaustive so a new directive kind
    // triggers a compiler warning here.
    case OMPD_parallel:
    case OMPD_for:
    case OMPD_parallel_for:
    case OMPD_parallel_sections:
    case OMPD_for_simd:
    case OMPD_parallel_for_simd:
    case OMPD_cancel:
    case OMPD_cancellation_point:
    case OMPD_ordered:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_task:
    case OMPD_simd:
    case OMPD_sections:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_taskyield:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_atomic:
    case OMPD_flush:
    case OMPD_teams:
    case OMPD_target_data:
    case OMPD_distribute:
    case OMPD_distribute_simd:
    case OMPD_distribute_parallel_for:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_declare_simd:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_target:
    case OMPD_target_simd:
    case OMPD_target_teams_distribute:
    case OMPD_target_teams_distribute_simd:
    case OMPD_target_teams_distribute_parallel_for:
    case OMPD_target_teams_distribute_parallel_for_simd:
    case OMPD_target_teams:
    case OMPD_target_parallel:
    case OMPD_target_parallel_for:
    case OMPD_target_parallel_for_simd:
    case OMPD_requires:
    case OMPD_unknown:
      llvm_unreachable("Unexpected standalone target data directive.");
      break;
    }
    CGF.EmitRuntimeCall(createRuntimeFunction(RTLFn), OffloadingArgs);
  };

  // Build the offloading arrays for the mapped variables, record them in
  // InputInfo/MapTypesArray, and then emit ThenGen either as part of a
  // target task (when 'depend' clauses are present) or inline.
  auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray](
                             CodeGenFunction &CGF, PrePostActionTy &) {
    // Fill up the arrays with all the mapped variables.
    MappableExprsHandler::MapBaseValuesArrayTy BasePointers;
    MappableExprsHandler::MapValuesArrayTy Pointers;
    MappableExprsHandler::MapValuesArrayTy Sizes;
    MappableExprsHandler::MapFlagsArrayTy MapTypes;

    // Get map clause information.
    MappableExprsHandler MEHandler(D, CGF);
    MEHandler.generateAllInfo(BasePointers, Pointers, Sizes, MapTypes);

    TargetDataInfo Info;
    // Fill up the arrays and create the arguments.
    emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info);
    emitOffloadingArraysArgument(CGF, Info.BasePointersArray,
                                 Info.PointersArray, Info.SizesArray,
                                 Info.MapTypesArray, Info);
    InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
    InputInfo.BasePointersArray =
        Address(Info.BasePointersArray, CGM.getPointerAlign());
    InputInfo.PointersArray =
        Address(Info.PointersArray, CGM.getPointerAlign());
    InputInfo.SizesArray =
        Address(Info.SizesArray, CGM.getPointerAlign());
    MapTypesArray = Info.MapTypesArray;
    if (D.hasClausesOfKind<OMPDependClause>())
      CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
    else
      emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
  };

  // Honor the 'if' clause: when it evaluates to false nothing is emitted.
  if (IfCond) {
    emitOMPIfClause(CGF, IfCond, TargetThenGen,
                    [](CodeGenFunction &CGF, PrePostActionTy &) {});
  } else {
    RegionCodeGenTy ThenRCG(TargetThenGen);
    ThenRCG(CGF);
  }
}
9588 
namespace {
  /// Kind of parameter in a function with 'declare simd' directive.
  enum ParamKindTy { LinearWithVarStride, Linear, Uniform, Vector };
  /// Attribute set of the parameter.
  struct ParamAttrTy {
    /// Classification of the parameter. Defaults to Vector, the kind used
    /// for parameters not mentioned in any 'declare simd' clause.
    ParamKindTy Kind = Vector;
    /// Linear step for Linear parameters; for LinearWithVarStride this is
    /// the value mangled after 's' (presumably the position of the stride
    /// argument — see the AAVFABI mangling rules).
    llvm::APSInt StrideOrArg;
    /// Alignment from an 'aligned' clause; zero-valued when absent.
    llvm::APSInt Alignment;
  };
} // namespace
9599 
9600 static unsigned evaluateCDTSize(const FunctionDecl *FD,
9601                                 ArrayRef<ParamAttrTy> ParamAttrs) {
9602   // Every vector variant of a SIMD-enabled function has a vector length (VLEN).
9603   // If OpenMP clause "simdlen" is used, the VLEN is the value of the argument
9604   // of that clause. The VLEN value must be power of 2.
9605   // In other case the notion of the function`s "characteristic data type" (CDT)
9606   // is used to compute the vector length.
9607   // CDT is defined in the following order:
9608   //   a) For non-void function, the CDT is the return type.
9609   //   b) If the function has any non-uniform, non-linear parameters, then the
9610   //   CDT is the type of the first such parameter.
9611   //   c) If the CDT determined by a) or b) above is struct, union, or class
9612   //   type which is pass-by-value (except for the type that maps to the
9613   //   built-in complex data type), the characteristic data type is int.
9614   //   d) If none of the above three cases is applicable, the CDT is int.
9615   // The VLEN is then determined based on the CDT and the size of vector
9616   // register of that ISA for which current vector version is generated. The
9617   // VLEN is computed using the formula below:
9618   //   VLEN  = sizeof(vector_register) / sizeof(CDT),
9619   // where vector register size specified in section 3.2.1 Registers and the
9620   // Stack Frame of original AMD64 ABI document.
9621   QualType RetType = FD->getReturnType();
9622   if (RetType.isNull())
9623     return 0;
9624   ASTContext &C = FD->getASTContext();
9625   QualType CDT;
9626   if (!RetType.isNull() && !RetType->isVoidType()) {
9627     CDT = RetType;
9628   } else {
9629     unsigned Offset = 0;
9630     if (const auto *MD = dyn_cast<CXXMethodDecl>(FD)) {
9631       if (ParamAttrs[Offset].Kind == Vector)
9632         CDT = C.getPointerType(C.getRecordType(MD->getParent()));
9633       ++Offset;
9634     }
9635     if (CDT.isNull()) {
9636       for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
9637         if (ParamAttrs[I + Offset].Kind == Vector) {
9638           CDT = FD->getParamDecl(I)->getType();
9639           break;
9640         }
9641       }
9642     }
9643   }
9644   if (CDT.isNull())
9645     CDT = C.IntTy;
9646   CDT = CDT->getCanonicalTypeUnqualified();
9647   if (CDT->isRecordType() || CDT->isUnionType())
9648     CDT = C.IntTy;
9649   return C.getTypeSize(CDT);
9650 }
9651 
9652 static void
9653 emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn,
9654                            const llvm::APSInt &VLENVal,
9655                            ArrayRef<ParamAttrTy> ParamAttrs,
9656                            OMPDeclareSimdDeclAttr::BranchStateTy State) {
9657   struct ISADataTy {
9658     char ISA;
9659     unsigned VecRegSize;
9660   };
9661   ISADataTy ISAData[] = {
9662       {
9663           'b', 128
9664       }, // SSE
9665       {
9666           'c', 256
9667       }, // AVX
9668       {
9669           'd', 256
9670       }, // AVX2
9671       {
9672           'e', 512
9673       }, // AVX512
9674   };
9675   llvm::SmallVector<char, 2> Masked;
9676   switch (State) {
9677   case OMPDeclareSimdDeclAttr::BS_Undefined:
9678     Masked.push_back('N');
9679     Masked.push_back('M');
9680     break;
9681   case OMPDeclareSimdDeclAttr::BS_Notinbranch:
9682     Masked.push_back('N');
9683     break;
9684   case OMPDeclareSimdDeclAttr::BS_Inbranch:
9685     Masked.push_back('M');
9686     break;
9687   }
9688   for (char Mask : Masked) {
9689     for (const ISADataTy &Data : ISAData) {
9690       SmallString<256> Buffer;
9691       llvm::raw_svector_ostream Out(Buffer);
9692       Out << "_ZGV" << Data.ISA << Mask;
9693       if (!VLENVal) {
9694         Out << llvm::APSInt::getUnsigned(Data.VecRegSize /
9695                                          evaluateCDTSize(FD, ParamAttrs));
9696       } else {
9697         Out << VLENVal;
9698       }
9699       for (const ParamAttrTy &ParamAttr : ParamAttrs) {
9700         switch (ParamAttr.Kind){
9701         case LinearWithVarStride:
9702           Out << 's' << ParamAttr.StrideOrArg;
9703           break;
9704         case Linear:
9705           Out << 'l';
9706           if (!!ParamAttr.StrideOrArg)
9707             Out << ParamAttr.StrideOrArg;
9708           break;
9709         case Uniform:
9710           Out << 'u';
9711           break;
9712         case Vector:
9713           Out << 'v';
9714           break;
9715         }
9716         if (!!ParamAttr.Alignment)
9717           Out << 'a' << ParamAttr.Alignment;
9718       }
9719       Out << '_' << Fn->getName();
9720       Fn->addFnAttr(Out.str());
9721     }
9722   }
9723 }
9724 
// These are the functions needed to mangle the names of the vector
// functions generated by the compiler, according to the rules defined
// in the "Vector Function ABI specifications for AArch64", available
// at
// https://developer.arm.com/products/software-development-tools/hpc/arm-compiler-for-hpc/vector-function-abi.
9730 
9731 /// Maps To Vector (MTV), as defined in 3.1.1 of the AAVFABI.
9732 ///
9733 /// TODO: Need to implement the behavior for reference marked with a
9734 /// var or no linear modifiers (1.b in the section). For this, we
9735 /// need to extend ParamKindTy to support the linear modifiers.
9736 static bool getAArch64MTV(QualType QT, ParamKindTy Kind) {
9737   QT = QT.getCanonicalType();
9738 
9739   if (QT->isVoidType())
9740     return false;
9741 
9742   if (Kind == ParamKindTy::Uniform)
9743     return false;
9744 
9745   if (Kind == ParamKindTy::Linear)
9746     return false;
9747 
9748   // TODO: Handle linear references with modifiers
9749 
9750   if (Kind == ParamKindTy::LinearWithVarStride)
9751     return false;
9752 
9753   return true;
9754 }
9755 
9756 /// Pass By Value (PBV), as defined in 3.1.2 of the AAVFABI.
9757 static bool getAArch64PBV(QualType QT, ASTContext &C) {
9758   QT = QT.getCanonicalType();
9759   unsigned Size = C.getTypeSize(QT);
9760 
9761   // Only scalars and complex within 16 bytes wide set PVB to true.
9762   if (Size != 8 && Size != 16 && Size != 32 && Size != 64 && Size != 128)
9763     return false;
9764 
9765   if (QT->isFloatingType())
9766     return true;
9767 
9768   if (QT->isIntegerType())
9769     return true;
9770 
9771   if (QT->isPointerType())
9772     return true;
9773 
9774   // TODO: Add support for complex types (section 3.1.2, item 2).
9775 
9776   return false;
9777 }
9778 
9779 /// Computes the lane size (LS) of a return type or of an input parameter,
9780 /// as defined by `LS(P)` in 3.2.1 of the AAVFABI.
9781 /// TODO: Add support for references, section 3.2.1, item 1.
9782 static unsigned getAArch64LS(QualType QT, ParamKindTy Kind, ASTContext &C) {
9783   if (getAArch64MTV(QT, Kind) && QT.getCanonicalType()->isPointerType()) {
9784     QualType PTy = QT.getCanonicalType()->getPointeeType();
9785     if (getAArch64PBV(PTy, C))
9786       return C.getTypeSize(PTy);
9787   }
9788   if (getAArch64PBV(QT, C))
9789     return C.getTypeSize(QT);
9790 
9791   return C.getTypeSize(C.getUIntPtrType());
9792 }
9793 
9794 // Get Narrowest Data Size (NDS) and Widest Data Size (WDS) from the
9795 // signature of the scalar function, as defined in 3.2.2 of the
9796 // AAVFABI.
9797 static std::tuple<unsigned, unsigned, bool>
9798 getNDSWDS(const FunctionDecl *FD, ArrayRef<ParamAttrTy> ParamAttrs) {
9799   QualType RetType = FD->getReturnType().getCanonicalType();
9800 
9801   ASTContext &C = FD->getASTContext();
9802 
9803   bool OutputBecomesInput = false;
9804 
9805   llvm::SmallVector<unsigned, 8> Sizes;
9806   if (!RetType->isVoidType()) {
9807     Sizes.push_back(getAArch64LS(RetType, ParamKindTy::Vector, C));
9808     if (!getAArch64PBV(RetType, C) && getAArch64MTV(RetType, {}))
9809       OutputBecomesInput = true;
9810   }
9811   for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
9812     QualType QT = FD->getParamDecl(I)->getType().getCanonicalType();
9813     Sizes.push_back(getAArch64LS(QT, ParamAttrs[I].Kind, C));
9814   }
9815 
9816   assert(!Sizes.empty() && "Unable to determine NDS and WDS.");
9817   // The LS of a function parameter / return value can only be a power
9818   // of 2, starting from 8 bits, up to 128.
9819   assert(std::all_of(Sizes.begin(), Sizes.end(),
9820                      [](unsigned Size) {
9821                        return Size == 8 || Size == 16 || Size == 32 ||
9822                               Size == 64 || Size == 128;
9823                      }) &&
9824          "Invalid size");
9825 
9826   return std::make_tuple(*std::min_element(std::begin(Sizes), std::end(Sizes)),
9827                          *std::max_element(std::begin(Sizes), std::end(Sizes)),
9828                          OutputBecomesInput);
9829 }
9830 
9831 /// Mangle the parameter part of the vector function name according to
9832 /// their OpenMP classification. The mangling function is defined in
9833 /// section 3.5 of the AAVFABI.
9834 static std::string mangleVectorParameters(ArrayRef<ParamAttrTy> ParamAttrs) {
9835   SmallString<256> Buffer;
9836   llvm::raw_svector_ostream Out(Buffer);
9837   for (const auto &ParamAttr : ParamAttrs) {
9838     switch (ParamAttr.Kind) {
9839     case LinearWithVarStride:
9840       Out << "ls" << ParamAttr.StrideOrArg;
9841       break;
9842     case Linear:
9843       Out << 'l';
9844       // Don't print the step value if it is not present or if it is
9845       // equal to 1.
9846       if (!!ParamAttr.StrideOrArg && ParamAttr.StrideOrArg != 1)
9847         Out << ParamAttr.StrideOrArg;
9848       break;
9849     case Uniform:
9850       Out << 'u';
9851       break;
9852     case Vector:
9853       Out << 'v';
9854       break;
9855     }
9856 
9857     if (!!ParamAttr.Alignment)
9858       Out << 'a' << ParamAttr.Alignment;
9859   }
9860 
9861   return Out.str();
9862 }
9863 
9864 // Function used to add the attribute. The parameter `VLEN` is
9865 // templated to allow the use of "x" when targeting scalable functions
9866 // for SVE.
9867 template <typename T>
9868 static void addAArch64VectorName(T VLEN, StringRef LMask, StringRef Prefix,
9869                                  char ISA, StringRef ParSeq,
9870                                  StringRef MangledName, bool OutputBecomesInput,
9871                                  llvm::Function *Fn) {
9872   SmallString<256> Buffer;
9873   llvm::raw_svector_ostream Out(Buffer);
9874   Out << Prefix << ISA << LMask << VLEN;
9875   if (OutputBecomesInput)
9876     Out << "v";
9877   Out << ParSeq << "_" << MangledName;
9878   Fn->addFnAttr(Out.str());
9879 }
9880 
9881 // Helper function to generate the Advanced SIMD names depending on
9882 // the value of the NDS when simdlen is not present.
9883 static void addAArch64AdvSIMDNDSNames(unsigned NDS, StringRef Mask,
9884                                       StringRef Prefix, char ISA,
9885                                       StringRef ParSeq, StringRef MangledName,
9886                                       bool OutputBecomesInput,
9887                                       llvm::Function *Fn) {
9888   switch (NDS) {
9889   case 8:
9890     addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
9891                          OutputBecomesInput, Fn);
9892     addAArch64VectorName(16, Mask, Prefix, ISA, ParSeq, MangledName,
9893                          OutputBecomesInput, Fn);
9894     break;
9895   case 16:
9896     addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
9897                          OutputBecomesInput, Fn);
9898     addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
9899                          OutputBecomesInput, Fn);
9900     break;
9901   case 32:
9902     addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
9903                          OutputBecomesInput, Fn);
9904     addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
9905                          OutputBecomesInput, Fn);
9906     break;
9907   case 64:
9908   case 128:
9909     addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
9910                          OutputBecomesInput, Fn);
9911     break;
9912   default:
9913     llvm_unreachable("Scalar type is too wide.");
9914   }
9915 }
9916 
/// Emit vector function attributes for AArch64, as defined in the AAVFABI.
/// \p UserVLEN is the simdlen value provided by the user (0 when absent);
/// \p ISA is 'n' for Advanced SIMD or 's' for SVE. Invalid user input is
/// diagnosed with a warning and no attributes are emitted.
static void emitAArch64DeclareSimdFunction(
    CodeGenModule &CGM, const FunctionDecl *FD, unsigned UserVLEN,
    ArrayRef<ParamAttrTy> ParamAttrs,
    OMPDeclareSimdDeclAttr::BranchStateTy State, StringRef MangledName,
    char ISA, unsigned VecRegSize, llvm::Function *Fn, SourceLocation SLoc) {

  // Get basic data for building the vector signature.
  const auto Data = getNDSWDS(FD, ParamAttrs);
  const unsigned NDS = std::get<0>(Data);  // Narrowest data size.
  const unsigned WDS = std::get<1>(Data);  // Widest data size.
  const bool OutputBecomesInput = std::get<2>(Data);

  // Check the values provided via `simdlen` by the user.
  // 1. A `simdlen(1)` doesn't produce vector signatures,
  if (UserVLEN == 1) {
    unsigned DiagID = CGM.getDiags().getCustomDiagID(
        DiagnosticsEngine::Warning,
        "The clause simdlen(1) has no effect when targeting aarch64.");
    CGM.getDiags().Report(SLoc, DiagID);
    return;
  }

  // 2. Section 3.3.1, item 1: user input must be a power of 2 for
  // Advanced SIMD output.
  if (ISA == 'n' && UserVLEN && !llvm::isPowerOf2_32(UserVLEN)) {
    unsigned DiagID = CGM.getDiags().getCustomDiagID(
        DiagnosticsEngine::Warning, "The value specified in simdlen must be a "
                                    "power of 2 when targeting Advanced SIMD.");
    CGM.getDiags().Report(SLoc, DiagID);
    return;
  }

  // 3. Section 3.4.1. SVE fixed length must obey the architectural
  // limits.
  if (ISA == 's' && UserVLEN != 0) {
    if ((UserVLEN * WDS > 2048) || (UserVLEN * WDS % 128 != 0)) {
      unsigned DiagID = CGM.getDiags().getCustomDiagID(
          DiagnosticsEngine::Warning, "The clause simdlen must fit the %0-bit "
                                      "lanes in the architectural constraints "
                                      "for SVE (min is 128-bit, max is "
                                      "2048-bit, by steps of 128-bit)");
      CGM.getDiags().Report(SLoc, DiagID) << WDS;
      return;
    }
  }

  // Sort out parameter sequence.
  const std::string ParSeq = mangleVectorParameters(ParamAttrs);
  StringRef Prefix = "_ZGV";
  // Generate simdlen from user input (if any).
  if (UserVLEN) {
    if (ISA == 's') {
      // SVE generates only a masked function.
      addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
                           OutputBecomesInput, Fn);
    } else {
      assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
      // Advanced SIMD generates one or two functions, depending on
      // the `[not]inbranch` clause.
      switch (State) {
      case OMPDeclareSimdDeclAttr::BS_Undefined:
        // No clause: emit both the unmasked ("N") and masked ("M") variants.
        addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Notinbranch:
        addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Inbranch:
        addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        break;
      }
    }
  } else {
    // If no user simdlen is provided, follow the AAVFABI rules for
    // generating the vector length.
    if (ISA == 's') {
      // SVE, section 3.4.1, item 1: scalable length, mangled as "x".
      addAArch64VectorName("x", "M", Prefix, ISA, ParSeq, MangledName,
                           OutputBecomesInput, Fn);
    } else {
      assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
      // Advanced SIMD, Section 3.3.1 of the AAVFABI, generates one or
      // two vector names depending on the use of the clause
      // `[not]inbranch`. The VLENs themselves are derived from the NDS.
      switch (State) {
      case OMPDeclareSimdDeclAttr::BS_Undefined:
        addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Notinbranch:
        addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Inbranch:
        addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        break;
      }
    }
  }
}
10025 
10026 void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD,
10027                                               llvm::Function *Fn) {
10028   ASTContext &C = CGM.getContext();
10029   FD = FD->getMostRecentDecl();
10030   // Map params to their positions in function decl.
10031   llvm::DenseMap<const Decl *, unsigned> ParamPositions;
10032   if (isa<CXXMethodDecl>(FD))
10033     ParamPositions.try_emplace(FD, 0);
10034   unsigned ParamPos = ParamPositions.size();
10035   for (const ParmVarDecl *P : FD->parameters()) {
10036     ParamPositions.try_emplace(P->getCanonicalDecl(), ParamPos);
10037     ++ParamPos;
10038   }
10039   while (FD) {
10040     for (const auto *Attr : FD->specific_attrs<OMPDeclareSimdDeclAttr>()) {
10041       llvm::SmallVector<ParamAttrTy, 8> ParamAttrs(ParamPositions.size());
10042       // Mark uniform parameters.
10043       for (const Expr *E : Attr->uniforms()) {
10044         E = E->IgnoreParenImpCasts();
10045         unsigned Pos;
10046         if (isa<CXXThisExpr>(E)) {
10047           Pos = ParamPositions[FD];
10048         } else {
10049           const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
10050                                 ->getCanonicalDecl();
10051           Pos = ParamPositions[PVD];
10052         }
10053         ParamAttrs[Pos].Kind = Uniform;
10054       }
10055       // Get alignment info.
10056       auto NI = Attr->alignments_begin();
10057       for (const Expr *E : Attr->aligneds()) {
10058         E = E->IgnoreParenImpCasts();
10059         unsigned Pos;
10060         QualType ParmTy;
10061         if (isa<CXXThisExpr>(E)) {
10062           Pos = ParamPositions[FD];
10063           ParmTy = E->getType();
10064         } else {
10065           const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
10066                                 ->getCanonicalDecl();
10067           Pos = ParamPositions[PVD];
10068           ParmTy = PVD->getType();
10069         }
10070         ParamAttrs[Pos].Alignment =
10071             (*NI)
10072                 ? (*NI)->EvaluateKnownConstInt(C)
10073                 : llvm::APSInt::getUnsigned(
10074                       C.toCharUnitsFromBits(C.getOpenMPDefaultSimdAlign(ParmTy))
10075                           .getQuantity());
10076         ++NI;
10077       }
10078       // Mark linear parameters.
10079       auto SI = Attr->steps_begin();
10080       auto MI = Attr->modifiers_begin();
10081       for (const Expr *E : Attr->linears()) {
10082         E = E->IgnoreParenImpCasts();
10083         unsigned Pos;
10084         if (isa<CXXThisExpr>(E)) {
10085           Pos = ParamPositions[FD];
10086         } else {
10087           const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
10088                                 ->getCanonicalDecl();
10089           Pos = ParamPositions[PVD];
10090         }
10091         ParamAttrTy &ParamAttr = ParamAttrs[Pos];
10092         ParamAttr.Kind = Linear;
10093         if (*SI) {
10094           Expr::EvalResult Result;
10095           if (!(*SI)->EvaluateAsInt(Result, C, Expr::SE_AllowSideEffects)) {
10096             if (const auto *DRE =
10097                     cast<DeclRefExpr>((*SI)->IgnoreParenImpCasts())) {
10098               if (const auto *StridePVD = cast<ParmVarDecl>(DRE->getDecl())) {
10099                 ParamAttr.Kind = LinearWithVarStride;
10100                 ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(
10101                     ParamPositions[StridePVD->getCanonicalDecl()]);
10102               }
10103             }
10104           } else {
10105             ParamAttr.StrideOrArg = Result.Val.getInt();
10106           }
10107         }
10108         ++SI;
10109         ++MI;
10110       }
10111       llvm::APSInt VLENVal;
10112       SourceLocation ExprLoc;
10113       const Expr *VLENExpr = Attr->getSimdlen();
10114       if (VLENExpr) {
10115         VLENVal = VLENExpr->EvaluateKnownConstInt(C);
10116         ExprLoc = VLENExpr->getExprLoc();
10117       }
10118       OMPDeclareSimdDeclAttr::BranchStateTy State = Attr->getBranchState();
10119       if (CGM.getTriple().getArch() == llvm::Triple::x86 ||
10120           CGM.getTriple().getArch() == llvm::Triple::x86_64) {
10121         emitX86DeclareSimdFunction(FD, Fn, VLENVal, ParamAttrs, State);
10122       } else if (CGM.getTriple().getArch() == llvm::Triple::aarch64) {
10123         unsigned VLEN = VLENVal.getExtValue();
10124         StringRef MangledName = Fn->getName();
10125         if (CGM.getTarget().hasFeature("sve"))
10126           emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
10127                                          MangledName, 's', 128, Fn, ExprLoc);
10128         if (CGM.getTarget().hasFeature("neon"))
10129           emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
10130                                          MangledName, 'n', 128, Fn, ExprLoc);
10131       }
10132     }
10133     FD = FD->getPreviousDecl();
10134   }
10135 }
10136 
10137 namespace {
10138 /// Cleanup action for doacross support.
10139 class DoacrossCleanupTy final : public EHScopeStack::Cleanup {
10140 public:
10141   static const int DoacrossFinArgs = 2;
10142 
10143 private:
10144   llvm::FunctionCallee RTLFn;
10145   llvm::Value *Args[DoacrossFinArgs];
10146 
10147 public:
10148   DoacrossCleanupTy(llvm::FunctionCallee RTLFn,
10149                     ArrayRef<llvm::Value *> CallArgs)
10150       : RTLFn(RTLFn) {
10151     assert(CallArgs.size() == DoacrossFinArgs);
10152     std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args));
10153   }
10154   void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
10155     if (!CGF.HaveInsertPoint())
10156       return;
10157     CGF.EmitRuntimeCall(RTLFn, Args);
10158   }
10159 };
10160 } // namespace
10161 
/// Emit initialization for a doacross loop nest: builds an on-stack array of
/// kmp_dim descriptors (one per loop in \p NumIterations) and passes it to
/// __kmpc_doacross_init; also pushes a cleanup that emits
/// __kmpc_doacross_fini when the region is exited.
void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF,
                                       const OMPLoopDirective &D,
                                       ArrayRef<Expr *> NumIterations) {
  if (!CGF.HaveInsertPoint())
    return;

  ASTContext &C = CGM.getContext();
  QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
  RecordDecl *RD;
  if (KmpDimTy.isNull()) {
    // Build struct kmp_dim {  // loop bounds info casted to kmp_int64
    //  kmp_int64 lo; // lower
    //  kmp_int64 up; // upper
    //  kmp_int64 st; // stride
    // };
    RD = C.buildImplicitRecord("kmp_dim");
    RD->startDefinition();
    addFieldToRecordDecl(C, RD, Int64Ty);
    addFieldToRecordDecl(C, RD, Int64Ty);
    addFieldToRecordDecl(C, RD, Int64Ty);
    RD->completeDefinition();
    KmpDimTy = C.getRecordType(RD);
  } else {
    // The record type was built on a previous call and cached in KmpDimTy.
    RD = cast<RecordDecl>(KmpDimTy->getAsTagDecl());
  }
  llvm::APInt Size(/*numBits=*/32, NumIterations.size());
  QualType ArrayTy =
      C.getConstantArrayType(KmpDimTy, Size, ArrayType::Normal, 0);

  // Zero-initialize the whole array; the 'lo' (lower-bound) field of every
  // dimension is therefore left as 0 and only 'up' and 'st' are stored below.
  Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims");
  CGF.EmitNullInitialization(DimsAddr, ArrayTy);
  enum { LowerFD = 0, UpperFD, StrideFD };
  // Fill dims with data.
  for (unsigned I = 0, E = NumIterations.size(); I < E; ++I) {
    LValue DimsLVal = CGF.MakeAddrLValue(
        CGF.Builder.CreateConstArrayGEP(DimsAddr, I), KmpDimTy);
    // dims.upper = num_iterations;
    LValue UpperLVal = CGF.EmitLValueForField(
        DimsLVal, *std::next(RD->field_begin(), UpperFD));
    // Convert the iteration count to kmp_int64 before storing it.
    llvm::Value *NumIterVal =
        CGF.EmitScalarConversion(CGF.EmitScalarExpr(NumIterations[I]),
                                 D.getNumIterations()->getType(), Int64Ty,
                                 D.getNumIterations()->getExprLoc());
    CGF.EmitStoreOfScalar(NumIterVal, UpperLVal);
    // dims.stride = 1;
    LValue StrideLVal = CGF.EmitLValueForField(
        DimsLVal, *std::next(RD->field_begin(), StrideFD));
    CGF.EmitStoreOfScalar(llvm::ConstantInt::getSigned(CGM.Int64Ty, /*V=*/1),
                          StrideLVal);
  }

  // Build call void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid,
  // kmp_int32 num_dims, struct kmp_dim * dims);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, D.getBeginLoc()),
      getThreadID(CGF, D.getBeginLoc()),
      llvm::ConstantInt::getSigned(CGM.Int32Ty, NumIterations.size()),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          CGF.Builder.CreateConstArrayGEP(DimsAddr, 0).getPointer(),
          CGM.VoidPtrTy)};

  llvm::FunctionCallee RTLFn =
      createRuntimeFunction(OMPRTL__kmpc_doacross_init);
  CGF.EmitRuntimeCall(RTLFn, Args);
  // Register a cleanup so __kmpc_doacross_fini runs on both normal and EH
  // exits from the region.
  llvm::Value *FiniArgs[DoacrossCleanupTy::DoacrossFinArgs] = {
      emitUpdateLocation(CGF, D.getEndLoc()), getThreadID(CGF, D.getEndLoc())};
  llvm::FunctionCallee FiniRTLFn =
      createRuntimeFunction(OMPRTL__kmpc_doacross_fini);
  CGF.EHStack.pushCleanup<DoacrossCleanupTy>(NormalAndEHCleanup, FiniRTLFn,
                                             llvm::makeArrayRef(FiniArgs));
}
10233 
10234 void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
10235                                           const OMPDependClause *C) {
10236   QualType Int64Ty =
10237       CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
10238   llvm::APInt Size(/*numBits=*/32, C->getNumLoops());
10239   QualType ArrayTy = CGM.getContext().getConstantArrayType(
10240       Int64Ty, Size, ArrayType::Normal, 0);
10241   Address CntAddr = CGF.CreateMemTemp(ArrayTy, ".cnt.addr");
10242   for (unsigned I = 0, E = C->getNumLoops(); I < E; ++I) {
10243     const Expr *CounterVal = C->getLoopData(I);
10244     assert(CounterVal);
10245     llvm::Value *CntVal = CGF.EmitScalarConversion(
10246         CGF.EmitScalarExpr(CounterVal), CounterVal->getType(), Int64Ty,
10247         CounterVal->getExprLoc());
10248     CGF.EmitStoreOfScalar(CntVal, CGF.Builder.CreateConstArrayGEP(CntAddr, I),
10249                           /*Volatile=*/false, Int64Ty);
10250   }
10251   llvm::Value *Args[] = {
10252       emitUpdateLocation(CGF, C->getBeginLoc()),
10253       getThreadID(CGF, C->getBeginLoc()),
10254       CGF.Builder.CreateConstArrayGEP(CntAddr, 0).getPointer()};
10255   llvm::FunctionCallee RTLFn;
10256   if (C->getDependencyKind() == OMPC_DEPEND_source) {
10257     RTLFn = createRuntimeFunction(OMPRTL__kmpc_doacross_post);
10258   } else {
10259     assert(C->getDependencyKind() == OMPC_DEPEND_sink);
10260     RTLFn = createRuntimeFunction(OMPRTL__kmpc_doacross_wait);
10261   }
10262   CGF.EmitRuntimeCall(RTLFn, Args);
10263 }
10264 
10265 void CGOpenMPRuntime::emitCall(CodeGenFunction &CGF, SourceLocation Loc,
10266                                llvm::FunctionCallee Callee,
10267                                ArrayRef<llvm::Value *> Args) const {
10268   assert(Loc.isValid() && "Outlined function call location must be valid.");
10269   auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
10270 
10271   if (auto *Fn = dyn_cast<llvm::Function>(Callee.getCallee())) {
10272     if (Fn->doesNotThrow()) {
10273       CGF.EmitNounwindRuntimeCall(Fn, Args);
10274       return;
10275     }
10276   }
10277   CGF.EmitRuntimeCall(Callee, Args);
10278 }
10279 
/// Emit a call to an outlined OpenMP region function; the base implementation
/// simply forwards to emitCall() with no argument adjustment.
void CGOpenMPRuntime::emitOutlinedFunctionCall(
    CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn,
    ArrayRef<llvm::Value *> Args) const {
  emitCall(CGF, Loc, OutlinedFn, Args);
}
10285 
/// Return the address of \p NativeParam. In the base runtime the native and
/// target parameters coincide, so \p TargetParam is intentionally unused
/// here; subclasses presumably override this when a translation is needed —
/// confirm against CGOpenMPRuntime's declaration site.
Address CGOpenMPRuntime::getParameterAddress(CodeGenFunction &CGF,
                                             const VarDecl *NativeParam,
                                             const VarDecl *TargetParam) const {
  return CGF.GetAddrOfLocalVar(NativeParam);
}
10291 
10292 namespace {
10293 /// Cleanup action for allocate support.
10294 class OMPAllocateCleanupTy final : public EHScopeStack::Cleanup {
10295 public:
10296   static const int CleanupArgs = 3;
10297 
10298 private:
10299   llvm::FunctionCallee RTLFn;
10300   llvm::Value *Args[CleanupArgs];
10301 
10302 public:
10303   OMPAllocateCleanupTy(llvm::FunctionCallee RTLFn,
10304                        ArrayRef<llvm::Value *> CallArgs)
10305       : RTLFn(RTLFn) {
10306     assert(CallArgs.size() == CleanupArgs &&
10307            "Size of arguments does not match.");
10308     std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args));
10309   }
10310   void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
10311     if (!CGF.HaveInsertPoint())
10312       return;
10313     CGF.EmitRuntimeCall(RTLFn, Args);
10314   }
10315 };
10316 } // namespace
10317 
/// Return a runtime-allocated address for local variable \p VD when it
/// carries an OMPAllocateDeclAttr, or Address::invalid() to let the normal
/// stack-allocation path handle it.
///
/// Storage is obtained from __kmpc_alloc using the allocator recorded in the
/// attribute; a cleanup calling __kmpc_free is pushed so the memory is
/// released when the variable's scope is exited.
Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF,
                                                   const VarDecl *VD) {
  if (!VD)
    return Address::invalid();
  const VarDecl *CVD = VD->getCanonicalDecl();
  if (!CVD->hasAttr<OMPAllocateDeclAttr>())
    return Address::invalid();
  const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
  // Use the default allocation.
  if (AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc &&
      !AA->getAllocator())
    return Address::invalid();
  llvm::Value *Size;
  CharUnits Align = CGM.getContext().getDeclAlign(CVD);
  if (CVD->getType()->isVariablyModifiedType()) {
    // Variably-modified type: the size is only known at runtime, so the
    // rounding to the alignment must be done with emitted arithmetic.
    Size = CGF.getTypeSize(CVD->getType());
    // Align the size: ((size + align - 1) / align) * align
    Size = CGF.Builder.CreateNUWAdd(
        Size, CGM.getSize(Align - CharUnits::fromQuantity(1)));
    Size = CGF.Builder.CreateUDiv(Size, CGM.getSize(Align));
    Size = CGF.Builder.CreateNUWMul(Size, CGM.getSize(Align));
  } else {
    // Constant-sized type: round up to the alignment at compile time.
    CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType());
    Size = CGM.getSize(Sz.alignTo(Align));
  }
  llvm::Value *ThreadID = getThreadID(CGF, CVD->getBeginLoc());
  assert(AA->getAllocator() &&
         "Expected allocator expression for non-default allocator.");
  llvm::Value *Allocator = CGF.EmitScalarExpr(AA->getAllocator());
  // According to the standard, the original allocator type is a enum (integer).
  // Convert to pointer type, if required.
  if (Allocator->getType()->isIntegerTy())
    Allocator = CGF.Builder.CreateIntToPtr(Allocator, CGM.VoidPtrTy)
  else if (Allocator->getType()->isPointerTy())
    Allocator = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Allocator,
                                                                CGM.VoidPtrTy);
  llvm::Value *Args[] = {ThreadID, Size, Allocator};

  llvm::Value *Addr =
      CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_alloc), Args,
                          CVD->getName() + ".void.addr");
  // Pair the allocation with a __kmpc_free cleanup (normal and EH exits).
  llvm::Value *FiniArgs[OMPAllocateCleanupTy::CleanupArgs] = {ThreadID, Addr,
                                                              Allocator};
  llvm::FunctionCallee FiniRTLFn = createRuntimeFunction(OMPRTL__kmpc_free);

  CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(NormalAndEHCleanup, FiniRTLFn,
                                                llvm::makeArrayRef(FiniArgs));
  // Cast the raw void* returned by the runtime to the variable's own type.
  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      Addr,
      CGF.ConvertTypeForMem(CGM.getContext().getPointerType(CVD->getType())),
      CVD->getName() + ".addr");
  return Address(Addr, Align);
}
10371 
// CGOpenMPSIMDRuntime implements the OpenMP runtime interface for SIMD-only
// mode. Outlining and parallel execution are not part of that mode, so
// reaching any of the entry points below indicates a bug in the caller.
llvm::Function *CGOpenMPSIMDRuntime::emitParallelOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Function *CGOpenMPSIMDRuntime::emitTeamsOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Function *CGOpenMPSIMDRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitParallelCall(CodeGenFunction &CGF,
                                           SourceLocation Loc,
                                           llvm::Function *OutlinedFn,
                                           ArrayRef<llvm::Value *> CapturedVars,
                                           const Expr *IfCond) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
10399 
// Synchronization regions and worksharing-loop initialization require the
// full OpenMP runtime; in SIMD-only mode these hooks must never be invoked.
void CGOpenMPSIMDRuntime::emitCriticalRegion(
    CodeGenFunction &CGF, StringRef CriticalName,
    const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc,
    const Expr *Hint) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitMasterRegion(CodeGenFunction &CGF,
                                           const RegionCodeGenTy &MasterOpGen,
                                           SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
                                            SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskgroupRegion(
    CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen,
    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitSingleRegion(
    CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen,
    SourceLocation Loc, ArrayRef<const Expr *> CopyprivateVars,
    ArrayRef<const Expr *> DestExprs, ArrayRef<const Expr *> SrcExprs,
    ArrayRef<const Expr *> AssignmentOps) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitOrderedRegion(CodeGenFunction &CGF,
                                            const RegionCodeGenTy &OrderedOpGen,
                                            SourceLocation Loc,
                                            bool IsThreads) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitBarrierCall(CodeGenFunction &CGF,
                                          SourceLocation Loc,
                                          OpenMPDirectiveKind Kind,
                                          bool EmitChecks,
                                          bool ForceSimpleCall) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForDispatchInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
    bool Ordered, const DispatchRTInput &DispatchValues) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind,
    const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
10459 
// Loop scheduling, threadprivate storage, tasks, and flushes all depend on
// the full OpenMP runtime and are unreachable in SIMD-only mode; only
// emitReduction (at the end of this run of overrides) has real behavior.
void CGOpenMPSIMDRuntime::emitDistributeStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDistScheduleClauseKind SchedKind, const StaticRTInput &Values) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
                                                     SourceLocation Loc,
                                                     unsigned IVSize,
                                                     bool IVSigned) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForStaticFinish(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              OpenMPDirectiveKind DKind) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Value *CGOpenMPSIMDRuntime::emitForNext(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              unsigned IVSize, bool IVSigned,
                                              Address IL, Address LB,
                                              Address UB, Address ST) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
                                               llvm::Value *NumThreads,
                                               SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitProcBindClause(CodeGenFunction &CGF,
                                             OpenMPProcBindClauseKind ProcBind,
                                             SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

Address CGOpenMPSIMDRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
                                                    const VarDecl *VD,
                                                    Address VDAddr,
                                                    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Function *CGOpenMPSIMDRuntime::emitThreadPrivateVarDefinition(
    const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit,
    CodeGenFunction *CGF) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

Address CGOpenMPSIMDRuntime::getAddrOfArtificialThreadPrivate(
    CodeGenFunction &CGF, QualType VarType, StringRef Name) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitFlush(CodeGenFunction &CGF,
                                    ArrayRef<const Expr *> Vars,
                                    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPExecutableDirective &D,
                                       llvm::Function *TaskFunction,
                                       QualType SharedsTy, Address Shareds,
                                       const Expr *IfCond,
                                       const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskLoopCall(
    CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D,
    llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds,
    const Expr *IfCond, const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

// Simple (non-runtime) reductions are the one supported case: delegate to
// the base implementation, which handles them without runtime calls.
void CGOpenMPSIMDRuntime::emitReduction(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> Privates,
    ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
    ArrayRef<const Expr *> ReductionOps, ReductionOptionsTy Options) {
  assert(Options.SimpleReduction && "Only simple reduction is expected.");
  CGOpenMPRuntime::emitReduction(CGF, Loc, Privates, LHSExprs, RHSExprs,
                                 ReductionOps, Options);
}
10547 
// Task reductions, cancellation, target offloading, teams, and doacross all
// require the full OpenMP runtime; in SIMD-only mode these hooks either must
// never be reached (llvm_unreachable) or report that there is nothing to do.
llvm::Value *CGOpenMPSIMDRuntime::emitTaskReductionInit(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
    ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  ReductionCodeGen &RCG,
                                                  unsigned N) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

Address CGOpenMPSIMDRuntime::getTaskReductionItem(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  llvm::Value *ReductionsPtr,
                                                  LValue SharedLVal) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
                                           SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitCancellationPointCall(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDirectiveKind CancelRegion) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitCancelCall(CodeGenFunction &CGF,
                                         SourceLocation Loc, const Expr *IfCond,
                                         OpenMPDirectiveKind CancelRegion) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTargetOutlinedFunction(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTargetCall(CodeGenFunction &CGF,
                                         const OMPExecutableDirective &D,
                                         llvm::Function *OutlinedFn,
                                         llvm::Value *OutlinedFnID,
                                         const Expr *IfCond,
                                         const Expr *Device) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

bool CGOpenMPSIMDRuntime::emitTargetFunctions(GlobalDecl GD) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

bool CGOpenMPSIMDRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

// Returns false: target codegen never takes ownership of a global in
// SIMD-only mode.
bool CGOpenMPSIMDRuntime::emitTargetGlobal(GlobalDecl GD) {
  return false;
}

// No offload registration function is needed in SIMD-only mode.
llvm::Function *CGOpenMPSIMDRuntime::emitRegistrationFunction() {
  return nullptr;
}

void CGOpenMPSIMDRuntime::emitTeamsCall(CodeGenFunction &CGF,
                                        const OMPExecutableDirective &D,
                                        SourceLocation Loc,
                                        llvm::Function *OutlinedFn,
                                        ArrayRef<llvm::Value *> CapturedVars) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
                                             const Expr *NumTeams,
                                             const Expr *ThreadLimit,
                                             SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTargetDataCalls(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTargetDataStandAloneCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitDoacrossInit(CodeGenFunction &CGF,
                                           const OMPLoopDirective &D,
                                           ArrayRef<Expr *> NumIterations) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
                                              const OMPDependClause *C) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

const VarDecl *
CGOpenMPSIMDRuntime::translateParameter(const FieldDecl *FD,
                                        const VarDecl *NativeParam) const {
  llvm_unreachable("Not supported in SIMD-only mode");
}

Address
CGOpenMPSIMDRuntime::getParameterAddress(CodeGenFunction &CGF,
                                         const VarDecl *NativeParam,
                                         const VarDecl *TargetParam) const {
  llvm_unreachable("Not supported in SIMD-only mode");
}
10667