1 //===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This provides a class for OpenMP runtime code generation.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "CGCXXABI.h"
14 #include "CGCleanup.h"
15 #include "CGOpenMPRuntime.h"
16 #include "CGRecordLayout.h"
17 #include "CodeGenFunction.h"
18 #include "clang/CodeGen/ConstantInitBuilder.h"
19 #include "clang/AST/Decl.h"
20 #include "clang/AST/StmtOpenMP.h"
21 #include "clang/Basic/BitmaskEnum.h"
22 #include "llvm/ADT/ArrayRef.h"
23 #include "llvm/Bitcode/BitcodeReader.h"
24 #include "llvm/IR/DerivedTypes.h"
25 #include "llvm/IR/GlobalValue.h"
26 #include "llvm/IR/Value.h"
27 #include "llvm/Support/Format.h"
28 #include "llvm/Support/raw_ostream.h"
29 #include <cassert>
30 
31 using namespace clang;
32 using namespace CodeGen;
33 
34 namespace {
35 /// Base class for handling code generation inside OpenMP regions.
36 class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
37 public:
38   /// Kinds of OpenMP regions used in codegen.
39   enum CGOpenMPRegionKind {
40     /// Region with outlined function for standalone 'parallel'
41     /// directive.
42     ParallelOutlinedRegion,
43     /// Region with outlined function for standalone 'task' directive.
44     TaskOutlinedRegion,
45     /// Region for constructs that do not require function outlining,
46     /// like 'for', 'sections', 'atomic' etc. directives.
47     InlinedRegion,
48     /// Region with outlined function for standalone 'target' directive.
49     TargetRegion,
50   };
51 
52   CGOpenMPRegionInfo(const CapturedStmt &CS,
53                      const CGOpenMPRegionKind RegionKind,
54                      const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
55                      bool HasCancel)
56       : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind),
57         CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {}
58 
59   CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind,
60                      const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
61                      bool HasCancel)
62       : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen),
63         Kind(Kind), HasCancel(HasCancel) {}
64 
65   /// Get a variable or parameter for storing global thread id
66   /// inside OpenMP construct.
67   virtual const VarDecl *getThreadIDVariable() const = 0;
68 
69   /// Emit the captured statement body.
70   void EmitBody(CodeGenFunction &CGF, const Stmt *S) override;
71 
72   /// Get an LValue for the current ThreadID variable.
73   /// \return LValue for thread id variable. This LValue always has type int32*.
74   virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF);
75 
76   virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {}
77 
78   CGOpenMPRegionKind getRegionKind() const { return RegionKind; }
79 
80   OpenMPDirectiveKind getDirectiveKind() const { return Kind; }
81 
82   bool hasCancel() const { return HasCancel; }
83 
84   static bool classof(const CGCapturedStmtInfo *Info) {
85     return Info->getKind() == CR_OpenMP;
86   }
87 
88   ~CGOpenMPRegionInfo() override = default;
89 
90 protected:
91   CGOpenMPRegionKind RegionKind;
92   RegionCodeGenTy CodeGen;
93   OpenMPDirectiveKind Kind;
94   bool HasCancel;
95 };
96 
97 /// API for captured statement code generation in OpenMP constructs.
98 class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo {
99 public:
100   CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar,
101                              const RegionCodeGenTy &CodeGen,
102                              OpenMPDirectiveKind Kind, bool HasCancel,
103                              StringRef HelperName)
104       : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind,
105                            HasCancel),
106         ThreadIDVar(ThreadIDVar), HelperName(HelperName) {
107     assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
108   }
109 
110   /// Get a variable or parameter for storing global thread id
111   /// inside OpenMP construct.
112   const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }
113 
114   /// Get the name of the capture helper.
115   StringRef getHelperName() const override { return HelperName; }
116 
117   static bool classof(const CGCapturedStmtInfo *Info) {
118     return CGOpenMPRegionInfo::classof(Info) &&
119            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
120                ParallelOutlinedRegion;
121   }
122 
123 private:
124   /// A variable or parameter storing global thread id for OpenMP
125   /// constructs.
126   const VarDecl *ThreadIDVar;
127   StringRef HelperName;
128 };
129 
130 /// API for captured statement code generation in OpenMP constructs.
131 class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo {
132 public:
133   class UntiedTaskActionTy final : public PrePostActionTy {
134     bool Untied;
135     const VarDecl *PartIDVar;
136     const RegionCodeGenTy UntiedCodeGen;
137     llvm::SwitchInst *UntiedSwitch = nullptr;
138 
139   public:
140     UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar,
141                        const RegionCodeGenTy &UntiedCodeGen)
142         : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {}
143     void Enter(CodeGenFunction &CGF) override {
144       if (Untied) {
145         // Emit task switching point.
146         LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
147             CGF.GetAddrOfLocalVar(PartIDVar),
148             PartIDVar->getType()->castAs<PointerType>());
149         llvm::Value *Res =
150             CGF.EmitLoadOfScalar(PartIdLVal, PartIDVar->getLocation());
151         llvm::BasicBlock *DoneBB = CGF.createBasicBlock(".untied.done.");
152         UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB);
153         CGF.EmitBlock(DoneBB);
154         CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
155         CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
156         UntiedSwitch->addCase(CGF.Builder.getInt32(0),
157                               CGF.Builder.GetInsertBlock());
158         emitUntiedSwitch(CGF);
159       }
160     }
161     void emitUntiedSwitch(CodeGenFunction &CGF) const {
162       if (Untied) {
163         LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
164             CGF.GetAddrOfLocalVar(PartIDVar),
165             PartIDVar->getType()->castAs<PointerType>());
166         CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
167                               PartIdLVal);
168         UntiedCodeGen(CGF);
169         CodeGenFunction::JumpDest CurPoint =
170             CGF.getJumpDestInCurrentScope(".untied.next.");
171         CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
172         CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
173         UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
174                               CGF.Builder.GetInsertBlock());
175         CGF.EmitBranchThroughCleanup(CurPoint);
176         CGF.EmitBlock(CurPoint.getBlock());
177       }
178     }
179     unsigned getNumberOfParts() const { return UntiedSwitch->getNumCases(); }
180   };
181   CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS,
182                                  const VarDecl *ThreadIDVar,
183                                  const RegionCodeGenTy &CodeGen,
184                                  OpenMPDirectiveKind Kind, bool HasCancel,
185                                  const UntiedTaskActionTy &Action)
186       : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel),
187         ThreadIDVar(ThreadIDVar), Action(Action) {
188     assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
189   }
190 
191   /// Get a variable or parameter for storing global thread id
192   /// inside OpenMP construct.
193   const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }
194 
195   /// Get an LValue for the current ThreadID variable.
196   LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override;
197 
198   /// Get the name of the capture helper.
199   StringRef getHelperName() const override { return ".omp_outlined."; }
200 
201   void emitUntiedSwitch(CodeGenFunction &CGF) override {
202     Action.emitUntiedSwitch(CGF);
203   }
204 
205   static bool classof(const CGCapturedStmtInfo *Info) {
206     return CGOpenMPRegionInfo::classof(Info) &&
207            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
208                TaskOutlinedRegion;
209   }
210 
211 private:
212   /// A variable or parameter storing global thread id for OpenMP
213   /// constructs.
214   const VarDecl *ThreadIDVar;
215   /// Action for emitting code for untied tasks.
216   const UntiedTaskActionTy &Action;
217 };
218 
219 /// API for inlined captured statement code generation in OpenMP
220 /// constructs.
221 class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo {
222 public:
223   CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI,
224                             const RegionCodeGenTy &CodeGen,
225                             OpenMPDirectiveKind Kind, bool HasCancel)
226       : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel),
227         OldCSI(OldCSI),
228         OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {}
229 
230   // Retrieve the value of the context parameter.
231   llvm::Value *getContextValue() const override {
232     if (OuterRegionInfo)
233       return OuterRegionInfo->getContextValue();
234     llvm_unreachable("No context value for inlined OpenMP region");
235   }
236 
237   void setContextValue(llvm::Value *V) override {
238     if (OuterRegionInfo) {
239       OuterRegionInfo->setContextValue(V);
240       return;
241     }
242     llvm_unreachable("No context value for inlined OpenMP region");
243   }
244 
245   /// Lookup the captured field decl for a variable.
246   const FieldDecl *lookup(const VarDecl *VD) const override {
247     if (OuterRegionInfo)
248       return OuterRegionInfo->lookup(VD);
249     // If there is no outer outlined region,no need to lookup in a list of
250     // captured variables, we can use the original one.
251     return nullptr;
252   }
253 
254   FieldDecl *getThisFieldDecl() const override {
255     if (OuterRegionInfo)
256       return OuterRegionInfo->getThisFieldDecl();
257     return nullptr;
258   }
259 
260   /// Get a variable or parameter for storing global thread id
261   /// inside OpenMP construct.
262   const VarDecl *getThreadIDVariable() const override {
263     if (OuterRegionInfo)
264       return OuterRegionInfo->getThreadIDVariable();
265     return nullptr;
266   }
267 
268   /// Get an LValue for the current ThreadID variable.
269   LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override {
270     if (OuterRegionInfo)
271       return OuterRegionInfo->getThreadIDVariableLValue(CGF);
272     llvm_unreachable("No LValue for inlined OpenMP construct");
273   }
274 
275   /// Get the name of the capture helper.
276   StringRef getHelperName() const override {
277     if (auto *OuterRegionInfo = getOldCSI())
278       return OuterRegionInfo->getHelperName();
279     llvm_unreachable("No helper name for inlined OpenMP construct");
280   }
281 
282   void emitUntiedSwitch(CodeGenFunction &CGF) override {
283     if (OuterRegionInfo)
284       OuterRegionInfo->emitUntiedSwitch(CGF);
285   }
286 
287   CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; }
288 
289   static bool classof(const CGCapturedStmtInfo *Info) {
290     return CGOpenMPRegionInfo::classof(Info) &&
291            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion;
292   }
293 
294   ~CGOpenMPInlinedRegionInfo() override = default;
295 
296 private:
297   /// CodeGen info about outer OpenMP region.
298   CodeGenFunction::CGCapturedStmtInfo *OldCSI;
299   CGOpenMPRegionInfo *OuterRegionInfo;
300 };
301 
302 /// API for captured statement code generation in OpenMP target
303 /// constructs. For this captures, implicit parameters are used instead of the
304 /// captured fields. The name of the target region has to be unique in a given
305 /// application so it is provided by the client, because only the client has
306 /// the information to generate that.
307 class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo {
308 public:
309   CGOpenMPTargetRegionInfo(const CapturedStmt &CS,
310                            const RegionCodeGenTy &CodeGen, StringRef HelperName)
311       : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target,
312                            /*HasCancel=*/false),
313         HelperName(HelperName) {}
314 
315   /// This is unused for target regions because each starts executing
316   /// with a single thread.
317   const VarDecl *getThreadIDVariable() const override { return nullptr; }
318 
319   /// Get the name of the capture helper.
320   StringRef getHelperName() const override { return HelperName; }
321 
322   static bool classof(const CGCapturedStmtInfo *Info) {
323     return CGOpenMPRegionInfo::classof(Info) &&
324            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion;
325   }
326 
327 private:
328   StringRef HelperName;
329 };
330 
331 static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) {
332   llvm_unreachable("No codegen for expressions");
333 }
334 /// API for generation of expressions captured in a innermost OpenMP
335 /// region.
336 class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo {
337 public:
338   CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS)
339       : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen,
340                                   OMPD_unknown,
341                                   /*HasCancel=*/false),
342         PrivScope(CGF) {
343     // Make sure the globals captured in the provided statement are local by
344     // using the privatization logic. We assume the same variable is not
345     // captured more than once.
346     for (const auto &C : CS.captures()) {
347       if (!C.capturesVariable() && !C.capturesVariableByCopy())
348         continue;
349 
350       const VarDecl *VD = C.getCapturedVar();
351       if (VD->isLocalVarDeclOrParm())
352         continue;
353 
354       DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD),
355                       /*RefersToEnclosingVariableOrCapture=*/false,
356                       VD->getType().getNonReferenceType(), VK_LValue,
357                       C.getLocation());
358       PrivScope.addPrivate(
359           VD, [&CGF, &DRE]() { return CGF.EmitLValue(&DRE).getAddress(); });
360     }
361     (void)PrivScope.Privatize();
362   }
363 
364   /// Lookup the captured field decl for a variable.
365   const FieldDecl *lookup(const VarDecl *VD) const override {
366     if (const FieldDecl *FD = CGOpenMPInlinedRegionInfo::lookup(VD))
367       return FD;
368     return nullptr;
369   }
370 
371   /// Emit the captured statement body.
372   void EmitBody(CodeGenFunction &CGF, const Stmt *S) override {
373     llvm_unreachable("No body for expressions");
374   }
375 
376   /// Get a variable or parameter for storing global thread id
377   /// inside OpenMP construct.
378   const VarDecl *getThreadIDVariable() const override {
379     llvm_unreachable("No thread id for expressions");
380   }
381 
382   /// Get the name of the capture helper.
383   StringRef getHelperName() const override {
384     llvm_unreachable("No helper name for expressions");
385   }
386 
387   static bool classof(const CGCapturedStmtInfo *Info) { return false; }
388 
389 private:
390   /// Private scope to capture global variables.
391   CodeGenFunction::OMPPrivateScope PrivScope;
392 };
393 
394 /// RAII for emitting code of OpenMP constructs.
395 class InlinedOpenMPRegionRAII {
396   CodeGenFunction &CGF;
397   llvm::DenseMap<const VarDecl *, FieldDecl *> LambdaCaptureFields;
398   FieldDecl *LambdaThisCaptureField = nullptr;
399   const CodeGen::CGBlockInfo *BlockInfo = nullptr;
400 
401 public:
402   /// Constructs region for combined constructs.
403   /// \param CodeGen Code generation sequence for combined directives. Includes
404   /// a list of functions used for code generation of implicitly inlined
405   /// regions.
406   InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen,
407                           OpenMPDirectiveKind Kind, bool HasCancel)
408       : CGF(CGF) {
409     // Start emission for the construct.
410     CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo(
411         CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel);
412     std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
413     LambdaThisCaptureField = CGF.LambdaThisCaptureField;
414     CGF.LambdaThisCaptureField = nullptr;
415     BlockInfo = CGF.BlockInfo;
416     CGF.BlockInfo = nullptr;
417   }
418 
419   ~InlinedOpenMPRegionRAII() {
420     // Restore original CapturedStmtInfo only if we're done with code emission.
421     auto *OldCSI =
422         cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI();
423     delete CGF.CapturedStmtInfo;
424     CGF.CapturedStmtInfo = OldCSI;
425     std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
426     CGF.LambdaThisCaptureField = LambdaThisCaptureField;
427     CGF.BlockInfo = BlockInfo;
428   }
429 };
430 
431 /// Values for bit flags used in the ident_t to describe the fields.
432 /// All enumeric elements are named and described in accordance with the code
433 /// from https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h
434 enum OpenMPLocationFlags : unsigned {
435   /// Use trampoline for internal microtask.
436   OMP_IDENT_IMD = 0x01,
437   /// Use c-style ident structure.
438   OMP_IDENT_KMPC = 0x02,
439   /// Atomic reduction option for kmpc_reduce.
440   OMP_ATOMIC_REDUCE = 0x10,
441   /// Explicit 'barrier' directive.
442   OMP_IDENT_BARRIER_EXPL = 0x20,
443   /// Implicit barrier in code.
444   OMP_IDENT_BARRIER_IMPL = 0x40,
445   /// Implicit barrier in 'for' directive.
446   OMP_IDENT_BARRIER_IMPL_FOR = 0x40,
447   /// Implicit barrier in 'sections' directive.
448   OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0,
449   /// Implicit barrier in 'single' directive.
450   OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140,
451   /// Call of __kmp_for_static_init for static loop.
452   OMP_IDENT_WORK_LOOP = 0x200,
453   /// Call of __kmp_for_static_init for sections.
454   OMP_IDENT_WORK_SECTIONS = 0x400,
455   /// Call of __kmp_for_static_init for distribute.
456   OMP_IDENT_WORK_DISTRIBUTE = 0x800,
457   LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE)
458 };
459 
/// Describes ident structure that describes a source location.
/// All descriptions are taken from
/// https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h
/// Original structure:
/// typedef struct ident {
///    kmp_int32 reserved_1;   /**<  might be used in Fortran;
///                                  see above  */
///    kmp_int32 flags;        /**<  also f.flags; KMP_IDENT_xxx flags;
///                                  KMP_IDENT_KMPC identifies this union
///                                  member  */
///    kmp_int32 reserved_2;   /**<  not really used in Fortran any more;
///                                  see above */
///#if USE_ITT_BUILD
///                            /*  but currently used for storing
///                                region-specific ITT */
///                            /*  contextual information. */
///#endif /* USE_ITT_BUILD */
///    kmp_int32 reserved_3;   /**< source[4] in Fortran, do not use for
///                                 C++  */
///    char const *psource;    /**< String describing the source location.
///                            The string is composed of semi-colon separated
///                            fields which describe the source file,
///                            the function and a pair of line numbers that
///                            delimit the construct.
///                             */
/// } ident_t;
///
/// The enumerators below are listed in the same order as the members of the
/// structure above.
enum IdentFieldIndex {
  /// might be used in Fortran
  IdentField_Reserved_1,
  /// OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member.
  IdentField_Flags,
  /// Not really used in Fortran any more
  IdentField_Reserved_2,
  /// Source[4] in Fortran, do not use for C++
  IdentField_Reserved_3,
  /// String describing the source location. The string is composed of
  /// semi-colon separated fields which describe the source file, the function
  /// and a pair of line numbers that delimit the construct.
  IdentField_PSource
};
500 
/// Schedule types for 'omp for' loops (these enumerators are taken from
/// the enum sched_type in kmp.h).
///
/// Each 'ordered' enumerator is its unordered counterpart plus 32. The two
/// modifier enumerators are single high bits, distinct from every schedule
/// value listed here.
enum OpenMPSchedType {
  /// Lower bound for default (unordered) versions.
  OMP_sch_lower = 32,
  OMP_sch_static_chunked = 33,
  OMP_sch_static = 34,
  OMP_sch_dynamic_chunked = 35,
  OMP_sch_guided_chunked = 36,
  OMP_sch_runtime = 37,
  OMP_sch_auto = 38,
  /// static with chunk adjustment (e.g., simd)
  OMP_sch_static_balanced_chunked = 45,
  /// Lower bound for 'ordered' versions.
  OMP_ord_lower = 64,
  OMP_ord_static_chunked = 65,
  OMP_ord_static = 66,
  OMP_ord_dynamic_chunked = 67,
  OMP_ord_guided_chunked = 68,
  OMP_ord_runtime = 69,
  OMP_ord_auto = 70,
  /// Default schedule; an alias of OMP_sch_static.
  OMP_sch_default = OMP_sch_static,
  /// dist_schedule types
  OMP_dist_sch_static_chunked = 91,
  OMP_dist_sch_static = 92,
  /// Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers.
  /// Set if the monotonic schedule modifier was present.
  OMP_sch_modifier_monotonic = (1 << 29),
  /// Set if the nonmonotonic schedule modifier was present.
  OMP_sch_modifier_nonmonotonic = (1 << 30),
};
532 
/// Identifiers of the OpenMP runtime library entry points referenced in this
/// file. The comment on each enumerator gives the prototype of the runtime
/// function it stands for.
enum OpenMPRTLFunction {
  /// Call to void __kmpc_fork_call(ident_t *loc, kmp_int32 argc,
  /// kmpc_micro microtask, ...);
  OMPRTL__kmpc_fork_call,
  /// Call to void *__kmpc_threadprivate_cached(ident_t *loc,
  /// kmp_int32 global_tid, void *data, size_t size, void ***cache);
  OMPRTL__kmpc_threadprivate_cached,
  /// Call to void __kmpc_threadprivate_register( ident_t *,
  /// void *data, kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor);
  OMPRTL__kmpc_threadprivate_register,
  /// Call to kmp_int32 __kmpc_global_thread_num(ident_t *loc);
  OMPRTL__kmpc_global_thread_num,
  /// Call to void __kmpc_critical(ident_t *loc, kmp_int32 global_tid,
  /// kmp_critical_name *crit);
  OMPRTL__kmpc_critical,
  /// Call to void __kmpc_critical_with_hint(ident_t *loc, kmp_int32
  /// global_tid, kmp_critical_name *crit, uintptr_t hint);
  OMPRTL__kmpc_critical_with_hint,
  /// Call to void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid,
  /// kmp_critical_name *crit);
  OMPRTL__kmpc_end_critical,
  /// Call to kmp_int32 __kmpc_cancel_barrier(ident_t *loc, kmp_int32
  /// global_tid);
  OMPRTL__kmpc_cancel_barrier,
  /// Call to void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_barrier,
  /// Call to void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_for_static_fini,
  /// Call to void __kmpc_serialized_parallel(ident_t *loc, kmp_int32
  /// global_tid);
  OMPRTL__kmpc_serialized_parallel,
  /// Call to void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32
  /// global_tid);
  OMPRTL__kmpc_end_serialized_parallel,
  /// Call to void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
  /// kmp_int32 num_threads);
  OMPRTL__kmpc_push_num_threads,
  /// Call to void __kmpc_flush(ident_t *loc);
  OMPRTL__kmpc_flush,
  /// Call to kmp_int32 __kmpc_master(ident_t *, kmp_int32 global_tid);
  OMPRTL__kmpc_master,
  /// Call to void __kmpc_end_master(ident_t *, kmp_int32 global_tid);
  OMPRTL__kmpc_end_master,
  /// Call to kmp_int32 __kmpc_omp_taskyield(ident_t *, kmp_int32 global_tid,
  /// int end_part);
  OMPRTL__kmpc_omp_taskyield,
  /// Call to kmp_int32 __kmpc_single(ident_t *, kmp_int32 global_tid);
  OMPRTL__kmpc_single,
  /// Call to void __kmpc_end_single(ident_t *, kmp_int32 global_tid);
  OMPRTL__kmpc_end_single,
  /// Call to kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
  /// kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
  /// kmp_routine_entry_t *task_entry);
  OMPRTL__kmpc_omp_task_alloc,
  /// Call to kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t *
  /// new_task);
  OMPRTL__kmpc_omp_task,
  /// Call to void __kmpc_copyprivate(ident_t *loc, kmp_int32 global_tid,
  /// size_t cpy_size, void *cpy_data, void(*cpy_func)(void *, void *),
  /// kmp_int32 didit);
  OMPRTL__kmpc_copyprivate,
  /// Call to kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid,
  /// kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void
  /// (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name *lck);
  OMPRTL__kmpc_reduce,
  /// Call to kmp_int32 __kmpc_reduce_nowait(ident_t *loc, kmp_int32
  /// global_tid, kmp_int32 num_vars, size_t reduce_size, void *reduce_data,
  /// void (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name
  /// *lck);
  OMPRTL__kmpc_reduce_nowait,
  /// Call to void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,
  /// kmp_critical_name *lck);
  OMPRTL__kmpc_end_reduce,
  /// Call to void __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
  /// kmp_critical_name *lck);
  OMPRTL__kmpc_end_reduce_nowait,
  /// Call to void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
  /// kmp_task_t * new_task);
  OMPRTL__kmpc_omp_task_begin_if0,
  /// Call to void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
  /// kmp_task_t * new_task);
  OMPRTL__kmpc_omp_task_complete_if0,
  /// Call to void __kmpc_ordered(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_ordered,
  /// Call to void __kmpc_end_ordered(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_end_ordered,
  /// Call to kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
  /// global_tid);
  OMPRTL__kmpc_omp_taskwait,
  /// Call to void __kmpc_taskgroup(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_taskgroup,
  /// Call to void __kmpc_end_taskgroup(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_end_taskgroup,
  /// Call to void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
  /// int proc_bind);
  OMPRTL__kmpc_push_proc_bind,
  /// Call to kmp_int32 __kmpc_omp_task_with_deps(ident_t *loc_ref, kmp_int32
  /// gtid, kmp_task_t * new_task, kmp_int32 ndeps, kmp_depend_info_t
  /// *dep_list, kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list);
  OMPRTL__kmpc_omp_task_with_deps,
  /// Call to void __kmpc_omp_wait_deps(ident_t *loc_ref, kmp_int32
  /// gtid, kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
  /// ndeps_noalias, kmp_depend_info_t *noalias_dep_list);
  OMPRTL__kmpc_omp_wait_deps,
  /// Call to kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
  /// global_tid, kmp_int32 cncl_kind);
  OMPRTL__kmpc_cancellationpoint,
  /// Call to kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
  /// kmp_int32 cncl_kind);
  OMPRTL__kmpc_cancel,
  /// Call to void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid,
  /// kmp_int32 num_teams, kmp_int32 thread_limit);
  OMPRTL__kmpc_push_num_teams,
  /// Call to void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro
  /// microtask, ...);
  OMPRTL__kmpc_fork_teams,
  /// Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
  /// if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
  /// sched, kmp_uint64 grainsize, void *task_dup);
  OMPRTL__kmpc_taskloop,
  /// Call to void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid, kmp_int32
  /// num_dims, struct kmp_dim *dims);
  OMPRTL__kmpc_doacross_init,
  /// Call to void __kmpc_doacross_fini(ident_t *loc, kmp_int32 gtid);
  OMPRTL__kmpc_doacross_fini,
  /// Call to void __kmpc_doacross_post(ident_t *loc, kmp_int32 gtid, kmp_int64
  /// *vec);
  OMPRTL__kmpc_doacross_post,
  /// Call to void __kmpc_doacross_wait(ident_t *loc, kmp_int32 gtid, kmp_int64
  /// *vec);
  OMPRTL__kmpc_doacross_wait,
  /// Call to void *__kmpc_task_reduction_init(int gtid, int num_data, void
  /// *data);
  OMPRTL__kmpc_task_reduction_init,
  /// Call to void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
  /// *d);
  OMPRTL__kmpc_task_reduction_get_th_data,
  /// Call to void *__kmpc_alloc(int gtid, size_t sz, omp_allocator_handle_t al);
  OMPRTL__kmpc_alloc,
  /// Call to void __kmpc_free(int gtid, void *ptr, omp_allocator_handle_t al);
  OMPRTL__kmpc_free,

  //
  // Offloading related calls
  //
  /// Call to void __kmpc_push_target_tripcount(int64_t device_id, kmp_uint64
  /// size);
  OMPRTL__kmpc_push_target_tripcount,
  /// Call to int32_t __tgt_target(int64_t device_id, void *host_ptr, int32_t
  /// arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t
  /// *arg_types);
  OMPRTL__tgt_target,
  /// Call to int32_t __tgt_target_nowait(int64_t device_id, void *host_ptr,
  /// int32_t arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t
  /// *arg_types);
  OMPRTL__tgt_target_nowait,
  /// Call to int32_t __tgt_target_teams(int64_t device_id, void *host_ptr,
  /// int32_t arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t
  /// *arg_types, int32_t num_teams, int32_t thread_limit);
  OMPRTL__tgt_target_teams,
  /// Call to int32_t __tgt_target_teams_nowait(int64_t device_id, void
  /// *host_ptr, int32_t arg_num, void** args_base, void **args, size_t
  /// *arg_sizes, int64_t *arg_types, int32_t num_teams, int32_t thread_limit);
  OMPRTL__tgt_target_teams_nowait,
  /// Call to void __tgt_register_lib(__tgt_bin_desc *desc);
  OMPRTL__tgt_register_lib,
  /// Call to void __tgt_unregister_lib(__tgt_bin_desc *desc);
  OMPRTL__tgt_unregister_lib,
  /// Call to void __tgt_target_data_begin(int64_t device_id, int32_t arg_num,
  /// void** args_base, void **args, size_t *arg_sizes, int64_t *arg_types);
  OMPRTL__tgt_target_data_begin,
  /// Call to void __tgt_target_data_begin_nowait(int64_t device_id, int32_t
  /// arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t
  /// *arg_types);
  OMPRTL__tgt_target_data_begin_nowait,
  /// Call to void __tgt_target_data_end(int64_t device_id, int32_t arg_num,
  /// void** args_base, void **args, size_t *arg_sizes, int64_t *arg_types);
  OMPRTL__tgt_target_data_end,
  /// Call to void __tgt_target_data_end_nowait(int64_t device_id, int32_t
  /// arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t
  /// *arg_types);
  OMPRTL__tgt_target_data_end_nowait,
  /// Call to void __tgt_target_data_update(int64_t device_id, int32_t arg_num,
  /// void** args_base, void **args, size_t *arg_sizes, int64_t *arg_types);
  OMPRTL__tgt_target_data_update,
  /// Call to void __tgt_target_data_update_nowait(int64_t device_id, int32_t
  /// arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t
  /// *arg_types);
  OMPRTL__tgt_target_data_update_nowait,
};
723 
724 /// A basic class for pre|post-action for advanced codegen sequence for OpenMP
725 /// region.
726 class CleanupTy final : public EHScopeStack::Cleanup {
727   PrePostActionTy *Action;
728 
729 public:
730   explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {}
731   void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
732     if (!CGF.HaveInsertPoint())
733       return;
734     Action->Exit(CGF);
735   }
736 };
737 
738 } // anonymous namespace
739 
740 void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const {
741   CodeGenFunction::RunCleanupsScope Scope(CGF);
742   if (PrePostAction) {
743     CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction);
744     Callback(CodeGen, CGF, *PrePostAction);
745   } else {
746     PrePostActionTy Action;
747     Callback(CodeGen, CGF, Action);
748   }
749 }
750 
751 /// Check if the combiner is a call to UDR combiner and if it is so return the
752 /// UDR decl used for reduction.
753 static const OMPDeclareReductionDecl *
754 getReductionInit(const Expr *ReductionOp) {
755   if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
756     if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
757       if (const auto *DRE =
758               dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
759         if (const auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl()))
760           return DRD;
761   return nullptr;
762 }
763 
764 static void emitInitWithReductionInitializer(CodeGenFunction &CGF,
765                                              const OMPDeclareReductionDecl *DRD,
766                                              const Expr *InitOp,
767                                              Address Private, Address Original,
768                                              QualType Ty) {
769   if (DRD->getInitializer()) {
770     std::pair<llvm::Function *, llvm::Function *> Reduction =
771         CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
772     const auto *CE = cast<CallExpr>(InitOp);
773     const auto *OVE = cast<OpaqueValueExpr>(CE->getCallee());
774     const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts();
775     const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts();
776     const auto *LHSDRE =
777         cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr());
778     const auto *RHSDRE =
779         cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr());
780     CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
781     PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()),
782                             [=]() { return Private; });
783     PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()),
784                             [=]() { return Original; });
785     (void)PrivateScope.Privatize();
786     RValue Func = RValue::get(Reduction.second);
787     CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
788     CGF.EmitIgnoredExpr(InitOp);
789   } else {
790     llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty);
791     std::string Name = CGF.CGM.getOpenMPRuntime().getName({"init"});
792     auto *GV = new llvm::GlobalVariable(
793         CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true,
794         llvm::GlobalValue::PrivateLinkage, Init, Name);
795     LValue LV = CGF.MakeNaturalAlignAddrLValue(GV, Ty);
796     RValue InitRVal;
797     switch (CGF.getEvaluationKind(Ty)) {
798     case TEK_Scalar:
799       InitRVal = CGF.EmitLoadOfLValue(LV, DRD->getLocation());
800       break;
801     case TEK_Complex:
802       InitRVal =
803           RValue::getComplex(CGF.EmitLoadOfComplex(LV, DRD->getLocation()));
804       break;
805     case TEK_Aggregate:
806       InitRVal = RValue::getAggregate(LV.getAddress());
807       break;
808     }
809     OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_RValue);
810     CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal);
811     CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
812                          /*IsInitializer=*/false);
813   }
814 }
815 
/// Emit element-by-element initialization of an array.
///
/// A loop is emitted that walks the destination array (and, when \p DRD is
/// set, the source array in lock-step) and initializes one element per
/// iteration. The loop is skipped entirely when the array is empty.
///
/// \param CGF Function being emitted.
/// \param DestAddr Address of the array.
/// \param Type Type of array.
/// \param EmitDeclareReductionInit If true, each element is initialized via
/// emitInitWithReductionInitializer(); otherwise \p Init is emitted directly
/// into each element.
/// \param Init Initial expression of array.
/// \param DRD User-defined reduction declaration, or nullptr. The source
/// address is only used when this is non-null.
/// \param SrcAddr Address of the original array.
static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr,
                                 QualType Type, bool EmitDeclareReductionInit,
                                 const Expr *Init,
                                 const OMPDeclareReductionDecl *DRD,
                                 Address SrcAddr = Address::invalid()) {
  // Perform element-by-element initialization.
  QualType ElementTy;

  // Drill down to the base element type on both arrays.
  const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr);
  DestAddr =
      CGF.Builder.CreateElementBitCast(DestAddr, DestAddr.getElementType());
  if (DRD)
    SrcAddr =
        CGF.Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType());

  llvm::Value *SrcBegin = nullptr;
  if (DRD)
    SrcBegin = SrcAddr.getPointer();
  llvm::Value *DestBegin = DestAddr.getPointer();
  // Compute the one-past-the-end pointer of the destination array.
  llvm::Value *DestEnd = CGF.Builder.CreateGEP(DestBegin, NumElements);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arrayinit.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arrayinit.done");
  // Skip the loop altogether for an empty array.
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);

  // PHI tracking the current source element; only needed with a UDR.
  llvm::PHINode *SrcElementPHI = nullptr;
  Address SrcElementCurrent = Address::invalid();
  if (DRD) {
    SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2,
                                          "omp.arraycpy.srcElementPast");
    SrcElementPHI->addIncoming(SrcBegin, EntryBB);
    SrcElementCurrent =
        Address(SrcElementPHI,
                SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));
  }
  // PHI tracking the current destination element.
  llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI(
      DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
  DestElementPHI->addIncoming(DestBegin, EntryBB);
  Address DestElementCurrent =
      Address(DestElementPHI,
              DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit the initialization of the current element inside its own cleanups
  // scope.
  {
    CodeGenFunction::RunCleanupsScope InitScope(CGF);
    if (EmitDeclareReductionInit) {
      emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent,
                                       SrcElementCurrent, ElementTy);
    } else
      CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(),
                           /*IsInitializer=*/false);
  }

  if (DRD) {
    // Shift the source address forward by one element.
    llvm::Value *SrcElementNext = CGF.Builder.CreateConstGEP1_32(
        SrcElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
    SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock());
  }

  // Shift the destination address forward by one element.
  llvm::Value *DestElementNext = CGF.Builder.CreateConstGEP1_32(
      DestElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}
904 
905 LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) {
906   return CGF.EmitOMPSharedLValue(E);
907 }
908 
909 LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF,
910                                             const Expr *E) {
911   if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E))
912     return CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false);
913   return LValue();
914 }
915 
916 void ReductionCodeGen::emitAggregateInitialization(
917     CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal,
918     const OMPDeclareReductionDecl *DRD) {
919   // Emit VarDecl with copy init for arrays.
920   // Get the address of the original variable captured in current
921   // captured region.
922   const auto *PrivateVD =
923       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
924   bool EmitDeclareReductionInit =
925       DRD && (DRD->getInitializer() || !PrivateVD->hasInit());
926   EmitOMPAggregateInit(CGF, PrivateAddr, PrivateVD->getType(),
927                        EmitDeclareReductionInit,
928                        EmitDeclareReductionInit ? ClausesData[N].ReductionOp
929                                                 : PrivateVD->getInit(),
930                        DRD, SharedLVal.getAddress());
931 }
932 
933 ReductionCodeGen::ReductionCodeGen(ArrayRef<const Expr *> Shareds,
934                                    ArrayRef<const Expr *> Privates,
935                                    ArrayRef<const Expr *> ReductionOps) {
936   ClausesData.reserve(Shareds.size());
937   SharedAddresses.reserve(Shareds.size());
938   Sizes.reserve(Shareds.size());
939   BaseDecls.reserve(Shareds.size());
940   auto IPriv = Privates.begin();
941   auto IRed = ReductionOps.begin();
942   for (const Expr *Ref : Shareds) {
943     ClausesData.emplace_back(Ref, *IPriv, *IRed);
944     std::advance(IPriv, 1);
945     std::advance(IRed, 1);
946   }
947 }
948 
949 void ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, unsigned N) {
950   assert(SharedAddresses.size() == N &&
951          "Number of generated lvalues must be exactly N.");
952   LValue First = emitSharedLValue(CGF, ClausesData[N].Ref);
953   LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Ref);
954   SharedAddresses.emplace_back(First, Second);
955 }
956 
957 void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) {
958   const auto *PrivateVD =
959       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
960   QualType PrivateType = PrivateVD->getType();
961   bool AsArraySection = isa<OMPArraySectionExpr>(ClausesData[N].Ref);
962   if (!PrivateType->isVariablyModifiedType()) {
963     Sizes.emplace_back(
964         CGF.getTypeSize(
965             SharedAddresses[N].first.getType().getNonReferenceType()),
966         nullptr);
967     return;
968   }
969   llvm::Value *Size;
970   llvm::Value *SizeInChars;
971   auto *ElemType =
972       cast<llvm::PointerType>(SharedAddresses[N].first.getPointer()->getType())
973           ->getElementType();
974   auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType);
975   if (AsArraySection) {
976     Size = CGF.Builder.CreatePtrDiff(SharedAddresses[N].second.getPointer(),
977                                      SharedAddresses[N].first.getPointer());
978     Size = CGF.Builder.CreateNUWAdd(
979         Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1));
980     SizeInChars = CGF.Builder.CreateNUWMul(Size, ElemSizeOf);
981   } else {
982     SizeInChars = CGF.getTypeSize(
983         SharedAddresses[N].first.getType().getNonReferenceType());
984     Size = CGF.Builder.CreateExactUDiv(SizeInChars, ElemSizeOf);
985   }
986   Sizes.emplace_back(SizeInChars, Size);
987   CodeGenFunction::OpaqueValueMapping OpaqueMap(
988       CGF,
989       cast<OpaqueValueExpr>(
990           CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
991       RValue::get(Size));
992   CGF.EmitVariablyModifiedType(PrivateType);
993 }
994 
995 void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N,
996                                          llvm::Value *Size) {
997   const auto *PrivateVD =
998       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
999   QualType PrivateType = PrivateVD->getType();
1000   if (!PrivateType->isVariablyModifiedType()) {
1001     assert(!Size && !Sizes[N].second &&
1002            "Size should be nullptr for non-variably modified reduction "
1003            "items.");
1004     return;
1005   }
1006   CodeGenFunction::OpaqueValueMapping OpaqueMap(
1007       CGF,
1008       cast<OpaqueValueExpr>(
1009           CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
1010       RValue::get(Size));
1011   CGF.EmitVariablyModifiedType(PrivateType);
1012 }
1013 
1014 void ReductionCodeGen::emitInitialization(
1015     CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal,
1016     llvm::function_ref<bool(CodeGenFunction &)> DefaultInit) {
1017   assert(SharedAddresses.size() > N && "No variable was generated");
1018   const auto *PrivateVD =
1019       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
1020   const OMPDeclareReductionDecl *DRD =
1021       getReductionInit(ClausesData[N].ReductionOp);
1022   QualType PrivateType = PrivateVD->getType();
1023   PrivateAddr = CGF.Builder.CreateElementBitCast(
1024       PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
1025   QualType SharedType = SharedAddresses[N].first.getType();
1026   SharedLVal = CGF.MakeAddrLValue(
1027       CGF.Builder.CreateElementBitCast(SharedLVal.getAddress(),
1028                                        CGF.ConvertTypeForMem(SharedType)),
1029       SharedType, SharedAddresses[N].first.getBaseInfo(),
1030       CGF.CGM.getTBAAInfoForSubobject(SharedAddresses[N].first, SharedType));
1031   if (CGF.getContext().getAsArrayType(PrivateVD->getType())) {
1032     emitAggregateInitialization(CGF, N, PrivateAddr, SharedLVal, DRD);
1033   } else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) {
1034     emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp,
1035                                      PrivateAddr, SharedLVal.getAddress(),
1036                                      SharedLVal.getType());
1037   } else if (!DefaultInit(CGF) && PrivateVD->hasInit() &&
1038              !CGF.isTrivialInitializer(PrivateVD->getInit())) {
1039     CGF.EmitAnyExprToMem(PrivateVD->getInit(), PrivateAddr,
1040                          PrivateVD->getType().getQualifiers(),
1041                          /*IsInitializer=*/false);
1042   }
1043 }
1044 
1045 bool ReductionCodeGen::needCleanups(unsigned N) {
1046   const auto *PrivateVD =
1047       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
1048   QualType PrivateType = PrivateVD->getType();
1049   QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
1050   return DTorKind != QualType::DK_none;
1051 }
1052 
1053 void ReductionCodeGen::emitCleanups(CodeGenFunction &CGF, unsigned N,
1054                                     Address PrivateAddr) {
1055   const auto *PrivateVD =
1056       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
1057   QualType PrivateType = PrivateVD->getType();
1058   QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
1059   if (needCleanups(N)) {
1060     PrivateAddr = CGF.Builder.CreateElementBitCast(
1061         PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
1062     CGF.pushDestroy(DTorKind, PrivateAddr, PrivateType);
1063   }
1064 }
1065 
1066 static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
1067                           LValue BaseLV) {
1068   BaseTy = BaseTy.getNonReferenceType();
1069   while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
1070          !CGF.getContext().hasSameType(BaseTy, ElTy)) {
1071     if (const auto *PtrTy = BaseTy->getAs<PointerType>()) {
1072       BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(), PtrTy);
1073     } else {
1074       LValue RefLVal = CGF.MakeAddrLValue(BaseLV.getAddress(), BaseTy);
1075       BaseLV = CGF.EmitLoadOfReferenceLValue(RefLVal);
1076     }
1077     BaseTy = BaseTy->getPointeeType();
1078   }
1079   return CGF.MakeAddrLValue(
1080       CGF.Builder.CreateElementBitCast(BaseLV.getAddress(),
1081                                        CGF.ConvertTypeForMem(ElTy)),
1082       BaseLV.getType(), BaseLV.getBaseInfo(),
1083       CGF.CGM.getTBAAInfoForSubobject(BaseLV, BaseLV.getType()));
1084 }
1085 
1086 static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
1087                           llvm::Type *BaseLVType, CharUnits BaseLVAlignment,
1088                           llvm::Value *Addr) {
1089   Address Tmp = Address::invalid();
1090   Address TopTmp = Address::invalid();
1091   Address MostTopTmp = Address::invalid();
1092   BaseTy = BaseTy.getNonReferenceType();
1093   while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
1094          !CGF.getContext().hasSameType(BaseTy, ElTy)) {
1095     Tmp = CGF.CreateMemTemp(BaseTy);
1096     if (TopTmp.isValid())
1097       CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp);
1098     else
1099       MostTopTmp = Tmp;
1100     TopTmp = Tmp;
1101     BaseTy = BaseTy->getPointeeType();
1102   }
1103   llvm::Type *Ty = BaseLVType;
1104   if (Tmp.isValid())
1105     Ty = Tmp.getElementType();
1106   Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Addr, Ty);
1107   if (Tmp.isValid()) {
1108     CGF.Builder.CreateStore(Addr, Tmp);
1109     return MostTopTmp;
1110   }
1111   return Address(Addr, BaseLVAlignment);
1112 }
1113 
1114 static const VarDecl *getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE) {
1115   const VarDecl *OrigVD = nullptr;
1116   if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(Ref)) {
1117     const Expr *Base = OASE->getBase()->IgnoreParenImpCasts();
1118     while (const auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base))
1119       Base = TempOASE->getBase()->IgnoreParenImpCasts();
1120     while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
1121       Base = TempASE->getBase()->IgnoreParenImpCasts();
1122     DE = cast<DeclRefExpr>(Base);
1123     OrigVD = cast<VarDecl>(DE->getDecl());
1124   } else if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Ref)) {
1125     const Expr *Base = ASE->getBase()->IgnoreParenImpCasts();
1126     while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
1127       Base = TempASE->getBase()->IgnoreParenImpCasts();
1128     DE = cast<DeclRefExpr>(Base);
1129     OrigVD = cast<VarDecl>(DE->getDecl());
1130   }
1131   return OrigVD;
1132 }
1133 
1134 Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N,
1135                                                Address PrivateAddr) {
1136   const DeclRefExpr *DE;
1137   if (const VarDecl *OrigVD = ::getBaseDecl(ClausesData[N].Ref, DE)) {
1138     BaseDecls.emplace_back(OrigVD);
1139     LValue OriginalBaseLValue = CGF.EmitLValue(DE);
1140     LValue BaseLValue =
1141         loadToBegin(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(),
1142                     OriginalBaseLValue);
1143     llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff(
1144         BaseLValue.getPointer(), SharedAddresses[N].first.getPointer());
1145     llvm::Value *PrivatePointer =
1146         CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
1147             PrivateAddr.getPointer(),
1148             SharedAddresses[N].first.getAddress().getType());
1149     llvm::Value *Ptr = CGF.Builder.CreateGEP(PrivatePointer, Adjustment);
1150     return castToBase(CGF, OrigVD->getType(),
1151                       SharedAddresses[N].first.getType(),
1152                       OriginalBaseLValue.getAddress().getType(),
1153                       OriginalBaseLValue.getAlignment(), Ptr);
1154   }
1155   BaseDecls.emplace_back(
1156       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Ref)->getDecl()));
1157   return PrivateAddr;
1158 }
1159 
1160 bool ReductionCodeGen::usesReductionInitializer(unsigned N) const {
1161   const OMPDeclareReductionDecl *DRD =
1162       getReductionInit(ClausesData[N].ReductionOp);
1163   return DRD && DRD->getInitializer();
1164 }
1165 
1166 LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) {
1167   return CGF.EmitLoadOfPointerLValue(
1168       CGF.GetAddrOfLocalVar(getThreadIDVariable()),
1169       getThreadIDVariable()->getType()->castAs<PointerType>());
1170 }
1171 
1172 void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt * /*S*/) {
1173   if (!CGF.HaveInsertPoint())
1174     return;
1175   // 1.2.2 OpenMP Language Terminology
1176   // Structured block - An executable statement with a single entry at the
1177   // top and a single exit at the bottom.
1178   // The point of exit cannot be a branch out of the structured block.
1179   // longjmp() and throw() must not violate the entry/exit criteria.
1180   CGF.EHStack.pushTerminate();
1181   CodeGen(CGF);
1182   CGF.EHStack.popTerminate();
1183 }
1184 
1185 LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue(
1186     CodeGenFunction &CGF) {
1187   return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()),
1188                             getThreadIDVariable()->getType(),
1189                             AlignmentSource::Decl);
1190 }
1191 
1192 static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC,
1193                                        QualType FieldTy) {
1194   auto *Field = FieldDecl::Create(
1195       C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy,
1196       C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()),
1197       /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit);
1198   Field->setAccess(AS_public);
1199   DC->addDecl(Field);
1200   return Field;
1201 }
1202 
1203 CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM, StringRef FirstSeparator,
1204                                  StringRef Separator)
1205     : CGM(CGM), FirstSeparator(FirstSeparator), Separator(Separator),
1206       OffloadEntriesInfoManager(CGM) {
1207   ASTContext &C = CGM.getContext();
1208   RecordDecl *RD = C.buildImplicitRecord("ident_t");
1209   QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
1210   RD->startDefinition();
1211   // reserved_1
1212   addFieldToRecordDecl(C, RD, KmpInt32Ty);
1213   // flags
1214   addFieldToRecordDecl(C, RD, KmpInt32Ty);
1215   // reserved_2
1216   addFieldToRecordDecl(C, RD, KmpInt32Ty);
1217   // reserved_3
1218   addFieldToRecordDecl(C, RD, KmpInt32Ty);
1219   // psource
1220   addFieldToRecordDecl(C, RD, C.VoidPtrTy);
1221   RD->completeDefinition();
1222   IdentQTy = C.getRecordType(RD);
1223   IdentTy = CGM.getTypes().ConvertRecordDeclType(RD);
1224   KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8);
1225 
1226   loadOffloadInfoMetadata();
1227 }
1228 
1229 void CGOpenMPRuntime::clear() {
1230   InternalVars.clear();
1231   // Clean non-target variable declarations possibly used only in debug info.
1232   for (const auto &Data : EmittedNonTargetVariables) {
1233     if (!Data.getValue().pointsToAliveValue())
1234       continue;
1235     auto *GV = dyn_cast<llvm::GlobalVariable>(Data.getValue());
1236     if (!GV)
1237       continue;
1238     if (!GV->isDeclaration() || GV->getNumUses() > 0)
1239       continue;
1240     GV->eraseFromParent();
1241   }
1242 }
1243 
1244 std::string CGOpenMPRuntime::getName(ArrayRef<StringRef> Parts) const {
1245   SmallString<128> Buffer;
1246   llvm::raw_svector_ostream OS(Buffer);
1247   StringRef Sep = FirstSeparator;
1248   for (StringRef Part : Parts) {
1249     OS << Sep << Part;
1250     Sep = Separator;
1251   }
1252   return OS.str();
1253 }
1254 
/// Emit an internal helper function implementing either the combiner or the
/// initializer of a user-defined reduction.
///
/// \param CGM Module the function is emitted into.
/// \param Ty Type of the reduction; both parameters are restrict-qualified
/// pointers to it.
/// \param CombinerInitializer Expression emitted as the function body; may be
/// null, in which case no such expression is emitted.
/// \param In Variable mapped to the pointee of the second parameter.
/// \param Out Variable mapped to the pointee of the first parameter.
/// \param IsCombiner True for a combiner; false for an initializer, in which
/// case a non-trivial initializer of \p Out is emitted first.
/// \returns The emitted function.
static llvm::Function *
emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty,
                          const Expr *CombinerInitializer, const VarDecl *In,
                          const VarDecl *Out, bool IsCombiner) {
  // void .omp_combiner.(Ty *out, Ty *in);
  // The 'out' parameter comes first: it is pushed onto Args before 'in'.
  ASTContext &C = CGM.getContext();
  QualType PtrTy = C.getPointerType(Ty).withRestrict();
  FunctionArgList Args;
  ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(),
                               /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(),
                              /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
  Args.push_back(&OmpOutParm);
  Args.push_back(&OmpInParm);
  const CGFunctionInfo &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName(
      {IsCombiner ? "omp_combiner" : "omp_initializer", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  // When optimizing, force the helper to be inlined into its callers.
  if (CGM.getLangOpts().Optimize) {
    Fn->removeFnAttr(llvm::Attribute::NoInline);
    Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
    Fn->addFnAttr(llvm::Attribute::AlwaysInline);
  }
  CodeGenFunction CGF(CGM);
  // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions.
  // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions.
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, In->getLocation(),
                    Out->getLocation());
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm);
  Scope.addPrivate(In, [&CGF, AddrIn, PtrTy]() {
    return CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>())
        .getAddress();
  });
  Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm);
  Scope.addPrivate(Out, [&CGF, AddrOut, PtrTy]() {
    return CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>())
        .getAddress();
  });
  (void)Scope.Privatize();
  // For an initializer, emit the non-trivial initializer of the 'Out'
  // variable into its (remapped) storage before the initializer expression.
  if (!IsCombiner && Out->hasInit() &&
      !CGF.isTrivialInitializer(Out->getInit())) {
    CGF.EmitAnyExprToMem(Out->getInit(), CGF.GetAddrOfLocalVar(Out),
                         Out->getType().getQualifiers(),
                         /*IsInitializer=*/true);
  }
  if (CombinerInitializer)
    CGF.EmitIgnoredExpr(CombinerInitializer);
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}
1311 
1312 void CGOpenMPRuntime::emitUserDefinedReduction(
1313     CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) {
1314   if (UDRMap.count(D) > 0)
1315     return;
1316   llvm::Function *Combiner = emitCombinerOrInitializer(
1317       CGM, D->getType(), D->getCombiner(),
1318       cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerIn())->getDecl()),
1319       cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerOut())->getDecl()),
1320       /*IsCombiner=*/true);
1321   llvm::Function *Initializer = nullptr;
1322   if (const Expr *Init = D->getInitializer()) {
1323     Initializer = emitCombinerOrInitializer(
1324         CGM, D->getType(),
1325         D->getInitializerKind() == OMPDeclareReductionDecl::CallInit ? Init
1326                                                                      : nullptr,
1327         cast<VarDecl>(cast<DeclRefExpr>(D->getInitOrig())->getDecl()),
1328         cast<VarDecl>(cast<DeclRefExpr>(D->getInitPriv())->getDecl()),
1329         /*IsCombiner=*/false);
1330   }
1331   UDRMap.try_emplace(D, Combiner, Initializer);
1332   if (CGF) {
1333     auto &Decls = FunctionUDRMap.FindAndConstruct(CGF->CurFn);
1334     Decls.second.push_back(D);
1335   }
1336 }
1337 
1338 std::pair<llvm::Function *, llvm::Function *>
1339 CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) {
1340   auto I = UDRMap.find(D);
1341   if (I != UDRMap.end())
1342     return I->second;
1343   emitUserDefinedReduction(/*CGF=*/nullptr, D);
1344   return UDRMap.lookup(D);
1345 }
1346 
1347 static llvm::Function *emitParallelOrTeamsOutlinedFunction(
1348     CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS,
1349     const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
1350     const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen) {
1351   assert(ThreadIDVar->getType()->isPointerType() &&
1352          "thread id variable must be of type kmp_int32 *");
1353   CodeGenFunction CGF(CGM, true);
1354   bool HasCancel = false;
1355   if (const auto *OPD = dyn_cast<OMPParallelDirective>(&D))
1356     HasCancel = OPD->hasCancel();
1357   else if (const auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D))
1358     HasCancel = OPSD->hasCancel();
1359   else if (const auto *OPFD = dyn_cast<OMPParallelForDirective>(&D))
1360     HasCancel = OPFD->hasCancel();
1361   else if (const auto *OPFD = dyn_cast<OMPTargetParallelForDirective>(&D))
1362     HasCancel = OPFD->hasCancel();
1363   else if (const auto *OPFD = dyn_cast<OMPDistributeParallelForDirective>(&D))
1364     HasCancel = OPFD->hasCancel();
1365   else if (const auto *OPFD =
1366                dyn_cast<OMPTeamsDistributeParallelForDirective>(&D))
1367     HasCancel = OPFD->hasCancel();
1368   else if (const auto *OPFD =
1369                dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&D))
1370     HasCancel = OPFD->hasCancel();
1371   CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind,
1372                                     HasCancel, OutlinedHelperName);
1373   CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
1374   return CGF.GenerateOpenMPCapturedStmtFunction(*CS);
1375 }
1376 
1377 llvm::Function *CGOpenMPRuntime::emitParallelOutlinedFunction(
1378     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
1379     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
1380   const CapturedStmt *CS = D.getCapturedStmt(OMPD_parallel);
1381   return emitParallelOrTeamsOutlinedFunction(
1382       CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
1383 }
1384 
1385 llvm::Function *CGOpenMPRuntime::emitTeamsOutlinedFunction(
1386     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
1387     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
1388   const CapturedStmt *CS = D.getCapturedStmt(OMPD_teams);
1389   return emitParallelOrTeamsOutlinedFunction(
1390       CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
1391 }
1392 
1393 llvm::Function *CGOpenMPRuntime::emitTaskOutlinedFunction(
1394     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
1395     const VarDecl *PartIDVar, const VarDecl *TaskTVar,
1396     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
1397     bool Tied, unsigned &NumberOfParts) {
1398   auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF,
1399                                               PrePostActionTy &) {
1400     llvm::Value *ThreadID = getThreadID(CGF, D.getBeginLoc());
1401     llvm::Value *UpLoc = emitUpdateLocation(CGF, D.getBeginLoc());
1402     llvm::Value *TaskArgs[] = {
1403         UpLoc, ThreadID,
1404         CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar),
1405                                     TaskTVar->getType()->castAs<PointerType>())
1406             .getPointer()};
1407     CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task), TaskArgs);
1408   };
1409   CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar,
1410                                                             UntiedCodeGen);
1411   CodeGen.setAction(Action);
1412   assert(!ThreadIDVar->getType()->isPointerType() &&
1413          "thread id variable must be of type kmp_int32 for tasks");
1414   const OpenMPDirectiveKind Region =
1415       isOpenMPTaskLoopDirective(D.getDirectiveKind()) ? OMPD_taskloop
1416                                                       : OMPD_task;
1417   const CapturedStmt *CS = D.getCapturedStmt(Region);
1418   const auto *TD = dyn_cast<OMPTaskDirective>(&D);
1419   CodeGenFunction CGF(CGM, true);
1420   CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen,
1421                                         InnermostKind,
1422                                         TD ? TD->hasCancel() : false, Action);
1423   CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
1424   llvm::Function *Res = CGF.GenerateCapturedStmtFunction(*CS);
1425   if (!Tied)
1426     NumberOfParts = Action.getNumberOfParts();
1427   return Res;
1428 }
1429 
1430 static void buildStructValue(ConstantStructBuilder &Fields, CodeGenModule &CGM,
1431                              const RecordDecl *RD, const CGRecordLayout &RL,
1432                              ArrayRef<llvm::Constant *> Data) {
1433   llvm::StructType *StructTy = RL.getLLVMType();
1434   unsigned PrevIdx = 0;
1435   ConstantInitBuilder CIBuilder(CGM);
1436   auto DI = Data.begin();
1437   for (const FieldDecl *FD : RD->fields()) {
1438     unsigned Idx = RL.getLLVMFieldNo(FD);
1439     // Fill the alignment.
1440     for (unsigned I = PrevIdx; I < Idx; ++I)
1441       Fields.add(llvm::Constant::getNullValue(StructTy->getElementType(I)));
1442     PrevIdx = Idx + 1;
1443     Fields.add(*DI);
1444     ++DI;
1445   }
1446 }
1447 
1448 template <class... As>
1449 static llvm::GlobalVariable *
1450 createGlobalStruct(CodeGenModule &CGM, QualType Ty, bool IsConstant,
1451                    ArrayRef<llvm::Constant *> Data, const Twine &Name,
1452                    As &&... Args) {
1453   const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl());
1454   const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD);
1455   ConstantInitBuilder CIBuilder(CGM);
1456   ConstantStructBuilder Fields = CIBuilder.beginStruct(RL.getLLVMType());
1457   buildStructValue(Fields, CGM, RD, RL, Data);
1458   return Fields.finishAndCreateGlobal(
1459       Name, CGM.getContext().getAlignOfGlobalVarInChars(Ty), IsConstant,
1460       std::forward<As>(Args)...);
1461 }
1462 
1463 template <typename T>
1464 static void
1465 createConstantGlobalStructAndAddToParent(CodeGenModule &CGM, QualType Ty,
1466                                          ArrayRef<llvm::Constant *> Data,
1467                                          T &Parent) {
1468   const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl());
1469   const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD);
1470   ConstantStructBuilder Fields = Parent.beginStruct(RL.getLLVMType());
1471   buildStructValue(Fields, CGM, RD, RL, Data);
1472   Fields.finishAndAddTo(Parent);
1473 }
1474 
1475 Address CGOpenMPRuntime::getOrCreateDefaultLocation(unsigned Flags) {
1476   CharUnits Align = CGM.getContext().getTypeAlignInChars(IdentQTy);
1477   unsigned Reserved2Flags = getDefaultLocationReserved2Flags();
1478   FlagsTy FlagsKey(Flags, Reserved2Flags);
1479   llvm::Value *Entry = OpenMPDefaultLocMap.lookup(FlagsKey);
1480   if (!Entry) {
1481     if (!DefaultOpenMPPSource) {
1482       // Initialize default location for psource field of ident_t structure of
1483       // all ident_t objects. Format is ";file;function;line;column;;".
1484       // Taken from
1485       // https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp_str.cpp
1486       DefaultOpenMPPSource =
1487           CGM.GetAddrOfConstantCString(";unknown;unknown;0;0;;").getPointer();
1488       DefaultOpenMPPSource =
1489           llvm::ConstantExpr::getBitCast(DefaultOpenMPPSource, CGM.Int8PtrTy);
1490     }
1491 
1492     llvm::Constant *Data[] = {
1493         llvm::ConstantInt::getNullValue(CGM.Int32Ty),
1494         llvm::ConstantInt::get(CGM.Int32Ty, Flags),
1495         llvm::ConstantInt::get(CGM.Int32Ty, Reserved2Flags),
1496         llvm::ConstantInt::getNullValue(CGM.Int32Ty), DefaultOpenMPPSource};
1497     llvm::GlobalValue *DefaultOpenMPLocation =
1498         createGlobalStruct(CGM, IdentQTy, isDefaultLocationConstant(), Data, "",
1499                            llvm::GlobalValue::PrivateLinkage);
1500     DefaultOpenMPLocation->setUnnamedAddr(
1501         llvm::GlobalValue::UnnamedAddr::Global);
1502 
1503     OpenMPDefaultLocMap[FlagsKey] = Entry = DefaultOpenMPLocation;
1504   }
1505   return Address(Entry, Align);
1506 }
1507 
1508 void CGOpenMPRuntime::setLocThreadIdInsertPt(CodeGenFunction &CGF,
1509                                              bool AtCurrentPoint) {
1510   auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1511   assert(!Elem.second.ServiceInsertPt && "Insert point is set already.");
1512 
1513   llvm::Value *Undef = llvm::UndefValue::get(CGF.Int32Ty);
1514   if (AtCurrentPoint) {
1515     Elem.second.ServiceInsertPt = new llvm::BitCastInst(
1516         Undef, CGF.Int32Ty, "svcpt", CGF.Builder.GetInsertBlock());
1517   } else {
1518     Elem.second.ServiceInsertPt =
1519         new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt");
1520     Elem.second.ServiceInsertPt->insertAfter(CGF.AllocaInsertPt);
1521   }
1522 }
1523 
1524 void CGOpenMPRuntime::clearLocThreadIdInsertPt(CodeGenFunction &CGF) {
1525   auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1526   if (Elem.second.ServiceInsertPt) {
1527     llvm::Instruction *Ptr = Elem.second.ServiceInsertPt;
1528     Elem.second.ServiceInsertPt = nullptr;
1529     Ptr->eraseFromParent();
1530   }
1531 }
1532 
1533 llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF,
1534                                                  SourceLocation Loc,
1535                                                  unsigned Flags) {
1536   Flags |= OMP_IDENT_KMPC;
1537   // If no debug info is generated - return global default location.
1538   if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo ||
1539       Loc.isInvalid())
1540     return getOrCreateDefaultLocation(Flags).getPointer();
1541 
1542   assert(CGF.CurFn && "No function in current CodeGenFunction.");
1543 
1544   CharUnits Align = CGM.getContext().getTypeAlignInChars(IdentQTy);
1545   Address LocValue = Address::invalid();
1546   auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
1547   if (I != OpenMPLocThreadIDMap.end())
1548     LocValue = Address(I->second.DebugLoc, Align);
1549 
1550   // OpenMPLocThreadIDMap may have null DebugLoc and non-null ThreadID, if
1551   // GetOpenMPThreadID was called before this routine.
1552   if (!LocValue.isValid()) {
1553     // Generate "ident_t .kmpc_loc.addr;"
1554     Address AI = CGF.CreateMemTemp(IdentQTy, ".kmpc_loc.addr");
1555     auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1556     Elem.second.DebugLoc = AI.getPointer();
1557     LocValue = AI;
1558 
1559     if (!Elem.second.ServiceInsertPt)
1560       setLocThreadIdInsertPt(CGF);
1561     CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
1562     CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt);
1563     CGF.Builder.CreateMemCpy(LocValue, getOrCreateDefaultLocation(Flags),
1564                              CGF.getTypeSize(IdentQTy));
1565   }
1566 
1567   // char **psource = &.kmpc_loc_<flags>.addr.psource;
1568   LValue Base = CGF.MakeAddrLValue(LocValue, IdentQTy);
1569   auto Fields = cast<RecordDecl>(IdentQTy->getAsTagDecl())->field_begin();
1570   LValue PSource =
1571       CGF.EmitLValueForField(Base, *std::next(Fields, IdentField_PSource));
1572 
1573   llvm::Value *OMPDebugLoc = OpenMPDebugLocMap.lookup(Loc.getRawEncoding());
1574   if (OMPDebugLoc == nullptr) {
1575     SmallString<128> Buffer2;
1576     llvm::raw_svector_ostream OS2(Buffer2);
1577     // Build debug location
1578     PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
1579     OS2 << ";" << PLoc.getFilename() << ";";
1580     if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
1581       OS2 << FD->getQualifiedNameAsString();
1582     OS2 << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;";
1583     OMPDebugLoc = CGF.Builder.CreateGlobalStringPtr(OS2.str());
1584     OpenMPDebugLocMap[Loc.getRawEncoding()] = OMPDebugLoc;
1585   }
1586   // *psource = ";<File>;<Function>;<Line>;<Column>;;";
1587   CGF.EmitStoreOfScalar(OMPDebugLoc, PSource);
1588 
1589   // Our callers always pass this to a runtime function, so for
1590   // convenience, go ahead and return a naked pointer.
1591   return LocValue.getPointer();
1592 }
1593 
1594 llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF,
1595                                           SourceLocation Loc) {
1596   assert(CGF.CurFn && "No function in current CodeGenFunction.");
1597 
1598   llvm::Value *ThreadID = nullptr;
1599   // Check whether we've already cached a load of the thread id in this
1600   // function.
1601   auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
1602   if (I != OpenMPLocThreadIDMap.end()) {
1603     ThreadID = I->second.ThreadID;
1604     if (ThreadID != nullptr)
1605       return ThreadID;
1606   }
1607   // If exceptions are enabled, do not use parameter to avoid possible crash.
1608   if (!CGF.EHStack.requiresLandingPad() || !CGF.getLangOpts().Exceptions ||
1609       !CGF.getLangOpts().CXXExceptions ||
1610       CGF.Builder.GetInsertBlock() == CGF.AllocaInsertPt->getParent()) {
1611     if (auto *OMPRegionInfo =
1612             dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
1613       if (OMPRegionInfo->getThreadIDVariable()) {
1614         // Check if this an outlined function with thread id passed as argument.
1615         LValue LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF);
1616         ThreadID = CGF.EmitLoadOfScalar(LVal, Loc);
1617         // If value loaded in entry block, cache it and use it everywhere in
1618         // function.
1619         if (CGF.Builder.GetInsertBlock() == CGF.AllocaInsertPt->getParent()) {
1620           auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1621           Elem.second.ThreadID = ThreadID;
1622         }
1623         return ThreadID;
1624       }
1625     }
1626   }
1627 
1628   // This is not an outlined function region - need to call __kmpc_int32
1629   // kmpc_global_thread_num(ident_t *loc).
1630   // Generate thread id value and cache this value for use across the
1631   // function.
1632   auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1633   if (!Elem.second.ServiceInsertPt)
1634     setLocThreadIdInsertPt(CGF);
1635   CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
1636   CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt);
1637   llvm::CallInst *Call = CGF.Builder.CreateCall(
1638       createRuntimeFunction(OMPRTL__kmpc_global_thread_num),
1639       emitUpdateLocation(CGF, Loc));
1640   Call->setCallingConv(CGF.getRuntimeCC());
1641   Elem.second.ThreadID = Call;
1642   return Call;
1643 }
1644 
1645 void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) {
1646   assert(CGF.CurFn && "No function in current CodeGenFunction.");
1647   if (OpenMPLocThreadIDMap.count(CGF.CurFn)) {
1648     clearLocThreadIdInsertPt(CGF);
1649     OpenMPLocThreadIDMap.erase(CGF.CurFn);
1650   }
1651   if (FunctionUDRMap.count(CGF.CurFn) > 0) {
1652     for(auto *D : FunctionUDRMap[CGF.CurFn])
1653       UDRMap.erase(D);
1654     FunctionUDRMap.erase(CGF.CurFn);
1655   }
1656 }
1657 
1658 llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() {
1659   return IdentTy->getPointerTo();
1660 }
1661 
1662 llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() {
1663   if (!Kmpc_MicroTy) {
1664     // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...)
1665     llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty),
1666                                  llvm::PointerType::getUnqual(CGM.Int32Ty)};
1667     Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true);
1668   }
1669   return llvm::PointerType::getUnqual(Kmpc_MicroTy);
1670 }
1671 
1672 llvm::FunctionCallee CGOpenMPRuntime::createRuntimeFunction(unsigned Function) {
1673   llvm::FunctionCallee RTLFn = nullptr;
1674   switch (static_cast<OpenMPRTLFunction>(Function)) {
1675   case OMPRTL__kmpc_fork_call: {
1676     // Build void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro
1677     // microtask, ...);
1678     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1679                                 getKmpc_MicroPointerTy()};
1680     auto *FnTy =
1681         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ true);
1682     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_fork_call");
1683     if (auto *F = dyn_cast<llvm::Function>(RTLFn.getCallee())) {
1684       if (!F->hasMetadata(llvm::LLVMContext::MD_callback)) {
1685         llvm::LLVMContext &Ctx = F->getContext();
1686         llvm::MDBuilder MDB(Ctx);
1687         // Annotate the callback behavior of the __kmpc_fork_call:
1688         //  - The callback callee is argument number 2 (microtask).
1689         //  - The first two arguments of the callback callee are unknown (-1).
1690         //  - All variadic arguments to the __kmpc_fork_call are passed to the
1691         //    callback callee.
1692         F->addMetadata(
1693             llvm::LLVMContext::MD_callback,
1694             *llvm::MDNode::get(Ctx, {MDB.createCallbackEncoding(
1695                                         2, {-1, -1},
1696                                         /* VarArgsArePassed */ true)}));
1697       }
1698     }
1699     break;
1700   }
1701   case OMPRTL__kmpc_global_thread_num: {
1702     // Build kmp_int32 __kmpc_global_thread_num(ident_t *loc);
1703     llvm::Type *TypeParams[] = {getIdentTyPointerTy()};
1704     auto *FnTy =
1705         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
1706     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_global_thread_num");
1707     break;
1708   }
1709   case OMPRTL__kmpc_threadprivate_cached: {
1710     // Build void *__kmpc_threadprivate_cached(ident_t *loc,
1711     // kmp_int32 global_tid, void *data, size_t size, void ***cache);
1712     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1713                                 CGM.VoidPtrTy, CGM.SizeTy,
1714                                 CGM.VoidPtrTy->getPointerTo()->getPointerTo()};
1715     auto *FnTy =
1716         llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg*/ false);
1717     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_cached");
1718     break;
1719   }
1720   case OMPRTL__kmpc_critical: {
1721     // Build void __kmpc_critical(ident_t *loc, kmp_int32 global_tid,
1722     // kmp_critical_name *crit);
1723     llvm::Type *TypeParams[] = {
1724         getIdentTyPointerTy(), CGM.Int32Ty,
1725         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
1726     auto *FnTy =
1727         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1728     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_critical");
1729     break;
1730   }
1731   case OMPRTL__kmpc_critical_with_hint: {
1732     // Build void __kmpc_critical_with_hint(ident_t *loc, kmp_int32 global_tid,
1733     // kmp_critical_name *crit, uintptr_t hint);
1734     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1735                                 llvm::PointerType::getUnqual(KmpCriticalNameTy),
1736                                 CGM.IntPtrTy};
1737     auto *FnTy =
1738         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1739     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_critical_with_hint");
1740     break;
1741   }
1742   case OMPRTL__kmpc_threadprivate_register: {
1743     // Build void __kmpc_threadprivate_register(ident_t *, void *data,
1744     // kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor);
1745     // typedef void *(*kmpc_ctor)(void *);
1746     auto *KmpcCtorTy =
1747         llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
1748                                 /*isVarArg*/ false)->getPointerTo();
1749     // typedef void *(*kmpc_cctor)(void *, void *);
1750     llvm::Type *KmpcCopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
1751     auto *KmpcCopyCtorTy =
1752         llvm::FunctionType::get(CGM.VoidPtrTy, KmpcCopyCtorTyArgs,
1753                                 /*isVarArg*/ false)
1754             ->getPointerTo();
1755     // typedef void (*kmpc_dtor)(void *);
1756     auto *KmpcDtorTy =
1757         llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy, /*isVarArg*/ false)
1758             ->getPointerTo();
1759     llvm::Type *FnTyArgs[] = {getIdentTyPointerTy(), CGM.VoidPtrTy, KmpcCtorTy,
1760                               KmpcCopyCtorTy, KmpcDtorTy};
1761     auto *FnTy = llvm::FunctionType::get(CGM.VoidTy, FnTyArgs,
1762                                         /*isVarArg*/ false);
1763     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_register");
1764     break;
1765   }
1766   case OMPRTL__kmpc_end_critical: {
1767     // Build void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid,
1768     // kmp_critical_name *crit);
1769     llvm::Type *TypeParams[] = {
1770         getIdentTyPointerTy(), CGM.Int32Ty,
1771         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
1772     auto *FnTy =
1773         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1774     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_critical");
1775     break;
1776   }
1777   case OMPRTL__kmpc_cancel_barrier: {
1778     // Build kmp_int32 __kmpc_cancel_barrier(ident_t *loc, kmp_int32
1779     // global_tid);
1780     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1781     auto *FnTy =
1782         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
1783     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_cancel_barrier");
1784     break;
1785   }
1786   case OMPRTL__kmpc_barrier: {
1787     // Build void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid);
1788     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1789     auto *FnTy =
1790         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1791     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_barrier");
1792     break;
1793   }
1794   case OMPRTL__kmpc_for_static_fini: {
1795     // Build void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid);
1796     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1797     auto *FnTy =
1798         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1799     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_for_static_fini");
1800     break;
1801   }
1802   case OMPRTL__kmpc_push_num_threads: {
1803     // Build void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
1804     // kmp_int32 num_threads)
1805     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1806                                 CGM.Int32Ty};
1807     auto *FnTy =
1808         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1809     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_num_threads");
1810     break;
1811   }
1812   case OMPRTL__kmpc_serialized_parallel: {
1813     // Build void __kmpc_serialized_parallel(ident_t *loc, kmp_int32
1814     // global_tid);
1815     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1816     auto *FnTy =
1817         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1818     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_serialized_parallel");
1819     break;
1820   }
1821   case OMPRTL__kmpc_end_serialized_parallel: {
1822     // Build void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32
1823     // global_tid);
1824     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1825     auto *FnTy =
1826         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1827     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_serialized_parallel");
1828     break;
1829   }
1830   case OMPRTL__kmpc_flush: {
1831     // Build void __kmpc_flush(ident_t *loc);
1832     llvm::Type *TypeParams[] = {getIdentTyPointerTy()};
1833     auto *FnTy =
1834         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1835     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_flush");
1836     break;
1837   }
1838   case OMPRTL__kmpc_master: {
1839     // Build kmp_int32 __kmpc_master(ident_t *loc, kmp_int32 global_tid);
1840     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1841     auto *FnTy =
1842         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
1843     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_master");
1844     break;
1845   }
1846   case OMPRTL__kmpc_end_master: {
1847     // Build void __kmpc_end_master(ident_t *loc, kmp_int32 global_tid);
1848     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1849     auto *FnTy =
1850         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1851     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_master");
1852     break;
1853   }
1854   case OMPRTL__kmpc_omp_taskyield: {
1855     // Build kmp_int32 __kmpc_omp_taskyield(ident_t *, kmp_int32 global_tid,
1856     // int end_part);
1857     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
1858     auto *FnTy =
1859         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
1860     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_taskyield");
1861     break;
1862   }
1863   case OMPRTL__kmpc_single: {
1864     // Build kmp_int32 __kmpc_single(ident_t *loc, kmp_int32 global_tid);
1865     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1866     auto *FnTy =
1867         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
1868     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_single");
1869     break;
1870   }
1871   case OMPRTL__kmpc_end_single: {
1872     // Build void __kmpc_end_single(ident_t *loc, kmp_int32 global_tid);
1873     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1874     auto *FnTy =
1875         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1876     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_single");
1877     break;
1878   }
1879   case OMPRTL__kmpc_omp_task_alloc: {
1880     // Build kmp_task_t *__kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
1881     // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
1882     // kmp_routine_entry_t *task_entry);
1883     assert(KmpRoutineEntryPtrTy != nullptr &&
1884            "Type kmp_routine_entry_t must be created.");
1885     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty,
1886                                 CGM.SizeTy, CGM.SizeTy, KmpRoutineEntryPtrTy};
1887     // Return void * and then cast to particular kmp_task_t type.
1888     auto *FnTy =
1889         llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
1890     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_alloc");
1891     break;
1892   }
1893   case OMPRTL__kmpc_omp_task: {
1894     // Build kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t
1895     // *new_task);
1896     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1897                                 CGM.VoidPtrTy};
1898     auto *FnTy =
1899         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
1900     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task");
1901     break;
1902   }
1903   case OMPRTL__kmpc_copyprivate: {
1904     // Build void __kmpc_copyprivate(ident_t *loc, kmp_int32 global_tid,
1905     // size_t cpy_size, void *cpy_data, void(*cpy_func)(void *, void *),
1906     // kmp_int32 didit);
1907     llvm::Type *CpyTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
1908     auto *CpyFnTy =
1909         llvm::FunctionType::get(CGM.VoidTy, CpyTypeParams, /*isVarArg=*/false);
1910     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.SizeTy,
1911                                 CGM.VoidPtrTy, CpyFnTy->getPointerTo(),
1912                                 CGM.Int32Ty};
1913     auto *FnTy =
1914         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1915     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_copyprivate");
1916     break;
1917   }
1918   case OMPRTL__kmpc_reduce: {
1919     // Build kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid,
1920     // kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void
1921     // (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name *lck);
1922     llvm::Type *ReduceTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
1923     auto *ReduceFnTy = llvm::FunctionType::get(CGM.VoidTy, ReduceTypeParams,
1924                                                /*isVarArg=*/false);
1925     llvm::Type *TypeParams[] = {
1926         getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, CGM.SizeTy,
1927         CGM.VoidPtrTy, ReduceFnTy->getPointerTo(),
1928         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
1929     auto *FnTy =
1930         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
1931     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_reduce");
1932     break;
1933   }
1934   case OMPRTL__kmpc_reduce_nowait: {
1935     // Build kmp_int32 __kmpc_reduce_nowait(ident_t *loc, kmp_int32
1936     // global_tid, kmp_int32 num_vars, size_t reduce_size, void *reduce_data,
1937     // void (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name
1938     // *lck);
1939     llvm::Type *ReduceTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
1940     auto *ReduceFnTy = llvm::FunctionType::get(CGM.VoidTy, ReduceTypeParams,
1941                                                /*isVarArg=*/false);
1942     llvm::Type *TypeParams[] = {
1943         getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, CGM.SizeTy,
1944         CGM.VoidPtrTy, ReduceFnTy->getPointerTo(),
1945         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
1946     auto *FnTy =
1947         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
1948     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_reduce_nowait");
1949     break;
1950   }
1951   case OMPRTL__kmpc_end_reduce: {
1952     // Build void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,
1953     // kmp_critical_name *lck);
1954     llvm::Type *TypeParams[] = {
1955         getIdentTyPointerTy(), CGM.Int32Ty,
1956         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
1957     auto *FnTy =
1958         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1959     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_reduce");
1960     break;
1961   }
1962   case OMPRTL__kmpc_end_reduce_nowait: {
    // Build void __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
    // kmp_critical_name *lck);
1965     llvm::Type *TypeParams[] = {
1966         getIdentTyPointerTy(), CGM.Int32Ty,
1967         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
1968     auto *FnTy =
1969         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1970     RTLFn =
1971         CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_reduce_nowait");
1972     break;
1973   }
1974   case OMPRTL__kmpc_omp_task_begin_if0: {
    // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
1977     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1978                                 CGM.VoidPtrTy};
1979     auto *FnTy =
1980         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1981     RTLFn =
1982         CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_begin_if0");
1983     break;
1984   }
1985   case OMPRTL__kmpc_omp_task_complete_if0: {
    // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
1988     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1989                                 CGM.VoidPtrTy};
1990     auto *FnTy =
1991         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1992     RTLFn = CGM.CreateRuntimeFunction(FnTy,
1993                                       /*Name=*/"__kmpc_omp_task_complete_if0");
1994     break;
1995   }
1996   case OMPRTL__kmpc_ordered: {
1997     // Build void __kmpc_ordered(ident_t *loc, kmp_int32 global_tid);
1998     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1999     auto *FnTy =
2000         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2001     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_ordered");
2002     break;
2003   }
2004   case OMPRTL__kmpc_end_ordered: {
2005     // Build void __kmpc_end_ordered(ident_t *loc, kmp_int32 global_tid);
2006     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
2007     auto *FnTy =
2008         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2009     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_ordered");
2010     break;
2011   }
2012   case OMPRTL__kmpc_omp_taskwait: {
2013     // Build kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32 global_tid);
2014     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
2015     auto *FnTy =
2016         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
2017     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_omp_taskwait");
2018     break;
2019   }
2020   case OMPRTL__kmpc_taskgroup: {
2021     // Build void __kmpc_taskgroup(ident_t *loc, kmp_int32 global_tid);
2022     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
2023     auto *FnTy =
2024         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2025     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_taskgroup");
2026     break;
2027   }
2028   case OMPRTL__kmpc_end_taskgroup: {
2029     // Build void __kmpc_end_taskgroup(ident_t *loc, kmp_int32 global_tid);
2030     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
2031     auto *FnTy =
2032         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2033     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_taskgroup");
2034     break;
2035   }
2036   case OMPRTL__kmpc_push_proc_bind: {
2037     // Build void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
2038     // int proc_bind)
2039     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
2040     auto *FnTy =
2041         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2042     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_proc_bind");
2043     break;
2044   }
2045   case OMPRTL__kmpc_omp_task_with_deps: {
2046     // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
2047     // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
2048     // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list);
2049     llvm::Type *TypeParams[] = {
2050         getIdentTyPointerTy(), CGM.Int32Ty, CGM.VoidPtrTy, CGM.Int32Ty,
2051         CGM.VoidPtrTy,         CGM.Int32Ty, CGM.VoidPtrTy};
2052     auto *FnTy =
2053         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
2054     RTLFn =
2055         CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_with_deps");
2056     break;
2057   }
2058   case OMPRTL__kmpc_omp_wait_deps: {
2059     // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
2060     // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 ndeps_noalias,
2061     // kmp_depend_info_t *noalias_dep_list);
2062     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
2063                                 CGM.Int32Ty,           CGM.VoidPtrTy,
2064                                 CGM.Int32Ty,           CGM.VoidPtrTy};
2065     auto *FnTy =
2066         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2067     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_wait_deps");
2068     break;
2069   }
2070   case OMPRTL__kmpc_cancellationpoint: {
2071     // Build kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
2072     // global_tid, kmp_int32 cncl_kind)
2073     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
2074     auto *FnTy =
2075         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2076     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_cancellationpoint");
2077     break;
2078   }
2079   case OMPRTL__kmpc_cancel: {
2080     // Build kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
2081     // kmp_int32 cncl_kind)
2082     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
2083     auto *FnTy =
2084         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2085     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_cancel");
2086     break;
2087   }
2088   case OMPRTL__kmpc_push_num_teams: {
2089     // Build void kmpc_push_num_teams (ident_t loc, kmp_int32 global_tid,
2090     // kmp_int32 num_teams, kmp_int32 num_threads)
2091     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty,
2092         CGM.Int32Ty};
2093     auto *FnTy =
2094         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2095     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_num_teams");
2096     break;
2097   }
2098   case OMPRTL__kmpc_fork_teams: {
2099     // Build void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro
2100     // microtask, ...);
2101     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
2102                                 getKmpc_MicroPointerTy()};
2103     auto *FnTy =
2104         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ true);
2105     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_fork_teams");
2106     if (auto *F = dyn_cast<llvm::Function>(RTLFn.getCallee())) {
2107       if (!F->hasMetadata(llvm::LLVMContext::MD_callback)) {
2108         llvm::LLVMContext &Ctx = F->getContext();
2109         llvm::MDBuilder MDB(Ctx);
2110         // Annotate the callback behavior of the __kmpc_fork_teams:
2111         //  - The callback callee is argument number 2 (microtask).
2112         //  - The first two arguments of the callback callee are unknown (-1).
2113         //  - All variadic arguments to the __kmpc_fork_teams are passed to the
2114         //    callback callee.
2115         F->addMetadata(
2116             llvm::LLVMContext::MD_callback,
2117             *llvm::MDNode::get(Ctx, {MDB.createCallbackEncoding(
2118                                         2, {-1, -1},
2119                                         /* VarArgsArePassed */ true)}));
2120       }
2121     }
2122     break;
2123   }
2124   case OMPRTL__kmpc_taskloop: {
2125     // Build void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
2126     // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
2127     // sched, kmp_uint64 grainsize, void *task_dup);
2128     llvm::Type *TypeParams[] = {getIdentTyPointerTy(),
2129                                 CGM.IntTy,
2130                                 CGM.VoidPtrTy,
2131                                 CGM.IntTy,
2132                                 CGM.Int64Ty->getPointerTo(),
2133                                 CGM.Int64Ty->getPointerTo(),
2134                                 CGM.Int64Ty,
2135                                 CGM.IntTy,
2136                                 CGM.IntTy,
2137                                 CGM.Int64Ty,
2138                                 CGM.VoidPtrTy};
2139     auto *FnTy =
2140         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2141     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_taskloop");
2142     break;
2143   }
2144   case OMPRTL__kmpc_doacross_init: {
2145     // Build void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid, kmp_int32
2146     // num_dims, struct kmp_dim *dims);
2147     llvm::Type *TypeParams[] = {getIdentTyPointerTy(),
2148                                 CGM.Int32Ty,
2149                                 CGM.Int32Ty,
2150                                 CGM.VoidPtrTy};
2151     auto *FnTy =
2152         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2153     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_init");
2154     break;
2155   }
2156   case OMPRTL__kmpc_doacross_fini: {
2157     // Build void __kmpc_doacross_fini(ident_t *loc, kmp_int32 gtid);
2158     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
2159     auto *FnTy =
2160         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2161     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_fini");
2162     break;
2163   }
2164   case OMPRTL__kmpc_doacross_post: {
2165     // Build void __kmpc_doacross_post(ident_t *loc, kmp_int32 gtid, kmp_int64
2166     // *vec);
2167     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
2168                                 CGM.Int64Ty->getPointerTo()};
2169     auto *FnTy =
2170         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2171     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_post");
2172     break;
2173   }
2174   case OMPRTL__kmpc_doacross_wait: {
2175     // Build void __kmpc_doacross_wait(ident_t *loc, kmp_int32 gtid, kmp_int64
2176     // *vec);
2177     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
2178                                 CGM.Int64Ty->getPointerTo()};
2179     auto *FnTy =
2180         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2181     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_wait");
2182     break;
2183   }
2184   case OMPRTL__kmpc_task_reduction_init: {
2185     // Build void *__kmpc_task_reduction_init(int gtid, int num_data, void
2186     // *data);
2187     llvm::Type *TypeParams[] = {CGM.IntTy, CGM.IntTy, CGM.VoidPtrTy};
2188     auto *FnTy =
2189         llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
2190     RTLFn =
2191         CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_task_reduction_init");
2192     break;
2193   }
2194   case OMPRTL__kmpc_task_reduction_get_th_data: {
2195     // Build void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
2196     // *d);
2197     llvm::Type *TypeParams[] = {CGM.IntTy, CGM.VoidPtrTy, CGM.VoidPtrTy};
2198     auto *FnTy =
2199         llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
2200     RTLFn = CGM.CreateRuntimeFunction(
2201         FnTy, /*Name=*/"__kmpc_task_reduction_get_th_data");
2202     break;
2203   }
2204   case OMPRTL__kmpc_alloc: {
2205     // Build to void *__kmpc_alloc(int gtid, size_t sz, omp_allocator_handle_t
2206     // al); omp_allocator_handle_t type is void *.
2207     llvm::Type *TypeParams[] = {CGM.IntTy, CGM.SizeTy, CGM.VoidPtrTy};
2208     auto *FnTy =
2209         llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
2210     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_alloc");
2211     break;
2212   }
2213   case OMPRTL__kmpc_free: {
2214     // Build to void __kmpc_free(int gtid, void *ptr, omp_allocator_handle_t
2215     // al); omp_allocator_handle_t type is void *.
2216     llvm::Type *TypeParams[] = {CGM.IntTy, CGM.VoidPtrTy, CGM.VoidPtrTy};
2217     auto *FnTy =
2218         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2219     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_free");
2220     break;
2221   }
2222   case OMPRTL__kmpc_push_target_tripcount: {
2223     // Build void __kmpc_push_target_tripcount(int64_t device_id, kmp_uint64
2224     // size);
2225     llvm::Type *TypeParams[] = {CGM.Int64Ty, CGM.Int64Ty};
2226     llvm::FunctionType *FnTy =
2227         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2228     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_target_tripcount");
2229     break;
2230   }
2231   case OMPRTL__tgt_target: {
2232     // Build int32_t __tgt_target(int64_t device_id, void *host_ptr, int32_t
2233     // arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t
2234     // *arg_types);
2235     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2236                                 CGM.VoidPtrTy,
2237                                 CGM.Int32Ty,
2238                                 CGM.VoidPtrPtrTy,
2239                                 CGM.VoidPtrPtrTy,
2240                                 CGM.SizeTy->getPointerTo(),
2241                                 CGM.Int64Ty->getPointerTo()};
2242     auto *FnTy =
2243         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2244     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target");
2245     break;
2246   }
2247   case OMPRTL__tgt_target_nowait: {
2248     // Build int32_t __tgt_target_nowait(int64_t device_id, void *host_ptr,
2249     // int32_t arg_num, void** args_base, void **args, size_t *arg_sizes,
2250     // int64_t *arg_types);
2251     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2252                                 CGM.VoidPtrTy,
2253                                 CGM.Int32Ty,
2254                                 CGM.VoidPtrPtrTy,
2255                                 CGM.VoidPtrPtrTy,
2256                                 CGM.SizeTy->getPointerTo(),
2257                                 CGM.Int64Ty->getPointerTo()};
2258     auto *FnTy =
2259         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2260     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_nowait");
2261     break;
2262   }
2263   case OMPRTL__tgt_target_teams: {
2264     // Build int32_t __tgt_target_teams(int64_t device_id, void *host_ptr,
2265     // int32_t arg_num, void** args_base, void **args, size_t *arg_sizes,
2266     // int64_t *arg_types, int32_t num_teams, int32_t thread_limit);
2267     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2268                                 CGM.VoidPtrTy,
2269                                 CGM.Int32Ty,
2270                                 CGM.VoidPtrPtrTy,
2271                                 CGM.VoidPtrPtrTy,
2272                                 CGM.SizeTy->getPointerTo(),
2273                                 CGM.Int64Ty->getPointerTo(),
2274                                 CGM.Int32Ty,
2275                                 CGM.Int32Ty};
2276     auto *FnTy =
2277         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2278     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_teams");
2279     break;
2280   }
2281   case OMPRTL__tgt_target_teams_nowait: {
2282     // Build int32_t __tgt_target_teams_nowait(int64_t device_id, void
2283     // *host_ptr, int32_t arg_num, void** args_base, void **args, size_t
2284     // *arg_sizes, int64_t *arg_types, int32_t num_teams, int32_t thread_limit);
2285     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2286                                 CGM.VoidPtrTy,
2287                                 CGM.Int32Ty,
2288                                 CGM.VoidPtrPtrTy,
2289                                 CGM.VoidPtrPtrTy,
2290                                 CGM.SizeTy->getPointerTo(),
2291                                 CGM.Int64Ty->getPointerTo(),
2292                                 CGM.Int32Ty,
2293                                 CGM.Int32Ty};
2294     auto *FnTy =
2295         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2296     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_teams_nowait");
2297     break;
2298   }
2299   case OMPRTL__tgt_register_lib: {
2300     // Build void __tgt_register_lib(__tgt_bin_desc *desc);
2301     QualType ParamTy =
2302         CGM.getContext().getPointerType(getTgtBinaryDescriptorQTy());
2303     llvm::Type *TypeParams[] = {CGM.getTypes().ConvertTypeForMem(ParamTy)};
2304     auto *FnTy =
2305         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2306     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_register_lib");
2307     break;
2308   }
2309   case OMPRTL__tgt_unregister_lib: {
2310     // Build void __tgt_unregister_lib(__tgt_bin_desc *desc);
2311     QualType ParamTy =
2312         CGM.getContext().getPointerType(getTgtBinaryDescriptorQTy());
2313     llvm::Type *TypeParams[] = {CGM.getTypes().ConvertTypeForMem(ParamTy)};
2314     auto *FnTy =
2315         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2316     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_unregister_lib");
2317     break;
2318   }
2319   case OMPRTL__tgt_target_data_begin: {
2320     // Build void __tgt_target_data_begin(int64_t device_id, int32_t arg_num,
2321     // void** args_base, void **args, size_t *arg_sizes, int64_t *arg_types);
2322     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2323                                 CGM.Int32Ty,
2324                                 CGM.VoidPtrPtrTy,
2325                                 CGM.VoidPtrPtrTy,
2326                                 CGM.SizeTy->getPointerTo(),
2327                                 CGM.Int64Ty->getPointerTo()};
2328     auto *FnTy =
2329         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2330     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_begin");
2331     break;
2332   }
2333   case OMPRTL__tgt_target_data_begin_nowait: {
2334     // Build void __tgt_target_data_begin_nowait(int64_t device_id, int32_t
2335     // arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t
2336     // *arg_types);
2337     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2338                                 CGM.Int32Ty,
2339                                 CGM.VoidPtrPtrTy,
2340                                 CGM.VoidPtrPtrTy,
2341                                 CGM.SizeTy->getPointerTo(),
2342                                 CGM.Int64Ty->getPointerTo()};
2343     auto *FnTy =
2344         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2345     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_begin_nowait");
2346     break;
2347   }
2348   case OMPRTL__tgt_target_data_end: {
2349     // Build void __tgt_target_data_end(int64_t device_id, int32_t arg_num,
2350     // void** args_base, void **args, size_t *arg_sizes, int64_t *arg_types);
2351     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2352                                 CGM.Int32Ty,
2353                                 CGM.VoidPtrPtrTy,
2354                                 CGM.VoidPtrPtrTy,
2355                                 CGM.SizeTy->getPointerTo(),
2356                                 CGM.Int64Ty->getPointerTo()};
2357     auto *FnTy =
2358         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2359     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_end");
2360     break;
2361   }
2362   case OMPRTL__tgt_target_data_end_nowait: {
2363     // Build void __tgt_target_data_end_nowait(int64_t device_id, int32_t
2364     // arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t
2365     // *arg_types);
2366     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2367                                 CGM.Int32Ty,
2368                                 CGM.VoidPtrPtrTy,
2369                                 CGM.VoidPtrPtrTy,
2370                                 CGM.SizeTy->getPointerTo(),
2371                                 CGM.Int64Ty->getPointerTo()};
2372     auto *FnTy =
2373         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2374     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_end_nowait");
2375     break;
2376   }
2377   case OMPRTL__tgt_target_data_update: {
2378     // Build void __tgt_target_data_update(int64_t device_id, int32_t arg_num,
2379     // void** args_base, void **args, size_t *arg_sizes, int64_t *arg_types);
2380     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2381                                 CGM.Int32Ty,
2382                                 CGM.VoidPtrPtrTy,
2383                                 CGM.VoidPtrPtrTy,
2384                                 CGM.SizeTy->getPointerTo(),
2385                                 CGM.Int64Ty->getPointerTo()};
2386     auto *FnTy =
2387         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2388     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_update");
2389     break;
2390   }
2391   case OMPRTL__tgt_target_data_update_nowait: {
2392     // Build void __tgt_target_data_update_nowait(int64_t device_id, int32_t
2393     // arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t
2394     // *arg_types);
2395     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2396                                 CGM.Int32Ty,
2397                                 CGM.VoidPtrPtrTy,
2398                                 CGM.VoidPtrPtrTy,
2399                                 CGM.SizeTy->getPointerTo(),
2400                                 CGM.Int64Ty->getPointerTo()};
2401     auto *FnTy =
2402         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2403     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_update_nowait");
2404     break;
2405   }
2406   }
2407   assert(RTLFn && "Unable to find OpenMP runtime function");
2408   return RTLFn;
2409 }
2410 
2411 llvm::FunctionCallee
2412 CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize, bool IVSigned) {
2413   assert((IVSize == 32 || IVSize == 64) &&
2414          "IV size is not compatible with the omp runtime");
2415   StringRef Name = IVSize == 32 ? (IVSigned ? "__kmpc_for_static_init_4"
2416                                             : "__kmpc_for_static_init_4u")
2417                                 : (IVSigned ? "__kmpc_for_static_init_8"
2418                                             : "__kmpc_for_static_init_8u");
2419   llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
2420   auto *PtrTy = llvm::PointerType::getUnqual(ITy);
2421   llvm::Type *TypeParams[] = {
2422     getIdentTyPointerTy(),                     // loc
2423     CGM.Int32Ty,                               // tid
2424     CGM.Int32Ty,                               // schedtype
2425     llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
2426     PtrTy,                                     // p_lower
2427     PtrTy,                                     // p_upper
2428     PtrTy,                                     // p_stride
2429     ITy,                                       // incr
2430     ITy                                        // chunk
2431   };
2432   auto *FnTy =
2433       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2434   return CGM.CreateRuntimeFunction(FnTy, Name);
2435 }
2436 
2437 llvm::FunctionCallee
2438 CGOpenMPRuntime::createDispatchInitFunction(unsigned IVSize, bool IVSigned) {
2439   assert((IVSize == 32 || IVSize == 64) &&
2440          "IV size is not compatible with the omp runtime");
2441   StringRef Name =
2442       IVSize == 32
2443           ? (IVSigned ? "__kmpc_dispatch_init_4" : "__kmpc_dispatch_init_4u")
2444           : (IVSigned ? "__kmpc_dispatch_init_8" : "__kmpc_dispatch_init_8u");
2445   llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
2446   llvm::Type *TypeParams[] = { getIdentTyPointerTy(), // loc
2447                                CGM.Int32Ty,           // tid
2448                                CGM.Int32Ty,           // schedtype
2449                                ITy,                   // lower
2450                                ITy,                   // upper
2451                                ITy,                   // stride
2452                                ITy                    // chunk
2453   };
2454   auto *FnTy =
2455       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2456   return CGM.CreateRuntimeFunction(FnTy, Name);
2457 }
2458 
2459 llvm::FunctionCallee
2460 CGOpenMPRuntime::createDispatchFiniFunction(unsigned IVSize, bool IVSigned) {
2461   assert((IVSize == 32 || IVSize == 64) &&
2462          "IV size is not compatible with the omp runtime");
2463   StringRef Name =
2464       IVSize == 32
2465           ? (IVSigned ? "__kmpc_dispatch_fini_4" : "__kmpc_dispatch_fini_4u")
2466           : (IVSigned ? "__kmpc_dispatch_fini_8" : "__kmpc_dispatch_fini_8u");
2467   llvm::Type *TypeParams[] = {
2468       getIdentTyPointerTy(), // loc
2469       CGM.Int32Ty,           // tid
2470   };
2471   auto *FnTy =
2472       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2473   return CGM.CreateRuntimeFunction(FnTy, Name);
2474 }
2475 
2476 llvm::FunctionCallee
2477 CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize, bool IVSigned) {
2478   assert((IVSize == 32 || IVSize == 64) &&
2479          "IV size is not compatible with the omp runtime");
2480   StringRef Name =
2481       IVSize == 32
2482           ? (IVSigned ? "__kmpc_dispatch_next_4" : "__kmpc_dispatch_next_4u")
2483           : (IVSigned ? "__kmpc_dispatch_next_8" : "__kmpc_dispatch_next_8u");
2484   llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
2485   auto *PtrTy = llvm::PointerType::getUnqual(ITy);
2486   llvm::Type *TypeParams[] = {
2487     getIdentTyPointerTy(),                     // loc
2488     CGM.Int32Ty,                               // tid
2489     llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
2490     PtrTy,                                     // p_lower
2491     PtrTy,                                     // p_upper
2492     PtrTy                                      // p_stride
2493   };
2494   auto *FnTy =
2495       llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2496   return CGM.CreateRuntimeFunction(FnTy, Name);
2497 }
2498 
2499 Address CGOpenMPRuntime::getAddrOfDeclareTargetLink(const VarDecl *VD) {
2500   if (CGM.getLangOpts().OpenMPSimd)
2501     return Address::invalid();
2502   llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
2503       OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
2504   if (Res && *Res == OMPDeclareTargetDeclAttr::MT_Link) {
2505     SmallString<64> PtrName;
2506     {
2507       llvm::raw_svector_ostream OS(PtrName);
2508       OS << CGM.getMangledName(GlobalDecl(VD)) << "_decl_tgt_link_ptr";
2509     }
2510     llvm::Value *Ptr = CGM.getModule().getNamedValue(PtrName);
2511     if (!Ptr) {
2512       QualType PtrTy = CGM.getContext().getPointerType(VD->getType());
2513       Ptr = getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(PtrTy),
2514                                         PtrName);
2515       if (!CGM.getLangOpts().OpenMPIsDevice) {
2516         auto *GV = cast<llvm::GlobalVariable>(Ptr);
2517         GV->setLinkage(llvm::GlobalValue::ExternalLinkage);
2518         GV->setInitializer(CGM.GetAddrOfGlobal(VD));
2519       }
2520       CGM.addUsedGlobal(cast<llvm::GlobalValue>(Ptr));
2521       registerTargetGlobalVariable(VD, cast<llvm::Constant>(Ptr));
2522     }
2523     return Address(Ptr, CGM.getContext().getDeclAlign(VD));
2524   }
2525   return Address::invalid();
2526 }
2527 
2528 llvm::Constant *
2529 CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) {
2530   assert(!CGM.getLangOpts().OpenMPUseTLS ||
2531          !CGM.getContext().getTargetInfo().isTLSSupported());
2532   // Lookup the entry, lazily creating it if necessary.
2533   std::string Suffix = getName({"cache", ""});
2534   return getOrCreateInternalVariable(
2535       CGM.Int8PtrPtrTy, Twine(CGM.getMangledName(VD)).concat(Suffix));
2536 }
2537 
2538 Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
2539                                                 const VarDecl *VD,
2540                                                 Address VDAddr,
2541                                                 SourceLocation Loc) {
2542   if (CGM.getLangOpts().OpenMPUseTLS &&
2543       CGM.getContext().getTargetInfo().isTLSSupported())
2544     return VDAddr;
2545 
2546   llvm::Type *VarTy = VDAddr.getElementType();
2547   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2548                          CGF.Builder.CreatePointerCast(VDAddr.getPointer(),
2549                                                        CGM.Int8PtrTy),
2550                          CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)),
2551                          getOrCreateThreadPrivateCache(VD)};
2552   return Address(CGF.EmitRuntimeCall(
2553       createRuntimeFunction(OMPRTL__kmpc_threadprivate_cached), Args),
2554                  VDAddr.getAlignment());
2555 }
2556 
2557 void CGOpenMPRuntime::emitThreadPrivateVarInit(
2558     CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor,
2559     llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) {
2560   // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime
2561   // library.
2562   llvm::Value *OMPLoc = emitUpdateLocation(CGF, Loc);
2563   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_global_thread_num),
2564                       OMPLoc);
2565   // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor)
2566   // to register constructor/destructor for variable.
2567   llvm::Value *Args[] = {
2568       OMPLoc, CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.VoidPtrTy),
2569       Ctor, CopyCtor, Dtor};
2570   CGF.EmitRuntimeCall(
2571       createRuntimeFunction(OMPRTL__kmpc_threadprivate_register), Args);
2572 }
2573 
// Emits the constructor/destructor helper functions for the threadprivate
// variable \p VD (located at \p VDAddr) and the code that registers them with
// the OpenMP runtime.
//
// Nothing is emitted, and nullptr is returned, when OpenMPUseTLS is enabled
// and the target supports TLS, when \p VD has no definition, when \p VD was
// already handled by an earlier call, or when neither a constructor nor a
// destructor is needed.
//
// If \p CGF is null, the registration is wrapped in a newly created global
// init function and that function is returned. Otherwise the registration is
// emitted directly into \p CGF and nullptr is returned.
llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition(
    const VarDecl *VD, Address VDAddr, SourceLocation Loc,
    bool PerformInit, CodeGenFunction *CGF) {
  if (CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getContext().getTargetInfo().isTLSSupported())
    return nullptr;

  VD = VD->getDefinition(CGM.getContext());
  // Proceed only if a definition exists and its mangled name was not recorded
  // before, so that each variable is processed at most once.
  if (VD && ThreadPrivateWithDefinition.insert(CGM.getMangledName(VD)).second) {
    QualType ASTTy = VD->getType();

    llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr;
    const Expr *Init = VD->getAnyInitializer();
    // NOTE(review): Init is dereferenced below without a null check;
    // presumably callers pass PerformInit only when VD has an initializer.
    // TODO confirm.
    if (CGM.getLangOpts().CPlusPlus && PerformInit) {
      // Generate function that re-emits the declaration's initializer into the
      // threadprivate copy of the variable VD
      CodeGenFunction CtorCGF(CGM);
      FunctionArgList Args;
      // The single void* parameter is the destination to initialize.
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
                            ImplicitParamDecl::Other);
      Args.push_back(&Dst);

      const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          CGM.getContext().VoidPtrTy, Args);
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      std::string Name = getName({"__kmpc_global_ctor_", ""});
      llvm::Function *Fn =
          CGM.CreateGlobalInitOrDestructFunction(FTy, Name, FI, Loc);
      CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI,
                            Args, Loc, Loc);
      llvm::Value *ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      // View the incoming pointer as the variable's type, using the alignment
      // of the original variable.
      Address Arg = Address(ArgVal, VDAddr.getAlignment());
      Arg = CtorCGF.Builder.CreateElementBitCast(
          Arg, CtorCGF.ConvertTypeForMem(ASTTy));
      CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(),
                               /*IsInitializer=*/true);
      // Reload the destination pointer and store it as the function's return
      // value.
      ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue);
      CtorCGF.FinishFunction();
      Ctor = Fn;
    }
    if (VD->getType().isDestructedType() != QualType::DK_none) {
      // Generate function that emits destructor call for the threadprivate copy
      // of the variable VD
      CodeGenFunction DtorCGF(CGM);
      FunctionArgList Args;
      // The single void* parameter is the object to destroy.
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
                            ImplicitParamDecl::Other);
      Args.push_back(&Dst);

      const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          CGM.getContext().VoidTy, Args);
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      std::string Name = getName({"__kmpc_global_dtor_", ""});
      llvm::Function *Fn =
          CGM.CreateGlobalInitOrDestructFunction(FTy, Name, FI, Loc);
      // Apply an empty debug location while the function is being started.
      auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
      DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args,
                            Loc, Loc);
      // Create a scope with an artificial location for the body of this function.
      auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
      llvm::Value *ArgVal = DtorCGF.EmitLoadOfScalar(
          DtorCGF.GetAddrOfLocalVar(&Dst),
          /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation());
      // Destroy the object behind the incoming pointer with the destroyer
      // selected for the variable's destruction kind.
      DtorCGF.emitDestroy(Address(ArgVal, VDAddr.getAlignment()), ASTTy,
                          DtorCGF.getDestroyer(ASTTy.isDestructedType()),
                          DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
      DtorCGF.FinishFunction();
      Dtor = Fn;
    }
    // Do not emit init function if it is not required.
    if (!Ctor && !Dtor)
      return nullptr;

    // Type of the copy constructor slot: void *(*)(void *, void *).
    llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
    auto *CopyCtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs,
                                               /*isVarArg=*/false)
                           ->getPointerTo();
    // Copying constructor for the threadprivate variable.
    // Must be NULL - reserved by runtime, but currently it requires that this
    // parameter is always NULL. Otherwise it fires assertion.
    CopyCtor = llvm::Constant::getNullValue(CopyCtorTy);
    // A missing constructor is passed as a null void *(*)(void *) pointer.
    if (Ctor == nullptr) {
      auto *CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
                                             /*isVarArg=*/false)
                         ->getPointerTo();
      Ctor = llvm::Constant::getNullValue(CtorTy);
    }
    // A missing destructor is passed as a null void (*)(void *) pointer.
    if (Dtor == nullptr) {
      auto *DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy,
                                             /*isVarArg=*/false)
                         ->getPointerTo();
      Dtor = llvm::Constant::getNullValue(DtorTy);
    }
    if (!CGF) {
      // No function to emit into: wrap the registration in a standalone
      // nullary init function and return it to the caller.
      auto *InitFunctionTy =
          llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false);
      std::string Name = getName({"__omp_threadprivate_init_", ""});
      llvm::Function *InitFunction = CGM.CreateGlobalInitOrDestructFunction(
          InitFunctionTy, Name, CGM.getTypes().arrangeNullaryFunction());
      CodeGenFunction InitCGF(CGM);
      FunctionArgList ArgList;
      InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction,
                            CGM.getTypes().arrangeNullaryFunction(), ArgList,
                            Loc, Loc);
      emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
      InitCGF.FinishFunction();
      return InitFunction;
    }
    // Otherwise emit the registration directly into the caller's function.
    emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
  }
  return nullptr;
}
2693 
2694 /// Obtain information that uniquely identifies a target entry. This
2695 /// consists of the file and device IDs as well as line number associated with
2696 /// the relevant entry source location.
2697 static void getTargetEntryUniqueInfo(ASTContext &C, SourceLocation Loc,
2698                                      unsigned &DeviceID, unsigned &FileID,
2699                                      unsigned &LineNum) {
2700   SourceManager &SM = C.getSourceManager();
2701 
2702   // The loc should be always valid and have a file ID (the user cannot use
2703   // #pragma directives in macros)
2704 
2705   assert(Loc.isValid() && "Source location is expected to be always valid.");
2706 
2707   PresumedLoc PLoc = SM.getPresumedLoc(Loc);
2708   assert(PLoc.isValid() && "Source location is expected to be always valid.");
2709 
2710   llvm::sys::fs::UniqueID ID;
2711   if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID))
2712     SM.getDiagnostics().Report(diag::err_cannot_open_file)
2713         << PLoc.getFilename() << EC.message();
2714 
2715   DeviceID = ID.getDevice();
2716   FileID = ID.getFile();
2717   LineNum = PLoc.getLine();
2718 }
2719 
2720 bool CGOpenMPRuntime::emitDeclareTargetVarDefinition(const VarDecl *VD,
2721                                                      llvm::GlobalVariable *Addr,
2722                                                      bool PerformInit) {
2723   Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
2724       OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
2725   if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link)
2726     return CGM.getLangOpts().OpenMPIsDevice;
2727   VD = VD->getDefinition(CGM.getContext());
2728   if (VD && !DeclareTargetWithDefinition.insert(CGM.getMangledName(VD)).second)
2729     return CGM.getLangOpts().OpenMPIsDevice;
2730 
2731   QualType ASTTy = VD->getType();
2732 
2733   SourceLocation Loc = VD->getCanonicalDecl()->getBeginLoc();
2734   // Produce the unique prefix to identify the new target regions. We use
2735   // the source location of the variable declaration which we know to not
2736   // conflict with any target region.
2737   unsigned DeviceID;
2738   unsigned FileID;
2739   unsigned Line;
2740   getTargetEntryUniqueInfo(CGM.getContext(), Loc, DeviceID, FileID, Line);
2741   SmallString<128> Buffer, Out;
2742   {
2743     llvm::raw_svector_ostream OS(Buffer);
2744     OS << "__omp_offloading_" << llvm::format("_%x", DeviceID)
2745        << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line;
2746   }
2747 
2748   const Expr *Init = VD->getAnyInitializer();
2749   if (CGM.getLangOpts().CPlusPlus && PerformInit) {
2750     llvm::Constant *Ctor;
2751     llvm::Constant *ID;
2752     if (CGM.getLangOpts().OpenMPIsDevice) {
2753       // Generate function that re-emits the declaration's initializer into
2754       // the threadprivate copy of the variable VD
2755       CodeGenFunction CtorCGF(CGM);
2756 
2757       const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
2758       llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
2759       llvm::Function *Fn = CGM.CreateGlobalInitOrDestructFunction(
2760           FTy, Twine(Buffer, "_ctor"), FI, Loc);
2761       auto NL = ApplyDebugLocation::CreateEmpty(CtorCGF);
2762       CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
2763                             FunctionArgList(), Loc, Loc);
2764       auto AL = ApplyDebugLocation::CreateArtificial(CtorCGF);
2765       CtorCGF.EmitAnyExprToMem(Init,
2766                                Address(Addr, CGM.getContext().getDeclAlign(VD)),
2767                                Init->getType().getQualifiers(),
2768                                /*IsInitializer=*/true);
2769       CtorCGF.FinishFunction();
2770       Ctor = Fn;
2771       ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
2772       CGM.addUsedGlobal(cast<llvm::GlobalValue>(Ctor));
2773     } else {
2774       Ctor = new llvm::GlobalVariable(
2775           CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
2776           llvm::GlobalValue::PrivateLinkage,
2777           llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_ctor"));
2778       ID = Ctor;
2779     }
2780 
2781     // Register the information for the entry associated with the constructor.
2782     Out.clear();
2783     OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
2784         DeviceID, FileID, Twine(Buffer, "_ctor").toStringRef(Out), Line, Ctor,
2785         ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryCtor);
2786   }
2787   if (VD->getType().isDestructedType() != QualType::DK_none) {
2788     llvm::Constant *Dtor;
2789     llvm::Constant *ID;
2790     if (CGM.getLangOpts().OpenMPIsDevice) {
2791       // Generate function that emits destructor call for the threadprivate
2792       // copy of the variable VD
2793       CodeGenFunction DtorCGF(CGM);
2794 
2795       const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
2796       llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
2797       llvm::Function *Fn = CGM.CreateGlobalInitOrDestructFunction(
2798           FTy, Twine(Buffer, "_dtor"), FI, Loc);
2799       auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
2800       DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
2801                             FunctionArgList(), Loc, Loc);
2802       // Create a scope with an artificial location for the body of this
2803       // function.
2804       auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
2805       DtorCGF.emitDestroy(Address(Addr, CGM.getContext().getDeclAlign(VD)),
2806                           ASTTy, DtorCGF.getDestroyer(ASTTy.isDestructedType()),
2807                           DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
2808       DtorCGF.FinishFunction();
2809       Dtor = Fn;
2810       ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
2811       CGM.addUsedGlobal(cast<llvm::GlobalValue>(Dtor));
2812     } else {
2813       Dtor = new llvm::GlobalVariable(
2814           CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
2815           llvm::GlobalValue::PrivateLinkage,
2816           llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_dtor"));
2817       ID = Dtor;
2818     }
2819     // Register the information for the entry associated with the destructor.
2820     Out.clear();
2821     OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
2822         DeviceID, FileID, Twine(Buffer, "_dtor").toStringRef(Out), Line, Dtor,
2823         ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryDtor);
2824   }
2825   return CGM.getLangOpts().OpenMPIsDevice;
2826 }
2827 
2828 Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF,
2829                                                           QualType VarType,
2830                                                           StringRef Name) {
2831   std::string Suffix = getName({"artificial", ""});
2832   std::string CacheSuffix = getName({"cache", ""});
2833   llvm::Type *VarLVType = CGF.ConvertTypeForMem(VarType);
2834   llvm::Value *GAddr =
2835       getOrCreateInternalVariable(VarLVType, Twine(Name).concat(Suffix));
2836   llvm::Value *Args[] = {
2837       emitUpdateLocation(CGF, SourceLocation()),
2838       getThreadID(CGF, SourceLocation()),
2839       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(GAddr, CGM.VoidPtrTy),
2840       CGF.Builder.CreateIntCast(CGF.getTypeSize(VarType), CGM.SizeTy,
2841                                 /*IsSigned=*/false),
2842       getOrCreateInternalVariable(
2843           CGM.VoidPtrPtrTy, Twine(Name).concat(Suffix).concat(CacheSuffix))};
2844   return Address(
2845       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2846           CGF.EmitRuntimeCall(
2847               createRuntimeFunction(OMPRTL__kmpc_threadprivate_cached), Args),
2848           VarLVType->getPointerTo(/*AddrSpace=*/0)),
2849       CGM.getPointerAlign());
2850 }
2851 
2852 void CGOpenMPRuntime::emitOMPIfClause(CodeGenFunction &CGF, const Expr *Cond,
2853                                       const RegionCodeGenTy &ThenGen,
2854                                       const RegionCodeGenTy &ElseGen) {
2855   CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange());
2856 
2857   // If the condition constant folds and can be elided, try to avoid emitting
2858   // the condition and the dead arm of the if/else.
2859   bool CondConstant;
2860   if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) {
2861     if (CondConstant)
2862       ThenGen(CGF);
2863     else
2864       ElseGen(CGF);
2865     return;
2866   }
2867 
2868   // Otherwise, the condition did not fold, or we couldn't elide it.  Just
2869   // emit the conditional branch.
2870   llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("omp_if.then");
2871   llvm::BasicBlock *ElseBlock = CGF.createBasicBlock("omp_if.else");
2872   llvm::BasicBlock *ContBlock = CGF.createBasicBlock("omp_if.end");
2873   CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0);
2874 
2875   // Emit the 'then' code.
2876   CGF.EmitBlock(ThenBlock);
2877   ThenGen(CGF);
2878   CGF.EmitBranch(ContBlock);
2879   // Emit the 'else' code if present.
2880   // There is no need to emit line number for unconditional branch.
2881   (void)ApplyDebugLocation::CreateEmpty(CGF);
2882   CGF.EmitBlock(ElseBlock);
2883   ElseGen(CGF);
2884   // There is no need to emit line number for unconditional branch.
2885   (void)ApplyDebugLocation::CreateEmpty(CGF);
2886   CGF.EmitBranch(ContBlock);
2887   // Emit the continuation block for code after the if.
2888   CGF.EmitBlock(ContBlock, /*IsFinished=*/true);
2889 }
2890 
2891 void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
2892                                        llvm::Function *OutlinedFn,
2893                                        ArrayRef<llvm::Value *> CapturedVars,
2894                                        const Expr *IfCond) {
2895   if (!CGF.HaveInsertPoint())
2896     return;
2897   llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
2898   auto &&ThenGen = [OutlinedFn, CapturedVars, RTLoc](CodeGenFunction &CGF,
2899                                                      PrePostActionTy &) {
2900     // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn);
2901     CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
2902     llvm::Value *Args[] = {
2903         RTLoc,
2904         CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
2905         CGF.Builder.CreateBitCast(OutlinedFn, RT.getKmpc_MicroPointerTy())};
2906     llvm::SmallVector<llvm::Value *, 16> RealArgs;
2907     RealArgs.append(std::begin(Args), std::end(Args));
2908     RealArgs.append(CapturedVars.begin(), CapturedVars.end());
2909 
2910     llvm::FunctionCallee RTLFn =
2911         RT.createRuntimeFunction(OMPRTL__kmpc_fork_call);
2912     CGF.EmitRuntimeCall(RTLFn, RealArgs);
2913   };
2914   auto &&ElseGen = [OutlinedFn, CapturedVars, RTLoc, Loc](CodeGenFunction &CGF,
2915                                                           PrePostActionTy &) {
2916     CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
2917     llvm::Value *ThreadID = RT.getThreadID(CGF, Loc);
2918     // Build calls:
2919     // __kmpc_serialized_parallel(&Loc, GTid);
2920     llvm::Value *Args[] = {RTLoc, ThreadID};
2921     CGF.EmitRuntimeCall(
2922         RT.createRuntimeFunction(OMPRTL__kmpc_serialized_parallel), Args);
2923 
2924     // OutlinedFn(&GTid, &zero, CapturedStruct);
2925     Address ZeroAddr = CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty,
2926                                                         /*Name*/ ".zero.addr");
2927     CGF.InitTempAlloca(ZeroAddr, CGF.Builder.getInt32(/*C*/ 0));
2928     llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
2929     // ThreadId for serialized parallels is 0.
2930     OutlinedFnArgs.push_back(ZeroAddr.getPointer());
2931     OutlinedFnArgs.push_back(ZeroAddr.getPointer());
2932     OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());
2933     RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs);
2934 
2935     // __kmpc_end_serialized_parallel(&Loc, GTid);
2936     llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID};
2937     CGF.EmitRuntimeCall(
2938         RT.createRuntimeFunction(OMPRTL__kmpc_end_serialized_parallel),
2939         EndArgs);
2940   };
2941   if (IfCond) {
2942     emitOMPIfClause(CGF, IfCond, ThenGen, ElseGen);
2943   } else {
2944     RegionCodeGenTy ThenRCG(ThenGen);
2945     ThenRCG(CGF);
2946   }
2947 }
2948 
2949 // If we're inside an (outlined) parallel region, use the region info's
2950 // thread-ID variable (it is passed in a first argument of the outlined function
2951 // as "kmp_int32 *gtid"). Otherwise, if we're not inside parallel region, but in
2952 // regular serial code region, get thread ID by calling kmp_int32
2953 // kmpc_global_thread_num(ident_t *loc), stash this thread ID in a temporary and
2954 // return the address of that temp.
2955 Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF,
2956                                              SourceLocation Loc) {
2957   if (auto *OMPRegionInfo =
2958           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
2959     if (OMPRegionInfo->getThreadIDVariable())
2960       return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress();
2961 
2962   llvm::Value *ThreadID = getThreadID(CGF, Loc);
2963   QualType Int32Ty =
2964       CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true);
2965   Address ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp.");
2966   CGF.EmitStoreOfScalar(ThreadID,
2967                         CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty));
2968 
2969   return ThreadIDTemp;
2970 }
2971 
2972 llvm::Constant *CGOpenMPRuntime::getOrCreateInternalVariable(
2973     llvm::Type *Ty, const llvm::Twine &Name, unsigned AddressSpace) {
2974   SmallString<256> Buffer;
2975   llvm::raw_svector_ostream Out(Buffer);
2976   Out << Name;
2977   StringRef RuntimeName = Out.str();
2978   auto &Elem = *InternalVars.try_emplace(RuntimeName, nullptr).first;
2979   if (Elem.second) {
2980     assert(Elem.second->getType()->getPointerElementType() == Ty &&
2981            "OMP internal variable has different type than requested");
2982     return &*Elem.second;
2983   }
2984 
2985   return Elem.second = new llvm::GlobalVariable(
2986              CGM.getModule(), Ty, /*IsConstant*/ false,
2987              llvm::GlobalValue::CommonLinkage, llvm::Constant::getNullValue(Ty),
2988              Elem.first(), /*InsertBefore=*/nullptr,
2989              llvm::GlobalValue::NotThreadLocal, AddressSpace);
2990 }
2991 
2992 llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) {
2993   std::string Prefix = Twine("gomp_critical_user_", CriticalName).str();
2994   std::string Name = getName({Prefix, "var"});
2995   return getOrCreateInternalVariable(KmpCriticalNameTy, Name);
2996 }
2997 
2998 namespace {
2999 /// Common pre(post)-action for different OpenMP constructs.
3000 class CommonActionTy final : public PrePostActionTy {
3001   llvm::FunctionCallee EnterCallee;
3002   ArrayRef<llvm::Value *> EnterArgs;
3003   llvm::FunctionCallee ExitCallee;
3004   ArrayRef<llvm::Value *> ExitArgs;
3005   bool Conditional;
3006   llvm::BasicBlock *ContBlock = nullptr;
3007 
3008 public:
3009   CommonActionTy(llvm::FunctionCallee EnterCallee,
3010                  ArrayRef<llvm::Value *> EnterArgs,
3011                  llvm::FunctionCallee ExitCallee,
3012                  ArrayRef<llvm::Value *> ExitArgs, bool Conditional = false)
3013       : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee),
3014         ExitArgs(ExitArgs), Conditional(Conditional) {}
3015   void Enter(CodeGenFunction &CGF) override {
3016     llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs);
3017     if (Conditional) {
3018       llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes);
3019       auto *ThenBlock = CGF.createBasicBlock("omp_if.then");
3020       ContBlock = CGF.createBasicBlock("omp_if.end");
3021       // Generate the branch (If-stmt)
3022       CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock);
3023       CGF.EmitBlock(ThenBlock);
3024     }
3025   }
3026   void Done(CodeGenFunction &CGF) {
3027     // Emit the rest of blocks/branches
3028     CGF.EmitBranch(ContBlock);
3029     CGF.EmitBlock(ContBlock, true);
3030   }
3031   void Exit(CodeGenFunction &CGF) override {
3032     CGF.EmitRuntimeCall(ExitCallee, ExitArgs);
3033   }
3034 };
3035 } // anonymous namespace
3036 
3037 void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF,
3038                                          StringRef CriticalName,
3039                                          const RegionCodeGenTy &CriticalOpGen,
3040                                          SourceLocation Loc, const Expr *Hint) {
3041   // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]);
3042   // CriticalOpGen();
3043   // __kmpc_end_critical(ident_t *, gtid, Lock);
3044   // Prepare arguments and build a call to __kmpc_critical
3045   if (!CGF.HaveInsertPoint())
3046     return;
3047   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
3048                          getCriticalRegionLock(CriticalName)};
3049   llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args),
3050                                                 std::end(Args));
3051   if (Hint) {
3052     EnterArgs.push_back(CGF.Builder.CreateIntCast(
3053         CGF.EmitScalarExpr(Hint), CGM.IntPtrTy, /*isSigned=*/false));
3054   }
3055   CommonActionTy Action(
3056       createRuntimeFunction(Hint ? OMPRTL__kmpc_critical_with_hint
3057                                  : OMPRTL__kmpc_critical),
3058       EnterArgs, createRuntimeFunction(OMPRTL__kmpc_end_critical), Args);
3059   CriticalOpGen.setAction(Action);
3060   emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen);
3061 }
3062 
3063 void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF,
3064                                        const RegionCodeGenTy &MasterOpGen,
3065                                        SourceLocation Loc) {
3066   if (!CGF.HaveInsertPoint())
3067     return;
3068   // if(__kmpc_master(ident_t *, gtid)) {
3069   //   MasterOpGen();
3070   //   __kmpc_end_master(ident_t *, gtid);
3071   // }
3072   // Prepare arguments and build a call to __kmpc_master
3073   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
3074   CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_master), Args,
3075                         createRuntimeFunction(OMPRTL__kmpc_end_master), Args,
3076                         /*Conditional=*/true);
3077   MasterOpGen.setAction(Action);
3078   emitInlinedDirective(CGF, OMPD_master, MasterOpGen);
3079   Action.Done(CGF);
3080 }
3081 
3082 void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
3083                                         SourceLocation Loc) {
3084   if (!CGF.HaveInsertPoint())
3085     return;
3086   // Build call __kmpc_omp_taskyield(loc, thread_id, 0);
3087   llvm::Value *Args[] = {
3088       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
3089       llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)};
3090   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_taskyield), Args);
3091   if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
3092     Region->emitUntiedSwitch(CGF);
3093 }
3094 
3095 void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF,
3096                                           const RegionCodeGenTy &TaskgroupOpGen,
3097                                           SourceLocation Loc) {
3098   if (!CGF.HaveInsertPoint())
3099     return;
3100   // __kmpc_taskgroup(ident_t *, gtid);
3101   // TaskgroupOpGen();
3102   // __kmpc_end_taskgroup(ident_t *, gtid);
3103   // Prepare arguments and build a call to __kmpc_taskgroup
3104   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
3105   CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_taskgroup), Args,
3106                         createRuntimeFunction(OMPRTL__kmpc_end_taskgroup),
3107                         Args);
3108   TaskgroupOpGen.setAction(Action);
3109   emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen);
3110 }
3111 
3112 /// Given an array of pointers to variables, project the address of a
3113 /// given variable.
3114 static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array,
3115                                       unsigned Index, const VarDecl *Var) {
3116   // Pull out the pointer to the variable.
3117   Address PtrAddr = CGF.Builder.CreateConstArrayGEP(Array, Index);
3118   llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr);
3119 
3120   Address Addr = Address(Ptr, CGF.getContext().getDeclAlign(Var));
3121   Addr = CGF.Builder.CreateElementBitCast(
3122       Addr, CGF.ConvertTypeForMem(Var->getType()));
3123   return Addr;
3124 }
3125 
3126 static llvm::Value *emitCopyprivateCopyFunction(
3127     CodeGenModule &CGM, llvm::Type *ArgsType,
3128     ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs,
3129     ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps,
3130     SourceLocation Loc) {
3131   ASTContext &C = CGM.getContext();
3132   // void copy_func(void *LHSArg, void *RHSArg);
3133   FunctionArgList Args;
3134   ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
3135                            ImplicitParamDecl::Other);
3136   ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
3137                            ImplicitParamDecl::Other);
3138   Args.push_back(&LHSArg);
3139   Args.push_back(&RHSArg);
3140   const auto &CGFI =
3141       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
3142   std::string Name =
3143       CGM.getOpenMPRuntime().getName({"omp", "copyprivate", "copy_func"});
3144   auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
3145                                     llvm::GlobalValue::InternalLinkage, Name,
3146                                     &CGM.getModule());
3147   CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
3148   Fn->setDoesNotRecurse();
3149   CodeGenFunction CGF(CGM);
3150   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
3151   // Dest = (void*[n])(LHSArg);
3152   // Src = (void*[n])(RHSArg);
3153   Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3154       CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
3155       ArgsType), CGF.getPointerAlign());
3156   Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3157       CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
3158       ArgsType), CGF.getPointerAlign());
3159   // *(Type0*)Dst[0] = *(Type0*)Src[0];
3160   // *(Type1*)Dst[1] = *(Type1*)Src[1];
3161   // ...
3162   // *(Typen*)Dst[n] = *(Typen*)Src[n];
3163   for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) {
3164     const auto *DestVar =
3165         cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl());
3166     Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar);
3167 
3168     const auto *SrcVar =
3169         cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl());
3170     Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar);
3171 
3172     const auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl();
3173     QualType Type = VD->getType();
3174     CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]);
3175   }
3176   CGF.FinishFunction();
3177   return Fn;
3178 }
3179 
3180 void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF,
3181                                        const RegionCodeGenTy &SingleOpGen,
3182                                        SourceLocation Loc,
3183                                        ArrayRef<const Expr *> CopyprivateVars,
3184                                        ArrayRef<const Expr *> SrcExprs,
3185                                        ArrayRef<const Expr *> DstExprs,
3186                                        ArrayRef<const Expr *> AssignmentOps) {
3187   if (!CGF.HaveInsertPoint())
3188     return;
3189   assert(CopyprivateVars.size() == SrcExprs.size() &&
3190          CopyprivateVars.size() == DstExprs.size() &&
3191          CopyprivateVars.size() == AssignmentOps.size());
3192   ASTContext &C = CGM.getContext();
3193   // int32 did_it = 0;
3194   // if(__kmpc_single(ident_t *, gtid)) {
3195   //   SingleOpGen();
3196   //   __kmpc_end_single(ident_t *, gtid);
3197   //   did_it = 1;
3198   // }
3199   // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
3200   // <copy_func>, did_it);
3201 
3202   Address DidIt = Address::invalid();
3203   if (!CopyprivateVars.empty()) {
3204     // int32 did_it = 0;
3205     QualType KmpInt32Ty =
3206         C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
3207     DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it");
3208     CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt);
3209   }
3210   // Prepare arguments and build a call to __kmpc_single
3211   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
3212   CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_single), Args,
3213                         createRuntimeFunction(OMPRTL__kmpc_end_single), Args,
3214                         /*Conditional=*/true);
3215   SingleOpGen.setAction(Action);
3216   emitInlinedDirective(CGF, OMPD_single, SingleOpGen);
3217   if (DidIt.isValid()) {
3218     // did_it = 1;
3219     CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt);
3220   }
3221   Action.Done(CGF);
3222   // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
3223   // <copy_func>, did_it);
3224   if (DidIt.isValid()) {
3225     llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size());
3226     QualType CopyprivateArrayTy =
3227         C.getConstantArrayType(C.VoidPtrTy, ArraySize, ArrayType::Normal,
3228                                /*IndexTypeQuals=*/0);
3229     // Create a list of all private variables for copyprivate.
3230     Address CopyprivateList =
3231         CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list");
3232     for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) {
3233       Address Elem = CGF.Builder.CreateConstArrayGEP(CopyprivateList, I);
3234       CGF.Builder.CreateStore(
3235           CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3236               CGF.EmitLValue(CopyprivateVars[I]).getPointer(), CGF.VoidPtrTy),
3237           Elem);
3238     }
3239     // Build function that copies private values from single region to all other
3240     // threads in the corresponding parallel region.
3241     llvm::Value *CpyFn = emitCopyprivateCopyFunction(
3242         CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy)->getPointerTo(),
3243         CopyprivateVars, SrcExprs, DstExprs, AssignmentOps, Loc);
3244     llvm::Value *BufSize = CGF.getTypeSize(CopyprivateArrayTy);
3245     Address CL =
3246       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(CopyprivateList,
3247                                                       CGF.VoidPtrTy);
3248     llvm::Value *DidItVal = CGF.Builder.CreateLoad(DidIt);
3249     llvm::Value *Args[] = {
3250         emitUpdateLocation(CGF, Loc), // ident_t *<loc>
3251         getThreadID(CGF, Loc),        // i32 <gtid>
3252         BufSize,                      // size_t <buf_size>
3253         CL.getPointer(),              // void *<copyprivate list>
3254         CpyFn,                        // void (*) (void *, void *) <copy_func>
3255         DidItVal                      // i32 did_it
3256     };
3257     CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_copyprivate), Args);
3258   }
3259 }
3260 
3261 void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF,
3262                                         const RegionCodeGenTy &OrderedOpGen,
3263                                         SourceLocation Loc, bool IsThreads) {
3264   if (!CGF.HaveInsertPoint())
3265     return;
3266   // __kmpc_ordered(ident_t *, gtid);
3267   // OrderedOpGen();
3268   // __kmpc_end_ordered(ident_t *, gtid);
3269   // Prepare arguments and build a call to __kmpc_ordered
3270   if (IsThreads) {
3271     llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
3272     CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_ordered), Args,
3273                           createRuntimeFunction(OMPRTL__kmpc_end_ordered),
3274                           Args);
3275     OrderedOpGen.setAction(Action);
3276     emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
3277     return;
3278   }
3279   emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
3280 }
3281 
3282 unsigned CGOpenMPRuntime::getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind) {
3283   unsigned Flags;
3284   if (Kind == OMPD_for)
3285     Flags = OMP_IDENT_BARRIER_IMPL_FOR;
3286   else if (Kind == OMPD_sections)
3287     Flags = OMP_IDENT_BARRIER_IMPL_SECTIONS;
3288   else if (Kind == OMPD_single)
3289     Flags = OMP_IDENT_BARRIER_IMPL_SINGLE;
3290   else if (Kind == OMPD_barrier)
3291     Flags = OMP_IDENT_BARRIER_EXPL;
3292   else
3293     Flags = OMP_IDENT_BARRIER_IMPL;
3294   return Flags;
3295 }
3296 
3297 void CGOpenMPRuntime::getDefaultScheduleAndChunk(
3298     CodeGenFunction &CGF, const OMPLoopDirective &S,
3299     OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const {
3300   // Check if the loop directive is actually a doacross loop directive. In this
3301   // case choose static, 1 schedule.
3302   if (llvm::any_of(
3303           S.getClausesOfKind<OMPOrderedClause>(),
3304           [](const OMPOrderedClause *C) { return C->getNumForLoops(); })) {
3305     ScheduleKind = OMPC_SCHEDULE_static;
3306     // Chunk size is 1 in this case.
3307     llvm::APInt ChunkSize(32, 1);
3308     ChunkExpr = IntegerLiteral::Create(
3309         CGF.getContext(), ChunkSize,
3310         CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/0),
3311         SourceLocation());
3312   }
3313 }
3314 
3315 void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
3316                                       OpenMPDirectiveKind Kind, bool EmitChecks,
3317                                       bool ForceSimpleCall) {
3318   if (!CGF.HaveInsertPoint())
3319     return;
3320   // Build call __kmpc_cancel_barrier(loc, thread_id);
3321   // Build call __kmpc_barrier(loc, thread_id);
3322   unsigned Flags = getDefaultFlagsForBarriers(Kind);
3323   // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc,
3324   // thread_id);
3325   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags),
3326                          getThreadID(CGF, Loc)};
3327   if (auto *OMPRegionInfo =
3328           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
3329     if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) {
3330       llvm::Value *Result = CGF.EmitRuntimeCall(
3331           createRuntimeFunction(OMPRTL__kmpc_cancel_barrier), Args);
3332       if (EmitChecks) {
3333         // if (__kmpc_cancel_barrier()) {
3334         //   exit from construct;
3335         // }
3336         llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
3337         llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
3338         llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
3339         CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
3340         CGF.EmitBlock(ExitBB);
3341         //   exit from construct;
3342         CodeGenFunction::JumpDest CancelDestination =
3343             CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
3344         CGF.EmitBranchThroughCleanup(CancelDestination);
3345         CGF.EmitBlock(ContBB, /*IsFinished=*/true);
3346       }
3347       return;
3348     }
3349   }
3350   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_barrier), Args);
3351 }
3352 
3353 /// Map the OpenMP loop schedule to the runtime enumeration.
3354 static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind,
3355                                           bool Chunked, bool Ordered) {
3356   switch (ScheduleKind) {
3357   case OMPC_SCHEDULE_static:
3358     return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked)
3359                    : (Ordered ? OMP_ord_static : OMP_sch_static);
3360   case OMPC_SCHEDULE_dynamic:
3361     return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked;
3362   case OMPC_SCHEDULE_guided:
3363     return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked;
3364   case OMPC_SCHEDULE_runtime:
3365     return Ordered ? OMP_ord_runtime : OMP_sch_runtime;
3366   case OMPC_SCHEDULE_auto:
3367     return Ordered ? OMP_ord_auto : OMP_sch_auto;
3368   case OMPC_SCHEDULE_unknown:
3369     assert(!Chunked && "chunk was specified but schedule kind not known");
3370     return Ordered ? OMP_ord_static : OMP_sch_static;
3371   }
3372   llvm_unreachable("Unexpected runtime schedule");
3373 }
3374 
3375 /// Map the OpenMP distribute schedule to the runtime enumeration.
3376 static OpenMPSchedType
3377 getRuntimeSchedule(OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) {
3378   // only static is allowed for dist_schedule
3379   return Chunked ? OMP_dist_sch_static_chunked : OMP_dist_sch_static;
3380 }
3381 
3382 bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind,
3383                                          bool Chunked) const {
3384   OpenMPSchedType Schedule =
3385       getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
3386   return Schedule == OMP_sch_static;
3387 }
3388 
3389 bool CGOpenMPRuntime::isStaticNonchunked(
3390     OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
3391   OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
3392   return Schedule == OMP_dist_sch_static;
3393 }
3394 
3395 bool CGOpenMPRuntime::isStaticChunked(OpenMPScheduleClauseKind ScheduleKind,
3396                                       bool Chunked) const {
3397   OpenMPSchedType Schedule =
3398       getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
3399   return Schedule == OMP_sch_static_chunked;
3400 }
3401 
3402 bool CGOpenMPRuntime::isStaticChunked(
3403     OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
3404   OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
3405   return Schedule == OMP_dist_sch_static_chunked;
3406 }
3407 
3408 bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const {
3409   OpenMPSchedType Schedule =
3410       getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false);
3411   assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here");
3412   return Schedule != OMP_sch_static;
3413 }
3414 
3415 static int addMonoNonMonoModifier(OpenMPSchedType Schedule,
3416                                   OpenMPScheduleClauseModifier M1,
3417                                   OpenMPScheduleClauseModifier M2) {
3418   int Modifier = 0;
3419   switch (M1) {
3420   case OMPC_SCHEDULE_MODIFIER_monotonic:
3421     Modifier = OMP_sch_modifier_monotonic;
3422     break;
3423   case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
3424     Modifier = OMP_sch_modifier_nonmonotonic;
3425     break;
3426   case OMPC_SCHEDULE_MODIFIER_simd:
3427     if (Schedule == OMP_sch_static_chunked)
3428       Schedule = OMP_sch_static_balanced_chunked;
3429     break;
3430   case OMPC_SCHEDULE_MODIFIER_last:
3431   case OMPC_SCHEDULE_MODIFIER_unknown:
3432     break;
3433   }
3434   switch (M2) {
3435   case OMPC_SCHEDULE_MODIFIER_monotonic:
3436     Modifier = OMP_sch_modifier_monotonic;
3437     break;
3438   case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
3439     Modifier = OMP_sch_modifier_nonmonotonic;
3440     break;
3441   case OMPC_SCHEDULE_MODIFIER_simd:
3442     if (Schedule == OMP_sch_static_chunked)
3443       Schedule = OMP_sch_static_balanced_chunked;
3444     break;
3445   case OMPC_SCHEDULE_MODIFIER_last:
3446   case OMPC_SCHEDULE_MODIFIER_unknown:
3447     break;
3448   }
3449   return Schedule | Modifier;
3450 }
3451 
3452 void CGOpenMPRuntime::emitForDispatchInit(
3453     CodeGenFunction &CGF, SourceLocation Loc,
3454     const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
3455     bool Ordered, const DispatchRTInput &DispatchValues) {
3456   if (!CGF.HaveInsertPoint())
3457     return;
3458   OpenMPSchedType Schedule = getRuntimeSchedule(
3459       ScheduleKind.Schedule, DispatchValues.Chunk != nullptr, Ordered);
3460   assert(Ordered ||
3461          (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked &&
3462           Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked &&
3463           Schedule != OMP_sch_static_balanced_chunked));
3464   // Call __kmpc_dispatch_init(
3465   //          ident_t *loc, kmp_int32 tid, kmp_int32 schedule,
3466   //          kmp_int[32|64] lower, kmp_int[32|64] upper,
3467   //          kmp_int[32|64] stride, kmp_int[32|64] chunk);
3468 
3469   // If the Chunk was not specified in the clause - use default value 1.
3470   llvm::Value *Chunk = DispatchValues.Chunk ? DispatchValues.Chunk
3471                                             : CGF.Builder.getIntN(IVSize, 1);
3472   llvm::Value *Args[] = {
3473       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
3474       CGF.Builder.getInt32(addMonoNonMonoModifier(
3475           Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type
3476       DispatchValues.LB,                                // Lower
3477       DispatchValues.UB,                                // Upper
3478       CGF.Builder.getIntN(IVSize, 1),                   // Stride
3479       Chunk                                             // Chunk
3480   };
3481   CGF.EmitRuntimeCall(createDispatchInitFunction(IVSize, IVSigned), Args);
3482 }
3483 
3484 static void emitForStaticInitCall(
3485     CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId,
3486     llvm::FunctionCallee ForStaticInitFunction, OpenMPSchedType Schedule,
3487     OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2,
3488     const CGOpenMPRuntime::StaticRTInput &Values) {
3489   if (!CGF.HaveInsertPoint())
3490     return;
3491 
3492   assert(!Values.Ordered);
3493   assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked ||
3494          Schedule == OMP_sch_static_balanced_chunked ||
3495          Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked ||
3496          Schedule == OMP_dist_sch_static ||
3497          Schedule == OMP_dist_sch_static_chunked);
3498 
3499   // Call __kmpc_for_static_init(
3500   //          ident_t *loc, kmp_int32 tid, kmp_int32 schedtype,
3501   //          kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower,
3502   //          kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
3503   //          kmp_int[32|64] incr, kmp_int[32|64] chunk);
3504   llvm::Value *Chunk = Values.Chunk;
3505   if (Chunk == nullptr) {
3506     assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static ||
3507             Schedule == OMP_dist_sch_static) &&
3508            "expected static non-chunked schedule");
3509     // If the Chunk was not specified in the clause - use default value 1.
3510     Chunk = CGF.Builder.getIntN(Values.IVSize, 1);
3511   } else {
3512     assert((Schedule == OMP_sch_static_chunked ||
3513             Schedule == OMP_sch_static_balanced_chunked ||
3514             Schedule == OMP_ord_static_chunked ||
3515             Schedule == OMP_dist_sch_static_chunked) &&
3516            "expected static chunked schedule");
3517   }
3518   llvm::Value *Args[] = {
3519       UpdateLocation,
3520       ThreadId,
3521       CGF.Builder.getInt32(addMonoNonMonoModifier(Schedule, M1,
3522                                                   M2)), // Schedule type
3523       Values.IL.getPointer(),                           // &isLastIter
3524       Values.LB.getPointer(),                           // &LB
3525       Values.UB.getPointer(),                           // &UB
3526       Values.ST.getPointer(),                           // &Stride
3527       CGF.Builder.getIntN(Values.IVSize, 1),            // Incr
3528       Chunk                                             // Chunk
3529   };
3530   CGF.EmitRuntimeCall(ForStaticInitFunction, Args);
3531 }
3532 
3533 void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF,
3534                                         SourceLocation Loc,
3535                                         OpenMPDirectiveKind DKind,
3536                                         const OpenMPScheduleTy &ScheduleKind,
3537                                         const StaticRTInput &Values) {
3538   OpenMPSchedType ScheduleNum = getRuntimeSchedule(
3539       ScheduleKind.Schedule, Values.Chunk != nullptr, Values.Ordered);
3540   assert(isOpenMPWorksharingDirective(DKind) &&
3541          "Expected loop-based or sections-based directive.");
3542   llvm::Value *UpdatedLocation = emitUpdateLocation(CGF, Loc,
3543                                              isOpenMPLoopDirective(DKind)
3544                                                  ? OMP_IDENT_WORK_LOOP
3545                                                  : OMP_IDENT_WORK_SECTIONS);
3546   llvm::Value *ThreadId = getThreadID(CGF, Loc);
3547   llvm::FunctionCallee StaticInitFunction =
3548       createForStaticInitFunction(Values.IVSize, Values.IVSigned);
3549   emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
3550                         ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, Values);
3551 }
3552 
3553 void CGOpenMPRuntime::emitDistributeStaticInit(
3554     CodeGenFunction &CGF, SourceLocation Loc,
3555     OpenMPDistScheduleClauseKind SchedKind,
3556     const CGOpenMPRuntime::StaticRTInput &Values) {
3557   OpenMPSchedType ScheduleNum =
3558       getRuntimeSchedule(SchedKind, Values.Chunk != nullptr);
3559   llvm::Value *UpdatedLocation =
3560       emitUpdateLocation(CGF, Loc, OMP_IDENT_WORK_DISTRIBUTE);
3561   llvm::Value *ThreadId = getThreadID(CGF, Loc);
3562   llvm::FunctionCallee StaticInitFunction =
3563       createForStaticInitFunction(Values.IVSize, Values.IVSigned);
3564   emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
3565                         ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown,
3566                         OMPC_SCHEDULE_MODIFIER_unknown, Values);
3567 }
3568 
3569 void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF,
3570                                           SourceLocation Loc,
3571                                           OpenMPDirectiveKind DKind) {
3572   if (!CGF.HaveInsertPoint())
3573     return;
3574   // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid);
3575   llvm::Value *Args[] = {
3576       emitUpdateLocation(CGF, Loc,
3577                          isOpenMPDistributeDirective(DKind)
3578                              ? OMP_IDENT_WORK_DISTRIBUTE
3579                              : isOpenMPLoopDirective(DKind)
3580                                    ? OMP_IDENT_WORK_LOOP
3581                                    : OMP_IDENT_WORK_SECTIONS),
3582       getThreadID(CGF, Loc)};
3583   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_for_static_fini),
3584                       Args);
3585 }
3586 
3587 void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
3588                                                  SourceLocation Loc,
3589                                                  unsigned IVSize,
3590                                                  bool IVSigned) {
3591   if (!CGF.HaveInsertPoint())
3592     return;
3593   // Call __kmpc_for_dynamic_fini_(4|8)[u](ident_t *loc, kmp_int32 tid);
3594   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
3595   CGF.EmitRuntimeCall(createDispatchFiniFunction(IVSize, IVSigned), Args);
3596 }
3597 
3598 llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF,
3599                                           SourceLocation Loc, unsigned IVSize,
3600                                           bool IVSigned, Address IL,
3601                                           Address LB, Address UB,
3602                                           Address ST) {
3603   // Call __kmpc_dispatch_next(
3604   //          ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter,
3605   //          kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper,
3606   //          kmp_int[32|64] *p_stride);
3607   llvm::Value *Args[] = {
3608       emitUpdateLocation(CGF, Loc),
3609       getThreadID(CGF, Loc),
3610       IL.getPointer(), // &isLastIter
3611       LB.getPointer(), // &Lower
3612       UB.getPointer(), // &Upper
3613       ST.getPointer()  // &Stride
3614   };
3615   llvm::Value *Call =
3616       CGF.EmitRuntimeCall(createDispatchNextFunction(IVSize, IVSigned), Args);
3617   return CGF.EmitScalarConversion(
3618       Call, CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/1),
3619       CGF.getContext().BoolTy, Loc);
3620 }
3621 
3622 void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
3623                                            llvm::Value *NumThreads,
3624                                            SourceLocation Loc) {
3625   if (!CGF.HaveInsertPoint())
3626     return;
3627   // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads)
3628   llvm::Value *Args[] = {
3629       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
3630       CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)};
3631   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_num_threads),
3632                       Args);
3633 }
3634 
3635 void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF,
3636                                          OpenMPProcBindClauseKind ProcBind,
3637                                          SourceLocation Loc) {
3638   if (!CGF.HaveInsertPoint())
3639     return;
3640   // Constants for proc bind value accepted by the runtime.
3641   enum ProcBindTy {
3642     ProcBindFalse = 0,
3643     ProcBindTrue,
3644     ProcBindMaster,
3645     ProcBindClose,
3646     ProcBindSpread,
3647     ProcBindIntel,
3648     ProcBindDefault
3649   } RuntimeProcBind;
3650   switch (ProcBind) {
3651   case OMPC_PROC_BIND_master:
3652     RuntimeProcBind = ProcBindMaster;
3653     break;
3654   case OMPC_PROC_BIND_close:
3655     RuntimeProcBind = ProcBindClose;
3656     break;
3657   case OMPC_PROC_BIND_spread:
3658     RuntimeProcBind = ProcBindSpread;
3659     break;
3660   case OMPC_PROC_BIND_unknown:
3661     llvm_unreachable("Unsupported proc_bind value.");
3662   }
3663   // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind)
3664   llvm::Value *Args[] = {
3665       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
3666       llvm::ConstantInt::get(CGM.IntTy, RuntimeProcBind, /*isSigned=*/true)};
3667   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_proc_bind), Args);
3668 }
3669 
3670 void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>,
3671                                 SourceLocation Loc) {
3672   if (!CGF.HaveInsertPoint())
3673     return;
3674   // Build call void __kmpc_flush(ident_t *loc)
3675   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_flush),
3676                       emitUpdateLocation(CGF, Loc));
3677 }
3678 
namespace {
/// Field indexes of the kmp_task_t type, in declaration order.
enum KmpTaskTFields {
  /// Shared variables list.
  KmpTaskTShareds = 0,
  /// Routine executed by the task.
  KmpTaskTRoutine,
  /// Partition id, used for untied tasks.
  KmpTaskTPartId,
  /// Function that calls the destructors of private variables.
  Data1,
  /// Priority of the task.
  Data2,
  /// Lower bound (taskloops only).
  KmpTaskTLowerBound,
  /// Upper bound (taskloops only).
  KmpTaskTUpperBound,
  /// Stride (taskloops only).
  KmpTaskTStride,
  /// "Is last iteration" flag (taskloops only).
  KmpTaskTLastIter,
  /// Reduction data (taskloops only).
  KmpTaskTReductions,
};
} // end anonymous namespace
3704 
3705 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::empty() const {
3706   return OffloadEntriesTargetRegion.empty() &&
3707          OffloadEntriesDeviceGlobalVar.empty();
3708 }
3709 
3710 /// Initialize target region entry.
3711 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3712     initializeTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
3713                                     StringRef ParentName, unsigned LineNum,
3714                                     unsigned Order) {
3715   assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
3716                                              "only required for the device "
3717                                              "code generation.");
3718   OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] =
3719       OffloadEntryInfoTargetRegion(Order, /*Addr=*/nullptr, /*ID=*/nullptr,
3720                                    OMPTargetRegionEntryTargetRegion);
3721   ++OffloadingEntriesNum;
3722 }
3723 
3724 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3725     registerTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
3726                                   StringRef ParentName, unsigned LineNum,
3727                                   llvm::Constant *Addr, llvm::Constant *ID,
3728                                   OMPTargetRegionEntryKind Flags) {
3729   // If we are emitting code for a target, the entry is already initialized,
3730   // only has to be registered.
3731   if (CGM.getLangOpts().OpenMPIsDevice) {
3732     if (!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum)) {
3733       unsigned DiagID = CGM.getDiags().getCustomDiagID(
3734           DiagnosticsEngine::Error,
3735           "Unable to find target region on line '%0' in the device code.");
3736       CGM.getDiags().Report(DiagID) << LineNum;
3737       return;
3738     }
3739     auto &Entry =
3740         OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum];
3741     assert(Entry.isValid() && "Entry not initialized!");
3742     Entry.setAddress(Addr);
3743     Entry.setID(ID);
3744     Entry.setFlags(Flags);
3745   } else {
3746     OffloadEntryInfoTargetRegion Entry(OffloadingEntriesNum, Addr, ID, Flags);
3747     OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = Entry;
3748     ++OffloadingEntriesNum;
3749   }
3750 }
3751 
3752 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::hasTargetRegionEntryInfo(
3753     unsigned DeviceID, unsigned FileID, StringRef ParentName,
3754     unsigned LineNum) const {
3755   auto PerDevice = OffloadEntriesTargetRegion.find(DeviceID);
3756   if (PerDevice == OffloadEntriesTargetRegion.end())
3757     return false;
3758   auto PerFile = PerDevice->second.find(FileID);
3759   if (PerFile == PerDevice->second.end())
3760     return false;
3761   auto PerParentName = PerFile->second.find(ParentName);
3762   if (PerParentName == PerFile->second.end())
3763     return false;
3764   auto PerLine = PerParentName->second.find(LineNum);
3765   if (PerLine == PerParentName->second.end())
3766     return false;
3767   // Fail if this entry is already registered.
3768   if (PerLine->second.getAddress() || PerLine->second.getID())
3769     return false;
3770   return true;
3771 }
3772 
3773 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::actOnTargetRegionEntriesInfo(
3774     const OffloadTargetRegionEntryInfoActTy &Action) {
3775   // Scan all target region entries and perform the provided action.
3776   for (const auto &D : OffloadEntriesTargetRegion)
3777     for (const auto &F : D.second)
3778       for (const auto &P : F.second)
3779         for (const auto &L : P.second)
3780           Action(D.first, F.first, P.first(), L.first, L.second);
3781 }
3782 
3783 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3784     initializeDeviceGlobalVarEntryInfo(StringRef Name,
3785                                        OMPTargetGlobalVarEntryKind Flags,
3786                                        unsigned Order) {
3787   assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
3788                                              "only required for the device "
3789                                              "code generation.");
3790   OffloadEntriesDeviceGlobalVar.try_emplace(Name, Order, Flags);
3791   ++OffloadingEntriesNum;
3792 }
3793 
3794 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3795     registerDeviceGlobalVarEntryInfo(StringRef VarName, llvm::Constant *Addr,
3796                                      CharUnits VarSize,
3797                                      OMPTargetGlobalVarEntryKind Flags,
3798                                      llvm::GlobalValue::LinkageTypes Linkage) {
3799   if (CGM.getLangOpts().OpenMPIsDevice) {
3800     auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
3801     assert(Entry.isValid() && Entry.getFlags() == Flags &&
3802            "Entry not initialized!");
3803     assert((!Entry.getAddress() || Entry.getAddress() == Addr) &&
3804            "Resetting with the new address.");
3805     if (Entry.getAddress() && hasDeviceGlobalVarEntryInfo(VarName)) {
3806       if (Entry.getVarSize().isZero()) {
3807         Entry.setVarSize(VarSize);
3808         Entry.setLinkage(Linkage);
3809       }
3810       return;
3811     }
3812     Entry.setVarSize(VarSize);
3813     Entry.setLinkage(Linkage);
3814     Entry.setAddress(Addr);
3815   } else {
3816     if (hasDeviceGlobalVarEntryInfo(VarName)) {
3817       auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
3818       assert(Entry.isValid() && Entry.getFlags() == Flags &&
3819              "Entry not initialized!");
3820       assert((!Entry.getAddress() || Entry.getAddress() == Addr) &&
3821              "Resetting with the new address.");
3822       if (Entry.getVarSize().isZero()) {
3823         Entry.setVarSize(VarSize);
3824         Entry.setLinkage(Linkage);
3825       }
3826       return;
3827     }
3828     OffloadEntriesDeviceGlobalVar.try_emplace(
3829         VarName, OffloadingEntriesNum, Addr, VarSize, Flags, Linkage);
3830     ++OffloadingEntriesNum;
3831   }
3832 }
3833 
3834 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3835     actOnDeviceGlobalVarEntriesInfo(
3836         const OffloadDeviceGlobalVarEntryInfoActTy &Action) {
3837   // Scan all target region entries and perform the provided action.
3838   for (const auto &E : OffloadEntriesDeviceGlobalVar)
3839     Action(E.getKey(), E.getValue());
3840 }
3841 
3842 llvm::Function *
3843 CGOpenMPRuntime::createOffloadingBinaryDescriptorRegistration() {
3844   // If we don't have entries or if we are emitting code for the device, we
3845   // don't need to do anything.
3846   if (CGM.getLangOpts().OpenMPIsDevice || OffloadEntriesInfoManager.empty())
3847     return nullptr;
3848 
3849   llvm::Module &M = CGM.getModule();
3850   ASTContext &C = CGM.getContext();
3851 
3852   // Get list of devices we care about
3853   const std::vector<llvm::Triple> &Devices = CGM.getLangOpts().OMPTargetTriples;
3854 
3855   // We should be creating an offloading descriptor only if there are devices
3856   // specified.
3857   assert(!Devices.empty() && "No OpenMP offloading devices??");
3858 
3859   // Create the external variables that will point to the begin and end of the
3860   // host entries section. These will be defined by the linker.
3861   llvm::Type *OffloadEntryTy =
3862       CGM.getTypes().ConvertTypeForMem(getTgtOffloadEntryQTy());
3863   std::string EntriesBeginName = getName({"omp_offloading", "entries_begin"});
3864   auto *HostEntriesBegin = new llvm::GlobalVariable(
3865       M, OffloadEntryTy, /*isConstant=*/true,
3866       llvm::GlobalValue::ExternalLinkage, /*Initializer=*/nullptr,
3867       EntriesBeginName);
3868   std::string EntriesEndName = getName({"omp_offloading", "entries_end"});
3869   auto *HostEntriesEnd =
3870       new llvm::GlobalVariable(M, OffloadEntryTy, /*isConstant=*/true,
3871                                llvm::GlobalValue::ExternalLinkage,
3872                                /*Initializer=*/nullptr, EntriesEndName);
3873 
3874   // Create all device images
3875   auto *DeviceImageTy = cast<llvm::StructType>(
3876       CGM.getTypes().ConvertTypeForMem(getTgtDeviceImageQTy()));
3877   ConstantInitBuilder DeviceImagesBuilder(CGM);
3878   ConstantArrayBuilder DeviceImagesEntries =
3879       DeviceImagesBuilder.beginArray(DeviceImageTy);
3880 
3881   for (const llvm::Triple &Device : Devices) {
3882     StringRef T = Device.getTriple();
3883     std::string BeginName = getName({"omp_offloading", "img_start", ""});
3884     auto *ImgBegin = new llvm::GlobalVariable(
3885         M, CGM.Int8Ty, /*isConstant=*/true,
3886         llvm::GlobalValue::ExternalWeakLinkage,
3887         /*Initializer=*/nullptr, Twine(BeginName).concat(T));
3888     std::string EndName = getName({"omp_offloading", "img_end", ""});
3889     auto *ImgEnd = new llvm::GlobalVariable(
3890         M, CGM.Int8Ty, /*isConstant=*/true,
3891         llvm::GlobalValue::ExternalWeakLinkage,
3892         /*Initializer=*/nullptr, Twine(EndName).concat(T));
3893 
3894     llvm::Constant *Data[] = {ImgBegin, ImgEnd, HostEntriesBegin,
3895                               HostEntriesEnd};
3896     createConstantGlobalStructAndAddToParent(CGM, getTgtDeviceImageQTy(), Data,
3897                                              DeviceImagesEntries);
3898   }
3899 
3900   // Create device images global array.
3901   std::string ImagesName = getName({"omp_offloading", "device_images"});
3902   llvm::GlobalVariable *DeviceImages =
3903       DeviceImagesEntries.finishAndCreateGlobal(ImagesName,
3904                                                 CGM.getPointerAlign(),
3905                                                 /*isConstant=*/true);
3906   DeviceImages->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
3907 
3908   // This is a Zero array to be used in the creation of the constant expressions
3909   llvm::Constant *Index[] = {llvm::Constant::getNullValue(CGM.Int32Ty),
3910                              llvm::Constant::getNullValue(CGM.Int32Ty)};
3911 
3912   // Create the target region descriptor.
3913   llvm::Constant *Data[] = {
3914       llvm::ConstantInt::get(CGM.Int32Ty, Devices.size()),
3915       llvm::ConstantExpr::getGetElementPtr(DeviceImages->getValueType(),
3916                                            DeviceImages, Index),
3917       HostEntriesBegin, HostEntriesEnd};
3918   std::string Descriptor = getName({"omp_offloading", "descriptor"});
3919   llvm::GlobalVariable *Desc = createGlobalStruct(
3920       CGM, getTgtBinaryDescriptorQTy(), /*IsConstant=*/true, Data, Descriptor);
3921 
3922   // Emit code to register or unregister the descriptor at execution
3923   // startup or closing, respectively.
3924 
3925   llvm::Function *UnRegFn;
3926   {
3927     FunctionArgList Args;
3928     ImplicitParamDecl DummyPtr(C, C.VoidPtrTy, ImplicitParamDecl::Other);
3929     Args.push_back(&DummyPtr);
3930 
3931     CodeGenFunction CGF(CGM);
3932     // Disable debug info for global (de-)initializer because they are not part
3933     // of some particular construct.
3934     CGF.disableDebugInfo();
3935     const auto &FI =
3936         CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
3937     llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
3938     std::string UnregName = getName({"omp_offloading", "descriptor_unreg"});
3939     UnRegFn = CGM.CreateGlobalInitOrDestructFunction(FTy, UnregName, FI);
3940     CGF.StartFunction(GlobalDecl(), C.VoidTy, UnRegFn, FI, Args);
3941     CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_unregister_lib),
3942                         Desc);
3943     CGF.FinishFunction();
3944   }
3945   llvm::Function *RegFn;
3946   {
3947     CodeGenFunction CGF(CGM);
3948     // Disable debug info for global (de-)initializer because they are not part
3949     // of some particular construct.
3950     CGF.disableDebugInfo();
3951     const auto &FI = CGM.getTypes().arrangeNullaryFunction();
3952     llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
3953 
3954     // Encode offload target triples into the registration function name. It
3955     // will serve as a comdat key for the registration/unregistration code for
3956     // this particular combination of offloading targets.
3957     SmallVector<StringRef, 4U> RegFnNameParts(Devices.size() + 2U);
3958     RegFnNameParts[0] = "omp_offloading";
3959     RegFnNameParts[1] = "descriptor_reg";
3960     llvm::transform(Devices, std::next(RegFnNameParts.begin(), 2),
3961                     [](const llvm::Triple &T) -> const std::string& {
3962                       return T.getTriple();
3963                     });
3964     llvm::sort(std::next(RegFnNameParts.begin(), 2), RegFnNameParts.end());
3965     std::string Descriptor = getName(RegFnNameParts);
3966     RegFn = CGM.CreateGlobalInitOrDestructFunction(FTy, Descriptor, FI);
3967     CGF.StartFunction(GlobalDecl(), C.VoidTy, RegFn, FI, FunctionArgList());
3968     CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_register_lib), Desc);
3969     // Create a variable to drive the registration and unregistration of the
3970     // descriptor, so we can reuse the logic that emits Ctors and Dtors.
3971     ImplicitParamDecl RegUnregVar(C, C.getTranslationUnitDecl(),
3972                                   SourceLocation(), nullptr, C.CharTy,
3973                                   ImplicitParamDecl::Other);
3974     CGM.getCXXABI().registerGlobalDtor(CGF, RegUnregVar, UnRegFn, Desc);
3975     CGF.FinishFunction();
3976   }
3977   if (CGM.supportsCOMDAT()) {
3978     // It is sufficient to call registration function only once, so create a
3979     // COMDAT group for registration/unregistration functions and associated
3980     // data. That would reduce startup time and code size. Registration
3981     // function serves as a COMDAT group key.
3982     llvm::Comdat *ComdatKey = M.getOrInsertComdat(RegFn->getName());
3983     RegFn->setLinkage(llvm::GlobalValue::LinkOnceAnyLinkage);
3984     RegFn->setVisibility(llvm::GlobalValue::HiddenVisibility);
3985     RegFn->setComdat(ComdatKey);
3986     UnRegFn->setComdat(ComdatKey);
3987     DeviceImages->setComdat(ComdatKey);
3988     Desc->setComdat(ComdatKey);
3989   }
3990   return RegFn;
3991 }
3992 
3993 void CGOpenMPRuntime::createOffloadEntry(
3994     llvm::Constant *ID, llvm::Constant *Addr, uint64_t Size, int32_t Flags,
3995     llvm::GlobalValue::LinkageTypes Linkage) {
3996   StringRef Name = Addr->getName();
3997   llvm::Module &M = CGM.getModule();
3998   llvm::LLVMContext &C = M.getContext();
3999 
4000   // Create constant string with the name.
4001   llvm::Constant *StrPtrInit = llvm::ConstantDataArray::getString(C, Name);
4002 
4003   std::string StringName = getName({"omp_offloading", "entry_name"});
4004   auto *Str = new llvm::GlobalVariable(
4005       M, StrPtrInit->getType(), /*isConstant=*/true,
4006       llvm::GlobalValue::InternalLinkage, StrPtrInit, StringName);
4007   Str->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
4008 
4009   llvm::Constant *Data[] = {llvm::ConstantExpr::getBitCast(ID, CGM.VoidPtrTy),
4010                             llvm::ConstantExpr::getBitCast(Str, CGM.Int8PtrTy),
4011                             llvm::ConstantInt::get(CGM.SizeTy, Size),
4012                             llvm::ConstantInt::get(CGM.Int32Ty, Flags),
4013                             llvm::ConstantInt::get(CGM.Int32Ty, 0)};
4014   std::string EntryName = getName({"omp_offloading", "entry", ""});
4015   llvm::GlobalVariable *Entry = createGlobalStruct(
4016       CGM, getTgtOffloadEntryQTy(), /*IsConstant=*/true, Data,
4017       Twine(EntryName).concat(Name), llvm::GlobalValue::WeakAnyLinkage);
4018 
4019   // The entry has to be created in the section the linker expects it to be.
4020   std::string Section = getName({"omp_offloading", "entries"});
4021   Entry->setSection(Section);
4022 }
4023 
4024 void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() {
4025   // Emit the offloading entries and metadata so that the device codegen side
4026   // can easily figure out what to emit. The produced metadata looks like
4027   // this:
4028   //
4029   // !omp_offload.info = !{!1, ...}
4030   //
4031   // Right now we only generate metadata for function that contain target
4032   // regions.
4033 
4034   // If we do not have entries, we don't need to do anything.
4035   if (OffloadEntriesInfoManager.empty())
4036     return;
4037 
4038   llvm::Module &M = CGM.getModule();
4039   llvm::LLVMContext &C = M.getContext();
4040   SmallVector<const OffloadEntriesInfoManagerTy::OffloadEntryInfo *, 16>
4041       OrderedEntries(OffloadEntriesInfoManager.size());
4042   llvm::SmallVector<StringRef, 16> ParentFunctions(
4043       OffloadEntriesInfoManager.size());
4044 
4045   // Auxiliary methods to create metadata values and strings.
4046   auto &&GetMDInt = [this](unsigned V) {
4047     return llvm::ConstantAsMetadata::get(
4048         llvm::ConstantInt::get(CGM.Int32Ty, V));
4049   };
4050 
4051   auto &&GetMDString = [&C](StringRef V) { return llvm::MDString::get(C, V); };
4052 
4053   // Create the offloading info metadata node.
4054   llvm::NamedMDNode *MD = M.getOrInsertNamedMetadata("omp_offload.info");
4055 
4056   // Create function that emits metadata for each target region entry;
4057   auto &&TargetRegionMetadataEmitter =
4058       [&C, MD, &OrderedEntries, &ParentFunctions, &GetMDInt, &GetMDString](
4059           unsigned DeviceID, unsigned FileID, StringRef ParentName,
4060           unsigned Line,
4061           const OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion &E) {
4062         // Generate metadata for target regions. Each entry of this metadata
4063         // contains:
4064         // - Entry 0 -> Kind of this type of metadata (0).
4065         // - Entry 1 -> Device ID of the file where the entry was identified.
4066         // - Entry 2 -> File ID of the file where the entry was identified.
4067         // - Entry 3 -> Mangled name of the function where the entry was
4068         // identified.
4069         // - Entry 4 -> Line in the file where the entry was identified.
4070         // - Entry 5 -> Order the entry was created.
4071         // The first element of the metadata node is the kind.
4072         llvm::Metadata *Ops[] = {GetMDInt(E.getKind()), GetMDInt(DeviceID),
4073                                  GetMDInt(FileID),      GetMDString(ParentName),
4074                                  GetMDInt(Line),        GetMDInt(E.getOrder())};
4075 
4076         // Save this entry in the right position of the ordered entries array.
4077         OrderedEntries[E.getOrder()] = &E;
4078         ParentFunctions[E.getOrder()] = ParentName;
4079 
4080         // Add metadata to the named metadata node.
4081         MD->addOperand(llvm::MDNode::get(C, Ops));
4082       };
4083 
4084   OffloadEntriesInfoManager.actOnTargetRegionEntriesInfo(
4085       TargetRegionMetadataEmitter);
4086 
4087   // Create function that emits metadata for each device global variable entry;
4088   auto &&DeviceGlobalVarMetadataEmitter =
4089       [&C, &OrderedEntries, &GetMDInt, &GetMDString,
4090        MD](StringRef MangledName,
4091            const OffloadEntriesInfoManagerTy::OffloadEntryInfoDeviceGlobalVar
4092                &E) {
4093         // Generate metadata for global variables. Each entry of this metadata
4094         // contains:
4095         // - Entry 0 -> Kind of this type of metadata (1).
4096         // - Entry 1 -> Mangled name of the variable.
4097         // - Entry 2 -> Declare target kind.
4098         // - Entry 3 -> Order the entry was created.
4099         // The first element of the metadata node is the kind.
4100         llvm::Metadata *Ops[] = {
4101             GetMDInt(E.getKind()), GetMDString(MangledName),
4102             GetMDInt(E.getFlags()), GetMDInt(E.getOrder())};
4103 
4104         // Save this entry in the right position of the ordered entries array.
4105         OrderedEntries[E.getOrder()] = &E;
4106 
4107         // Add metadata to the named metadata node.
4108         MD->addOperand(llvm::MDNode::get(C, Ops));
4109       };
4110 
4111   OffloadEntriesInfoManager.actOnDeviceGlobalVarEntriesInfo(
4112       DeviceGlobalVarMetadataEmitter);
4113 
4114   for (const auto *E : OrderedEntries) {
4115     assert(E && "All ordered entries must exist!");
4116     if (const auto *CE =
4117             dyn_cast<OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion>(
4118                 E)) {
4119       if (!CE->getID() || !CE->getAddress()) {
4120         // Do not blame the entry if the parent funtion is not emitted.
4121         StringRef FnName = ParentFunctions[CE->getOrder()];
4122         if (!CGM.GetGlobalValue(FnName))
4123           continue;
4124         unsigned DiagID = CGM.getDiags().getCustomDiagID(
4125             DiagnosticsEngine::Error,
4126             "Offloading entry for target region is incorrect: either the "
4127             "address or the ID is invalid.");
4128         CGM.getDiags().Report(DiagID);
4129         continue;
4130       }
4131       createOffloadEntry(CE->getID(), CE->getAddress(), /*Size=*/0,
4132                          CE->getFlags(), llvm::GlobalValue::WeakAnyLinkage);
4133     } else if (const auto *CE =
4134                    dyn_cast<OffloadEntriesInfoManagerTy::
4135                                 OffloadEntryInfoDeviceGlobalVar>(E)) {
4136       OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags =
4137           static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>(
4138               CE->getFlags());
4139       switch (Flags) {
4140       case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo: {
4141         if (!CE->getAddress()) {
4142           unsigned DiagID = CGM.getDiags().getCustomDiagID(
4143               DiagnosticsEngine::Error,
4144               "Offloading entry for declare target variable is incorrect: the "
4145               "address is invalid.");
4146           CGM.getDiags().Report(DiagID);
4147           continue;
4148         }
4149         // The vaiable has no definition - no need to add the entry.
4150         if (CE->getVarSize().isZero())
4151           continue;
4152         break;
4153       }
4154       case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink:
4155         assert(((CGM.getLangOpts().OpenMPIsDevice && !CE->getAddress()) ||
4156                 (!CGM.getLangOpts().OpenMPIsDevice && CE->getAddress())) &&
4157                "Declaret target link address is set.");
4158         if (CGM.getLangOpts().OpenMPIsDevice)
4159           continue;
4160         if (!CE->getAddress()) {
4161           unsigned DiagID = CGM.getDiags().getCustomDiagID(
4162               DiagnosticsEngine::Error,
4163               "Offloading entry for declare target variable is incorrect: the "
4164               "address is invalid.");
4165           CGM.getDiags().Report(DiagID);
4166           continue;
4167         }
4168         break;
4169       }
4170       createOffloadEntry(CE->getAddress(), CE->getAddress(),
4171                          CE->getVarSize().getQuantity(), Flags,
4172                          CE->getLinkage());
4173     } else {
4174       llvm_unreachable("Unsupported entry kind.");
4175     }
4176   }
4177 }
4178 
4179 /// Loads all the offload entries information from the host IR
4180 /// metadata.
4181 void CGOpenMPRuntime::loadOffloadInfoMetadata() {
4182   // If we are in target mode, load the metadata from the host IR. This code has
4183   // to match the metadaata creation in createOffloadEntriesAndInfoMetadata().
4184 
4185   if (!CGM.getLangOpts().OpenMPIsDevice)
4186     return;
4187 
4188   if (CGM.getLangOpts().OMPHostIRFile.empty())
4189     return;
4190 
4191   auto Buf = llvm::MemoryBuffer::getFile(CGM.getLangOpts().OMPHostIRFile);
4192   if (auto EC = Buf.getError()) {
4193     CGM.getDiags().Report(diag::err_cannot_open_file)
4194         << CGM.getLangOpts().OMPHostIRFile << EC.message();
4195     return;
4196   }
4197 
4198   llvm::LLVMContext C;
4199   auto ME = expectedToErrorOrAndEmitErrors(
4200       C, llvm::parseBitcodeFile(Buf.get()->getMemBufferRef(), C));
4201 
4202   if (auto EC = ME.getError()) {
4203     unsigned DiagID = CGM.getDiags().getCustomDiagID(
4204         DiagnosticsEngine::Error, "Unable to parse host IR file '%0':'%1'");
4205     CGM.getDiags().Report(DiagID)
4206         << CGM.getLangOpts().OMPHostIRFile << EC.message();
4207     return;
4208   }
4209 
4210   llvm::NamedMDNode *MD = ME.get()->getNamedMetadata("omp_offload.info");
4211   if (!MD)
4212     return;
4213 
4214   for (llvm::MDNode *MN : MD->operands()) {
4215     auto &&GetMDInt = [MN](unsigned Idx) {
4216       auto *V = cast<llvm::ConstantAsMetadata>(MN->getOperand(Idx));
4217       return cast<llvm::ConstantInt>(V->getValue())->getZExtValue();
4218     };
4219 
4220     auto &&GetMDString = [MN](unsigned Idx) {
4221       auto *V = cast<llvm::MDString>(MN->getOperand(Idx));
4222       return V->getString();
4223     };
4224 
4225     switch (GetMDInt(0)) {
4226     default:
4227       llvm_unreachable("Unexpected metadata!");
4228       break;
4229     case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
4230         OffloadingEntryInfoTargetRegion:
4231       OffloadEntriesInfoManager.initializeTargetRegionEntryInfo(
4232           /*DeviceID=*/GetMDInt(1), /*FileID=*/GetMDInt(2),
4233           /*ParentName=*/GetMDString(3), /*Line=*/GetMDInt(4),
4234           /*Order=*/GetMDInt(5));
4235       break;
4236     case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
4237         OffloadingEntryInfoDeviceGlobalVar:
4238       OffloadEntriesInfoManager.initializeDeviceGlobalVarEntryInfo(
4239           /*MangledName=*/GetMDString(1),
4240           static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>(
4241               /*Flags=*/GetMDInt(2)),
4242           /*Order=*/GetMDInt(3));
4243       break;
4244     }
4245   }
4246 }
4247 
4248 void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) {
4249   if (!KmpRoutineEntryPtrTy) {
4250     // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type.
4251     ASTContext &C = CGM.getContext();
4252     QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy};
4253     FunctionProtoType::ExtProtoInfo EPI;
4254     KmpRoutineEntryPtrQTy = C.getPointerType(
4255         C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI));
4256     KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy);
4257   }
4258 }
4259 
4260 QualType CGOpenMPRuntime::getTgtOffloadEntryQTy() {
4261   // Make sure the type of the entry is already created. This is the type we
4262   // have to create:
4263   // struct __tgt_offload_entry{
4264   //   void      *addr;       // Pointer to the offload entry info.
4265   //                          // (function or global)
4266   //   char      *name;       // Name of the function or global.
4267   //   size_t     size;       // Size of the entry info (0 if it a function).
4268   //   int32_t    flags;      // Flags associated with the entry, e.g. 'link'.
4269   //   int32_t    reserved;   // Reserved, to use by the runtime library.
4270   // };
4271   if (TgtOffloadEntryQTy.isNull()) {
4272     ASTContext &C = CGM.getContext();
4273     RecordDecl *RD = C.buildImplicitRecord("__tgt_offload_entry");
4274     RD->startDefinition();
4275     addFieldToRecordDecl(C, RD, C.VoidPtrTy);
4276     addFieldToRecordDecl(C, RD, C.getPointerType(C.CharTy));
4277     addFieldToRecordDecl(C, RD, C.getSizeType());
4278     addFieldToRecordDecl(
4279         C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
4280     addFieldToRecordDecl(
4281         C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
4282     RD->completeDefinition();
4283     RD->addAttr(PackedAttr::CreateImplicit(C));
4284     TgtOffloadEntryQTy = C.getRecordType(RD);
4285   }
4286   return TgtOffloadEntryQTy;
4287 }
4288 
4289 QualType CGOpenMPRuntime::getTgtDeviceImageQTy() {
4290   // These are the types we need to build:
4291   // struct __tgt_device_image{
4292   // void   *ImageStart;       // Pointer to the target code start.
4293   // void   *ImageEnd;         // Pointer to the target code end.
4294   // // We also add the host entries to the device image, as it may be useful
4295   // // for the target runtime to have access to that information.
4296   // __tgt_offload_entry  *EntriesBegin;   // Begin of the table with all
4297   //                                       // the entries.
4298   // __tgt_offload_entry  *EntriesEnd;     // End of the table with all the
4299   //                                       // entries (non inclusive).
4300   // };
4301   if (TgtDeviceImageQTy.isNull()) {
4302     ASTContext &C = CGM.getContext();
4303     RecordDecl *RD = C.buildImplicitRecord("__tgt_device_image");
4304     RD->startDefinition();
4305     addFieldToRecordDecl(C, RD, C.VoidPtrTy);
4306     addFieldToRecordDecl(C, RD, C.VoidPtrTy);
4307     addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy()));
4308     addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy()));
4309     RD->completeDefinition();
4310     TgtDeviceImageQTy = C.getRecordType(RD);
4311   }
4312   return TgtDeviceImageQTy;
4313 }
4314 
4315 QualType CGOpenMPRuntime::getTgtBinaryDescriptorQTy() {
4316   // struct __tgt_bin_desc{
4317   //   int32_t              NumDevices;      // Number of devices supported.
4318   //   __tgt_device_image   *DeviceImages;   // Arrays of device images
4319   //                                         // (one per device).
4320   //   __tgt_offload_entry  *EntriesBegin;   // Begin of the table with all the
4321   //                                         // entries.
4322   //   __tgt_offload_entry  *EntriesEnd;     // End of the table with all the
4323   //                                         // entries (non inclusive).
4324   // };
4325   if (TgtBinaryDescriptorQTy.isNull()) {
4326     ASTContext &C = CGM.getContext();
4327     RecordDecl *RD = C.buildImplicitRecord("__tgt_bin_desc");
4328     RD->startDefinition();
4329     addFieldToRecordDecl(
4330         C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
4331     addFieldToRecordDecl(C, RD, C.getPointerType(getTgtDeviceImageQTy()));
4332     addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy()));
4333     addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy()));
4334     RD->completeDefinition();
4335     TgtBinaryDescriptorQTy = C.getRecordType(RD);
4336   }
4337   return TgtBinaryDescriptorQTy;
4338 }
4339 
4340 namespace {
4341 struct PrivateHelpersTy {
4342   PrivateHelpersTy(const VarDecl *Original, const VarDecl *PrivateCopy,
4343                    const VarDecl *PrivateElemInit)
4344       : Original(Original), PrivateCopy(PrivateCopy),
4345         PrivateElemInit(PrivateElemInit) {}
4346   const VarDecl *Original;
4347   const VarDecl *PrivateCopy;
4348   const VarDecl *PrivateElemInit;
4349 };
4350 typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy;
4351 } // anonymous namespace
4352 
4353 static RecordDecl *
4354 createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) {
4355   if (!Privates.empty()) {
4356     ASTContext &C = CGM.getContext();
4357     // Build struct .kmp_privates_t. {
4358     //         /*  private vars  */
4359     //       };
4360     RecordDecl *RD = C.buildImplicitRecord(".kmp_privates.t");
4361     RD->startDefinition();
4362     for (const auto &Pair : Privates) {
4363       const VarDecl *VD = Pair.second.Original;
4364       QualType Type = VD->getType().getNonReferenceType();
4365       FieldDecl *FD = addFieldToRecordDecl(C, RD, Type);
4366       if (VD->hasAttrs()) {
4367         for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()),
4368              E(VD->getAttrs().end());
4369              I != E; ++I)
4370           FD->addAttr(*I);
4371       }
4372     }
4373     RD->completeDefinition();
4374     return RD;
4375   }
4376   return nullptr;
4377 }
4378 
4379 static RecordDecl *
4380 createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind,
4381                          QualType KmpInt32Ty,
4382                          QualType KmpRoutineEntryPointerQTy) {
4383   ASTContext &C = CGM.getContext();
4384   // Build struct kmp_task_t {
4385   //         void *              shareds;
4386   //         kmp_routine_entry_t routine;
4387   //         kmp_int32           part_id;
4388   //         kmp_cmplrdata_t data1;
4389   //         kmp_cmplrdata_t data2;
4390   // For taskloops additional fields:
4391   //         kmp_uint64          lb;
4392   //         kmp_uint64          ub;
4393   //         kmp_int64           st;
4394   //         kmp_int32           liter;
4395   //         void *              reductions;
4396   //       };
4397   RecordDecl *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TTK_Union);
4398   UD->startDefinition();
4399   addFieldToRecordDecl(C, UD, KmpInt32Ty);
4400   addFieldToRecordDecl(C, UD, KmpRoutineEntryPointerQTy);
4401   UD->completeDefinition();
4402   QualType KmpCmplrdataTy = C.getRecordType(UD);
4403   RecordDecl *RD = C.buildImplicitRecord("kmp_task_t");
4404   RD->startDefinition();
4405   addFieldToRecordDecl(C, RD, C.VoidPtrTy);
4406   addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy);
4407   addFieldToRecordDecl(C, RD, KmpInt32Ty);
4408   addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
4409   addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
4410   if (isOpenMPTaskLoopDirective(Kind)) {
4411     QualType KmpUInt64Ty =
4412         CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
4413     QualType KmpInt64Ty =
4414         CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
4415     addFieldToRecordDecl(C, RD, KmpUInt64Ty);
4416     addFieldToRecordDecl(C, RD, KmpUInt64Ty);
4417     addFieldToRecordDecl(C, RD, KmpInt64Ty);
4418     addFieldToRecordDecl(C, RD, KmpInt32Ty);
4419     addFieldToRecordDecl(C, RD, C.VoidPtrTy);
4420   }
4421   RD->completeDefinition();
4422   return RD;
4423 }
4424 
4425 static RecordDecl *
4426 createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy,
4427                                      ArrayRef<PrivateDataTy> Privates) {
4428   ASTContext &C = CGM.getContext();
4429   // Build struct kmp_task_t_with_privates {
4430   //         kmp_task_t task_data;
4431   //         .kmp_privates_t. privates;
4432   //       };
4433   RecordDecl *RD = C.buildImplicitRecord("kmp_task_t_with_privates");
4434   RD->startDefinition();
4435   addFieldToRecordDecl(C, RD, KmpTaskTQTy);
4436   if (const RecordDecl *PrivateRD = createPrivatesRecordDecl(CGM, Privates))
4437     addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD));
4438   RD->completeDefinition();
4439   return RD;
4440 }
4441 
4442 /// Emit a proxy function which accepts kmp_task_t as the second
4443 /// argument.
4444 /// \code
4445 /// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
4446 ///   TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, tt,
4447 ///   For taskloops:
4448 ///   tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
4449 ///   tt->reductions, tt->shareds);
4450 ///   return 0;
4451 /// }
4452 /// \endcode
4453 static llvm::Function *
4454 emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc,
4455                       OpenMPDirectiveKind Kind, QualType KmpInt32Ty,
4456                       QualType KmpTaskTWithPrivatesPtrQTy,
4457                       QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy,
4458                       QualType SharedsPtrTy, llvm::Function *TaskFunction,
4459                       llvm::Value *TaskPrivatesMap) {
4460   ASTContext &C = CGM.getContext();
4461   FunctionArgList Args;
4462   ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
4463                             ImplicitParamDecl::Other);
4464   ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4465                                 KmpTaskTWithPrivatesPtrQTy.withRestrict(),
4466                                 ImplicitParamDecl::Other);
4467   Args.push_back(&GtidArg);
4468   Args.push_back(&TaskTypeArg);
4469   const auto &TaskEntryFnInfo =
4470       CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
4471   llvm::FunctionType *TaskEntryTy =
4472       CGM.getTypes().GetFunctionType(TaskEntryFnInfo);
4473   std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_entry", ""});
4474   auto *TaskEntry = llvm::Function::Create(
4475       TaskEntryTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
4476   CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskEntry, TaskEntryFnInfo);
4477   TaskEntry->setDoesNotRecurse();
4478   CodeGenFunction CGF(CGM);
4479   CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args,
4480                     Loc, Loc);
4481 
4482   // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map,
4483   // tt,
4484   // For taskloops:
4485   // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
4486   // tt->task_data.shareds);
4487   llvm::Value *GtidParam = CGF.EmitLoadOfScalar(
4488       CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc);
4489   LValue TDBase = CGF.EmitLoadOfPointerLValue(
4490       CGF.GetAddrOfLocalVar(&TaskTypeArg),
4491       KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
4492   const auto *KmpTaskTWithPrivatesQTyRD =
4493       cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
4494   LValue Base =
4495       CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
4496   const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
4497   auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
4498   LValue PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI);
4499   llvm::Value *PartidParam = PartIdLVal.getPointer();
4500 
4501   auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds);
4502   LValue SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI);
4503   llvm::Value *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4504       CGF.EmitLoadOfScalar(SharedsLVal, Loc),
4505       CGF.ConvertTypeForMem(SharedsPtrTy));
4506 
4507   auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
4508   llvm::Value *PrivatesParam;
4509   if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) {
4510     LValue PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI);
4511     PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4512         PrivatesLVal.getPointer(), CGF.VoidPtrTy);
4513   } else {
4514     PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
4515   }
4516 
4517   llvm::Value *CommonArgs[] = {GtidParam, PartidParam, PrivatesParam,
4518                                TaskPrivatesMap,
4519                                CGF.Builder
4520                                    .CreatePointerBitCastOrAddrSpaceCast(
4521                                        TDBase.getAddress(), CGF.VoidPtrTy)
4522                                    .getPointer()};
4523   SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs),
4524                                           std::end(CommonArgs));
4525   if (isOpenMPTaskLoopDirective(Kind)) {
4526     auto LBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound);
4527     LValue LBLVal = CGF.EmitLValueForField(Base, *LBFI);
4528     llvm::Value *LBParam = CGF.EmitLoadOfScalar(LBLVal, Loc);
4529     auto UBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound);
4530     LValue UBLVal = CGF.EmitLValueForField(Base, *UBFI);
4531     llvm::Value *UBParam = CGF.EmitLoadOfScalar(UBLVal, Loc);
4532     auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride);
4533     LValue StLVal = CGF.EmitLValueForField(Base, *StFI);
4534     llvm::Value *StParam = CGF.EmitLoadOfScalar(StLVal, Loc);
4535     auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
4536     LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
4537     llvm::Value *LIParam = CGF.EmitLoadOfScalar(LILVal, Loc);
4538     auto RFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTReductions);
4539     LValue RLVal = CGF.EmitLValueForField(Base, *RFI);
4540     llvm::Value *RParam = CGF.EmitLoadOfScalar(RLVal, Loc);
4541     CallArgs.push_back(LBParam);
4542     CallArgs.push_back(UBParam);
4543     CallArgs.push_back(StParam);
4544     CallArgs.push_back(LIParam);
4545     CallArgs.push_back(RParam);
4546   }
4547   CallArgs.push_back(SharedsParam);
4548 
4549   CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskFunction,
4550                                                   CallArgs);
4551   CGF.EmitStoreThroughLValue(RValue::get(CGF.Builder.getInt32(/*C=*/0)),
4552                              CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty));
4553   CGF.FinishFunction();
4554   return TaskEntry;
4555 }
4556 
4557 static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM,
4558                                             SourceLocation Loc,
4559                                             QualType KmpInt32Ty,
4560                                             QualType KmpTaskTWithPrivatesPtrQTy,
4561                                             QualType KmpTaskTWithPrivatesQTy) {
4562   ASTContext &C = CGM.getContext();
4563   FunctionArgList Args;
4564   ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
4565                             ImplicitParamDecl::Other);
4566   ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4567                                 KmpTaskTWithPrivatesPtrQTy.withRestrict(),
4568                                 ImplicitParamDecl::Other);
4569   Args.push_back(&GtidArg);
4570   Args.push_back(&TaskTypeArg);
4571   const auto &DestructorFnInfo =
4572       CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
4573   llvm::FunctionType *DestructorFnTy =
4574       CGM.getTypes().GetFunctionType(DestructorFnInfo);
4575   std::string Name =
4576       CGM.getOpenMPRuntime().getName({"omp_task_destructor", ""});
4577   auto *DestructorFn =
4578       llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage,
4579                              Name, &CGM.getModule());
4580   CGM.SetInternalFunctionAttributes(GlobalDecl(), DestructorFn,
4581                                     DestructorFnInfo);
4582   DestructorFn->setDoesNotRecurse();
4583   CodeGenFunction CGF(CGM);
4584   CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo,
4585                     Args, Loc, Loc);
4586 
4587   LValue Base = CGF.EmitLoadOfPointerLValue(
4588       CGF.GetAddrOfLocalVar(&TaskTypeArg),
4589       KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
4590   const auto *KmpTaskTWithPrivatesQTyRD =
4591       cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
4592   auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
4593   Base = CGF.EmitLValueForField(Base, *FI);
4594   for (const auto *Field :
4595        cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) {
4596     if (QualType::DestructionKind DtorKind =
4597             Field->getType().isDestructedType()) {
4598       LValue FieldLValue = CGF.EmitLValueForField(Base, Field);
4599       CGF.pushDestroy(DtorKind, FieldLValue.getAddress(), Field->getType());
4600     }
4601   }
4602   CGF.FinishFunction();
4603   return DestructorFn;
4604 }
4605 
4606 /// Emit a privates mapping function for correct handling of private and
4607 /// firstprivate variables.
4608 /// \code
4609 /// void .omp_task_privates_map.(const .privates. *noalias privs, <ty1>
4610 /// **noalias priv1,...,  <tyn> **noalias privn) {
4611 ///   *priv1 = &.privates.priv1;
4612 ///   ...;
4613 ///   *privn = &.privates.privn;
4614 /// }
4615 /// \endcode
4616 static llvm::Value *
4617 emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc,
4618                                ArrayRef<const Expr *> PrivateVars,
4619                                ArrayRef<const Expr *> FirstprivateVars,
4620                                ArrayRef<const Expr *> LastprivateVars,
4621                                QualType PrivatesQTy,
4622                                ArrayRef<PrivateDataTy> Privates) {
4623   ASTContext &C = CGM.getContext();
4624   FunctionArgList Args;
4625   ImplicitParamDecl TaskPrivatesArg(
4626       C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4627       C.getPointerType(PrivatesQTy).withConst().withRestrict(),
4628       ImplicitParamDecl::Other);
4629   Args.push_back(&TaskPrivatesArg);
4630   llvm::DenseMap<const VarDecl *, unsigned> PrivateVarsPos;
4631   unsigned Counter = 1;
4632   for (const Expr *E : PrivateVars) {
4633     Args.push_back(ImplicitParamDecl::Create(
4634         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4635         C.getPointerType(C.getPointerType(E->getType()))
4636             .withConst()
4637             .withRestrict(),
4638         ImplicitParamDecl::Other));
4639     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4640     PrivateVarsPos[VD] = Counter;
4641     ++Counter;
4642   }
4643   for (const Expr *E : FirstprivateVars) {
4644     Args.push_back(ImplicitParamDecl::Create(
4645         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4646         C.getPointerType(C.getPointerType(E->getType()))
4647             .withConst()
4648             .withRestrict(),
4649         ImplicitParamDecl::Other));
4650     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4651     PrivateVarsPos[VD] = Counter;
4652     ++Counter;
4653   }
4654   for (const Expr *E : LastprivateVars) {
4655     Args.push_back(ImplicitParamDecl::Create(
4656         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4657         C.getPointerType(C.getPointerType(E->getType()))
4658             .withConst()
4659             .withRestrict(),
4660         ImplicitParamDecl::Other));
4661     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4662     PrivateVarsPos[VD] = Counter;
4663     ++Counter;
4664   }
4665   const auto &TaskPrivatesMapFnInfo =
4666       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
4667   llvm::FunctionType *TaskPrivatesMapTy =
4668       CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo);
4669   std::string Name =
4670       CGM.getOpenMPRuntime().getName({"omp_task_privates_map", ""});
4671   auto *TaskPrivatesMap = llvm::Function::Create(
4672       TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage, Name,
4673       &CGM.getModule());
4674   CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskPrivatesMap,
4675                                     TaskPrivatesMapFnInfo);
4676   if (CGM.getLangOpts().Optimize) {
4677     TaskPrivatesMap->removeFnAttr(llvm::Attribute::NoInline);
4678     TaskPrivatesMap->removeFnAttr(llvm::Attribute::OptimizeNone);
4679     TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline);
4680   }
4681   CodeGenFunction CGF(CGM);
4682   CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap,
4683                     TaskPrivatesMapFnInfo, Args, Loc, Loc);
4684 
4685   // *privi = &.privates.privi;
4686   LValue Base = CGF.EmitLoadOfPointerLValue(
4687       CGF.GetAddrOfLocalVar(&TaskPrivatesArg),
4688       TaskPrivatesArg.getType()->castAs<PointerType>());
4689   const auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl());
4690   Counter = 0;
4691   for (const FieldDecl *Field : PrivatesQTyRD->fields()) {
4692     LValue FieldLVal = CGF.EmitLValueForField(Base, Field);
4693     const VarDecl *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]];
4694     LValue RefLVal =
4695         CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType());
4696     LValue RefLoadLVal = CGF.EmitLoadOfPointerLValue(
4697         RefLVal.getAddress(), RefLVal.getType()->castAs<PointerType>());
4698     CGF.EmitStoreOfScalar(FieldLVal.getPointer(), RefLoadLVal);
4699     ++Counter;
4700   }
4701   CGF.FinishFunction();
4702   return TaskPrivatesMap;
4703 }
4704 
4705 /// Emit initialization for private variables in task-based directives.
4706 static void emitPrivatesInit(CodeGenFunction &CGF,
4707                              const OMPExecutableDirective &D,
4708                              Address KmpTaskSharedsPtr, LValue TDBase,
4709                              const RecordDecl *KmpTaskTWithPrivatesQTyRD,
4710                              QualType SharedsTy, QualType SharedsPtrTy,
4711                              const OMPTaskDataTy &Data,
4712                              ArrayRef<PrivateDataTy> Privates, bool ForDup) {
4713   ASTContext &C = CGF.getContext();
4714   auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
4715   LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI);
4716   OpenMPDirectiveKind Kind = isOpenMPTaskLoopDirective(D.getDirectiveKind())
4717                                  ? OMPD_taskloop
4718                                  : OMPD_task;
4719   const CapturedStmt &CS = *D.getCapturedStmt(Kind);
4720   CodeGenFunction::CGCapturedStmtInfo CapturesInfo(CS);
4721   LValue SrcBase;
4722   bool IsTargetTask =
4723       isOpenMPTargetDataManagementDirective(D.getDirectiveKind()) ||
4724       isOpenMPTargetExecutionDirective(D.getDirectiveKind());
4725   // For target-based directives skip 3 firstprivate arrays BasePointersArray,
4726   // PointersArray and SizesArray. The original variables for these arrays are
4727   // not captured and we get their addresses explicitly.
4728   if ((!IsTargetTask && !Data.FirstprivateVars.empty()) ||
4729       (IsTargetTask && KmpTaskSharedsPtr.isValid())) {
4730     SrcBase = CGF.MakeAddrLValue(
4731         CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4732             KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy)),
4733         SharedsTy);
4734   }
4735   FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin();
4736   for (const PrivateDataTy &Pair : Privates) {
4737     const VarDecl *VD = Pair.second.PrivateCopy;
4738     const Expr *Init = VD->getAnyInitializer();
4739     if (Init && (!ForDup || (isa<CXXConstructExpr>(Init) &&
4740                              !CGF.isTrivialInitializer(Init)))) {
4741       LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI);
4742       if (const VarDecl *Elem = Pair.second.PrivateElemInit) {
4743         const VarDecl *OriginalVD = Pair.second.Original;
4744         // Check if the variable is the target-based BasePointersArray,
4745         // PointersArray or SizesArray.
4746         LValue SharedRefLValue;
4747         QualType Type = PrivateLValue.getType();
4748         const FieldDecl *SharedField = CapturesInfo.lookup(OriginalVD);
4749         if (IsTargetTask && !SharedField) {
4750           assert(isa<ImplicitParamDecl>(OriginalVD) &&
4751                  isa<CapturedDecl>(OriginalVD->getDeclContext()) &&
4752                  cast<CapturedDecl>(OriginalVD->getDeclContext())
4753                          ->getNumParams() == 0 &&
4754                  isa<TranslationUnitDecl>(
4755                      cast<CapturedDecl>(OriginalVD->getDeclContext())
4756                          ->getDeclContext()) &&
4757                  "Expected artificial target data variable.");
4758           SharedRefLValue =
4759               CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(OriginalVD), Type);
4760         } else {
4761           SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField);
4762           SharedRefLValue = CGF.MakeAddrLValue(
4763               Address(SharedRefLValue.getPointer(), C.getDeclAlign(OriginalVD)),
4764               SharedRefLValue.getType(), LValueBaseInfo(AlignmentSource::Decl),
4765               SharedRefLValue.getTBAAInfo());
4766         }
4767         if (Type->isArrayType()) {
4768           // Initialize firstprivate array.
4769           if (!isa<CXXConstructExpr>(Init) || CGF.isTrivialInitializer(Init)) {
4770             // Perform simple memcpy.
4771             CGF.EmitAggregateAssign(PrivateLValue, SharedRefLValue, Type);
4772           } else {
4773             // Initialize firstprivate array using element-by-element
4774             // initialization.
4775             CGF.EmitOMPAggregateAssign(
4776                 PrivateLValue.getAddress(), SharedRefLValue.getAddress(), Type,
4777                 [&CGF, Elem, Init, &CapturesInfo](Address DestElement,
4778                                                   Address SrcElement) {
4779                   // Clean up any temporaries needed by the initialization.
4780                   CodeGenFunction::OMPPrivateScope InitScope(CGF);
4781                   InitScope.addPrivate(
4782                       Elem, [SrcElement]() -> Address { return SrcElement; });
4783                   (void)InitScope.Privatize();
4784                   // Emit initialization for single element.
4785                   CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(
4786                       CGF, &CapturesInfo);
4787                   CGF.EmitAnyExprToMem(Init, DestElement,
4788                                        Init->getType().getQualifiers(),
4789                                        /*IsInitializer=*/false);
4790                 });
4791           }
4792         } else {
4793           CodeGenFunction::OMPPrivateScope InitScope(CGF);
4794           InitScope.addPrivate(Elem, [SharedRefLValue]() -> Address {
4795             return SharedRefLValue.getAddress();
4796           });
4797           (void)InitScope.Privatize();
4798           CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo);
4799           CGF.EmitExprAsInit(Init, VD, PrivateLValue,
4800                              /*capturedByInit=*/false);
4801         }
4802       } else {
4803         CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false);
4804       }
4805     }
4806     ++FI;
4807   }
4808 }
4809 
4810 /// Check if duplication function is required for taskloops.
4811 static bool checkInitIsRequired(CodeGenFunction &CGF,
4812                                 ArrayRef<PrivateDataTy> Privates) {
4813   bool InitRequired = false;
4814   for (const PrivateDataTy &Pair : Privates) {
4815     const VarDecl *VD = Pair.second.PrivateCopy;
4816     const Expr *Init = VD->getAnyInitializer();
4817     InitRequired = InitRequired || (Init && isa<CXXConstructExpr>(Init) &&
4818                                     !CGF.isTrivialInitializer(Init));
4819     if (InitRequired)
4820       break;
4821   }
4822   return InitRequired;
4823 }
4824 
4825 
4826 /// Emit task_dup function (for initialization of
4827 /// private/firstprivate/lastprivate vars and last_iter flag)
4828 /// \code
4829 /// void __task_dup_entry(kmp_task_t *task_dst, const kmp_task_t *task_src, int
4830 /// lastpriv) {
4831 /// // setup lastprivate flag
4832 ///    task_dst->last = lastpriv;
4833 /// // could be constructor calls here...
4834 /// }
4835 /// \endcode
4836 static llvm::Value *
4837 emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc,
4838                     const OMPExecutableDirective &D,
4839                     QualType KmpTaskTWithPrivatesPtrQTy,
4840                     const RecordDecl *KmpTaskTWithPrivatesQTyRD,
4841                     const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy,
4842                     QualType SharedsPtrTy, const OMPTaskDataTy &Data,
4843                     ArrayRef<PrivateDataTy> Privates, bool WithLastIter) {
4844   ASTContext &C = CGM.getContext();
4845   FunctionArgList Args;
4846   ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4847                            KmpTaskTWithPrivatesPtrQTy,
4848                            ImplicitParamDecl::Other);
4849   ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4850                            KmpTaskTWithPrivatesPtrQTy,
4851                            ImplicitParamDecl::Other);
4852   ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy,
4853                                 ImplicitParamDecl::Other);
4854   Args.push_back(&DstArg);
4855   Args.push_back(&SrcArg);
4856   Args.push_back(&LastprivArg);
4857   const auto &TaskDupFnInfo =
4858       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
4859   llvm::FunctionType *TaskDupTy = CGM.getTypes().GetFunctionType(TaskDupFnInfo);
4860   std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_dup", ""});
4861   auto *TaskDup = llvm::Function::Create(
4862       TaskDupTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
4863   CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskDup, TaskDupFnInfo);
4864   TaskDup->setDoesNotRecurse();
4865   CodeGenFunction CGF(CGM);
4866   CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskDup, TaskDupFnInfo, Args, Loc,
4867                     Loc);
4868 
4869   LValue TDBase = CGF.EmitLoadOfPointerLValue(
4870       CGF.GetAddrOfLocalVar(&DstArg),
4871       KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
4872   // task_dst->liter = lastpriv;
4873   if (WithLastIter) {
4874     auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
4875     LValue Base = CGF.EmitLValueForField(
4876         TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
4877     LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
4878     llvm::Value *Lastpriv = CGF.EmitLoadOfScalar(
4879         CGF.GetAddrOfLocalVar(&LastprivArg), /*Volatile=*/false, C.IntTy, Loc);
4880     CGF.EmitStoreOfScalar(Lastpriv, LILVal);
4881   }
4882 
4883   // Emit initial values for private copies (if any).
4884   assert(!Privates.empty());
4885   Address KmpTaskSharedsPtr = Address::invalid();
4886   if (!Data.FirstprivateVars.empty()) {
4887     LValue TDBase = CGF.EmitLoadOfPointerLValue(
4888         CGF.GetAddrOfLocalVar(&SrcArg),
4889         KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
4890     LValue Base = CGF.EmitLValueForField(
4891         TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
4892     KmpTaskSharedsPtr = Address(
4893         CGF.EmitLoadOfScalar(CGF.EmitLValueForField(
4894                                  Base, *std::next(KmpTaskTQTyRD->field_begin(),
4895                                                   KmpTaskTShareds)),
4896                              Loc),
4897         CGF.getNaturalTypeAlignment(SharedsTy));
4898   }
4899   emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase, KmpTaskTWithPrivatesQTyRD,
4900                    SharedsTy, SharedsPtrTy, Data, Privates, /*ForDup=*/true);
4901   CGF.FinishFunction();
4902   return TaskDup;
4903 }
4904 
4905 /// Checks if destructor function is required to be generated.
4906 /// \return true if cleanups are required, false otherwise.
4907 static bool
4908 checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD) {
4909   bool NeedsCleanup = false;
4910   auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
4911   const auto *PrivateRD = cast<RecordDecl>(FI->getType()->getAsTagDecl());
4912   for (const FieldDecl *FD : PrivateRD->fields()) {
4913     NeedsCleanup = NeedsCleanup || FD->getType().isDestructedType();
4914     if (NeedsCleanup)
4915       break;
4916   }
4917   return NeedsCleanup;
4918 }
4919 
4920 CGOpenMPRuntime::TaskResultTy
4921 CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,
4922                               const OMPExecutableDirective &D,
4923                               llvm::Function *TaskFunction, QualType SharedsTy,
4924                               Address Shareds, const OMPTaskDataTy &Data) {
4925   ASTContext &C = CGM.getContext();
4926   llvm::SmallVector<PrivateDataTy, 4> Privates;
4927   // Aggregate privates and sort them by the alignment.
4928   auto I = Data.PrivateCopies.begin();
4929   for (const Expr *E : Data.PrivateVars) {
4930     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4931     Privates.emplace_back(
4932         C.getDeclAlign(VD),
4933         PrivateHelpersTy(VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
4934                          /*PrivateElemInit=*/nullptr));
4935     ++I;
4936   }
4937   I = Data.FirstprivateCopies.begin();
4938   auto IElemInitRef = Data.FirstprivateInits.begin();
4939   for (const Expr *E : Data.FirstprivateVars) {
4940     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4941     Privates.emplace_back(
4942         C.getDeclAlign(VD),
4943         PrivateHelpersTy(
4944             VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
4945             cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl())));
4946     ++I;
4947     ++IElemInitRef;
4948   }
4949   I = Data.LastprivateCopies.begin();
4950   for (const Expr *E : Data.LastprivateVars) {
4951     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4952     Privates.emplace_back(
4953         C.getDeclAlign(VD),
4954         PrivateHelpersTy(VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
4955                          /*PrivateElemInit=*/nullptr));
4956     ++I;
4957   }
4958   llvm::stable_sort(Privates, [](PrivateDataTy L, PrivateDataTy R) {
4959     return L.first > R.first;
4960   });
4961   QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
4962   // Build type kmp_routine_entry_t (if not built yet).
4963   emitKmpRoutineEntryT(KmpInt32Ty);
4964   // Build type kmp_task_t (if not built yet).
4965   if (isOpenMPTaskLoopDirective(D.getDirectiveKind())) {
4966     if (SavedKmpTaskloopTQTy.isNull()) {
4967       SavedKmpTaskloopTQTy = C.getRecordType(createKmpTaskTRecordDecl(
4968           CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
4969     }
4970     KmpTaskTQTy = SavedKmpTaskloopTQTy;
4971   } else {
4972     assert((D.getDirectiveKind() == OMPD_task ||
4973             isOpenMPTargetExecutionDirective(D.getDirectiveKind()) ||
4974             isOpenMPTargetDataManagementDirective(D.getDirectiveKind())) &&
4975            "Expected taskloop, task or target directive");
4976     if (SavedKmpTaskTQTy.isNull()) {
4977       SavedKmpTaskTQTy = C.getRecordType(createKmpTaskTRecordDecl(
4978           CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
4979     }
4980     KmpTaskTQTy = SavedKmpTaskTQTy;
4981   }
4982   const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
4983   // Build particular struct kmp_task_t for the given task.
4984   const RecordDecl *KmpTaskTWithPrivatesQTyRD =
4985       createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates);
4986   QualType KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD);
4987   QualType KmpTaskTWithPrivatesPtrQTy =
4988       C.getPointerType(KmpTaskTWithPrivatesQTy);
4989   llvm::Type *KmpTaskTWithPrivatesTy = CGF.ConvertType(KmpTaskTWithPrivatesQTy);
4990   llvm::Type *KmpTaskTWithPrivatesPtrTy =
4991       KmpTaskTWithPrivatesTy->getPointerTo();
4992   llvm::Value *KmpTaskTWithPrivatesTySize =
4993       CGF.getTypeSize(KmpTaskTWithPrivatesQTy);
4994   QualType SharedsPtrTy = C.getPointerType(SharedsTy);
4995 
4996   // Emit initial values for private copies (if any).
4997   llvm::Value *TaskPrivatesMap = nullptr;
4998   llvm::Type *TaskPrivatesMapTy =
4999       std::next(TaskFunction->arg_begin(), 3)->getType();
5000   if (!Privates.empty()) {
5001     auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
5002     TaskPrivatesMap = emitTaskPrivateMappingFunction(
5003         CGM, Loc, Data.PrivateVars, Data.FirstprivateVars, Data.LastprivateVars,
5004         FI->getType(), Privates);
5005     TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5006         TaskPrivatesMap, TaskPrivatesMapTy);
5007   } else {
5008     TaskPrivatesMap = llvm::ConstantPointerNull::get(
5009         cast<llvm::PointerType>(TaskPrivatesMapTy));
5010   }
5011   // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid,
5012   // kmp_task_t *tt);
5013   llvm::Function *TaskEntry = emitProxyTaskFunction(
5014       CGM, Loc, D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
5015       KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction,
5016       TaskPrivatesMap);
5017 
5018   // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
5019   // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
5020   // kmp_routine_entry_t *task_entry);
5021   // Task flags. Format is taken from
5022   // https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h,
5023   // description of kmp_tasking_flags struct.
5024   enum {
5025     TiedFlag = 0x1,
5026     FinalFlag = 0x2,
5027     DestructorsFlag = 0x8,
5028     PriorityFlag = 0x20
5029   };
5030   unsigned Flags = Data.Tied ? TiedFlag : 0;
5031   bool NeedsCleanup = false;
5032   if (!Privates.empty()) {
5033     NeedsCleanup = checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD);
5034     if (NeedsCleanup)
5035       Flags = Flags | DestructorsFlag;
5036   }
5037   if (Data.Priority.getInt())
5038     Flags = Flags | PriorityFlag;
5039   llvm::Value *TaskFlags =
5040       Data.Final.getPointer()
5041           ? CGF.Builder.CreateSelect(Data.Final.getPointer(),
5042                                      CGF.Builder.getInt32(FinalFlag),
5043                                      CGF.Builder.getInt32(/*C=*/0))
5044           : CGF.Builder.getInt32(Data.Final.getInt() ? FinalFlag : 0);
5045   TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags));
5046   llvm::Value *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy));
5047   llvm::Value *AllocArgs[] = {emitUpdateLocation(CGF, Loc),
5048                               getThreadID(CGF, Loc), TaskFlags,
5049                               KmpTaskTWithPrivatesTySize, SharedsSize,
5050                               CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5051                                   TaskEntry, KmpRoutineEntryPtrTy)};
5052   llvm::Value *NewTask = CGF.EmitRuntimeCall(
5053       createRuntimeFunction(OMPRTL__kmpc_omp_task_alloc), AllocArgs);
5054   llvm::Value *NewTaskNewTaskTTy =
5055       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5056           NewTask, KmpTaskTWithPrivatesPtrTy);
5057   LValue Base = CGF.MakeNaturalAlignAddrLValue(NewTaskNewTaskTTy,
5058                                                KmpTaskTWithPrivatesQTy);
5059   LValue TDBase =
5060       CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin());
5061   // Fill the data in the resulting kmp_task_t record.
5062   // Copy shareds if there are any.
5063   Address KmpTaskSharedsPtr = Address::invalid();
5064   if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) {
5065     KmpTaskSharedsPtr =
5066         Address(CGF.EmitLoadOfScalar(
5067                     CGF.EmitLValueForField(
5068                         TDBase, *std::next(KmpTaskTQTyRD->field_begin(),
5069                                            KmpTaskTShareds)),
5070                     Loc),
5071                 CGF.getNaturalTypeAlignment(SharedsTy));
5072     LValue Dest = CGF.MakeAddrLValue(KmpTaskSharedsPtr, SharedsTy);
5073     LValue Src = CGF.MakeAddrLValue(Shareds, SharedsTy);
5074     CGF.EmitAggregateCopy(Dest, Src, SharedsTy, AggValueSlot::DoesNotOverlap);
5075   }
5076   // Emit initial values for private copies (if any).
5077   TaskResultTy Result;
5078   if (!Privates.empty()) {
5079     emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, Base, KmpTaskTWithPrivatesQTyRD,
5080                      SharedsTy, SharedsPtrTy, Data, Privates,
5081                      /*ForDup=*/false);
5082     if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) &&
5083         (!Data.LastprivateVars.empty() || checkInitIsRequired(CGF, Privates))) {
5084       Result.TaskDupFn = emitTaskDupFunction(
5085           CGM, Loc, D, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTyRD,
5086           KmpTaskTQTyRD, SharedsTy, SharedsPtrTy, Data, Privates,
5087           /*WithLastIter=*/!Data.LastprivateVars.empty());
5088     }
5089   }
5090   // Fields of union "kmp_cmplrdata_t" for destructors and priority.
5091   enum { Priority = 0, Destructors = 1 };
5092   // Provide pointer to function with destructors for privates.
5093   auto FI = std::next(KmpTaskTQTyRD->field_begin(), Data1);
5094   const RecordDecl *KmpCmplrdataUD =
5095       (*FI)->getType()->getAsUnionType()->getDecl();
5096   if (NeedsCleanup) {
5097     llvm::Value *DestructorFn = emitDestructorsFunction(
5098         CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
5099         KmpTaskTWithPrivatesQTy);
5100     LValue Data1LV = CGF.EmitLValueForField(TDBase, *FI);
5101     LValue DestructorsLV = CGF.EmitLValueForField(
5102         Data1LV, *std::next(KmpCmplrdataUD->field_begin(), Destructors));
5103     CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5104                               DestructorFn, KmpRoutineEntryPtrTy),
5105                           DestructorsLV);
5106   }
5107   // Set priority.
5108   if (Data.Priority.getInt()) {
5109     LValue Data2LV = CGF.EmitLValueForField(
5110         TDBase, *std::next(KmpTaskTQTyRD->field_begin(), Data2));
5111     LValue PriorityLV = CGF.EmitLValueForField(
5112         Data2LV, *std::next(KmpCmplrdataUD->field_begin(), Priority));
5113     CGF.EmitStoreOfScalar(Data.Priority.getPointer(), PriorityLV);
5114   }
5115   Result.NewTask = NewTask;
5116   Result.TaskEntry = TaskEntry;
5117   Result.NewTaskNewTaskTTy = NewTaskNewTaskTTy;
5118   Result.TDBase = TDBase;
5119   Result.KmpTaskTQTyRD = KmpTaskTQTyRD;
5120   return Result;
5121 }
5122 
5123 void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
5124                                    const OMPExecutableDirective &D,
5125                                    llvm::Function *TaskFunction,
5126                                    QualType SharedsTy, Address Shareds,
5127                                    const Expr *IfCond,
5128                                    const OMPTaskDataTy &Data) {
5129   if (!CGF.HaveInsertPoint())
5130     return;
5131 
5132   TaskResultTy Result =
5133       emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
5134   llvm::Value *NewTask = Result.NewTask;
5135   llvm::Function *TaskEntry = Result.TaskEntry;
5136   llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy;
5137   LValue TDBase = Result.TDBase;
5138   const RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD;
5139   ASTContext &C = CGM.getContext();
5140   // Process list of dependences.
5141   Address DependenciesArray = Address::invalid();
5142   unsigned NumDependencies = Data.Dependences.size();
5143   if (NumDependencies) {
5144     // Dependence kind for RTL.
5145     enum RTLDependenceKindTy { DepIn = 0x01, DepInOut = 0x3, DepMutexInOutSet = 0x4 };
5146     enum RTLDependInfoFieldsTy { BaseAddr, Len, Flags };
5147     RecordDecl *KmpDependInfoRD;
5148     QualType FlagsTy =
5149         C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false);
5150     llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
5151     if (KmpDependInfoTy.isNull()) {
5152       KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info");
5153       KmpDependInfoRD->startDefinition();
5154       addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType());
5155       addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType());
5156       addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy);
5157       KmpDependInfoRD->completeDefinition();
5158       KmpDependInfoTy = C.getRecordType(KmpDependInfoRD);
5159     } else {
5160       KmpDependInfoRD = cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
5161     }
5162     // Define type kmp_depend_info[<Dependences.size()>];
5163     QualType KmpDependInfoArrayTy = C.getConstantArrayType(
5164         KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies),
5165         ArrayType::Normal, /*IndexTypeQuals=*/0);
5166     // kmp_depend_info[<Dependences.size()>] deps;
5167     DependenciesArray =
5168         CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr");
5169     for (unsigned I = 0; I < NumDependencies; ++I) {
5170       const Expr *E = Data.Dependences[I].second;
5171       LValue Addr = CGF.EmitLValue(E);
5172       llvm::Value *Size;
5173       QualType Ty = E->getType();
5174       if (const auto *ASE =
5175               dyn_cast<OMPArraySectionExpr>(E->IgnoreParenImpCasts())) {
5176         LValue UpAddrLVal =
5177             CGF.EmitOMPArraySectionExpr(ASE, /*LowerBound=*/false);
5178         llvm::Value *UpAddr =
5179             CGF.Builder.CreateConstGEP1_32(UpAddrLVal.getPointer(), /*Idx0=*/1);
5180         llvm::Value *LowIntPtr =
5181             CGF.Builder.CreatePtrToInt(Addr.getPointer(), CGM.SizeTy);
5182         llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGM.SizeTy);
5183         Size = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr);
5184       } else {
5185         Size = CGF.getTypeSize(Ty);
5186       }
5187       LValue Base = CGF.MakeAddrLValue(
5188           CGF.Builder.CreateConstArrayGEP(DependenciesArray, I),
5189           KmpDependInfoTy);
5190       // deps[i].base_addr = &<Dependences[i].second>;
5191       LValue BaseAddrLVal = CGF.EmitLValueForField(
5192           Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
5193       CGF.EmitStoreOfScalar(
5194           CGF.Builder.CreatePtrToInt(Addr.getPointer(), CGF.IntPtrTy),
5195           BaseAddrLVal);
5196       // deps[i].len = sizeof(<Dependences[i].second>);
5197       LValue LenLVal = CGF.EmitLValueForField(
5198           Base, *std::next(KmpDependInfoRD->field_begin(), Len));
5199       CGF.EmitStoreOfScalar(Size, LenLVal);
5200       // deps[i].flags = <Dependences[i].first>;
5201       RTLDependenceKindTy DepKind;
5202       switch (Data.Dependences[I].first) {
5203       case OMPC_DEPEND_in:
5204         DepKind = DepIn;
5205         break;
5206       // Out and InOut dependencies must use the same code.
5207       case OMPC_DEPEND_out:
5208       case OMPC_DEPEND_inout:
5209         DepKind = DepInOut;
5210         break;
5211       case OMPC_DEPEND_mutexinoutset:
5212         DepKind = DepMutexInOutSet;
5213         break;
5214       case OMPC_DEPEND_source:
5215       case OMPC_DEPEND_sink:
5216       case OMPC_DEPEND_unknown:
5217         llvm_unreachable("Unknown task dependence type");
5218       }
5219       LValue FlagsLVal = CGF.EmitLValueForField(
5220           Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
5221       CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
5222                             FlagsLVal);
5223     }
5224     DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5225         CGF.Builder.CreateConstArrayGEP(DependenciesArray, 0), CGF.VoidPtrTy);
5226   }
5227 
5228   // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
5229   // libcall.
5230   // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
5231   // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
5232   // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if dependence
5233   // list is not empty
5234   llvm::Value *ThreadID = getThreadID(CGF, Loc);
5235   llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
5236   llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask };
5237   llvm::Value *DepTaskArgs[7];
5238   if (NumDependencies) {
5239     DepTaskArgs[0] = UpLoc;
5240     DepTaskArgs[1] = ThreadID;
5241     DepTaskArgs[2] = NewTask;
5242     DepTaskArgs[3] = CGF.Builder.getInt32(NumDependencies);
5243     DepTaskArgs[4] = DependenciesArray.getPointer();
5244     DepTaskArgs[5] = CGF.Builder.getInt32(0);
5245     DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
5246   }
5247   auto &&ThenCodeGen = [this, &Data, TDBase, KmpTaskTQTyRD, NumDependencies,
5248                         &TaskArgs,
5249                         &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) {
5250     if (!Data.Tied) {
5251       auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
5252       LValue PartIdLVal = CGF.EmitLValueForField(TDBase, *PartIdFI);
5253       CGF.EmitStoreOfScalar(CGF.Builder.getInt32(0), PartIdLVal);
5254     }
5255     if (NumDependencies) {
5256       CGF.EmitRuntimeCall(
5257           createRuntimeFunction(OMPRTL__kmpc_omp_task_with_deps), DepTaskArgs);
5258     } else {
5259       CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task),
5260                           TaskArgs);
5261     }
5262     // Check if parent region is untied and build return for untied task;
5263     if (auto *Region =
5264             dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
5265       Region->emitUntiedSwitch(CGF);
5266   };
5267 
5268   llvm::Value *DepWaitTaskArgs[6];
5269   if (NumDependencies) {
5270     DepWaitTaskArgs[0] = UpLoc;
5271     DepWaitTaskArgs[1] = ThreadID;
5272     DepWaitTaskArgs[2] = CGF.Builder.getInt32(NumDependencies);
5273     DepWaitTaskArgs[3] = DependenciesArray.getPointer();
5274     DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
5275     DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
5276   }
5277   auto &&ElseCodeGen = [&TaskArgs, ThreadID, NewTaskNewTaskTTy, TaskEntry,
5278                         NumDependencies, &DepWaitTaskArgs,
5279                         Loc](CodeGenFunction &CGF, PrePostActionTy &) {
5280     CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
5281     CodeGenFunction::RunCleanupsScope LocalScope(CGF);
5282     // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
5283     // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
5284     // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info
5285     // is specified.
5286     if (NumDependencies)
5287       CGF.EmitRuntimeCall(RT.createRuntimeFunction(OMPRTL__kmpc_omp_wait_deps),
5288                           DepWaitTaskArgs);
5289     // Call proxy_task_entry(gtid, new_task);
5290     auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy,
5291                       Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
5292       Action.Enter(CGF);
5293       llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy};
5294       CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskEntry,
5295                                                           OutlinedFnArgs);
5296     };
5297 
5298     // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
5299     // kmp_task_t *new_task);
5300     // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
5301     // kmp_task_t *new_task);
5302     RegionCodeGenTy RCG(CodeGen);
5303     CommonActionTy Action(
5304         RT.createRuntimeFunction(OMPRTL__kmpc_omp_task_begin_if0), TaskArgs,
5305         RT.createRuntimeFunction(OMPRTL__kmpc_omp_task_complete_if0), TaskArgs);
5306     RCG.setAction(Action);
5307     RCG(CGF);
5308   };
5309 
5310   if (IfCond) {
5311     emitOMPIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen);
5312   } else {
5313     RegionCodeGenTy ThenRCG(ThenCodeGen);
5314     ThenRCG(CGF);
5315   }
5316 }
5317 
5318 void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc,
5319                                        const OMPLoopDirective &D,
5320                                        llvm::Function *TaskFunction,
5321                                        QualType SharedsTy, Address Shareds,
5322                                        const Expr *IfCond,
5323                                        const OMPTaskDataTy &Data) {
5324   if (!CGF.HaveInsertPoint())
5325     return;
5326   TaskResultTy Result =
5327       emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
5328   // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
5329   // libcall.
5330   // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
5331   // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
5332   // sched, kmp_uint64 grainsize, void *task_dup);
5333   llvm::Value *ThreadID = getThreadID(CGF, Loc);
5334   llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
5335   llvm::Value *IfVal;
5336   if (IfCond) {
5337     IfVal = CGF.Builder.CreateIntCast(CGF.EvaluateExprAsBool(IfCond), CGF.IntTy,
5338                                       /*isSigned=*/true);
5339   } else {
5340     IfVal = llvm::ConstantInt::getSigned(CGF.IntTy, /*V=*/1);
5341   }
5342 
5343   LValue LBLVal = CGF.EmitLValueForField(
5344       Result.TDBase,
5345       *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound));
5346   const auto *LBVar =
5347       cast<VarDecl>(cast<DeclRefExpr>(D.getLowerBoundVariable())->getDecl());
5348   CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(), LBLVal.getQuals(),
5349                        /*IsInitializer=*/true);
5350   LValue UBLVal = CGF.EmitLValueForField(
5351       Result.TDBase,
5352       *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound));
5353   const auto *UBVar =
5354       cast<VarDecl>(cast<DeclRefExpr>(D.getUpperBoundVariable())->getDecl());
5355   CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(), UBLVal.getQuals(),
5356                        /*IsInitializer=*/true);
5357   LValue StLVal = CGF.EmitLValueForField(
5358       Result.TDBase,
5359       *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTStride));
5360   const auto *StVar =
5361       cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl());
5362   CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(), StLVal.getQuals(),
5363                        /*IsInitializer=*/true);
5364   // Store reductions address.
5365   LValue RedLVal = CGF.EmitLValueForField(
5366       Result.TDBase,
5367       *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTReductions));
5368   if (Data.Reductions) {
5369     CGF.EmitStoreOfScalar(Data.Reductions, RedLVal);
5370   } else {
5371     CGF.EmitNullInitialization(RedLVal.getAddress(),
5372                                CGF.getContext().VoidPtrTy);
5373   }
5374   enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 };
5375   llvm::Value *TaskArgs[] = {
5376       UpLoc,
5377       ThreadID,
5378       Result.NewTask,
5379       IfVal,
5380       LBLVal.getPointer(),
5381       UBLVal.getPointer(),
5382       CGF.EmitLoadOfScalar(StLVal, Loc),
5383       llvm::ConstantInt::getSigned(
5384               CGF.IntTy, 1), // Always 1 because taskgroup emitted by the compiler
5385       llvm::ConstantInt::getSigned(
5386           CGF.IntTy, Data.Schedule.getPointer()
5387                          ? Data.Schedule.getInt() ? NumTasks : Grainsize
5388                          : NoSchedule),
5389       Data.Schedule.getPointer()
5390           ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty,
5391                                       /*isSigned=*/false)
5392           : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0),
5393       Result.TaskDupFn ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5394                              Result.TaskDupFn, CGF.VoidPtrTy)
5395                        : llvm::ConstantPointerNull::get(CGF.VoidPtrTy)};
5396   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_taskloop), TaskArgs);
5397 }
5398 
5399 /// Emit reduction operation for each element of array (required for
5400 /// array sections) LHS op = RHS.
5401 /// \param Type Type of array.
5402 /// \param LHSVar Variable on the left side of the reduction operation
5403 /// (references element of array in original variable).
5404 /// \param RHSVar Variable on the right side of the reduction operation
5405 /// (references element of array in original variable).
5406 /// \param RedOpGen Generator of reduction operation with use of LHSVar and
5407 /// RHSVar.
5408 static void EmitOMPAggregateReduction(
5409     CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar,
5410     const VarDecl *RHSVar,
5411     const llvm::function_ref<void(CodeGenFunction &CGF, const Expr *,
5412                                   const Expr *, const Expr *)> &RedOpGen,
5413     const Expr *XExpr = nullptr, const Expr *EExpr = nullptr,
5414     const Expr *UpExpr = nullptr) {
5415   // Perform element-by-element initialization.
5416   QualType ElementTy;
5417   Address LHSAddr = CGF.GetAddrOfLocalVar(LHSVar);
5418   Address RHSAddr = CGF.GetAddrOfLocalVar(RHSVar);
5419 
5420   // Drill down to the base element type on both arrays.
5421   const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
5422   llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, LHSAddr);
5423 
5424   llvm::Value *RHSBegin = RHSAddr.getPointer();
5425   llvm::Value *LHSBegin = LHSAddr.getPointer();
5426   // Cast from pointer to array type to pointer to single element.
5427   llvm::Value *LHSEnd = CGF.Builder.CreateGEP(LHSBegin, NumElements);
5428   // The basic structure here is a while-do loop.
5429   llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arraycpy.body");
5430   llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arraycpy.done");
5431   llvm::Value *IsEmpty =
5432       CGF.Builder.CreateICmpEQ(LHSBegin, LHSEnd, "omp.arraycpy.isempty");
5433   CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
5434 
5435   // Enter the loop body, making that address the current address.
5436   llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
5437   CGF.EmitBlock(BodyBB);
5438 
5439   CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);
5440 
5441   llvm::PHINode *RHSElementPHI = CGF.Builder.CreatePHI(
5442       RHSBegin->getType(), 2, "omp.arraycpy.srcElementPast");
5443   RHSElementPHI->addIncoming(RHSBegin, EntryBB);
5444   Address RHSElementCurrent =
5445       Address(RHSElementPHI,
5446               RHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));
5447 
5448   llvm::PHINode *LHSElementPHI = CGF.Builder.CreatePHI(
5449       LHSBegin->getType(), 2, "omp.arraycpy.destElementPast");
5450   LHSElementPHI->addIncoming(LHSBegin, EntryBB);
5451   Address LHSElementCurrent =
5452       Address(LHSElementPHI,
5453               LHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));
5454 
5455   // Emit copy.
5456   CodeGenFunction::OMPPrivateScope Scope(CGF);
5457   Scope.addPrivate(LHSVar, [=]() { return LHSElementCurrent; });
5458   Scope.addPrivate(RHSVar, [=]() { return RHSElementCurrent; });
5459   Scope.Privatize();
5460   RedOpGen(CGF, XExpr, EExpr, UpExpr);
5461   Scope.ForceCleanup();
5462 
5463   // Shift the address forward by one element.
5464   llvm::Value *LHSElementNext = CGF.Builder.CreateConstGEP1_32(
5465       LHSElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
5466   llvm::Value *RHSElementNext = CGF.Builder.CreateConstGEP1_32(
5467       RHSElementPHI, /*Idx0=*/1, "omp.arraycpy.src.element");
5468   // Check whether we've reached the end.
5469   llvm::Value *Done =
5470       CGF.Builder.CreateICmpEQ(LHSElementNext, LHSEnd, "omp.arraycpy.done");
5471   CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
5472   LHSElementPHI->addIncoming(LHSElementNext, CGF.Builder.GetInsertBlock());
5473   RHSElementPHI->addIncoming(RHSElementNext, CGF.Builder.GetInsertBlock());
5474 
5475   // Done.
5476   CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
5477 }
5478 
5479 /// Emit reduction combiner. If the combiner is a simple expression emit it as
5480 /// is, otherwise consider it as combiner of UDR decl and emit it as a call of
5481 /// UDR combiner function.
5482 static void emitReductionCombiner(CodeGenFunction &CGF,
5483                                   const Expr *ReductionOp) {
5484   if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
5485     if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
5486       if (const auto *DRE =
5487               dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
5488         if (const auto *DRD =
5489                 dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) {
5490           std::pair<llvm::Function *, llvm::Function *> Reduction =
5491               CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
5492           RValue Func = RValue::get(Reduction.first);
5493           CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
5494           CGF.EmitIgnoredExpr(ReductionOp);
5495           return;
5496         }
5497   CGF.EmitIgnoredExpr(ReductionOp);
5498 }
5499 
5500 llvm::Function *CGOpenMPRuntime::emitReductionFunction(
5501     SourceLocation Loc, llvm::Type *ArgsType, ArrayRef<const Expr *> Privates,
5502     ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
5503     ArrayRef<const Expr *> ReductionOps) {
5504   ASTContext &C = CGM.getContext();
5505 
5506   // void reduction_func(void *LHSArg, void *RHSArg);
5507   FunctionArgList Args;
5508   ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
5509                            ImplicitParamDecl::Other);
5510   ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
5511                            ImplicitParamDecl::Other);
5512   Args.push_back(&LHSArg);
5513   Args.push_back(&RHSArg);
5514   const auto &CGFI =
5515       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
5516   std::string Name = getName({"omp", "reduction", "reduction_func"});
5517   auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
5518                                     llvm::GlobalValue::InternalLinkage, Name,
5519                                     &CGM.getModule());
5520   CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
5521   Fn->setDoesNotRecurse();
5522   CodeGenFunction CGF(CGM);
5523   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
5524 
5525   // Dst = (void*[n])(LHSArg);
5526   // Src = (void*[n])(RHSArg);
5527   Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5528       CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
5529       ArgsType), CGF.getPointerAlign());
5530   Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5531       CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
5532       ArgsType), CGF.getPointerAlign());
5533 
5534   //  ...
5535   //  *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]);
5536   //  ...
5537   CodeGenFunction::OMPPrivateScope Scope(CGF);
5538   auto IPriv = Privates.begin();
5539   unsigned Idx = 0;
5540   for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) {
5541     const auto *RHSVar =
5542         cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl());
5543     Scope.addPrivate(RHSVar, [&CGF, RHS, Idx, RHSVar]() {
5544       return emitAddrOfVarFromArray(CGF, RHS, Idx, RHSVar);
5545     });
5546     const auto *LHSVar =
5547         cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl());
5548     Scope.addPrivate(LHSVar, [&CGF, LHS, Idx, LHSVar]() {
5549       return emitAddrOfVarFromArray(CGF, LHS, Idx, LHSVar);
5550     });
5551     QualType PrivTy = (*IPriv)->getType();
5552     if (PrivTy->isVariablyModifiedType()) {
5553       // Get array size and emit VLA type.
5554       ++Idx;
5555       Address Elem = CGF.Builder.CreateConstArrayGEP(LHS, Idx);
5556       llvm::Value *Ptr = CGF.Builder.CreateLoad(Elem);
5557       const VariableArrayType *VLA =
5558           CGF.getContext().getAsVariableArrayType(PrivTy);
5559       const auto *OVE = cast<OpaqueValueExpr>(VLA->getSizeExpr());
5560       CodeGenFunction::OpaqueValueMapping OpaqueMap(
5561           CGF, OVE, RValue::get(CGF.Builder.CreatePtrToInt(Ptr, CGF.SizeTy)));
5562       CGF.EmitVariablyModifiedType(PrivTy);
5563     }
5564   }
5565   Scope.Privatize();
5566   IPriv = Privates.begin();
5567   auto ILHS = LHSExprs.begin();
5568   auto IRHS = RHSExprs.begin();
5569   for (const Expr *E : ReductionOps) {
5570     if ((*IPriv)->getType()->isArrayType()) {
5571       // Emit reduction for array section.
5572       const auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
5573       const auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
5574       EmitOMPAggregateReduction(
5575           CGF, (*IPriv)->getType(), LHSVar, RHSVar,
5576           [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
5577             emitReductionCombiner(CGF, E);
5578           });
5579     } else {
5580       // Emit reduction for array subscript or single variable.
5581       emitReductionCombiner(CGF, E);
5582     }
5583     ++IPriv;
5584     ++ILHS;
5585     ++IRHS;
5586   }
5587   Scope.ForceCleanup();
5588   CGF.FinishFunction();
5589   return Fn;
5590 }
5591 
5592 void CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF,
5593                                                   const Expr *ReductionOp,
5594                                                   const Expr *PrivateRef,
5595                                                   const DeclRefExpr *LHS,
5596                                                   const DeclRefExpr *RHS) {
5597   if (PrivateRef->getType()->isArrayType()) {
5598     // Emit reduction for array section.
5599     const auto *LHSVar = cast<VarDecl>(LHS->getDecl());
5600     const auto *RHSVar = cast<VarDecl>(RHS->getDecl());
5601     EmitOMPAggregateReduction(
5602         CGF, PrivateRef->getType(), LHSVar, RHSVar,
5603         [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
5604           emitReductionCombiner(CGF, ReductionOp);
5605         });
5606   } else {
5607     // Emit reduction for array subscript or single variable.
5608     emitReductionCombiner(CGF, ReductionOp);
5609   }
5610 }
5611 
5612 void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
5613                                     ArrayRef<const Expr *> Privates,
5614                                     ArrayRef<const Expr *> LHSExprs,
5615                                     ArrayRef<const Expr *> RHSExprs,
5616                                     ArrayRef<const Expr *> ReductionOps,
5617                                     ReductionOptionsTy Options) {
5618   if (!CGF.HaveInsertPoint())
5619     return;
5620 
5621   bool WithNowait = Options.WithNowait;
5622   bool SimpleReduction = Options.SimpleReduction;
5623 
5624   // Next code should be emitted for reduction:
5625   //
5626   // static kmp_critical_name lock = { 0 };
5627   //
5628   // void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
5629   //  *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]);
5630   //  ...
5631   //  *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1],
5632   //  *(Type<n>-1*)rhs[<n>-1]);
5633   // }
5634   //
5635   // ...
5636   // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]};
5637   // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
5638   // RedList, reduce_func, &<lock>)) {
5639   // case 1:
5640   //  ...
5641   //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
5642   //  ...
5643   // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
5644   // break;
5645   // case 2:
5646   //  ...
5647   //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
5648   //  ...
5649   // [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);]
5650   // break;
5651   // default:;
5652   // }
5653   //
5654   // if SimpleReduction is true, only the next code is generated:
5655   //  ...
5656   //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
5657   //  ...
5658 
5659   ASTContext &C = CGM.getContext();
5660 
5661   if (SimpleReduction) {
5662     CodeGenFunction::RunCleanupsScope Scope(CGF);
5663     auto IPriv = Privates.begin();
5664     auto ILHS = LHSExprs.begin();
5665     auto IRHS = RHSExprs.begin();
5666     for (const Expr *E : ReductionOps) {
5667       emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
5668                                   cast<DeclRefExpr>(*IRHS));
5669       ++IPriv;
5670       ++ILHS;
5671       ++IRHS;
5672     }
5673     return;
5674   }
5675 
5676   // 1. Build a list of reduction variables.
5677   // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]};
5678   auto Size = RHSExprs.size();
5679   for (const Expr *E : Privates) {
5680     if (E->getType()->isVariablyModifiedType())
5681       // Reserve place for array size.
5682       ++Size;
5683   }
5684   llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size);
5685   QualType ReductionArrayTy =
5686       C.getConstantArrayType(C.VoidPtrTy, ArraySize, ArrayType::Normal,
5687                              /*IndexTypeQuals=*/0);
5688   Address ReductionList =
5689       CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list");
5690   auto IPriv = Privates.begin();
5691   unsigned Idx = 0;
5692   for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) {
5693     Address Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
5694     CGF.Builder.CreateStore(
5695         CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5696             CGF.EmitLValue(RHSExprs[I]).getPointer(), CGF.VoidPtrTy),
5697         Elem);
5698     if ((*IPriv)->getType()->isVariablyModifiedType()) {
5699       // Store array size.
5700       ++Idx;
5701       Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
5702       llvm::Value *Size = CGF.Builder.CreateIntCast(
5703           CGF.getVLASize(
5704                  CGF.getContext().getAsVariableArrayType((*IPriv)->getType()))
5705               .NumElts,
5706           CGF.SizeTy, /*isSigned=*/false);
5707       CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy),
5708                               Elem);
5709     }
5710   }
5711 
5712   // 2. Emit reduce_func().
5713   llvm::Function *ReductionFn = emitReductionFunction(
5714       Loc, CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo(), Privates,
5715       LHSExprs, RHSExprs, ReductionOps);
5716 
5717   // 3. Create static kmp_critical_name lock = { 0 };
5718   std::string Name = getName({"reduction"});
5719   llvm::Value *Lock = getCriticalRegionLock(Name);
5720 
5721   // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
5722   // RedList, reduce_func, &<lock>);
5723   llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE);
5724   llvm::Value *ThreadId = getThreadID(CGF, Loc);
5725   llvm::Value *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy);
5726   llvm::Value *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5727       ReductionList.getPointer(), CGF.VoidPtrTy);
5728   llvm::Value *Args[] = {
5729       IdentTLoc,                             // ident_t *<loc>
5730       ThreadId,                              // i32 <gtid>
5731       CGF.Builder.getInt32(RHSExprs.size()), // i32 <n>
5732       ReductionArrayTySize,                  // size_type sizeof(RedList)
5733       RL,                                    // void *RedList
5734       ReductionFn, // void (*) (void *, void *) <reduce_func>
5735       Lock         // kmp_critical_name *&<lock>
5736   };
5737   llvm::Value *Res = CGF.EmitRuntimeCall(
5738       createRuntimeFunction(WithNowait ? OMPRTL__kmpc_reduce_nowait
5739                                        : OMPRTL__kmpc_reduce),
5740       Args);
5741 
5742   // 5. Build switch(res)
5743   llvm::BasicBlock *DefaultBB = CGF.createBasicBlock(".omp.reduction.default");
5744   llvm::SwitchInst *SwInst =
5745       CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2);
5746 
5747   // 6. Build case 1:
5748   //  ...
5749   //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
5750   //  ...
5751   // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
5752   // break;
5753   llvm::BasicBlock *Case1BB = CGF.createBasicBlock(".omp.reduction.case1");
5754   SwInst->addCase(CGF.Builder.getInt32(1), Case1BB);
5755   CGF.EmitBlock(Case1BB);
5756 
5757   // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
5758   llvm::Value *EndArgs[] = {
5759       IdentTLoc, // ident_t *<loc>
5760       ThreadId,  // i32 <gtid>
5761       Lock       // kmp_critical_name *&<lock>
5762   };
5763   auto &&CodeGen = [Privates, LHSExprs, RHSExprs, ReductionOps](
5764                        CodeGenFunction &CGF, PrePostActionTy &Action) {
5765     CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
5766     auto IPriv = Privates.begin();
5767     auto ILHS = LHSExprs.begin();
5768     auto IRHS = RHSExprs.begin();
5769     for (const Expr *E : ReductionOps) {
5770       RT.emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
5771                                      cast<DeclRefExpr>(*IRHS));
5772       ++IPriv;
5773       ++ILHS;
5774       ++IRHS;
5775     }
5776   };
5777   RegionCodeGenTy RCG(CodeGen);
5778   CommonActionTy Action(
5779       nullptr, llvm::None,
5780       createRuntimeFunction(WithNowait ? OMPRTL__kmpc_end_reduce_nowait
5781                                        : OMPRTL__kmpc_end_reduce),
5782       EndArgs);
5783   RCG.setAction(Action);
5784   RCG(CGF);
5785 
5786   CGF.EmitBranch(DefaultBB);
5787 
5788   // 7. Build case 2:
5789   //  ...
5790   //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
5791   //  ...
5792   // break;
5793   llvm::BasicBlock *Case2BB = CGF.createBasicBlock(".omp.reduction.case2");
5794   SwInst->addCase(CGF.Builder.getInt32(2), Case2BB);
5795   CGF.EmitBlock(Case2BB);
5796 
5797   auto &&AtomicCodeGen = [Loc, Privates, LHSExprs, RHSExprs, ReductionOps](
5798                              CodeGenFunction &CGF, PrePostActionTy &Action) {
5799     auto ILHS = LHSExprs.begin();
5800     auto IRHS = RHSExprs.begin();
5801     auto IPriv = Privates.begin();
5802     for (const Expr *E : ReductionOps) {
5803       const Expr *XExpr = nullptr;
5804       const Expr *EExpr = nullptr;
5805       const Expr *UpExpr = nullptr;
5806       BinaryOperatorKind BO = BO_Comma;
5807       if (const auto *BO = dyn_cast<BinaryOperator>(E)) {
5808         if (BO->getOpcode() == BO_Assign) {
5809           XExpr = BO->getLHS();
5810           UpExpr = BO->getRHS();
5811         }
5812       }
5813       // Try to emit update expression as a simple atomic.
5814       const Expr *RHSExpr = UpExpr;
5815       if (RHSExpr) {
5816         // Analyze RHS part of the whole expression.
5817         if (const auto *ACO = dyn_cast<AbstractConditionalOperator>(
5818                 RHSExpr->IgnoreParenImpCasts())) {
5819           // If this is a conditional operator, analyze its condition for
5820           // min/max reduction operator.
5821           RHSExpr = ACO->getCond();
5822         }
5823         if (const auto *BORHS =
5824                 dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) {
5825           EExpr = BORHS->getRHS();
5826           BO = BORHS->getOpcode();
5827         }
5828       }
5829       if (XExpr) {
5830         const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
5831         auto &&AtomicRedGen = [BO, VD,
5832                                Loc](CodeGenFunction &CGF, const Expr *XExpr,
5833                                     const Expr *EExpr, const Expr *UpExpr) {
5834           LValue X = CGF.EmitLValue(XExpr);
5835           RValue E;
5836           if (EExpr)
5837             E = CGF.EmitAnyExpr(EExpr);
5838           CGF.EmitOMPAtomicSimpleUpdateExpr(
5839               X, E, BO, /*IsXLHSInRHSPart=*/true,
5840               llvm::AtomicOrdering::Monotonic, Loc,
5841               [&CGF, UpExpr, VD, Loc](RValue XRValue) {
5842                 CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
5843                 PrivateScope.addPrivate(
5844                     VD, [&CGF, VD, XRValue, Loc]() {
5845                       Address LHSTemp = CGF.CreateMemTemp(VD->getType());
5846                       CGF.emitOMPSimpleStore(
5847                           CGF.MakeAddrLValue(LHSTemp, VD->getType()), XRValue,
5848                           VD->getType().getNonReferenceType(), Loc);
5849                       return LHSTemp;
5850                     });
5851                 (void)PrivateScope.Privatize();
5852                 return CGF.EmitAnyExpr(UpExpr);
5853               });
5854         };
5855         if ((*IPriv)->getType()->isArrayType()) {
5856           // Emit atomic reduction for array section.
5857           const auto *RHSVar =
5858               cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
5859           EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar,
5860                                     AtomicRedGen, XExpr, EExpr, UpExpr);
5861         } else {
5862           // Emit atomic reduction for array subscript or single variable.
5863           AtomicRedGen(CGF, XExpr, EExpr, UpExpr);
5864         }
5865       } else {
5866         // Emit as a critical region.
5867         auto &&CritRedGen = [E, Loc](CodeGenFunction &CGF, const Expr *,
5868                                            const Expr *, const Expr *) {
5869           CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
5870           std::string Name = RT.getName({"atomic_reduction"});
5871           RT.emitCriticalRegion(
5872               CGF, Name,
5873               [=](CodeGenFunction &CGF, PrePostActionTy &Action) {
5874                 Action.Enter(CGF);
5875                 emitReductionCombiner(CGF, E);
5876               },
5877               Loc);
5878         };
5879         if ((*IPriv)->getType()->isArrayType()) {
5880           const auto *LHSVar =
5881               cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
5882           const auto *RHSVar =
5883               cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
5884           EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar,
5885                                     CritRedGen);
5886         } else {
5887           CritRedGen(CGF, nullptr, nullptr, nullptr);
5888         }
5889       }
5890       ++ILHS;
5891       ++IRHS;
5892       ++IPriv;
5893     }
5894   };
5895   RegionCodeGenTy AtomicRCG(AtomicCodeGen);
5896   if (!WithNowait) {
5897     // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>);
5898     llvm::Value *EndArgs[] = {
5899         IdentTLoc, // ident_t *<loc>
5900         ThreadId,  // i32 <gtid>
5901         Lock       // kmp_critical_name *&<lock>
5902     };
5903     CommonActionTy Action(nullptr, llvm::None,
5904                           createRuntimeFunction(OMPRTL__kmpc_end_reduce),
5905                           EndArgs);
5906     AtomicRCG.setAction(Action);
5907     AtomicRCG(CGF);
5908   } else {
5909     AtomicRCG(CGF);
5910   }
5911 
5912   CGF.EmitBranch(DefaultBB);
5913   CGF.EmitBlock(DefaultBB, /*IsFinished=*/true);
5914 }
5915 
5916 /// Generates unique name for artificial threadprivate variables.
5917 /// Format is: <Prefix> "." <Decl_mangled_name> "_" "<Decl_start_loc_raw_enc>"
5918 static std::string generateUniqueName(CodeGenModule &CGM, StringRef Prefix,
5919                                       const Expr *Ref) {
5920   SmallString<256> Buffer;
5921   llvm::raw_svector_ostream Out(Buffer);
5922   const clang::DeclRefExpr *DE;
5923   const VarDecl *D = ::getBaseDecl(Ref, DE);
5924   if (!D)
5925     D = cast<VarDecl>(cast<DeclRefExpr>(Ref)->getDecl());
5926   D = D->getCanonicalDecl();
5927   std::string Name = CGM.getOpenMPRuntime().getName(
5928       {D->isLocalVarDeclOrParm() ? D->getName() : CGM.getMangledName(D)});
5929   Out << Prefix << Name << "_"
5930       << D->getCanonicalDecl()->getBeginLoc().getRawEncoding();
5931   return Out.str();
5932 }
5933 
5934 /// Emits reduction initializer function:
5935 /// \code
5936 /// void @.red_init(void* %arg) {
5937 /// %0 = bitcast void* %arg to <type>*
5938 /// store <type> <init>, <type>* %0
5939 /// ret void
5940 /// }
5941 /// \endcode
5942 static llvm::Value *emitReduceInitFunction(CodeGenModule &CGM,
5943                                            SourceLocation Loc,
5944                                            ReductionCodeGen &RCG, unsigned N) {
5945   ASTContext &C = CGM.getContext();
5946   FunctionArgList Args;
5947   ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
5948                           ImplicitParamDecl::Other);
5949   Args.emplace_back(&Param);
5950   const auto &FnInfo =
5951       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
5952   llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
5953   std::string Name = CGM.getOpenMPRuntime().getName({"red_init", ""});
5954   auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
5955                                     Name, &CGM.getModule());
5956   CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
5957   Fn->setDoesNotRecurse();
5958   CodeGenFunction CGF(CGM);
5959   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
5960   Address PrivateAddr = CGF.EmitLoadOfPointer(
5961       CGF.GetAddrOfLocalVar(&Param),
5962       C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
5963   llvm::Value *Size = nullptr;
5964   // If the size of the reduction item is non-constant, load it from global
5965   // threadprivate variable.
5966   if (RCG.getSizes(N).second) {
5967     Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
5968         CGF, CGM.getContext().getSizeType(),
5969         generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
5970     Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
5971                                 CGM.getContext().getSizeType(), Loc);
5972   }
5973   RCG.emitAggregateType(CGF, N, Size);
5974   LValue SharedLVal;
5975   // If initializer uses initializer from declare reduction construct, emit a
5976   // pointer to the address of the original reduction item (reuired by reduction
5977   // initializer)
5978   if (RCG.usesReductionInitializer(N)) {
5979     Address SharedAddr =
5980         CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
5981             CGF, CGM.getContext().VoidPtrTy,
5982             generateUniqueName(CGM, "reduction", RCG.getRefExpr(N)));
5983     SharedAddr = CGF.EmitLoadOfPointer(
5984         SharedAddr,
5985         CGM.getContext().VoidPtrTy.castAs<PointerType>()->getTypePtr());
5986     SharedLVal = CGF.MakeAddrLValue(SharedAddr, CGM.getContext().VoidPtrTy);
5987   } else {
5988     SharedLVal = CGF.MakeNaturalAlignAddrLValue(
5989         llvm::ConstantPointerNull::get(CGM.VoidPtrTy),
5990         CGM.getContext().VoidPtrTy);
5991   }
5992   // Emit the initializer:
5993   // %0 = bitcast void* %arg to <type>*
5994   // store <type> <init>, <type>* %0
5995   RCG.emitInitialization(CGF, N, PrivateAddr, SharedLVal,
5996                          [](CodeGenFunction &) { return false; });
5997   CGF.FinishFunction();
5998   return Fn;
5999 }
6000 
6001 /// Emits reduction combiner function:
6002 /// \code
6003 /// void @.red_comb(void* %arg0, void* %arg1) {
6004 /// %lhs = bitcast void* %arg0 to <type>*
6005 /// %rhs = bitcast void* %arg1 to <type>*
6006 /// %2 = <ReductionOp>(<type>* %lhs, <type>* %rhs)
6007 /// store <type> %2, <type>* %lhs
6008 /// ret void
6009 /// }
6010 /// \endcode
6011 static llvm::Value *emitReduceCombFunction(CodeGenModule &CGM,
6012                                            SourceLocation Loc,
6013                                            ReductionCodeGen &RCG, unsigned N,
6014                                            const Expr *ReductionOp,
6015                                            const Expr *LHS, const Expr *RHS,
6016                                            const Expr *PrivateRef) {
6017   ASTContext &C = CGM.getContext();
6018   const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(LHS)->getDecl());
6019   const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(RHS)->getDecl());
6020   FunctionArgList Args;
6021   ImplicitParamDecl ParamInOut(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
6022                                C.VoidPtrTy, ImplicitParamDecl::Other);
6023   ImplicitParamDecl ParamIn(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
6024                             ImplicitParamDecl::Other);
6025   Args.emplace_back(&ParamInOut);
6026   Args.emplace_back(&ParamIn);
6027   const auto &FnInfo =
6028       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
6029   llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
6030   std::string Name = CGM.getOpenMPRuntime().getName({"red_comb", ""});
6031   auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
6032                                     Name, &CGM.getModule());
6033   CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
6034   Fn->setDoesNotRecurse();
6035   CodeGenFunction CGF(CGM);
6036   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
6037   llvm::Value *Size = nullptr;
6038   // If the size of the reduction item is non-constant, load it from global
6039   // threadprivate variable.
6040   if (RCG.getSizes(N).second) {
6041     Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
6042         CGF, CGM.getContext().getSizeType(),
6043         generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
6044     Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
6045                                 CGM.getContext().getSizeType(), Loc);
6046   }
6047   RCG.emitAggregateType(CGF, N, Size);
6048   // Remap lhs and rhs variables to the addresses of the function arguments.
6049   // %lhs = bitcast void* %arg0 to <type>*
6050   // %rhs = bitcast void* %arg1 to <type>*
6051   CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
6052   PrivateScope.addPrivate(LHSVD, [&C, &CGF, &ParamInOut, LHSVD]() {
6053     // Pull out the pointer to the variable.
6054     Address PtrAddr = CGF.EmitLoadOfPointer(
6055         CGF.GetAddrOfLocalVar(&ParamInOut),
6056         C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
6057     return CGF.Builder.CreateElementBitCast(
6058         PtrAddr, CGF.ConvertTypeForMem(LHSVD->getType()));
6059   });
6060   PrivateScope.addPrivate(RHSVD, [&C, &CGF, &ParamIn, RHSVD]() {
6061     // Pull out the pointer to the variable.
6062     Address PtrAddr = CGF.EmitLoadOfPointer(
6063         CGF.GetAddrOfLocalVar(&ParamIn),
6064         C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
6065     return CGF.Builder.CreateElementBitCast(
6066         PtrAddr, CGF.ConvertTypeForMem(RHSVD->getType()));
6067   });
6068   PrivateScope.Privatize();
6069   // Emit the combiner body:
6070   // %2 = <ReductionOp>(<type> *%lhs, <type> *%rhs)
6071   // store <type> %2, <type>* %lhs
6072   CGM.getOpenMPRuntime().emitSingleReductionCombiner(
6073       CGF, ReductionOp, PrivateRef, cast<DeclRefExpr>(LHS),
6074       cast<DeclRefExpr>(RHS));
6075   CGF.FinishFunction();
6076   return Fn;
6077 }
6078 
6079 /// Emits reduction finalizer function:
6080 /// \code
6081 /// void @.red_fini(void* %arg) {
6082 /// %0 = bitcast void* %arg to <type>*
6083 /// <destroy>(<type>* %0)
6084 /// ret void
6085 /// }
6086 /// \endcode
6087 static llvm::Value *emitReduceFiniFunction(CodeGenModule &CGM,
6088                                            SourceLocation Loc,
6089                                            ReductionCodeGen &RCG, unsigned N) {
6090   if (!RCG.needCleanups(N))
6091     return nullptr;
6092   ASTContext &C = CGM.getContext();
6093   FunctionArgList Args;
6094   ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
6095                           ImplicitParamDecl::Other);
6096   Args.emplace_back(&Param);
6097   const auto &FnInfo =
6098       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
6099   llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
6100   std::string Name = CGM.getOpenMPRuntime().getName({"red_fini", ""});
6101   auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
6102                                     Name, &CGM.getModule());
6103   CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
6104   Fn->setDoesNotRecurse();
6105   CodeGenFunction CGF(CGM);
6106   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
6107   Address PrivateAddr = CGF.EmitLoadOfPointer(
6108       CGF.GetAddrOfLocalVar(&Param),
6109       C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
6110   llvm::Value *Size = nullptr;
6111   // If the size of the reduction item is non-constant, load it from global
6112   // threadprivate variable.
6113   if (RCG.getSizes(N).second) {
6114     Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
6115         CGF, CGM.getContext().getSizeType(),
6116         generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
6117     Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
6118                                 CGM.getContext().getSizeType(), Loc);
6119   }
6120   RCG.emitAggregateType(CGF, N, Size);
6121   // Emit the finalizer body:
6122   // <destroy>(<type>* %0)
6123   RCG.emitCleanups(CGF, N, PrivateAddr);
6124   CGF.FinishFunction();
6125   return Fn;
6126 }
6127 
6128 llvm::Value *CGOpenMPRuntime::emitTaskReductionInit(
6129     CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
6130     ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
6131   if (!CGF.HaveInsertPoint() || Data.ReductionVars.empty())
6132     return nullptr;
6133 
6134   // Build typedef struct:
6135   // kmp_task_red_input {
6136   //   void *reduce_shar; // shared reduction item
6137   //   size_t reduce_size; // size of data item
6138   //   void *reduce_init; // data initialization routine
6139   //   void *reduce_fini; // data finalization routine
6140   //   void *reduce_comb; // data combiner routine
6141   //   kmp_task_red_flags_t flags; // flags for additional info from compiler
6142   // } kmp_task_red_input_t;
6143   ASTContext &C = CGM.getContext();
6144   RecordDecl *RD = C.buildImplicitRecord("kmp_task_red_input_t");
6145   RD->startDefinition();
6146   const FieldDecl *SharedFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
6147   const FieldDecl *SizeFD = addFieldToRecordDecl(C, RD, C.getSizeType());
6148   const FieldDecl *InitFD  = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
6149   const FieldDecl *FiniFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
6150   const FieldDecl *CombFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
6151   const FieldDecl *FlagsFD = addFieldToRecordDecl(
6152       C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/false));
6153   RD->completeDefinition();
6154   QualType RDType = C.getRecordType(RD);
6155   unsigned Size = Data.ReductionVars.size();
6156   llvm::APInt ArraySize(/*numBits=*/64, Size);
6157   QualType ArrayRDType = C.getConstantArrayType(
6158       RDType, ArraySize, ArrayType::Normal, /*IndexTypeQuals=*/0);
6159   // kmp_task_red_input_t .rd_input.[Size];
6160   Address TaskRedInput = CGF.CreateMemTemp(ArrayRDType, ".rd_input.");
6161   ReductionCodeGen RCG(Data.ReductionVars, Data.ReductionCopies,
6162                        Data.ReductionOps);
6163   for (unsigned Cnt = 0; Cnt < Size; ++Cnt) {
6164     // kmp_task_red_input_t &ElemLVal = .rd_input.[Cnt];
6165     llvm::Value *Idxs[] = {llvm::ConstantInt::get(CGM.SizeTy, /*V=*/0),
6166                            llvm::ConstantInt::get(CGM.SizeTy, Cnt)};
6167     llvm::Value *GEP = CGF.EmitCheckedInBoundsGEP(
6168         TaskRedInput.getPointer(), Idxs,
6169         /*SignedIndices=*/false, /*IsSubtraction=*/false, Loc,
6170         ".rd_input.gep.");
6171     LValue ElemLVal = CGF.MakeNaturalAlignAddrLValue(GEP, RDType);
6172     // ElemLVal.reduce_shar = &Shareds[Cnt];
6173     LValue SharedLVal = CGF.EmitLValueForField(ElemLVal, SharedFD);
6174     RCG.emitSharedLValue(CGF, Cnt);
6175     llvm::Value *CastedShared =
6176         CGF.EmitCastToVoidPtr(RCG.getSharedLValue(Cnt).getPointer());
6177     CGF.EmitStoreOfScalar(CastedShared, SharedLVal);
6178     RCG.emitAggregateType(CGF, Cnt);
6179     llvm::Value *SizeValInChars;
6180     llvm::Value *SizeVal;
6181     std::tie(SizeValInChars, SizeVal) = RCG.getSizes(Cnt);
6182     // We use delayed creation/initialization for VLAs, array sections and
6183     // custom reduction initializations. It is required because runtime does not
6184     // provide the way to pass the sizes of VLAs/array sections to
6185     // initializer/combiner/finalizer functions and does not pass the pointer to
6186     // original reduction item to the initializer. Instead threadprivate global
6187     // variables are used to store these values and use them in the functions.
6188     bool DelayedCreation = !!SizeVal;
6189     SizeValInChars = CGF.Builder.CreateIntCast(SizeValInChars, CGM.SizeTy,
6190                                                /*isSigned=*/false);
6191     LValue SizeLVal = CGF.EmitLValueForField(ElemLVal, SizeFD);
6192     CGF.EmitStoreOfScalar(SizeValInChars, SizeLVal);
6193     // ElemLVal.reduce_init = init;
6194     LValue InitLVal = CGF.EmitLValueForField(ElemLVal, InitFD);
6195     llvm::Value *InitAddr =
6196         CGF.EmitCastToVoidPtr(emitReduceInitFunction(CGM, Loc, RCG, Cnt));
6197     CGF.EmitStoreOfScalar(InitAddr, InitLVal);
6198     DelayedCreation = DelayedCreation || RCG.usesReductionInitializer(Cnt);
6199     // ElemLVal.reduce_fini = fini;
6200     LValue FiniLVal = CGF.EmitLValueForField(ElemLVal, FiniFD);
6201     llvm::Value *Fini = emitReduceFiniFunction(CGM, Loc, RCG, Cnt);
6202     llvm::Value *FiniAddr = Fini
6203                                 ? CGF.EmitCastToVoidPtr(Fini)
6204                                 : llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
6205     CGF.EmitStoreOfScalar(FiniAddr, FiniLVal);
6206     // ElemLVal.reduce_comb = comb;
6207     LValue CombLVal = CGF.EmitLValueForField(ElemLVal, CombFD);
6208     llvm::Value *CombAddr = CGF.EmitCastToVoidPtr(emitReduceCombFunction(
6209         CGM, Loc, RCG, Cnt, Data.ReductionOps[Cnt], LHSExprs[Cnt],
6210         RHSExprs[Cnt], Data.ReductionCopies[Cnt]));
6211     CGF.EmitStoreOfScalar(CombAddr, CombLVal);
6212     // ElemLVal.flags = 0;
6213     LValue FlagsLVal = CGF.EmitLValueForField(ElemLVal, FlagsFD);
6214     if (DelayedCreation) {
6215       CGF.EmitStoreOfScalar(
6216           llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/1, /*IsSigned=*/true),
6217           FlagsLVal);
6218     } else
6219       CGF.EmitNullInitialization(FlagsLVal.getAddress(), FlagsLVal.getType());
6220   }
6221   // Build call void *__kmpc_task_reduction_init(int gtid, int num_data, void
6222   // *data);
6223   llvm::Value *Args[] = {
6224       CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy,
6225                                 /*isSigned=*/true),
6226       llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
6227       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TaskRedInput.getPointer(),
6228                                                       CGM.VoidPtrTy)};
6229   return CGF.EmitRuntimeCall(
6230       createRuntimeFunction(OMPRTL__kmpc_task_reduction_init), Args);
6231 }
6232 
6233 void CGOpenMPRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
6234                                               SourceLocation Loc,
6235                                               ReductionCodeGen &RCG,
6236                                               unsigned N) {
6237   auto Sizes = RCG.getSizes(N);
6238   // Emit threadprivate global variable if the type is non-constant
6239   // (Sizes.second = nullptr).
6240   if (Sizes.second) {
6241     llvm::Value *SizeVal = CGF.Builder.CreateIntCast(Sizes.second, CGM.SizeTy,
6242                                                      /*isSigned=*/false);
6243     Address SizeAddr = getAddrOfArtificialThreadPrivate(
6244         CGF, CGM.getContext().getSizeType(),
6245         generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
6246     CGF.Builder.CreateStore(SizeVal, SizeAddr, /*IsVolatile=*/false);
6247   }
6248   // Store address of the original reduction item if custom initializer is used.
6249   if (RCG.usesReductionInitializer(N)) {
6250     Address SharedAddr = getAddrOfArtificialThreadPrivate(
6251         CGF, CGM.getContext().VoidPtrTy,
6252         generateUniqueName(CGM, "reduction", RCG.getRefExpr(N)));
6253     CGF.Builder.CreateStore(
6254         CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
6255             RCG.getSharedLValue(N).getPointer(), CGM.VoidPtrTy),
6256         SharedAddr, /*IsVolatile=*/false);
6257   }
6258 }
6259 
6260 Address CGOpenMPRuntime::getTaskReductionItem(CodeGenFunction &CGF,
6261                                               SourceLocation Loc,
6262                                               llvm::Value *ReductionsPtr,
6263                                               LValue SharedLVal) {
6264   // Build call void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
6265   // *d);
6266   llvm::Value *Args[] = {
6267       CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy,
6268                                 /*isSigned=*/true),
6269       ReductionsPtr,
6270       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(SharedLVal.getPointer(),
6271                                                       CGM.VoidPtrTy)};
6272   return Address(
6273       CGF.EmitRuntimeCall(
6274           createRuntimeFunction(OMPRTL__kmpc_task_reduction_get_th_data), Args),
6275       SharedLVal.getAlignment());
6276 }
6277 
6278 void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
6279                                        SourceLocation Loc) {
6280   if (!CGF.HaveInsertPoint())
6281     return;
6282   // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
6283   // global_tid);
6284   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
6285   // Ignore return result until untied tasks are supported.
6286   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_taskwait), Args);
6287   if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
6288     Region->emitUntiedSwitch(CGF);
6289 }
6290 
6291 void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF,
6292                                            OpenMPDirectiveKind InnerKind,
6293                                            const RegionCodeGenTy &CodeGen,
6294                                            bool HasCancel) {
6295   if (!CGF.HaveInsertPoint())
6296     return;
6297   InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel);
6298   CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr);
6299 }
6300 
namespace {
/// Cancellation kinds passed as the 'cncl_kind' argument of the
/// __kmpc_cancel and __kmpc_cancellationpoint runtime calls. The mapping from
/// OpenMP directive kinds is done by getCancellationKind.
enum RTCancelKind {
  /// Initial value in getCancellationKind; it is always overwritten there
  /// before being returned.
  CancelNoreq = 0,
  /// Selected for OMPD_parallel.
  CancelParallel = 1,
  /// Selected for OMPD_for.
  CancelLoop = 2,
  /// Selected for OMPD_sections.
  CancelSections = 3,
  /// Selected for OMPD_taskgroup.
  CancelTaskgroup = 4
};
} // anonymous namespace
6310 
6311 static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) {
6312   RTCancelKind CancelKind = CancelNoreq;
6313   if (CancelRegion == OMPD_parallel)
6314     CancelKind = CancelParallel;
6315   else if (CancelRegion == OMPD_for)
6316     CancelKind = CancelLoop;
6317   else if (CancelRegion == OMPD_sections)
6318     CancelKind = CancelSections;
6319   else {
6320     assert(CancelRegion == OMPD_taskgroup);
6321     CancelKind = CancelTaskgroup;
6322   }
6323   return CancelKind;
6324 }
6325 
6326 void CGOpenMPRuntime::emitCancellationPointCall(
6327     CodeGenFunction &CGF, SourceLocation Loc,
6328     OpenMPDirectiveKind CancelRegion) {
6329   if (!CGF.HaveInsertPoint())
6330     return;
6331   // Build call kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
6332   // global_tid, kmp_int32 cncl_kind);
6333   if (auto *OMPRegionInfo =
6334           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
6335     // For 'cancellation point taskgroup', the task region info may not have a
6336     // cancel. This may instead happen in another adjacent task.
6337     if (CancelRegion == OMPD_taskgroup || OMPRegionInfo->hasCancel()) {
6338       llvm::Value *Args[] = {
6339           emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
6340           CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
6341       // Ignore return result until untied tasks are supported.
6342       llvm::Value *Result = CGF.EmitRuntimeCall(
6343           createRuntimeFunction(OMPRTL__kmpc_cancellationpoint), Args);
6344       // if (__kmpc_cancellationpoint()) {
6345       //   exit from construct;
6346       // }
6347       llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
6348       llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
6349       llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
6350       CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
6351       CGF.EmitBlock(ExitBB);
6352       // exit from construct;
6353       CodeGenFunction::JumpDest CancelDest =
6354           CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
6355       CGF.EmitBranchThroughCleanup(CancelDest);
6356       CGF.EmitBlock(ContBB, /*IsFinished=*/true);
6357     }
6358   }
6359 }
6360 
6361 void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc,
6362                                      const Expr *IfCond,
6363                                      OpenMPDirectiveKind CancelRegion) {
6364   if (!CGF.HaveInsertPoint())
6365     return;
6366   // Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
6367   // kmp_int32 cncl_kind);
6368   if (auto *OMPRegionInfo =
6369           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
6370     auto &&ThenGen = [Loc, CancelRegion, OMPRegionInfo](CodeGenFunction &CGF,
6371                                                         PrePostActionTy &) {
6372       CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
6373       llvm::Value *Args[] = {
6374           RT.emitUpdateLocation(CGF, Loc), RT.getThreadID(CGF, Loc),
6375           CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
6376       // Ignore return result until untied tasks are supported.
6377       llvm::Value *Result = CGF.EmitRuntimeCall(
6378           RT.createRuntimeFunction(OMPRTL__kmpc_cancel), Args);
6379       // if (__kmpc_cancel()) {
6380       //   exit from construct;
6381       // }
6382       llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
6383       llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
6384       llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
6385       CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
6386       CGF.EmitBlock(ExitBB);
6387       // exit from construct;
6388       CodeGenFunction::JumpDest CancelDest =
6389           CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
6390       CGF.EmitBranchThroughCleanup(CancelDest);
6391       CGF.EmitBlock(ContBB, /*IsFinished=*/true);
6392     };
6393     if (IfCond) {
6394       emitOMPIfClause(CGF, IfCond, ThenGen,
6395                       [](CodeGenFunction &, PrePostActionTy &) {});
6396     } else {
6397       RegionCodeGenTy ThenRCG(ThenGen);
6398       ThenRCG(CGF);
6399     }
6400   }
6401 }
6402 
/// Emits the outlined function for the target directive \p D by forwarding
/// every argument unchanged to emitTargetOutlinedFunctionHelper.
///
/// \p ParentName must be non-empty; this is checked with an assertion.
void CGOpenMPRuntime::emitTargetOutlinedFunction(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  assert(!ParentName.empty() && "Invalid target region parent name!");
  emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID,
                                   IsOffloadEntry, CodeGen);
}
6411 
6412 void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper(
6413     const OMPExecutableDirective &D, StringRef ParentName,
6414     llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
6415     bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
6416   // Create a unique name for the entry function using the source location
6417   // information of the current target region. The name will be something like:
6418   //
6419   // __omp_offloading_DD_FFFF_PP_lBB
6420   //
6421   // where DD_FFFF is an ID unique to the file (device and file IDs), PP is the
6422   // mangled name of the function that encloses the target region and BB is the
6423   // line number of the target region.
6424 
6425   unsigned DeviceID;
6426   unsigned FileID;
6427   unsigned Line;
6428   getTargetEntryUniqueInfo(CGM.getContext(), D.getBeginLoc(), DeviceID, FileID,
6429                            Line);
6430   SmallString<64> EntryFnName;
6431   {
6432     llvm::raw_svector_ostream OS(EntryFnName);
6433     OS << "__omp_offloading" << llvm::format("_%x", DeviceID)
6434        << llvm::format("_%x_", FileID) << ParentName << "_l" << Line;
6435   }
6436 
6437   const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
6438 
6439   CodeGenFunction CGF(CGM, true);
6440   CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName);
6441   CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6442 
6443   OutlinedFn = CGF.GenerateOpenMPCapturedStmtFunction(CS);
6444 
6445   // If this target outline function is not an offload entry, we don't need to
6446   // register it.
6447   if (!IsOffloadEntry)
6448     return;
6449 
6450   // The target region ID is used by the runtime library to identify the current
6451   // target region, so it only has to be unique and not necessarily point to
6452   // anything. It could be the pointer to the outlined function that implements
6453   // the target region, but we aren't using that so that the compiler doesn't
6454   // need to keep that, and could therefore inline the host function if proven
6455   // worthwhile during optimization. In the other hand, if emitting code for the
6456   // device, the ID has to be the function address so that it can retrieved from
6457   // the offloading entry and launched by the runtime library. We also mark the
6458   // outlined function to have external linkage in case we are emitting code for
6459   // the device, because these functions will be entry points to the device.
6460 
6461   if (CGM.getLangOpts().OpenMPIsDevice) {
6462     OutlinedFnID = llvm::ConstantExpr::getBitCast(OutlinedFn, CGM.Int8PtrTy);
6463     OutlinedFn->setLinkage(llvm::GlobalValue::WeakAnyLinkage);
6464     OutlinedFn->setDSOLocal(false);
6465   } else {
6466     std::string Name = getName({EntryFnName, "region_id"});
6467     OutlinedFnID = new llvm::GlobalVariable(
6468         CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
6469         llvm::GlobalValue::WeakAnyLinkage,
6470         llvm::Constant::getNullValue(CGM.Int8Ty), Name);
6471   }
6472 
6473   // Register the information for the entry associated with this target region.
6474   OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
6475       DeviceID, FileID, ParentName, Line, OutlinedFn, OutlinedFnID,
6476       OffloadEntriesInfoManagerTy::OMPTargetRegionEntryTargetRegion);
6477 }
6478 
6479 /// Checks if the expression is constant or does not have non-trivial function
6480 /// calls.
6481 static bool isTrivial(ASTContext &Ctx, const Expr * E) {
6482   // We can skip constant expressions.
6483   // We can skip expressions with trivial calls or simple expressions.
6484   return (E->isEvaluatable(Ctx, Expr::SE_AllowUndefinedBehavior) ||
6485           !E->hasNonTrivialCall(Ctx)) &&
6486          !E->HasSideEffects(Ctx, /*IncludePossibleEffects=*/true);
6487 }
6488 
6489 const Stmt *CGOpenMPRuntime::getSingleCompoundChild(ASTContext &Ctx,
6490                                                     const Stmt *Body) {
6491   const Stmt *Child = Body->IgnoreContainers();
6492   while (const auto *C = dyn_cast_or_null<CompoundStmt>(Child)) {
6493     Child = nullptr;
6494     for (const Stmt *S : C->body()) {
6495       if (const auto *E = dyn_cast<Expr>(S)) {
6496         if (isTrivial(Ctx, E))
6497           continue;
6498       }
6499       // Some of the statements can be ignored.
6500       if (isa<AsmStmt>(S) || isa<NullStmt>(S) || isa<OMPFlushDirective>(S) ||
6501           isa<OMPBarrierDirective>(S) || isa<OMPTaskyieldDirective>(S))
6502         continue;
6503       // Analyze declarations.
6504       if (const auto *DS = dyn_cast<DeclStmt>(S)) {
6505         if (llvm::all_of(DS->decls(), [&Ctx](const Decl *D) {
6506               if (isa<EmptyDecl>(D) || isa<DeclContext>(D) ||
6507                   isa<TypeDecl>(D) || isa<PragmaCommentDecl>(D) ||
6508                   isa<PragmaDetectMismatchDecl>(D) || isa<UsingDecl>(D) ||
6509                   isa<UsingDirectiveDecl>(D) ||
6510                   isa<OMPDeclareReductionDecl>(D) ||
6511                   isa<OMPThreadPrivateDecl>(D) || isa<OMPAllocateDecl>(D))
6512                 return true;
6513               const auto *VD = dyn_cast<VarDecl>(D);
6514               if (!VD)
6515                 return false;
6516               return VD->isConstexpr() ||
6517                      ((VD->getType().isTrivialType(Ctx) ||
6518                        VD->getType()->isReferenceType()) &&
6519                       (!VD->hasInit() || isTrivial(Ctx, VD->getInit())));
6520             }))
6521           continue;
6522       }
6523       // Found multiple children - cannot get the one child only.
6524       if (Child)
6525         return nullptr;
6526       Child = S;
6527     }
6528     if (Child)
6529       Child = Child->IgnoreContainers();
6530   }
6531   return Child;
6532 }
6533 
6534 /// Emit the number of teams for a target directive.  Inspect the num_teams
6535 /// clause associated with a teams construct combined or closely nested
6536 /// with the target directive.
6537 ///
6538 /// Emit a team of size one for directives such as 'target parallel' that
6539 /// have no associated teams construct.
6540 ///
6541 /// Otherwise, return nullptr.
6542 static llvm::Value *
6543 emitNumTeamsForTargetDirective(CodeGenFunction &CGF,
6544                                const OMPExecutableDirective &D) {
6545   assert(!CGF.getLangOpts().OpenMPIsDevice &&
6546          "Clauses associated with the teams directive expected to be emitted "
6547          "only for the host!");
6548   OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
6549   assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
6550          "Expected target-based executable directive.");
6551   CGBuilderTy &Bld = CGF.Builder;
6552   switch (DirectiveKind) {
6553   case OMPD_target: {
6554     const auto *CS = D.getInnermostCapturedStmt();
6555     const auto *Body =
6556         CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
6557     const Stmt *ChildStmt =
6558         CGOpenMPRuntime::getSingleCompoundChild(CGF.getContext(), Body);
6559     if (const auto *NestedDir =
6560             dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
6561       if (isOpenMPTeamsDirective(NestedDir->getDirectiveKind())) {
6562         if (NestedDir->hasClausesOfKind<OMPNumTeamsClause>()) {
6563           CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6564           CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6565           const Expr *NumTeams =
6566               NestedDir->getSingleClause<OMPNumTeamsClause>()->getNumTeams();
6567           llvm::Value *NumTeamsVal =
6568               CGF.EmitScalarExpr(NumTeams,
6569                                  /*IgnoreResultAssign*/ true);
6570           return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
6571                                    /*IsSigned=*/true);
6572         }
6573         return Bld.getInt32(0);
6574       }
6575       if (isOpenMPParallelDirective(NestedDir->getDirectiveKind()) ||
6576           isOpenMPSimdDirective(NestedDir->getDirectiveKind()))
6577         return Bld.getInt32(1);
6578       return Bld.getInt32(0);
6579     }
6580     return nullptr;
6581   }
6582   case OMPD_target_teams:
6583   case OMPD_target_teams_distribute:
6584   case OMPD_target_teams_distribute_simd:
6585   case OMPD_target_teams_distribute_parallel_for:
6586   case OMPD_target_teams_distribute_parallel_for_simd: {
6587     if (D.hasClausesOfKind<OMPNumTeamsClause>()) {
6588       CodeGenFunction::RunCleanupsScope NumTeamsScope(CGF);
6589       const Expr *NumTeams =
6590           D.getSingleClause<OMPNumTeamsClause>()->getNumTeams();
6591       llvm::Value *NumTeamsVal =
6592           CGF.EmitScalarExpr(NumTeams,
6593                              /*IgnoreResultAssign*/ true);
6594       return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
6595                                /*IsSigned=*/true);
6596     }
6597     return Bld.getInt32(0);
6598   }
6599   case OMPD_target_parallel:
6600   case OMPD_target_parallel_for:
6601   case OMPD_target_parallel_for_simd:
6602   case OMPD_target_simd:
6603     return Bld.getInt32(1);
6604   case OMPD_parallel:
6605   case OMPD_for:
6606   case OMPD_parallel_for:
6607   case OMPD_parallel_sections:
6608   case OMPD_for_simd:
6609   case OMPD_parallel_for_simd:
6610   case OMPD_cancel:
6611   case OMPD_cancellation_point:
6612   case OMPD_ordered:
6613   case OMPD_threadprivate:
6614   case OMPD_allocate:
6615   case OMPD_task:
6616   case OMPD_simd:
6617   case OMPD_sections:
6618   case OMPD_section:
6619   case OMPD_single:
6620   case OMPD_master:
6621   case OMPD_critical:
6622   case OMPD_taskyield:
6623   case OMPD_barrier:
6624   case OMPD_taskwait:
6625   case OMPD_taskgroup:
6626   case OMPD_atomic:
6627   case OMPD_flush:
6628   case OMPD_teams:
6629   case OMPD_target_data:
6630   case OMPD_target_exit_data:
6631   case OMPD_target_enter_data:
6632   case OMPD_distribute:
6633   case OMPD_distribute_simd:
6634   case OMPD_distribute_parallel_for:
6635   case OMPD_distribute_parallel_for_simd:
6636   case OMPD_teams_distribute:
6637   case OMPD_teams_distribute_simd:
6638   case OMPD_teams_distribute_parallel_for:
6639   case OMPD_teams_distribute_parallel_for_simd:
6640   case OMPD_target_update:
6641   case OMPD_declare_simd:
6642   case OMPD_declare_target:
6643   case OMPD_end_declare_target:
6644   case OMPD_declare_reduction:
6645   case OMPD_declare_mapper:
6646   case OMPD_taskloop:
6647   case OMPD_taskloop_simd:
6648   case OMPD_requires:
6649   case OMPD_unknown:
6650     break;
6651   }
6652   llvm_unreachable("Unexpected directive kind.");
6653 }
6654 
6655 static llvm::Value *getNumThreads(CodeGenFunction &CGF, const CapturedStmt *CS,
6656                                   llvm::Value *DefaultThreadLimitVal) {
6657   const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
6658       CGF.getContext(), CS->getCapturedStmt());
6659   if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
6660     if (isOpenMPParallelDirective(Dir->getDirectiveKind())) {
6661       llvm::Value *NumThreads = nullptr;
6662       llvm::Value *CondVal = nullptr;
6663       // Handle if clause. If if clause present, the number of threads is
6664       // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
6665       if (Dir->hasClausesOfKind<OMPIfClause>()) {
6666         CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6667         CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6668         const OMPIfClause *IfClause = nullptr;
6669         for (const auto *C : Dir->getClausesOfKind<OMPIfClause>()) {
6670           if (C->getNameModifier() == OMPD_unknown ||
6671               C->getNameModifier() == OMPD_parallel) {
6672             IfClause = C;
6673             break;
6674           }
6675         }
6676         if (IfClause) {
6677           const Expr *Cond = IfClause->getCondition();
6678           bool Result;
6679           if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
6680             if (!Result)
6681               return CGF.Builder.getInt32(1);
6682           } else {
6683             CodeGenFunction::LexicalScope Scope(CGF, Cond->getSourceRange());
6684             if (const auto *PreInit =
6685                     cast_or_null<DeclStmt>(IfClause->getPreInitStmt())) {
6686               for (const auto *I : PreInit->decls()) {
6687                 if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
6688                   CGF.EmitVarDecl(cast<VarDecl>(*I));
6689                 } else {
6690                   CodeGenFunction::AutoVarEmission Emission =
6691                       CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
6692                   CGF.EmitAutoVarCleanups(Emission);
6693                 }
6694               }
6695             }
6696             CondVal = CGF.EvaluateExprAsBool(Cond);
6697           }
6698         }
6699       }
6700       // Check the value of num_threads clause iff if clause was not specified
6701       // or is not evaluated to false.
6702       if (Dir->hasClausesOfKind<OMPNumThreadsClause>()) {
6703         CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6704         CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6705         const auto *NumThreadsClause =
6706             Dir->getSingleClause<OMPNumThreadsClause>();
6707         CodeGenFunction::LexicalScope Scope(
6708             CGF, NumThreadsClause->getNumThreads()->getSourceRange());
6709         if (const auto *PreInit =
6710                 cast_or_null<DeclStmt>(NumThreadsClause->getPreInitStmt())) {
6711           for (const auto *I : PreInit->decls()) {
6712             if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
6713               CGF.EmitVarDecl(cast<VarDecl>(*I));
6714             } else {
6715               CodeGenFunction::AutoVarEmission Emission =
6716                   CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
6717               CGF.EmitAutoVarCleanups(Emission);
6718             }
6719           }
6720         }
6721         NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads());
6722         NumThreads = CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty,
6723                                                /*IsSigned=*/false);
6724         if (DefaultThreadLimitVal)
6725           NumThreads = CGF.Builder.CreateSelect(
6726               CGF.Builder.CreateICmpULT(DefaultThreadLimitVal, NumThreads),
6727               DefaultThreadLimitVal, NumThreads);
6728       } else {
6729         NumThreads = DefaultThreadLimitVal ? DefaultThreadLimitVal
6730                                            : CGF.Builder.getInt32(0);
6731       }
6732       // Process condition of the if clause.
6733       if (CondVal) {
6734         NumThreads = CGF.Builder.CreateSelect(CondVal, NumThreads,
6735                                               CGF.Builder.getInt32(1));
6736       }
6737       return NumThreads;
6738     }
6739     if (isOpenMPSimdDirective(Dir->getDirectiveKind()))
6740       return CGF.Builder.getInt32(1);
6741     return DefaultThreadLimitVal;
6742   }
6743   return DefaultThreadLimitVal ? DefaultThreadLimitVal
6744                                : CGF.Builder.getInt32(0);
6745 }
6746 
6747 /// Emit the number of threads for a target directive.  Inspect the
6748 /// thread_limit clause associated with a teams construct combined or closely
6749 /// nested with the target directive.
6750 ///
6751 /// Emit the num_threads clause for directives such as 'target parallel' that
6752 /// have no associated teams construct.
6753 ///
6754 /// Otherwise, return nullptr.
6755 static llvm::Value *
6756 emitNumThreadsForTargetDirective(CodeGenFunction &CGF,
6757                                  const OMPExecutableDirective &D) {
6758   assert(!CGF.getLangOpts().OpenMPIsDevice &&
6759          "Clauses associated with the teams directive expected to be emitted "
6760          "only for the host!");
6761   OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
6762   assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
6763          "Expected target-based executable directive.");
6764   CGBuilderTy &Bld = CGF.Builder;
6765   llvm::Value *ThreadLimitVal = nullptr;
6766   llvm::Value *NumThreadsVal = nullptr;
6767   switch (DirectiveKind) {
6768   case OMPD_target: {
6769     const CapturedStmt *CS = D.getInnermostCapturedStmt();
6770     if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
6771       return NumThreads;
6772     const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
6773         CGF.getContext(), CS->getCapturedStmt());
6774     if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
6775       if (Dir->hasClausesOfKind<OMPThreadLimitClause>()) {
6776         CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6777         CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6778         const auto *ThreadLimitClause =
6779             Dir->getSingleClause<OMPThreadLimitClause>();
6780         CodeGenFunction::LexicalScope Scope(
6781             CGF, ThreadLimitClause->getThreadLimit()->getSourceRange());
6782         if (const auto *PreInit =
6783                 cast_or_null<DeclStmt>(ThreadLimitClause->getPreInitStmt())) {
6784           for (const auto *I : PreInit->decls()) {
6785             if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
6786               CGF.EmitVarDecl(cast<VarDecl>(*I));
6787             } else {
6788               CodeGenFunction::AutoVarEmission Emission =
6789                   CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
6790               CGF.EmitAutoVarCleanups(Emission);
6791             }
6792           }
6793         }
6794         llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
6795             ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
6796         ThreadLimitVal =
6797             Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*IsSigned=*/false);
6798       }
6799       if (isOpenMPTeamsDirective(Dir->getDirectiveKind()) &&
6800           !isOpenMPDistributeDirective(Dir->getDirectiveKind())) {
6801         CS = Dir->getInnermostCapturedStmt();
6802         const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
6803             CGF.getContext(), CS->getCapturedStmt());
6804         Dir = dyn_cast_or_null<OMPExecutableDirective>(Child);
6805       }
6806       if (Dir && isOpenMPDistributeDirective(Dir->getDirectiveKind()) &&
6807           !isOpenMPSimdDirective(Dir->getDirectiveKind())) {
6808         CS = Dir->getInnermostCapturedStmt();
6809         if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
6810           return NumThreads;
6811       }
6812       if (Dir && isOpenMPSimdDirective(Dir->getDirectiveKind()))
6813         return Bld.getInt32(1);
6814     }
6815     return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0);
6816   }
6817   case OMPD_target_teams: {
6818     if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
6819       CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
6820       const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
6821       llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
6822           ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
6823       ThreadLimitVal =
6824           Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*IsSigned=*/false);
6825     }
6826     const CapturedStmt *CS = D.getInnermostCapturedStmt();
6827     if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
6828       return NumThreads;
6829     const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
6830         CGF.getContext(), CS->getCapturedStmt());
6831     if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
6832       if (Dir->getDirectiveKind() == OMPD_distribute) {
6833         CS = Dir->getInnermostCapturedStmt();
6834         if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
6835           return NumThreads;
6836       }
6837     }
6838     return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0);
6839   }
6840   case OMPD_target_teams_distribute:
6841     if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
6842       CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
6843       const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
6844       llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
6845           ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
6846       ThreadLimitVal =
6847           Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*IsSigned=*/false);
6848     }
6849     return getNumThreads(CGF, D.getInnermostCapturedStmt(), ThreadLimitVal);
6850   case OMPD_target_parallel:
6851   case OMPD_target_parallel_for:
6852   case OMPD_target_parallel_for_simd:
6853   case OMPD_target_teams_distribute_parallel_for:
6854   case OMPD_target_teams_distribute_parallel_for_simd: {
6855     llvm::Value *CondVal = nullptr;
6856     // Handle if clause. If if clause present, the number of threads is
6857     // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
6858     if (D.hasClausesOfKind<OMPIfClause>()) {
6859       const OMPIfClause *IfClause = nullptr;
6860       for (const auto *C : D.getClausesOfKind<OMPIfClause>()) {
6861         if (C->getNameModifier() == OMPD_unknown ||
6862             C->getNameModifier() == OMPD_parallel) {
6863           IfClause = C;
6864           break;
6865         }
6866       }
6867       if (IfClause) {
6868         const Expr *Cond = IfClause->getCondition();
6869         bool Result;
6870         if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
6871           if (!Result)
6872             return Bld.getInt32(1);
6873         } else {
6874           CodeGenFunction::RunCleanupsScope Scope(CGF);
6875           CondVal = CGF.EvaluateExprAsBool(Cond);
6876         }
6877       }
6878     }
6879     if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
6880       CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
6881       const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
6882       llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
6883           ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
6884       ThreadLimitVal =
6885           Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*IsSigned=*/false);
6886     }
6887     if (D.hasClausesOfKind<OMPNumThreadsClause>()) {
6888       CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
6889       const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>();
6890       llvm::Value *NumThreads = CGF.EmitScalarExpr(
6891           NumThreadsClause->getNumThreads(), /*IgnoreResultAssign=*/true);
6892       NumThreadsVal =
6893           Bld.CreateIntCast(NumThreads, CGF.Int32Ty, /*IsSigned=*/false);
6894       ThreadLimitVal = ThreadLimitVal
6895                            ? Bld.CreateSelect(Bld.CreateICmpULT(NumThreadsVal,
6896                                                                 ThreadLimitVal),
6897                                               NumThreadsVal, ThreadLimitVal)
6898                            : NumThreadsVal;
6899     }
6900     if (!ThreadLimitVal)
6901       ThreadLimitVal = Bld.getInt32(0);
6902     if (CondVal)
6903       return Bld.CreateSelect(CondVal, ThreadLimitVal, Bld.getInt32(1));
6904     return ThreadLimitVal;
6905   }
6906   case OMPD_target_teams_distribute_simd:
6907   case OMPD_target_simd:
6908     return Bld.getInt32(1);
6909   case OMPD_parallel:
6910   case OMPD_for:
6911   case OMPD_parallel_for:
6912   case OMPD_parallel_sections:
6913   case OMPD_for_simd:
6914   case OMPD_parallel_for_simd:
6915   case OMPD_cancel:
6916   case OMPD_cancellation_point:
6917   case OMPD_ordered:
6918   case OMPD_threadprivate:
6919   case OMPD_allocate:
6920   case OMPD_task:
6921   case OMPD_simd:
6922   case OMPD_sections:
6923   case OMPD_section:
6924   case OMPD_single:
6925   case OMPD_master:
6926   case OMPD_critical:
6927   case OMPD_taskyield:
6928   case OMPD_barrier:
6929   case OMPD_taskwait:
6930   case OMPD_taskgroup:
6931   case OMPD_atomic:
6932   case OMPD_flush:
6933   case OMPD_teams:
6934   case OMPD_target_data:
6935   case OMPD_target_exit_data:
6936   case OMPD_target_enter_data:
6937   case OMPD_distribute:
6938   case OMPD_distribute_simd:
6939   case OMPD_distribute_parallel_for:
6940   case OMPD_distribute_parallel_for_simd:
6941   case OMPD_teams_distribute:
6942   case OMPD_teams_distribute_simd:
6943   case OMPD_teams_distribute_parallel_for:
6944   case OMPD_teams_distribute_parallel_for_simd:
6945   case OMPD_target_update:
6946   case OMPD_declare_simd:
6947   case OMPD_declare_target:
6948   case OMPD_end_declare_target:
6949   case OMPD_declare_reduction:
6950   case OMPD_declare_mapper:
6951   case OMPD_taskloop:
6952   case OMPD_taskloop_simd:
6953   case OMPD_requires:
6954   case OMPD_unknown:
6955     break;
6956   }
6957   llvm_unreachable("Unsupported directive kind.");
6958 }
6959 
6960 namespace {
6961 LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
6962 
6963 // Utility to handle information from clauses associated with a given
6964 // construct that use mappable expressions (e.g. 'map' clause, 'to' clause).
6965 // It provides a convenient interface to obtain the information and generate
6966 // code for that information.
6967 class MappableExprsHandler {
6968 public:
  /// Values for bit flags used to specify the mapping type for
  /// offloading.
  ///
  /// Every enumerator except OMP_MAP_NONE and OMP_MAP_MEMBER_OF is a single
  /// bit, so values are meant to be combined with bitwise operators;
  /// OMP_MAP_MEMBER_OF is a 16-bit field occupying the most significant bits
  /// of the 64-bit value.
  enum OpenMPOffloadMappingFlags : uint64_t {
    /// No flags
    OMP_MAP_NONE = 0x0,
    /// Allocate memory on the device and move data from host to device.
    OMP_MAP_TO = 0x01,
    /// Allocate memory on the device and move data from device to host.
    OMP_MAP_FROM = 0x02,
    /// Always perform the requested mapping action on the element, even
    /// if it was already mapped before.
    OMP_MAP_ALWAYS = 0x04,
    /// Delete the element from the device environment, ignoring the
    /// current reference count associated with the element.
    OMP_MAP_DELETE = 0x08,
    /// The element being mapped is a pointer-pointee pair; both the
    /// pointer and the pointee should be mapped.
    OMP_MAP_PTR_AND_OBJ = 0x10,
    /// This flag signals that the base address of an entry should be
    /// passed to the target kernel as an argument.
    OMP_MAP_TARGET_PARAM = 0x20,
    /// Signal that the runtime library has to return the device pointer
    /// in the current position for the data being mapped. Used when we have the
    /// use_device_ptr clause.
    OMP_MAP_RETURN_PARAM = 0x40,
    /// This flag signals that the reference being passed is a pointer to
    /// private data.
    OMP_MAP_PRIVATE = 0x80,
    /// Pass the element to the device by value.
    OMP_MAP_LITERAL = 0x100,
    /// Implicit map
    OMP_MAP_IMPLICIT = 0x200,
    /// The 16 MSBs of the flags indicate whether the entry is member of some
    /// struct/class.
    OMP_MAP_MEMBER_OF = 0xffff000000000000,
    // Registers OMP_MAP_MEMBER_OF as the largest enumerator for LLVM's
    // bitmask-enum support.
    LLVM_MARK_AS_BITMASK_ENUM(/* LargestFlag = */ OMP_MAP_MEMBER_OF),
  };
7006 
7007   /// Class that associates information with a base pointer to be passed to the
7008   /// runtime library.
7009   class BasePointerInfo {
7010     /// The base pointer.
7011     llvm::Value *Ptr = nullptr;
7012     /// The base declaration that refers to this device pointer, or null if
7013     /// there is none.
7014     const ValueDecl *DevPtrDecl = nullptr;
7015 
7016   public:
7017     BasePointerInfo(llvm::Value *Ptr, const ValueDecl *DevPtrDecl = nullptr)
7018         : Ptr(Ptr), DevPtrDecl(DevPtrDecl) {}
7019     llvm::Value *operator*() const { return Ptr; }
7020     const ValueDecl *getDevicePtrDecl() const { return DevPtrDecl; }
7021     void setDevicePtrDecl(const ValueDecl *D) { DevPtrDecl = D; }
7022   };
7023 
7024   using MapBaseValuesArrayTy = SmallVector<BasePointerInfo, 4>;
7025   using MapValuesArrayTy = SmallVector<llvm::Value *, 4>;
7026   using MapFlagsArrayTy = SmallVector<OpenMPOffloadMappingFlags, 4>;
7027 
  /// Map between a struct and its lowest & highest elements which have been
  /// mapped.
  /// [ValueDecl *] --> {LE(FieldIndex, Pointer),
  ///                    HE(FieldIndex, Pointer)}
  ///
  /// A default-constructed object has field index 0 and an invalid address for
  /// both elements, and an invalid base address.
  struct StructRangeInfoTy {
    /// Field index and address of the lowest mapped element.
    std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> LowestElem = {
        0, Address::invalid()};
    /// Field index and address of the highest mapped element.
    std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> HighestElem = {
        0, Address::invalid()};
    /// Base address; presumably that of the enclosing struct (TODO confirm
    /// against the code that fills it in).
    Address Base = Address::invalid();
  };
7039 
7040 private:
7041   /// Kind that defines how a device pointer has to be returned.
7042   struct MapInfo {
7043     OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
7044     OpenMPMapClauseKind MapType = OMPC_MAP_unknown;
7045     ArrayRef<OpenMPMapModifierKind> MapModifiers;
7046     bool ReturnDevicePointer = false;
7047     bool IsImplicit = false;
7048 
7049     MapInfo() = default;
7050     MapInfo(
7051         OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
7052         OpenMPMapClauseKind MapType,
7053         ArrayRef<OpenMPMapModifierKind> MapModifiers,
7054         bool ReturnDevicePointer, bool IsImplicit)
7055         : Components(Components), MapType(MapType), MapModifiers(MapModifiers),
7056           ReturnDevicePointer(ReturnDevicePointer), IsImplicit(IsImplicit) {}
7057   };
7058 
7059   /// If use_device_ptr is used on a pointer which is a struct member and there
7060   /// is no map information about it, then emission of that entry is deferred
7061   /// until the whole struct has been processed.
7062   struct DeferredDevicePtrEntryTy {
7063     const Expr *IE = nullptr;
7064     const ValueDecl *VD = nullptr;
7065 
7066     DeferredDevicePtrEntryTy(const Expr *IE, const ValueDecl *VD)
7067         : IE(IE), VD(VD) {}
7068   };
7069 
7070   /// Directive from where the map clauses were extracted.
7071   const OMPExecutableDirective &CurDir;
7072 
7073   /// Function the directive is being generated for.
7074   CodeGenFunction &CGF;
7075 
7076   /// Set of all first private variables in the current directive.
7077   llvm::SmallPtrSet<const VarDecl *, 8> FirstPrivateDecls;
7078 
7079   /// Map between device pointer declarations and their expression components.
7080   /// The key value for declarations in 'this' is null.
7081   llvm::DenseMap<
7082       const ValueDecl *,
7083       SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>>
7084       DevPointersMap;
7085 
7086   llvm::Value *getExprTypeSize(const Expr *E) const {
7087     QualType ExprTy = E->getType().getCanonicalType();
7088 
7089     // Reference types are ignored for mapping purposes.
7090     if (const auto *RefTy = ExprTy->getAs<ReferenceType>())
7091       ExprTy = RefTy->getPointeeType().getCanonicalType();
7092 
7093     // Given that an array section is considered a built-in type, we need to
7094     // do the calculation based on the length of the section instead of relying
7095     // on CGF.getTypeSize(E->getType()).
7096     if (const auto *OAE = dyn_cast<OMPArraySectionExpr>(E)) {
7097       QualType BaseTy = OMPArraySectionExpr::getBaseOriginalType(
7098                             OAE->getBase()->IgnoreParenImpCasts())
7099                             .getCanonicalType();
7100 
7101       // If there is no length associated with the expression, that means we
7102       // are using the whole length of the base.
7103       if (!OAE->getLength() && OAE->getColonLoc().isValid())
7104         return CGF.getTypeSize(BaseTy);
7105 
7106       llvm::Value *ElemSize;
7107       if (const auto *PTy = BaseTy->getAs<PointerType>()) {
7108         ElemSize = CGF.getTypeSize(PTy->getPointeeType().getCanonicalType());
7109       } else {
7110         const auto *ATy = cast<ArrayType>(BaseTy.getTypePtr());
7111         assert(ATy && "Expecting array type if not a pointer type.");
7112         ElemSize = CGF.getTypeSize(ATy->getElementType().getCanonicalType());
7113       }
7114 
7115       // If we don't have a length at this point, that is because we have an
7116       // array section with a single element.
7117       if (!OAE->getLength())
7118         return ElemSize;
7119 
7120       llvm::Value *LengthVal = CGF.EmitScalarExpr(OAE->getLength());
7121       LengthVal =
7122           CGF.Builder.CreateIntCast(LengthVal, CGF.SizeTy, /*isSigned=*/false);
7123       return CGF.Builder.CreateNUWMul(LengthVal, ElemSize);
7124     }
7125     return CGF.getTypeSize(ExprTy);
7126   }
7127 
7128   /// Return the corresponding bits for a given map clause modifier. Add
7129   /// a flag marking the map as a pointer if requested. Add a flag marking the
7130   /// map as the first one of a series of maps that relate to the same map
7131   /// expression.
7132   OpenMPOffloadMappingFlags getMapTypeBits(
7133       OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
7134       bool IsImplicit, bool AddPtrFlag, bool AddIsTargetParamFlag) const {
7135     OpenMPOffloadMappingFlags Bits =
7136         IsImplicit ? OMP_MAP_IMPLICIT : OMP_MAP_NONE;
7137     switch (MapType) {
7138     case OMPC_MAP_alloc:
7139     case OMPC_MAP_release:
7140       // alloc and release is the default behavior in the runtime library,  i.e.
7141       // if we don't pass any bits alloc/release that is what the runtime is
7142       // going to do. Therefore, we don't need to signal anything for these two
7143       // type modifiers.
7144       break;
7145     case OMPC_MAP_to:
7146       Bits |= OMP_MAP_TO;
7147       break;
7148     case OMPC_MAP_from:
7149       Bits |= OMP_MAP_FROM;
7150       break;
7151     case OMPC_MAP_tofrom:
7152       Bits |= OMP_MAP_TO | OMP_MAP_FROM;
7153       break;
7154     case OMPC_MAP_delete:
7155       Bits |= OMP_MAP_DELETE;
7156       break;
7157     case OMPC_MAP_unknown:
7158       llvm_unreachable("Unexpected map type!");
7159     }
7160     if (AddPtrFlag)
7161       Bits |= OMP_MAP_PTR_AND_OBJ;
7162     if (AddIsTargetParamFlag)
7163       Bits |= OMP_MAP_TARGET_PARAM;
7164     if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_always)
7165         != MapModifiers.end())
7166       Bits |= OMP_MAP_ALWAYS;
7167     return Bits;
7168   }
7169 
7170   /// Return true if the provided expression is a final array section. A
7171   /// final array section, is one whose length can't be proved to be one.
7172   bool isFinalArraySectionExpression(const Expr *E) const {
7173     const auto *OASE = dyn_cast<OMPArraySectionExpr>(E);
7174 
7175     // It is not an array section and therefore not a unity-size one.
7176     if (!OASE)
7177       return false;
7178 
7179     // An array section with no colon always refer to a single element.
7180     if (OASE->getColonLoc().isInvalid())
7181       return false;
7182 
7183     const Expr *Length = OASE->getLength();
7184 
7185     // If we don't have a length we have to check if the array has size 1
7186     // for this dimension. Also, we should always expect a length if the
7187     // base type is pointer.
7188     if (!Length) {
7189       QualType BaseQTy = OMPArraySectionExpr::getBaseOriginalType(
7190                              OASE->getBase()->IgnoreParenImpCasts())
7191                              .getCanonicalType();
7192       if (const auto *ATy = dyn_cast<ConstantArrayType>(BaseQTy.getTypePtr()))
7193         return ATy->getSize().getSExtValue() != 1;
7194       // If we don't have a constant dimension length, we have to consider
7195       // the current section as having any size, so it is not necessarily
7196       // unitary. If it happen to be unity size, that's user fault.
7197       return true;
7198     }
7199 
7200     // Check if the length evaluates to 1.
7201     Expr::EvalResult Result;
7202     if (!Length->EvaluateAsInt(Result, CGF.getContext()))
7203       return true; // Can have more that size 1.
7204 
7205     llvm::APSInt ConstLength = Result.Val.getInt();
7206     return ConstLength.getSExtValue() != 1;
7207   }
7208 
7209   /// Generate the base pointers, section pointers, sizes and map type
7210   /// bits for the provided map type, map modifier, and expression components.
7211   /// \a IsFirstComponent should be set to true if the provided set of
7212   /// components is the first associated with a capture.
7213   void generateInfoForComponentList(
7214       OpenMPMapClauseKind MapType,
7215       ArrayRef<OpenMPMapModifierKind> MapModifiers,
7216       OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
7217       MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
7218       MapValuesArrayTy &Sizes, MapFlagsArrayTy &Types,
7219       StructRangeInfoTy &PartialStruct, bool IsFirstComponentList,
7220       bool IsImplicit,
7221       ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
7222           OverlappedElements = llvm::None) const {
7223     // The following summarizes what has to be generated for each map and the
7224     // types below. The generated information is expressed in this order:
7225     // base pointer, section pointer, size, flags
7226     // (to add to the ones that come from the map type and modifier).
7227     //
7228     // double d;
7229     // int i[100];
7230     // float *p;
7231     //
7232     // struct S1 {
7233     //   int i;
7234     //   float f[50];
7235     // }
7236     // struct S2 {
7237     //   int i;
7238     //   float f[50];
7239     //   S1 s;
7240     //   double *p;
7241     //   struct S2 *ps;
7242     // }
7243     // S2 s;
7244     // S2 *ps;
7245     //
7246     // map(d)
7247     // &d, &d, sizeof(double), TARGET_PARAM | TO | FROM
7248     //
7249     // map(i)
7250     // &i, &i, 100*sizeof(int), TARGET_PARAM | TO | FROM
7251     //
7252     // map(i[1:23])
7253     // &i(=&i[0]), &i[1], 23*sizeof(int), TARGET_PARAM | TO | FROM
7254     //
7255     // map(p)
7256     // &p, &p, sizeof(float*), TARGET_PARAM | TO | FROM
7257     //
7258     // map(p[1:24])
7259     // p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM
7260     //
7261     // map(s)
7262     // &s, &s, sizeof(S2), TARGET_PARAM | TO | FROM
7263     //
7264     // map(s.i)
7265     // &s, &(s.i), sizeof(int), TARGET_PARAM | TO | FROM
7266     //
7267     // map(s.s.f)
7268     // &s, &(s.s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
7269     //
7270     // map(s.p)
7271     // &s, &(s.p), sizeof(double*), TARGET_PARAM | TO | FROM
7272     //
7273     // map(to: s.p[:22])
7274     // &s, &(s.p), sizeof(double*), TARGET_PARAM (*)
7275     // &s, &(s.p), sizeof(double*), MEMBER_OF(1) (**)
7276     // &(s.p), &(s.p[0]), 22*sizeof(double),
7277     //   MEMBER_OF(1) | PTR_AND_OBJ | TO (***)
7278     // (*) alloc space for struct members, only this is a target parameter
7279     // (**) map the pointer (nothing to be mapped in this example) (the compiler
7280     //      optimizes this entry out, same in the examples below)
7281     // (***) map the pointee (map: to)
7282     //
7283     // map(s.ps)
7284     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM | TO | FROM
7285     //
7286     // map(from: s.ps->s.i)
7287     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7288     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7289     // &(s.ps), &(s.ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ  | FROM
7290     //
7291     // map(to: s.ps->ps)
7292     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7293     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7294     // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ  | TO
7295     //
7296     // map(s.ps->ps->ps)
7297     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7298     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7299     // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7300     // &(s.ps->ps), &(s.ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
7301     //
7302     // map(to: s.ps->ps->s.f[:22])
7303     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7304     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7305     // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7306     // &(s.ps->ps), &(s.ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
7307     //
7308     // map(ps)
7309     // &ps, &ps, sizeof(S2*), TARGET_PARAM | TO | FROM
7310     //
7311     // map(ps->i)
7312     // ps, &(ps->i), sizeof(int), TARGET_PARAM | TO | FROM
7313     //
7314     // map(ps->s.f)
7315     // ps, &(ps->s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
7316     //
7317     // map(from: ps->p)
7318     // ps, &(ps->p), sizeof(double*), TARGET_PARAM | FROM
7319     //
7320     // map(to: ps->p[:22])
7321     // ps, &(ps->p), sizeof(double*), TARGET_PARAM
7322     // ps, &(ps->p), sizeof(double*), MEMBER_OF(1)
7323     // &(ps->p), &(ps->p[0]), 22*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | TO
7324     //
7325     // map(ps->ps)
7326     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM | TO | FROM
7327     //
7328     // map(from: ps->ps->s.i)
7329     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7330     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7331     // &(ps->ps), &(ps->ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7332     //
7333     // map(from: ps->ps->ps)
7334     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7335     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7336     // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7337     //
7338     // map(ps->ps->ps->ps)
7339     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7340     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7341     // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7342     // &(ps->ps->ps), &(ps->ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
7343     //
7344     // map(to: ps->ps->ps->s.f[:22])
7345     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7346     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7347     // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7348     // &(ps->ps->ps), &(ps->ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
7349     //
7350     // map(to: s.f[:22]) map(from: s.p[:33])
7351     // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1) +
    //     sizeof(double*) (*), TARGET_PARAM
7353     // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | TO
7354     // &s, &(s.p), sizeof(double*), MEMBER_OF(1)
7355     // &(s.p), &(s.p[0]), 33*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7356     // (*) allocate contiguous space needed to fit all mapped members even if
7357     //     we allocate space for members not mapped (in this example,
7358     //     s.f[22..49] and s.s are not mapped, yet we must allocate space for
7359     //     them as well because they fall between &s.f[0] and &s.p)
7360     //
7361     // map(from: s.f[:22]) map(to: ps->p[:33])
7362     // &s, &(s.f[0]), 22*sizeof(float), TARGET_PARAM | FROM
    // ps, &(ps->p), sizeof(double*), TARGET_PARAM
7364     // ps, &(ps->p), sizeof(double*), MEMBER_OF(2) (*)
7365     // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(2) | PTR_AND_OBJ | TO
7366     // (*) the struct this entry pertains to is the 2nd element in the list of
7367     //     arguments, hence MEMBER_OF(2)
7368     //
7369     // map(from: s.f[:22], s.s) map(to: ps->p[:33])
7370     // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1), TARGET_PARAM
7371     // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | FROM
7372     // &s, &(s.s), sizeof(struct S1), MEMBER_OF(1) | FROM
    // ps, &(ps->p), sizeof(double*), TARGET_PARAM
7374     // ps, &(ps->p), sizeof(double*), MEMBER_OF(4) (*)
7375     // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(4) | PTR_AND_OBJ | TO
7376     // (*) the struct this entry pertains to is the 4th element in the list
7377     //     of arguments, hence MEMBER_OF(4)
7378 
7379     // Track if the map information being generated is the first for a capture.
7380     bool IsCaptureFirstInfo = IsFirstComponentList;
7381     bool IsLink = false; // Is this variable a "declare target link"?
7382 
7383     // Scan the components from the base to the complete expression.
7384     auto CI = Components.rbegin();
7385     auto CE = Components.rend();
7386     auto I = CI;
7387 
7388     // Track if the map information being generated is the first for a list of
7389     // components.
7390     bool IsExpressionFirstInfo = true;
7391     Address BP = Address::invalid();
7392     const Expr *AssocExpr = I->getAssociatedExpression();
7393     const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr);
7394     const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);
7395 
7396     if (isa<MemberExpr>(AssocExpr)) {
7397       // The base is the 'this' pointer. The content of the pointer is going
7398       // to be the base of the field being mapped.
7399       BP = CGF.LoadCXXThisAddress();
7400     } else if ((AE && isa<CXXThisExpr>(AE->getBase()->IgnoreParenImpCasts())) ||
7401                (OASE &&
7402                 isa<CXXThisExpr>(OASE->getBase()->IgnoreParenImpCasts()))) {
7403       BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress();
7404     } else {
7405       // The base is the reference to the variable.
7406       // BP = &Var.
7407       BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress();
7408       if (const auto *VD =
7409               dyn_cast_or_null<VarDecl>(I->getAssociatedDeclaration())) {
7410         if (llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
7411                 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD))
7412           if (*Res == OMPDeclareTargetDeclAttr::MT_Link) {
7413             IsLink = true;
7414             BP = CGF.CGM.getOpenMPRuntime().getAddrOfDeclareTargetLink(VD);
7415           }
7416       }
7417 
7418       // If the variable is a pointer and is being dereferenced (i.e. is not
7419       // the last component), the base has to be the pointer itself, not its
7420       // reference. References are ignored for mapping purposes.
7421       QualType Ty =
7422           I->getAssociatedDeclaration()->getType().getNonReferenceType();
7423       if (Ty->isAnyPointerType() && std::next(I) != CE) {
7424         BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
7425 
7426         // We do not need to generate individual map information for the
7427         // pointer, it can be associated with the combined storage.
7428         ++I;
7429       }
7430     }
7431 
7432     // Track whether a component of the list should be marked as MEMBER_OF some
7433     // combined entry (for partial structs). Only the first PTR_AND_OBJ entry
7434     // in a component list should be marked as MEMBER_OF, all subsequent entries
7435     // do not belong to the base struct. E.g.
7436     // struct S2 s;
7437     // s.ps->ps->ps->f[:]
7438     //   (1) (2) (3) (4)
7439     // ps(1) is a member pointer, ps(2) is a pointee of ps(1), so it is a
7440     // PTR_AND_OBJ entry; the PTR is ps(1), so MEMBER_OF the base struct. ps(3)
7441     // is the pointee of ps(2) which is not member of struct s, so it should not
7442     // be marked as such (it is still PTR_AND_OBJ).
7443     // The variable is initialized to false so that PTR_AND_OBJ entries which
7444     // are not struct members are not considered (e.g. array of pointers to
7445     // data).
7446     bool ShouldBeMemberOf = false;
7447 
7448     // Variable keeping track of whether or not we have encountered a component
7449     // in the component list which is a member expression. Useful when we have a
7450     // pointer or a final array section, in which case it is the previous
7451     // component in the list which tells us whether we have a member expression.
7452     // E.g. X.f[:]
7453     // While processing the final array section "[:]" it is "f" which tells us
7454     // whether we are dealing with a member of a declared struct.
7455     const MemberExpr *EncounteredME = nullptr;
7456 
7457     for (; I != CE; ++I) {
7458       // If the current component is member of a struct (parent struct) mark it.
7459       if (!EncounteredME) {
7460         EncounteredME = dyn_cast<MemberExpr>(I->getAssociatedExpression());
7461         // If we encounter a PTR_AND_OBJ entry from now on it should be marked
7462         // as MEMBER_OF the parent struct.
7463         if (EncounteredME)
7464           ShouldBeMemberOf = true;
7465       }
7466 
7467       auto Next = std::next(I);
7468 
7469       // We need to generate the addresses and sizes if this is the last
7470       // component, if the component is a pointer or if it is an array section
7471       // whose length can't be proved to be one. If this is a pointer, it
7472       // becomes the base address for the following components.
7473 
7474       // A final array section, is one whose length can't be proved to be one.
7475       bool IsFinalArraySection =
7476           isFinalArraySectionExpression(I->getAssociatedExpression());
7477 
7478       // Get information on whether the element is a pointer. Have to do a
7479       // special treatment for array sections given that they are built-in
7480       // types.
7481       const auto *OASE =
7482           dyn_cast<OMPArraySectionExpr>(I->getAssociatedExpression());
7483       bool IsPointer =
7484           (OASE && OMPArraySectionExpr::getBaseOriginalType(OASE)
7485                        .getCanonicalType()
7486                        ->isAnyPointerType()) ||
7487           I->getAssociatedExpression()->getType()->isAnyPointerType();
7488 
7489       if (Next == CE || IsPointer || IsFinalArraySection) {
7490         // If this is not the last component, we expect the pointer to be
7491         // associated with an array expression or member expression.
7492         assert((Next == CE ||
7493                 isa<MemberExpr>(Next->getAssociatedExpression()) ||
7494                 isa<ArraySubscriptExpr>(Next->getAssociatedExpression()) ||
7495                 isa<OMPArraySectionExpr>(Next->getAssociatedExpression())) &&
7496                "Unexpected expression");
7497 
7498         Address LB =
7499             CGF.EmitOMPSharedLValue(I->getAssociatedExpression()).getAddress();
7500 
7501         // If this component is a pointer inside the base struct then we don't
7502         // need to create any entry for it - it will be combined with the object
7503         // it is pointing to into a single PTR_AND_OBJ entry.
7504         bool IsMemberPointer =
7505             IsPointer && EncounteredME &&
7506             (dyn_cast<MemberExpr>(I->getAssociatedExpression()) ==
7507              EncounteredME);
7508         if (!OverlappedElements.empty()) {
7509           // Handle base element with the info for overlapped elements.
7510           assert(!PartialStruct.Base.isValid() && "The base element is set.");
7511           assert(Next == CE &&
7512                  "Expected last element for the overlapped elements.");
7513           assert(!IsPointer &&
7514                  "Unexpected base element with the pointer type.");
7515           // Mark the whole struct as the struct that requires allocation on the
7516           // device.
7517           PartialStruct.LowestElem = {0, LB};
7518           CharUnits TypeSize = CGF.getContext().getTypeSizeInChars(
7519               I->getAssociatedExpression()->getType());
7520           Address HB = CGF.Builder.CreateConstGEP(
7521               CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(LB,
7522                                                               CGF.VoidPtrTy),
7523               TypeSize.getQuantity() - 1);
7524           PartialStruct.HighestElem = {
7525               std::numeric_limits<decltype(
7526                   PartialStruct.HighestElem.first)>::max(),
7527               HB};
7528           PartialStruct.Base = BP;
7529           // Emit data for non-overlapped data.
7530           OpenMPOffloadMappingFlags Flags =
7531               OMP_MAP_MEMBER_OF |
7532               getMapTypeBits(MapType, MapModifiers, IsImplicit,
7533                              /*AddPtrFlag=*/false,
7534                              /*AddIsTargetParamFlag=*/false);
7535           LB = BP;
7536           llvm::Value *Size = nullptr;
7537           // Do bitcopy of all non-overlapped structure elements.
7538           for (OMPClauseMappableExprCommon::MappableExprComponentListRef
7539                    Component : OverlappedElements) {
7540             Address ComponentLB = Address::invalid();
7541             for (const OMPClauseMappableExprCommon::MappableComponent &MC :
7542                  Component) {
7543               if (MC.getAssociatedDeclaration()) {
7544                 ComponentLB =
7545                     CGF.EmitOMPSharedLValue(MC.getAssociatedExpression())
7546                         .getAddress();
7547                 Size = CGF.Builder.CreatePtrDiff(
7548                     CGF.EmitCastToVoidPtr(ComponentLB.getPointer()),
7549                     CGF.EmitCastToVoidPtr(LB.getPointer()));
7550                 break;
7551               }
7552             }
7553             BasePointers.push_back(BP.getPointer());
7554             Pointers.push_back(LB.getPointer());
7555             Sizes.push_back(Size);
7556             Types.push_back(Flags);
7557             LB = CGF.Builder.CreateConstGEP(ComponentLB, 1);
7558           }
7559           BasePointers.push_back(BP.getPointer());
7560           Pointers.push_back(LB.getPointer());
7561           Size = CGF.Builder.CreatePtrDiff(
7562               CGF.EmitCastToVoidPtr(
7563                   CGF.Builder.CreateConstGEP(HB, 1).getPointer()),
7564               CGF.EmitCastToVoidPtr(LB.getPointer()));
7565           Sizes.push_back(Size);
7566           Types.push_back(Flags);
7567           break;
7568         }
7569         llvm::Value *Size = getExprTypeSize(I->getAssociatedExpression());
7570         if (!IsMemberPointer) {
7571           BasePointers.push_back(BP.getPointer());
7572           Pointers.push_back(LB.getPointer());
7573           Sizes.push_back(Size);
7574 
7575           // We need to add a pointer flag for each map that comes from the
7576           // same expression except for the first one. We also need to signal
7577           // this map is the first one that relates with the current capture
7578           // (there is a set of entries for each capture).
7579           OpenMPOffloadMappingFlags Flags = getMapTypeBits(
7580               MapType, MapModifiers, IsImplicit,
7581               !IsExpressionFirstInfo || IsLink, IsCaptureFirstInfo && !IsLink);
7582 
7583           if (!IsExpressionFirstInfo) {
7584             // If we have a PTR_AND_OBJ pair where the OBJ is a pointer as well,
7585             // then we reset the TO/FROM/ALWAYS/DELETE flags.
7586             if (IsPointer)
7587               Flags &= ~(OMP_MAP_TO | OMP_MAP_FROM | OMP_MAP_ALWAYS |
7588                          OMP_MAP_DELETE);
7589 
7590             if (ShouldBeMemberOf) {
7591               // Set placeholder value MEMBER_OF=FFFF to indicate that the flag
7592               // should be later updated with the correct value of MEMBER_OF.
7593               Flags |= OMP_MAP_MEMBER_OF;
7594               // From now on, all subsequent PTR_AND_OBJ entries should not be
7595               // marked as MEMBER_OF.
7596               ShouldBeMemberOf = false;
7597             }
7598           }
7599 
7600           Types.push_back(Flags);
7601         }
7602 
7603         // If we have encountered a member expression so far, keep track of the
7604         // mapped member. If the parent is "*this", then the value declaration
7605         // is nullptr.
7606         if (EncounteredME) {
7607           const auto *FD = dyn_cast<FieldDecl>(EncounteredME->getMemberDecl());
7608           unsigned FieldIndex = FD->getFieldIndex();
7609 
7610           // Update info about the lowest and highest elements for this struct
7611           if (!PartialStruct.Base.isValid()) {
7612             PartialStruct.LowestElem = {FieldIndex, LB};
7613             PartialStruct.HighestElem = {FieldIndex, LB};
7614             PartialStruct.Base = BP;
7615           } else if (FieldIndex < PartialStruct.LowestElem.first) {
7616             PartialStruct.LowestElem = {FieldIndex, LB};
7617           } else if (FieldIndex > PartialStruct.HighestElem.first) {
7618             PartialStruct.HighestElem = {FieldIndex, LB};
7619           }
7620         }
7621 
7622         // If we have a final array section, we are done with this expression.
7623         if (IsFinalArraySection)
7624           break;
7625 
7626         // The pointer becomes the base for the next element.
7627         if (Next != CE)
7628           BP = LB;
7629 
7630         IsExpressionFirstInfo = false;
7631         IsCaptureFirstInfo = false;
7632       }
7633     }
7634   }
7635 
7636   /// Return the adjusted map modifiers if the declaration a capture refers to
7637   /// appears in a first-private clause. This is expected to be used only with
7638   /// directives that start with 'target'.
7639   MappableExprsHandler::OpenMPOffloadMappingFlags
7640   getMapModifiersForPrivateClauses(const CapturedStmt::Capture &Cap) const {
7641     assert(Cap.capturesVariable() && "Expected capture by reference only!");
7642 
7643     // A first private variable captured by reference will use only the
7644     // 'private ptr' and 'map to' flag. Return the right flags if the captured
7645     // declaration is known as first-private in this handler.
7646     if (FirstPrivateDecls.count(Cap.getCapturedVar())) {
7647       if (Cap.getCapturedVar()->getType().isConstant(CGF.getContext()) &&
7648           Cap.getCaptureKind() == CapturedStmt::VCK_ByRef)
7649         return MappableExprsHandler::OMP_MAP_ALWAYS |
7650                MappableExprsHandler::OMP_MAP_TO;
7651       if (Cap.getCapturedVar()->getType()->isAnyPointerType())
7652         return MappableExprsHandler::OMP_MAP_TO |
7653                MappableExprsHandler::OMP_MAP_PTR_AND_OBJ;
7654       return MappableExprsHandler::OMP_MAP_PRIVATE |
7655              MappableExprsHandler::OMP_MAP_TO;
7656     }
7657     return MappableExprsHandler::OMP_MAP_TO |
7658            MappableExprsHandler::OMP_MAP_FROM;
7659   }
7660 
7661   static OpenMPOffloadMappingFlags getMemberOfFlag(unsigned Position) {
7662     // Member of is given by the 16 MSB of the flag, so rotate by 48 bits.
7663     return static_cast<OpenMPOffloadMappingFlags>(((uint64_t)Position + 1)
7664                                                   << 48);
7665   }
7666 
7667   static void setCorrectMemberOfFlag(OpenMPOffloadMappingFlags &Flags,
7668                                      OpenMPOffloadMappingFlags MemberOfFlag) {
7669     // If the entry is PTR_AND_OBJ but has not been marked with the special
7670     // placeholder value 0xFFFF in the MEMBER_OF field, then it should not be
7671     // marked as MEMBER_OF.
7672     if ((Flags & OMP_MAP_PTR_AND_OBJ) &&
7673         ((Flags & OMP_MAP_MEMBER_OF) != OMP_MAP_MEMBER_OF))
7674       return;
7675 
7676     // Reset the placeholder value to prepare the flag for the assignment of the
7677     // proper MEMBER_OF value.
7678     Flags &= ~OMP_MAP_MEMBER_OF;
7679     Flags |= MemberOfFlag;
7680   }
7681 
7682   void getPlainLayout(const CXXRecordDecl *RD,
7683                       llvm::SmallVectorImpl<const FieldDecl *> &Layout,
7684                       bool AsBase) const {
7685     const CGRecordLayout &RL = CGF.getTypes().getCGRecordLayout(RD);
7686 
7687     llvm::StructType *St =
7688         AsBase ? RL.getBaseSubobjectLLVMType() : RL.getLLVMType();
7689 
7690     unsigned NumElements = St->getNumElements();
7691     llvm::SmallVector<
7692         llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>, 4>
7693         RecordLayout(NumElements);
7694 
7695     // Fill bases.
7696     for (const auto &I : RD->bases()) {
7697       if (I.isVirtual())
7698         continue;
7699       const auto *Base = I.getType()->getAsCXXRecordDecl();
7700       // Ignore empty bases.
7701       if (Base->isEmpty() || CGF.getContext()
7702                                  .getASTRecordLayout(Base)
7703                                  .getNonVirtualSize()
7704                                  .isZero())
7705         continue;
7706 
7707       unsigned FieldIndex = RL.getNonVirtualBaseLLVMFieldNo(Base);
7708       RecordLayout[FieldIndex] = Base;
7709     }
7710     // Fill in virtual bases.
7711     for (const auto &I : RD->vbases()) {
7712       const auto *Base = I.getType()->getAsCXXRecordDecl();
7713       // Ignore empty bases.
7714       if (Base->isEmpty())
7715         continue;
7716       unsigned FieldIndex = RL.getVirtualBaseIndex(Base);
7717       if (RecordLayout[FieldIndex])
7718         continue;
7719       RecordLayout[FieldIndex] = Base;
7720     }
7721     // Fill in all the fields.
7722     assert(!RD->isUnion() && "Unexpected union.");
7723     for (const auto *Field : RD->fields()) {
7724       // Fill in non-bitfields. (Bitfields always use a zero pattern, which we
7725       // will fill in later.)
7726       if (!Field->isBitField()) {
7727         unsigned FieldIndex = RL.getLLVMFieldNo(Field);
7728         RecordLayout[FieldIndex] = Field;
7729       }
7730     }
7731     for (const llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>
7732              &Data : RecordLayout) {
7733       if (Data.isNull())
7734         continue;
7735       if (const auto *Base = Data.dyn_cast<const CXXRecordDecl *>())
7736         getPlainLayout(Base, Layout, /*AsBase=*/true);
7737       else
7738         Layout.push_back(Data.get<const FieldDecl *>());
7739     }
7740   }
7741 
7742 public:
7743   MappableExprsHandler(const OMPExecutableDirective &Dir, CodeGenFunction &CGF)
7744       : CurDir(Dir), CGF(CGF) {
7745     // Extract firstprivate clause information.
7746     for (const auto *C : Dir.getClausesOfKind<OMPFirstprivateClause>())
7747       for (const auto *D : C->varlists())
7748         FirstPrivateDecls.insert(
7749             cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl())->getCanonicalDecl());
7750     // Extract device pointer clause information.
7751     for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>())
7752       for (auto L : C->component_lists())
7753         DevPointersMap[L.first].push_back(L.second);
7754   }
7755 
7756   /// Generate code for the combined entry if we have a partially mapped struct
7757   /// and take care of the mapping flags of the arguments corresponding to
7758   /// individual struct members.
7759   void emitCombinedEntry(MapBaseValuesArrayTy &BasePointers,
7760                          MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes,
7761                          MapFlagsArrayTy &Types, MapFlagsArrayTy &CurTypes,
7762                          const StructRangeInfoTy &PartialStruct) const {
7763     // Base is the base of the struct
7764     BasePointers.push_back(PartialStruct.Base.getPointer());
7765     // Pointer is the address of the lowest element
7766     llvm::Value *LB = PartialStruct.LowestElem.second.getPointer();
7767     Pointers.push_back(LB);
7768     // Size is (addr of {highest+1} element) - (addr of lowest element)
7769     llvm::Value *HB = PartialStruct.HighestElem.second.getPointer();
7770     llvm::Value *HAddr = CGF.Builder.CreateConstGEP1_32(HB, /*Idx0=*/1);
7771     llvm::Value *CLAddr = CGF.Builder.CreatePointerCast(LB, CGF.VoidPtrTy);
7772     llvm::Value *CHAddr = CGF.Builder.CreatePointerCast(HAddr, CGF.VoidPtrTy);
7773     llvm::Value *Diff = CGF.Builder.CreatePtrDiff(CHAddr, CLAddr);
7774     llvm::Value *Size = CGF.Builder.CreateIntCast(Diff, CGF.SizeTy,
7775                                                   /*isSinged=*/false);
7776     Sizes.push_back(Size);
7777     // Map type is always TARGET_PARAM
7778     Types.push_back(OMP_MAP_TARGET_PARAM);
7779     // Remove TARGET_PARAM flag from the first element
7780     (*CurTypes.begin()) &= ~OMP_MAP_TARGET_PARAM;
7781 
7782     // All other current entries will be MEMBER_OF the combined entry
7783     // (except for PTR_AND_OBJ entries which do not have a placeholder value
7784     // 0xFFFF in the MEMBER_OF field).
7785     OpenMPOffloadMappingFlags MemberOfFlag =
7786         getMemberOfFlag(BasePointers.size() - 1);
7787     for (auto &M : CurTypes)
7788       setCorrectMemberOfFlag(M, MemberOfFlag);
7789   }
7790 
  /// Generate all the base pointers, section pointers, sizes and map
  /// types for the extracted mappable expressions.
  ///
  /// The component lists of the 'map', 'to' and 'from' clauses of the current
  /// directive are grouped by declaration and processed one declaration at a
  /// time. Entries that relate to a 'use_device_ptr' clause additionally carry
  /// the relevant declaration in \p BasePointers and get the
  /// OMP_MAP_RETURN_PARAM flag.
  ///
  /// \param BasePointers Output array of base pointers.
  /// \param Pointers     Output array of section pointers.
  /// \param Sizes        Output array of sizes.
  /// \param Types        Output array of map flags.
  void generateAllInfo(MapBaseValuesArrayTy &BasePointers,
                       MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes,
                       MapFlagsArrayTy &Types) const {
    // We have to process the component lists that relate with the same
    // declaration in a single chunk so that we can generate the map flags
    // correctly. Therefore, we organize all lists in a map.
    llvm::MapVector<const ValueDecl *, SmallVector<MapInfo, 8>> Info;

    // Helper function to fill the information map for the different supported
    // clauses. Lists are keyed by the canonical declaration; a null
    // declaration is kept as a null key.
    auto &&InfoGen = [&Info](
        const ValueDecl *D,
        OMPClauseMappableExprCommon::MappableExprComponentListRef L,
        OpenMPMapClauseKind MapType,
        ArrayRef<OpenMPMapModifierKind> MapModifiers,
        bool ReturnDevicePointer, bool IsImplicit) {
      const ValueDecl *VD =
          D ? cast<ValueDecl>(D->getCanonicalDecl()) : nullptr;
      Info[VD].emplace_back(L, MapType, MapModifiers, ReturnDevicePointer,
                            IsImplicit);
    };

    // Record the component lists of all 'map' clauses, then of all 'to'
    // clauses (as map type 'to') and of all 'from' clauses (as map type
    // 'from'); the latter two have no map-type modifiers.
    // FIXME: MSVC 2013 seems to require this-> to find member CurDir.
    for (const auto *C : this->CurDir.getClausesOfKind<OMPMapClause>())
      for (const auto &L : C->component_lists()) {
        InfoGen(L.first, L.second, C->getMapType(), C->getMapTypeModifiers(),
            /*ReturnDevicePointer=*/false, C->isImplicit());
      }
    for (const auto *C : this->CurDir.getClausesOfKind<OMPToClause>())
      for (const auto &L : C->component_lists()) {
        InfoGen(L.first, L.second, OMPC_MAP_to, llvm::None,
            /*ReturnDevicePointer=*/false, C->isImplicit());
      }
    for (const auto *C : this->CurDir.getClausesOfKind<OMPFromClause>())
      for (const auto &L : C->component_lists()) {
        InfoGen(L.first, L.second, OMPC_MAP_from, llvm::None,
            /*ReturnDevicePointer=*/false, C->isImplicit());
      }

    // Look at the use_device_ptr clause information and mark the existing map
    // entries as such. If there is no map information for an entry in the
    // use_device_ptr list, we create one with map type 'alloc' and zero size
    // section. It is the user fault if that was not mapped before. If there is
    // no map information and the pointer is a struct member, then we defer the
    // emission of that entry until the whole struct has been processed.
    llvm::MapVector<const ValueDecl *, SmallVector<DeferredDevicePtrEntryTy, 4>>
        DeferredInfo;

    // FIXME: MSVC 2013 seems to require this-> to find member CurDir.
    for (const auto *C :
        this->CurDir.getClausesOfKind<OMPUseDevicePtrClause>()) {
      for (const auto &L : C->component_lists()) {
        assert(!L.second.empty() && "Not expecting empty list of components!");
        const ValueDecl *VD = L.second.back().getAssociatedDeclaration();
        VD = cast<ValueDecl>(VD->getCanonicalDecl());
        const Expr *IE = L.second.back().getAssociatedExpression();
        // If the first component is a member expression, we have to look into
        // 'this', which maps to null in the map of map information. Otherwise
        // look directly for the information.
        auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);

        // We potentially have map information for this declaration already.
        // Look for the first set of components that refer to it.
        if (It != Info.end()) {
          auto CI = std::find_if(
              It->second.begin(), It->second.end(), [VD](const MapInfo &MI) {
                return MI.Components.back().getAssociatedDeclaration() == VD;
              });
          // If we found a map entry, signal that the pointer has to be returned
          // and move on to the next declaration.
          if (CI != It->second.end()) {
            CI->ReturnDevicePointer = true;
            continue;
          }
        }

        // We didn't find any match in our map information - generate a zero
        // size array section - if the pointer is a struct member we defer this
        // action until the whole struct has been processed.
        // FIXME: MSVC 2013 seems to require this-> to find member CGF.
        if (isa<MemberExpr>(IE)) {
          // Insert the pointer into Info to be processed by
          // generateInfoForComponentList. Because it is a member pointer
          // without a pointee, no entry will be generated for it, therefore
          // we need to generate one after the whole struct has been processed.
          // Nonetheless, generateInfoForComponentList must be called to take
          // the pointer into account for the calculation of the range of the
          // partial struct.
          InfoGen(nullptr, L.second, OMPC_MAP_unknown, llvm::None,
                  /*ReturnDevicePointer=*/false, C->isImplicit());
          DeferredInfo[nullptr].emplace_back(IE, VD);
        } else {
          // Not a struct member: emit the zero-size entry right away. The
          // loaded pointer value is used both as base pointer and as pointer.
          llvm::Value *Ptr = this->CGF.EmitLoadOfScalar(
              this->CGF.EmitLValue(IE), IE->getExprLoc());
          BasePointers.emplace_back(Ptr, VD);
          Pointers.push_back(Ptr);
          Sizes.push_back(llvm::Constant::getNullValue(this->CGF.SizeTy));
          Types.push_back(OMP_MAP_RETURN_PARAM | OMP_MAP_TARGET_PARAM);
        }
      }
    }

    // Process the collected component lists, one declaration at a time.
    for (const auto &M : Info) {
      // We need to know when we generate information for the first component
      // associated with a capture, because the mapping flags depend on it.
      bool IsFirstComponentList = true;

      // Temporary versions of arrays. The entries of one declaration are
      // collected here so that a combined entry, if needed, can be emitted
      // into the output arrays before them.
      MapBaseValuesArrayTy CurBasePointers;
      MapValuesArrayTy CurPointers;
      MapValuesArrayTy CurSizes;
      MapFlagsArrayTy CurTypes;
      StructRangeInfoTy PartialStruct;

      for (const MapInfo &L : M.second) {
        assert(!L.Components.empty() &&
               "Not expecting declaration with no component lists.");

        // Remember the current base pointer index.
        unsigned CurrentBasePointersIdx = CurBasePointers.size();
        // FIXME: MSVC 2013 seems to require this-> to find the member method.
        this->generateInfoForComponentList(
            L.MapType, L.MapModifiers, L.Components, CurBasePointers,
            CurPointers, CurSizes, CurTypes, PartialStruct,
            IsFirstComponentList, L.IsImplicit);

        // If this entry relates with a device pointer, set the relevant
        // declaration and add the 'return pointer' flag. Both are applied to
        // the first entry generated for this component list.
        if (L.ReturnDevicePointer) {
          assert(CurBasePointers.size() > CurrentBasePointersIdx &&
                 "Unexpected number of mapped base pointers.");

          const ValueDecl *RelevantVD =
              L.Components.back().getAssociatedDeclaration();
          assert(RelevantVD &&
                 "No relevant declaration related with device pointer??");

          CurBasePointers[CurrentBasePointersIdx].setDevicePtrDecl(RelevantVD);
          CurTypes[CurrentBasePointersIdx] |= OMP_MAP_RETURN_PARAM;
        }
        IsFirstComponentList = false;
      }

      // Append any pending zero-length pointers which are struct members and
      // used with use_device_ptr.
      auto CI = DeferredInfo.find(M.first);
      if (CI != DeferredInfo.end()) {
        for (const DeferredDevicePtrEntryTy &L : CI->second) {
          // The base pointer is the address of the member, the pointer is the
          // value loaded from it.
          // NOTE(review): EmitLValue(L.IE) is evaluated twice here (once for
          // BasePtr, once for the load) - confirm whether that is intended.
          llvm::Value *BasePtr = this->CGF.EmitLValue(L.IE).getPointer();
          llvm::Value *Ptr = this->CGF.EmitLoadOfScalar(
              this->CGF.EmitLValue(L.IE), L.IE->getExprLoc());
          CurBasePointers.emplace_back(BasePtr, L.VD);
          CurPointers.push_back(Ptr);
          CurSizes.push_back(llvm::Constant::getNullValue(this->CGF.SizeTy));
          // Entry is PTR_AND_OBJ and RETURN_PARAM. Also, set the placeholder
          // value MEMBER_OF=FFFF so that the entry is later updated with the
          // correct value of MEMBER_OF.
          CurTypes.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_RETURN_PARAM |
                             OMP_MAP_MEMBER_OF);
        }
      }

      // If there is an entry in PartialStruct it means we have a struct with
      // individual members mapped. Emit an extra combined entry. It goes
      // straight into the output arrays, i.e. ahead of the member entries
      // appended below.
      if (PartialStruct.Base.isValid())
        emitCombinedEntry(BasePointers, Pointers, Sizes, Types, CurTypes,
                          PartialStruct);

      // We need to append the results of this capture to what we already have.
      BasePointers.append(CurBasePointers.begin(), CurBasePointers.end());
      Pointers.append(CurPointers.begin(), CurPointers.end());
      Sizes.append(CurSizes.begin(), CurSizes.end());
      Types.append(CurTypes.begin(), CurTypes.end());
    }
  }
7970 
7971   /// Emit capture info for lambdas for variables captured by reference.
7972   void generateInfoForLambdaCaptures(
7973       const ValueDecl *VD, llvm::Value *Arg, MapBaseValuesArrayTy &BasePointers,
7974       MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes,
7975       MapFlagsArrayTy &Types,
7976       llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers) const {
7977     const auto *RD = VD->getType()
7978                          .getCanonicalType()
7979                          .getNonReferenceType()
7980                          ->getAsCXXRecordDecl();
7981     if (!RD || !RD->isLambda())
7982       return;
7983     Address VDAddr = Address(Arg, CGF.getContext().getDeclAlign(VD));
7984     LValue VDLVal = CGF.MakeAddrLValue(
7985         VDAddr, VD->getType().getCanonicalType().getNonReferenceType());
7986     llvm::DenseMap<const VarDecl *, FieldDecl *> Captures;
7987     FieldDecl *ThisCapture = nullptr;
7988     RD->getCaptureFields(Captures, ThisCapture);
7989     if (ThisCapture) {
7990       LValue ThisLVal =
7991           CGF.EmitLValueForFieldInitialization(VDLVal, ThisCapture);
7992       LValue ThisLValVal = CGF.EmitLValueForField(VDLVal, ThisCapture);
7993       LambdaPointers.try_emplace(ThisLVal.getPointer(), VDLVal.getPointer());
7994       BasePointers.push_back(ThisLVal.getPointer());
7995       Pointers.push_back(ThisLValVal.getPointer());
7996       Sizes.push_back(CGF.getTypeSize(CGF.getContext().VoidPtrTy));
7997       Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
7998                       OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
7999     }
8000     for (const LambdaCapture &LC : RD->captures()) {
8001       if (LC.getCaptureKind() != LCK_ByRef)
8002         continue;
8003       const VarDecl *VD = LC.getCapturedVar();
8004       auto It = Captures.find(VD);
8005       assert(It != Captures.end() && "Found lambda capture without field.");
8006       LValue VarLVal = CGF.EmitLValueForFieldInitialization(VDLVal, It->second);
8007       LValue VarLValVal = CGF.EmitLValueForField(VDLVal, It->second);
8008       LambdaPointers.try_emplace(VarLVal.getPointer(), VDLVal.getPointer());
8009       BasePointers.push_back(VarLVal.getPointer());
8010       Pointers.push_back(VarLValVal.getPointer());
8011       Sizes.push_back(CGF.getTypeSize(
8012           VD->getType().getCanonicalType().getNonReferenceType()));
8013       Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
8014                       OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
8015     }
8016   }
8017 
8018   /// Set correct indices for lambdas captures.
8019   void adjustMemberOfForLambdaCaptures(
8020       const llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers,
8021       MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
8022       MapFlagsArrayTy &Types) const {
8023     for (unsigned I = 0, E = Types.size(); I < E; ++I) {
8024       // Set correct member_of idx for all implicit lambda captures.
8025       if (Types[I] != (OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
8026                        OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT))
8027         continue;
8028       llvm::Value *BasePtr = LambdaPointers.lookup(*BasePointers[I]);
8029       assert(BasePtr && "Unable to find base lambda address.");
8030       int TgtIdx = -1;
8031       for (unsigned J = I; J > 0; --J) {
8032         unsigned Idx = J - 1;
8033         if (Pointers[Idx] != BasePtr)
8034           continue;
8035         TgtIdx = Idx;
8036         break;
8037       }
8038       assert(TgtIdx != -1 && "Unable to find parent lambda.");
8039       // All other current entries will be MEMBER_OF the combined entry
8040       // (except for PTR_AND_OBJ entries which do not have a placeholder value
8041       // 0xFFFF in the MEMBER_OF field).
8042       OpenMPOffloadMappingFlags MemberOfFlag = getMemberOfFlag(TgtIdx);
8043       setCorrectMemberOfFlag(Types[I], MemberOfFlag);
8044     }
8045   }
8046 
  /// Generate the base pointers, section pointers, sizes and map types
  /// associated to a given capture.
  ///
  /// A capture whose declaration is found in DevPointersMap (is_device_ptr)
  /// produces a single literal target-parameter entry for \a Arg. Otherwise
  /// all component lists that the map clauses of the current directive
  /// associate with the captured declaration are collected. Lists that overlap
  /// with another list (one of them is exhausted first when both are compared
  /// in reverse order) are emitted first, together with their sorted
  /// overlapped sub-lists; the remaining lists are emitted afterwards.
  ///
  /// \param Cap Capture to process; must not capture a variable array type.
  /// \param Arg Value passed for the capture.
  /// \param BasePointers, Pointers, Sizes, Types Output arrays the generated
  /// entries are appended to.
  /// \param PartialStruct Forwarded to generateInfoForComponentList.
  void generateInfoForCapture(const CapturedStmt::Capture *Cap,
                              llvm::Value *Arg,
                              MapBaseValuesArrayTy &BasePointers,
                              MapValuesArrayTy &Pointers,
                              MapValuesArrayTy &Sizes, MapFlagsArrayTy &Types,
                              StructRangeInfoTy &PartialStruct) const {
    assert(!Cap->capturesVariableArrayType() &&
           "Not expecting to generate map info for a variable array type!");

    // The canonical declaration of the captured variable, or null when the
    // capture is 'this'.
    const ValueDecl *VD = Cap->capturesThis()
                              ? nullptr
                              : Cap->getCapturedVar()->getCanonicalDecl();

    // If this declaration appears in a is_device_ptr clause we just have to
    // pass the pointer by value. If it is a reference to a declaration, we just
    // pass its value.
    if (DevPointersMap.count(VD)) {
      BasePointers.emplace_back(Arg, VD);
      Pointers.push_back(Arg);
      Sizes.push_back(CGF.getTypeSize(CGF.getContext().VoidPtrTy));
      Types.push_back(OMP_MAP_LITERAL | OMP_MAP_TARGET_PARAM);
      return;
    }

    // One record per component list: the list itself, the map type and
    // modifiers of the clause it comes from, and whether that clause is
    // implicit.
    using MapData =
        std::tuple<OMPClauseMappableExprCommon::MappableExprComponentListRef,
                   OpenMPMapClauseKind, ArrayRef<OpenMPMapModifierKind>, bool>;
    SmallVector<MapData, 4> DeclComponentLists;
    // FIXME: MSVC 2013 seems to require this-> to find member CurDir.
    for (const auto *C : this->CurDir.getClausesOfKind<OMPMapClause>()) {
      for (const auto &L : C->decl_component_lists(VD)) {
        assert(L.first == VD &&
               "We got information for the wrong declaration??");
        assert(!L.second.empty() &&
               "Not expecting declaration with no component lists.");
        DeclComponentLists.emplace_back(L.second, C->getMapType(),
                                        C->getMapTypeModifiers(),
                                        C->isImplicit());
      }
    }

    // Find overlapping elements (including the offset from the base element).
    // The map is keyed by the address of the base record inside
    // DeclComponentLists, which is not modified after this point.
    llvm::SmallDenseMap<
        const MapData *,
        llvm::SmallVector<
            OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>,
        4>
        OverlappedData;
    size_t Count = 0;
    for (const MapData &L : DeclComponentLists) {
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      std::tie(Components, MapType, MapModifiers, IsImplicit) = L;
      ++Count;
      // Compare L only against the lists that follow it, so that every
      // unordered pair is examined once.
      for (const MapData &L1 : makeArrayRef(DeclComponentLists).slice(Count)) {
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components1;
        // Only Components1 is needed here; MapType, MapModifiers and
        // IsImplicit are overwritten as a side effect but are not read again
        // within this loop.
        std::tie(Components1, MapType, MapModifiers, IsImplicit) = L1;
        auto CI = Components.rbegin();
        auto CE = Components.rend();
        auto SI = Components1.rbegin();
        auto SE = Components1.rend();
        // Advance both lists in lockstep while the components agree in
        // expression class and associated declaration.
        for (; CI != CE && SI != SE; ++CI, ++SI) {
          if (CI->getAssociatedExpression()->getStmtClass() !=
              SI->getAssociatedExpression()->getStmtClass())
            break;
          // Are we dealing with different variables/fields?
          if (CI->getAssociatedDeclaration() != SI->getAssociatedDeclaration())
            break;
        }
        // Found overlapping if, at least for one component, reached the head of
        // the components list.
        if (CI == CE || SI == SE) {
          assert((CI != CE || SI != SE) &&
                 "Unexpected full match of the mapping components.");
          // The list that was exhausted first is the base; the other one is
          // recorded as its overlapped sub-list.
          const MapData &BaseData = CI == CE ? L : L1;
          OMPClauseMappableExprCommon::MappableExprComponentListRef SubData =
              SI == SE ? Components : Components1;
          auto &OverlappedElements = OverlappedData.FindAndConstruct(&BaseData);
          OverlappedElements.getSecond().push_back(SubData);
        }
      }
    }
    // Sort the overlapped elements for each item.
    // Layout holds the fields of the captured record type and is used by the
    // comparator below to order fields that have different parents.
    llvm::SmallVector<const FieldDecl *, 4> Layout;
    if (!OverlappedData.empty()) {
      // NOTE(review): VD is null for a 'this' capture; this dereference
      // presumably relies on such captures never producing overlapped data
      // -- confirm.
      if (const auto *CRD =
              VD->getType().getCanonicalType()->getAsCXXRecordDecl())
        getPlainLayout(CRD, Layout, /*AsBase=*/false);
      else {
        const auto *RD = VD->getType().getCanonicalType()->getAsRecordDecl();
        Layout.append(RD->field_begin(), RD->field_end());
      }
    }
    for (auto &Pair : OverlappedData) {
      llvm::sort(
          Pair.getSecond(),
          [&Layout](
              OMPClauseMappableExprCommon::MappableExprComponentListRef First,
              OMPClauseMappableExprCommon::MappableExprComponentListRef
                  Second) {
            auto CI = First.rbegin();
            auto CE = First.rend();
            auto SI = Second.rbegin();
            auto SE = Second.rend();
            // Skip the common part of both lists (same walk as in the overlap
            // detection above).
            for (; CI != CE && SI != SE; ++CI, ++SI) {
              if (CI->getAssociatedExpression()->getStmtClass() !=
                  SI->getAssociatedExpression()->getStmtClass())
                break;
              // Are we dealing with different variables/fields?
              if (CI->getAssociatedDeclaration() !=
                  SI->getAssociatedDeclaration())
                break;
            }

            // Lists contain the same elements.
            if (CI == CE && SI == SE)
              return false;

            // List with less elements is less than list with more elements.
            if (CI == CE || SI == SE)
              return CI == CE;

            // The lists diverge at two distinct fields: order by field index
            // when both belong to the same record, otherwise by whichever
            // field is found first in Layout.
            const auto *FD1 = cast<FieldDecl>(CI->getAssociatedDeclaration());
            const auto *FD2 = cast<FieldDecl>(SI->getAssociatedDeclaration());
            if (FD1->getParent() == FD2->getParent())
              return FD1->getFieldIndex() < FD2->getFieldIndex();
            // NOTE(review): It is dereferenced without an end() check, so
            // this assumes at least one of the two fields is in Layout --
            // confirm.
            const auto It =
                llvm::find_if(Layout, [FD1, FD2](const FieldDecl *FD) {
                  return FD == FD1 || FD == FD2;
                });
            return *It == FD1;
          });
    }

    // Associated with a capture, because the mapping flags depend on it.
    // Go through all of the elements with the overlapped elements.
    for (const auto &Pair : OverlappedData) {
      const MapData &L = *Pair.getFirst();
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      std::tie(Components, MapType, MapModifiers, IsImplicit) = L;
      ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
          OverlappedComponents = Pair.getSecond();
      // Every base list with overlapped elements is passed as a first
      // component list.
      bool IsFirstComponentList = true;
      generateInfoForComponentList(MapType, MapModifiers, Components,
                                   BasePointers, Pointers, Sizes, Types,
                                   PartialStruct, IsFirstComponentList,
                                   IsImplicit, OverlappedComponents);
    }
    // Go through other elements without overlapped elements.
    // Lists already emitted above as a base are skipped. The flag is cleared
    // after the first iteration whether or not that iteration emitted
    // anything.
    bool IsFirstComponentList = OverlappedData.empty();
    for (const MapData &L : DeclComponentLists) {
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      std::tie(Components, MapType, MapModifiers, IsImplicit) = L;
      auto It = OverlappedData.find(&L);
      if (It == OverlappedData.end())
        generateInfoForComponentList(MapType, MapModifiers, Components,
                                     BasePointers, Pointers, Sizes, Types,
                                     PartialStruct, IsFirstComponentList,
                                     IsImplicit);
      IsFirstComponentList = false;
    }
  }
8220 
8221   /// Generate the base pointers, section pointers, sizes and map types
8222   /// associated with the declare target link variables.
8223   void generateInfoForDeclareTargetLink(MapBaseValuesArrayTy &BasePointers,
8224                                         MapValuesArrayTy &Pointers,
8225                                         MapValuesArrayTy &Sizes,
8226                                         MapFlagsArrayTy &Types) const {
8227     // Map other list items in the map clause which are not captured variables
8228     // but "declare target link" global variables.,
8229     for (const auto *C : this->CurDir.getClausesOfKind<OMPMapClause>()) {
8230       for (const auto &L : C->component_lists()) {
8231         if (!L.first)
8232           continue;
8233         const auto *VD = dyn_cast<VarDecl>(L.first);
8234         if (!VD)
8235           continue;
8236         llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
8237             OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
8238         if (!Res || *Res != OMPDeclareTargetDeclAttr::MT_Link)
8239           continue;
8240         StructRangeInfoTy PartialStruct;
8241         generateInfoForComponentList(
8242             C->getMapType(), C->getMapTypeModifiers(), L.second, BasePointers,
8243             Pointers, Sizes, Types, PartialStruct,
8244             /*IsFirstComponentList=*/true, C->isImplicit());
8245         assert(!PartialStruct.Base.isValid() &&
8246                "No partial structs for declare target link expected.");
8247       }
8248     }
8249   }
8250 
8251   /// Generate the default map information for a given capture \a CI,
8252   /// record field declaration \a RI and captured value \a CV.
8253   void generateDefaultMapInfo(const CapturedStmt::Capture &CI,
8254                               const FieldDecl &RI, llvm::Value *CV,
8255                               MapBaseValuesArrayTy &CurBasePointers,
8256                               MapValuesArrayTy &CurPointers,
8257                               MapValuesArrayTy &CurSizes,
8258                               MapFlagsArrayTy &CurMapTypes) const {
8259     // Do the default mapping.
8260     if (CI.capturesThis()) {
8261       CurBasePointers.push_back(CV);
8262       CurPointers.push_back(CV);
8263       const auto *PtrTy = cast<PointerType>(RI.getType().getTypePtr());
8264       CurSizes.push_back(CGF.getTypeSize(PtrTy->getPointeeType()));
8265       // Default map type.
8266       CurMapTypes.push_back(OMP_MAP_TO | OMP_MAP_FROM);
8267     } else if (CI.capturesVariableByCopy()) {
8268       CurBasePointers.push_back(CV);
8269       CurPointers.push_back(CV);
8270       if (!RI.getType()->isAnyPointerType()) {
8271         // We have to signal to the runtime captures passed by value that are
8272         // not pointers.
8273         CurMapTypes.push_back(OMP_MAP_LITERAL);
8274         CurSizes.push_back(CGF.getTypeSize(RI.getType()));
8275       } else {
8276         // Pointers are implicitly mapped with a zero size and no flags
8277         // (other than first map that is added for all implicit maps).
8278         CurMapTypes.push_back(OMP_MAP_NONE);
8279         CurSizes.push_back(llvm::Constant::getNullValue(CGF.SizeTy));
8280       }
8281     } else {
8282       assert(CI.capturesVariable() && "Expected captured reference.");
8283       const auto *PtrTy = cast<ReferenceType>(RI.getType().getTypePtr());
8284       QualType ElementType = PtrTy->getPointeeType();
8285       CurSizes.push_back(CGF.getTypeSize(ElementType));
8286       // The default map type for a scalar/complex type is 'to' because by
8287       // default the value doesn't have to be retrieved. For an aggregate
8288       // type, the default is 'tofrom'.
8289       CurMapTypes.push_back(getMapModifiersForPrivateClauses(CI));
8290       const VarDecl *VD = CI.getCapturedVar();
8291       if (FirstPrivateDecls.count(VD) &&
8292           VD->getType().isConstant(CGF.getContext())) {
8293         llvm::Constant *Addr =
8294             CGF.CGM.getOpenMPRuntime().registerTargetFirstprivateCopy(CGF, VD);
8295         // Copy the value of the original variable to the new global copy.
8296         CGF.Builder.CreateMemCpy(
8297             CGF.MakeNaturalAlignAddrLValue(Addr, ElementType).getAddress(),
8298             Address(CV, CGF.getContext().getTypeAlignInChars(ElementType)),
8299             CurSizes.back(), /*isVolatile=*/false);
8300         // Use new global variable as the base pointers.
8301         CurBasePointers.push_back(Addr);
8302         CurPointers.push_back(Addr);
8303       } else {
8304         CurBasePointers.push_back(CV);
8305         if (FirstPrivateDecls.count(VD) && ElementType->isAnyPointerType()) {
8306           Address PtrAddr = CGF.EmitLoadOfReference(CGF.MakeAddrLValue(
8307               CV, ElementType, CGF.getContext().getDeclAlign(VD),
8308               AlignmentSource::Decl));
8309           CurPointers.push_back(PtrAddr.getPointer());
8310         } else {
8311           CurPointers.push_back(CV);
8312         }
8313       }
8314     }
8315     // Every default map produces a single argument which is a target parameter.
8316     CurMapTypes.back() |= OMP_MAP_TARGET_PARAM;
8317 
8318     // Add flag stating this is an implicit map.
8319     CurMapTypes.back() |= OMP_MAP_IMPLICIT;
8320   }
8321 };
8322 
/// Device ID values with a reserved meaning for the offloading runtime.
enum OpenMPOffloadingReservedDeviceIDs {
  /// Device ID used when no device was specified; the runtime should then
  /// determine the device from the environment variables defined in the spec.
  OMP_DEVICEID_UNDEF = -1,
};
8328 } // anonymous namespace
8329 
8330 /// Emit the arrays used to pass the captures and map information to the
8331 /// offloading runtime library. If there is no map or capture information,
8332 /// return nullptr by reference.
8333 static void
8334 emitOffloadingArrays(CodeGenFunction &CGF,
8335                      MappableExprsHandler::MapBaseValuesArrayTy &BasePointers,
8336                      MappableExprsHandler::MapValuesArrayTy &Pointers,
8337                      MappableExprsHandler::MapValuesArrayTy &Sizes,
8338                      MappableExprsHandler::MapFlagsArrayTy &MapTypes,
8339                      CGOpenMPRuntime::TargetDataInfo &Info) {
8340   CodeGenModule &CGM = CGF.CGM;
8341   ASTContext &Ctx = CGF.getContext();
8342 
8343   // Reset the array information.
8344   Info.clearArrayInfo();
8345   Info.NumberOfPtrs = BasePointers.size();
8346 
8347   if (Info.NumberOfPtrs) {
8348     // Detect if we have any capture size requiring runtime evaluation of the
8349     // size so that a constant array could be eventually used.
8350     bool hasRuntimeEvaluationCaptureSize = false;
8351     for (llvm::Value *S : Sizes)
8352       if (!isa<llvm::Constant>(S)) {
8353         hasRuntimeEvaluationCaptureSize = true;
8354         break;
8355       }
8356 
8357     llvm::APInt PointerNumAP(32, Info.NumberOfPtrs, /*isSigned=*/true);
8358     QualType PointerArrayType =
8359         Ctx.getConstantArrayType(Ctx.VoidPtrTy, PointerNumAP, ArrayType::Normal,
8360                                  /*IndexTypeQuals=*/0);
8361 
8362     Info.BasePointersArray =
8363         CGF.CreateMemTemp(PointerArrayType, ".offload_baseptrs").getPointer();
8364     Info.PointersArray =
8365         CGF.CreateMemTemp(PointerArrayType, ".offload_ptrs").getPointer();
8366 
8367     // If we don't have any VLA types or other types that require runtime
8368     // evaluation, we can use a constant array for the map sizes, otherwise we
8369     // need to fill up the arrays as we do for the pointers.
8370     if (hasRuntimeEvaluationCaptureSize) {
8371       QualType SizeArrayType = Ctx.getConstantArrayType(
8372           Ctx.getSizeType(), PointerNumAP, ArrayType::Normal,
8373           /*IndexTypeQuals=*/0);
8374       Info.SizesArray =
8375           CGF.CreateMemTemp(SizeArrayType, ".offload_sizes").getPointer();
8376     } else {
8377       // We expect all the sizes to be constant, so we collect them to create
8378       // a constant array.
8379       SmallVector<llvm::Constant *, 16> ConstSizes;
8380       for (llvm::Value *S : Sizes)
8381         ConstSizes.push_back(cast<llvm::Constant>(S));
8382 
8383       auto *SizesArrayInit = llvm::ConstantArray::get(
8384           llvm::ArrayType::get(CGM.SizeTy, ConstSizes.size()), ConstSizes);
8385       std::string Name = CGM.getOpenMPRuntime().getName({"offload_sizes"});
8386       auto *SizesArrayGbl = new llvm::GlobalVariable(
8387           CGM.getModule(), SizesArrayInit->getType(),
8388           /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
8389           SizesArrayInit, Name);
8390       SizesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
8391       Info.SizesArray = SizesArrayGbl;
8392     }
8393 
8394     // The map types are always constant so we don't need to generate code to
8395     // fill arrays. Instead, we create an array constant.
8396     SmallVector<uint64_t, 4> Mapping(MapTypes.size(), 0);
8397     llvm::copy(MapTypes, Mapping.begin());
8398     llvm::Constant *MapTypesArrayInit =
8399         llvm::ConstantDataArray::get(CGF.Builder.getContext(), Mapping);
8400     std::string MaptypesName =
8401         CGM.getOpenMPRuntime().getName({"offload_maptypes"});
8402     auto *MapTypesArrayGbl = new llvm::GlobalVariable(
8403         CGM.getModule(), MapTypesArrayInit->getType(),
8404         /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
8405         MapTypesArrayInit, MaptypesName);
8406     MapTypesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
8407     Info.MapTypesArray = MapTypesArrayGbl;
8408 
8409     for (unsigned I = 0; I < Info.NumberOfPtrs; ++I) {
8410       llvm::Value *BPVal = *BasePointers[I];
8411       llvm::Value *BP = CGF.Builder.CreateConstInBoundsGEP2_32(
8412           llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
8413           Info.BasePointersArray, 0, I);
8414       BP = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
8415           BP, BPVal->getType()->getPointerTo(/*AddrSpace=*/0));
8416       Address BPAddr(BP, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
8417       CGF.Builder.CreateStore(BPVal, BPAddr);
8418 
8419       if (Info.requiresDevicePointerInfo())
8420         if (const ValueDecl *DevVD = BasePointers[I].getDevicePtrDecl())
8421           Info.CaptureDeviceAddrMap.try_emplace(DevVD, BPAddr);
8422 
8423       llvm::Value *PVal = Pointers[I];
8424       llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32(
8425           llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
8426           Info.PointersArray, 0, I);
8427       P = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
8428           P, PVal->getType()->getPointerTo(/*AddrSpace=*/0));
8429       Address PAddr(P, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
8430       CGF.Builder.CreateStore(PVal, PAddr);
8431 
8432       if (hasRuntimeEvaluationCaptureSize) {
8433         llvm::Value *S = CGF.Builder.CreateConstInBoundsGEP2_32(
8434             llvm::ArrayType::get(CGM.SizeTy, Info.NumberOfPtrs),
8435             Info.SizesArray,
8436             /*Idx0=*/0,
8437             /*Idx1=*/I);
8438         Address SAddr(S, Ctx.getTypeAlignInChars(Ctx.getSizeType()));
8439         CGF.Builder.CreateStore(
8440             CGF.Builder.CreateIntCast(Sizes[I], CGM.SizeTy, /*isSigned=*/true),
8441             SAddr);
8442       }
8443     }
8444   }
8445 }
8446 /// Emit the arguments to be passed to the runtime library based on the
8447 /// arrays of pointers, sizes and map types.
8448 static void emitOffloadingArraysArgument(
8449     CodeGenFunction &CGF, llvm::Value *&BasePointersArrayArg,
8450     llvm::Value *&PointersArrayArg, llvm::Value *&SizesArrayArg,
8451     llvm::Value *&MapTypesArrayArg, CGOpenMPRuntime::TargetDataInfo &Info) {
8452   CodeGenModule &CGM = CGF.CGM;
8453   if (Info.NumberOfPtrs) {
8454     BasePointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
8455         llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
8456         Info.BasePointersArray,
8457         /*Idx0=*/0, /*Idx1=*/0);
8458     PointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
8459         llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
8460         Info.PointersArray,
8461         /*Idx0=*/0,
8462         /*Idx1=*/0);
8463     SizesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
8464         llvm::ArrayType::get(CGM.SizeTy, Info.NumberOfPtrs), Info.SizesArray,
8465         /*Idx0=*/0, /*Idx1=*/0);
8466     MapTypesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
8467         llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs),
8468         Info.MapTypesArray,
8469         /*Idx0=*/0,
8470         /*Idx1=*/0);
8471   } else {
8472     BasePointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
8473     PointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
8474     SizesArrayArg = llvm::ConstantPointerNull::get(CGM.SizeTy->getPointerTo());
8475     MapTypesArrayArg =
8476         llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo());
8477   }
8478 }
8479 
8480 /// Check for inner distribute directive.
8481 static const OMPExecutableDirective *
8482 getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) {
8483   const auto *CS = D.getInnermostCapturedStmt();
8484   const auto *Body =
8485       CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
8486   const Stmt *ChildStmt =
8487       CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);
8488 
8489   if (const auto *NestedDir =
8490           dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
8491     OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind();
8492     switch (D.getDirectiveKind()) {
8493     case OMPD_target:
8494       if (isOpenMPDistributeDirective(DKind))
8495         return NestedDir;
8496       if (DKind == OMPD_teams) {
8497         Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers(
8498             /*IgnoreCaptured=*/true);
8499         if (!Body)
8500           return nullptr;
8501         ChildStmt = CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);
8502         if (const auto *NND =
8503                 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
8504           DKind = NND->getDirectiveKind();
8505           if (isOpenMPDistributeDirective(DKind))
8506             return NND;
8507         }
8508       }
8509       return nullptr;
8510     case OMPD_target_teams:
8511       if (isOpenMPDistributeDirective(DKind))
8512         return NestedDir;
8513       return nullptr;
8514     case OMPD_target_parallel:
8515     case OMPD_target_simd:
8516     case OMPD_target_parallel_for:
8517     case OMPD_target_parallel_for_simd:
8518       return nullptr;
8519     case OMPD_target_teams_distribute:
8520     case OMPD_target_teams_distribute_simd:
8521     case OMPD_target_teams_distribute_parallel_for:
8522     case OMPD_target_teams_distribute_parallel_for_simd:
8523     case OMPD_parallel:
8524     case OMPD_for:
8525     case OMPD_parallel_for:
8526     case OMPD_parallel_sections:
8527     case OMPD_for_simd:
8528     case OMPD_parallel_for_simd:
8529     case OMPD_cancel:
8530     case OMPD_cancellation_point:
8531     case OMPD_ordered:
8532     case OMPD_threadprivate:
8533     case OMPD_allocate:
8534     case OMPD_task:
8535     case OMPD_simd:
8536     case OMPD_sections:
8537     case OMPD_section:
8538     case OMPD_single:
8539     case OMPD_master:
8540     case OMPD_critical:
8541     case OMPD_taskyield:
8542     case OMPD_barrier:
8543     case OMPD_taskwait:
8544     case OMPD_taskgroup:
8545     case OMPD_atomic:
8546     case OMPD_flush:
8547     case OMPD_teams:
8548     case OMPD_target_data:
8549     case OMPD_target_exit_data:
8550     case OMPD_target_enter_data:
8551     case OMPD_distribute:
8552     case OMPD_distribute_simd:
8553     case OMPD_distribute_parallel_for:
8554     case OMPD_distribute_parallel_for_simd:
8555     case OMPD_teams_distribute:
8556     case OMPD_teams_distribute_simd:
8557     case OMPD_teams_distribute_parallel_for:
8558     case OMPD_teams_distribute_parallel_for_simd:
8559     case OMPD_target_update:
8560     case OMPD_declare_simd:
8561     case OMPD_declare_target:
8562     case OMPD_end_declare_target:
8563     case OMPD_declare_reduction:
8564     case OMPD_declare_mapper:
8565     case OMPD_taskloop:
8566     case OMPD_taskloop_simd:
8567     case OMPD_requires:
8568     case OMPD_unknown:
8569       llvm_unreachable("Unexpected directive.");
8570     }
8571   }
8572 
8573   return nullptr;
8574 }
8575 
8576 void CGOpenMPRuntime::emitTargetNumIterationsCall(
8577     CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *Device,
8578     const llvm::function_ref<llvm::Value *(
8579         CodeGenFunction &CGF, const OMPLoopDirective &D)> &SizeEmitter) {
8580   OpenMPDirectiveKind Kind = D.getDirectiveKind();
8581   const OMPExecutableDirective *TD = &D;
8582   // Get nested teams distribute kind directive, if any.
8583   if (!isOpenMPDistributeDirective(Kind) || !isOpenMPTeamsDirective(Kind))
8584     TD = getNestedDistributeDirective(CGM.getContext(), D);
8585   if (!TD)
8586     return;
8587   const auto *LD = cast<OMPLoopDirective>(TD);
8588   auto &&CodeGen = [LD, &Device, &SizeEmitter, this](CodeGenFunction &CGF,
8589                                                      PrePostActionTy &) {
8590     llvm::Value *NumIterations = SizeEmitter(CGF, *LD);
8591 
8592     // Emit device ID if any.
8593     llvm::Value *DeviceID;
8594     if (Device)
8595       DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
8596                                            CGF.Int64Ty, /*isSigned=*/true);
8597     else
8598       DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
8599 
8600     llvm::Value *Args[] = {DeviceID, NumIterations};
8601     CGF.EmitRuntimeCall(
8602         createRuntimeFunction(OMPRTL__kmpc_push_target_tripcount), Args);
8603   };
8604   emitInlinedDirective(CGF, OMPD_unknown, CodeGen);
8605 }
8606 
8607 void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF,
8608                                      const OMPExecutableDirective &D,
8609                                      llvm::Function *OutlinedFn,
8610                                      llvm::Value *OutlinedFnID,
8611                                      const Expr *IfCond, const Expr *Device) {
8612   if (!CGF.HaveInsertPoint())
8613     return;
8614 
8615   assert(OutlinedFn && "Invalid outlined function!");
8616 
8617   const bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>();
8618   llvm::SmallVector<llvm::Value *, 16> CapturedVars;
8619   const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
8620   auto &&ArgsCodegen = [&CS, &CapturedVars](CodeGenFunction &CGF,
8621                                             PrePostActionTy &) {
8622     CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
8623   };
8624   emitInlinedDirective(CGF, OMPD_unknown, ArgsCodegen);
8625 
8626   CodeGenFunction::OMPTargetDataInfo InputInfo;
8627   llvm::Value *MapTypesArray = nullptr;
8628   // Fill up the pointer arrays and transfer execution to the device.
8629   auto &&ThenGen = [this, Device, OutlinedFn, OutlinedFnID, &D, &InputInfo,
8630                     &MapTypesArray, &CS, RequiresOuterTask,
8631                     &CapturedVars](CodeGenFunction &CGF, PrePostActionTy &) {
8632     // On top of the arrays that were filled up, the target offloading call
8633     // takes as arguments the device id as well as the host pointer. The host
8634     // pointer is used by the runtime library to identify the current target
8635     // region, so it only has to be unique and not necessarily point to
8636     // anything. It could be the pointer to the outlined function that
8637     // implements the target region, but we aren't using that so that the
8638     // compiler doesn't need to keep that, and could therefore inline the host
8639     // function if proven worthwhile during optimization.
8640 
8641     // From this point on, we need to have an ID of the target region defined.
8642     assert(OutlinedFnID && "Invalid outlined function ID!");
8643 
8644     // Emit device ID if any.
8645     llvm::Value *DeviceID;
8646     if (Device) {
8647       DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
8648                                            CGF.Int64Ty, /*isSigned=*/true);
8649     } else {
8650       DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
8651     }
8652 
8653     // Emit the number of elements in the offloading arrays.
8654     llvm::Value *PointerNum =
8655         CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);
8656 
8657     // Return value of the runtime offloading call.
8658     llvm::Value *Return;
8659 
8660     llvm::Value *NumTeams = emitNumTeamsForTargetDirective(CGF, D);
8661     llvm::Value *NumThreads = emitNumThreadsForTargetDirective(CGF, D);
8662 
8663     bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
8664     // The target region is an outlined function launched by the runtime
8665     // via calls __tgt_target() or __tgt_target_teams().
8666     //
8667     // __tgt_target() launches a target region with one team and one thread,
8668     // executing a serial region.  This master thread may in turn launch
8669     // more threads within its team upon encountering a parallel region,
8670     // however, no additional teams can be launched on the device.
8671     //
8672     // __tgt_target_teams() launches a target region with one or more teams,
8673     // each with one or more threads.  This call is required for target
8674     // constructs such as:
8675     //  'target teams'
8676     //  'target' / 'teams'
8677     //  'target teams distribute parallel for'
8678     //  'target parallel'
8679     // and so on.
8680     //
8681     // Note that on the host and CPU targets, the runtime implementation of
8682     // these calls simply call the outlined function without forking threads.
8683     // The outlined functions themselves have runtime calls to
8684     // __kmpc_fork_teams() and __kmpc_fork() for this purpose, codegen'd by
8685     // the compiler in emitTeamsCall() and emitParallelCall().
8686     //
8687     // In contrast, on the NVPTX target, the implementation of
8688     // __tgt_target_teams() launches a GPU kernel with the requested number
8689     // of teams and threads so no additional calls to the runtime are required.
8690     if (NumTeams) {
8691       // If we have NumTeams defined this means that we have an enclosed teams
8692       // region. Therefore we also expect to have NumThreads defined. These two
8693       // values should be defined in the presence of a teams directive,
8694       // regardless of having any clauses associated. If the user is using teams
8695       // but no clauses, these two values will be the default that should be
8696       // passed to the runtime library - a 32-bit integer with the value zero.
8697       assert(NumThreads && "Thread limit expression should be available along "
8698                            "with number of teams.");
8699       llvm::Value *OffloadingArgs[] = {DeviceID,
8700                                        OutlinedFnID,
8701                                        PointerNum,
8702                                        InputInfo.BasePointersArray.getPointer(),
8703                                        InputInfo.PointersArray.getPointer(),
8704                                        InputInfo.SizesArray.getPointer(),
8705                                        MapTypesArray,
8706                                        NumTeams,
8707                                        NumThreads};
8708       Return = CGF.EmitRuntimeCall(
8709           createRuntimeFunction(HasNowait ? OMPRTL__tgt_target_teams_nowait
8710                                           : OMPRTL__tgt_target_teams),
8711           OffloadingArgs);
8712     } else {
8713       llvm::Value *OffloadingArgs[] = {DeviceID,
8714                                        OutlinedFnID,
8715                                        PointerNum,
8716                                        InputInfo.BasePointersArray.getPointer(),
8717                                        InputInfo.PointersArray.getPointer(),
8718                                        InputInfo.SizesArray.getPointer(),
8719                                        MapTypesArray};
8720       Return = CGF.EmitRuntimeCall(
8721           createRuntimeFunction(HasNowait ? OMPRTL__tgt_target_nowait
8722                                           : OMPRTL__tgt_target),
8723           OffloadingArgs);
8724     }
8725 
8726     // Check the error code and execute the host version if required.
8727     llvm::BasicBlock *OffloadFailedBlock =
8728         CGF.createBasicBlock("omp_offload.failed");
8729     llvm::BasicBlock *OffloadContBlock =
8730         CGF.createBasicBlock("omp_offload.cont");
8731     llvm::Value *Failed = CGF.Builder.CreateIsNotNull(Return);
8732     CGF.Builder.CreateCondBr(Failed, OffloadFailedBlock, OffloadContBlock);
8733 
8734     CGF.EmitBlock(OffloadFailedBlock);
8735     if (RequiresOuterTask) {
8736       CapturedVars.clear();
8737       CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
8738     }
8739     emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
8740     CGF.EmitBranch(OffloadContBlock);
8741 
8742     CGF.EmitBlock(OffloadContBlock, /*IsFinished=*/true);
8743   };
8744 
8745   // Notify that the host version must be executed.
8746   auto &&ElseGen = [this, &D, OutlinedFn, &CS, &CapturedVars,
8747                     RequiresOuterTask](CodeGenFunction &CGF,
8748                                        PrePostActionTy &) {
8749     if (RequiresOuterTask) {
8750       CapturedVars.clear();
8751       CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
8752     }
8753     emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
8754   };
8755 
8756   auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray,
8757                           &CapturedVars, RequiresOuterTask,
8758                           &CS](CodeGenFunction &CGF, PrePostActionTy &) {
8759     // Fill up the arrays with all the captured variables.
8760     MappableExprsHandler::MapBaseValuesArrayTy BasePointers;
8761     MappableExprsHandler::MapValuesArrayTy Pointers;
8762     MappableExprsHandler::MapValuesArrayTy Sizes;
8763     MappableExprsHandler::MapFlagsArrayTy MapTypes;
8764 
8765     // Get mappable expression information.
8766     MappableExprsHandler MEHandler(D, CGF);
8767     llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers;
8768 
8769     auto RI = CS.getCapturedRecordDecl()->field_begin();
8770     auto CV = CapturedVars.begin();
8771     for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(),
8772                                               CE = CS.capture_end();
8773          CI != CE; ++CI, ++RI, ++CV) {
8774       MappableExprsHandler::MapBaseValuesArrayTy CurBasePointers;
8775       MappableExprsHandler::MapValuesArrayTy CurPointers;
8776       MappableExprsHandler::MapValuesArrayTy CurSizes;
8777       MappableExprsHandler::MapFlagsArrayTy CurMapTypes;
8778       MappableExprsHandler::StructRangeInfoTy PartialStruct;
8779 
8780       // VLA sizes are passed to the outlined region by copy and do not have map
8781       // information associated.
8782       if (CI->capturesVariableArrayType()) {
8783         CurBasePointers.push_back(*CV);
8784         CurPointers.push_back(*CV);
8785         CurSizes.push_back(CGF.getTypeSize(RI->getType()));
8786         // Copy to the device as an argument. No need to retrieve it.
8787         CurMapTypes.push_back(MappableExprsHandler::OMP_MAP_LITERAL |
8788                               MappableExprsHandler::OMP_MAP_TARGET_PARAM);
8789       } else {
8790         // If we have any information in the map clause, we use it, otherwise we
8791         // just do a default mapping.
8792         MEHandler.generateInfoForCapture(CI, *CV, CurBasePointers, CurPointers,
8793                                          CurSizes, CurMapTypes, PartialStruct);
8794         if (CurBasePointers.empty())
8795           MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurBasePointers,
8796                                            CurPointers, CurSizes, CurMapTypes);
8797         // Generate correct mapping for variables captured by reference in
8798         // lambdas.
8799         if (CI->capturesVariable())
8800           MEHandler.generateInfoForLambdaCaptures(
8801               CI->getCapturedVar(), *CV, CurBasePointers, CurPointers, CurSizes,
8802               CurMapTypes, LambdaPointers);
8803       }
8804       // We expect to have at least an element of information for this capture.
8805       assert(!CurBasePointers.empty() &&
8806              "Non-existing map pointer for capture!");
8807       assert(CurBasePointers.size() == CurPointers.size() &&
8808              CurBasePointers.size() == CurSizes.size() &&
8809              CurBasePointers.size() == CurMapTypes.size() &&
8810              "Inconsistent map information sizes!");
8811 
8812       // If there is an entry in PartialStruct it means we have a struct with
8813       // individual members mapped. Emit an extra combined entry.
8814       if (PartialStruct.Base.isValid())
8815         MEHandler.emitCombinedEntry(BasePointers, Pointers, Sizes, MapTypes,
8816                                     CurMapTypes, PartialStruct);
8817 
8818       // We need to append the results of this capture to what we already have.
8819       BasePointers.append(CurBasePointers.begin(), CurBasePointers.end());
8820       Pointers.append(CurPointers.begin(), CurPointers.end());
8821       Sizes.append(CurSizes.begin(), CurSizes.end());
8822       MapTypes.append(CurMapTypes.begin(), CurMapTypes.end());
8823     }
8824     // Adjust MEMBER_OF flags for the lambdas captures.
8825     MEHandler.adjustMemberOfForLambdaCaptures(LambdaPointers, BasePointers,
8826                                               Pointers, MapTypes);
8827     // Map other list items in the map clause which are not captured variables
8828     // but "declare target link" global variables.
8829     MEHandler.generateInfoForDeclareTargetLink(BasePointers, Pointers, Sizes,
8830                                                MapTypes);
8831 
8832     TargetDataInfo Info;
8833     // Fill up the arrays and create the arguments.
8834     emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info);
8835     emitOffloadingArraysArgument(CGF, Info.BasePointersArray,
8836                                  Info.PointersArray, Info.SizesArray,
8837                                  Info.MapTypesArray, Info);
8838     InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
8839     InputInfo.BasePointersArray =
8840         Address(Info.BasePointersArray, CGM.getPointerAlign());
8841     InputInfo.PointersArray =
8842         Address(Info.PointersArray, CGM.getPointerAlign());
8843     InputInfo.SizesArray = Address(Info.SizesArray, CGM.getPointerAlign());
8844     MapTypesArray = Info.MapTypesArray;
8845     if (RequiresOuterTask)
8846       CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
8847     else
8848       emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
8849   };
8850 
8851   auto &&TargetElseGen = [this, &ElseGen, &D, RequiresOuterTask](
8852                              CodeGenFunction &CGF, PrePostActionTy &) {
8853     if (RequiresOuterTask) {
8854       CodeGenFunction::OMPTargetDataInfo InputInfo;
8855       CGF.EmitOMPTargetTaskBasedDirective(D, ElseGen, InputInfo);
8856     } else {
8857       emitInlinedDirective(CGF, D.getDirectiveKind(), ElseGen);
8858     }
8859   };
8860 
8861   // If we have a target function ID it means that we need to support
8862   // offloading, otherwise, just execute on the host. We need to execute on host
8863   // regardless of the conditional in the if clause if, e.g., the user do not
8864   // specify target triples.
8865   if (OutlinedFnID) {
8866     if (IfCond) {
8867       emitOMPIfClause(CGF, IfCond, TargetThenGen, TargetElseGen);
8868     } else {
8869       RegionCodeGenTy ThenRCG(TargetThenGen);
8870       ThenRCG(CGF);
8871     }
8872   } else {
8873     RegionCodeGenTy ElseRCG(TargetElseGen);
8874     ElseRCG(CGF);
8875   }
8876 }
8877 
8878 void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S,
8879                                                     StringRef ParentName) {
8880   if (!S)
8881     return;
8882 
8883   // Codegen OMP target directives that offload compute to the device.
8884   bool RequiresDeviceCodegen =
8885       isa<OMPExecutableDirective>(S) &&
8886       isOpenMPTargetExecutionDirective(
8887           cast<OMPExecutableDirective>(S)->getDirectiveKind());
8888 
8889   if (RequiresDeviceCodegen) {
8890     const auto &E = *cast<OMPExecutableDirective>(S);
8891     unsigned DeviceID;
8892     unsigned FileID;
8893     unsigned Line;
8894     getTargetEntryUniqueInfo(CGM.getContext(), E.getBeginLoc(), DeviceID,
8895                              FileID, Line);
8896 
8897     // Is this a target region that should not be emitted as an entry point? If
8898     // so just signal we are done with this target region.
8899     if (!OffloadEntriesInfoManager.hasTargetRegionEntryInfo(DeviceID, FileID,
8900                                                             ParentName, Line))
8901       return;
8902 
8903     switch (E.getDirectiveKind()) {
8904     case OMPD_target:
8905       CodeGenFunction::EmitOMPTargetDeviceFunction(CGM, ParentName,
8906                                                    cast<OMPTargetDirective>(E));
8907       break;
8908     case OMPD_target_parallel:
8909       CodeGenFunction::EmitOMPTargetParallelDeviceFunction(
8910           CGM, ParentName, cast<OMPTargetParallelDirective>(E));
8911       break;
8912     case OMPD_target_teams:
8913       CodeGenFunction::EmitOMPTargetTeamsDeviceFunction(
8914           CGM, ParentName, cast<OMPTargetTeamsDirective>(E));
8915       break;
8916     case OMPD_target_teams_distribute:
8917       CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction(
8918           CGM, ParentName, cast<OMPTargetTeamsDistributeDirective>(E));
8919       break;
8920     case OMPD_target_teams_distribute_simd:
8921       CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction(
8922           CGM, ParentName, cast<OMPTargetTeamsDistributeSimdDirective>(E));
8923       break;
8924     case OMPD_target_parallel_for:
8925       CodeGenFunction::EmitOMPTargetParallelForDeviceFunction(
8926           CGM, ParentName, cast<OMPTargetParallelForDirective>(E));
8927       break;
8928     case OMPD_target_parallel_for_simd:
8929       CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction(
8930           CGM, ParentName, cast<OMPTargetParallelForSimdDirective>(E));
8931       break;
8932     case OMPD_target_simd:
8933       CodeGenFunction::EmitOMPTargetSimdDeviceFunction(
8934           CGM, ParentName, cast<OMPTargetSimdDirective>(E));
8935       break;
8936     case OMPD_target_teams_distribute_parallel_for:
8937       CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction(
8938           CGM, ParentName,
8939           cast<OMPTargetTeamsDistributeParallelForDirective>(E));
8940       break;
8941     case OMPD_target_teams_distribute_parallel_for_simd:
8942       CodeGenFunction::
8943           EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction(
8944               CGM, ParentName,
8945               cast<OMPTargetTeamsDistributeParallelForSimdDirective>(E));
8946       break;
8947     case OMPD_parallel:
8948     case OMPD_for:
8949     case OMPD_parallel_for:
8950     case OMPD_parallel_sections:
8951     case OMPD_for_simd:
8952     case OMPD_parallel_for_simd:
8953     case OMPD_cancel:
8954     case OMPD_cancellation_point:
8955     case OMPD_ordered:
8956     case OMPD_threadprivate:
8957     case OMPD_allocate:
8958     case OMPD_task:
8959     case OMPD_simd:
8960     case OMPD_sections:
8961     case OMPD_section:
8962     case OMPD_single:
8963     case OMPD_master:
8964     case OMPD_critical:
8965     case OMPD_taskyield:
8966     case OMPD_barrier:
8967     case OMPD_taskwait:
8968     case OMPD_taskgroup:
8969     case OMPD_atomic:
8970     case OMPD_flush:
8971     case OMPD_teams:
8972     case OMPD_target_data:
8973     case OMPD_target_exit_data:
8974     case OMPD_target_enter_data:
8975     case OMPD_distribute:
8976     case OMPD_distribute_simd:
8977     case OMPD_distribute_parallel_for:
8978     case OMPD_distribute_parallel_for_simd:
8979     case OMPD_teams_distribute:
8980     case OMPD_teams_distribute_simd:
8981     case OMPD_teams_distribute_parallel_for:
8982     case OMPD_teams_distribute_parallel_for_simd:
8983     case OMPD_target_update:
8984     case OMPD_declare_simd:
8985     case OMPD_declare_target:
8986     case OMPD_end_declare_target:
8987     case OMPD_declare_reduction:
8988     case OMPD_declare_mapper:
8989     case OMPD_taskloop:
8990     case OMPD_taskloop_simd:
8991     case OMPD_requires:
8992     case OMPD_unknown:
8993       llvm_unreachable("Unknown target directive for OpenMP device codegen.");
8994     }
8995     return;
8996   }
8997 
8998   if (const auto *E = dyn_cast<OMPExecutableDirective>(S)) {
8999     if (!E->hasAssociatedStmt() || !E->getAssociatedStmt())
9000       return;
9001 
9002     scanForTargetRegionsFunctions(
9003         E->getInnermostCapturedStmt()->getCapturedStmt(), ParentName);
9004     return;
9005   }
9006 
9007   // If this is a lambda function, look into its body.
9008   if (const auto *L = dyn_cast<LambdaExpr>(S))
9009     S = L->getBody();
9010 
9011   // Keep looking for target regions recursively.
9012   for (const Stmt *II : S->children())
9013     scanForTargetRegionsFunctions(II, ParentName);
9014 }
9015 
9016 bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) {
9017   // If emitting code for the host, we do not process FD here. Instead we do
9018   // the normal code generation.
9019   if (!CGM.getLangOpts().OpenMPIsDevice)
9020     return false;
9021 
9022   const ValueDecl *VD = cast<ValueDecl>(GD.getDecl());
9023   StringRef Name = CGM.getMangledName(GD);
9024   // Try to detect target regions in the function.
9025   if (const auto *FD = dyn_cast<FunctionDecl>(VD))
9026     scanForTargetRegionsFunctions(FD->getBody(), Name);
9027 
9028   // Do not to emit function if it is not marked as declare target.
9029   return !OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD) &&
9030          AlreadyEmittedTargetFunctions.count(Name) == 0;
9031 }
9032 
9033 bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
9034   if (!CGM.getLangOpts().OpenMPIsDevice)
9035     return false;
9036 
9037   // Check if there are Ctors/Dtors in this declaration and look for target
9038   // regions in it. We use the complete variant to produce the kernel name
9039   // mangling.
9040   QualType RDTy = cast<VarDecl>(GD.getDecl())->getType();
9041   if (const auto *RD = RDTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl()) {
9042     for (const CXXConstructorDecl *Ctor : RD->ctors()) {
9043       StringRef ParentName =
9044           CGM.getMangledName(GlobalDecl(Ctor, Ctor_Complete));
9045       scanForTargetRegionsFunctions(Ctor->getBody(), ParentName);
9046     }
9047     if (const CXXDestructorDecl *Dtor = RD->getDestructor()) {
9048       StringRef ParentName =
9049           CGM.getMangledName(GlobalDecl(Dtor, Dtor_Complete));
9050       scanForTargetRegionsFunctions(Dtor->getBody(), ParentName);
9051     }
9052   }
9053 
9054   // Do not to emit variable if it is not marked as declare target.
9055   llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
9056       OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(
9057           cast<VarDecl>(GD.getDecl()));
9058   if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link) {
9059     DeferredGlobalVariables.insert(cast<VarDecl>(GD.getDecl()));
9060     return true;
9061   }
9062   return false;
9063 }
9064 
9065 llvm::Constant *
9066 CGOpenMPRuntime::registerTargetFirstprivateCopy(CodeGenFunction &CGF,
9067                                                 const VarDecl *VD) {
9068   assert(VD->getType().isConstant(CGM.getContext()) &&
9069          "Expected constant variable.");
9070   StringRef VarName;
9071   llvm::Constant *Addr;
9072   llvm::GlobalValue::LinkageTypes Linkage;
9073   QualType Ty = VD->getType();
9074   SmallString<128> Buffer;
9075   {
9076     unsigned DeviceID;
9077     unsigned FileID;
9078     unsigned Line;
9079     getTargetEntryUniqueInfo(CGM.getContext(), VD->getLocation(), DeviceID,
9080                              FileID, Line);
9081     llvm::raw_svector_ostream OS(Buffer);
9082     OS << "__omp_offloading_firstprivate_" << llvm::format("_%x", DeviceID)
9083        << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line;
9084     VarName = OS.str();
9085   }
9086   Linkage = llvm::GlobalValue::InternalLinkage;
9087   Addr =
9088       getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(Ty), VarName,
9089                                   getDefaultFirstprivateAddressSpace());
9090   cast<llvm::GlobalValue>(Addr)->setLinkage(Linkage);
9091   CharUnits VarSize = CGM.getContext().getTypeSizeInChars(Ty);
9092   CGM.addCompilerUsedGlobal(cast<llvm::GlobalValue>(Addr));
9093   OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo(
9094       VarName, Addr, VarSize,
9095       OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo, Linkage);
9096   return Addr;
9097 }
9098 
9099 void CGOpenMPRuntime::registerTargetGlobalVariable(const VarDecl *VD,
9100                                                    llvm::Constant *Addr) {
9101   llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
9102       OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
9103   if (!Res) {
9104     if (CGM.getLangOpts().OpenMPIsDevice) {
9105       // Register non-target variables being emitted in device code (debug info
9106       // may cause this).
9107       StringRef VarName = CGM.getMangledName(VD);
9108       EmittedNonTargetVariables.try_emplace(VarName, Addr);
9109     }
9110     return;
9111   }
9112   // Register declare target variables.
9113   OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags;
9114   StringRef VarName;
9115   CharUnits VarSize;
9116   llvm::GlobalValue::LinkageTypes Linkage;
9117   switch (*Res) {
9118   case OMPDeclareTargetDeclAttr::MT_To:
9119     Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo;
9120     VarName = CGM.getMangledName(VD);
9121     if (VD->hasDefinition(CGM.getContext()) != VarDecl::DeclarationOnly) {
9122       VarSize = CGM.getContext().getTypeSizeInChars(VD->getType());
9123       assert(!VarSize.isZero() && "Expected non-zero size of the variable");
9124     } else {
9125       VarSize = CharUnits::Zero();
9126     }
9127     Linkage = CGM.getLLVMLinkageVarDefinition(VD, /*IsConstant=*/false);
9128     // Temp solution to prevent optimizations of the internal variables.
9129     if (CGM.getLangOpts().OpenMPIsDevice && !VD->isExternallyVisible()) {
9130       std::string RefName = getName({VarName, "ref"});
9131       if (!CGM.GetGlobalValue(RefName)) {
9132         llvm::Constant *AddrRef =
9133             getOrCreateInternalVariable(Addr->getType(), RefName);
9134         auto *GVAddrRef = cast<llvm::GlobalVariable>(AddrRef);
9135         GVAddrRef->setConstant(/*Val=*/true);
9136         GVAddrRef->setLinkage(llvm::GlobalValue::InternalLinkage);
9137         GVAddrRef->setInitializer(Addr);
9138         CGM.addCompilerUsedGlobal(GVAddrRef);
9139       }
9140     }
9141     break;
9142   case OMPDeclareTargetDeclAttr::MT_Link:
9143     Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink;
9144     if (CGM.getLangOpts().OpenMPIsDevice) {
9145       VarName = Addr->getName();
9146       Addr = nullptr;
9147     } else {
9148       VarName = getAddrOfDeclareTargetLink(VD).getName();
9149       Addr = cast<llvm::Constant>(getAddrOfDeclareTargetLink(VD).getPointer());
9150     }
9151     VarSize = CGM.getPointerSize();
9152     Linkage = llvm::GlobalValue::WeakAnyLinkage;
9153     break;
9154   }
9155   OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo(
9156       VarName, Addr, VarSize, Flags, Linkage);
9157 }
9158 
9159 bool CGOpenMPRuntime::emitTargetGlobal(GlobalDecl GD) {
9160   if (isa<FunctionDecl>(GD.getDecl()) ||
9161       isa<OMPDeclareReductionDecl>(GD.getDecl()))
9162     return emitTargetFunctions(GD);
9163 
9164   return emitTargetGlobalVariable(GD);
9165 }
9166 
9167 void CGOpenMPRuntime::emitDeferredTargetDecls() const {
9168   for (const VarDecl *VD : DeferredGlobalVariables) {
9169     llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
9170         OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
9171     if (!Res)
9172       continue;
9173     if (*Res == OMPDeclareTargetDeclAttr::MT_To) {
9174       CGM.EmitGlobal(VD);
9175     } else {
9176       assert(*Res == OMPDeclareTargetDeclAttr::MT_Link &&
9177              "Expected to or link clauses.");
9178       (void)CGM.getOpenMPRuntime().getAddrOfDeclareTargetLink(VD);
9179     }
9180   }
9181 }
9182 
9183 void CGOpenMPRuntime::adjustTargetSpecificDataForLambdas(
9184     CodeGenFunction &CGF, const OMPExecutableDirective &D) const {
9185   assert(isOpenMPTargetExecutionDirective(D.getDirectiveKind()) &&
9186          " Expected target-based directive.");
9187 }
9188 
9189 bool CGOpenMPRuntime::hasAllocateAttributeForGlobalVar(const VarDecl *VD,
9190                                                        LangAS &AS) {
9191   if (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())
9192     return false;
9193   const auto *A = VD->getAttr<OMPAllocateDeclAttr>();
9194   switch(A->getAllocatorType()) {
9195   case OMPAllocateDeclAttr::OMPDefaultMemAlloc:
9196   // Not supported, fallback to the default mem space.
9197   case OMPAllocateDeclAttr::OMPLargeCapMemAlloc:
9198   case OMPAllocateDeclAttr::OMPCGroupMemAlloc:
9199   case OMPAllocateDeclAttr::OMPHighBWMemAlloc:
9200   case OMPAllocateDeclAttr::OMPLowLatMemAlloc:
9201   case OMPAllocateDeclAttr::OMPThreadMemAlloc:
9202   case OMPAllocateDeclAttr::OMPConstMemAlloc:
9203   case OMPAllocateDeclAttr::OMPPTeamMemAlloc:
9204     AS = LangAS::Default;
9205     return true;
9206   case OMPAllocateDeclAttr::OMPUserDefinedMemAlloc:
9207     llvm_unreachable("Expected predefined allocator for the variables with the "
9208                      "static storage.");
9209   }
9210   return false;
9211 }
9212 
9213 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::DisableAutoDeclareTargetRAII(
9214     CodeGenModule &CGM)
9215     : CGM(CGM) {
9216   if (CGM.getLangOpts().OpenMPIsDevice) {
9217     SavedShouldMarkAsGlobal = CGM.getOpenMPRuntime().ShouldMarkAsGlobal;
9218     CGM.getOpenMPRuntime().ShouldMarkAsGlobal = false;
9219   }
9220 }
9221 
9222 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::~DisableAutoDeclareTargetRAII() {
9223   if (CGM.getLangOpts().OpenMPIsDevice)
9224     CGM.getOpenMPRuntime().ShouldMarkAsGlobal = SavedShouldMarkAsGlobal;
9225 }
9226 
9227 bool CGOpenMPRuntime::markAsGlobalTarget(GlobalDecl GD) {
9228   if (!CGM.getLangOpts().OpenMPIsDevice || !ShouldMarkAsGlobal)
9229     return true;
9230 
9231   StringRef Name = CGM.getMangledName(GD);
9232   const auto *D = cast<FunctionDecl>(GD.getDecl());
9233   // Do not to emit function if it is marked as declare target as it was already
9234   // emitted.
9235   if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(D)) {
9236     if (D->hasBody() && AlreadyEmittedTargetFunctions.count(Name) == 0) {
9237       if (auto *F = dyn_cast_or_null<llvm::Function>(CGM.GetGlobalValue(Name)))
9238         return !F->isDeclaration();
9239       return false;
9240     }
9241     return true;
9242   }
9243 
9244   return !AlreadyEmittedTargetFunctions.insert(Name).second;
9245 }
9246 
9247 llvm::Function *CGOpenMPRuntime::emitRegistrationFunction() {
9248   // If we have offloading in the current module, we need to emit the entries
9249   // now and register the offloading descriptor.
9250   createOffloadEntriesAndInfoMetadata();
9251 
9252   // Create and register the offloading binary descriptors. This is the main
9253   // entity that captures all the information about offloading in the current
9254   // compilation unit.
9255   return createOffloadingBinaryDescriptorRegistration();
9256 }
9257 
9258 void CGOpenMPRuntime::emitTeamsCall(CodeGenFunction &CGF,
9259                                     const OMPExecutableDirective &D,
9260                                     SourceLocation Loc,
9261                                     llvm::Function *OutlinedFn,
9262                                     ArrayRef<llvm::Value *> CapturedVars) {
9263   if (!CGF.HaveInsertPoint())
9264     return;
9265 
9266   llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
9267   CodeGenFunction::RunCleanupsScope Scope(CGF);
9268 
9269   // Build call __kmpc_fork_teams(loc, n, microtask, var1, .., varn);
9270   llvm::Value *Args[] = {
9271       RTLoc,
9272       CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
9273       CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy())};
9274   llvm::SmallVector<llvm::Value *, 16> RealArgs;
9275   RealArgs.append(std::begin(Args), std::end(Args));
9276   RealArgs.append(CapturedVars.begin(), CapturedVars.end());
9277 
9278   llvm::FunctionCallee RTLFn = createRuntimeFunction(OMPRTL__kmpc_fork_teams);
9279   CGF.EmitRuntimeCall(RTLFn, RealArgs);
9280 }
9281 
9282 void CGOpenMPRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
9283                                          const Expr *NumTeams,
9284                                          const Expr *ThreadLimit,
9285                                          SourceLocation Loc) {
9286   if (!CGF.HaveInsertPoint())
9287     return;
9288 
9289   llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
9290 
9291   llvm::Value *NumTeamsVal =
9292       NumTeams
9293           ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(NumTeams),
9294                                       CGF.CGM.Int32Ty, /* isSigned = */ true)
9295           : CGF.Builder.getInt32(0);
9296 
9297   llvm::Value *ThreadLimitVal =
9298       ThreadLimit
9299           ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit),
9300                                       CGF.CGM.Int32Ty, /* isSigned = */ true)
9301           : CGF.Builder.getInt32(0);
9302 
9303   // Build call __kmpc_push_num_teamss(&loc, global_tid, num_teams, thread_limit)
9304   llvm::Value *PushNumTeamsArgs[] = {RTLoc, getThreadID(CGF, Loc), NumTeamsVal,
9305                                      ThreadLimitVal};
9306   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_num_teams),
9307                       PushNumTeamsArgs);
9308 }
9309 
9310 void CGOpenMPRuntime::emitTargetDataCalls(
9311     CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
9312     const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) {
9313   if (!CGF.HaveInsertPoint())
9314     return;
9315 
9316   // Action used to replace the default codegen action and turn privatization
9317   // off.
9318   PrePostActionTy NoPrivAction;
9319 
9320   // Generate the code for the opening of the data environment. Capture all the
9321   // arguments of the runtime call by reference because they are used in the
9322   // closing of the region.
9323   auto &&BeginThenGen = [this, &D, Device, &Info,
9324                          &CodeGen](CodeGenFunction &CGF, PrePostActionTy &) {
9325     // Fill up the arrays with all the mapped variables.
9326     MappableExprsHandler::MapBaseValuesArrayTy BasePointers;
9327     MappableExprsHandler::MapValuesArrayTy Pointers;
9328     MappableExprsHandler::MapValuesArrayTy Sizes;
9329     MappableExprsHandler::MapFlagsArrayTy MapTypes;
9330 
9331     // Get map clause information.
9332     MappableExprsHandler MCHandler(D, CGF);
9333     MCHandler.generateAllInfo(BasePointers, Pointers, Sizes, MapTypes);
9334 
9335     // Fill up the arrays and create the arguments.
9336     emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info);
9337 
9338     llvm::Value *BasePointersArrayArg = nullptr;
9339     llvm::Value *PointersArrayArg = nullptr;
9340     llvm::Value *SizesArrayArg = nullptr;
9341     llvm::Value *MapTypesArrayArg = nullptr;
9342     emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
9343                                  SizesArrayArg, MapTypesArrayArg, Info);
9344 
9345     // Emit device ID if any.
9346     llvm::Value *DeviceID = nullptr;
9347     if (Device) {
9348       DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
9349                                            CGF.Int64Ty, /*isSigned=*/true);
9350     } else {
9351       DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
9352     }
9353 
9354     // Emit the number of elements in the offloading arrays.
9355     llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);
9356 
9357     llvm::Value *OffloadingArgs[] = {
9358         DeviceID,         PointerNum,    BasePointersArrayArg,
9359         PointersArrayArg, SizesArrayArg, MapTypesArrayArg};
9360     CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_target_data_begin),
9361                         OffloadingArgs);
9362 
9363     // If device pointer privatization is required, emit the body of the region
9364     // here. It will have to be duplicated: with and without privatization.
9365     if (!Info.CaptureDeviceAddrMap.empty())
9366       CodeGen(CGF);
9367   };
9368 
9369   // Generate code for the closing of the data region.
9370   auto &&EndThenGen = [this, Device, &Info](CodeGenFunction &CGF,
9371                                             PrePostActionTy &) {
9372     assert(Info.isValid() && "Invalid data environment closing arguments.");
9373 
9374     llvm::Value *BasePointersArrayArg = nullptr;
9375     llvm::Value *PointersArrayArg = nullptr;
9376     llvm::Value *SizesArrayArg = nullptr;
9377     llvm::Value *MapTypesArrayArg = nullptr;
9378     emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
9379                                  SizesArrayArg, MapTypesArrayArg, Info);
9380 
9381     // Emit device ID if any.
9382     llvm::Value *DeviceID = nullptr;
9383     if (Device) {
9384       DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
9385                                            CGF.Int64Ty, /*isSigned=*/true);
9386     } else {
9387       DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
9388     }
9389 
9390     // Emit the number of elements in the offloading arrays.
9391     llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);
9392 
9393     llvm::Value *OffloadingArgs[] = {
9394         DeviceID,         PointerNum,    BasePointersArrayArg,
9395         PointersArrayArg, SizesArrayArg, MapTypesArrayArg};
9396     CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_target_data_end),
9397                         OffloadingArgs);
9398   };
9399 
9400   // If we need device pointer privatization, we need to emit the body of the
9401   // region with no privatization in the 'else' branch of the conditional.
9402   // Otherwise, we don't have to do anything.
9403   auto &&BeginElseGen = [&Info, &CodeGen, &NoPrivAction](CodeGenFunction &CGF,
9404                                                          PrePostActionTy &) {
9405     if (!Info.CaptureDeviceAddrMap.empty()) {
9406       CodeGen.setAction(NoPrivAction);
9407       CodeGen(CGF);
9408     }
9409   };
9410 
9411   // We don't have to do anything to close the region if the if clause evaluates
9412   // to false.
9413   auto &&EndElseGen = [](CodeGenFunction &CGF, PrePostActionTy &) {};
9414 
9415   if (IfCond) {
9416     emitOMPIfClause(CGF, IfCond, BeginThenGen, BeginElseGen);
9417   } else {
9418     RegionCodeGenTy RCG(BeginThenGen);
9419     RCG(CGF);
9420   }
9421 
9422   // If we don't require privatization of device pointers, we emit the body in
9423   // between the runtime calls. This avoids duplicating the body code.
9424   if (Info.CaptureDeviceAddrMap.empty()) {
9425     CodeGen.setAction(NoPrivAction);
9426     CodeGen(CGF);
9427   }
9428 
9429   if (IfCond) {
9430     emitOMPIfClause(CGF, IfCond, EndThenGen, EndElseGen);
9431   } else {
9432     RegionCodeGenTy RCG(EndThenGen);
9433     RCG(CGF);
9434   }
9435 }
9436 
9437 void CGOpenMPRuntime::emitTargetDataStandAloneCall(
9438     CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
9439     const Expr *Device) {
9440   if (!CGF.HaveInsertPoint())
9441     return;
9442 
9443   assert((isa<OMPTargetEnterDataDirective>(D) ||
9444           isa<OMPTargetExitDataDirective>(D) ||
9445           isa<OMPTargetUpdateDirective>(D)) &&
9446          "Expecting either target enter, exit data, or update directives.");
9447 
9448   CodeGenFunction::OMPTargetDataInfo InputInfo;
9449   llvm::Value *MapTypesArray = nullptr;
9450   // Generate the code for the opening of the data environment.
9451   auto &&ThenGen = [this, &D, Device, &InputInfo,
9452                     &MapTypesArray](CodeGenFunction &CGF, PrePostActionTy &) {
9453     // Emit device ID if any.
9454     llvm::Value *DeviceID = nullptr;
9455     if (Device) {
9456       DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
9457                                            CGF.Int64Ty, /*isSigned=*/true);
9458     } else {
9459       DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
9460     }
9461 
9462     // Emit the number of elements in the offloading arrays.
9463     llvm::Constant *PointerNum =
9464         CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);
9465 
9466     llvm::Value *OffloadingArgs[] = {DeviceID,
9467                                      PointerNum,
9468                                      InputInfo.BasePointersArray.getPointer(),
9469                                      InputInfo.PointersArray.getPointer(),
9470                                      InputInfo.SizesArray.getPointer(),
9471                                      MapTypesArray};
9472 
9473     // Select the right runtime function call for each expected standalone
9474     // directive.
9475     const bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
9476     OpenMPRTLFunction RTLFn;
9477     switch (D.getDirectiveKind()) {
9478     case OMPD_target_enter_data:
9479       RTLFn = HasNowait ? OMPRTL__tgt_target_data_begin_nowait
9480                         : OMPRTL__tgt_target_data_begin;
9481       break;
9482     case OMPD_target_exit_data:
9483       RTLFn = HasNowait ? OMPRTL__tgt_target_data_end_nowait
9484                         : OMPRTL__tgt_target_data_end;
9485       break;
9486     case OMPD_target_update:
9487       RTLFn = HasNowait ? OMPRTL__tgt_target_data_update_nowait
9488                         : OMPRTL__tgt_target_data_update;
9489       break;
9490     case OMPD_parallel:
9491     case OMPD_for:
9492     case OMPD_parallel_for:
9493     case OMPD_parallel_sections:
9494     case OMPD_for_simd:
9495     case OMPD_parallel_for_simd:
9496     case OMPD_cancel:
9497     case OMPD_cancellation_point:
9498     case OMPD_ordered:
9499     case OMPD_threadprivate:
9500     case OMPD_allocate:
9501     case OMPD_task:
9502     case OMPD_simd:
9503     case OMPD_sections:
9504     case OMPD_section:
9505     case OMPD_single:
9506     case OMPD_master:
9507     case OMPD_critical:
9508     case OMPD_taskyield:
9509     case OMPD_barrier:
9510     case OMPD_taskwait:
9511     case OMPD_taskgroup:
9512     case OMPD_atomic:
9513     case OMPD_flush:
9514     case OMPD_teams:
9515     case OMPD_target_data:
9516     case OMPD_distribute:
9517     case OMPD_distribute_simd:
9518     case OMPD_distribute_parallel_for:
9519     case OMPD_distribute_parallel_for_simd:
9520     case OMPD_teams_distribute:
9521     case OMPD_teams_distribute_simd:
9522     case OMPD_teams_distribute_parallel_for:
9523     case OMPD_teams_distribute_parallel_for_simd:
9524     case OMPD_declare_simd:
9525     case OMPD_declare_target:
9526     case OMPD_end_declare_target:
9527     case OMPD_declare_reduction:
9528     case OMPD_declare_mapper:
9529     case OMPD_taskloop:
9530     case OMPD_taskloop_simd:
9531     case OMPD_target:
9532     case OMPD_target_simd:
9533     case OMPD_target_teams_distribute:
9534     case OMPD_target_teams_distribute_simd:
9535     case OMPD_target_teams_distribute_parallel_for:
9536     case OMPD_target_teams_distribute_parallel_for_simd:
9537     case OMPD_target_teams:
9538     case OMPD_target_parallel:
9539     case OMPD_target_parallel_for:
9540     case OMPD_target_parallel_for_simd:
9541     case OMPD_requires:
9542     case OMPD_unknown:
9543       llvm_unreachable("Unexpected standalone target data directive.");
9544       break;
9545     }
9546     CGF.EmitRuntimeCall(createRuntimeFunction(RTLFn), OffloadingArgs);
9547   };
9548 
9549   auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray](
9550                              CodeGenFunction &CGF, PrePostActionTy &) {
9551     // Fill up the arrays with all the mapped variables.
9552     MappableExprsHandler::MapBaseValuesArrayTy BasePointers;
9553     MappableExprsHandler::MapValuesArrayTy Pointers;
9554     MappableExprsHandler::MapValuesArrayTy Sizes;
9555     MappableExprsHandler::MapFlagsArrayTy MapTypes;
9556 
9557     // Get map clause information.
9558     MappableExprsHandler MEHandler(D, CGF);
9559     MEHandler.generateAllInfo(BasePointers, Pointers, Sizes, MapTypes);
9560 
9561     TargetDataInfo Info;
9562     // Fill up the arrays and create the arguments.
9563     emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info);
9564     emitOffloadingArraysArgument(CGF, Info.BasePointersArray,
9565                                  Info.PointersArray, Info.SizesArray,
9566                                  Info.MapTypesArray, Info);
9567     InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
9568     InputInfo.BasePointersArray =
9569         Address(Info.BasePointersArray, CGM.getPointerAlign());
9570     InputInfo.PointersArray =
9571         Address(Info.PointersArray, CGM.getPointerAlign());
9572     InputInfo.SizesArray =
9573         Address(Info.SizesArray, CGM.getPointerAlign());
9574     MapTypesArray = Info.MapTypesArray;
9575     if (D.hasClausesOfKind<OMPDependClause>())
9576       CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
9577     else
9578       emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
9579   };
9580 
9581   if (IfCond) {
9582     emitOMPIfClause(CGF, IfCond, TargetThenGen,
9583                     [](CodeGenFunction &CGF, PrePostActionTy &) {});
9584   } else {
9585     RegionCodeGenTy ThenRCG(TargetThenGen);
9586     ThenRCG(CGF);
9587   }
9588 }
9589 
namespace {
  /// Kind of parameter in a function with 'declare simd' directive.
  ///
  /// The mangling helpers in this file translate each kind into a letter of
  /// the vector function name: 'v' for Vector, 'u' for Uniform, 'l' for
  /// Linear, and 's' (x86) or "ls" (AArch64) for LinearWithVarStride.
  ///
  /// NOTE(review): LinearWithVarStride is the first enumerator, so a
  /// value-initialized ParamKindTy (e.g. a `{}` argument) denotes
  /// LinearWithVarStride, not Vector. Keep that in mind before reordering.
  enum ParamKindTy { LinearWithVarStride, Linear, Uniform, Vector };
  /// Attribute set of the parameter.
  struct ParamAttrTy {
    // Classification of the parameter; parameters are Vector unless a clause
    // says otherwise.
    ParamKindTy Kind = Vector;
    // Value printed after the 'l' / 's' / "ls" letter when mangling; a zero
    // value is omitted for Linear parameters. Presumably the constant step
    // (Linear) or the position of the stride argument (LinearWithVarStride)
    // -- TODO confirm against emitDeclareSimdFunction.
    llvm::APSInt StrideOrArg;
    // Alignment printed after 'a' when mangling; zero means "not specified"
    // and nothing is printed.
    llvm::APSInt Alignment;
  };
} // namespace
9600 
9601 static unsigned evaluateCDTSize(const FunctionDecl *FD,
9602                                 ArrayRef<ParamAttrTy> ParamAttrs) {
9603   // Every vector variant of a SIMD-enabled function has a vector length (VLEN).
9604   // If OpenMP clause "simdlen" is used, the VLEN is the value of the argument
9605   // of that clause. The VLEN value must be power of 2.
9606   // In other case the notion of the function`s "characteristic data type" (CDT)
9607   // is used to compute the vector length.
9608   // CDT is defined in the following order:
9609   //   a) For non-void function, the CDT is the return type.
9610   //   b) If the function has any non-uniform, non-linear parameters, then the
9611   //   CDT is the type of the first such parameter.
9612   //   c) If the CDT determined by a) or b) above is struct, union, or class
9613   //   type which is pass-by-value (except for the type that maps to the
9614   //   built-in complex data type), the characteristic data type is int.
9615   //   d) If none of the above three cases is applicable, the CDT is int.
9616   // The VLEN is then determined based on the CDT and the size of vector
9617   // register of that ISA for which current vector version is generated. The
9618   // VLEN is computed using the formula below:
9619   //   VLEN  = sizeof(vector_register) / sizeof(CDT),
9620   // where vector register size specified in section 3.2.1 Registers and the
9621   // Stack Frame of original AMD64 ABI document.
9622   QualType RetType = FD->getReturnType();
9623   if (RetType.isNull())
9624     return 0;
9625   ASTContext &C = FD->getASTContext();
9626   QualType CDT;
9627   if (!RetType.isNull() && !RetType->isVoidType()) {
9628     CDT = RetType;
9629   } else {
9630     unsigned Offset = 0;
9631     if (const auto *MD = dyn_cast<CXXMethodDecl>(FD)) {
9632       if (ParamAttrs[Offset].Kind == Vector)
9633         CDT = C.getPointerType(C.getRecordType(MD->getParent()));
9634       ++Offset;
9635     }
9636     if (CDT.isNull()) {
9637       for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
9638         if (ParamAttrs[I + Offset].Kind == Vector) {
9639           CDT = FD->getParamDecl(I)->getType();
9640           break;
9641         }
9642       }
9643     }
9644   }
9645   if (CDT.isNull())
9646     CDT = C.IntTy;
9647   CDT = CDT->getCanonicalTypeUnqualified();
9648   if (CDT->isRecordType() || CDT->isUnionType())
9649     CDT = C.IntTy;
9650   return C.getTypeSize(CDT);
9651 }
9652 
9653 static void
9654 emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn,
9655                            const llvm::APSInt &VLENVal,
9656                            ArrayRef<ParamAttrTy> ParamAttrs,
9657                            OMPDeclareSimdDeclAttr::BranchStateTy State) {
9658   struct ISADataTy {
9659     char ISA;
9660     unsigned VecRegSize;
9661   };
9662   ISADataTy ISAData[] = {
9663       {
9664           'b', 128
9665       }, // SSE
9666       {
9667           'c', 256
9668       }, // AVX
9669       {
9670           'd', 256
9671       }, // AVX2
9672       {
9673           'e', 512
9674       }, // AVX512
9675   };
9676   llvm::SmallVector<char, 2> Masked;
9677   switch (State) {
9678   case OMPDeclareSimdDeclAttr::BS_Undefined:
9679     Masked.push_back('N');
9680     Masked.push_back('M');
9681     break;
9682   case OMPDeclareSimdDeclAttr::BS_Notinbranch:
9683     Masked.push_back('N');
9684     break;
9685   case OMPDeclareSimdDeclAttr::BS_Inbranch:
9686     Masked.push_back('M');
9687     break;
9688   }
9689   for (char Mask : Masked) {
9690     for (const ISADataTy &Data : ISAData) {
9691       SmallString<256> Buffer;
9692       llvm::raw_svector_ostream Out(Buffer);
9693       Out << "_ZGV" << Data.ISA << Mask;
9694       if (!VLENVal) {
9695         Out << llvm::APSInt::getUnsigned(Data.VecRegSize /
9696                                          evaluateCDTSize(FD, ParamAttrs));
9697       } else {
9698         Out << VLENVal;
9699       }
9700       for (const ParamAttrTy &ParamAttr : ParamAttrs) {
9701         switch (ParamAttr.Kind){
9702         case LinearWithVarStride:
9703           Out << 's' << ParamAttr.StrideOrArg;
9704           break;
9705         case Linear:
9706           Out << 'l';
9707           if (!!ParamAttr.StrideOrArg)
9708             Out << ParamAttr.StrideOrArg;
9709           break;
9710         case Uniform:
9711           Out << 'u';
9712           break;
9713         case Vector:
9714           Out << 'v';
9715           break;
9716         }
9717         if (!!ParamAttr.Alignment)
9718           Out << 'a' << ParamAttr.Alignment;
9719       }
9720       Out << '_' << Fn->getName();
9721       Fn->addFnAttr(Out.str());
9722     }
9723   }
9724 }
9725 
// These are the functions needed to mangle the names of the vector
// functions generated by the compiler, according to the rules
// defined in the "Vector Function ABI specifications for AArch64",
// available at
// https://developer.arm.com/products/software-development-tools/hpc/arm-compiler-for-hpc/vector-function-abi.
9731 
9732 /// Maps To Vector (MTV), as defined in 3.1.1 of the AAVFABI.
9733 ///
9734 /// TODO: Need to implement the behavior for reference marked with a
9735 /// var or no linear modifiers (1.b in the section). For this, we
9736 /// need to extend ParamKindTy to support the linear modifiers.
9737 static bool getAArch64MTV(QualType QT, ParamKindTy Kind) {
9738   QT = QT.getCanonicalType();
9739 
9740   if (QT->isVoidType())
9741     return false;
9742 
9743   if (Kind == ParamKindTy::Uniform)
9744     return false;
9745 
9746   if (Kind == ParamKindTy::Linear)
9747     return false;
9748 
9749   // TODO: Handle linear references with modifiers
9750 
9751   if (Kind == ParamKindTy::LinearWithVarStride)
9752     return false;
9753 
9754   return true;
9755 }
9756 
9757 /// Pass By Value (PBV), as defined in 3.1.2 of the AAVFABI.
9758 static bool getAArch64PBV(QualType QT, ASTContext &C) {
9759   QT = QT.getCanonicalType();
9760   unsigned Size = C.getTypeSize(QT);
9761 
9762   // Only scalars and complex within 16 bytes wide set PVB to true.
9763   if (Size != 8 && Size != 16 && Size != 32 && Size != 64 && Size != 128)
9764     return false;
9765 
9766   if (QT->isFloatingType())
9767     return true;
9768 
9769   if (QT->isIntegerType())
9770     return true;
9771 
9772   if (QT->isPointerType())
9773     return true;
9774 
9775   // TODO: Add support for complex types (section 3.1.2, item 2).
9776 
9777   return false;
9778 }
9779 
9780 /// Computes the lane size (LS) of a return type or of an input parameter,
9781 /// as defined by `LS(P)` in 3.2.1 of the AAVFABI.
9782 /// TODO: Add support for references, section 3.2.1, item 1.
9783 static unsigned getAArch64LS(QualType QT, ParamKindTy Kind, ASTContext &C) {
9784   if (getAArch64MTV(QT, Kind) && QT.getCanonicalType()->isPointerType()) {
9785     QualType PTy = QT.getCanonicalType()->getPointeeType();
9786     if (getAArch64PBV(PTy, C))
9787       return C.getTypeSize(PTy);
9788   }
9789   if (getAArch64PBV(QT, C))
9790     return C.getTypeSize(QT);
9791 
9792   return C.getTypeSize(C.getUIntPtrType());
9793 }
9794 
9795 // Get Narrowest Data Size (NDS) and Widest Data Size (WDS) from the
9796 // signature of the scalar function, as defined in 3.2.2 of the
9797 // AAVFABI.
9798 static std::tuple<unsigned, unsigned, bool>
9799 getNDSWDS(const FunctionDecl *FD, ArrayRef<ParamAttrTy> ParamAttrs) {
9800   QualType RetType = FD->getReturnType().getCanonicalType();
9801 
9802   ASTContext &C = FD->getASTContext();
9803 
9804   bool OutputBecomesInput = false;
9805 
9806   llvm::SmallVector<unsigned, 8> Sizes;
9807   if (!RetType->isVoidType()) {
9808     Sizes.push_back(getAArch64LS(RetType, ParamKindTy::Vector, C));
9809     if (!getAArch64PBV(RetType, C) && getAArch64MTV(RetType, {}))
9810       OutputBecomesInput = true;
9811   }
9812   for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
9813     QualType QT = FD->getParamDecl(I)->getType().getCanonicalType();
9814     Sizes.push_back(getAArch64LS(QT, ParamAttrs[I].Kind, C));
9815   }
9816 
9817   assert(!Sizes.empty() && "Unable to determine NDS and WDS.");
9818   // The LS of a function parameter / return value can only be a power
9819   // of 2, starting from 8 bits, up to 128.
9820   assert(std::all_of(Sizes.begin(), Sizes.end(),
9821                      [](unsigned Size) {
9822                        return Size == 8 || Size == 16 || Size == 32 ||
9823                               Size == 64 || Size == 128;
9824                      }) &&
9825          "Invalid size");
9826 
9827   return std::make_tuple(*std::min_element(std::begin(Sizes), std::end(Sizes)),
9828                          *std::max_element(std::begin(Sizes), std::end(Sizes)),
9829                          OutputBecomesInput);
9830 }
9831 
9832 /// Mangle the parameter part of the vector function name according to
9833 /// their OpenMP classification. The mangling function is defined in
9834 /// section 3.5 of the AAVFABI.
9835 static std::string mangleVectorParameters(ArrayRef<ParamAttrTy> ParamAttrs) {
9836   SmallString<256> Buffer;
9837   llvm::raw_svector_ostream Out(Buffer);
9838   for (const auto &ParamAttr : ParamAttrs) {
9839     switch (ParamAttr.Kind) {
9840     case LinearWithVarStride:
9841       Out << "ls" << ParamAttr.StrideOrArg;
9842       break;
9843     case Linear:
9844       Out << 'l';
9845       // Don't print the step value if it is not present or if it is
9846       // equal to 1.
9847       if (!!ParamAttr.StrideOrArg && ParamAttr.StrideOrArg != 1)
9848         Out << ParamAttr.StrideOrArg;
9849       break;
9850     case Uniform:
9851       Out << 'u';
9852       break;
9853     case Vector:
9854       Out << 'v';
9855       break;
9856     }
9857 
9858     if (!!ParamAttr.Alignment)
9859       Out << 'a' << ParamAttr.Alignment;
9860   }
9861 
9862   return Out.str();
9863 }
9864 
9865 // Function used to add the attribute. The parameter `VLEN` is
9866 // templated to allow the use of "x" when targeting scalable functions
9867 // for SVE.
9868 template <typename T>
9869 static void addAArch64VectorName(T VLEN, StringRef LMask, StringRef Prefix,
9870                                  char ISA, StringRef ParSeq,
9871                                  StringRef MangledName, bool OutputBecomesInput,
9872                                  llvm::Function *Fn) {
9873   SmallString<256> Buffer;
9874   llvm::raw_svector_ostream Out(Buffer);
9875   Out << Prefix << ISA << LMask << VLEN;
9876   if (OutputBecomesInput)
9877     Out << "v";
9878   Out << ParSeq << "_" << MangledName;
9879   Fn->addFnAttr(Out.str());
9880 }
9881 
9882 // Helper function to generate the Advanced SIMD names depending on
9883 // the value of the NDS when simdlen is not present.
9884 static void addAArch64AdvSIMDNDSNames(unsigned NDS, StringRef Mask,
9885                                       StringRef Prefix, char ISA,
9886                                       StringRef ParSeq, StringRef MangledName,
9887                                       bool OutputBecomesInput,
9888                                       llvm::Function *Fn) {
9889   switch (NDS) {
9890   case 8:
9891     addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
9892                          OutputBecomesInput, Fn);
9893     addAArch64VectorName(16, Mask, Prefix, ISA, ParSeq, MangledName,
9894                          OutputBecomesInput, Fn);
9895     break;
9896   case 16:
9897     addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
9898                          OutputBecomesInput, Fn);
9899     addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
9900                          OutputBecomesInput, Fn);
9901     break;
9902   case 32:
9903     addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
9904                          OutputBecomesInput, Fn);
9905     addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
9906                          OutputBecomesInput, Fn);
9907     break;
9908   case 64:
9909   case 128:
9910     addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
9911                          OutputBecomesInput, Fn);
9912     break;
9913   default:
9914     llvm_unreachable("Scalar type is too wide.");
9915   }
9916 }
9917 
9918 /// Emit vector function attributes for AArch64, as defined in the AAVFABI.
9919 static void emitAArch64DeclareSimdFunction(
9920     CodeGenModule &CGM, const FunctionDecl *FD, unsigned UserVLEN,
9921     ArrayRef<ParamAttrTy> ParamAttrs,
9922     OMPDeclareSimdDeclAttr::BranchStateTy State, StringRef MangledName,
9923     char ISA, unsigned VecRegSize, llvm::Function *Fn, SourceLocation SLoc) {
9924 
9925   // Get basic data for building the vector signature.
9926   const auto Data = getNDSWDS(FD, ParamAttrs);
9927   const unsigned NDS = std::get<0>(Data);
9928   const unsigned WDS = std::get<1>(Data);
9929   const bool OutputBecomesInput = std::get<2>(Data);
9930 
9931   // Check the values provided via `simdlen` by the user.
9932   // 1. A `simdlen(1)` doesn't produce vector signatures,
9933   if (UserVLEN == 1) {
9934     unsigned DiagID = CGM.getDiags().getCustomDiagID(
9935         DiagnosticsEngine::Warning,
9936         "The clause simdlen(1) has no effect when targeting aarch64.");
9937     CGM.getDiags().Report(SLoc, DiagID);
9938     return;
9939   }
9940 
9941   // 2. Section 3.3.1, item 1: user input must be a power of 2 for
9942   // Advanced SIMD output.
9943   if (ISA == 'n' && UserVLEN && !llvm::isPowerOf2_32(UserVLEN)) {
9944     unsigned DiagID = CGM.getDiags().getCustomDiagID(
9945         DiagnosticsEngine::Warning, "The value specified in simdlen must be a "
9946                                     "power of 2 when targeting Advanced SIMD.");
9947     CGM.getDiags().Report(SLoc, DiagID);
9948     return;
9949   }
9950 
9951   // 3. Section 3.4.1. SVE fixed lengh must obey the architectural
9952   // limits.
9953   if (ISA == 's' && UserVLEN != 0) {
9954     if ((UserVLEN * WDS > 2048) || (UserVLEN * WDS % 128 != 0)) {
9955       unsigned DiagID = CGM.getDiags().getCustomDiagID(
9956           DiagnosticsEngine::Warning, "The clause simdlen must fit the %0-bit "
9957                                       "lanes in the architectural constraints "
9958                                       "for SVE (min is 128-bit, max is "
9959                                       "2048-bit, by steps of 128-bit)");
9960       CGM.getDiags().Report(SLoc, DiagID) << WDS;
9961       return;
9962     }
9963   }
9964 
9965   // Sort out parameter sequence.
9966   const std::string ParSeq = mangleVectorParameters(ParamAttrs);
9967   StringRef Prefix = "_ZGV";
9968   // Generate simdlen from user input (if any).
9969   if (UserVLEN) {
9970     if (ISA == 's') {
9971       // SVE generates only a masked function.
9972       addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
9973                            OutputBecomesInput, Fn);
9974     } else {
9975       assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
9976       // Advanced SIMD generates one or two functions, depending on
9977       // the `[not]inbranch` clause.
9978       switch (State) {
9979       case OMPDeclareSimdDeclAttr::BS_Undefined:
9980         addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
9981                              OutputBecomesInput, Fn);
9982         addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
9983                              OutputBecomesInput, Fn);
9984         break;
9985       case OMPDeclareSimdDeclAttr::BS_Notinbranch:
9986         addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
9987                              OutputBecomesInput, Fn);
9988         break;
9989       case OMPDeclareSimdDeclAttr::BS_Inbranch:
9990         addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
9991                              OutputBecomesInput, Fn);
9992         break;
9993       }
9994     }
9995   } else {
9996     // If no user simdlen is provided, follow the AAVFABI rules for
9997     // generating the vector length.
9998     if (ISA == 's') {
9999       // SVE, section 3.4.1, item 1.
10000       addAArch64VectorName("x", "M", Prefix, ISA, ParSeq, MangledName,
10001                            OutputBecomesInput, Fn);
10002     } else {
10003       assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
10004       // Advanced SIMD, Section 3.3.1 of the AAVFABI, generates one or
10005       // two vector names depending on the use of the clause
10006       // `[not]inbranch`.
10007       switch (State) {
10008       case OMPDeclareSimdDeclAttr::BS_Undefined:
10009         addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
10010                                   OutputBecomesInput, Fn);
10011         addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
10012                                   OutputBecomesInput, Fn);
10013         break;
10014       case OMPDeclareSimdDeclAttr::BS_Notinbranch:
10015         addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
10016                                   OutputBecomesInput, Fn);
10017         break;
10018       case OMPDeclareSimdDeclAttr::BS_Inbranch:
10019         addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
10020                                   OutputBecomesInput, Fn);
10021         break;
10022       }
10023     }
10024   }
10025 }
10026 
10027 void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD,
10028                                               llvm::Function *Fn) {
10029   ASTContext &C = CGM.getContext();
10030   FD = FD->getMostRecentDecl();
10031   // Map params to their positions in function decl.
10032   llvm::DenseMap<const Decl *, unsigned> ParamPositions;
10033   if (isa<CXXMethodDecl>(FD))
10034     ParamPositions.try_emplace(FD, 0);
10035   unsigned ParamPos = ParamPositions.size();
10036   for (const ParmVarDecl *P : FD->parameters()) {
10037     ParamPositions.try_emplace(P->getCanonicalDecl(), ParamPos);
10038     ++ParamPos;
10039   }
10040   while (FD) {
10041     for (const auto *Attr : FD->specific_attrs<OMPDeclareSimdDeclAttr>()) {
10042       llvm::SmallVector<ParamAttrTy, 8> ParamAttrs(ParamPositions.size());
10043       // Mark uniform parameters.
10044       for (const Expr *E : Attr->uniforms()) {
10045         E = E->IgnoreParenImpCasts();
10046         unsigned Pos;
10047         if (isa<CXXThisExpr>(E)) {
10048           Pos = ParamPositions[FD];
10049         } else {
10050           const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
10051                                 ->getCanonicalDecl();
10052           Pos = ParamPositions[PVD];
10053         }
10054         ParamAttrs[Pos].Kind = Uniform;
10055       }
10056       // Get alignment info.
10057       auto NI = Attr->alignments_begin();
10058       for (const Expr *E : Attr->aligneds()) {
10059         E = E->IgnoreParenImpCasts();
10060         unsigned Pos;
10061         QualType ParmTy;
10062         if (isa<CXXThisExpr>(E)) {
10063           Pos = ParamPositions[FD];
10064           ParmTy = E->getType();
10065         } else {
10066           const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
10067                                 ->getCanonicalDecl();
10068           Pos = ParamPositions[PVD];
10069           ParmTy = PVD->getType();
10070         }
10071         ParamAttrs[Pos].Alignment =
10072             (*NI)
10073                 ? (*NI)->EvaluateKnownConstInt(C)
10074                 : llvm::APSInt::getUnsigned(
10075                       C.toCharUnitsFromBits(C.getOpenMPDefaultSimdAlign(ParmTy))
10076                           .getQuantity());
10077         ++NI;
10078       }
10079       // Mark linear parameters.
10080       auto SI = Attr->steps_begin();
10081       auto MI = Attr->modifiers_begin();
10082       for (const Expr *E : Attr->linears()) {
10083         E = E->IgnoreParenImpCasts();
10084         unsigned Pos;
10085         if (isa<CXXThisExpr>(E)) {
10086           Pos = ParamPositions[FD];
10087         } else {
10088           const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
10089                                 ->getCanonicalDecl();
10090           Pos = ParamPositions[PVD];
10091         }
10092         ParamAttrTy &ParamAttr = ParamAttrs[Pos];
10093         ParamAttr.Kind = Linear;
10094         if (*SI) {
10095           Expr::EvalResult Result;
10096           if (!(*SI)->EvaluateAsInt(Result, C, Expr::SE_AllowSideEffects)) {
10097             if (const auto *DRE =
10098                     cast<DeclRefExpr>((*SI)->IgnoreParenImpCasts())) {
10099               if (const auto *StridePVD = cast<ParmVarDecl>(DRE->getDecl())) {
10100                 ParamAttr.Kind = LinearWithVarStride;
10101                 ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(
10102                     ParamPositions[StridePVD->getCanonicalDecl()]);
10103               }
10104             }
10105           } else {
10106             ParamAttr.StrideOrArg = Result.Val.getInt();
10107           }
10108         }
10109         ++SI;
10110         ++MI;
10111       }
10112       llvm::APSInt VLENVal;
10113       SourceLocation ExprLoc;
10114       const Expr *VLENExpr = Attr->getSimdlen();
10115       if (VLENExpr) {
10116         VLENVal = VLENExpr->EvaluateKnownConstInt(C);
10117         ExprLoc = VLENExpr->getExprLoc();
10118       }
10119       OMPDeclareSimdDeclAttr::BranchStateTy State = Attr->getBranchState();
10120       if (CGM.getTriple().getArch() == llvm::Triple::x86 ||
10121           CGM.getTriple().getArch() == llvm::Triple::x86_64) {
10122         emitX86DeclareSimdFunction(FD, Fn, VLENVal, ParamAttrs, State);
10123       } else if (CGM.getTriple().getArch() == llvm::Triple::aarch64) {
10124         unsigned VLEN = VLENVal.getExtValue();
10125         StringRef MangledName = Fn->getName();
10126         if (CGM.getTarget().hasFeature("sve"))
10127           emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
10128                                          MangledName, 's', 128, Fn, ExprLoc);
10129         if (CGM.getTarget().hasFeature("neon"))
10130           emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
10131                                          MangledName, 'n', 128, Fn, ExprLoc);
10132       }
10133     }
10134     FD = FD->getPreviousDecl();
10135   }
10136 }
10137 
10138 namespace {
10139 /// Cleanup action for doacross support.
10140 class DoacrossCleanupTy final : public EHScopeStack::Cleanup {
10141 public:
10142   static const int DoacrossFinArgs = 2;
10143 
10144 private:
10145   llvm::FunctionCallee RTLFn;
10146   llvm::Value *Args[DoacrossFinArgs];
10147 
10148 public:
10149   DoacrossCleanupTy(llvm::FunctionCallee RTLFn,
10150                     ArrayRef<llvm::Value *> CallArgs)
10151       : RTLFn(RTLFn) {
10152     assert(CallArgs.size() == DoacrossFinArgs);
10153     std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args));
10154   }
10155   void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
10156     if (!CGF.HaveInsertPoint())
10157       return;
10158     CGF.EmitRuntimeCall(RTLFn, Args);
10159   }
10160 };
10161 } // namespace
10162 
10163 void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF,
10164                                        const OMPLoopDirective &D,
10165                                        ArrayRef<Expr *> NumIterations) {
10166   if (!CGF.HaveInsertPoint())
10167     return;
10168 
10169   ASTContext &C = CGM.getContext();
10170   QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
10171   RecordDecl *RD;
10172   if (KmpDimTy.isNull()) {
10173     // Build struct kmp_dim {  // loop bounds info casted to kmp_int64
10174     //  kmp_int64 lo; // lower
10175     //  kmp_int64 up; // upper
10176     //  kmp_int64 st; // stride
10177     // };
10178     RD = C.buildImplicitRecord("kmp_dim");
10179     RD->startDefinition();
10180     addFieldToRecordDecl(C, RD, Int64Ty);
10181     addFieldToRecordDecl(C, RD, Int64Ty);
10182     addFieldToRecordDecl(C, RD, Int64Ty);
10183     RD->completeDefinition();
10184     KmpDimTy = C.getRecordType(RD);
10185   } else {
10186     RD = cast<RecordDecl>(KmpDimTy->getAsTagDecl());
10187   }
10188   llvm::APInt Size(/*numBits=*/32, NumIterations.size());
10189   QualType ArrayTy =
10190       C.getConstantArrayType(KmpDimTy, Size, ArrayType::Normal, 0);
10191 
10192   Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims");
10193   CGF.EmitNullInitialization(DimsAddr, ArrayTy);
10194   enum { LowerFD = 0, UpperFD, StrideFD };
10195   // Fill dims with data.
10196   for (unsigned I = 0, E = NumIterations.size(); I < E; ++I) {
10197     LValue DimsLVal = CGF.MakeAddrLValue(
10198         CGF.Builder.CreateConstArrayGEP(DimsAddr, I), KmpDimTy);
10199     // dims.upper = num_iterations;
10200     LValue UpperLVal = CGF.EmitLValueForField(
10201         DimsLVal, *std::next(RD->field_begin(), UpperFD));
10202     llvm::Value *NumIterVal =
10203         CGF.EmitScalarConversion(CGF.EmitScalarExpr(NumIterations[I]),
10204                                  D.getNumIterations()->getType(), Int64Ty,
10205                                  D.getNumIterations()->getExprLoc());
10206     CGF.EmitStoreOfScalar(NumIterVal, UpperLVal);
10207     // dims.stride = 1;
10208     LValue StrideLVal = CGF.EmitLValueForField(
10209         DimsLVal, *std::next(RD->field_begin(), StrideFD));
10210     CGF.EmitStoreOfScalar(llvm::ConstantInt::getSigned(CGM.Int64Ty, /*V=*/1),
10211                           StrideLVal);
10212   }
10213 
10214   // Build call void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid,
10215   // kmp_int32 num_dims, struct kmp_dim * dims);
10216   llvm::Value *Args[] = {
10217       emitUpdateLocation(CGF, D.getBeginLoc()),
10218       getThreadID(CGF, D.getBeginLoc()),
10219       llvm::ConstantInt::getSigned(CGM.Int32Ty, NumIterations.size()),
10220       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
10221           CGF.Builder.CreateConstArrayGEP(DimsAddr, 0).getPointer(),
10222           CGM.VoidPtrTy)};
10223 
10224   llvm::FunctionCallee RTLFn =
10225       createRuntimeFunction(OMPRTL__kmpc_doacross_init);
10226   CGF.EmitRuntimeCall(RTLFn, Args);
10227   llvm::Value *FiniArgs[DoacrossCleanupTy::DoacrossFinArgs] = {
10228       emitUpdateLocation(CGF, D.getEndLoc()), getThreadID(CGF, D.getEndLoc())};
10229   llvm::FunctionCallee FiniRTLFn =
10230       createRuntimeFunction(OMPRTL__kmpc_doacross_fini);
10231   CGF.EHStack.pushCleanup<DoacrossCleanupTy>(NormalAndEHCleanup, FiniRTLFn,
10232                                              llvm::makeArrayRef(FiniArgs));
10233 }
10234 
10235 void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
10236                                           const OMPDependClause *C) {
10237   QualType Int64Ty =
10238       CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
10239   llvm::APInt Size(/*numBits=*/32, C->getNumLoops());
10240   QualType ArrayTy = CGM.getContext().getConstantArrayType(
10241       Int64Ty, Size, ArrayType::Normal, 0);
10242   Address CntAddr = CGF.CreateMemTemp(ArrayTy, ".cnt.addr");
10243   for (unsigned I = 0, E = C->getNumLoops(); I < E; ++I) {
10244     const Expr *CounterVal = C->getLoopData(I);
10245     assert(CounterVal);
10246     llvm::Value *CntVal = CGF.EmitScalarConversion(
10247         CGF.EmitScalarExpr(CounterVal), CounterVal->getType(), Int64Ty,
10248         CounterVal->getExprLoc());
10249     CGF.EmitStoreOfScalar(CntVal, CGF.Builder.CreateConstArrayGEP(CntAddr, I),
10250                           /*Volatile=*/false, Int64Ty);
10251   }
10252   llvm::Value *Args[] = {
10253       emitUpdateLocation(CGF, C->getBeginLoc()),
10254       getThreadID(CGF, C->getBeginLoc()),
10255       CGF.Builder.CreateConstArrayGEP(CntAddr, 0).getPointer()};
10256   llvm::FunctionCallee RTLFn;
10257   if (C->getDependencyKind() == OMPC_DEPEND_source) {
10258     RTLFn = createRuntimeFunction(OMPRTL__kmpc_doacross_post);
10259   } else {
10260     assert(C->getDependencyKind() == OMPC_DEPEND_sink);
10261     RTLFn = createRuntimeFunction(OMPRTL__kmpc_doacross_wait);
10262   }
10263   CGF.EmitRuntimeCall(RTLFn, Args);
10264 }
10265 
10266 void CGOpenMPRuntime::emitCall(CodeGenFunction &CGF, SourceLocation Loc,
10267                                llvm::FunctionCallee Callee,
10268                                ArrayRef<llvm::Value *> Args) const {
10269   assert(Loc.isValid() && "Outlined function call location must be valid.");
10270   auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
10271 
10272   if (auto *Fn = dyn_cast<llvm::Function>(Callee.getCallee())) {
10273     if (Fn->doesNotThrow()) {
10274       CGF.EmitNounwindRuntimeCall(Fn, Args);
10275       return;
10276     }
10277   }
10278   CGF.EmitRuntimeCall(Callee, Args);
10279 }
10280 
/// Emits a call to \p OutlinedFn with \p Args at \p Loc by forwarding all
/// arguments unchanged to emitCall.
void CGOpenMPRuntime::emitOutlinedFunctionCall(
    CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn,
    ArrayRef<llvm::Value *> Args) const {
  emitCall(CGF, Loc, OutlinedFn, Args);
}
10286 
/// Returns the address of the local variable \p NativeParam as reported by
/// CGF.GetAddrOfLocalVar. \p TargetParam is not used by this implementation.
Address CGOpenMPRuntime::getParameterAddress(CodeGenFunction &CGF,
                                             const VarDecl *NativeParam,
                                             const VarDecl *TargetParam) const {
  return CGF.GetAddrOfLocalVar(NativeParam);
}
10292 
10293 namespace {
10294 /// Cleanup action for allocate support.
10295 class OMPAllocateCleanupTy final : public EHScopeStack::Cleanup {
10296 public:
10297   static const int CleanupArgs = 3;
10298 
10299 private:
10300   llvm::FunctionCallee RTLFn;
10301   llvm::Value *Args[CleanupArgs];
10302 
10303 public:
10304   OMPAllocateCleanupTy(llvm::FunctionCallee RTLFn,
10305                        ArrayRef<llvm::Value *> CallArgs)
10306       : RTLFn(RTLFn) {
10307     assert(CallArgs.size() == CleanupArgs &&
10308            "Size of arguments does not match.");
10309     std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args));
10310   }
10311   void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
10312     if (!CGF.HaveInsertPoint())
10313       return;
10314     CGF.EmitRuntimeCall(RTLFn, Args);
10315   }
10316 };
10317 } // namespace
10318 
10319 Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF,
10320                                                    const VarDecl *VD) {
10321   if (!VD)
10322     return Address::invalid();
10323   const VarDecl *CVD = VD->getCanonicalDecl();
10324   if (!CVD->hasAttr<OMPAllocateDeclAttr>())
10325     return Address::invalid();
10326   const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
10327   // Use the default allocation.
10328   if (AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc &&
10329       !AA->getAllocator())
10330     return Address::invalid();
10331   llvm::Value *Size;
10332   CharUnits Align = CGM.getContext().getDeclAlign(CVD);
10333   if (CVD->getType()->isVariablyModifiedType()) {
10334     Size = CGF.getTypeSize(CVD->getType());
10335     // Align the size: ((size + align - 1) / align) * align
10336     Size = CGF.Builder.CreateNUWAdd(
10337         Size, CGM.getSize(Align - CharUnits::fromQuantity(1)));
10338     Size = CGF.Builder.CreateUDiv(Size, CGM.getSize(Align));
10339     Size = CGF.Builder.CreateNUWMul(Size, CGM.getSize(Align));
10340   } else {
10341     CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType());
10342     Size = CGM.getSize(Sz.alignTo(Align));
10343   }
10344   llvm::Value *ThreadID = getThreadID(CGF, CVD->getBeginLoc());
10345   assert(AA->getAllocator() &&
10346          "Expected allocator expression for non-default allocator.");
10347   llvm::Value *Allocator = CGF.EmitScalarExpr(AA->getAllocator());
10348   // According to the standard, the original allocator type is a enum (integer).
10349   // Convert to pointer type, if required.
10350   if (Allocator->getType()->isIntegerTy())
10351     Allocator = CGF.Builder.CreateIntToPtr(Allocator, CGM.VoidPtrTy);
10352   else if (Allocator->getType()->isPointerTy())
10353     Allocator = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Allocator,
10354                                                                 CGM.VoidPtrTy);
10355   llvm::Value *Args[] = {ThreadID, Size, Allocator};
10356 
10357   llvm::Value *Addr =
10358       CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_alloc), Args,
10359                           CVD->getName() + ".void.addr");
10360   llvm::Value *FiniArgs[OMPAllocateCleanupTy::CleanupArgs] = {ThreadID, Addr,
10361                                                               Allocator};
10362   llvm::FunctionCallee FiniRTLFn = createRuntimeFunction(OMPRTL__kmpc_free);
10363 
10364   CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(NormalAndEHCleanup, FiniRTLFn,
10365                                                 llvm::makeArrayRef(FiniArgs));
10366   Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
10367       Addr,
10368       CGF.ConvertTypeForMem(CGM.getContext().getPointerType(CVD->getType())),
10369       CVD->getName() + ".addr");
10370   return Address(Addr, Align);
10371 }
10372 
/// SIMD-only mode stub: unconditionally hits llvm_unreachable; all parameters
/// are ignored and nothing is returned.
llvm::Function *CGOpenMPSIMDRuntime::emitParallelOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
10378 
/// SIMD-only mode stub: unconditionally hits llvm_unreachable; all parameters
/// are ignored and nothing is returned.
llvm::Function *CGOpenMPSIMDRuntime::emitTeamsOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
10384 
/// SIMD-only mode stub: unconditionally hits llvm_unreachable; all parameters
/// (including the \p NumberOfParts output reference) are left untouched.
llvm::Function *CGOpenMPSIMDRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
10392 
/// SIMD-only mode stub: unconditionally hits llvm_unreachable; no code is
/// emitted and all parameters are ignored.
void CGOpenMPSIMDRuntime::emitParallelCall(CodeGenFunction &CGF,
                                           SourceLocation Loc,
                                           llvm::Function *OutlinedFn,
                                           ArrayRef<llvm::Value *> CapturedVars,
                                           const Expr *IfCond) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
10400 
/// SIMD-only mode stub: unconditionally hits llvm_unreachable; no code is
/// emitted and all parameters are ignored.
void CGOpenMPSIMDRuntime::emitCriticalRegion(
    CodeGenFunction &CGF, StringRef CriticalName,
    const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc,
    const Expr *Hint) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
10407 
/// SIMD-only mode stub: unconditionally hits llvm_unreachable; no code is
/// emitted and all parameters are ignored.
void CGOpenMPSIMDRuntime::emitMasterRegion(CodeGenFunction &CGF,
                                           const RegionCodeGenTy &MasterOpGen,
                                           SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
10413 
/// SIMD-only mode stub: unconditionally hits llvm_unreachable; no code is
/// emitted and all parameters are ignored.
void CGOpenMPSIMDRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
                                            SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
10418 
/// SIMD-only mode stub: unconditionally hits llvm_unreachable; no code is
/// emitted and all parameters are ignored.
void CGOpenMPSIMDRuntime::emitTaskgroupRegion(
    CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen,
    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
10424 
/// SIMD-only mode stub: unconditionally hits llvm_unreachable; no code is
/// emitted and all parameters are ignored.
void CGOpenMPSIMDRuntime::emitSingleRegion(
    CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen,
    SourceLocation Loc, ArrayRef<const Expr *> CopyprivateVars,
    ArrayRef<const Expr *> DestExprs, ArrayRef<const Expr *> SrcExprs,
    ArrayRef<const Expr *> AssignmentOps) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
10432 
/// SIMD-only mode stub: unconditionally hits llvm_unreachable; no code is
/// emitted and all parameters are ignored.
void CGOpenMPSIMDRuntime::emitOrderedRegion(CodeGenFunction &CGF,
                                            const RegionCodeGenTy &OrderedOpGen,
                                            SourceLocation Loc,
                                            bool IsThreads) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
10439 
/// SIMD-only mode stub: unconditionally hits llvm_unreachable; no code is
/// emitted and all parameters are ignored.
void CGOpenMPSIMDRuntime::emitBarrierCall(CodeGenFunction &CGF,
                                          SourceLocation Loc,
                                          OpenMPDirectiveKind Kind,
                                          bool EmitChecks,
                                          bool ForceSimpleCall) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
10447 
/// SIMD-only mode stub: unconditionally hits llvm_unreachable; no code is
/// emitted and all parameters are ignored.
void CGOpenMPSIMDRuntime::emitForDispatchInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
    bool Ordered, const DispatchRTInput &DispatchValues) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
10454 
/// SIMD-only mode stub: unconditionally hits llvm_unreachable; no code is
/// emitted and all parameters are ignored.
void CGOpenMPSIMDRuntime::emitForStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind,
    const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
10460 
/// SIMD-only mode stub: unconditionally hits llvm_unreachable; no code is
/// emitted and all parameters are ignored.
void CGOpenMPSIMDRuntime::emitDistributeStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDistScheduleClauseKind SchedKind, const StaticRTInput &Values) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
10466 
/// SIMD-only mode stub: unconditionally hits llvm_unreachable; no code is
/// emitted and all parameters are ignored.
void CGOpenMPSIMDRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
                                                     SourceLocation Loc,
                                                     unsigned IVSize,
                                                     bool IVSigned) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
10473 
/// SIMD-only mode stub: unconditionally hits llvm_unreachable; no code is
/// emitted and all parameters are ignored.
void CGOpenMPSIMDRuntime::emitForStaticFinish(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              OpenMPDirectiveKind DKind) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
10479 
/// SIMD-only mode stub: unconditionally hits llvm_unreachable; all parameters
/// are ignored and nothing is returned.
llvm::Value *CGOpenMPSIMDRuntime::emitForNext(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              unsigned IVSize, bool IVSigned,
                                              Address IL, Address LB,
                                              Address UB, Address ST) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
10487 
/// SIMD-only mode stub: unconditionally hits llvm_unreachable; no code is
/// emitted and all parameters are ignored.
void CGOpenMPSIMDRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
                                               llvm::Value *NumThreads,
                                               SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
10493 
/// SIMD-only mode stub: unconditionally hits llvm_unreachable; no code is
/// emitted and all parameters are ignored.
void CGOpenMPSIMDRuntime::emitProcBindClause(CodeGenFunction &CGF,
                                             OpenMPProcBindClauseKind ProcBind,
                                             SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
10499 
/// SIMD-only mode stub: unconditionally hits llvm_unreachable; all parameters
/// are ignored and nothing is returned.
Address CGOpenMPSIMDRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
                                                    const VarDecl *VD,
                                                    Address VDAddr,
                                                    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
10506 
/// SIMD-only mode stub: unconditionally hits llvm_unreachable; all parameters
/// are ignored and nothing is returned.
llvm::Function *CGOpenMPSIMDRuntime::emitThreadPrivateVarDefinition(
    const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit,
    CodeGenFunction *CGF) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
10512 
/// SIMD-only mode stub: unconditionally hits llvm_unreachable; all parameters
/// are ignored and nothing is returned.
Address CGOpenMPSIMDRuntime::getAddrOfArtificialThreadPrivate(
    CodeGenFunction &CGF, QualType VarType, StringRef Name) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
10517 
/// SIMD-only mode stub: unconditionally hits llvm_unreachable; no code is
/// emitted and all parameters are ignored.
void CGOpenMPSIMDRuntime::emitFlush(CodeGenFunction &CGF,
                                    ArrayRef<const Expr *> Vars,
                                    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
10523 
/// SIMD-only mode stub: unconditionally hits llvm_unreachable; no code is
/// emitted and all parameters are ignored.
void CGOpenMPSIMDRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPExecutableDirective &D,
                                       llvm::Function *TaskFunction,
                                       QualType SharedsTy, Address Shareds,
                                       const Expr *IfCond,
                                       const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
10532 
/// SIMD-only mode stub: unconditionally hits llvm_unreachable; no code is
/// emitted and all parameters are ignored.
void CGOpenMPSIMDRuntime::emitTaskLoopCall(
    CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D,
    llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds,
    const Expr *IfCond, const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
10539 
/// SIMD-only mode reduction: asserts that \p Options.SimpleReduction is set
/// and then forwards every argument unchanged to
/// CGOpenMPRuntime::emitReduction.
void CGOpenMPSIMDRuntime::emitReduction(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> Privates,
    ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
    ArrayRef<const Expr *> ReductionOps, ReductionOptionsTy Options) {
  assert(Options.SimpleReduction && "Only simple reduction is expected.");
  // Explicitly qualified call: dispatches to the base-class implementation.
  CGOpenMPRuntime::emitReduction(CGF, Loc, Privates, LHSExprs, RHSExprs,
                                 ReductionOps, Options);
}
10548 
/// SIMD-only mode stub: unconditionally hits llvm_unreachable; all parameters
/// are ignored and nothing is returned.
llvm::Value *CGOpenMPSIMDRuntime::emitTaskReductionInit(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
    ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
10554 
/// SIMD-only mode stub: unconditionally hits llvm_unreachable; no code is
/// emitted and all parameters are ignored.
void CGOpenMPSIMDRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  ReductionCodeGen &RCG,
                                                  unsigned N) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
10561 
/// SIMD-only mode stub: unconditionally hits llvm_unreachable; all parameters
/// are ignored and nothing is returned.
Address CGOpenMPSIMDRuntime::getTaskReductionItem(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  llvm::Value *ReductionsPtr,
                                                  LValue SharedLVal) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
10568 
/// SIMD-only mode stub: unconditionally hits llvm_unreachable; no code is
/// emitted and all parameters are ignored.
void CGOpenMPSIMDRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
                                           SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
10573 
/// SIMD-only mode stub: unconditionally hits llvm_unreachable; no code is
/// emitted and all parameters are ignored.
void CGOpenMPSIMDRuntime::emitCancellationPointCall(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDirectiveKind CancelRegion) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
10579 
/// SIMD-only mode stub: unconditionally hits llvm_unreachable; no code is
/// emitted and all parameters are ignored.
void CGOpenMPSIMDRuntime::emitCancelCall(CodeGenFunction &CGF,
                                         SourceLocation Loc, const Expr *IfCond,
                                         OpenMPDirectiveKind CancelRegion) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
10585 
/// SIMD-only mode stub: unconditionally hits llvm_unreachable; the
/// \p OutlinedFn and \p OutlinedFnID output references are left untouched and
/// all other parameters are ignored.
void CGOpenMPSIMDRuntime::emitTargetOutlinedFunction(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
10592 
/// SIMD-only mode stub: unconditionally hits llvm_unreachable; no code is
/// emitted and all parameters are ignored.
void CGOpenMPSIMDRuntime::emitTargetCall(CodeGenFunction &CGF,
                                         const OMPExecutableDirective &D,
                                         llvm::Function *OutlinedFn,
                                         llvm::Value *OutlinedFnID,
                                         const Expr *IfCond,
                                         const Expr *Device) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
10601 
/// SIMD-only mode stub: unconditionally hits llvm_unreachable; \p GD is
/// ignored and nothing is returned.
bool CGOpenMPSIMDRuntime::emitTargetFunctions(GlobalDecl GD) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
10605 
/// SIMD-only mode stub: unconditionally hits llvm_unreachable; \p GD is
/// ignored and nothing is returned.
bool CGOpenMPSIMDRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
10609 
/// SIMD-only mode implementation: always returns false without inspecting
/// \p GD.
bool CGOpenMPSIMDRuntime::emitTargetGlobal(GlobalDecl GD) {
  return false;
}
10613 
/// SIMD-only mode implementation: always returns nullptr; no function is
/// created.
llvm::Function *CGOpenMPSIMDRuntime::emitRegistrationFunction() {
  return nullptr;
}
10617 
/// SIMD-only mode stub: unconditionally hits llvm_unreachable; no code is
/// emitted and all parameters are ignored.
void CGOpenMPSIMDRuntime::emitTeamsCall(CodeGenFunction &CGF,
                                        const OMPExecutableDirective &D,
                                        SourceLocation Loc,
                                        llvm::Function *OutlinedFn,
                                        ArrayRef<llvm::Value *> CapturedVars) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
10625 
/// SIMD-only mode stub: unconditionally hits llvm_unreachable; no code is
/// emitted and all parameters are ignored.
void CGOpenMPSIMDRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
                                             const Expr *NumTeams,
                                             const Expr *ThreadLimit,
                                             SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
10632 
/// SIMD-only mode stub: unconditionally hits llvm_unreachable; \p Info is
/// left untouched and all other parameters are ignored.
void CGOpenMPSIMDRuntime::emitTargetDataCalls(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
10638 
/// SIMD-only mode stub: unconditionally hits llvm_unreachable; no code is
/// emitted and all parameters are ignored.
void CGOpenMPSIMDRuntime::emitTargetDataStandAloneCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
10644 
/// SIMD-only mode stub: unconditionally hits llvm_unreachable; no code is
/// emitted and all parameters are ignored.
void CGOpenMPSIMDRuntime::emitDoacrossInit(CodeGenFunction &CGF,
                                           const OMPLoopDirective &D,
                                           ArrayRef<Expr *> NumIterations) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
10650 
/// SIMD-only mode stub: unconditionally hits llvm_unreachable; no code is
/// emitted and all parameters are ignored.
void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
                                              const OMPDependClause *C) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
10655 
/// SIMD-only mode stub: unconditionally hits llvm_unreachable; all parameters
/// are ignored and nothing is returned.
const VarDecl *
CGOpenMPSIMDRuntime::translateParameter(const FieldDecl *FD,
                                        const VarDecl *NativeParam) const {
  llvm_unreachable("Not supported in SIMD-only mode");
}
10661 
/// SIMD-only mode stub: unconditionally hits llvm_unreachable; all parameters
/// are ignored and nothing is returned.
Address
CGOpenMPSIMDRuntime::getParameterAddress(CodeGenFunction &CGF,
                                         const VarDecl *NativeParam,
                                         const VarDecl *TargetParam) const {
  llvm_unreachable("Not supported in SIMD-only mode");
}
10668