//===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This provides a class for OpenMP runtime code generation.
//
//===----------------------------------------------------------------------===//
12 
#include "CGOpenMPRuntime.h"
#include "CGCXXABI.h"
#include "CGCleanup.h"
#include "CGRecordLayout.h"
#include "CodeGenFunction.h"
#include "TargetInfo.h"
#include "clang/AST/APValue.h"
#include "clang/AST/Attr.h"
#include "clang/AST/Decl.h"
#include "clang/AST/OpenMPClause.h"
#include "clang/AST/StmtOpenMP.h"
#include "clang/AST/StmtVisitor.h"
#include "clang/Basic/BitmaskEnum.h"
#include "clang/Basic/FileManager.h"
#include "clang/Basic/OpenMPKinds.h"
#include "clang/Basic/SourceManager.h"
#include "clang/CodeGen/ConstantInitBuilder.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SetOperations.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Bitcode/BitcodeReader.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>
#include <numeric>
45 
46 using namespace clang;
47 using namespace CodeGen;
48 using namespace llvm::omp;
49 
50 namespace {
51 /// Base class for handling code generation inside OpenMP regions.
52 class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
53 public:
54   /// Kinds of OpenMP regions used in codegen.
55   enum CGOpenMPRegionKind {
56     /// Region with outlined function for standalone 'parallel'
57     /// directive.
58     ParallelOutlinedRegion,
59     /// Region with outlined function for standalone 'task' directive.
60     TaskOutlinedRegion,
61     /// Region for constructs that do not require function outlining,
62     /// like 'for', 'sections', 'atomic' etc. directives.
63     InlinedRegion,
64     /// Region with outlined function for standalone 'target' directive.
65     TargetRegion,
66   };
67 
CGOpenMPRegionInfo(const CapturedStmt & CS,const CGOpenMPRegionKind RegionKind,const RegionCodeGenTy & CodeGen,OpenMPDirectiveKind Kind,bool HasCancel)68   CGOpenMPRegionInfo(const CapturedStmt &CS,
69                      const CGOpenMPRegionKind RegionKind,
70                      const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
71                      bool HasCancel)
72       : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind),
73         CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {}
74 
CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind,const RegionCodeGenTy & CodeGen,OpenMPDirectiveKind Kind,bool HasCancel)75   CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind,
76                      const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
77                      bool HasCancel)
78       : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen),
79         Kind(Kind), HasCancel(HasCancel) {}
80 
81   /// Get a variable or parameter for storing global thread id
82   /// inside OpenMP construct.
83   virtual const VarDecl *getThreadIDVariable() const = 0;
84 
85   /// Emit the captured statement body.
86   void EmitBody(CodeGenFunction &CGF, const Stmt *S) override;
87 
88   /// Get an LValue for the current ThreadID variable.
89   /// \return LValue for thread id variable. This LValue always has type int32*.
90   virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF);
91 
emitUntiedSwitch(CodeGenFunction &)92   virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {}
93 
getRegionKind() const94   CGOpenMPRegionKind getRegionKind() const { return RegionKind; }
95 
getDirectiveKind() const96   OpenMPDirectiveKind getDirectiveKind() const { return Kind; }
97 
hasCancel() const98   bool hasCancel() const { return HasCancel; }
99 
classof(const CGCapturedStmtInfo * Info)100   static bool classof(const CGCapturedStmtInfo *Info) {
101     return Info->getKind() == CR_OpenMP;
102   }
103 
104   ~CGOpenMPRegionInfo() override = default;
105 
106 protected:
107   CGOpenMPRegionKind RegionKind;
108   RegionCodeGenTy CodeGen;
109   OpenMPDirectiveKind Kind;
110   bool HasCancel;
111 };
112 
113 /// API for captured statement code generation in OpenMP constructs.
114 class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo {
115 public:
CGOpenMPOutlinedRegionInfo(const CapturedStmt & CS,const VarDecl * ThreadIDVar,const RegionCodeGenTy & CodeGen,OpenMPDirectiveKind Kind,bool HasCancel,StringRef HelperName)116   CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar,
117                              const RegionCodeGenTy &CodeGen,
118                              OpenMPDirectiveKind Kind, bool HasCancel,
119                              StringRef HelperName)
120       : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind,
121                            HasCancel),
122         ThreadIDVar(ThreadIDVar), HelperName(HelperName) {
123     assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
124   }
125 
126   /// Get a variable or parameter for storing global thread id
127   /// inside OpenMP construct.
getThreadIDVariable() const128   const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }
129 
130   /// Get the name of the capture helper.
getHelperName() const131   StringRef getHelperName() const override { return HelperName; }
132 
classof(const CGCapturedStmtInfo * Info)133   static bool classof(const CGCapturedStmtInfo *Info) {
134     return CGOpenMPRegionInfo::classof(Info) &&
135            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
136                ParallelOutlinedRegion;
137   }
138 
139 private:
140   /// A variable or parameter storing global thread id for OpenMP
141   /// constructs.
142   const VarDecl *ThreadIDVar;
143   StringRef HelperName;
144 };
145 
146 /// API for captured statement code generation in OpenMP constructs.
147 class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo {
148 public:
149   class UntiedTaskActionTy final : public PrePostActionTy {
150     bool Untied;
151     const VarDecl *PartIDVar;
152     const RegionCodeGenTy UntiedCodeGen;
153     llvm::SwitchInst *UntiedSwitch = nullptr;
154 
155   public:
UntiedTaskActionTy(bool Tied,const VarDecl * PartIDVar,const RegionCodeGenTy & UntiedCodeGen)156     UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar,
157                        const RegionCodeGenTy &UntiedCodeGen)
158         : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {}
Enter(CodeGenFunction & CGF)159     void Enter(CodeGenFunction &CGF) override {
160       if (Untied) {
161         // Emit task switching point.
162         LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
163             CGF.GetAddrOfLocalVar(PartIDVar),
164             PartIDVar->getType()->castAs<PointerType>());
165         llvm::Value *Res =
166             CGF.EmitLoadOfScalar(PartIdLVal, PartIDVar->getLocation());
167         llvm::BasicBlock *DoneBB = CGF.createBasicBlock(".untied.done.");
168         UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB);
169         CGF.EmitBlock(DoneBB);
170         CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
171         CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
172         UntiedSwitch->addCase(CGF.Builder.getInt32(0),
173                               CGF.Builder.GetInsertBlock());
174         emitUntiedSwitch(CGF);
175       }
176     }
emitUntiedSwitch(CodeGenFunction & CGF) const177     void emitUntiedSwitch(CodeGenFunction &CGF) const {
178       if (Untied) {
179         LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
180             CGF.GetAddrOfLocalVar(PartIDVar),
181             PartIDVar->getType()->castAs<PointerType>());
182         CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
183                               PartIdLVal);
184         UntiedCodeGen(CGF);
185         CodeGenFunction::JumpDest CurPoint =
186             CGF.getJumpDestInCurrentScope(".untied.next.");
187         CGF.EmitBranch(CGF.ReturnBlock.getBlock());
188         CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
189         UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
190                               CGF.Builder.GetInsertBlock());
191         CGF.EmitBranchThroughCleanup(CurPoint);
192         CGF.EmitBlock(CurPoint.getBlock());
193       }
194     }
getNumberOfParts() const195     unsigned getNumberOfParts() const { return UntiedSwitch->getNumCases(); }
196   };
CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt & CS,const VarDecl * ThreadIDVar,const RegionCodeGenTy & CodeGen,OpenMPDirectiveKind Kind,bool HasCancel,const UntiedTaskActionTy & Action)197   CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS,
198                                  const VarDecl *ThreadIDVar,
199                                  const RegionCodeGenTy &CodeGen,
200                                  OpenMPDirectiveKind Kind, bool HasCancel,
201                                  const UntiedTaskActionTy &Action)
202       : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel),
203         ThreadIDVar(ThreadIDVar), Action(Action) {
204     assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
205   }
206 
207   /// Get a variable or parameter for storing global thread id
208   /// inside OpenMP construct.
getThreadIDVariable() const209   const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }
210 
211   /// Get an LValue for the current ThreadID variable.
212   LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override;
213 
214   /// Get the name of the capture helper.
getHelperName() const215   StringRef getHelperName() const override { return ".omp_outlined."; }
216 
emitUntiedSwitch(CodeGenFunction & CGF)217   void emitUntiedSwitch(CodeGenFunction &CGF) override {
218     Action.emitUntiedSwitch(CGF);
219   }
220 
classof(const CGCapturedStmtInfo * Info)221   static bool classof(const CGCapturedStmtInfo *Info) {
222     return CGOpenMPRegionInfo::classof(Info) &&
223            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
224                TaskOutlinedRegion;
225   }
226 
227 private:
228   /// A variable or parameter storing global thread id for OpenMP
229   /// constructs.
230   const VarDecl *ThreadIDVar;
231   /// Action for emitting code for untied tasks.
232   const UntiedTaskActionTy &Action;
233 };
234 
235 /// API for inlined captured statement code generation in OpenMP
236 /// constructs.
237 class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo {
238 public:
CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo * OldCSI,const RegionCodeGenTy & CodeGen,OpenMPDirectiveKind Kind,bool HasCancel)239   CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI,
240                             const RegionCodeGenTy &CodeGen,
241                             OpenMPDirectiveKind Kind, bool HasCancel)
242       : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel),
243         OldCSI(OldCSI),
244         OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {}
245 
246   // Retrieve the value of the context parameter.
getContextValue() const247   llvm::Value *getContextValue() const override {
248     if (OuterRegionInfo)
249       return OuterRegionInfo->getContextValue();
250     llvm_unreachable("No context value for inlined OpenMP region");
251   }
252 
setContextValue(llvm::Value * V)253   void setContextValue(llvm::Value *V) override {
254     if (OuterRegionInfo) {
255       OuterRegionInfo->setContextValue(V);
256       return;
257     }
258     llvm_unreachable("No context value for inlined OpenMP region");
259   }
260 
261   /// Lookup the captured field decl for a variable.
lookup(const VarDecl * VD) const262   const FieldDecl *lookup(const VarDecl *VD) const override {
263     if (OuterRegionInfo)
264       return OuterRegionInfo->lookup(VD);
265     // If there is no outer outlined region,no need to lookup in a list of
266     // captured variables, we can use the original one.
267     return nullptr;
268   }
269 
getThisFieldDecl() const270   FieldDecl *getThisFieldDecl() const override {
271     if (OuterRegionInfo)
272       return OuterRegionInfo->getThisFieldDecl();
273     return nullptr;
274   }
275 
276   /// Get a variable or parameter for storing global thread id
277   /// inside OpenMP construct.
getThreadIDVariable() const278   const VarDecl *getThreadIDVariable() const override {
279     if (OuterRegionInfo)
280       return OuterRegionInfo->getThreadIDVariable();
281     return nullptr;
282   }
283 
284   /// Get an LValue for the current ThreadID variable.
getThreadIDVariableLValue(CodeGenFunction & CGF)285   LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override {
286     if (OuterRegionInfo)
287       return OuterRegionInfo->getThreadIDVariableLValue(CGF);
288     llvm_unreachable("No LValue for inlined OpenMP construct");
289   }
290 
291   /// Get the name of the capture helper.
getHelperName() const292   StringRef getHelperName() const override {
293     if (auto *OuterRegionInfo = getOldCSI())
294       return OuterRegionInfo->getHelperName();
295     llvm_unreachable("No helper name for inlined OpenMP construct");
296   }
297 
emitUntiedSwitch(CodeGenFunction & CGF)298   void emitUntiedSwitch(CodeGenFunction &CGF) override {
299     if (OuterRegionInfo)
300       OuterRegionInfo->emitUntiedSwitch(CGF);
301   }
302 
getOldCSI() const303   CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; }
304 
classof(const CGCapturedStmtInfo * Info)305   static bool classof(const CGCapturedStmtInfo *Info) {
306     return CGOpenMPRegionInfo::classof(Info) &&
307            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion;
308   }
309 
310   ~CGOpenMPInlinedRegionInfo() override = default;
311 
312 private:
313   /// CodeGen info about outer OpenMP region.
314   CodeGenFunction::CGCapturedStmtInfo *OldCSI;
315   CGOpenMPRegionInfo *OuterRegionInfo;
316 };
317 
318 /// API for captured statement code generation in OpenMP target
319 /// constructs. For this captures, implicit parameters are used instead of the
320 /// captured fields. The name of the target region has to be unique in a given
321 /// application so it is provided by the client, because only the client has
322 /// the information to generate that.
323 class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo {
324 public:
CGOpenMPTargetRegionInfo(const CapturedStmt & CS,const RegionCodeGenTy & CodeGen,StringRef HelperName)325   CGOpenMPTargetRegionInfo(const CapturedStmt &CS,
326                            const RegionCodeGenTy &CodeGen, StringRef HelperName)
327       : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target,
328                            /*HasCancel=*/false),
329         HelperName(HelperName) {}
330 
331   /// This is unused for target regions because each starts executing
332   /// with a single thread.
getThreadIDVariable() const333   const VarDecl *getThreadIDVariable() const override { return nullptr; }
334 
335   /// Get the name of the capture helper.
getHelperName() const336   StringRef getHelperName() const override { return HelperName; }
337 
classof(const CGCapturedStmtInfo * Info)338   static bool classof(const CGCapturedStmtInfo *Info) {
339     return CGOpenMPRegionInfo::classof(Info) &&
340            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion;
341   }
342 
343 private:
344   StringRef HelperName;
345 };
346 
EmptyCodeGen(CodeGenFunction &,PrePostActionTy &)347 static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) {
348   llvm_unreachable("No codegen for expressions");
349 }
350 /// API for generation of expressions captured in a innermost OpenMP
351 /// region.
352 class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo {
353 public:
CGOpenMPInnerExprInfo(CodeGenFunction & CGF,const CapturedStmt & CS)354   CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS)
355       : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen,
356                                   OMPD_unknown,
357                                   /*HasCancel=*/false),
358         PrivScope(CGF) {
359     // Make sure the globals captured in the provided statement are local by
360     // using the privatization logic. We assume the same variable is not
361     // captured more than once.
362     for (const auto &C : CS.captures()) {
363       if (!C.capturesVariable() && !C.capturesVariableByCopy())
364         continue;
365 
366       const VarDecl *VD = C.getCapturedVar();
367       if (VD->isLocalVarDeclOrParm())
368         continue;
369 
370       DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD),
371                       /*RefersToEnclosingVariableOrCapture=*/false,
372                       VD->getType().getNonReferenceType(), VK_LValue,
373                       C.getLocation());
374       PrivScope.addPrivate(VD, CGF.EmitLValue(&DRE).getAddress(CGF));
375     }
376     (void)PrivScope.Privatize();
377   }
378 
379   /// Lookup the captured field decl for a variable.
lookup(const VarDecl * VD) const380   const FieldDecl *lookup(const VarDecl *VD) const override {
381     if (const FieldDecl *FD = CGOpenMPInlinedRegionInfo::lookup(VD))
382       return FD;
383     return nullptr;
384   }
385 
386   /// Emit the captured statement body.
EmitBody(CodeGenFunction & CGF,const Stmt * S)387   void EmitBody(CodeGenFunction &CGF, const Stmt *S) override {
388     llvm_unreachable("No body for expressions");
389   }
390 
391   /// Get a variable or parameter for storing global thread id
392   /// inside OpenMP construct.
getThreadIDVariable() const393   const VarDecl *getThreadIDVariable() const override {
394     llvm_unreachable("No thread id for expressions");
395   }
396 
397   /// Get the name of the capture helper.
getHelperName() const398   StringRef getHelperName() const override {
399     llvm_unreachable("No helper name for expressions");
400   }
401 
classof(const CGCapturedStmtInfo * Info)402   static bool classof(const CGCapturedStmtInfo *Info) { return false; }
403 
404 private:
405   /// Private scope to capture global variables.
406   CodeGenFunction::OMPPrivateScope PrivScope;
407 };
408 
409 /// RAII for emitting code of OpenMP constructs.
410 class InlinedOpenMPRegionRAII {
411   CodeGenFunction &CGF;
412   llvm::DenseMap<const VarDecl *, FieldDecl *> LambdaCaptureFields;
413   FieldDecl *LambdaThisCaptureField = nullptr;
414   const CodeGen::CGBlockInfo *BlockInfo = nullptr;
415   bool NoInheritance = false;
416 
417 public:
418   /// Constructs region for combined constructs.
419   /// \param CodeGen Code generation sequence for combined directives. Includes
420   /// a list of functions used for code generation of implicitly inlined
421   /// regions.
InlinedOpenMPRegionRAII(CodeGenFunction & CGF,const RegionCodeGenTy & CodeGen,OpenMPDirectiveKind Kind,bool HasCancel,bool NoInheritance=true)422   InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen,
423                           OpenMPDirectiveKind Kind, bool HasCancel,
424                           bool NoInheritance = true)
425       : CGF(CGF), NoInheritance(NoInheritance) {
426     // Start emission for the construct.
427     CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo(
428         CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel);
429     if (NoInheritance) {
430       std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
431       LambdaThisCaptureField = CGF.LambdaThisCaptureField;
432       CGF.LambdaThisCaptureField = nullptr;
433       BlockInfo = CGF.BlockInfo;
434       CGF.BlockInfo = nullptr;
435     }
436   }
437 
~InlinedOpenMPRegionRAII()438   ~InlinedOpenMPRegionRAII() {
439     // Restore original CapturedStmtInfo only if we're done with code emission.
440     auto *OldCSI =
441         cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI();
442     delete CGF.CapturedStmtInfo;
443     CGF.CapturedStmtInfo = OldCSI;
444     if (NoInheritance) {
445       std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
446       CGF.LambdaThisCaptureField = LambdaThisCaptureField;
447       CGF.BlockInfo = BlockInfo;
448     }
449   }
450 };
451 
452 /// Values for bit flags used in the ident_t to describe the fields.
453 /// All enumeric elements are named and described in accordance with the code
454 /// from https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h
455 enum OpenMPLocationFlags : unsigned {
456   /// Use trampoline for internal microtask.
457   OMP_IDENT_IMD = 0x01,
458   /// Use c-style ident structure.
459   OMP_IDENT_KMPC = 0x02,
460   /// Atomic reduction option for kmpc_reduce.
461   OMP_ATOMIC_REDUCE = 0x10,
462   /// Explicit 'barrier' directive.
463   OMP_IDENT_BARRIER_EXPL = 0x20,
464   /// Implicit barrier in code.
465   OMP_IDENT_BARRIER_IMPL = 0x40,
466   /// Implicit barrier in 'for' directive.
467   OMP_IDENT_BARRIER_IMPL_FOR = 0x40,
468   /// Implicit barrier in 'sections' directive.
469   OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0,
470   /// Implicit barrier in 'single' directive.
471   OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140,
472   /// Call of __kmp_for_static_init for static loop.
473   OMP_IDENT_WORK_LOOP = 0x200,
474   /// Call of __kmp_for_static_init for sections.
475   OMP_IDENT_WORK_SECTIONS = 0x400,
476   /// Call of __kmp_for_static_init for distribute.
477   OMP_IDENT_WORK_DISTRIBUTE = 0x800,
478   LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE)
479 };
480 
481 namespace {
482 LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
483 /// Values for bit flags for marking which requires clauses have been used.
484 enum OpenMPOffloadingRequiresDirFlags : int64_t {
485   /// flag undefined.
486   OMP_REQ_UNDEFINED               = 0x000,
487   /// no requires clause present.
488   OMP_REQ_NONE                    = 0x001,
489   /// reverse_offload clause.
490   OMP_REQ_REVERSE_OFFLOAD         = 0x002,
491   /// unified_address clause.
492   OMP_REQ_UNIFIED_ADDRESS         = 0x004,
493   /// unified_shared_memory clause.
494   OMP_REQ_UNIFIED_SHARED_MEMORY   = 0x008,
495   /// dynamic_allocators clause.
496   OMP_REQ_DYNAMIC_ALLOCATORS      = 0x010,
497   LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_REQ_DYNAMIC_ALLOCATORS)
498 };
499 
500 enum OpenMPOffloadingReservedDeviceIDs {
501   /// Device ID if the device was not defined, runtime should get it
502   /// from environment variables in the spec.
503   OMP_DEVICEID_UNDEF = -1,
504 };
505 } // anonymous namespace
506 
/// Describes ident structure that describes a source location.
/// All descriptions are taken from
/// https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h
/// Original structure:
/// typedef struct ident {
///    kmp_int32 reserved_1;   /**<  might be used in Fortran;
///                                  see above  */
///    kmp_int32 flags;        /**<  also f.flags; KMP_IDENT_xxx flags;
///                                  KMP_IDENT_KMPC identifies this union
///                                  member  */
///    kmp_int32 reserved_2;   /**<  not really used in Fortran any more;
///                                  see above */
///#if USE_ITT_BUILD
///                            /*  but currently used for storing
///                                region-specific ITT */
///                            /*  contextual information. */
///#endif /* USE_ITT_BUILD */
///    kmp_int32 reserved_3;   /**< source[4] in Fortran, do not use for
///                                 C++  */
///    char const *psource;    /**< String describing the source location.
///                            The string is composed of semi-colon separated
///                            fields which describe the source file,
///                            the function and a pair of line numbers that
///                            delimit the construct.
///                             */
/// } ident_t;
enum IdentFieldIndex {
  /// might be used in Fortran
  IdentField_Reserved_1,
  /// OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member.
  IdentField_Flags,
  /// Not really used in Fortran any more
  IdentField_Reserved_2,
  /// Source[4] in Fortran, do not use for C++
  IdentField_Reserved_3,
  /// String describing the source location. The string is composed of
  /// semi-colon separated fields which describe the source file, the function
  /// and a pair of line numbers that delimit the construct.
  IdentField_PSource
};
547 
/// Schedule types for 'omp for' loops (these enumerators are taken from
/// the enum sched_type in kmp.h).
enum OpenMPSchedType {
  /// Lower bound for default (unordered) versions.
  OMP_sch_lower = 32,
  OMP_sch_static_chunked = 33,
  OMP_sch_static = 34,
  OMP_sch_dynamic_chunked = 35,
  OMP_sch_guided_chunked = 36,
  OMP_sch_runtime = 37,
  OMP_sch_auto = 38,
  /// static with chunk adjustment (e.g., simd)
  OMP_sch_static_balanced_chunked = 45,
  /// Lower bound for 'ordered' versions.
  OMP_ord_lower = 64,
  OMP_ord_static_chunked = 65,
  OMP_ord_static = 66,
  OMP_ord_dynamic_chunked = 67,
  OMP_ord_guided_chunked = 68,
  OMP_ord_runtime = 69,
  OMP_ord_auto = 70,
  OMP_sch_default = OMP_sch_static,
  /// dist_schedule types
  OMP_dist_sch_static_chunked = 91,
  OMP_dist_sch_static = 92,
  /// Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers.
  /// Set if the monotonic schedule modifier was present.
  OMP_sch_modifier_monotonic = (1 << 29),
  /// Set if the nonmonotonic schedule modifier was present.
  OMP_sch_modifier_nonmonotonic = (1 << 30),
};
579 
580 /// A basic class for pre|post-action for advanced codegen sequence for OpenMP
581 /// region.
582 class CleanupTy final : public EHScopeStack::Cleanup {
583   PrePostActionTy *Action;
584 
585 public:
CleanupTy(PrePostActionTy * Action)586   explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {}
Emit(CodeGenFunction & CGF,Flags)587   void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
588     if (!CGF.HaveInsertPoint())
589       return;
590     Action->Exit(CGF);
591   }
592 };
593 
594 } // anonymous namespace
595 
operator ()(CodeGenFunction & CGF) const596 void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const {
597   CodeGenFunction::RunCleanupsScope Scope(CGF);
598   if (PrePostAction) {
599     CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction);
600     Callback(CodeGen, CGF, *PrePostAction);
601   } else {
602     PrePostActionTy Action;
603     Callback(CodeGen, CGF, Action);
604   }
605 }
606 
607 /// Check if the combiner is a call to UDR combiner and if it is so return the
608 /// UDR decl used for reduction.
609 static const OMPDeclareReductionDecl *
getReductionInit(const Expr * ReductionOp)610 getReductionInit(const Expr *ReductionOp) {
611   if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
612     if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
613       if (const auto *DRE =
614               dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
615         if (const auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl()))
616           return DRD;
617   return nullptr;
618 }
619 
emitInitWithReductionInitializer(CodeGenFunction & CGF,const OMPDeclareReductionDecl * DRD,const Expr * InitOp,Address Private,Address Original,QualType Ty)620 static void emitInitWithReductionInitializer(CodeGenFunction &CGF,
621                                              const OMPDeclareReductionDecl *DRD,
622                                              const Expr *InitOp,
623                                              Address Private, Address Original,
624                                              QualType Ty) {
625   if (DRD->getInitializer()) {
626     std::pair<llvm::Function *, llvm::Function *> Reduction =
627         CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
628     const auto *CE = cast<CallExpr>(InitOp);
629     const auto *OVE = cast<OpaqueValueExpr>(CE->getCallee());
630     const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts();
631     const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts();
632     const auto *LHSDRE =
633         cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr());
634     const auto *RHSDRE =
635         cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr());
636     CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
637     PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()), Private);
638     PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()), Original);
639     (void)PrivateScope.Privatize();
640     RValue Func = RValue::get(Reduction.second);
641     CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
642     CGF.EmitIgnoredExpr(InitOp);
643   } else {
644     llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty);
645     std::string Name = CGF.CGM.getOpenMPRuntime().getName({"init"});
646     auto *GV = new llvm::GlobalVariable(
647         CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true,
648         llvm::GlobalValue::PrivateLinkage, Init, Name);
649     LValue LV = CGF.MakeNaturalAlignAddrLValue(GV, Ty);
650     RValue InitRVal;
651     switch (CGF.getEvaluationKind(Ty)) {
652     case TEK_Scalar:
653       InitRVal = CGF.EmitLoadOfLValue(LV, DRD->getLocation());
654       break;
655     case TEK_Complex:
656       InitRVal =
657           RValue::getComplex(CGF.EmitLoadOfComplex(LV, DRD->getLocation()));
658       break;
659     case TEK_Aggregate: {
660       OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_LValue);
661       CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, LV);
662       CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
663                            /*IsInitializer=*/false);
664       return;
665     }
666     }
667     OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_PRValue);
668     CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal);
669     CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
670                          /*IsInitializer=*/false);
671   }
672 }
673 
/// Emit initialization of arrays of complex types.
/// \param DestAddr Address of the array.
/// \param Type Type of array.
/// \param Init Initial expression of array.
/// \param SrcAddr Address of the original array.
static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr,
                                 QualType Type, bool EmitDeclareReductionInit,
                                 const Expr *Init,
                                 const OMPDeclareReductionDecl *DRD,
                                 Address SrcAddr = Address::invalid()) {
  // Perform element-by-element initialization.
  QualType ElementTy;

  // Drill down to the base element type on both arrays.
  const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr);
  if (DRD)
    // Keep the source element type in sync with the destination's so both
    // sides are iterated with the same stride.
    SrcAddr =
        CGF.Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType());

  llvm::Value *SrcBegin = nullptr;
  if (DRD)
    SrcBegin = SrcAddr.getPointer();
  llvm::Value *DestBegin = DestAddr.getPointer();
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *DestEnd =
      CGF.Builder.CreateGEP(DestAddr.getElementType(), DestBegin, NumElements);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arrayinit.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arrayinit.done");
  // Skip the loop entirely for zero-length arrays.
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);

  // PHI nodes carry the current source/destination element pointers around
  // the loop; the source pair exists only for declare-reduction inits.
  llvm::PHINode *SrcElementPHI = nullptr;
  Address SrcElementCurrent = Address::invalid();
  if (DRD) {
    SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2,
                                          "omp.arraycpy.srcElementPast");
    SrcElementPHI->addIncoming(SrcBegin, EntryBB);
    SrcElementCurrent =
        Address(SrcElementPHI, SrcAddr.getElementType(),
                SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));
  }
  llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI(
      DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
  DestElementPHI->addIncoming(DestBegin, EntryBB);
  Address DestElementCurrent =
      Address(DestElementPHI, DestAddr.getElementType(),
              DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  {
    // Scope the per-element cleanups so temporaries die each iteration.
    CodeGenFunction::RunCleanupsScope InitScope(CGF);
    if (EmitDeclareReductionInit) {
      emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent,
                                       SrcElementCurrent, ElementTy);
    } else
      CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(),
                           /*IsInitializer=*/false);
  }

  if (DRD) {
    // Shift the address forward by one element.
    llvm::Value *SrcElementNext = CGF.Builder.CreateConstGEP1_32(
        SrcAddr.getElementType(), SrcElementPHI, /*Idx0=*/1,
        "omp.arraycpy.dest.element");
    SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock());
  }

  // Shift the address forward by one element.
  llvm::Value *DestElementNext = CGF.Builder.CreateConstGEP1_32(
      DestAddr.getElementType(), DestElementPHI, /*Idx0=*/1,
      "omp.arraycpy.dest.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}
763 
emitSharedLValue(CodeGenFunction & CGF,const Expr * E)764 LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) {
765   return CGF.EmitOMPSharedLValue(E);
766 }
767 
emitSharedLValueUB(CodeGenFunction & CGF,const Expr * E)768 LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF,
769                                             const Expr *E) {
770   if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E))
771     return CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false);
772   return LValue();
773 }
774 
emitAggregateInitialization(CodeGenFunction & CGF,unsigned N,Address PrivateAddr,Address SharedAddr,const OMPDeclareReductionDecl * DRD)775 void ReductionCodeGen::emitAggregateInitialization(
776     CodeGenFunction &CGF, unsigned N, Address PrivateAddr, Address SharedAddr,
777     const OMPDeclareReductionDecl *DRD) {
778   // Emit VarDecl with copy init for arrays.
779   // Get the address of the original variable captured in current
780   // captured region.
781   const auto *PrivateVD =
782       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
783   bool EmitDeclareReductionInit =
784       DRD && (DRD->getInitializer() || !PrivateVD->hasInit());
785   EmitOMPAggregateInit(CGF, PrivateAddr, PrivateVD->getType(),
786                        EmitDeclareReductionInit,
787                        EmitDeclareReductionInit ? ClausesData[N].ReductionOp
788                                                 : PrivateVD->getInit(),
789                        DRD, SharedAddr);
790 }
791 
ReductionCodeGen(ArrayRef<const Expr * > Shareds,ArrayRef<const Expr * > Origs,ArrayRef<const Expr * > Privates,ArrayRef<const Expr * > ReductionOps)792 ReductionCodeGen::ReductionCodeGen(ArrayRef<const Expr *> Shareds,
793                                    ArrayRef<const Expr *> Origs,
794                                    ArrayRef<const Expr *> Privates,
795                                    ArrayRef<const Expr *> ReductionOps) {
796   ClausesData.reserve(Shareds.size());
797   SharedAddresses.reserve(Shareds.size());
798   Sizes.reserve(Shareds.size());
799   BaseDecls.reserve(Shareds.size());
800   const auto *IOrig = Origs.begin();
801   const auto *IPriv = Privates.begin();
802   const auto *IRed = ReductionOps.begin();
803   for (const Expr *Ref : Shareds) {
804     ClausesData.emplace_back(Ref, *IOrig, *IPriv, *IRed);
805     std::advance(IOrig, 1);
806     std::advance(IPriv, 1);
807     std::advance(IRed, 1);
808   }
809 }
810 
emitSharedOrigLValue(CodeGenFunction & CGF,unsigned N)811 void ReductionCodeGen::emitSharedOrigLValue(CodeGenFunction &CGF, unsigned N) {
812   assert(SharedAddresses.size() == N && OrigAddresses.size() == N &&
813          "Number of generated lvalues must be exactly N.");
814   LValue First = emitSharedLValue(CGF, ClausesData[N].Shared);
815   LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Shared);
816   SharedAddresses.emplace_back(First, Second);
817   if (ClausesData[N].Shared == ClausesData[N].Ref) {
818     OrigAddresses.emplace_back(First, Second);
819   } else {
820     LValue First = emitSharedLValue(CGF, ClausesData[N].Ref);
821     LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Ref);
822     OrigAddresses.emplace_back(First, Second);
823   }
824 }
825 
void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) {
  QualType PrivateType = getPrivateType(N);
  bool AsArraySection = isa<OMPArraySectionExpr>(ClausesData[N].Ref);
  if (!PrivateType->isVariablyModifiedType()) {
    // Constant-size item: byte size is known statically and no element count
    // is recorded (nullptr second member).
    Sizes.emplace_back(
        CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType()),
        nullptr);
    return;
  }
  llvm::Value *Size;
  llvm::Value *SizeInChars;
  auto *ElemType = OrigAddresses[N].first.getAddress(CGF).getElementType();
  auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType);
  if (AsArraySection) {
    // Array section: element count = (UB - LB) + 1, then scale by the
    // element size to get the byte size.
    Size = CGF.Builder.CreatePtrDiff(ElemType,
                                     OrigAddresses[N].second.getPointer(CGF),
                                     OrigAddresses[N].first.getPointer(CGF));
    Size = CGF.Builder.CreateNUWAdd(
        Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1));
    SizeInChars = CGF.Builder.CreateNUWMul(Size, ElemSizeOf);
  } else {
    // Otherwise go the opposite way: derive the element count from the
    // (runtime) byte size of the type.
    SizeInChars =
        CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType());
    Size = CGF.Builder.CreateExactUDiv(SizeInChars, ElemSizeOf);
  }
  Sizes.emplace_back(SizeInChars, Size);
  // Bind the VLA's size expression to the computed element count and emit
  // the variably-modified private type under that mapping.
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}
859 
emitAggregateType(CodeGenFunction & CGF,unsigned N,llvm::Value * Size)860 void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N,
861                                          llvm::Value *Size) {
862   QualType PrivateType = getPrivateType(N);
863   if (!PrivateType->isVariablyModifiedType()) {
864     assert(!Size && !Sizes[N].second &&
865            "Size should be nullptr for non-variably modified reduction "
866            "items.");
867     return;
868   }
869   CodeGenFunction::OpaqueValueMapping OpaqueMap(
870       CGF,
871       cast<OpaqueValueExpr>(
872           CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
873       RValue::get(Size));
874   CGF.EmitVariablyModifiedType(PrivateType);
875 }
876 
void ReductionCodeGen::emitInitialization(
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, Address SharedAddr,
    llvm::function_ref<bool(CodeGenFunction &)> DefaultInit) {
  assert(SharedAddresses.size() > N && "No variable was generated");
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  const OMPDeclareReductionDecl *DRD =
      getReductionInit(ClausesData[N].ReductionOp);
  if (CGF.getContext().getAsArrayType(PrivateVD->getType())) {
    // Array private copy: initialize element-by-element. Run DefaultInit
    // first when a user-defined initializer exists.
    if (DRD && DRD->getInitializer())
      (void)DefaultInit(CGF);
    emitAggregateInitialization(CGF, N, PrivateAddr, SharedAddr, DRD);
  } else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) {
    // Non-array item with a user-defined reduction initializer (or one that
    // is required because the private variable has no initializer).
    (void)DefaultInit(CGF);
    QualType SharedType = SharedAddresses[N].first.getType();
    emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp,
                                     PrivateAddr, SharedAddr, SharedType);
  } else if (!DefaultInit(CGF) && PrivateVD->hasInit() &&
             !CGF.isTrivialInitializer(PrivateVD->getInit())) {
    // Fall back to the private variable's own non-trivial initializer when
    // DefaultInit reports it did not handle initialization itself.
    CGF.EmitAnyExprToMem(PrivateVD->getInit(), PrivateAddr,
                         PrivateVD->getType().getQualifiers(),
                         /*IsInitializer=*/false);
  }
}
901 
needCleanups(unsigned N)902 bool ReductionCodeGen::needCleanups(unsigned N) {
903   QualType PrivateType = getPrivateType(N);
904   QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
905   return DTorKind != QualType::DK_none;
906 }
907 
emitCleanups(CodeGenFunction & CGF,unsigned N,Address PrivateAddr)908 void ReductionCodeGen::emitCleanups(CodeGenFunction &CGF, unsigned N,
909                                     Address PrivateAddr) {
910   QualType PrivateType = getPrivateType(N);
911   QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
912   if (needCleanups(N)) {
913     PrivateAddr = CGF.Builder.CreateElementBitCast(
914         PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
915     CGF.pushDestroy(DTorKind, PrivateAddr, PrivateType);
916   }
917 }
918 
/// Walk pointer/reference indirections of \p BaseTy, loading through each
/// level, until the pointee matches \p ElTy; return an lvalue for the
/// resulting address reinterpreted with \p ElTy's memory representation.
static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          LValue BaseLV) {
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    // Load through one level of pointer or reference indirection.
    if (const auto *PtrTy = BaseTy->getAs<PointerType>()) {
      BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(CGF), PtrTy);
    } else {
      LValue RefLVal = CGF.MakeAddrLValue(BaseLV.getAddress(CGF), BaseTy);
      BaseLV = CGF.EmitLoadOfReferenceLValue(RefLVal);
    }
    BaseTy = BaseTy->getPointeeType();
  }
  // Rebuild the lvalue with ElTy's representation, preserving the original
  // base info and TBAA metadata.
  return CGF.MakeAddrLValue(
      CGF.Builder.CreateElementBitCast(BaseLV.getAddress(CGF),
                                       CGF.ConvertTypeForMem(ElTy)),
      BaseLV.getType(), BaseLV.getBaseInfo(),
      CGF.CGM.getTBAAInfoForSubobject(BaseLV, BaseLV.getType()));
}
938 
/// Build an address typed like \p BaseTy that ultimately refers to \p Addr.
/// For each level of pointer/reference indirection between \p BaseTy and
/// \p ElTy, a temporary is created and chained so that loading through the
/// returned (outermost) temporary reaches \p Addr.
static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          Address OriginalBaseAddress, llvm::Value *Addr) {
  Address Tmp = Address::invalid();
  Address TopTmp = Address::invalid();
  Address MostTopTmp = Address::invalid();
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    // One temporary per indirection level; each outer temporary stores the
    // address of the next-inner one.
    Tmp = CGF.CreateMemTemp(BaseTy);
    if (TopTmp.isValid())
      CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp);
    else
      MostTopTmp = Tmp; // Remember the outermost temporary to return.
    TopTmp = Tmp;
    BaseTy = BaseTy->getPointeeType();
  }

  if (Tmp.isValid()) {
    // Store Addr into the innermost temporary and return the outermost.
    Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        Addr, Tmp.getElementType());
    CGF.Builder.CreateStore(Addr, Tmp);
    return MostTopTmp;
  }

  // No indirection at all: reinterpret Addr with the original base's
  // pointer type and keep its alignment.
  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      Addr, OriginalBaseAddress.getType());
  return OriginalBaseAddress.withPointer(Addr);
}
967 
getBaseDecl(const Expr * Ref,const DeclRefExpr * & DE)968 static const VarDecl *getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE) {
969   const VarDecl *OrigVD = nullptr;
970   if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(Ref)) {
971     const Expr *Base = OASE->getBase()->IgnoreParenImpCasts();
972     while (const auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base))
973       Base = TempOASE->getBase()->IgnoreParenImpCasts();
974     while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
975       Base = TempASE->getBase()->IgnoreParenImpCasts();
976     DE = cast<DeclRefExpr>(Base);
977     OrigVD = cast<VarDecl>(DE->getDecl());
978   } else if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Ref)) {
979     const Expr *Base = ASE->getBase()->IgnoreParenImpCasts();
980     while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
981       Base = TempASE->getBase()->IgnoreParenImpCasts();
982     DE = cast<DeclRefExpr>(Base);
983     OrigVD = cast<VarDecl>(DE->getDecl());
984   }
985   return OrigVD;
986 }
987 
/// For array-section/subscript reduction items, shift the private address by
/// the same offset the shared section has from its base variable so the
/// private copy can be addressed through the original base expression.
Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N,
                                               Address PrivateAddr) {
  const DeclRefExpr *DE;
  if (const VarDecl *OrigVD = ::getBaseDecl(ClausesData[N].Ref, DE)) {
    BaseDecls.emplace_back(OrigVD);
    LValue OriginalBaseLValue = CGF.EmitLValue(DE);
    LValue BaseLValue =
        loadToBegin(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(),
                    OriginalBaseLValue);
    Address SharedAddr = SharedAddresses[N].first.getAddress(CGF);
    // Offset (in elements) of the base variable from the section start.
    llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff(
        SharedAddr.getElementType(), BaseLValue.getPointer(CGF),
        SharedAddr.getPointer());
    llvm::Value *PrivatePointer =
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            PrivateAddr.getPointer(), SharedAddr.getType());
    // Apply the same offset to the private copy.
    llvm::Value *Ptr = CGF.Builder.CreateGEP(
        SharedAddr.getElementType(), PrivatePointer, Adjustment);
    return castToBase(CGF, OrigVD->getType(),
                      SharedAddresses[N].first.getType(),
                      OriginalBaseLValue.getAddress(CGF), Ptr);
  }
  // Plain variable reference: no adjustment needed.
  BaseDecls.emplace_back(
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Ref)->getDecl()));
  return PrivateAddr;
}
1014 
usesReductionInitializer(unsigned N) const1015 bool ReductionCodeGen::usesReductionInitializer(unsigned N) const {
1016   const OMPDeclareReductionDecl *DRD =
1017       getReductionInit(ClausesData[N].ReductionOp);
1018   return DRD && DRD->getInitializer();
1019 }
1020 
getThreadIDVariableLValue(CodeGenFunction & CGF)1021 LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) {
1022   return CGF.EmitLoadOfPointerLValue(
1023       CGF.GetAddrOfLocalVar(getThreadIDVariable()),
1024       getThreadIDVariable()->getType()->castAs<PointerType>());
1025 }
1026 
void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt *S) {
  if (!CGF.HaveInsertPoint())
    return;
  // 1.2.2 OpenMP Language Terminology
  // Structured block - An executable statement with a single entry at the
  // top and a single exit at the bottom.
  // The point of exit cannot be a branch out of the structured block.
  // longjmp() and throw() must not violate the entry/exit criteria.
  CGF.EHStack.pushTerminate();
  if (S)
    CGF.incrementProfileCounter(S);
  CodeGen(CGF);
  // Pop the terminate scope pushed above so normal cleanup handling resumes
  // after the structured block.
  CGF.EHStack.popTerminate();
}
1041 
getThreadIDVariableLValue(CodeGenFunction & CGF)1042 LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue(
1043     CodeGenFunction &CGF) {
1044   return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()),
1045                             getThreadIDVariable()->getType(),
1046                             AlignmentSource::Decl);
1047 }
1048 
addFieldToRecordDecl(ASTContext & C,DeclContext * DC,QualType FieldTy)1049 static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC,
1050                                        QualType FieldTy) {
1051   auto *Field = FieldDecl::Create(
1052       C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy,
1053       C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()),
1054       /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit);
1055   Field->setAccess(AS_public);
1056   DC->addDecl(Field);
1057   return Field;
1058 }
1059 
CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM, StringRef FirstSeparator,
                                 StringRef Separator)
    : CGM(CGM), FirstSeparator(FirstSeparator), Separator(Separator),
      OMPBuilder(CGM.getModule()), OffloadEntriesInfoManager(CGM) {
  // kmp_critical_name is modeled as an array of 8 i32 values.
  KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8);

  // Initialize Types used in OpenMPIRBuilder from OMPKinds.def
  OMPBuilder.initialize();
  loadOffloadInfoMetadata();
}
1070 
clear()1071 void CGOpenMPRuntime::clear() {
1072   InternalVars.clear();
1073   // Clean non-target variable declarations possibly used only in debug info.
1074   for (const auto &Data : EmittedNonTargetVariables) {
1075     if (!Data.getValue().pointsToAliveValue())
1076       continue;
1077     auto *GV = dyn_cast<llvm::GlobalVariable>(Data.getValue());
1078     if (!GV)
1079       continue;
1080     if (!GV->isDeclaration() || GV->getNumUses() > 0)
1081       continue;
1082     GV->eraseFromParent();
1083   }
1084 }
1085 
getName(ArrayRef<StringRef> Parts) const1086 std::string CGOpenMPRuntime::getName(ArrayRef<StringRef> Parts) const {
1087   SmallString<128> Buffer;
1088   llvm::raw_svector_ostream OS(Buffer);
1089   StringRef Sep = FirstSeparator;
1090   for (StringRef Part : Parts) {
1091     OS << Sep << Part;
1092     Sep = Separator;
1093   }
1094   return std::string(OS.str());
1095 }
1096 
/// Emit the outlined helper for a user-defined reduction: either the
/// ".omp_combiner." or the ".omp_initializer." function with signature
/// void fn(Ty *omp_out, Ty *omp_in). \p CombinerInitializer is the combiner
/// expression (or the call-style initializer); it may be null, in which case
/// only \p Out's own initializer (if non-trivial) is emitted.
static llvm::Function *
emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty,
                          const Expr *CombinerInitializer, const VarDecl *In,
                          const VarDecl *Out, bool IsCombiner) {
  // void .omp_combiner.(Ty *in, Ty *out);
  ASTContext &C = CGM.getContext();
  QualType PtrTy = C.getPointerType(Ty).withRestrict();
  FunctionArgList Args;
  ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(),
                               /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(),
                              /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
  Args.push_back(&OmpOutParm);
  Args.push_back(&OmpInParm);
  const CGFunctionInfo &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName(
      {IsCombiner ? "omp_combiner" : "omp_initializer", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  if (CGM.getLangOpts().Optimize) {
    // These tiny helpers should always be inlined when optimizing.
    Fn->removeFnAttr(llvm::Attribute::NoInline);
    Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
    Fn->addFnAttr(llvm::Attribute::AlwaysInline);
  }
  CodeGenFunction CGF(CGM);
  // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions.
  // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions.
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, In->getLocation(),
                    Out->getLocation());
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm);
  Scope.addPrivate(
      In, CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>())
              .getAddress(CGF));
  Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm);
  Scope.addPrivate(
      Out, CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>())
               .getAddress(CGF));
  (void)Scope.Privatize();
  // For an initializer without a call-style init expression, emit the Out
  // variable's own non-trivial initializer (direct-init form).
  if (!IsCombiner && Out->hasInit() &&
      !CGF.isTrivialInitializer(Out->getInit())) {
    CGF.EmitAnyExprToMem(Out->getInit(), CGF.GetAddrOfLocalVar(Out),
                         Out->getType().getQualifiers(),
                         /*IsInitializer=*/true);
  }
  if (CombinerInitializer)
    CGF.EmitIgnoredExpr(CombinerInitializer);
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}
1151 
void CGOpenMPRuntime::emitUserDefinedReduction(
    CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) {
  // Already emitted for this declaration.
  if (UDRMap.count(D) > 0)
    return;
  llvm::Function *Combiner = emitCombinerOrInitializer(
      CGM, D->getType(), D->getCombiner(),
      cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerIn())->getDecl()),
      cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerOut())->getDecl()),
      /*IsCombiner=*/true);
  llvm::Function *Initializer = nullptr;
  if (const Expr *Init = D->getInitializer()) {
    // Only call-style initializers pass the init expression down; the
    // direct-init form is handled via the priv variable's own initializer.
    Initializer = emitCombinerOrInitializer(
        CGM, D->getType(),
        D->getInitializerKind() == OMPDeclareReductionDecl::CallInit ? Init
                                                                     : nullptr,
        cast<VarDecl>(cast<DeclRefExpr>(D->getInitOrig())->getDecl()),
        cast<VarDecl>(cast<DeclRefExpr>(D->getInitPriv())->getDecl()),
        /*IsCombiner=*/false);
  }
  UDRMap.try_emplace(D, Combiner, Initializer);
  if (CGF) {
    // Track UDRs per function so they can be released with the function.
    auto &Decls = FunctionUDRMap.FindAndConstruct(CGF->CurFn);
    Decls.second.push_back(D);
  }
}
1177 
1178 std::pair<llvm::Function *, llvm::Function *>
getUserDefinedReduction(const OMPDeclareReductionDecl * D)1179 CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) {
1180   auto I = UDRMap.find(D);
1181   if (I != UDRMap.end())
1182     return I->second;
1183   emitUserDefinedReduction(/*CGF=*/nullptr, D);
1184   return UDRMap.lookup(D);
1185 }
1186 
namespace {
// Temporary RAII solution to perform a push/pop stack event on the OpenMP IR
// Builder if one is present.
struct PushAndPopStackRAII {
  // Pushes a finalization callback for \p Kind on construction (no-op when
  // \p OMPBuilder is null); the destructor pops it again.
  PushAndPopStackRAII(llvm::OpenMPIRBuilder *OMPBuilder, CodeGenFunction &CGF,
                      bool HasCancel, llvm::omp::Directive Kind)
      : OMPBuilder(OMPBuilder) {
    if (!OMPBuilder)
      return;

    // The following callback is the crucial part of clangs cleanup process.
    //
    // NOTE:
    // Once the OpenMPIRBuilder is used to create parallel regions (and
    // similar), the cancellation destination (Dest below) is determined via
    // IP. That means if we have variables to finalize we split the block at IP,
    // use the new block (=BB) as destination to build a JumpDest (via
    // getJumpDestInCurrentScope(BB)) which then is fed to
    // EmitBranchThroughCleanup. Furthermore, there will not be the need
    // to push & pop an FinalizationInfo object.
    // The FiniCB will still be needed but at the point where the
    // OpenMPIRBuilder is asked to construct a parallel (or similar) construct.
    auto FiniCB = [&CGF](llvm::OpenMPIRBuilder::InsertPointTy IP) {
      assert(IP.getBlock()->end() == IP.getPoint() &&
             "Clang CG should cause non-terminated block!");
      // Route the cancellation exit through Clang's cleanup machinery.
      CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
      CGF.Builder.restoreIP(IP);
      CodeGenFunction::JumpDest Dest =
          CGF.getOMPCancelDestination(OMPD_parallel);
      CGF.EmitBranchThroughCleanup(Dest);
    };

    // TODO: Remove this once we emit parallel regions through the
    //       OpenMPIRBuilder as it can do this setup internally.
    llvm::OpenMPIRBuilder::FinalizationInfo FI({FiniCB, Kind, HasCancel});
    OMPBuilder->pushFinalizationCB(std::move(FI));
  }
  ~PushAndPopStackRAII() {
    if (OMPBuilder)
      OMPBuilder->popFinalizationCB();
  }
  llvm::OpenMPIRBuilder *OMPBuilder;
};
} // namespace
1231 
emitParallelOrTeamsOutlinedFunction(CodeGenModule & CGM,const OMPExecutableDirective & D,const CapturedStmt * CS,const VarDecl * ThreadIDVar,OpenMPDirectiveKind InnermostKind,const StringRef OutlinedHelperName,const RegionCodeGenTy & CodeGen)1232 static llvm::Function *emitParallelOrTeamsOutlinedFunction(
1233     CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS,
1234     const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
1235     const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen) {
1236   assert(ThreadIDVar->getType()->isPointerType() &&
1237          "thread id variable must be of type kmp_int32 *");
1238   CodeGenFunction CGF(CGM, true);
1239   bool HasCancel = false;
1240   if (const auto *OPD = dyn_cast<OMPParallelDirective>(&D))
1241     HasCancel = OPD->hasCancel();
1242   else if (const auto *OPD = dyn_cast<OMPTargetParallelDirective>(&D))
1243     HasCancel = OPD->hasCancel();
1244   else if (const auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D))
1245     HasCancel = OPSD->hasCancel();
1246   else if (const auto *OPFD = dyn_cast<OMPParallelForDirective>(&D))
1247     HasCancel = OPFD->hasCancel();
1248   else if (const auto *OPFD = dyn_cast<OMPTargetParallelForDirective>(&D))
1249     HasCancel = OPFD->hasCancel();
1250   else if (const auto *OPFD = dyn_cast<OMPDistributeParallelForDirective>(&D))
1251     HasCancel = OPFD->hasCancel();
1252   else if (const auto *OPFD =
1253                dyn_cast<OMPTeamsDistributeParallelForDirective>(&D))
1254     HasCancel = OPFD->hasCancel();
1255   else if (const auto *OPFD =
1256                dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&D))
1257     HasCancel = OPFD->hasCancel();
1258 
1259   // TODO: Temporarily inform the OpenMPIRBuilder, if any, about the new
1260   //       parallel region to make cancellation barriers work properly.
1261   llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
1262   PushAndPopStackRAII PSR(&OMPBuilder, CGF, HasCancel, InnermostKind);
1263   CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind,
1264                                     HasCancel, OutlinedHelperName);
1265   CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
1266   return CGF.GenerateOpenMPCapturedStmtFunction(*CS, D.getBeginLoc());
1267 }
1268 
emitParallelOutlinedFunction(const OMPExecutableDirective & D,const VarDecl * ThreadIDVar,OpenMPDirectiveKind InnermostKind,const RegionCodeGenTy & CodeGen)1269 llvm::Function *CGOpenMPRuntime::emitParallelOutlinedFunction(
1270     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
1271     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
1272   const CapturedStmt *CS = D.getCapturedStmt(OMPD_parallel);
1273   return emitParallelOrTeamsOutlinedFunction(
1274       CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
1275 }
1276 
emitTeamsOutlinedFunction(const OMPExecutableDirective & D,const VarDecl * ThreadIDVar,OpenMPDirectiveKind InnermostKind,const RegionCodeGenTy & CodeGen)1277 llvm::Function *CGOpenMPRuntime::emitTeamsOutlinedFunction(
1278     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
1279     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
1280   const CapturedStmt *CS = D.getCapturedStmt(OMPD_teams);
1281   return emitParallelOrTeamsOutlinedFunction(
1282       CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
1283 }
1284 
llvm::Function *CGOpenMPRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  // For untied tasks, re-enqueue the task via __kmpc_omp_task so the next
  // part can run; this codegen is attached as an action below.
  auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF,
                                              PrePostActionTy &) {
    llvm::Value *ThreadID = getThreadID(CGF, D.getBeginLoc());
    llvm::Value *UpLoc = emitUpdateLocation(CGF, D.getBeginLoc());
    llvm::Value *TaskArgs[] = {
        UpLoc, ThreadID,
        CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar),
                                    TaskTVar->getType()->castAs<PointerType>())
            .getPointer(CGF)};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_omp_task),
                        TaskArgs);
  };
  CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar,
                                                            UntiedCodeGen);
  CodeGen.setAction(Action);
  assert(!ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 for tasks");
  // Pick the captured statement matching the directive family.
  const OpenMPDirectiveKind Region =
      isOpenMPTaskLoopDirective(D.getDirectiveKind()) ? OMPD_taskloop
                                                      : OMPD_task;
  const CapturedStmt *CS = D.getCapturedStmt(Region);
  bool HasCancel = false;
  if (const auto *TD = dyn_cast<OMPTaskDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPMasterTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPParallelMasterTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();

  CodeGenFunction CGF(CGM, true);
  CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen,
                                        InnermostKind, HasCancel, Action);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  llvm::Function *Res = CGF.GenerateCapturedStmtFunction(*CS);
  // Untied tasks report back how many resumable parts were generated.
  if (!Tied)
    NumberOfParts = Action.getNumberOfParts();
  return Res;
}
1331 
setLocThreadIdInsertPt(CodeGenFunction & CGF,bool AtCurrentPoint)1332 void CGOpenMPRuntime::setLocThreadIdInsertPt(CodeGenFunction &CGF,
1333                                              bool AtCurrentPoint) {
1334   auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1335   assert(!Elem.second.ServiceInsertPt && "Insert point is set already.");
1336 
1337   llvm::Value *Undef = llvm::UndefValue::get(CGF.Int32Ty);
1338   if (AtCurrentPoint) {
1339     Elem.second.ServiceInsertPt = new llvm::BitCastInst(
1340         Undef, CGF.Int32Ty, "svcpt", CGF.Builder.GetInsertBlock());
1341   } else {
1342     Elem.second.ServiceInsertPt =
1343         new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt");
1344     Elem.second.ServiceInsertPt->insertAfter(CGF.AllocaInsertPt);
1345   }
1346 }
1347 
clearLocThreadIdInsertPt(CodeGenFunction & CGF)1348 void CGOpenMPRuntime::clearLocThreadIdInsertPt(CodeGenFunction &CGF) {
1349   auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1350   if (Elem.second.ServiceInsertPt) {
1351     llvm::Instruction *Ptr = Elem.second.ServiceInsertPt;
1352     Elem.second.ServiceInsertPt = nullptr;
1353     Ptr->eraseFromParent();
1354   }
1355 }
1356 
getIdentStringFromSourceLocation(CodeGenFunction & CGF,SourceLocation Loc,SmallString<128> & Buffer)1357 static StringRef getIdentStringFromSourceLocation(CodeGenFunction &CGF,
1358                                                   SourceLocation Loc,
1359                                                   SmallString<128> &Buffer) {
1360   llvm::raw_svector_ostream OS(Buffer);
1361   // Build debug location
1362   PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
1363   OS << ";" << PLoc.getFilename() << ";";
1364   if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
1365     OS << FD->getQualifiedNameAsString();
1366   OS << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;";
1367   return OS.str();
1368 }
1369 
emitUpdateLocation(CodeGenFunction & CGF,SourceLocation Loc,unsigned Flags)1370 llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF,
1371                                                  SourceLocation Loc,
1372                                                  unsigned Flags) {
1373   uint32_t SrcLocStrSize;
1374   llvm::Constant *SrcLocStr;
1375   if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo ||
1376       Loc.isInvalid()) {
1377     SrcLocStr = OMPBuilder.getOrCreateDefaultSrcLocStr(SrcLocStrSize);
1378   } else {
1379     std::string FunctionName;
1380     if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
1381       FunctionName = FD->getQualifiedNameAsString();
1382     PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
1383     const char *FileName = PLoc.getFilename();
1384     unsigned Line = PLoc.getLine();
1385     unsigned Column = PLoc.getColumn();
1386     SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(FunctionName, FileName, Line,
1387                                                 Column, SrcLocStrSize);
1388   }
1389   unsigned Reserved2Flags = getDefaultLocationReserved2Flags();
1390   return OMPBuilder.getOrCreateIdent(
1391       SrcLocStr, SrcLocStrSize, llvm::omp::IdentFlag(Flags), Reserved2Flags);
1392 }
1393 
llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF,
                                          SourceLocation Loc) {
  assert(CGF.CurFn && "No function in current CodeGenFunction.");
  // If the OpenMPIRBuilder is used we need to use it for all thread id calls as
  // the clang invariants used below might be broken.
  if (CGM.getLangOpts().OpenMPIRBuilder) {
    SmallString<128> Buffer;
    OMPBuilder.updateToLocation(CGF.Builder.saveIP());
    uint32_t SrcLocStrSize;
    auto *SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(
        getIdentStringFromSourceLocation(CGF, Loc, Buffer), SrcLocStrSize);
    return OMPBuilder.getOrCreateThreadID(
        OMPBuilder.getOrCreateIdent(SrcLocStr, SrcLocStrSize));
  }

  llvm::Value *ThreadID = nullptr;
  // Check whether we've already cached a load of the thread id in this
  // function.
  auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
  if (I != OpenMPLocThreadIDMap.end()) {
    ThreadID = I->second.ThreadID;
    if (ThreadID != nullptr)
      return ThreadID;
  }
  // If exceptions are enabled, do not use parameter to avoid possible crash.
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    if (OMPRegionInfo->getThreadIDVariable()) {
      // Check if this an outlined function with thread id passed as argument.
      LValue LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF);
      llvm::BasicBlock *TopBlock = CGF.AllocaInsertPt->getParent();
      // Only reuse the argument when it is safe: exceptions cannot interfere,
      // or the load is emitted in the entry block, or in the same block where
      // the thread-id pointer itself is defined.
      if (!CGF.EHStack.requiresLandingPad() || !CGF.getLangOpts().Exceptions ||
          !CGF.getLangOpts().CXXExceptions ||
          CGF.Builder.GetInsertBlock() == TopBlock ||
          !isa<llvm::Instruction>(LVal.getPointer(CGF)) ||
          cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
              TopBlock ||
          cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
              CGF.Builder.GetInsertBlock()) {
        ThreadID = CGF.EmitLoadOfScalar(LVal, Loc);
        // If value loaded in entry block, cache it and use it everywhere in
        // function.
        if (CGF.Builder.GetInsertBlock() == TopBlock) {
          auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
          Elem.second.ThreadID = ThreadID;
        }
        return ThreadID;
      }
    }
  }

  // This is not an outlined function region - need to call __kmpc_int32
  // kmpc_global_thread_num(ident_t *loc).
  // Generate thread id value and cache this value for use across the
  // function.
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
  if (!Elem.second.ServiceInsertPt)
    setLocThreadIdInsertPt(CGF);
  // Emit the call at the dedicated service insertion point so the result
  // dominates all later uses; the guard restores the builder afterwards.
  CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
  CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt);
  llvm::CallInst *Call = CGF.Builder.CreateCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___kmpc_global_thread_num),
      emitUpdateLocation(CGF, Loc));
  Call->setCallingConv(CGF.getRuntimeCC());
  Elem.second.ThreadID = Call;
  return Call;
}
1462 
functionFinished(CodeGenFunction & CGF)1463 void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) {
1464   assert(CGF.CurFn && "No function in current CodeGenFunction.");
1465   if (OpenMPLocThreadIDMap.count(CGF.CurFn)) {
1466     clearLocThreadIdInsertPt(CGF);
1467     OpenMPLocThreadIDMap.erase(CGF.CurFn);
1468   }
1469   if (FunctionUDRMap.count(CGF.CurFn) > 0) {
1470     for(const auto *D : FunctionUDRMap[CGF.CurFn])
1471       UDRMap.erase(D);
1472     FunctionUDRMap.erase(CGF.CurFn);
1473   }
1474   auto I = FunctionUDMMap.find(CGF.CurFn);
1475   if (I != FunctionUDMMap.end()) {
1476     for(const auto *D : I->second)
1477       UDMMap.erase(D);
1478     FunctionUDMMap.erase(I);
1479   }
1480   LastprivateConditionalToTypes.erase(CGF.CurFn);
1481   FunctionToUntiedTaskStackMap.erase(CGF.CurFn);
1482 }
1483 
llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() {
  // Pointer-to-ident_t type, as maintained by the OpenMPIRBuilder.
  return OMPBuilder.IdentPtr;
}
1487 
getKmpc_MicroPointerTy()1488 llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() {
1489   if (!Kmpc_MicroTy) {
1490     // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...)
1491     llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty),
1492                                  llvm::PointerType::getUnqual(CGM.Int32Ty)};
1493     Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true);
1494   }
1495   return llvm::PointerType::getUnqual(Kmpc_MicroTy);
1496 }
1497 
1498 llvm::FunctionCallee
createForStaticInitFunction(unsigned IVSize,bool IVSigned,bool IsGPUDistribute)1499 CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize, bool IVSigned,
1500                                              bool IsGPUDistribute) {
1501   assert((IVSize == 32 || IVSize == 64) &&
1502          "IV size is not compatible with the omp runtime");
1503   StringRef Name;
1504   if (IsGPUDistribute)
1505     Name = IVSize == 32 ? (IVSigned ? "__kmpc_distribute_static_init_4"
1506                                     : "__kmpc_distribute_static_init_4u")
1507                         : (IVSigned ? "__kmpc_distribute_static_init_8"
1508                                     : "__kmpc_distribute_static_init_8u");
1509   else
1510     Name = IVSize == 32 ? (IVSigned ? "__kmpc_for_static_init_4"
1511                                     : "__kmpc_for_static_init_4u")
1512                         : (IVSigned ? "__kmpc_for_static_init_8"
1513                                     : "__kmpc_for_static_init_8u");
1514 
1515   llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
1516   auto *PtrTy = llvm::PointerType::getUnqual(ITy);
1517   llvm::Type *TypeParams[] = {
1518     getIdentTyPointerTy(),                     // loc
1519     CGM.Int32Ty,                               // tid
1520     CGM.Int32Ty,                               // schedtype
1521     llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
1522     PtrTy,                                     // p_lower
1523     PtrTy,                                     // p_upper
1524     PtrTy,                                     // p_stride
1525     ITy,                                       // incr
1526     ITy                                        // chunk
1527   };
1528   auto *FnTy =
1529       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1530   return CGM.CreateRuntimeFunction(FnTy, Name);
1531 }
1532 
1533 llvm::FunctionCallee
createDispatchInitFunction(unsigned IVSize,bool IVSigned)1534 CGOpenMPRuntime::createDispatchInitFunction(unsigned IVSize, bool IVSigned) {
1535   assert((IVSize == 32 || IVSize == 64) &&
1536          "IV size is not compatible with the omp runtime");
1537   StringRef Name =
1538       IVSize == 32
1539           ? (IVSigned ? "__kmpc_dispatch_init_4" : "__kmpc_dispatch_init_4u")
1540           : (IVSigned ? "__kmpc_dispatch_init_8" : "__kmpc_dispatch_init_8u");
1541   llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
1542   llvm::Type *TypeParams[] = { getIdentTyPointerTy(), // loc
1543                                CGM.Int32Ty,           // tid
1544                                CGM.Int32Ty,           // schedtype
1545                                ITy,                   // lower
1546                                ITy,                   // upper
1547                                ITy,                   // stride
1548                                ITy                    // chunk
1549   };
1550   auto *FnTy =
1551       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1552   return CGM.CreateRuntimeFunction(FnTy, Name);
1553 }
1554 
1555 llvm::FunctionCallee
createDispatchFiniFunction(unsigned IVSize,bool IVSigned)1556 CGOpenMPRuntime::createDispatchFiniFunction(unsigned IVSize, bool IVSigned) {
1557   assert((IVSize == 32 || IVSize == 64) &&
1558          "IV size is not compatible with the omp runtime");
1559   StringRef Name =
1560       IVSize == 32
1561           ? (IVSigned ? "__kmpc_dispatch_fini_4" : "__kmpc_dispatch_fini_4u")
1562           : (IVSigned ? "__kmpc_dispatch_fini_8" : "__kmpc_dispatch_fini_8u");
1563   llvm::Type *TypeParams[] = {
1564       getIdentTyPointerTy(), // loc
1565       CGM.Int32Ty,           // tid
1566   };
1567   auto *FnTy =
1568       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1569   return CGM.CreateRuntimeFunction(FnTy, Name);
1570 }
1571 
1572 llvm::FunctionCallee
createDispatchNextFunction(unsigned IVSize,bool IVSigned)1573 CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize, bool IVSigned) {
1574   assert((IVSize == 32 || IVSize == 64) &&
1575          "IV size is not compatible with the omp runtime");
1576   StringRef Name =
1577       IVSize == 32
1578           ? (IVSigned ? "__kmpc_dispatch_next_4" : "__kmpc_dispatch_next_4u")
1579           : (IVSigned ? "__kmpc_dispatch_next_8" : "__kmpc_dispatch_next_8u");
1580   llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
1581   auto *PtrTy = llvm::PointerType::getUnqual(ITy);
1582   llvm::Type *TypeParams[] = {
1583     getIdentTyPointerTy(),                     // loc
1584     CGM.Int32Ty,                               // tid
1585     llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
1586     PtrTy,                                     // p_lower
1587     PtrTy,                                     // p_upper
1588     PtrTy                                      // p_stride
1589   };
1590   auto *FnTy =
1591       llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
1592   return CGM.CreateRuntimeFunction(FnTy, Name);
1593 }
1594 
1595 /// Obtain information that uniquely identifies a target entry. This
1596 /// consists of the file and device IDs as well as line number associated with
1597 /// the relevant entry source location.
getTargetEntryUniqueInfo(ASTContext & C,SourceLocation Loc,unsigned & DeviceID,unsigned & FileID,unsigned & LineNum)1598 static void getTargetEntryUniqueInfo(ASTContext &C, SourceLocation Loc,
1599                                      unsigned &DeviceID, unsigned &FileID,
1600                                      unsigned &LineNum) {
1601   SourceManager &SM = C.getSourceManager();
1602 
1603   // The loc should be always valid and have a file ID (the user cannot use
1604   // #pragma directives in macros)
1605 
1606   assert(Loc.isValid() && "Source location is expected to be always valid.");
1607 
1608   PresumedLoc PLoc = SM.getPresumedLoc(Loc);
1609   assert(PLoc.isValid() && "Source location is expected to be always valid.");
1610 
1611   llvm::sys::fs::UniqueID ID;
1612   if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID)) {
1613     PLoc = SM.getPresumedLoc(Loc, /*UseLineDirectives=*/false);
1614     assert(PLoc.isValid() && "Source location is expected to be always valid.");
1615     if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID))
1616       SM.getDiagnostics().Report(diag::err_cannot_open_file)
1617           << PLoc.getFilename() << EC.message();
1618   }
1619 
1620   DeviceID = ID.getDevice();
1621   FileID = ID.getFile();
1622   LineNum = PLoc.getLine();
1623 }
1624 
Address CGOpenMPRuntime::getAddrOfDeclareTargetVar(const VarDecl *VD) {
  // In simd-only mode there is no device reference pointer to hand out.
  if (CGM.getLangOpts().OpenMPSimd)
    return Address::invalid();
  llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
  // Only 'declare target link' variables - and 'to' variables when unified
  // shared memory is required - are accessed through an indirection pointer.
  if (Res && (*Res == OMPDeclareTargetDeclAttr::MT_Link ||
              (*Res == OMPDeclareTargetDeclAttr::MT_To &&
               HasRequiresUnifiedSharedMemory))) {
    // Build the reference-pointer name: "<mangled>[_<fileID>]_decl_tgt_ref_ptr".
    SmallString<64> PtrName;
    {
      llvm::raw_svector_ostream OS(PtrName);
      OS << CGM.getMangledName(GlobalDecl(VD));
      if (!VD->isExternallyVisible()) {
        // Disambiguate internal-linkage variables across translation units
        // using the file ID of the declaration's location.
        unsigned DeviceID, FileID, Line;
        getTargetEntryUniqueInfo(CGM.getContext(),
                                 VD->getCanonicalDecl()->getBeginLoc(),
                                 DeviceID, FileID, Line);
        OS << llvm::format("_%x", FileID);
      }
      OS << "_decl_tgt_ref_ptr";
    }
    llvm::Value *Ptr = CGM.getModule().getNamedValue(PtrName);
    QualType PtrTy = CGM.getContext().getPointerType(VD->getType());
    llvm::Type *LlvmPtrTy = CGM.getTypes().ConvertTypeForMem(PtrTy);
    if (!Ptr) {
      // Create the reference pointer on first use and register it so the
      // offload machinery knows about the variable.
      Ptr = getOrCreateInternalVariable(LlvmPtrTy, PtrName);

      auto *GV = cast<llvm::GlobalVariable>(Ptr);
      GV->setLinkage(llvm::GlobalValue::WeakAnyLinkage);

      // On the host, initialize the pointer with the address of the original
      // variable; on the device it has no static initializer here.
      if (!CGM.getLangOpts().OpenMPIsDevice)
        GV->setInitializer(CGM.GetAddrOfGlobal(VD));
      registerTargetGlobalVariable(VD, cast<llvm::Constant>(Ptr));
    }
    return Address(Ptr, LlvmPtrTy, CGM.getContext().getDeclAlign(VD));
  }
  return Address::invalid();
}
1663 
1664 llvm::Constant *
getOrCreateThreadPrivateCache(const VarDecl * VD)1665 CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) {
1666   assert(!CGM.getLangOpts().OpenMPUseTLS ||
1667          !CGM.getContext().getTargetInfo().isTLSSupported());
1668   // Lookup the entry, lazily creating it if necessary.
1669   std::string Suffix = getName({"cache", ""});
1670   return getOrCreateInternalVariable(
1671       CGM.Int8PtrPtrTy, Twine(CGM.getMangledName(VD)).concat(Suffix));
1672 }
1673 
getAddrOfThreadPrivate(CodeGenFunction & CGF,const VarDecl * VD,Address VDAddr,SourceLocation Loc)1674 Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
1675                                                 const VarDecl *VD,
1676                                                 Address VDAddr,
1677                                                 SourceLocation Loc) {
1678   if (CGM.getLangOpts().OpenMPUseTLS &&
1679       CGM.getContext().getTargetInfo().isTLSSupported())
1680     return VDAddr;
1681 
1682   llvm::Type *VarTy = VDAddr.getElementType();
1683   llvm::Value *Args[] = {
1684       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
1685       CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.Int8PtrTy),
1686       CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)),
1687       getOrCreateThreadPrivateCache(VD)};
1688   return Address(
1689       CGF.EmitRuntimeCall(
1690           OMPBuilder.getOrCreateRuntimeFunction(
1691               CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
1692           Args),
1693       CGF.Int8Ty, VDAddr.getAlignment());
1694 }
1695 
emitThreadPrivateVarInit(CodeGenFunction & CGF,Address VDAddr,llvm::Value * Ctor,llvm::Value * CopyCtor,llvm::Value * Dtor,SourceLocation Loc)1696 void CGOpenMPRuntime::emitThreadPrivateVarInit(
1697     CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor,
1698     llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) {
1699   // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime
1700   // library.
1701   llvm::Value *OMPLoc = emitUpdateLocation(CGF, Loc);
1702   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
1703                           CGM.getModule(), OMPRTL___kmpc_global_thread_num),
1704                       OMPLoc);
1705   // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor)
1706   // to register constructor/destructor for variable.
1707   llvm::Value *Args[] = {
1708       OMPLoc, CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.VoidPtrTy),
1709       Ctor, CopyCtor, Dtor};
1710   CGF.EmitRuntimeCall(
1711       OMPBuilder.getOrCreateRuntimeFunction(
1712           CGM.getModule(), OMPRTL___kmpc_threadprivate_register),
1713       Args);
1714 }
1715 
llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition(
    const VarDecl *VD, Address VDAddr, SourceLocation Loc,
    bool PerformInit, CodeGenFunction *CGF) {
  // With real TLS support the variable needs no runtime registration at all.
  if (CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getContext().getTargetInfo().isTLSSupported())
    return nullptr;

  VD = VD->getDefinition(CGM.getContext());
  // Emit the registration only once per mangled name.
  if (VD && ThreadPrivateWithDefinition.insert(CGM.getMangledName(VD)).second) {
    QualType ASTTy = VD->getType();

    llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr;
    const Expr *Init = VD->getAnyInitializer();
    if (CGM.getLangOpts().CPlusPlus && PerformInit) {
      // Generate function that re-emits the declaration's initializer into the
      // threadprivate copy of the variable VD. Signature:
      //   void *__kmpc_global_ctor_(void *dst) - returns dst.
      CodeGenFunction CtorCGF(CGM);
      FunctionArgList Args;
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
                            ImplicitParamDecl::Other);
      Args.push_back(&Dst);

      const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          CGM.getContext().VoidPtrTy, Args);
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      std::string Name = getName({"__kmpc_global_ctor_", ""});
      llvm::Function *Fn =
          CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
      CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI,
                            Args, Loc, Loc);
      // Load the destination pointer argument and initialize through it.
      llvm::Value *ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      Address Arg(ArgVal, CtorCGF.Int8Ty, VDAddr.getAlignment());
      Arg = CtorCGF.Builder.CreateElementBitCast(
          Arg, CtorCGF.ConvertTypeForMem(ASTTy));
      CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(),
                               /*IsInitializer=*/true);
      // Return the (reloaded) destination pointer, as the runtime expects.
      ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue);
      CtorCGF.FinishFunction();
      Ctor = Fn;
    }
    if (VD->getType().isDestructedType() != QualType::DK_none) {
      // Generate function that emits destructor call for the threadprivate
      // copy of the variable VD. Signature:
      //   void __kmpc_global_dtor_(void *obj)
      CodeGenFunction DtorCGF(CGM);
      FunctionArgList Args;
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
                            ImplicitParamDecl::Other);
      Args.push_back(&Dst);

      const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          CGM.getContext().VoidTy, Args);
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      std::string Name = getName({"__kmpc_global_dtor_", ""});
      llvm::Function *Fn =
          CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
      DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args,
                            Loc, Loc);
      // Create a scope with an artificial location for the body of this
      // function.
      auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
      llvm::Value *ArgVal = DtorCGF.EmitLoadOfScalar(
          DtorCGF.GetAddrOfLocalVar(&Dst),
          /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation());
      DtorCGF.emitDestroy(
          Address(ArgVal, DtorCGF.Int8Ty, VDAddr.getAlignment()), ASTTy,
          DtorCGF.getDestroyer(ASTTy.isDestructedType()),
          DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
      DtorCGF.FinishFunction();
      Dtor = Fn;
    }
    // Do not emit init function if it is not required.
    if (!Ctor && !Dtor)
      return nullptr;

    llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
    auto *CopyCtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs,
                                               /*isVarArg=*/false)
                           ->getPointerTo();
    // Copying constructor for the threadprivate variable.
    // Must be NULL - reserved by runtime, but currently it requires that this
    // parameter is always NULL. Otherwise it fires assertion.
    CopyCtor = llvm::Constant::getNullValue(CopyCtorTy);
    // The runtime expects typed null pointers for omitted ctor/dtor.
    if (Ctor == nullptr) {
      auto *CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
                                             /*isVarArg=*/false)
                         ->getPointerTo();
      Ctor = llvm::Constant::getNullValue(CtorTy);
    }
    if (Dtor == nullptr) {
      auto *DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy,
                                             /*isVarArg=*/false)
                         ->getPointerTo();
      Dtor = llvm::Constant::getNullValue(DtorTy);
    }
    if (!CGF) {
      // No enclosing function: wrap the registration in a dedicated global
      // initializer function and return it to the caller.
      auto *InitFunctionTy =
          llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false);
      std::string Name = getName({"__omp_threadprivate_init_", ""});
      llvm::Function *InitFunction = CGM.CreateGlobalInitOrCleanUpFunction(
          InitFunctionTy, Name, CGM.getTypes().arrangeNullaryFunction());
      CodeGenFunction InitCGF(CGM);
      FunctionArgList ArgList;
      InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction,
                            CGM.getTypes().arrangeNullaryFunction(), ArgList,
                            Loc, Loc);
      emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
      InitCGF.FinishFunction();
      return InitFunction;
    }
    // Otherwise emit the registration inline into the current function.
    emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
  }
  return nullptr;
}
1836 
emitDeclareTargetVarDefinition(const VarDecl * VD,llvm::GlobalVariable * Addr,bool PerformInit)1837 bool CGOpenMPRuntime::emitDeclareTargetVarDefinition(const VarDecl *VD,
1838                                                      llvm::GlobalVariable *Addr,
1839                                                      bool PerformInit) {
1840   if (CGM.getLangOpts().OMPTargetTriples.empty() &&
1841       !CGM.getLangOpts().OpenMPIsDevice)
1842     return false;
1843   Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
1844       OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
1845   if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
1846       (*Res == OMPDeclareTargetDeclAttr::MT_To &&
1847        HasRequiresUnifiedSharedMemory))
1848     return CGM.getLangOpts().OpenMPIsDevice;
1849   VD = VD->getDefinition(CGM.getContext());
1850   assert(VD && "Unknown VarDecl");
1851 
1852   if (!DeclareTargetWithDefinition.insert(CGM.getMangledName(VD)).second)
1853     return CGM.getLangOpts().OpenMPIsDevice;
1854 
1855   QualType ASTTy = VD->getType();
1856   SourceLocation Loc = VD->getCanonicalDecl()->getBeginLoc();
1857 
1858   // Produce the unique prefix to identify the new target regions. We use
1859   // the source location of the variable declaration which we know to not
1860   // conflict with any target region.
1861   unsigned DeviceID;
1862   unsigned FileID;
1863   unsigned Line;
1864   getTargetEntryUniqueInfo(CGM.getContext(), Loc, DeviceID, FileID, Line);
1865   SmallString<128> Buffer, Out;
1866   {
1867     llvm::raw_svector_ostream OS(Buffer);
1868     OS << "__omp_offloading_" << llvm::format("_%x", DeviceID)
1869        << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line;
1870   }
1871 
1872   const Expr *Init = VD->getAnyInitializer();
1873   if (CGM.getLangOpts().CPlusPlus && PerformInit) {
1874     llvm::Constant *Ctor;
1875     llvm::Constant *ID;
1876     if (CGM.getLangOpts().OpenMPIsDevice) {
1877       // Generate function that re-emits the declaration's initializer into
1878       // the threadprivate copy of the variable VD
1879       CodeGenFunction CtorCGF(CGM);
1880 
1881       const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
1882       llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
1883       llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction(
1884           FTy, Twine(Buffer, "_ctor"), FI, Loc, false,
1885           llvm::GlobalValue::WeakODRLinkage);
1886       if (CGM.getTriple().isAMDGCN())
1887         Fn->setCallingConv(llvm::CallingConv::AMDGPU_KERNEL);
1888       auto NL = ApplyDebugLocation::CreateEmpty(CtorCGF);
1889       CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
1890                             FunctionArgList(), Loc, Loc);
1891       auto AL = ApplyDebugLocation::CreateArtificial(CtorCGF);
1892       llvm::Constant *AddrInAS0 = Addr;
1893       if (Addr->getAddressSpace() != 0)
1894         AddrInAS0 = llvm::ConstantExpr::getAddrSpaceCast(
1895             Addr, llvm::PointerType::getWithSamePointeeType(
1896                       cast<llvm::PointerType>(Addr->getType()), 0));
1897       CtorCGF.EmitAnyExprToMem(Init,
1898                                Address(AddrInAS0, Addr->getValueType(),
1899                                        CGM.getContext().getDeclAlign(VD)),
1900                                Init->getType().getQualifiers(),
1901                                /*IsInitializer=*/true);
1902       CtorCGF.FinishFunction();
1903       Ctor = Fn;
1904       ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
1905     } else {
1906       Ctor = new llvm::GlobalVariable(
1907           CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
1908           llvm::GlobalValue::PrivateLinkage,
1909           llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_ctor"));
1910       ID = Ctor;
1911     }
1912 
1913     // Register the information for the entry associated with the constructor.
1914     Out.clear();
1915     OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
1916         DeviceID, FileID, Twine(Buffer, "_ctor").toStringRef(Out), Line, Ctor,
1917         ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryCtor);
1918   }
1919   if (VD->getType().isDestructedType() != QualType::DK_none) {
1920     llvm::Constant *Dtor;
1921     llvm::Constant *ID;
1922     if (CGM.getLangOpts().OpenMPIsDevice) {
1923       // Generate function that emits destructor call for the threadprivate
1924       // copy of the variable VD
1925       CodeGenFunction DtorCGF(CGM);
1926 
1927       const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
1928       llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
1929       llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction(
1930           FTy, Twine(Buffer, "_dtor"), FI, Loc, false,
1931           llvm::GlobalValue::WeakODRLinkage);
1932       if (CGM.getTriple().isAMDGCN())
1933         Fn->setCallingConv(llvm::CallingConv::AMDGPU_KERNEL);
1934       auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
1935       DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
1936                             FunctionArgList(), Loc, Loc);
1937       // Create a scope with an artificial location for the body of this
1938       // function.
1939       auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
1940       llvm::Constant *AddrInAS0 = Addr;
1941       if (Addr->getAddressSpace() != 0)
1942         AddrInAS0 = llvm::ConstantExpr::getAddrSpaceCast(
1943             Addr, llvm::PointerType::getWithSamePointeeType(
1944                       cast<llvm::PointerType>(Addr->getType()), 0));
1945       DtorCGF.emitDestroy(Address(AddrInAS0, Addr->getValueType(),
1946                                   CGM.getContext().getDeclAlign(VD)),
1947                           ASTTy, DtorCGF.getDestroyer(ASTTy.isDestructedType()),
1948                           DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
1949       DtorCGF.FinishFunction();
1950       Dtor = Fn;
1951       ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
1952     } else {
1953       Dtor = new llvm::GlobalVariable(
1954           CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
1955           llvm::GlobalValue::PrivateLinkage,
1956           llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_dtor"));
1957       ID = Dtor;
1958     }
1959     // Register the information for the entry associated with the destructor.
1960     Out.clear();
1961     OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
1962         DeviceID, FileID, Twine(Buffer, "_dtor").toStringRef(Out), Line, Dtor,
1963         ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryDtor);
1964   }
1965   return CGM.getLangOpts().OpenMPIsDevice;
1966 }
1967 
// Returns the address of a runtime-managed "artificial" threadprivate
// variable identified by Name. When the target supports TLS (and it is not
// disabled), the backing global is simply marked thread_local; otherwise the
// per-thread copy is obtained through __kmpc_threadprivate_cached.
Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF,
                                                          QualType VarType,
                                                          StringRef Name) {
  std::string Suffix = getName({"artificial", ""});
  llvm::Type *VarLVType = CGF.ConvertTypeForMem(VarType);
  llvm::GlobalVariable *GAddr =
      getOrCreateInternalVariable(VarLVType, Twine(Name).concat(Suffix));
  // Fast path: make the global itself thread-local and return it directly.
  if (CGM.getLangOpts().OpenMP && CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getTarget().isTLSSupported()) {
    GAddr->setThreadLocal(/*Val=*/true);
    return Address(GAddr, GAddr->getValueType(),
                   CGM.getContext().getTypeAlignInChars(VarType));
  }
  // Slow path: route through the runtime. A per-variable "cache" global is
  // handed to __kmpc_threadprivate_cached, which returns the address of the
  // calling thread's copy.
  std::string CacheSuffix = getName({"cache", ""});
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, SourceLocation()),
      getThreadID(CGF, SourceLocation()),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(GAddr, CGM.VoidPtrTy),
      CGF.Builder.CreateIntCast(CGF.getTypeSize(VarType), CGM.SizeTy,
                                /*isSigned=*/false),
      getOrCreateInternalVariable(
          CGM.VoidPtrPtrTy, Twine(Name).concat(Suffix).concat(CacheSuffix))};
  // The runtime returns a void*; cast it back to a pointer to the variable's
  // memory type in the default address space.
  return Address(
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          CGF.EmitRuntimeCall(
              OMPBuilder.getOrCreateRuntimeFunction(
                  CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
              Args),
          VarLVType->getPointerTo(/*AddrSpace=*/0)),
      VarLVType, CGM.getContext().getTypeAlignInChars(VarType));
}
1999 
emitIfClause(CodeGenFunction & CGF,const Expr * Cond,const RegionCodeGenTy & ThenGen,const RegionCodeGenTy & ElseGen)2000 void CGOpenMPRuntime::emitIfClause(CodeGenFunction &CGF, const Expr *Cond,
2001                                    const RegionCodeGenTy &ThenGen,
2002                                    const RegionCodeGenTy &ElseGen) {
2003   CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange());
2004 
2005   // If the condition constant folds and can be elided, try to avoid emitting
2006   // the condition and the dead arm of the if/else.
2007   bool CondConstant;
2008   if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) {
2009     if (CondConstant)
2010       ThenGen(CGF);
2011     else
2012       ElseGen(CGF);
2013     return;
2014   }
2015 
2016   // Otherwise, the condition did not fold, or we couldn't elide it.  Just
2017   // emit the conditional branch.
2018   llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("omp_if.then");
2019   llvm::BasicBlock *ElseBlock = CGF.createBasicBlock("omp_if.else");
2020   llvm::BasicBlock *ContBlock = CGF.createBasicBlock("omp_if.end");
2021   CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0);
2022 
2023   // Emit the 'then' code.
2024   CGF.EmitBlock(ThenBlock);
2025   ThenGen(CGF);
2026   CGF.EmitBranch(ContBlock);
2027   // Emit the 'else' code if present.
2028   // There is no need to emit line number for unconditional branch.
2029   (void)ApplyDebugLocation::CreateEmpty(CGF);
2030   CGF.EmitBlock(ElseBlock);
2031   ElseGen(CGF);
2032   // There is no need to emit line number for unconditional branch.
2033   (void)ApplyDebugLocation::CreateEmpty(CGF);
2034   CGF.EmitBranch(ContBlock);
2035   // Emit the continuation block for code after the if.
2036   CGF.EmitBlock(ContBlock, /*IsFinished=*/true);
2037 }
2038 
// Emits the invocation of an outlined parallel region. Without an if-clause
// (or with a true one) the region is forked via __kmpc_fork_call; with a
// false if-clause it runs serially on the encountering thread, bracketed by
// __kmpc_serialized_parallel / __kmpc_end_serialized_parallel.
// NOTE(review): NumThreads is not consumed in this body — presumably the
// num_threads clause is emitted elsewhere; confirm before relying on it here.
void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       llvm::Function *OutlinedFn,
                                       ArrayRef<llvm::Value *> CapturedVars,
                                       const Expr *IfCond,
                                       llvm::Value *NumThreads) {
  if (!CGF.HaveInsertPoint())
    return;
  llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
  auto &M = CGM.getModule();
  // Code for the parallel (forked) case.
  auto &&ThenGen = [&M, OutlinedFn, CapturedVars, RTLoc,
                    this](CodeGenFunction &CGF, PrePostActionTy &) {
    // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn);
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    llvm::Value *Args[] = {
        RTLoc,
        CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
        CGF.Builder.CreateBitCast(OutlinedFn, RT.getKmpc_MicroPointerTy())};
    llvm::SmallVector<llvm::Value *, 16> RealArgs;
    RealArgs.append(std::begin(Args), std::end(Args));
    RealArgs.append(CapturedVars.begin(), CapturedVars.end());

    llvm::FunctionCallee RTLFn =
        OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_fork_call);
    CGF.EmitRuntimeCall(RTLFn, RealArgs);
  };
  // Code for the serialized (if-clause evaluated to false) case.
  auto &&ElseGen = [&M, OutlinedFn, CapturedVars, RTLoc, Loc,
                    this](CodeGenFunction &CGF, PrePostActionTy &) {
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    llvm::Value *ThreadID = RT.getThreadID(CGF, Loc);
    // Build calls:
    // __kmpc_serialized_parallel(&Loc, GTid);
    llvm::Value *Args[] = {RTLoc, ThreadID};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            M, OMPRTL___kmpc_serialized_parallel),
                        Args);

    // Call the outlined function directly:
    // OutlinedFn(&GTid, &zero_bound, CapturedStruct);
    Address ThreadIDAddr = RT.emitThreadIDAddress(CGF, Loc);
    Address ZeroAddrBound =
        CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty,
                                         /*Name=*/".bound.zero.addr");
    CGF.Builder.CreateStore(CGF.Builder.getInt32(/*C*/ 0), ZeroAddrBound);
    llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
    // ThreadId for serialized parallels is 0.
    OutlinedFnArgs.push_back(ThreadIDAddr.getPointer());
    OutlinedFnArgs.push_back(ZeroAddrBound.getPointer());
    OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());

    // Ensure we do not inline the function. This is trivially true for the ones
    // passed to __kmpc_fork_call but the ones called in serialized regions
    // could be inlined. This is not a perfect but it is closer to the invariant
    // we want, namely, every data environment starts with a new function.
    // TODO: We should pass the if condition to the runtime function and do the
    //       handling there. Much cleaner code.
    OutlinedFn->removeFnAttr(llvm::Attribute::AlwaysInline);
    OutlinedFn->addFnAttr(llvm::Attribute::NoInline);
    RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs);

    // __kmpc_end_serialized_parallel(&Loc, GTid);
    llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            M, OMPRTL___kmpc_end_serialized_parallel),
                        EndArgs);
  };
  // Dynamic if-clause: branch between the two variants. No if-clause:
  // unconditionally fork.
  if (IfCond) {
    emitIfClause(CGF, IfCond, ThenGen, ElseGen);
  } else {
    RegionCodeGenTy ThenRCG(ThenGen);
    ThenRCG(CGF);
  }
}
2110 
2111 // If we're inside an (outlined) parallel region, use the region info's
2112 // thread-ID variable (it is passed in a first argument of the outlined function
2113 // as "kmp_int32 *gtid"). Otherwise, if we're not inside parallel region, but in
2114 // regular serial code region, get thread ID by calling kmp_int32
2115 // kmpc_global_thread_num(ident_t *loc), stash this thread ID in a temporary and
2116 // return the address of that temp.
emitThreadIDAddress(CodeGenFunction & CGF,SourceLocation Loc)2117 Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF,
2118                                              SourceLocation Loc) {
2119   if (auto *OMPRegionInfo =
2120           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
2121     if (OMPRegionInfo->getThreadIDVariable())
2122       return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress(CGF);
2123 
2124   llvm::Value *ThreadID = getThreadID(CGF, Loc);
2125   QualType Int32Ty =
2126       CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true);
2127   Address ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp.");
2128   CGF.EmitStoreOfScalar(ThreadID,
2129                         CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty));
2130 
2131   return ThreadIDTemp;
2132 }
2133 
getOrCreateInternalVariable(llvm::Type * Ty,const llvm::Twine & Name,unsigned AddressSpace)2134 llvm::GlobalVariable *CGOpenMPRuntime::getOrCreateInternalVariable(
2135     llvm::Type *Ty, const llvm::Twine &Name, unsigned AddressSpace) {
2136   SmallString<256> Buffer;
2137   llvm::raw_svector_ostream Out(Buffer);
2138   Out << Name;
2139   StringRef RuntimeName = Out.str();
2140   auto &Elem = *InternalVars.try_emplace(RuntimeName, nullptr).first;
2141   if (Elem.second) {
2142     assert(Elem.second->getType()->isOpaqueOrPointeeTypeMatches(Ty) &&
2143            "OMP internal variable has different type than requested");
2144     return &*Elem.second;
2145   }
2146 
2147   return Elem.second = new llvm::GlobalVariable(
2148              CGM.getModule(), Ty, /*IsConstant*/ false,
2149              llvm::GlobalValue::CommonLinkage, llvm::Constant::getNullValue(Ty),
2150              Elem.first(), /*InsertBefore=*/nullptr,
2151              llvm::GlobalValue::NotThreadLocal, AddressSpace);
2152 }
2153 
getCriticalRegionLock(StringRef CriticalName)2154 llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) {
2155   std::string Prefix = Twine("gomp_critical_user_", CriticalName).str();
2156   std::string Name = getName({Prefix, "var"});
2157   return getOrCreateInternalVariable(KmpCriticalNameTy, Name);
2158 }
2159 
namespace {
/// Common pre(post)-action for different OpenMP constructs.
///
/// Enter() emits a call to \p EnterCallee with \p EnterArgs; when
/// \p Conditional is set, the region body is emitted under an if that tests
/// the enter call's result for non-zero. Exit() emits a call to
/// \p ExitCallee with \p ExitArgs. Conditional users must call Done() after
/// the region to emit the branch into the continuation block.
class CommonActionTy final : public PrePostActionTy {
  llvm::FunctionCallee EnterCallee;      // e.g. __kmpc_master / __kmpc_single
  ArrayRef<llvm::Value *> EnterArgs;
  llvm::FunctionCallee ExitCallee;       // matching __kmpc_end_* entry point
  ArrayRef<llvm::Value *> ExitArgs;
  bool Conditional;                      // gate the region on Enter's result
  llvm::BasicBlock *ContBlock = nullptr; // set only in the conditional case

public:
  CommonActionTy(llvm::FunctionCallee EnterCallee,
                 ArrayRef<llvm::Value *> EnterArgs,
                 llvm::FunctionCallee ExitCallee,
                 ArrayRef<llvm::Value *> ExitArgs, bool Conditional = false)
      : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee),
        ExitArgs(ExitArgs), Conditional(Conditional) {}
  void Enter(CodeGenFunction &CGF) override {
    llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs);
    if (Conditional) {
      llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes);
      auto *ThenBlock = CGF.createBasicBlock("omp_if.then");
      ContBlock = CGF.createBasicBlock("omp_if.end");
      // Generate the branch (If-stmt)
      CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock);
      CGF.EmitBlock(ThenBlock);
    }
  }
  // NOTE: only valid for conditional actions — ContBlock is null otherwise,
  // so callers must pair Done() with Conditional == true.
  void Done(CodeGenFunction &CGF) {
    // Emit the rest of blocks/branches
    CGF.EmitBranch(ContBlock);
    CGF.EmitBlock(ContBlock, true);
  }
  void Exit(CodeGenFunction &CGF) override {
    CGF.EmitRuntimeCall(ExitCallee, ExitArgs);
  }
};
} // anonymous namespace
2198 
// Emits a 'critical' construct:
//   __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]);
//   CriticalOpGen();
//   __kmpc_end_critical(ident_t *, gtid, Lock);
void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF,
                                         StringRef CriticalName,
                                         const RegionCodeGenTy &CriticalOpGen,
                                         SourceLocation Loc, const Expr *Hint) {
  // Prepare arguments and build a call to __kmpc_critical
  if (!CGF.HaveInsertPoint())
    return;
  // The lock is a named internal global keyed off CriticalName.
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
                         getCriticalRegionLock(CriticalName)};
  llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args),
                                                std::end(Args));
  if (Hint) {
    // The hint is appended only to the enter call; __kmpc_end_critical takes
    // the plain three arguments.
    EnterArgs.push_back(CGF.Builder.CreateIntCast(
        CGF.EmitScalarExpr(Hint), CGM.Int32Ty, /*isSigned=*/false));
  }
  // Unconditional action: the region body always runs between the calls.
  CommonActionTy Action(
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(),
          Hint ? OMPRTL___kmpc_critical_with_hint : OMPRTL___kmpc_critical),
      EnterArgs,
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___kmpc_end_critical),
      Args);
  CriticalOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen);
}
2228 
// Emits a 'master' construct:
//   if(__kmpc_master(ident_t *, gtid)) {
//     MasterOpGen();
//     __kmpc_end_master(ident_t *, gtid);
//   }
void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF,
                                       const RegionCodeGenTy &MasterOpGen,
                                       SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // Prepare arguments and build a call to __kmpc_master. The action is
  // conditional: the body is only emitted under a test of the enter call's
  // non-zero result.
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_master),
                        Args,
                        OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_end_master),
                        Args,
                        /*Conditional=*/true);
  MasterOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_master, MasterOpGen);
  // Close the conditional region opened by the action.
  Action.Done(CGF);
}
2251 
// Emits a 'masked' construct:
//   if(__kmpc_masked(ident_t *, gtid, filter)) {
//     MaskedOpGen();
//     __kmpc_end_masked(ident_t *, gtid);
//   }
void CGOpenMPRuntime::emitMaskedRegion(CodeGenFunction &CGF,
                                       const RegionCodeGenTy &MaskedOpGen,
                                       SourceLocation Loc, const Expr *Filter) {
  if (!CGF.HaveInsertPoint())
    return;
  // Prepare arguments and build a call to __kmpc_masked. A missing filter
  // clause defaults to thread 0.
  llvm::Value *FilterVal = Filter
                               ? CGF.EmitScalarExpr(Filter, CGF.Int32Ty)
                               : llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/0);
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
                         FilterVal};
  // The end call does not take the filter argument.
  llvm::Value *ArgsEnd[] = {emitUpdateLocation(CGF, Loc),
                            getThreadID(CGF, Loc)};
  // Conditional action: only threads for which __kmpc_masked returns
  // non-zero execute the region body.
  CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_masked),
                        Args,
                        OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_end_masked),
                        ArgsEnd,
                        /*Conditional=*/true);
  MaskedOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_masked, MaskedOpGen);
  // Close the conditional region opened by the action.
  Action.Done(CGF);
}
2280 
emitTaskyieldCall(CodeGenFunction & CGF,SourceLocation Loc)2281 void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
2282                                         SourceLocation Loc) {
2283   if (!CGF.HaveInsertPoint())
2284     return;
2285   if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
2286     OMPBuilder.createTaskyield(CGF.Builder);
2287   } else {
2288     // Build call __kmpc_omp_taskyield(loc, thread_id, 0);
2289     llvm::Value *Args[] = {
2290         emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2291         llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)};
2292     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2293                             CGM.getModule(), OMPRTL___kmpc_omp_taskyield),
2294                         Args);
2295   }
2296 
2297   if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
2298     Region->emitUntiedSwitch(CGF);
2299 }
2300 
// Emits a 'taskgroup' construct:
//   __kmpc_taskgroup(ident_t *, gtid);
//   TaskgroupOpGen();
//   __kmpc_end_taskgroup(ident_t *, gtid);
void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF,
                                          const RegionCodeGenTy &TaskgroupOpGen,
                                          SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // Prepare arguments and build a call to __kmpc_taskgroup. The action is
  // unconditional: the region body always runs between the two calls.
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_taskgroup),
                        Args,
                        OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_end_taskgroup),
                        Args);
  TaskgroupOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen);
}
2320 
2321 /// Given an array of pointers to variables, project the address of a
2322 /// given variable.
emitAddrOfVarFromArray(CodeGenFunction & CGF,Address Array,unsigned Index,const VarDecl * Var)2323 static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array,
2324                                       unsigned Index, const VarDecl *Var) {
2325   // Pull out the pointer to the variable.
2326   Address PtrAddr = CGF.Builder.CreateConstArrayGEP(Array, Index);
2327   llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr);
2328 
2329   llvm::Type *ElemTy = CGF.ConvertTypeForMem(Var->getType());
2330   return Address(
2331       CGF.Builder.CreateBitCast(
2332           Ptr, ElemTy->getPointerTo(Ptr->getType()->getPointerAddressSpace())),
2333       ElemTy, CGF.getContext().getDeclAlign(Var));
2334 }
2335 
/// Emits the internal helper used by the 'copyprivate' clause: a function of
/// type "void copy_func(void *LHSArg, void *RHSArg)" where both arguments
/// point to arrays of void* (one slot per copyprivate variable). The helper
/// performs one assignment per variable using the matching expression from
/// \p AssignmentOps, so user-defined copy semantics are honored.
static llvm::Value *emitCopyprivateCopyFunction(
    CodeGenModule &CGM, llvm::Type *ArgsElemType,
    ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs,
    ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps,
    SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  // void copy_func(void *LHSArg, void *RHSArg);
  FunctionArgList Args;
  ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  Args.push_back(&LHSArg);
  Args.push_back(&RHSArg);
  const auto &CGFI =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  std::string Name =
      CGM.getOpenMPRuntime().getName({"omp", "copyprivate", "copy_func"});
  auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
                                    llvm::GlobalValue::InternalLinkage, Name,
                                    &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
  // Reinterpret the raw void* parameters as arrays of void*:
  // Dest = (void*[n])(LHSArg);
  // Src = (void*[n])(RHSArg);
  Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                  CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
                  ArgsElemType->getPointerTo()),
              ArgsElemType, CGF.getPointerAlign());
  Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                  CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
                  ArgsElemType->getPointerTo()),
              ArgsElemType, CGF.getPointerAlign());
  // Emit one copy per variable:
  // *(Type0*)Dst[0] = *(Type0*)Src[0];
  // *(Type1*)Dst[1] = *(Type1*)Src[1];
  // ...
  // *(Typen*)Dst[n] = *(Typen*)Src[n];
  for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) {
    const auto *DestVar =
        cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl());
    Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar);

    const auto *SrcVar =
        cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl());
    Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar);

    // Emit the assignment via the clause's expression so that class types
    // go through their copy-assignment operator.
    const auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl();
    QualType Type = VD->getType();
    CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]);
  }
  CGF.FinishFunction();
  return Fn;
}
2391 
// Emits a 'single' construct:
//   int32 did_it = 0;
//   if(__kmpc_single(ident_t *, gtid)) {
//     SingleOpGen();
//     __kmpc_end_single(ident_t *, gtid);
//     did_it = 1;
//   }
//   call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
//                           <copy_func>, did_it);   // only with copyprivate
void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF,
                                       const RegionCodeGenTy &SingleOpGen,
                                       SourceLocation Loc,
                                       ArrayRef<const Expr *> CopyprivateVars,
                                       ArrayRef<const Expr *> SrcExprs,
                                       ArrayRef<const Expr *> DstExprs,
                                       ArrayRef<const Expr *> AssignmentOps) {
  if (!CGF.HaveInsertPoint())
    return;
  assert(CopyprivateVars.size() == SrcExprs.size() &&
         CopyprivateVars.size() == DstExprs.size() &&
         CopyprivateVars.size() == AssignmentOps.size());
  ASTContext &C = CGM.getContext();

  Address DidIt = Address::invalid();
  if (!CopyprivateVars.empty()) {
    // 'did_it' flags whether this thread executed the single region; all
    // threads pass it to __kmpc_copyprivate afterwards.
    // int32 did_it = 0;
    QualType KmpInt32Ty =
        C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
    DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it");
    CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt);
  }
  // Prepare arguments and build a call to __kmpc_single. The action is
  // conditional: only the thread for which __kmpc_single returns non-zero
  // executes the body.
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_single),
                        Args,
                        OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_end_single),
                        Args,
                        /*Conditional=*/true);
  SingleOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_single, SingleOpGen);
  if (DidIt.isValid()) {
    // Still inside the conditional region: only the executing thread sets
    // did_it = 1;
    CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt);
  }
  Action.Done(CGF);
  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
  // <copy_func>, did_it);
  if (DidIt.isValid()) {
    llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size());
    QualType CopyprivateArrayTy = C.getConstantArrayType(
        C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal,
        /*IndexTypeQuals=*/0);
    // Create a list of all private variables for copyprivate.
    Address CopyprivateList =
        CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list");
    for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) {
      Address Elem = CGF.Builder.CreateConstArrayGEP(CopyprivateList, I);
      CGF.Builder.CreateStore(
          CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
              CGF.EmitLValue(CopyprivateVars[I]).getPointer(CGF),
              CGF.VoidPtrTy),
          Elem);
    }
    // Build function that copies private values from single region to all other
    // threads in the corresponding parallel region.
    // NOTE(review): SrcExprs/DstExprs are passed into parameters named
    // DestExprs/SrcExprs in the helper — verify the ordering against the
    // helper's signature before changing either side.
    llvm::Value *CpyFn = emitCopyprivateCopyFunction(
        CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy), CopyprivateVars,
        SrcExprs, DstExprs, AssignmentOps, Loc);
    llvm::Value *BufSize = CGF.getTypeSize(CopyprivateArrayTy);
    Address CL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        CopyprivateList, CGF.VoidPtrTy, CGF.Int8Ty);
    llvm::Value *DidItVal = CGF.Builder.CreateLoad(DidIt);
    llvm::Value *Args[] = {
        emitUpdateLocation(CGF, Loc), // ident_t *<loc>
        getThreadID(CGF, Loc),        // i32 <gtid>
        BufSize,                      // size_t <buf_size>
        CL.getPointer(),              // void *<copyprivate list>
        CpyFn,                        // void (*) (void *, void *) <copy_func>
        DidItVal                      // i32 did_it
    };
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_copyprivate),
                        Args);
  }
}
2478 
emitOrderedRegion(CodeGenFunction & CGF,const RegionCodeGenTy & OrderedOpGen,SourceLocation Loc,bool IsThreads)2479 void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF,
2480                                         const RegionCodeGenTy &OrderedOpGen,
2481                                         SourceLocation Loc, bool IsThreads) {
2482   if (!CGF.HaveInsertPoint())
2483     return;
2484   // __kmpc_ordered(ident_t *, gtid);
2485   // OrderedOpGen();
2486   // __kmpc_end_ordered(ident_t *, gtid);
2487   // Prepare arguments and build a call to __kmpc_ordered
2488   if (IsThreads) {
2489     llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2490     CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2491                               CGM.getModule(), OMPRTL___kmpc_ordered),
2492                           Args,
2493                           OMPBuilder.getOrCreateRuntimeFunction(
2494                               CGM.getModule(), OMPRTL___kmpc_end_ordered),
2495                           Args);
2496     OrderedOpGen.setAction(Action);
2497     emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
2498     return;
2499   }
2500   emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
2501 }
2502 
getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind)2503 unsigned CGOpenMPRuntime::getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind) {
2504   unsigned Flags;
2505   if (Kind == OMPD_for)
2506     Flags = OMP_IDENT_BARRIER_IMPL_FOR;
2507   else if (Kind == OMPD_sections)
2508     Flags = OMP_IDENT_BARRIER_IMPL_SECTIONS;
2509   else if (Kind == OMPD_single)
2510     Flags = OMP_IDENT_BARRIER_IMPL_SINGLE;
2511   else if (Kind == OMPD_barrier)
2512     Flags = OMP_IDENT_BARRIER_EXPL;
2513   else
2514     Flags = OMP_IDENT_BARRIER_IMPL;
2515   return Flags;
2516 }
2517 
// Chooses the default schedule/chunk for a loop directive when no schedule
// clause is present. Only doacross loops (an 'ordered' clause with a loop
// count) are special-cased here; otherwise the outputs are left untouched.
void CGOpenMPRuntime::getDefaultScheduleAndChunk(
    CodeGenFunction &CGF, const OMPLoopDirective &S,
    OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const {
  // Check if the loop directive is actually a doacross loop directive. In this
  // case choose static, 1 schedule.
  if (llvm::any_of(
          S.getClausesOfKind<OMPOrderedClause>(),
          [](const OMPOrderedClause *C) { return C->getNumForLoops(); })) {
    ScheduleKind = OMPC_SCHEDULE_static;
    // Chunk size is 1 in this case; synthesize an unsigned 32-bit integer
    // literal for it.
    llvm::APInt ChunkSize(32, 1);
    ChunkExpr = IntegerLiteral::Create(
        CGF.getContext(), ChunkSize,
        CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/0),
        SourceLocation());
  }
}
2535 
/// Emit an OpenMP barrier at \p Loc for directive kind \p Kind.
/// \param EmitChecks     If true, and the enclosing region supports cancel,
///                       emit the cancellation-check branch after the barrier.
/// \param ForceSimpleCall If true, emit a plain __kmpc_barrier even inside a
///                       cancellable region.
void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
                                      OpenMPDirectiveKind Kind, bool EmitChecks,
                                      bool ForceSimpleCall) {
  // Check if we should use the OMPBuilder
  auto *OMPRegionInfo =
      dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo);
  if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
    // Delegate the whole barrier (including cancellation handling) to the
    // OpenMPIRBuilder when it is enabled.
    CGF.Builder.restoreIP(OMPBuilder.createBarrier(
        CGF.Builder, Kind, ForceSimpleCall, EmitChecks));
    return;
  }

  if (!CGF.HaveInsertPoint())
    return;
  // Build call __kmpc_cancel_barrier(loc, thread_id);
  // Build call __kmpc_barrier(loc, thread_id);
  unsigned Flags = getDefaultFlagsForBarriers(Kind);
  // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc,
  // thread_id);
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags),
                         getThreadID(CGF, Loc)};
  if (OMPRegionInfo) {
    // Inside a cancellable region the barrier must be the cancellation
    // variant so a cancelled team can rendezvous and exit.
    if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) {
      llvm::Value *Result = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                                OMPRTL___kmpc_cancel_barrier),
          Args);
      if (EmitChecks) {
        // if (__kmpc_cancel_barrier()) {
        //   exit from construct;
        // }
        llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
        llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
        llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
        CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
        CGF.EmitBlock(ExitBB);
        //   exit from construct;
        CodeGenFunction::JumpDest CancelDestination =
            CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
        CGF.EmitBranchThroughCleanup(CancelDestination);
        CGF.EmitBlock(ContBB, /*IsFinished=*/true);
      }
      return;
    }
  }
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_barrier),
                      Args);
}
2585 
2586 /// Map the OpenMP loop schedule to the runtime enumeration.
getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind,bool Chunked,bool Ordered)2587 static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind,
2588                                           bool Chunked, bool Ordered) {
2589   switch (ScheduleKind) {
2590   case OMPC_SCHEDULE_static:
2591     return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked)
2592                    : (Ordered ? OMP_ord_static : OMP_sch_static);
2593   case OMPC_SCHEDULE_dynamic:
2594     return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked;
2595   case OMPC_SCHEDULE_guided:
2596     return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked;
2597   case OMPC_SCHEDULE_runtime:
2598     return Ordered ? OMP_ord_runtime : OMP_sch_runtime;
2599   case OMPC_SCHEDULE_auto:
2600     return Ordered ? OMP_ord_auto : OMP_sch_auto;
2601   case OMPC_SCHEDULE_unknown:
2602     assert(!Chunked && "chunk was specified but schedule kind not known");
2603     return Ordered ? OMP_ord_static : OMP_sch_static;
2604   }
2605   llvm_unreachable("Unexpected runtime schedule");
2606 }
2607 
2608 /// Map the OpenMP distribute schedule to the runtime enumeration.
2609 static OpenMPSchedType
getRuntimeSchedule(OpenMPDistScheduleClauseKind ScheduleKind,bool Chunked)2610 getRuntimeSchedule(OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) {
2611   // only static is allowed for dist_schedule
2612   return Chunked ? OMP_dist_sch_static_chunked : OMP_dist_sch_static;
2613 }
2614 
isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind,bool Chunked) const2615 bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind,
2616                                          bool Chunked) const {
2617   OpenMPSchedType Schedule =
2618       getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
2619   return Schedule == OMP_sch_static;
2620 }
2621 
isStaticNonchunked(OpenMPDistScheduleClauseKind ScheduleKind,bool Chunked) const2622 bool CGOpenMPRuntime::isStaticNonchunked(
2623     OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
2624   OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
2625   return Schedule == OMP_dist_sch_static;
2626 }
2627 
isStaticChunked(OpenMPScheduleClauseKind ScheduleKind,bool Chunked) const2628 bool CGOpenMPRuntime::isStaticChunked(OpenMPScheduleClauseKind ScheduleKind,
2629                                       bool Chunked) const {
2630   OpenMPSchedType Schedule =
2631       getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
2632   return Schedule == OMP_sch_static_chunked;
2633 }
2634 
isStaticChunked(OpenMPDistScheduleClauseKind ScheduleKind,bool Chunked) const2635 bool CGOpenMPRuntime::isStaticChunked(
2636     OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
2637   OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
2638   return Schedule == OMP_dist_sch_static_chunked;
2639 }
2640 
isDynamic(OpenMPScheduleClauseKind ScheduleKind) const2641 bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const {
2642   OpenMPSchedType Schedule =
2643       getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false);
2644   assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here");
2645   return Schedule != OMP_sch_static;
2646 }
2647 
addMonoNonMonoModifier(CodeGenModule & CGM,OpenMPSchedType Schedule,OpenMPScheduleClauseModifier M1,OpenMPScheduleClauseModifier M2)2648 static int addMonoNonMonoModifier(CodeGenModule &CGM, OpenMPSchedType Schedule,
2649                                   OpenMPScheduleClauseModifier M1,
2650                                   OpenMPScheduleClauseModifier M2) {
2651   int Modifier = 0;
2652   switch (M1) {
2653   case OMPC_SCHEDULE_MODIFIER_monotonic:
2654     Modifier = OMP_sch_modifier_monotonic;
2655     break;
2656   case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
2657     Modifier = OMP_sch_modifier_nonmonotonic;
2658     break;
2659   case OMPC_SCHEDULE_MODIFIER_simd:
2660     if (Schedule == OMP_sch_static_chunked)
2661       Schedule = OMP_sch_static_balanced_chunked;
2662     break;
2663   case OMPC_SCHEDULE_MODIFIER_last:
2664   case OMPC_SCHEDULE_MODIFIER_unknown:
2665     break;
2666   }
2667   switch (M2) {
2668   case OMPC_SCHEDULE_MODIFIER_monotonic:
2669     Modifier = OMP_sch_modifier_monotonic;
2670     break;
2671   case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
2672     Modifier = OMP_sch_modifier_nonmonotonic;
2673     break;
2674   case OMPC_SCHEDULE_MODIFIER_simd:
2675     if (Schedule == OMP_sch_static_chunked)
2676       Schedule = OMP_sch_static_balanced_chunked;
2677     break;
2678   case OMPC_SCHEDULE_MODIFIER_last:
2679   case OMPC_SCHEDULE_MODIFIER_unknown:
2680     break;
2681   }
2682   // OpenMP 5.0, 2.9.2 Worksharing-Loop Construct, Desription.
2683   // If the static schedule kind is specified or if the ordered clause is
2684   // specified, and if the nonmonotonic modifier is not specified, the effect is
2685   // as if the monotonic modifier is specified. Otherwise, unless the monotonic
2686   // modifier is specified, the effect is as if the nonmonotonic modifier is
2687   // specified.
2688   if (CGM.getLangOpts().OpenMP >= 50 && Modifier == 0) {
2689     if (!(Schedule == OMP_sch_static_chunked || Schedule == OMP_sch_static ||
2690           Schedule == OMP_sch_static_balanced_chunked ||
2691           Schedule == OMP_ord_static_chunked || Schedule == OMP_ord_static ||
2692           Schedule == OMP_dist_sch_static_chunked ||
2693           Schedule == OMP_dist_sch_static))
2694       Modifier = OMP_sch_modifier_nonmonotonic;
2695   }
2696   return Schedule | Modifier;
2697 }
2698 
/// Emit the __kmpc_dispatch_init_* call that starts a dynamically scheduled
/// (or ordered) worksharing loop. Static unordered schedules must use the
/// static-init path instead (see the assert below).
void CGOpenMPRuntime::emitForDispatchInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
    bool Ordered, const DispatchRTInput &DispatchValues) {
  if (!CGF.HaveInsertPoint())
    return;
  OpenMPSchedType Schedule = getRuntimeSchedule(
      ScheduleKind.Schedule, DispatchValues.Chunk != nullptr, Ordered);
  // Unordered static schedules are handled by emitForStaticInit, not here.
  assert(Ordered ||
         (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked &&
          Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked &&
          Schedule != OMP_sch_static_balanced_chunked));
  // Call __kmpc_dispatch_init(
  //          ident_t *loc, kmp_int32 tid, kmp_int32 schedule,
  //          kmp_int[32|64] lower, kmp_int[32|64] upper,
  //          kmp_int[32|64] stride, kmp_int[32|64] chunk);

  // If the Chunk was not specified in the clause - use default value 1.
  llvm::Value *Chunk = DispatchValues.Chunk ? DispatchValues.Chunk
                                            : CGF.Builder.getIntN(IVSize, 1);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc),
      getThreadID(CGF, Loc),
      CGF.Builder.getInt32(addMonoNonMonoModifier(
          CGM, Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type
      DispatchValues.LB,                                     // Lower
      DispatchValues.UB,                                     // Upper
      CGF.Builder.getIntN(IVSize, 1),                        // Stride
      Chunk                                                  // Chunk
  };
  CGF.EmitRuntimeCall(createDispatchInitFunction(IVSize, IVSigned), Args);
}
2731 
/// Shared helper that emits the __kmpc_for_static_init_* (or distribute
/// static init) runtime call for a statically scheduled loop. The schedule
/// must already be one of the static variants, and ordered loops are not
/// handled here (they go through the dispatch path).
static void emitForStaticInitCall(
    CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId,
    llvm::FunctionCallee ForStaticInitFunction, OpenMPSchedType Schedule,
    OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2,
    const CGOpenMPRuntime::StaticRTInput &Values) {
  if (!CGF.HaveInsertPoint())
    return;

  assert(!Values.Ordered);
  assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked ||
         Schedule == OMP_sch_static_balanced_chunked ||
         Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked ||
         Schedule == OMP_dist_sch_static ||
         Schedule == OMP_dist_sch_static_chunked);

  // Call __kmpc_for_static_init(
  //          ident_t *loc, kmp_int32 tid, kmp_int32 schedtype,
  //          kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower,
  //          kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
  //          kmp_int[32|64] incr, kmp_int[32|64] chunk);
  llvm::Value *Chunk = Values.Chunk;
  if (Chunk == nullptr) {
    // A missing chunk is only valid for the non-chunked schedules.
    assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static ||
            Schedule == OMP_dist_sch_static) &&
           "expected static non-chunked schedule");
    // If the Chunk was not specified in the clause - use default value 1.
    Chunk = CGF.Builder.getIntN(Values.IVSize, 1);
  } else {
    assert((Schedule == OMP_sch_static_chunked ||
            Schedule == OMP_sch_static_balanced_chunked ||
            Schedule == OMP_ord_static_chunked ||
            Schedule == OMP_dist_sch_static_chunked) &&
           "expected static chunked schedule");
  }
  llvm::Value *Args[] = {
      UpdateLocation,
      ThreadId,
      CGF.Builder.getInt32(addMonoNonMonoModifier(CGF.CGM, Schedule, M1,
                                                  M2)), // Schedule type
      Values.IL.getPointer(),                           // &isLastIter
      Values.LB.getPointer(),                           // &LB
      Values.UB.getPointer(),                           // &UB
      Values.ST.getPointer(),                           // &Stride
      CGF.Builder.getIntN(Values.IVSize, 1),            // Incr
      Chunk                                             // Chunk
  };
  CGF.EmitRuntimeCall(ForStaticInitFunction, Args);
}
2780 
/// Emit the static-init runtime call for a worksharing (loop or sections)
/// directive, tagging the location with the matching OMP_IDENT_WORK_* flag.
void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF,
                                        SourceLocation Loc,
                                        OpenMPDirectiveKind DKind,
                                        const OpenMPScheduleTy &ScheduleKind,
                                        const StaticRTInput &Values) {
  OpenMPSchedType ScheduleNum = getRuntimeSchedule(
      ScheduleKind.Schedule, Values.Chunk != nullptr, Values.Ordered);
  assert(isOpenMPWorksharingDirective(DKind) &&
         "Expected loop-based or sections-based directive.");
  // Mark the ident_t with the kind of worksharing construct being set up.
  llvm::Value *UpdatedLocation = emitUpdateLocation(CGF, Loc,
                                             isOpenMPLoopDirective(DKind)
                                                 ? OMP_IDENT_WORK_LOOP
                                                 : OMP_IDENT_WORK_SECTIONS);
  llvm::Value *ThreadId = getThreadID(CGF, Loc);
  llvm::FunctionCallee StaticInitFunction =
      createForStaticInitFunction(Values.IVSize, Values.IVSigned, false);
  auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
  emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
                        ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, Values);
}
2801 
/// Emit the static-init runtime call for a 'distribute' directive. On GPU
/// device compilations (AMDGCN/NVPTX) a dedicated distribute entry point is
/// used; schedule modifiers do not apply to dist_schedule, hence 'unknown'.
void CGOpenMPRuntime::emitDistributeStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDistScheduleClauseKind SchedKind,
    const CGOpenMPRuntime::StaticRTInput &Values) {
  OpenMPSchedType ScheduleNum =
      getRuntimeSchedule(SchedKind, Values.Chunk != nullptr);
  llvm::Value *UpdatedLocation =
      emitUpdateLocation(CGF, Loc, OMP_IDENT_WORK_DISTRIBUTE);
  llvm::Value *ThreadId = getThreadID(CGF, Loc);
  llvm::FunctionCallee StaticInitFunction;
  // GPU targets get the distribute-specific static init function.
  bool isGPUDistribute =
      CGM.getLangOpts().OpenMPIsDevice &&
      (CGM.getTriple().isAMDGCN() || CGM.getTriple().isNVPTX());
  StaticInitFunction = createForStaticInitFunction(
      Values.IVSize, Values.IVSigned, isGPUDistribute);

  emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
                        ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown,
                        OMPC_SCHEDULE_MODIFIER_unknown, Values);
}
2822 
/// Emit the runtime call that closes a statically scheduled worksharing
/// region: __kmpc_distribute_static_fini on GPU device distribute regions,
/// __kmpc_for_static_fini everywhere else.
void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF,
                                          SourceLocation Loc,
                                          OpenMPDirectiveKind DKind) {
  if (!CGF.HaveInsertPoint())
    return;
  // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid);
  // The ident_t flag must match the one used by the corresponding init call.
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc,
                         isOpenMPDistributeDirective(DKind)
                             ? OMP_IDENT_WORK_DISTRIBUTE
                             : isOpenMPLoopDirective(DKind)
                                   ? OMP_IDENT_WORK_LOOP
                                   : OMP_IDENT_WORK_SECTIONS),
      getThreadID(CGF, Loc)};
  auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
  if (isOpenMPDistributeDirective(DKind) && CGM.getLangOpts().OpenMPIsDevice &&
      (CGM.getTriple().isAMDGCN() || CGM.getTriple().isNVPTX()))
    CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_distribute_static_fini),
        Args);
  else
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_for_static_fini),
                        Args);
}
2849 
emitForOrderedIterationEnd(CodeGenFunction & CGF,SourceLocation Loc,unsigned IVSize,bool IVSigned)2850 void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
2851                                                  SourceLocation Loc,
2852                                                  unsigned IVSize,
2853                                                  bool IVSigned) {
2854   if (!CGF.HaveInsertPoint())
2855     return;
2856   // Call __kmpc_for_dynamic_fini_(4|8)[u](ident_t *loc, kmp_int32 tid);
2857   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2858   CGF.EmitRuntimeCall(createDispatchFiniFunction(IVSize, IVSigned), Args);
2859 }
2860 
emitForNext(CodeGenFunction & CGF,SourceLocation Loc,unsigned IVSize,bool IVSigned,Address IL,Address LB,Address UB,Address ST)2861 llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF,
2862                                           SourceLocation Loc, unsigned IVSize,
2863                                           bool IVSigned, Address IL,
2864                                           Address LB, Address UB,
2865                                           Address ST) {
2866   // Call __kmpc_dispatch_next(
2867   //          ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter,
2868   //          kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper,
2869   //          kmp_int[32|64] *p_stride);
2870   llvm::Value *Args[] = {
2871       emitUpdateLocation(CGF, Loc),
2872       getThreadID(CGF, Loc),
2873       IL.getPointer(), // &isLastIter
2874       LB.getPointer(), // &Lower
2875       UB.getPointer(), // &Upper
2876       ST.getPointer()  // &Stride
2877   };
2878   llvm::Value *Call =
2879       CGF.EmitRuntimeCall(createDispatchNextFunction(IVSize, IVSigned), Args);
2880   return CGF.EmitScalarConversion(
2881       Call, CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/1),
2882       CGF.getContext().BoolTy, Loc);
2883 }
2884 
emitNumThreadsClause(CodeGenFunction & CGF,llvm::Value * NumThreads,SourceLocation Loc)2885 void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
2886                                            llvm::Value *NumThreads,
2887                                            SourceLocation Loc) {
2888   if (!CGF.HaveInsertPoint())
2889     return;
2890   // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads)
2891   llvm::Value *Args[] = {
2892       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2893       CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)};
2894   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2895                           CGM.getModule(), OMPRTL___kmpc_push_num_threads),
2896                       Args);
2897 }
2898 
emitProcBindClause(CodeGenFunction & CGF,ProcBindKind ProcBind,SourceLocation Loc)2899 void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF,
2900                                          ProcBindKind ProcBind,
2901                                          SourceLocation Loc) {
2902   if (!CGF.HaveInsertPoint())
2903     return;
2904   assert(ProcBind != OMP_PROC_BIND_unknown && "Unsupported proc_bind value.");
2905   // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind)
2906   llvm::Value *Args[] = {
2907       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2908       llvm::ConstantInt::get(CGM.IntTy, unsigned(ProcBind), /*isSigned=*/true)};
2909   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2910                           CGM.getModule(), OMPRTL___kmpc_push_proc_bind),
2911                       Args);
2912 }
2913 
emitFlush(CodeGenFunction & CGF,ArrayRef<const Expr * >,SourceLocation Loc,llvm::AtomicOrdering AO)2914 void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>,
2915                                 SourceLocation Loc, llvm::AtomicOrdering AO) {
2916   if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
2917     OMPBuilder.createFlush(CGF.Builder);
2918   } else {
2919     if (!CGF.HaveInsertPoint())
2920       return;
2921     // Build call void __kmpc_flush(ident_t *loc)
2922     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2923                             CGM.getModule(), OMPRTL___kmpc_flush),
2924                         emitUpdateLocation(CGF, Loc));
2925   }
2926 }
2927 
namespace {
/// Indexes of fields for type kmp_task_t. These must match the field order of
/// the kmp_task_t record built by this file's task codegen.
enum KmpTaskTFields {
  /// List of shared variables.
  KmpTaskTShareds,
  /// Task routine.
  KmpTaskTRoutine,
  /// Partition id for the untied tasks.
  KmpTaskTPartId,
  /// Function with call of destructors for private variables.
  Data1,
  /// Task priority.
  Data2,
  /// (Taskloops only) Lower bound.
  KmpTaskTLowerBound,
  /// (Taskloops only) Upper bound.
  KmpTaskTUpperBound,
  /// (Taskloops only) Stride.
  KmpTaskTStride,
  /// (Taskloops only) Is last iteration flag.
  KmpTaskTLastIter,
  /// (Taskloops only) Reduction data.
  KmpTaskTReductions,
};
} // anonymous namespace
2953 
empty() const2954 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::empty() const {
2955   return OffloadEntriesTargetRegion.empty() &&
2956          OffloadEntriesDeviceGlobalVar.empty();
2957 }
2958 
2959 /// Initialize target region entry.
2960 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
initializeTargetRegionEntryInfo(unsigned DeviceID,unsigned FileID,StringRef ParentName,unsigned LineNum,unsigned Order)2961     initializeTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
2962                                     StringRef ParentName, unsigned LineNum,
2963                                     unsigned Order) {
2964   assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
2965                                              "only required for the device "
2966                                              "code generation.");
2967   OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] =
2968       OffloadEntryInfoTargetRegion(Order, /*Addr=*/nullptr, /*ID=*/nullptr,
2969                                    OMPTargetRegionEntryTargetRegion);
2970   ++OffloadingEntriesNum;
2971 }
2972 
/// Register a target region entry with its address, ID and flags.
/// On the device side this fills in an entry that was created earlier by
/// initializeTargetRegionEntryInfo; on the host side it creates a brand-new
/// entry.
void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
    registerTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
                                  StringRef ParentName, unsigned LineNum,
                                  llvm::Constant *Addr, llvm::Constant *ID,
                                  OMPTargetRegionEntryKind Flags) {
  // If we are emitting code for a target, the entry is already initialized,
  // only has to be registered.
  if (CGM.getLangOpts().OpenMPIsDevice) {
    // This could happen if the device compilation is invoked standalone.
    if (!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum))
      return;
    auto &Entry =
        OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum];
    Entry.setAddress(Addr);
    Entry.setID(ID);
    Entry.setFlags(Flags);
  } else {
    // Host side: a plain target-region entry that already exists (ignoring
    // its address/ID) is silently skipped instead of re-registered.
    if (Flags ==
            OffloadEntriesInfoManagerTy::OMPTargetRegionEntryTargetRegion &&
        hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum,
                                 /*IgnoreAddressId*/ true))
      return;
    assert(!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum) &&
           "Target region entry already registered!");
    OffloadEntryInfoTargetRegion Entry(OffloadingEntriesNum, Addr, ID, Flags);
    OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = Entry;
    ++OffloadingEntriesNum;
  }
}
3002 
hasTargetRegionEntryInfo(unsigned DeviceID,unsigned FileID,StringRef ParentName,unsigned LineNum,bool IgnoreAddressId) const3003 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::hasTargetRegionEntryInfo(
3004     unsigned DeviceID, unsigned FileID, StringRef ParentName, unsigned LineNum,
3005     bool IgnoreAddressId) const {
3006   auto PerDevice = OffloadEntriesTargetRegion.find(DeviceID);
3007   if (PerDevice == OffloadEntriesTargetRegion.end())
3008     return false;
3009   auto PerFile = PerDevice->second.find(FileID);
3010   if (PerFile == PerDevice->second.end())
3011     return false;
3012   auto PerParentName = PerFile->second.find(ParentName);
3013   if (PerParentName == PerFile->second.end())
3014     return false;
3015   auto PerLine = PerParentName->second.find(LineNum);
3016   if (PerLine == PerParentName->second.end())
3017     return false;
3018   // Fail if this entry is already registered.
3019   if (!IgnoreAddressId &&
3020       (PerLine->second.getAddress() || PerLine->second.getID()))
3021     return false;
3022   return true;
3023 }
3024 
actOnTargetRegionEntriesInfo(const OffloadTargetRegionEntryInfoActTy & Action)3025 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::actOnTargetRegionEntriesInfo(
3026     const OffloadTargetRegionEntryInfoActTy &Action) {
3027   // Scan all target region entries and perform the provided action.
3028   for (const auto &D : OffloadEntriesTargetRegion)
3029     for (const auto &F : D.second)
3030       for (const auto &P : F.second)
3031         for (const auto &L : P.second)
3032           Action(D.first, F.first, P.first(), L.first, L.second);
3033 }
3034 
/// Initialize a device global variable entry at the given ordinal (address
/// and size are filled in later by registerDeviceGlobalVarEntryInfo); only
/// meaningful during device codegen.
void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
    initializeDeviceGlobalVarEntryInfo(StringRef Name,
                                       OMPTargetGlobalVarEntryKind Flags,
                                       unsigned Order) {
  assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
                                             "only required for the device "
                                             "code generation.");
  OffloadEntriesDeviceGlobalVar.try_emplace(Name, Order, Flags);
  ++OffloadingEntriesNum;
}
3045 
/// Register a device global variable entry with its address, size, flags and
/// linkage. On the device side this completes an entry created earlier by
/// initializeDeviceGlobalVarEntryInfo; on the host side it creates or updates
/// the entry.
void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
    registerDeviceGlobalVarEntryInfo(StringRef VarName, llvm::Constant *Addr,
                                     CharUnits VarSize,
                                     OMPTargetGlobalVarEntryKind Flags,
                                     llvm::GlobalValue::LinkageTypes Linkage) {
  if (CGM.getLangOpts().OpenMPIsDevice) {
    // This could happen if the device compilation is invoked standalone.
    if (!hasDeviceGlobalVarEntryInfo(VarName))
      return;
    auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
    // NOTE(review): hasDeviceGlobalVarEntryInfo(VarName) was already checked
    // above, so the re-check here looks redundant — confirm against upstream.
    if (Entry.getAddress() && hasDeviceGlobalVarEntryInfo(VarName)) {
      // Entry already has an address: only backfill a missing size/linkage.
      if (Entry.getVarSize().isZero()) {
        Entry.setVarSize(VarSize);
        Entry.setLinkage(Linkage);
      }
      return;
    }
    Entry.setVarSize(VarSize);
    Entry.setLinkage(Linkage);
    Entry.setAddress(Addr);
  } else {
    if (hasDeviceGlobalVarEntryInfo(VarName)) {
      auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
      assert(Entry.isValid() && Entry.getFlags() == Flags &&
             "Entry not initialized!");
      // Only backfill a missing size/linkage on an existing entry.
      if (Entry.getVarSize().isZero()) {
        Entry.setVarSize(VarSize);
        Entry.setLinkage(Linkage);
      }
      return;
    }
    OffloadEntriesDeviceGlobalVar.try_emplace(
        VarName, OffloadingEntriesNum, Addr, VarSize, Flags, Linkage);
    ++OffloadingEntriesNum;
  }
}
3082 
3083 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
actOnDeviceGlobalVarEntriesInfo(const OffloadDeviceGlobalVarEntryInfoActTy & Action)3084     actOnDeviceGlobalVarEntriesInfo(
3085         const OffloadDeviceGlobalVarEntryInfoActTy &Action) {
3086   // Scan all target region entries and perform the provided action.
3087   for (const auto &E : OffloadEntriesDeviceGlobalVar)
3088     Action(E.getKey(), E.getValue());
3089 }
3090 
/// Create the offloading entry for \p ID by delegating to the
/// OpenMPIRBuilder, using \p Addr's name as the entry name.
/// NOTE(review): \p Linkage is accepted but not forwarded here — presumably
/// handled inside OMPBuilder.emitOffloadingEntry or by overrides; confirm.
void CGOpenMPRuntime::createOffloadEntry(
    llvm::Constant *ID, llvm::Constant *Addr, uint64_t Size, int32_t Flags,
    llvm::GlobalValue::LinkageTypes Linkage) {
  OMPBuilder.emitOffloadingEntry(ID, Addr->getName(), Size, Flags);
}
3096 
/// Emits all registered offload entries (target regions and declare-target
/// variables) and mirrors them as !omp_offload.info metadata in the host IR,
/// so that the device-side compilation can rebuild the same, identically
/// ordered entry table (see loadOffloadInfoMetadata()).
void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() {
  // Emit the offloading entries and metadata so that the device codegen side
  // can easily figure out what to emit. The produced metadata looks like
  // this:
  //
  // !omp_offload.info = !{!1, ...}
  //
  // Right now we only generate metadata for function that contain target
  // regions.

  // If we are in simd mode or there are no entries, we don't need to do
  // anything.
  if (CGM.getLangOpts().OpenMPSimd || OffloadEntriesInfoManager.empty())
    return;

  llvm::Module &M = CGM.getModule();
  llvm::LLVMContext &C = M.getContext();
  // Entries indexed by their creation order; the order must be stable so that
  // host and device compilations agree on the entry table layout.
  SmallVector<std::tuple<const OffloadEntriesInfoManagerTy::OffloadEntryInfo *,
                         SourceLocation, StringRef>,
              16>
      OrderedEntries(OffloadEntriesInfoManager.size());
  llvm::SmallVector<StringRef, 16> ParentFunctions(
      OffloadEntriesInfoManager.size());

  // Auxiliary methods to create metadata values and strings.
  auto &&GetMDInt = [this](unsigned V) {
    return llvm::ConstantAsMetadata::get(
        llvm::ConstantInt::get(CGM.Int32Ty, V));
  };

  auto &&GetMDString = [&C](StringRef V) { return llvm::MDString::get(C, V); };

  // Create the offloading info metadata node.
  llvm::NamedMDNode *MD = M.getOrInsertNamedMetadata("omp_offload.info");

  // Create function that emits metadata for each target region entry;
  auto &&TargetRegionMetadataEmitter =
      [this, &C, MD, &OrderedEntries, &ParentFunctions, &GetMDInt,
       &GetMDString](
          unsigned DeviceID, unsigned FileID, StringRef ParentName,
          unsigned Line,
          const OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion &E) {
        // Generate metadata for target regions. Each entry of this metadata
        // contains:
        // - Entry 0 -> Kind of this type of metadata (0).
        // - Entry 1 -> Device ID of the file where the entry was identified.
        // - Entry 2 -> File ID of the file where the entry was identified.
        // - Entry 3 -> Mangled name of the function where the entry was
        // identified.
        // - Entry 4 -> Line in the file where the entry was identified.
        // - Entry 5 -> Order the entry was created.
        // The first element of the metadata node is the kind.
        llvm::Metadata *Ops[] = {GetMDInt(E.getKind()), GetMDInt(DeviceID),
                                 GetMDInt(FileID),      GetMDString(ParentName),
                                 GetMDInt(Line),        GetMDInt(E.getOrder())};

        // Map the (DeviceID, FileID, Line) triple back to a SourceLocation so
        // diagnostics emitted below can point at the original target region.
        SourceLocation Loc;
        for (auto I = CGM.getContext().getSourceManager().fileinfo_begin(),
                  E = CGM.getContext().getSourceManager().fileinfo_end();
             I != E; ++I) {
          if (I->getFirst()->getUniqueID().getDevice() == DeviceID &&
              I->getFirst()->getUniqueID().getFile() == FileID) {
            Loc = CGM.getContext().getSourceManager().translateFileLineCol(
                I->getFirst(), Line, 1);
            break;
          }
        }
        // Save this entry in the right position of the ordered entries array.
        OrderedEntries[E.getOrder()] = std::make_tuple(&E, Loc, ParentName);
        ParentFunctions[E.getOrder()] = ParentName;

        // Add metadata to the named metadata node.
        MD->addOperand(llvm::MDNode::get(C, Ops));
      };

  OffloadEntriesInfoManager.actOnTargetRegionEntriesInfo(
      TargetRegionMetadataEmitter);

  // Create function that emits metadata for each device global variable entry;
  auto &&DeviceGlobalVarMetadataEmitter =
      [&C, &OrderedEntries, &GetMDInt, &GetMDString,
       MD](StringRef MangledName,
           const OffloadEntriesInfoManagerTy::OffloadEntryInfoDeviceGlobalVar
               &E) {
        // Generate metadata for global variables. Each entry of this metadata
        // contains:
        // - Entry 0 -> Kind of this type of metadata (1).
        // - Entry 1 -> Mangled name of the variable.
        // - Entry 2 -> Declare target kind.
        // - Entry 3 -> Order the entry was created.
        // The first element of the metadata node is the kind.
        llvm::Metadata *Ops[] = {
            GetMDInt(E.getKind()), GetMDString(MangledName),
            GetMDInt(E.getFlags()), GetMDInt(E.getOrder())};

        // Save this entry in the right position of the ordered entries array.
        OrderedEntries[E.getOrder()] =
            std::make_tuple(&E, SourceLocation(), MangledName);

        // Add metadata to the named metadata node.
        MD->addOperand(llvm::MDNode::get(C, Ops));
      };

  OffloadEntriesInfoManager.actOnDeviceGlobalVarEntriesInfo(
      DeviceGlobalVarMetadataEmitter);

  // Now emit the actual offload entries in creation order, diagnosing any
  // entry that was registered but never received a valid address/ID.
  for (const auto &E : OrderedEntries) {
    assert(std::get<0>(E) && "All ordered entries must exist!");
    if (const auto *CE =
            dyn_cast<OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion>(
                std::get<0>(E))) {
      if (!CE->getID() || !CE->getAddress()) {
        // Do not blame the entry if the parent function is not emitted.
        StringRef FnName = ParentFunctions[CE->getOrder()];
        if (!CGM.GetGlobalValue(FnName))
          continue;
        unsigned DiagID = CGM.getDiags().getCustomDiagID(
            DiagnosticsEngine::Error,
            "Offloading entry for target region in %0 is incorrect: either the "
            "address or the ID is invalid.");
        CGM.getDiags().Report(std::get<1>(E), DiagID) << FnName;
        continue;
      }
      createOffloadEntry(CE->getID(), CE->getAddress(), /*Size=*/0,
                         CE->getFlags(), llvm::GlobalValue::WeakAnyLinkage);
    } else if (const auto *CE = dyn_cast<OffloadEntriesInfoManagerTy::
                                             OffloadEntryInfoDeviceGlobalVar>(
                   std::get<0>(E))) {
      OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags =
          static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>(
              CE->getFlags());
      switch (Flags) {
      case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo: {
        // On the device, 'to' entries are skipped entirely when unified
        // shared memory is required.
        if (CGM.getLangOpts().OpenMPIsDevice &&
            CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())
          continue;
        if (!CE->getAddress()) {
          unsigned DiagID = CGM.getDiags().getCustomDiagID(
              DiagnosticsEngine::Error, "Offloading entry for declare target "
                                        "variable %0 is incorrect: the "
                                        "address is invalid.");
          CGM.getDiags().Report(std::get<1>(E), DiagID) << std::get<2>(E);
          continue;
        }
        // The variable has no definition - no need to add the entry.
        if (CE->getVarSize().isZero())
          continue;
        break;
      }
      case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink:
        // 'link' entries have an address only on the host side; on the device
        // they are accessed indirectly and need no entry at all.
        assert(((CGM.getLangOpts().OpenMPIsDevice && !CE->getAddress()) ||
                (!CGM.getLangOpts().OpenMPIsDevice && CE->getAddress())) &&
               "Declaret target link address is set.");
        if (CGM.getLangOpts().OpenMPIsDevice)
          continue;
        if (!CE->getAddress()) {
          unsigned DiagID = CGM.getDiags().getCustomDiagID(
              DiagnosticsEngine::Error,
              "Offloading entry for declare target variable is incorrect: the "
              "address is invalid.");
          CGM.getDiags().Report(DiagID);
          continue;
        }
        break;
      }

      // Hidden or internal symbols on the device are not externally visible. We
      // should not attempt to register them by creating an offloading entry.
      if (auto *GV = dyn_cast<llvm::GlobalValue>(CE->getAddress()))
        if (GV->hasLocalLinkage() || GV->hasHiddenVisibility())
          continue;

      createOffloadEntry(CE->getAddress(), CE->getAddress(),
                         CE->getVarSize().getQuantity(), Flags,
                         CE->getLinkage());
    } else {
      llvm_unreachable("Unsupported entry kind.");
    }
  }
}
3277 
/// Loads all the offload entries information from the host IR
/// metadata.
void CGOpenMPRuntime::loadOffloadInfoMetadata() {
  // If we are in target mode, load the metadata from the host IR. This code has
  // to match the metadata creation in createOffloadEntriesAndInfoMetadata().

  // Only device compilations consume host-side metadata.
  if (!CGM.getLangOpts().OpenMPIsDevice)
    return;

  // Nothing to load if no host IR file was provided on the command line.
  if (CGM.getLangOpts().OMPHostIRFile.empty())
    return;

  auto Buf = llvm::MemoryBuffer::getFile(CGM.getLangOpts().OMPHostIRFile);
  if (auto EC = Buf.getError()) {
    CGM.getDiags().Report(diag::err_cannot_open_file)
        << CGM.getLangOpts().OMPHostIRFile << EC.message();
    return;
  }

  // Parse the host bitcode into a local, throwaway context; only the named
  // metadata is needed from it.
  llvm::LLVMContext C;
  auto ME = expectedToErrorOrAndEmitErrors(
      C, llvm::parseBitcodeFile(Buf.get()->getMemBufferRef(), C));

  if (auto EC = ME.getError()) {
    unsigned DiagID = CGM.getDiags().getCustomDiagID(
        DiagnosticsEngine::Error, "Unable to parse host IR file '%0':'%1'");
    CGM.getDiags().Report(DiagID)
        << CGM.getLangOpts().OMPHostIRFile << EC.message();
    return;
  }

  llvm::NamedMDNode *MD = ME.get()->getNamedMetadata("omp_offload.info");
  if (!MD)
    return;

  for (llvm::MDNode *MN : MD->operands()) {
    // Helpers to read the integer/string operands of one metadata node.
    auto &&GetMDInt = [MN](unsigned Idx) {
      auto *V = cast<llvm::ConstantAsMetadata>(MN->getOperand(Idx));
      return cast<llvm::ConstantInt>(V->getValue())->getZExtValue();
    };

    auto &&GetMDString = [MN](unsigned Idx) {
      auto *V = cast<llvm::MDString>(MN->getOperand(Idx));
      return V->getString();
    };

    // Operand 0 is the entry kind; the rest mirror the operand layout written
    // by createOffloadEntriesAndInfoMetadata().
    switch (GetMDInt(0)) {
    default:
      llvm_unreachable("Unexpected metadata!");
      break;
    case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
        OffloadingEntryInfoTargetRegion:
      OffloadEntriesInfoManager.initializeTargetRegionEntryInfo(
          /*DeviceID=*/GetMDInt(1), /*FileID=*/GetMDInt(2),
          /*ParentName=*/GetMDString(3), /*Line=*/GetMDInt(4),
          /*Order=*/GetMDInt(5));
      break;
    case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
        OffloadingEntryInfoDeviceGlobalVar:
      OffloadEntriesInfoManager.initializeDeviceGlobalVarEntryInfo(
          /*MangledName=*/GetMDString(1),
          static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>(
              /*Flags=*/GetMDInt(2)),
          /*Order=*/GetMDInt(3));
      break;
    }
  }
}
3346 
emitKmpRoutineEntryT(QualType KmpInt32Ty)3347 void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) {
3348   if (!KmpRoutineEntryPtrTy) {
3349     // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type.
3350     ASTContext &C = CGM.getContext();
3351     QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy};
3352     FunctionProtoType::ExtProtoInfo EPI;
3353     KmpRoutineEntryPtrQTy = C.getPointerType(
3354         C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI));
3355     KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy);
3356   }
3357 }
3358 
namespace {
/// Bundles the AST nodes that describe one private variable of a task-based
/// directive: the referencing expression, the original variable, its private
/// copy, and the initializer element (any of which may be null).
struct PrivateHelpersTy {
  PrivateHelpersTy(const Expr *OriginalRef, const VarDecl *Original,
                   const VarDecl *PrivateCopy, const VarDecl *PrivateElemInit)
      : OriginalRef(OriginalRef), Original(Original), PrivateCopy(PrivateCopy),
        PrivateElemInit(PrivateElemInit) {}
  // Constructor for local privates: only the original variable is known; the
  // remaining members stay null (see isLocalPrivate()).
  PrivateHelpersTy(const VarDecl *Original) : Original(Original) {}
  const Expr *OriginalRef = nullptr;
  const VarDecl *Original = nullptr;
  const VarDecl *PrivateCopy = nullptr;
  const VarDecl *PrivateElemInit = nullptr;
  // True when this entry was created with the VarDecl-only constructor, i.e.
  // it describes a local private with no separate copy or initializer.
  bool isLocalPrivate() const {
    return !OriginalRef && !PrivateCopy && !PrivateElemInit;
  }
};
/// A private's required alignment paired with its descriptor.
typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy;
} // anonymous namespace
3376 
isAllocatableDecl(const VarDecl * VD)3377 static bool isAllocatableDecl(const VarDecl *VD) {
3378   const VarDecl *CVD = VD->getCanonicalDecl();
3379   if (!CVD->hasAttr<OMPAllocateDeclAttr>())
3380     return false;
3381   const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
3382   // Use the default allocation.
3383   return !(AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc &&
3384            !AA->getAllocator());
3385 }
3386 
3387 static RecordDecl *
createPrivatesRecordDecl(CodeGenModule & CGM,ArrayRef<PrivateDataTy> Privates)3388 createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) {
3389   if (!Privates.empty()) {
3390     ASTContext &C = CGM.getContext();
3391     // Build struct .kmp_privates_t. {
3392     //         /*  private vars  */
3393     //       };
3394     RecordDecl *RD = C.buildImplicitRecord(".kmp_privates.t");
3395     RD->startDefinition();
3396     for (const auto &Pair : Privates) {
3397       const VarDecl *VD = Pair.second.Original;
3398       QualType Type = VD->getType().getNonReferenceType();
3399       // If the private variable is a local variable with lvalue ref type,
3400       // allocate the pointer instead of the pointee type.
3401       if (Pair.second.isLocalPrivate()) {
3402         if (VD->getType()->isLValueReferenceType())
3403           Type = C.getPointerType(Type);
3404         if (isAllocatableDecl(VD))
3405           Type = C.getPointerType(Type);
3406       }
3407       FieldDecl *FD = addFieldToRecordDecl(C, RD, Type);
3408       if (VD->hasAttrs()) {
3409         for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()),
3410              E(VD->getAttrs().end());
3411              I != E; ++I)
3412           FD->addAttr(*I);
3413       }
3414     }
3415     RD->completeDefinition();
3416     return RD;
3417   }
3418   return nullptr;
3419 }
3420 
3421 static RecordDecl *
createKmpTaskTRecordDecl(CodeGenModule & CGM,OpenMPDirectiveKind Kind,QualType KmpInt32Ty,QualType KmpRoutineEntryPointerQTy)3422 createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind,
3423                          QualType KmpInt32Ty,
3424                          QualType KmpRoutineEntryPointerQTy) {
3425   ASTContext &C = CGM.getContext();
3426   // Build struct kmp_task_t {
3427   //         void *              shareds;
3428   //         kmp_routine_entry_t routine;
3429   //         kmp_int32           part_id;
3430   //         kmp_cmplrdata_t data1;
3431   //         kmp_cmplrdata_t data2;
3432   // For taskloops additional fields:
3433   //         kmp_uint64          lb;
3434   //         kmp_uint64          ub;
3435   //         kmp_int64           st;
3436   //         kmp_int32           liter;
3437   //         void *              reductions;
3438   //       };
3439   RecordDecl *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TTK_Union);
3440   UD->startDefinition();
3441   addFieldToRecordDecl(C, UD, KmpInt32Ty);
3442   addFieldToRecordDecl(C, UD, KmpRoutineEntryPointerQTy);
3443   UD->completeDefinition();
3444   QualType KmpCmplrdataTy = C.getRecordType(UD);
3445   RecordDecl *RD = C.buildImplicitRecord("kmp_task_t");
3446   RD->startDefinition();
3447   addFieldToRecordDecl(C, RD, C.VoidPtrTy);
3448   addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy);
3449   addFieldToRecordDecl(C, RD, KmpInt32Ty);
3450   addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
3451   addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
3452   if (isOpenMPTaskLoopDirective(Kind)) {
3453     QualType KmpUInt64Ty =
3454         CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
3455     QualType KmpInt64Ty =
3456         CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
3457     addFieldToRecordDecl(C, RD, KmpUInt64Ty);
3458     addFieldToRecordDecl(C, RD, KmpUInt64Ty);
3459     addFieldToRecordDecl(C, RD, KmpInt64Ty);
3460     addFieldToRecordDecl(C, RD, KmpInt32Ty);
3461     addFieldToRecordDecl(C, RD, C.VoidPtrTy);
3462   }
3463   RD->completeDefinition();
3464   return RD;
3465 }
3466 
3467 static RecordDecl *
createKmpTaskTWithPrivatesRecordDecl(CodeGenModule & CGM,QualType KmpTaskTQTy,ArrayRef<PrivateDataTy> Privates)3468 createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy,
3469                                      ArrayRef<PrivateDataTy> Privates) {
3470   ASTContext &C = CGM.getContext();
3471   // Build struct kmp_task_t_with_privates {
3472   //         kmp_task_t task_data;
3473   //         .kmp_privates_t. privates;
3474   //       };
3475   RecordDecl *RD = C.buildImplicitRecord("kmp_task_t_with_privates");
3476   RD->startDefinition();
3477   addFieldToRecordDecl(C, RD, KmpTaskTQTy);
3478   if (const RecordDecl *PrivateRD = createPrivatesRecordDecl(CGM, Privates))
3479     addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD));
3480   RD->completeDefinition();
3481   return RD;
3482 }
3483 
/// Emit a proxy function which accepts kmp_task_t as the second
/// argument.
/// \code
/// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
///   TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, tt,
///   For taskloops:
///   tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
///   tt->reductions, tt->shareds);
///   return 0;
/// }
/// \endcode
static llvm::Function *
emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc,
                      OpenMPDirectiveKind Kind, QualType KmpInt32Ty,
                      QualType KmpTaskTWithPrivatesPtrQTy,
                      QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy,
                      QualType SharedsPtrTy, llvm::Function *TaskFunction,
                      llvm::Value *TaskPrivatesMap) {
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  // The proxy takes the runtime's fixed (gtid, task pointer) signature.
  ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                                KmpTaskTWithPrivatesPtrQTy.withRestrict(),
                                ImplicitParamDecl::Other);
  Args.push_back(&GtidArg);
  Args.push_back(&TaskTypeArg);
  const auto &TaskEntryFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
  llvm::FunctionType *TaskEntryTy =
      CGM.getTypes().GetFunctionType(TaskEntryFnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_entry", ""});
  auto *TaskEntry = llvm::Function::Create(
      TaskEntryTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskEntry, TaskEntryFnInfo);
  TaskEntry->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args,
                    Loc, Loc);

  // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map,
  // tt,
  // For taskloops:
  // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
  // tt->task_data.shareds);
  llvm::Value *GtidParam = CGF.EmitLoadOfScalar(
      CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc);
  LValue TDBase = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&TaskTypeArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  const auto *KmpTaskTWithPrivatesQTyRD =
      cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
  // Base addresses the embedded kmp_task_t (first field of the wrapper).
  LValue Base =
      CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
  const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
  auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
  LValue PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI);
  // part_id is passed by address, not by value.
  llvm::Value *PartidParam = PartIdLVal.getPointer(CGF);

  auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds);
  LValue SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI);
  llvm::Value *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.EmitLoadOfScalar(SharedsLVal, Loc),
      CGF.ConvertTypeForMem(SharedsPtrTy));

  // The privates block is the (optional) second field of the wrapper record;
  // pass its address, or null when there are no privates.
  auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
  llvm::Value *PrivatesParam;
  if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) {
    LValue PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI);
    PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        PrivatesLVal.getPointer(CGF), CGF.VoidPtrTy);
  } else {
    PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }

  llvm::Value *CommonArgs[] = {
      GtidParam, PartidParam, PrivatesParam, TaskPrivatesMap,
      CGF.Builder
          .CreatePointerBitCastOrAddrSpaceCast(TDBase.getAddress(CGF),
                                               CGF.VoidPtrTy, CGF.Int8Ty)
          .getPointer()};
  SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs),
                                          std::end(CommonArgs));
  // Taskloop outlined functions additionally receive the loop bounds, stride,
  // last-iteration flag, and reductions pointer, loaded from the task.
  if (isOpenMPTaskLoopDirective(Kind)) {
    auto LBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound);
    LValue LBLVal = CGF.EmitLValueForField(Base, *LBFI);
    llvm::Value *LBParam = CGF.EmitLoadOfScalar(LBLVal, Loc);
    auto UBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound);
    LValue UBLVal = CGF.EmitLValueForField(Base, *UBFI);
    llvm::Value *UBParam = CGF.EmitLoadOfScalar(UBLVal, Loc);
    auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride);
    LValue StLVal = CGF.EmitLValueForField(Base, *StFI);
    llvm::Value *StParam = CGF.EmitLoadOfScalar(StLVal, Loc);
    auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
    LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
    llvm::Value *LIParam = CGF.EmitLoadOfScalar(LILVal, Loc);
    auto RFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTReductions);
    LValue RLVal = CGF.EmitLValueForField(Base, *RFI);
    llvm::Value *RParam = CGF.EmitLoadOfScalar(RLVal, Loc);
    CallArgs.push_back(LBParam);
    CallArgs.push_back(UBParam);
    CallArgs.push_back(StParam);
    CallArgs.push_back(LIParam);
    CallArgs.push_back(RParam);
  }
  CallArgs.push_back(SharedsParam);

  CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskFunction,
                                                  CallArgs);
  // The runtime expects the entry point to return 0.
  CGF.EmitStoreThroughLValue(RValue::get(CGF.Builder.getInt32(/*C=*/0)),
                             CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty));
  CGF.FinishFunction();
  return TaskEntry;
}
3598 
/// Emit the task destructor thunk: a function with the runtime's
/// (kmp_int32 gtid, kmp_task_t_with_privates *tt) signature that registers
/// destruction for every private field with a non-trivial destructor.
static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM,
                                            SourceLocation Loc,
                                            QualType KmpInt32Ty,
                                            QualType KmpTaskTWithPrivatesPtrQTy,
                                            QualType KmpTaskTWithPrivatesQTy) {
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  // Fixed runtime-required signature: (gtid, task pointer).
  ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                                KmpTaskTWithPrivatesPtrQTy.withRestrict(),
                                ImplicitParamDecl::Other);
  Args.push_back(&GtidArg);
  Args.push_back(&TaskTypeArg);
  const auto &DestructorFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
  llvm::FunctionType *DestructorFnTy =
      CGM.getTypes().GetFunctionType(DestructorFnInfo);
  std::string Name =
      CGM.getOpenMPRuntime().getName({"omp_task_destructor", ""});
  auto *DestructorFn =
      llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage,
                             Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), DestructorFn,
                                    DestructorFnInfo);
  DestructorFn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo,
                    Args, Loc, Loc);

  // Navigate to the privates block: the second field of the wrapper record.
  LValue Base = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&TaskTypeArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  const auto *KmpTaskTWithPrivatesQTyRD =
      cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
  Base = CGF.EmitLValueForField(Base, *FI);
  // Push a destroy cleanup for each private field that needs destruction.
  for (const auto *Field :
       cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) {
    if (QualType::DestructionKind DtorKind =
            Field->getType().isDestructedType()) {
      LValue FieldLValue = CGF.EmitLValueForField(Base, Field);
      CGF.pushDestroy(DtorKind, FieldLValue.getAddress(CGF), Field->getType());
    }
  }
  CGF.FinishFunction();
  return DestructorFn;
}
3647 
3648 /// Emit a privates mapping function for correct handling of private and
3649 /// firstprivate variables.
3650 /// \code
3651 /// void .omp_task_privates_map.(const .privates. *noalias privs, <ty1>
3652 /// **noalias priv1,...,  <tyn> **noalias privn) {
3653 ///   *priv1 = &.privates.priv1;
3654 ///   ...;
3655 ///   *privn = &.privates.privn;
3656 /// }
3657 /// \endcode
3658 static llvm::Value *
emitTaskPrivateMappingFunction(CodeGenModule & CGM,SourceLocation Loc,const OMPTaskDataTy & Data,QualType PrivatesQTy,ArrayRef<PrivateDataTy> Privates)3659 emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc,
3660                                const OMPTaskDataTy &Data, QualType PrivatesQTy,
3661                                ArrayRef<PrivateDataTy> Privates) {
3662   ASTContext &C = CGM.getContext();
3663   FunctionArgList Args;
3664   ImplicitParamDecl TaskPrivatesArg(
3665       C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3666       C.getPointerType(PrivatesQTy).withConst().withRestrict(),
3667       ImplicitParamDecl::Other);
3668   Args.push_back(&TaskPrivatesArg);
3669   llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, unsigned> PrivateVarsPos;
3670   unsigned Counter = 1;
3671   for (const Expr *E : Data.PrivateVars) {
3672     Args.push_back(ImplicitParamDecl::Create(
3673         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3674         C.getPointerType(C.getPointerType(E->getType()))
3675             .withConst()
3676             .withRestrict(),
3677         ImplicitParamDecl::Other));
3678     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3679     PrivateVarsPos[VD] = Counter;
3680     ++Counter;
3681   }
3682   for (const Expr *E : Data.FirstprivateVars) {
3683     Args.push_back(ImplicitParamDecl::Create(
3684         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3685         C.getPointerType(C.getPointerType(E->getType()))
3686             .withConst()
3687             .withRestrict(),
3688         ImplicitParamDecl::Other));
3689     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3690     PrivateVarsPos[VD] = Counter;
3691     ++Counter;
3692   }
3693   for (const Expr *E : Data.LastprivateVars) {
3694     Args.push_back(ImplicitParamDecl::Create(
3695         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3696         C.getPointerType(C.getPointerType(E->getType()))
3697             .withConst()
3698             .withRestrict(),
3699         ImplicitParamDecl::Other));
3700     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3701     PrivateVarsPos[VD] = Counter;
3702     ++Counter;
3703   }
3704   for (const VarDecl *VD : Data.PrivateLocals) {
3705     QualType Ty = VD->getType().getNonReferenceType();
3706     if (VD->getType()->isLValueReferenceType())
3707       Ty = C.getPointerType(Ty);
3708     if (isAllocatableDecl(VD))
3709       Ty = C.getPointerType(Ty);
3710     Args.push_back(ImplicitParamDecl::Create(
3711         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3712         C.getPointerType(C.getPointerType(Ty)).withConst().withRestrict(),
3713         ImplicitParamDecl::Other));
3714     PrivateVarsPos[VD] = Counter;
3715     ++Counter;
3716   }
3717   const auto &TaskPrivatesMapFnInfo =
3718       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
3719   llvm::FunctionType *TaskPrivatesMapTy =
3720       CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo);
3721   std::string Name =
3722       CGM.getOpenMPRuntime().getName({"omp_task_privates_map", ""});
3723   auto *TaskPrivatesMap = llvm::Function::Create(
3724       TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage, Name,
3725       &CGM.getModule());
3726   CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskPrivatesMap,
3727                                     TaskPrivatesMapFnInfo);
3728   if (CGM.getLangOpts().Optimize) {
3729     TaskPrivatesMap->removeFnAttr(llvm::Attribute::NoInline);
3730     TaskPrivatesMap->removeFnAttr(llvm::Attribute::OptimizeNone);
3731     TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline);
3732   }
3733   CodeGenFunction CGF(CGM);
3734   CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap,
3735                     TaskPrivatesMapFnInfo, Args, Loc, Loc);
3736 
3737   // *privi = &.privates.privi;
3738   LValue Base = CGF.EmitLoadOfPointerLValue(
3739       CGF.GetAddrOfLocalVar(&TaskPrivatesArg),
3740       TaskPrivatesArg.getType()->castAs<PointerType>());
3741   const auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl());
3742   Counter = 0;
3743   for (const FieldDecl *Field : PrivatesQTyRD->fields()) {
3744     LValue FieldLVal = CGF.EmitLValueForField(Base, Field);
3745     const VarDecl *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]];
3746     LValue RefLVal =
3747         CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType());
3748     LValue RefLoadLVal = CGF.EmitLoadOfPointerLValue(
3749         RefLVal.getAddress(CGF), RefLVal.getType()->castAs<PointerType>());
3750     CGF.EmitStoreOfScalar(FieldLVal.getPointer(CGF), RefLoadLVal);
3751     ++Counter;
3752   }
3753   CGF.FinishFunction();
3754   return TaskPrivatesMap;
3755 }
3756 
/// Emit initialization for private variables in task-based directives.
/// Walks the privates record embedded in the kmp_task_t-with-privates
/// structure and initializes each privatized copy from its captured
/// (shared) original: memcpy for trivially-copyable arrays,
/// element-by-element construction otherwise, or a plain EmitExprAsInit
/// for scalars/records.
/// \param KmpTaskSharedsPtr Address of the task's shareds block (may be
/// invalid when there is nothing to copy from).
/// \param TDBase Base LValue of the kmp_task_t-with-privates record.
/// \param ForDup true when called from the taskloop task-duplication
/// function, false when called from the initial task setup.
static void emitPrivatesInit(CodeGenFunction &CGF,
                             const OMPExecutableDirective &D,
                             Address KmpTaskSharedsPtr, LValue TDBase,
                             const RecordDecl *KmpTaskTWithPrivatesQTyRD,
                             QualType SharedsTy, QualType SharedsPtrTy,
                             const OMPTaskDataTy &Data,
                             ArrayRef<PrivateDataTy> Privates, bool ForDup) {
  ASTContext &C = CGF.getContext();
  // The privates record is the second field of kmp_task_t_with_privates.
  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
  LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI);
  OpenMPDirectiveKind Kind = isOpenMPTaskLoopDirective(D.getDirectiveKind())
                                 ? OMPD_taskloop
                                 : OMPD_task;
  const CapturedStmt &CS = *D.getCapturedStmt(Kind);
  CodeGenFunction::CGCapturedStmtInfo CapturesInfo(CS);
  LValue SrcBase;
  bool IsTargetTask =
      isOpenMPTargetDataManagementDirective(D.getDirectiveKind()) ||
      isOpenMPTargetExecutionDirective(D.getDirectiveKind());
  // For target-based directives skip 4 firstprivate arrays BasePointersArray,
  // PointersArray, SizesArray, and MappersArray. The original variables for
  // these arrays are not captured and we get their addresses explicitly.
  if ((!IsTargetTask && !Data.FirstprivateVars.empty() && ForDup) ||
      (IsTargetTask && KmpTaskSharedsPtr.isValid())) {
    SrcBase = CGF.MakeAddrLValue(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy),
            CGF.ConvertTypeForMem(SharedsTy)),
        SharedsTy);
  }
  // Descend into the privates record; its fields were laid out in the same
  // order as the Privates array, so FI advances in lockstep with Pair below.
  FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin();
  for (const PrivateDataTy &Pair : Privates) {
    // Do not initialize private locals.
    if (Pair.second.isLocalPrivate()) {
      ++FI;
      continue;
    }
    const VarDecl *VD = Pair.second.PrivateCopy;
    const Expr *Init = VD->getAnyInitializer();
    // When duplicating (ForDup), only non-trivial constructor calls need to
    // be re-run; trivial initializers were already copied with the task.
    if (Init && (!ForDup || (isa<CXXConstructExpr>(Init) &&
                             !CGF.isTrivialInitializer(Init)))) {
      LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI);
      if (const VarDecl *Elem = Pair.second.PrivateElemInit) {
        // Firstprivate: initialize the copy from the shared original.
        const VarDecl *OriginalVD = Pair.second.Original;
        // Check if the variable is the target-based BasePointersArray,
        // PointersArray, SizesArray, or MappersArray.
        LValue SharedRefLValue;
        QualType Type = PrivateLValue.getType();
        const FieldDecl *SharedField = CapturesInfo.lookup(OriginalVD);
        if (IsTargetTask && !SharedField) {
          assert(isa<ImplicitParamDecl>(OriginalVD) &&
                 isa<CapturedDecl>(OriginalVD->getDeclContext()) &&
                 cast<CapturedDecl>(OriginalVD->getDeclContext())
                         ->getNumParams() == 0 &&
                 isa<TranslationUnitDecl>(
                     cast<CapturedDecl>(OriginalVD->getDeclContext())
                         ->getDeclContext()) &&
                 "Expected artificial target data variable.");
          SharedRefLValue =
              CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(OriginalVD), Type);
        } else if (ForDup) {
          // Read the original through the source task's shareds block,
          // realigned to the declared alignment of the original variable.
          SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField);
          SharedRefLValue = CGF.MakeAddrLValue(
              SharedRefLValue.getAddress(CGF).withAlignment(
                  C.getDeclAlign(OriginalVD)),
              SharedRefLValue.getType(), LValueBaseInfo(AlignmentSource::Decl),
              SharedRefLValue.getTBAAInfo());
        } else if (CGF.LambdaCaptureFields.count(
                       Pair.second.Original->getCanonicalDecl()) > 0 ||
                   isa_and_nonnull<BlockDecl>(CGF.CurCodeDecl)) {
          // Lambda/block captures are emitted with the current capture info.
          SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
        } else {
          // Processing for implicitly captured variables.
          InlinedOpenMPRegionRAII Region(
              CGF, [](CodeGenFunction &, PrePostActionTy &) {}, OMPD_unknown,
              /*HasCancel=*/false, /*NoInheritance=*/true);
          SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
        }
        if (Type->isArrayType()) {
          // Initialize firstprivate array.
          if (!isa<CXXConstructExpr>(Init) || CGF.isTrivialInitializer(Init)) {
            // Perform simple memcpy.
            CGF.EmitAggregateAssign(PrivateLValue, SharedRefLValue, Type);
          } else {
            // Initialize firstprivate array using element-by-element
            // initialization.
            CGF.EmitOMPAggregateAssign(
                PrivateLValue.getAddress(CGF), SharedRefLValue.getAddress(CGF),
                Type,
                [&CGF, Elem, Init, &CapturesInfo](Address DestElement,
                                                  Address SrcElement) {
                  // Clean up any temporaries needed by the initialization.
                  CodeGenFunction::OMPPrivateScope InitScope(CGF);
                  InitScope.addPrivate(Elem, SrcElement);
                  (void)InitScope.Privatize();
                  // Emit initialization for single element.
                  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(
                      CGF, &CapturesInfo);
                  CGF.EmitAnyExprToMem(Init, DestElement,
                                       Init->getType().getQualifiers(),
                                       /*IsInitializer=*/false);
                });
          }
        } else {
          // Non-array firstprivate: run the initializer with the source
          // element privatized so the init expression reads the original.
          CodeGenFunction::OMPPrivateScope InitScope(CGF);
          InitScope.addPrivate(Elem, SharedRefLValue.getAddress(CGF));
          (void)InitScope.Privatize();
          CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo);
          CGF.EmitExprAsInit(Init, VD, PrivateLValue,
                             /*capturedByInit=*/false);
        }
      } else {
        // Plain private/lastprivate copy: default-initialize in place.
        CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false);
      }
    }
    ++FI;
  }
}
3876 
3877 /// Check if duplication function is required for taskloops.
checkInitIsRequired(CodeGenFunction & CGF,ArrayRef<PrivateDataTy> Privates)3878 static bool checkInitIsRequired(CodeGenFunction &CGF,
3879                                 ArrayRef<PrivateDataTy> Privates) {
3880   bool InitRequired = false;
3881   for (const PrivateDataTy &Pair : Privates) {
3882     if (Pair.second.isLocalPrivate())
3883       continue;
3884     const VarDecl *VD = Pair.second.PrivateCopy;
3885     const Expr *Init = VD->getAnyInitializer();
3886     InitRequired = InitRequired || (isa_and_nonnull<CXXConstructExpr>(Init) &&
3887                                     !CGF.isTrivialInitializer(Init));
3888     if (InitRequired)
3889       break;
3890   }
3891   return InitRequired;
3892 }
3893 
3894 
3895 /// Emit task_dup function (for initialization of
3896 /// private/firstprivate/lastprivate vars and last_iter flag)
3897 /// \code
3898 /// void __task_dup_entry(kmp_task_t *task_dst, const kmp_task_t *task_src, int
3899 /// lastpriv) {
3900 /// // setup lastprivate flag
3901 ///    task_dst->last = lastpriv;
3902 /// // could be constructor calls here...
3903 /// }
3904 /// \endcode
3905 static llvm::Value *
emitTaskDupFunction(CodeGenModule & CGM,SourceLocation Loc,const OMPExecutableDirective & D,QualType KmpTaskTWithPrivatesPtrQTy,const RecordDecl * KmpTaskTWithPrivatesQTyRD,const RecordDecl * KmpTaskTQTyRD,QualType SharedsTy,QualType SharedsPtrTy,const OMPTaskDataTy & Data,ArrayRef<PrivateDataTy> Privates,bool WithLastIter)3906 emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc,
3907                     const OMPExecutableDirective &D,
3908                     QualType KmpTaskTWithPrivatesPtrQTy,
3909                     const RecordDecl *KmpTaskTWithPrivatesQTyRD,
3910                     const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy,
3911                     QualType SharedsPtrTy, const OMPTaskDataTy &Data,
3912                     ArrayRef<PrivateDataTy> Privates, bool WithLastIter) {
3913   ASTContext &C = CGM.getContext();
3914   FunctionArgList Args;
3915   ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3916                            KmpTaskTWithPrivatesPtrQTy,
3917                            ImplicitParamDecl::Other);
3918   ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3919                            KmpTaskTWithPrivatesPtrQTy,
3920                            ImplicitParamDecl::Other);
3921   ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy,
3922                                 ImplicitParamDecl::Other);
3923   Args.push_back(&DstArg);
3924   Args.push_back(&SrcArg);
3925   Args.push_back(&LastprivArg);
3926   const auto &TaskDupFnInfo =
3927       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
3928   llvm::FunctionType *TaskDupTy = CGM.getTypes().GetFunctionType(TaskDupFnInfo);
3929   std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_dup", ""});
3930   auto *TaskDup = llvm::Function::Create(
3931       TaskDupTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
3932   CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskDup, TaskDupFnInfo);
3933   TaskDup->setDoesNotRecurse();
3934   CodeGenFunction CGF(CGM);
3935   CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskDup, TaskDupFnInfo, Args, Loc,
3936                     Loc);
3937 
3938   LValue TDBase = CGF.EmitLoadOfPointerLValue(
3939       CGF.GetAddrOfLocalVar(&DstArg),
3940       KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
3941   // task_dst->liter = lastpriv;
3942   if (WithLastIter) {
3943     auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
3944     LValue Base = CGF.EmitLValueForField(
3945         TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
3946     LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
3947     llvm::Value *Lastpriv = CGF.EmitLoadOfScalar(
3948         CGF.GetAddrOfLocalVar(&LastprivArg), /*Volatile=*/false, C.IntTy, Loc);
3949     CGF.EmitStoreOfScalar(Lastpriv, LILVal);
3950   }
3951 
3952   // Emit initial values for private copies (if any).
3953   assert(!Privates.empty());
3954   Address KmpTaskSharedsPtr = Address::invalid();
3955   if (!Data.FirstprivateVars.empty()) {
3956     LValue TDBase = CGF.EmitLoadOfPointerLValue(
3957         CGF.GetAddrOfLocalVar(&SrcArg),
3958         KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
3959     LValue Base = CGF.EmitLValueForField(
3960         TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
3961     KmpTaskSharedsPtr = Address(
3962         CGF.EmitLoadOfScalar(CGF.EmitLValueForField(
3963                                  Base, *std::next(KmpTaskTQTyRD->field_begin(),
3964                                                   KmpTaskTShareds)),
3965                              Loc),
3966         CGF.Int8Ty, CGM.getNaturalTypeAlignment(SharedsTy));
3967   }
3968   emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase, KmpTaskTWithPrivatesQTyRD,
3969                    SharedsTy, SharedsPtrTy, Data, Privates, /*ForDup=*/true);
3970   CGF.FinishFunction();
3971   return TaskDup;
3972 }
3973 
3974 /// Checks if destructor function is required to be generated.
3975 /// \return true if cleanups are required, false otherwise.
3976 static bool
checkDestructorsRequired(const RecordDecl * KmpTaskTWithPrivatesQTyRD,ArrayRef<PrivateDataTy> Privates)3977 checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD,
3978                          ArrayRef<PrivateDataTy> Privates) {
3979   for (const PrivateDataTy &P : Privates) {
3980     if (P.second.isLocalPrivate())
3981       continue;
3982     QualType Ty = P.second.Original->getType().getNonReferenceType();
3983     if (Ty.isDestructedType())
3984       return true;
3985   }
3986   return false;
3987 }
3988 
namespace {
/// Loop generator for OpenMP iterator expression.
///
/// RAII scope that, on construction, privatizes each iterator variable and
/// its helper counter and emits the *header* of one nested loop per
/// iterator (counter init, bounds check, body entry, iterator update). The
/// destructor emits the matching *latches* in reverse order (counter
/// increment, back-edge branch, exit block), so code emitted while the
/// scope is alive runs inside the innermost loop body.
class OMPIteratorGeneratorScope final
    : public CodeGenFunction::OMPPrivateScope {
  CodeGenFunction &CGF;
  const OMPIteratorExpr *E = nullptr;
  // Per-iterator continue ("cont") and exit targets; index I corresponds to
  // iterator I, consumed in reverse by the destructor.
  SmallVector<CodeGenFunction::JumpDest, 4> ContDests;
  SmallVector<CodeGenFunction::JumpDest, 4> ExitDests;
  OMPIteratorGeneratorScope() = delete;
  OMPIteratorGeneratorScope(OMPIteratorGeneratorScope &) = delete;

public:
  OMPIteratorGeneratorScope(CodeGenFunction &CGF, const OMPIteratorExpr *E)
      : CodeGenFunction::OMPPrivateScope(CGF), CGF(CGF), E(E) {
    if (!E)
      return;
    // Evaluate all upper bounds up front, then privatize the iterator and
    // counter variables before emitting any loop control flow.
    SmallVector<llvm::Value *, 4> Uppers;
    for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
      Uppers.push_back(CGF.EmitScalarExpr(E->getHelper(I).Upper));
      const auto *VD = cast<VarDecl>(E->getIteratorDecl(I));
      addPrivate(VD, CGF.CreateMemTemp(VD->getType(), VD->getName()));
      const OMPIteratorHelperData &HelperData = E->getHelper(I);
      addPrivate(
          HelperData.CounterVD,
          CGF.CreateMemTemp(HelperData.CounterVD->getType(), "counter.addr"));
    }
    Privatize();

    for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
      const OMPIteratorHelperData &HelperData = E->getHelper(I);
      LValue CLVal =
          CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(HelperData.CounterVD),
                             HelperData.CounterVD->getType());
      // Counter = 0;
      CGF.EmitStoreOfScalar(
          llvm::ConstantInt::get(CLVal.getAddress(CGF).getElementType(), 0),
          CLVal);
      CodeGenFunction::JumpDest &ContDest =
          ContDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.cont"));
      CodeGenFunction::JumpDest &ExitDest =
          ExitDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.exit"));
      // N = <number-of_iterations>;
      llvm::Value *N = Uppers[I];
      // cont:
      // if (Counter < N) goto body; else goto exit;
      CGF.EmitBlock(ContDest.getBlock());
      auto *CVal =
          CGF.EmitLoadOfScalar(CLVal, HelperData.CounterVD->getLocation());
      // Pick a signed or unsigned compare to match the counter's type.
      llvm::Value *Cmp =
          HelperData.CounterVD->getType()->isSignedIntegerOrEnumerationType()
              ? CGF.Builder.CreateICmpSLT(CVal, N)
              : CGF.Builder.CreateICmpULT(CVal, N);
      llvm::BasicBlock *BodyBB = CGF.createBasicBlock("iter.body");
      CGF.Builder.CreateCondBr(Cmp, BodyBB, ExitDest.getBlock());
      // body:
      CGF.EmitBlock(BodyBB);
      // Iteri = Begini + Counter * Stepi;
      CGF.EmitIgnoredExpr(HelperData.Update);
    }
  }
  ~OMPIteratorGeneratorScope() {
    if (!E)
      return;
    // Close the loops innermost-first, mirroring construction order.
    for (unsigned I = E->numOfIterators(); I > 0; --I) {
      // Counter = Counter + 1;
      const OMPIteratorHelperData &HelperData = E->getHelper(I - 1);
      CGF.EmitIgnoredExpr(HelperData.CounterUpdate);
      // goto cont;
      CGF.EmitBranchThroughCleanup(ContDests[I - 1]);
      // exit:
      CGF.EmitBlock(ExitDests[I - 1].getBlock(), /*IsFinished=*/I == 1);
    }
  }
};
} // namespace
4064 
4065 static std::pair<llvm::Value *, llvm::Value *>
getPointerAndSize(CodeGenFunction & CGF,const Expr * E)4066 getPointerAndSize(CodeGenFunction &CGF, const Expr *E) {
4067   const auto *OASE = dyn_cast<OMPArrayShapingExpr>(E);
4068   llvm::Value *Addr;
4069   if (OASE) {
4070     const Expr *Base = OASE->getBase();
4071     Addr = CGF.EmitScalarExpr(Base);
4072   } else {
4073     Addr = CGF.EmitLValue(E).getPointer(CGF);
4074   }
4075   llvm::Value *SizeVal;
4076   QualType Ty = E->getType();
4077   if (OASE) {
4078     SizeVal = CGF.getTypeSize(OASE->getBase()->getType()->getPointeeType());
4079     for (const Expr *SE : OASE->getDimensions()) {
4080       llvm::Value *Sz = CGF.EmitScalarExpr(SE);
4081       Sz = CGF.EmitScalarConversion(
4082           Sz, SE->getType(), CGF.getContext().getSizeType(), SE->getExprLoc());
4083       SizeVal = CGF.Builder.CreateNUWMul(SizeVal, Sz);
4084     }
4085   } else if (const auto *ASE =
4086                  dyn_cast<OMPArraySectionExpr>(E->IgnoreParenImpCasts())) {
4087     LValue UpAddrLVal =
4088         CGF.EmitOMPArraySectionExpr(ASE, /*IsLowerBound=*/false);
4089     Address UpAddrAddress = UpAddrLVal.getAddress(CGF);
4090     llvm::Value *UpAddr = CGF.Builder.CreateConstGEP1_32(
4091         UpAddrAddress.getElementType(), UpAddrAddress.getPointer(), /*Idx0=*/1);
4092     llvm::Value *LowIntPtr = CGF.Builder.CreatePtrToInt(Addr, CGF.SizeTy);
4093     llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGF.SizeTy);
4094     SizeVal = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr);
4095   } else {
4096     SizeVal = CGF.getTypeSize(Ty);
4097   }
4098   return std::make_pair(Addr, SizeVal);
4099 }
4100 
4101 /// Builds kmp_depend_info, if it is not built yet, and builds flags type.
getKmpAffinityType(ASTContext & C,QualType & KmpTaskAffinityInfoTy)4102 static void getKmpAffinityType(ASTContext &C, QualType &KmpTaskAffinityInfoTy) {
4103   QualType FlagsTy = C.getIntTypeForBitwidth(32, /*Signed=*/false);
4104   if (KmpTaskAffinityInfoTy.isNull()) {
4105     RecordDecl *KmpAffinityInfoRD =
4106         C.buildImplicitRecord("kmp_task_affinity_info_t");
4107     KmpAffinityInfoRD->startDefinition();
4108     addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getIntPtrType());
4109     addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getSizeType());
4110     addFieldToRecordDecl(C, KmpAffinityInfoRD, FlagsTy);
4111     KmpAffinityInfoRD->completeDefinition();
4112     KmpTaskAffinityInfoTy = C.getRecordType(KmpAffinityInfoRD);
4113   }
4114 }
4115 
4116 CGOpenMPRuntime::TaskResultTy
emitTaskInit(CodeGenFunction & CGF,SourceLocation Loc,const OMPExecutableDirective & D,llvm::Function * TaskFunction,QualType SharedsTy,Address Shareds,const OMPTaskDataTy & Data)4117 CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,
4118                               const OMPExecutableDirective &D,
4119                               llvm::Function *TaskFunction, QualType SharedsTy,
4120                               Address Shareds, const OMPTaskDataTy &Data) {
4121   ASTContext &C = CGM.getContext();
4122   llvm::SmallVector<PrivateDataTy, 4> Privates;
4123   // Aggregate privates and sort them by the alignment.
4124   const auto *I = Data.PrivateCopies.begin();
4125   for (const Expr *E : Data.PrivateVars) {
4126     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4127     Privates.emplace_back(
4128         C.getDeclAlign(VD),
4129         PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
4130                          /*PrivateElemInit=*/nullptr));
4131     ++I;
4132   }
4133   I = Data.FirstprivateCopies.begin();
4134   const auto *IElemInitRef = Data.FirstprivateInits.begin();
4135   for (const Expr *E : Data.FirstprivateVars) {
4136     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4137     Privates.emplace_back(
4138         C.getDeclAlign(VD),
4139         PrivateHelpersTy(
4140             E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
4141             cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl())));
4142     ++I;
4143     ++IElemInitRef;
4144   }
4145   I = Data.LastprivateCopies.begin();
4146   for (const Expr *E : Data.LastprivateVars) {
4147     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4148     Privates.emplace_back(
4149         C.getDeclAlign(VD),
4150         PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
4151                          /*PrivateElemInit=*/nullptr));
4152     ++I;
4153   }
4154   for (const VarDecl *VD : Data.PrivateLocals) {
4155     if (isAllocatableDecl(VD))
4156       Privates.emplace_back(CGM.getPointerAlign(), PrivateHelpersTy(VD));
4157     else
4158       Privates.emplace_back(C.getDeclAlign(VD), PrivateHelpersTy(VD));
4159   }
4160   llvm::stable_sort(Privates,
4161                     [](const PrivateDataTy &L, const PrivateDataTy &R) {
4162                       return L.first > R.first;
4163                     });
4164   QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
4165   // Build type kmp_routine_entry_t (if not built yet).
4166   emitKmpRoutineEntryT(KmpInt32Ty);
4167   // Build type kmp_task_t (if not built yet).
4168   if (isOpenMPTaskLoopDirective(D.getDirectiveKind())) {
4169     if (SavedKmpTaskloopTQTy.isNull()) {
4170       SavedKmpTaskloopTQTy = C.getRecordType(createKmpTaskTRecordDecl(
4171           CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
4172     }
4173     KmpTaskTQTy = SavedKmpTaskloopTQTy;
4174   } else {
4175     assert((D.getDirectiveKind() == OMPD_task ||
4176             isOpenMPTargetExecutionDirective(D.getDirectiveKind()) ||
4177             isOpenMPTargetDataManagementDirective(D.getDirectiveKind())) &&
4178            "Expected taskloop, task or target directive");
4179     if (SavedKmpTaskTQTy.isNull()) {
4180       SavedKmpTaskTQTy = C.getRecordType(createKmpTaskTRecordDecl(
4181           CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
4182     }
4183     KmpTaskTQTy = SavedKmpTaskTQTy;
4184   }
4185   const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
4186   // Build particular struct kmp_task_t for the given task.
4187   const RecordDecl *KmpTaskTWithPrivatesQTyRD =
4188       createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates);
4189   QualType KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD);
4190   QualType KmpTaskTWithPrivatesPtrQTy =
4191       C.getPointerType(KmpTaskTWithPrivatesQTy);
4192   llvm::Type *KmpTaskTWithPrivatesTy = CGF.ConvertType(KmpTaskTWithPrivatesQTy);
4193   llvm::Type *KmpTaskTWithPrivatesPtrTy =
4194       KmpTaskTWithPrivatesTy->getPointerTo();
4195   llvm::Value *KmpTaskTWithPrivatesTySize =
4196       CGF.getTypeSize(KmpTaskTWithPrivatesQTy);
4197   QualType SharedsPtrTy = C.getPointerType(SharedsTy);
4198 
4199   // Emit initial values for private copies (if any).
4200   llvm::Value *TaskPrivatesMap = nullptr;
4201   llvm::Type *TaskPrivatesMapTy =
4202       std::next(TaskFunction->arg_begin(), 3)->getType();
4203   if (!Privates.empty()) {
4204     auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
4205     TaskPrivatesMap =
4206         emitTaskPrivateMappingFunction(CGM, Loc, Data, FI->getType(), Privates);
4207     TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4208         TaskPrivatesMap, TaskPrivatesMapTy);
4209   } else {
4210     TaskPrivatesMap = llvm::ConstantPointerNull::get(
4211         cast<llvm::PointerType>(TaskPrivatesMapTy));
4212   }
4213   // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid,
4214   // kmp_task_t *tt);
4215   llvm::Function *TaskEntry = emitProxyTaskFunction(
4216       CGM, Loc, D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
4217       KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction,
4218       TaskPrivatesMap);
4219 
4220   // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
4221   // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
4222   // kmp_routine_entry_t *task_entry);
4223   // Task flags. Format is taken from
4224   // https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h,
4225   // description of kmp_tasking_flags struct.
4226   enum {
4227     TiedFlag = 0x1,
4228     FinalFlag = 0x2,
4229     DestructorsFlag = 0x8,
4230     PriorityFlag = 0x20,
4231     DetachableFlag = 0x40,
4232   };
4233   unsigned Flags = Data.Tied ? TiedFlag : 0;
4234   bool NeedsCleanup = false;
4235   if (!Privates.empty()) {
4236     NeedsCleanup =
4237         checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD, Privates);
4238     if (NeedsCleanup)
4239       Flags = Flags | DestructorsFlag;
4240   }
4241   if (Data.Priority.getInt())
4242     Flags = Flags | PriorityFlag;
4243   if (D.hasClausesOfKind<OMPDetachClause>())
4244     Flags = Flags | DetachableFlag;
4245   llvm::Value *TaskFlags =
4246       Data.Final.getPointer()
4247           ? CGF.Builder.CreateSelect(Data.Final.getPointer(),
4248                                      CGF.Builder.getInt32(FinalFlag),
4249                                      CGF.Builder.getInt32(/*C=*/0))
4250           : CGF.Builder.getInt32(Data.Final.getInt() ? FinalFlag : 0);
4251   TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags));
4252   llvm::Value *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy));
4253   SmallVector<llvm::Value *, 8> AllocArgs = {emitUpdateLocation(CGF, Loc),
4254       getThreadID(CGF, Loc), TaskFlags, KmpTaskTWithPrivatesTySize,
4255       SharedsSize, CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4256           TaskEntry, KmpRoutineEntryPtrTy)};
4257   llvm::Value *NewTask;
4258   if (D.hasClausesOfKind<OMPNowaitClause>()) {
4259     // Check if we have any device clause associated with the directive.
4260     const Expr *Device = nullptr;
4261     if (auto *C = D.getSingleClause<OMPDeviceClause>())
4262       Device = C->getDevice();
4263     // Emit device ID if any otherwise use default value.
4264     llvm::Value *DeviceID;
4265     if (Device)
4266       DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
4267                                            CGF.Int64Ty, /*isSigned=*/true);
4268     else
4269       DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
4270     AllocArgs.push_back(DeviceID);
4271     NewTask = CGF.EmitRuntimeCall(
4272         OMPBuilder.getOrCreateRuntimeFunction(
4273             CGM.getModule(), OMPRTL___kmpc_omp_target_task_alloc),
4274         AllocArgs);
4275   } else {
4276     NewTask =
4277         CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
4278                                 CGM.getModule(), OMPRTL___kmpc_omp_task_alloc),
4279                             AllocArgs);
4280   }
4281   // Emit detach clause initialization.
4282   // evt = (typeof(evt))__kmpc_task_allow_completion_event(loc, tid,
4283   // task_descriptor);
4284   if (const auto *DC = D.getSingleClause<OMPDetachClause>()) {
4285     const Expr *Evt = DC->getEventHandler()->IgnoreParenImpCasts();
4286     LValue EvtLVal = CGF.EmitLValue(Evt);
4287 
4288     // Build kmp_event_t *__kmpc_task_allow_completion_event(ident_t *loc_ref,
4289     // int gtid, kmp_task_t *task);
4290     llvm::Value *Loc = emitUpdateLocation(CGF, DC->getBeginLoc());
4291     llvm::Value *Tid = getThreadID(CGF, DC->getBeginLoc());
4292     Tid = CGF.Builder.CreateIntCast(Tid, CGF.IntTy, /*isSigned=*/false);
4293     llvm::Value *EvtVal = CGF.EmitRuntimeCall(
4294         OMPBuilder.getOrCreateRuntimeFunction(
4295             CGM.getModule(), OMPRTL___kmpc_task_allow_completion_event),
4296         {Loc, Tid, NewTask});
4297     EvtVal = CGF.EmitScalarConversion(EvtVal, C.VoidPtrTy, Evt->getType(),
4298                                       Evt->getExprLoc());
4299     CGF.EmitStoreOfScalar(EvtVal, EvtLVal);
4300   }
4301   // Process affinity clauses.
4302   if (D.hasClausesOfKind<OMPAffinityClause>()) {
4303     // Process list of affinity data.
4304     ASTContext &C = CGM.getContext();
4305     Address AffinitiesArray = Address::invalid();
4306     // Calculate number of elements to form the array of affinity data.
4307     llvm::Value *NumOfElements = nullptr;
4308     unsigned NumAffinities = 0;
4309     for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
4310       if (const Expr *Modifier = C->getModifier()) {
4311         const auto *IE = cast<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts());
4312         for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
4313           llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
4314           Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
4315           NumOfElements =
4316               NumOfElements ? CGF.Builder.CreateNUWMul(NumOfElements, Sz) : Sz;
4317         }
4318       } else {
4319         NumAffinities += C->varlist_size();
4320       }
4321     }
4322     getKmpAffinityType(CGM.getContext(), KmpTaskAffinityInfoTy);
4323     // Fields ids in kmp_task_affinity_info record.
4324     enum RTLAffinityInfoFieldsTy { BaseAddr, Len, Flags };
4325 
4326     QualType KmpTaskAffinityInfoArrayTy;
4327     if (NumOfElements) {
4328       NumOfElements = CGF.Builder.CreateNUWAdd(
4329           llvm::ConstantInt::get(CGF.SizeTy, NumAffinities), NumOfElements);
4330       auto *OVE = new (C) OpaqueValueExpr(
4331           Loc,
4332           C.getIntTypeForBitwidth(C.getTypeSize(C.getSizeType()), /*Signed=*/0),
4333           VK_PRValue);
4334       CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, OVE,
4335                                                     RValue::get(NumOfElements));
4336       KmpTaskAffinityInfoArrayTy =
4337           C.getVariableArrayType(KmpTaskAffinityInfoTy, OVE, ArrayType::Normal,
4338                                  /*IndexTypeQuals=*/0, SourceRange(Loc, Loc));
4339       // Properly emit variable-sized array.
4340       auto *PD = ImplicitParamDecl::Create(C, KmpTaskAffinityInfoArrayTy,
4341                                            ImplicitParamDecl::Other);
4342       CGF.EmitVarDecl(*PD);
4343       AffinitiesArray = CGF.GetAddrOfLocalVar(PD);
4344       NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
4345                                                 /*isSigned=*/false);
4346     } else {
4347       KmpTaskAffinityInfoArrayTy = C.getConstantArrayType(
4348           KmpTaskAffinityInfoTy,
4349           llvm::APInt(C.getTypeSize(C.getSizeType()), NumAffinities), nullptr,
4350           ArrayType::Normal, /*IndexTypeQuals=*/0);
4351       AffinitiesArray =
4352           CGF.CreateMemTemp(KmpTaskAffinityInfoArrayTy, ".affs.arr.addr");
4353       AffinitiesArray = CGF.Builder.CreateConstArrayGEP(AffinitiesArray, 0);
4354       NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumAffinities,
4355                                              /*isSigned=*/false);
4356     }
4357 
4358     const auto *KmpAffinityInfoRD = KmpTaskAffinityInfoTy->getAsRecordDecl();
4359     // Fill array by elements without iterators.
4360     unsigned Pos = 0;
4361     bool HasIterator = false;
4362     for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
4363       if (C->getModifier()) {
4364         HasIterator = true;
4365         continue;
4366       }
4367       for (const Expr *E : C->varlists()) {
4368         llvm::Value *Addr;
4369         llvm::Value *Size;
4370         std::tie(Addr, Size) = getPointerAndSize(CGF, E);
4371         LValue Base =
4372             CGF.MakeAddrLValue(CGF.Builder.CreateConstGEP(AffinitiesArray, Pos),
4373                                KmpTaskAffinityInfoTy);
4374         // affs[i].base_addr = &<Affinities[i].second>;
4375         LValue BaseAddrLVal = CGF.EmitLValueForField(
4376             Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
4377         CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
4378                               BaseAddrLVal);
4379         // affs[i].len = sizeof(<Affinities[i].second>);
4380         LValue LenLVal = CGF.EmitLValueForField(
4381             Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
4382         CGF.EmitStoreOfScalar(Size, LenLVal);
4383         ++Pos;
4384       }
4385     }
4386     LValue PosLVal;
4387     if (HasIterator) {
4388       PosLVal = CGF.MakeAddrLValue(
4389           CGF.CreateMemTemp(C.getSizeType(), "affs.counter.addr"),
4390           C.getSizeType());
4391       CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
4392     }
4393     // Process elements with iterators.
4394     for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
4395       const Expr *Modifier = C->getModifier();
4396       if (!Modifier)
4397         continue;
4398       OMPIteratorGeneratorScope IteratorScope(
4399           CGF, cast_or_null<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts()));
4400       for (const Expr *E : C->varlists()) {
4401         llvm::Value *Addr;
4402         llvm::Value *Size;
4403         std::tie(Addr, Size) = getPointerAndSize(CGF, E);
4404         llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
4405         LValue Base = CGF.MakeAddrLValue(
4406             CGF.Builder.CreateGEP(AffinitiesArray, Idx), KmpTaskAffinityInfoTy);
4407         // affs[i].base_addr = &<Affinities[i].second>;
4408         LValue BaseAddrLVal = CGF.EmitLValueForField(
4409             Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
4410         CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
4411                               BaseAddrLVal);
4412         // affs[i].len = sizeof(<Affinities[i].second>);
4413         LValue LenLVal = CGF.EmitLValueForField(
4414             Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
4415         CGF.EmitStoreOfScalar(Size, LenLVal);
4416         Idx = CGF.Builder.CreateNUWAdd(
4417             Idx, llvm::ConstantInt::get(Idx->getType(), 1));
4418         CGF.EmitStoreOfScalar(Idx, PosLVal);
4419       }
4420     }
4421     // Call to kmp_int32 __kmpc_omp_reg_task_with_affinity(ident_t *loc_ref,
4422     // kmp_int32 gtid, kmp_task_t *new_task, kmp_int32
4423     // naffins, kmp_task_affinity_info_t *affin_list);
4424     llvm::Value *LocRef = emitUpdateLocation(CGF, Loc);
4425     llvm::Value *GTid = getThreadID(CGF, Loc);
4426     llvm::Value *AffinListPtr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4427         AffinitiesArray.getPointer(), CGM.VoidPtrTy);
4428     // FIXME: Emit the function and ignore its result for now unless the
4429     // runtime function is properly implemented.
4430     (void)CGF.EmitRuntimeCall(
4431         OMPBuilder.getOrCreateRuntimeFunction(
4432             CGM.getModule(), OMPRTL___kmpc_omp_reg_task_with_affinity),
4433         {LocRef, GTid, NewTask, NumOfElements, AffinListPtr});
4434   }
4435   llvm::Value *NewTaskNewTaskTTy =
4436       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4437           NewTask, KmpTaskTWithPrivatesPtrTy);
4438   LValue Base = CGF.MakeNaturalAlignAddrLValue(NewTaskNewTaskTTy,
4439                                                KmpTaskTWithPrivatesQTy);
4440   LValue TDBase =
4441       CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin());
4442   // Fill the data in the resulting kmp_task_t record.
4443   // Copy shareds if there are any.
4444   Address KmpTaskSharedsPtr = Address::invalid();
4445   if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) {
4446     KmpTaskSharedsPtr = Address(
4447         CGF.EmitLoadOfScalar(
4448             CGF.EmitLValueForField(
4449                 TDBase,
4450                 *std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds)),
4451             Loc),
4452         CGF.Int8Ty, CGM.getNaturalTypeAlignment(SharedsTy));
4453     LValue Dest = CGF.MakeAddrLValue(KmpTaskSharedsPtr, SharedsTy);
4454     LValue Src = CGF.MakeAddrLValue(Shareds, SharedsTy);
4455     CGF.EmitAggregateCopy(Dest, Src, SharedsTy, AggValueSlot::DoesNotOverlap);
4456   }
4457   // Emit initial values for private copies (if any).
4458   TaskResultTy Result;
4459   if (!Privates.empty()) {
4460     emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, Base, KmpTaskTWithPrivatesQTyRD,
4461                      SharedsTy, SharedsPtrTy, Data, Privates,
4462                      /*ForDup=*/false);
4463     if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) &&
4464         (!Data.LastprivateVars.empty() || checkInitIsRequired(CGF, Privates))) {
4465       Result.TaskDupFn = emitTaskDupFunction(
4466           CGM, Loc, D, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTyRD,
4467           KmpTaskTQTyRD, SharedsTy, SharedsPtrTy, Data, Privates,
4468           /*WithLastIter=*/!Data.LastprivateVars.empty());
4469     }
4470   }
4471   // Fields of union "kmp_cmplrdata_t" for destructors and priority.
4472   enum { Priority = 0, Destructors = 1 };
4473   // Provide pointer to function with destructors for privates.
4474   auto FI = std::next(KmpTaskTQTyRD->field_begin(), Data1);
4475   const RecordDecl *KmpCmplrdataUD =
4476       (*FI)->getType()->getAsUnionType()->getDecl();
4477   if (NeedsCleanup) {
4478     llvm::Value *DestructorFn = emitDestructorsFunction(
4479         CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
4480         KmpTaskTWithPrivatesQTy);
4481     LValue Data1LV = CGF.EmitLValueForField(TDBase, *FI);
4482     LValue DestructorsLV = CGF.EmitLValueForField(
4483         Data1LV, *std::next(KmpCmplrdataUD->field_begin(), Destructors));
4484     CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4485                               DestructorFn, KmpRoutineEntryPtrTy),
4486                           DestructorsLV);
4487   }
4488   // Set priority.
4489   if (Data.Priority.getInt()) {
4490     LValue Data2LV = CGF.EmitLValueForField(
4491         TDBase, *std::next(KmpTaskTQTyRD->field_begin(), Data2));
4492     LValue PriorityLV = CGF.EmitLValueForField(
4493         Data2LV, *std::next(KmpCmplrdataUD->field_begin(), Priority));
4494     CGF.EmitStoreOfScalar(Data.Priority.getPointer(), PriorityLV);
4495   }
4496   Result.NewTask = NewTask;
4497   Result.TaskEntry = TaskEntry;
4498   Result.NewTaskNewTaskTTy = NewTaskNewTaskTTy;
4499   Result.TDBase = TDBase;
4500   Result.KmpTaskTQTyRD = KmpTaskTQTyRD;
4501   return Result;
4502 }
4503 
namespace {
/// Dependence kind for RTL.
/// These values are stored into the 'flags' field of kmp_depend_info
/// entries (see emitDependData), so they presumably mirror the dependence
/// flag encoding expected by the OpenMP runtime library — verify against
/// the runtime's kmp.h before changing any value.
enum RTLDependenceKindTy {
  DepIn = 0x01,
  DepInOut = 0x3,
  DepMutexInOutSet = 0x4,
  DepInOutSet = 0x8,
  DepOmpAllMem = 0x80,
};
/// Fields ids in kmp_depend_info record.
/// The order must match the field order built by getDependTypes():
/// base_addr (intptr_t), len (size_t), flags.
enum RTLDependInfoFieldsTy { BaseAddr, Len, Flags };
} // namespace
4516 
4517 /// Translates internal dependency kind into the runtime kind.
translateDependencyKind(OpenMPDependClauseKind K)4518 static RTLDependenceKindTy translateDependencyKind(OpenMPDependClauseKind K) {
4519   RTLDependenceKindTy DepKind;
4520   switch (K) {
4521   case OMPC_DEPEND_in:
4522     DepKind = DepIn;
4523     break;
4524   // Out and InOut dependencies must use the same code.
4525   case OMPC_DEPEND_out:
4526   case OMPC_DEPEND_inout:
4527     DepKind = DepInOut;
4528     break;
4529   case OMPC_DEPEND_mutexinoutset:
4530     DepKind = DepMutexInOutSet;
4531     break;
4532   case OMPC_DEPEND_inoutset:
4533     DepKind = DepInOutSet;
4534     break;
4535   case OMPC_DEPEND_outallmemory:
4536     DepKind = DepOmpAllMem;
4537     break;
4538   case OMPC_DEPEND_source:
4539   case OMPC_DEPEND_sink:
4540   case OMPC_DEPEND_depobj:
4541   case OMPC_DEPEND_inoutallmemory:
4542   case OMPC_DEPEND_unknown:
4543     llvm_unreachable("Unknown task dependence type");
4544   }
4545   return DepKind;
4546 }
4547 
4548 /// Builds kmp_depend_info, if it is not built yet, and builds flags type.
getDependTypes(ASTContext & C,QualType & KmpDependInfoTy,QualType & FlagsTy)4549 static void getDependTypes(ASTContext &C, QualType &KmpDependInfoTy,
4550                            QualType &FlagsTy) {
4551   FlagsTy = C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false);
4552   if (KmpDependInfoTy.isNull()) {
4553     RecordDecl *KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info");
4554     KmpDependInfoRD->startDefinition();
4555     addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType());
4556     addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType());
4557     addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy);
4558     KmpDependInfoRD->completeDefinition();
4559     KmpDependInfoTy = C.getRecordType(KmpDependInfoRD);
4560   }
4561 }
4562 
/// Returns the number of dependency elements stored in a depobj and an
/// lvalue for the first element of its kmp_depend_info array.
///
/// The depobj variable holds a pointer to the element *after* a hidden
/// header entry; the element count is kept in the 'base_addr' field of that
/// header (written by emitDepobjDependClause), hence the GEP at index -1.
std::pair<llvm::Value *, LValue>
CGOpenMPRuntime::getDepobjElements(CodeGenFunction &CGF, LValue DepobjLVal,
                                   SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
  // Load the kmp_depend_info* stored in the depobj variable.
  LValue Base = CGF.EmitLoadOfPointerLValue(
      CGF.Builder.CreateElementBitCast(
          DepobjLVal.getAddress(CGF),
          CGF.ConvertTypeForMem(KmpDependInfoPtrTy)),
      KmpDependInfoPtrTy->castAs<PointerType>());
  // Step back one element to reach the hidden header record.
  Address DepObjAddr = CGF.Builder.CreateGEP(
      Base.getAddress(CGF),
      llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
  LValue NumDepsBase = CGF.MakeAddrLValue(
      DepObjAddr, KmpDependInfoTy, Base.getBaseInfo(), Base.getTBAAInfo());
  // NumDeps = deps[i].base_addr;
  LValue BaseAddrLVal = CGF.EmitLValueForField(
      NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
  llvm::Value *NumDeps = CGF.EmitLoadOfScalar(BaseAddrLVal, Loc);
  return std::make_pair(NumDeps, Base);
}
4588 
/// Emits kmp_depend_info entries for all expressions of a single 'depend'
/// clause into \p DependenciesArray.
///
/// \p Pos is either a pointer to a compile-time index (advanced in place;
/// used when the number of dependencies is statically known) or a pointer
/// to an lvalue holding a runtime counter (used when an iterator modifier
/// makes the count dynamic).
static void emitDependData(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
                           llvm::PointerUnion<unsigned *, LValue *> Pos,
                           const OMPTaskDataTy::DependData &Data,
                           Address DependenciesArray) {
  CodeGenModule &CGM = CGF.CGM;
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);

  // Materialize the iterator variables (if any) so the dependence
  // expressions below can reference them.
  OMPIteratorGeneratorScope IteratorScope(
      CGF, cast_or_null<OMPIteratorExpr>(
               Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
                                 : nullptr));
  for (const Expr *E : Data.DepExprs) {
    llvm::Value *Addr;
    llvm::Value *Size;

    // The expression will be a nullptr in the 'omp_all_memory' case.
    if (E) {
      std::tie(Addr, Size) = getPointerAndSize(CGF, E);
      Addr = CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy);
    } else {
      // 'omp_all_memory' is encoded as a null address with zero length.
      Addr = llvm::ConstantInt::get(CGF.IntPtrTy, 0);
      Size = llvm::ConstantInt::get(CGF.SizeTy, 0);
    }
    LValue Base;
    if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
      // Statically known slot: index with a constant GEP.
      Base = CGF.MakeAddrLValue(
          CGF.Builder.CreateConstGEP(DependenciesArray, *P), KmpDependInfoTy);
    } else {
      // Runtime counter: load the current position and index dynamically.
      assert(E && "Expected a non-null expression");
      LValue &PosLVal = *Pos.get<LValue *>();
      llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
      Base = CGF.MakeAddrLValue(
          CGF.Builder.CreateGEP(DependenciesArray, Idx), KmpDependInfoTy);
    }
    // deps[i].base_addr = &<Dependencies[i].second>;
    LValue BaseAddrLVal = CGF.EmitLValueForField(
        Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
    CGF.EmitStoreOfScalar(Addr, BaseAddrLVal);
    // deps[i].len = sizeof(<Dependencies[i].second>);
    LValue LenLVal = CGF.EmitLValueForField(
        Base, *std::next(KmpDependInfoRD->field_begin(), Len));
    CGF.EmitStoreOfScalar(Size, LenLVal);
    // deps[i].flags = <Dependencies[i].first>;
    RTLDependenceKindTy DepKind = translateDependencyKind(Data.DepKind);
    LValue FlagsLVal = CGF.EmitLValueForField(
        Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
    CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
                          FlagsLVal);
    // Advance the position: bump the compile-time index or emit a runtime
    // increment of the counter temporary.
    if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
      ++(*P);
    } else {
      LValue &PosLVal = *Pos.get<LValue *>();
      llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
      Idx = CGF.Builder.CreateNUWAdd(Idx,
                                     llvm::ConstantInt::get(Idx->getType(), 1));
      CGF.EmitStoreOfScalar(Idx, PosLVal);
    }
  }
}
4653 
emitDepobjElementsSizes(CodeGenFunction & CGF,QualType & KmpDependInfoTy,const OMPTaskDataTy::DependData & Data)4654 SmallVector<llvm::Value *, 4> CGOpenMPRuntime::emitDepobjElementsSizes(
4655     CodeGenFunction &CGF, QualType &KmpDependInfoTy,
4656     const OMPTaskDataTy::DependData &Data) {
4657   assert(Data.DepKind == OMPC_DEPEND_depobj &&
4658          "Expected depobj dependecy kind.");
4659   SmallVector<llvm::Value *, 4> Sizes;
4660   SmallVector<LValue, 4> SizeLVals;
4661   ASTContext &C = CGF.getContext();
4662   {
4663     OMPIteratorGeneratorScope IteratorScope(
4664         CGF, cast_or_null<OMPIteratorExpr>(
4665                  Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
4666                                    : nullptr));
4667     for (const Expr *E : Data.DepExprs) {
4668       llvm::Value *NumDeps;
4669       LValue Base;
4670       LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
4671       std::tie(NumDeps, Base) =
4672           getDepobjElements(CGF, DepobjLVal, E->getExprLoc());
4673       LValue NumLVal = CGF.MakeAddrLValue(
4674           CGF.CreateMemTemp(C.getUIntPtrType(), "depobj.size.addr"),
4675           C.getUIntPtrType());
4676       CGF.Builder.CreateStore(llvm::ConstantInt::get(CGF.IntPtrTy, 0),
4677                               NumLVal.getAddress(CGF));
4678       llvm::Value *PrevVal = CGF.EmitLoadOfScalar(NumLVal, E->getExprLoc());
4679       llvm::Value *Add = CGF.Builder.CreateNUWAdd(PrevVal, NumDeps);
4680       CGF.EmitStoreOfScalar(Add, NumLVal);
4681       SizeLVals.push_back(NumLVal);
4682     }
4683   }
4684   for (unsigned I = 0, E = SizeLVals.size(); I < E; ++I) {
4685     llvm::Value *Size =
4686         CGF.EmitLoadOfScalar(SizeLVals[I], Data.DepExprs[I]->getExprLoc());
4687     Sizes.push_back(Size);
4688   }
4689   return Sizes;
4690 }
4691 
/// Copies the kmp_depend_info elements of every depobj expression in
/// \p Data into \p DependenciesArray, starting at the runtime position
/// held in \p PosLVal and advancing it past the copied elements.
void CGOpenMPRuntime::emitDepobjElements(CodeGenFunction &CGF,
                                         QualType &KmpDependInfoTy,
                                         LValue PosLVal,
                                         const OMPTaskDataTy::DependData &Data,
                                         Address DependenciesArray) {
  assert(Data.DepKind == OMPC_DEPEND_depobj &&
         "Expected depobj dependecy kind.");
  llvm::Value *ElSize = CGF.getTypeSize(KmpDependInfoTy);
  {
    OMPIteratorGeneratorScope IteratorScope(
        CGF, cast_or_null<OMPIteratorExpr>(
                 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
                                   : nullptr));
    for (unsigned I = 0, End = Data.DepExprs.size(); I < End; ++I) {
      const Expr *E = Data.DepExprs[I];
      llvm::Value *NumDeps;
      LValue Base;
      LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
      std::tie(NumDeps, Base) =
          getDepobjElements(CGF, DepobjLVal, E->getExprLoc());

      // memcopy dependency data: NumDeps elements of ElSize bytes each.
      llvm::Value *Size = CGF.Builder.CreateNUWMul(
          ElSize,
          CGF.Builder.CreateIntCast(NumDeps, CGF.SizeTy, /*isSigned=*/false));
      llvm::Value *Pos = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
      Address DepAddr = CGF.Builder.CreateGEP(DependenciesArray, Pos);
      CGF.Builder.CreateMemCpy(DepAddr, Base.getAddress(CGF), Size);

      // Advance the position by the number of copied elements
      // (Pos += NumDeps; the counter is in elements, not bytes).
      llvm::Value *Add = CGF.Builder.CreateNUWAdd(Pos, NumDeps);
      CGF.EmitStoreOfScalar(Add, PosLVal);
    }
  }
}
4728 
/// Builds the combined kmp_depend_info array for a task's 'depend' clauses.
///
/// Returns {number of elements (i32), array address}, or
/// {nullptr, invalid} when every clause is empty. The array is a fixed-size
/// alloca when all counts are compile-time constants, and a VLA when depobj
/// entries or iterator modifiers make the total a runtime value.
std::pair<llvm::Value *, Address> CGOpenMPRuntime::emitDependClause(
    CodeGenFunction &CGF, ArrayRef<OMPTaskDataTy::DependData> Dependencies,
    SourceLocation Loc) {
  if (llvm::all_of(Dependencies, [](const OMPTaskDataTy::DependData &D) {
        return D.DepExprs.empty();
      }))
    return std::make_pair(nullptr, Address::invalid());
  // Process list of dependencies.
  ASTContext &C = CGM.getContext();
  Address DependenciesArray = Address::invalid();
  llvm::Value *NumOfElements = nullptr;
  // Statically known count: regular dependencies only (no depobj, no
  // iterator modifier).
  unsigned NumDependencies = std::accumulate(
      Dependencies.begin(), Dependencies.end(), 0,
      [](unsigned V, const OMPTaskDataTy::DependData &D) {
        return D.DepKind == OMPC_DEPEND_depobj
                   ? V
                   : (V + (D.IteratorExpr ? 0 : D.DepExprs.size()));
      });
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  bool HasDepobjDeps = false;
  bool HasRegularWithIterators = false;
  llvm::Value *NumOfDepobjElements = llvm::ConstantInt::get(CGF.IntPtrTy, 0);
  llvm::Value *NumOfRegularWithIterators =
      llvm::ConstantInt::get(CGF.IntPtrTy, 0);
  // Calculate number of depobj dependecies and regular deps with the iterators.
  for (const OMPTaskDataTy::DependData &D : Dependencies) {
    if (D.DepKind == OMPC_DEPEND_depobj) {
      SmallVector<llvm::Value *, 4> Sizes =
          emitDepobjElementsSizes(CGF, KmpDependInfoTy, D);
      for (llvm::Value *Size : Sizes) {
        NumOfDepobjElements =
            CGF.Builder.CreateNUWAdd(NumOfDepobjElements, Size);
      }
      HasDepobjDeps = true;
      continue;
    }
    // Include number of iterations, if any.

    if (const auto *IE = cast_or_null<OMPIteratorExpr>(D.IteratorExpr)) {
      // Each iterator multiplies the clause's expression count by its trip
      // count (upper bound of the iterator range).
      for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
        llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
        Sz = CGF.Builder.CreateIntCast(Sz, CGF.IntPtrTy, /*isSigned=*/false);
        llvm::Value *NumClauseDeps = CGF.Builder.CreateNUWMul(
            Sz, llvm::ConstantInt::get(CGF.IntPtrTy, D.DepExprs.size()));
        NumOfRegularWithIterators =
            CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumClauseDeps);
      }
      HasRegularWithIterators = true;
      continue;
    }
  }

  QualType KmpDependInfoArrayTy;
  if (HasDepobjDeps || HasRegularWithIterators) {
    // Runtime-sized case: total = static count + depobj elements +
    // iterator-expanded elements; allocate a VLA of that size.
    NumOfElements = llvm::ConstantInt::get(CGM.IntPtrTy, NumDependencies,
                                           /*isSigned=*/false);
    if (HasDepobjDeps) {
      NumOfElements =
          CGF.Builder.CreateNUWAdd(NumOfDepobjElements, NumOfElements);
    }
    if (HasRegularWithIterators) {
      NumOfElements =
          CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumOfElements);
    }
    // Wrap the computed size in an OpaqueValueExpr so it can serve as the
    // VLA size expression.
    auto *OVE = new (C) OpaqueValueExpr(
        Loc, C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0),
        VK_PRValue);
    CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, OVE,
                                                  RValue::get(NumOfElements));
    KmpDependInfoArrayTy =
        C.getVariableArrayType(KmpDependInfoTy, OVE, ArrayType::Normal,
                               /*IndexTypeQuals=*/0, SourceRange(Loc, Loc));
    // CGF.EmitVariablyModifiedType(KmpDependInfoArrayTy);
    // Properly emit variable-sized array.
    auto *PD = ImplicitParamDecl::Create(C, KmpDependInfoArrayTy,
                                         ImplicitParamDecl::Other);
    CGF.EmitVarDecl(*PD);
    DependenciesArray = CGF.GetAddrOfLocalVar(PD);
    NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
                                              /*isSigned=*/false);
  } else {
    // Fully static case: a constant-sized array suffices.
    KmpDependInfoArrayTy = C.getConstantArrayType(
        KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies), nullptr,
        ArrayType::Normal, /*IndexTypeQuals=*/0);
    DependenciesArray =
        CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr");
    DependenciesArray = CGF.Builder.CreateConstArrayGEP(DependenciesArray, 0);
    NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumDependencies,
                                           /*isSigned=*/false);
  }
  // First fill the statically indexed regular dependencies.
  unsigned Pos = 0;
  for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
    if (Dependencies[I].DepKind == OMPC_DEPEND_depobj ||
        Dependencies[I].IteratorExpr)
      continue;
    emitDependData(CGF, KmpDependInfoTy, &Pos, Dependencies[I],
                   DependenciesArray);
  }
  // Copy regular dependecies with iterators.
  // A runtime counter takes over from the static index from here on.
  LValue PosLVal = CGF.MakeAddrLValue(
      CGF.CreateMemTemp(C.getSizeType(), "dep.counter.addr"), C.getSizeType());
  CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
  for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
    if (Dependencies[I].DepKind == OMPC_DEPEND_depobj ||
        !Dependencies[I].IteratorExpr)
      continue;
    emitDependData(CGF, KmpDependInfoTy, &PosLVal, Dependencies[I],
                   DependenciesArray);
  }
  // Copy final depobj arrays without iterators.
  if (HasDepobjDeps) {
    for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
      if (Dependencies[I].DepKind != OMPC_DEPEND_depobj)
        continue;
      emitDepobjElements(CGF, KmpDependInfoTy, PosLVal, Dependencies[I],
                         DependenciesArray);
    }
  }
  DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      DependenciesArray, CGF.VoidPtrTy, CGF.Int8Ty);
  return std::make_pair(NumOfElements, DependenciesArray);
}
4852 
/// Allocates and fills the dependency array backing an 'omp depobj'
/// construct.
///
/// The array is heap-allocated via __kmpc_alloc with one extra leading
/// element whose 'base_addr' field records the element count (consumed by
/// getDepobjElements and needed for 'depobj(x) update(in)'). The returned
/// address points past that header, at the first real element.
Address CGOpenMPRuntime::emitDepobjDependClause(
    CodeGenFunction &CGF, const OMPTaskDataTy::DependData &Dependencies,
    SourceLocation Loc) {
  if (Dependencies.DepExprs.empty())
    return Address::invalid();
  // Process list of dependencies.
  ASTContext &C = CGM.getContext();
  Address DependenciesArray = Address::invalid();
  unsigned NumDependencies = Dependencies.DepExprs.size();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());

  llvm::Value *Size;
  // Define type kmp_depend_info[<Dependencies.size()>];
  // For depobj reserve one extra element to store the number of elements.
  // It is required to handle depobj(x) update(in) construct.
  // kmp_depend_info[<Dependencies.size()>] deps;
  llvm::Value *NumDepsVal;
  CharUnits Align = C.getTypeAlignInChars(KmpDependInfoTy);
  if (const auto *IE =
          cast_or_null<OMPIteratorExpr>(Dependencies.IteratorExpr)) {
    // Iterator modifier: element count is the product of all iterator trip
    // counts, so size must be computed at runtime.
    NumDepsVal = llvm::ConstantInt::get(CGF.SizeTy, 1);
    for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
      llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
      Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
      NumDepsVal = CGF.Builder.CreateNUWMul(NumDepsVal, Sz);
    }
    // +1 for the header element, then scale by the (aligned) record size.
    Size = CGF.Builder.CreateNUWAdd(llvm::ConstantInt::get(CGF.SizeTy, 1),
                                    NumDepsVal);
    CharUnits SizeInBytes =
        C.getTypeSizeInChars(KmpDependInfoTy).alignTo(Align);
    llvm::Value *RecSize = CGM.getSize(SizeInBytes);
    Size = CGF.Builder.CreateNUWMul(Size, RecSize);
    NumDepsVal =
        CGF.Builder.CreateIntCast(NumDepsVal, CGF.IntPtrTy, /*isSigned=*/false);
  } else {
    // Static count: byte size of kmp_depend_info[NumDependencies + 1].
    QualType KmpDependInfoArrayTy = C.getConstantArrayType(
        KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies + 1),
        nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0);
    CharUnits Sz = C.getTypeSizeInChars(KmpDependInfoArrayTy);
    Size = CGM.getSize(Sz.alignTo(Align));
    NumDepsVal = llvm::ConstantInt::get(CGF.IntPtrTy, NumDependencies);
  }
  // Need to allocate on the dynamic memory.
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  // Use default allocator.
  llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  llvm::Value *Args[] = {ThreadID, Size, Allocator};

  llvm::Value *Addr =
      CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_alloc),
                          Args, ".dep.arr.addr");
  llvm::Type *KmpDependInfoLlvmTy = CGF.ConvertTypeForMem(KmpDependInfoTy);
  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      Addr, KmpDependInfoLlvmTy->getPointerTo());
  DependenciesArray = Address(Addr, KmpDependInfoLlvmTy, Align);
  // Write number of elements in the first element of array for depobj.
  LValue Base = CGF.MakeAddrLValue(DependenciesArray, KmpDependInfoTy);
  // deps[i].base_addr = NumDependencies;
  LValue BaseAddrLVal = CGF.EmitLValueForField(
      Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
  CGF.EmitStoreOfScalar(NumDepsVal, BaseAddrLVal);
  // Fill the real elements starting at index 1 (index 0 is the header);
  // with an iterator modifier the index must be a runtime counter.
  llvm::PointerUnion<unsigned *, LValue *> Pos;
  unsigned Idx = 1;
  LValue PosLVal;
  if (Dependencies.IteratorExpr) {
    PosLVal = CGF.MakeAddrLValue(
        CGF.CreateMemTemp(C.getSizeType(), "iterator.counter.addr"),
        C.getSizeType());
    CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Idx), PosLVal,
                          /*IsInit=*/true);
    Pos = &PosLVal;
  } else {
    Pos = &Idx;
  }
  emitDependData(CGF, KmpDependInfoTy, Pos, Dependencies, DependenciesArray);
  // Return the address of the first real element (past the header).
  DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateConstGEP(DependenciesArray, 1), CGF.VoidPtrTy,
      CGF.Int8Ty);
  return DependenciesArray;
}
4937 
emitDestroyClause(CodeGenFunction & CGF,LValue DepobjLVal,SourceLocation Loc)4938 void CGOpenMPRuntime::emitDestroyClause(CodeGenFunction &CGF, LValue DepobjLVal,
4939                                         SourceLocation Loc) {
4940   ASTContext &C = CGM.getContext();
4941   QualType FlagsTy;
4942   getDependTypes(C, KmpDependInfoTy, FlagsTy);
4943   LValue Base = CGF.EmitLoadOfPointerLValue(
4944       DepobjLVal.getAddress(CGF), C.VoidPtrTy.castAs<PointerType>());
4945   QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
4946   Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4947       Base.getAddress(CGF), CGF.ConvertTypeForMem(KmpDependInfoPtrTy),
4948       CGF.ConvertTypeForMem(KmpDependInfoTy));
4949   llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
4950       Addr.getElementType(), Addr.getPointer(),
4951       llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
4952   DepObjAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(DepObjAddr,
4953                                                                CGF.VoidPtrTy);
4954   llvm::Value *ThreadID = getThreadID(CGF, Loc);
4955   // Use default allocator.
4956   llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
4957   llvm::Value *Args[] = {ThreadID, DepObjAddr, Allocator};
4958 
4959   // _kmpc_free(gtid, addr, nullptr);
4960   (void)CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
4961                                 CGM.getModule(), OMPRTL___kmpc_free),
4962                             Args);
4963 }
4964 
/// Emits the 'update' clause of an 'omp depobj' construct: rewrites the
/// 'flags' field of every element in the depobj's dependency array to the
/// runtime encoding of \p NewDepKind, using a hand-built do-while loop over
/// the elements.
void CGOpenMPRuntime::emitUpdateClause(CodeGenFunction &CGF, LValue DepobjLVal,
                                       OpenMPDependClauseKind NewDepKind,
                                       SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
  llvm::Value *NumDeps;
  LValue Base;
  // Fetch the element count and the first element from the depobj.
  std::tie(NumDeps, Base) = getDepobjElements(CGF, DepobjLVal, Loc);

  Address Begin = Base.getAddress(CGF);
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *End = CGF.Builder.CreateGEP(
      Begin.getElementType(), Begin.getPointer(), NumDeps);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.done");
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);
  // PHI over the current element pointer: first element on entry from
  // EntryBB, advanced pointer on the back edge.
  llvm::PHINode *ElementPHI =
      CGF.Builder.CreatePHI(Begin.getType(), 2, "omp.elementPast");
  ElementPHI->addIncoming(Begin.getPointer(), EntryBB);
  Begin = Begin.withPointer(ElementPHI);
  Base = CGF.MakeAddrLValue(Begin, KmpDependInfoTy, Base.getBaseInfo(),
                            Base.getTBAAInfo());
  // deps[i].flags = NewDepKind;
  RTLDependenceKindTy DepKind = translateDependencyKind(NewDepKind);
  LValue FlagsLVal = CGF.EmitLValueForField(
      Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
  CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
                        FlagsLVal);

  // Shift the address forward by one element.
  Address ElementNext =
      CGF.Builder.CreateConstGEP(Begin, /*Index=*/1, "omp.elementNext");
  ElementPHI->addIncoming(ElementNext.getPointer(),
                          CGF.Builder.GetInsertBlock());
  // Loop back until the advanced pointer reaches the end.
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(ElementNext.getPointer(), End, "omp.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}
5011 
/// Emit code that creates and launches a task for an OpenMP 'task'-like
/// directive: the task object is materialized by emitTaskInit(), then either
/// enqueued through the KMP runtime (deferred path) or executed inline
/// (undeferred path, when the 'if' clause evaluates to false).
///
/// \param CGF Code generation state of the enclosing function.
/// \param Loc Source location used to build the runtime ident_t argument.
/// \param D The executable directive the task was generated for.
/// \param TaskFunction Outlined function containing the task body.
/// \param SharedsTy Type of the record capturing shared variables.
/// \param Shareds Address of the captured shared variables.
/// \param IfCond Condition of the 'if' clause, or nullptr if absent.
/// \param Data Additional task info (dependences, tied-ness, etc.).
void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
                                   const OMPExecutableDirective &D,
                                   llvm::Function *TaskFunction,
                                   QualType SharedsTy, Address Shareds,
                                   const Expr *IfCond,
                                   const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint())
    return;

  // Allocate the kmp_task_t object and initialize shareds/privates.
  TaskResultTy Result =
      emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
  llvm::Value *NewTask = Result.NewTask;
  llvm::Function *TaskEntry = Result.TaskEntry;
  llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy;
  LValue TDBase = Result.TDBase;
  const RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD;
  // Process list of dependences.
  Address DependenciesArray = Address::invalid();
  llvm::Value *NumOfElements;
  std::tie(NumOfElements, DependenciesArray) =
      emitDependClause(CGF, Data.Dependences, Loc);

  // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
  // libcall.
  // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
  // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
  // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if dependence
  // list is not empty
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
  llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask };
  llvm::Value *DepTaskArgs[7];
  if (!Data.Dependences.empty()) {
    DepTaskArgs[0] = UpLoc;
    DepTaskArgs[1] = ThreadID;
    DepTaskArgs[2] = NewTask;
    DepTaskArgs[3] = NumOfElements;
    DepTaskArgs[4] = DependenciesArray.getPointer();
    DepTaskArgs[5] = CGF.Builder.getInt32(0);                    // ndeps_noalias
    DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); // noalias list
  }
  // Deferred path: enqueue the task through the runtime.
  auto &&ThenCodeGen = [this, &Data, TDBase, KmpTaskTQTyRD, &TaskArgs,
                        &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) {
    if (!Data.Tied) {
      // Untied tasks restart from part 0.
      auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
      LValue PartIdLVal = CGF.EmitLValueForField(TDBase, *PartIdFI);
      CGF.EmitStoreOfScalar(CGF.Builder.getInt32(0), PartIdLVal);
    }
    if (!Data.Dependences.empty()) {
      CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___kmpc_omp_task_with_deps),
          DepTaskArgs);
    } else {
      CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_omp_task),
                          TaskArgs);
    }
    // Check if parent region is untied and build return for untied task;
    if (auto *Region =
            dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
      Region->emitUntiedSwitch(CGF);
  };

  llvm::Value *DepWaitTaskArgs[6];
  if (!Data.Dependences.empty()) {
    DepWaitTaskArgs[0] = UpLoc;
    DepWaitTaskArgs[1] = ThreadID;
    DepWaitTaskArgs[2] = NumOfElements;
    DepWaitTaskArgs[3] = DependenciesArray.getPointer();
    DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);                    // ndeps_noalias
    DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); // noalias list
  }
  auto &M = CGM.getModule();
  // Undeferred path: wait on dependences (if any), then run the task body
  // inline, bracketed by task_begin_if0/task_complete_if0.
  auto &&ElseCodeGen = [this, &M, &TaskArgs, ThreadID, NewTaskNewTaskTTy,
                        TaskEntry, &Data, &DepWaitTaskArgs,
                        Loc](CodeGenFunction &CGF, PrePostActionTy &) {
    CodeGenFunction::RunCleanupsScope LocalScope(CGF);
    // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
    // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
    // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info
    // is specified.
    if (!Data.Dependences.empty())
      CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_omp_wait_deps),
          DepWaitTaskArgs);
    // Call proxy_task_entry(gtid, new_task);
    auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy,
                      Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
      Action.Enter(CGF);
      llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy};
      CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskEntry,
                                                          OutlinedFnArgs);
    };

    // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
    // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
    RegionCodeGenTy RCG(CodeGen);
    CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                              M, OMPRTL___kmpc_omp_task_begin_if0),
                          TaskArgs,
                          OMPBuilder.getOrCreateRuntimeFunction(
                              M, OMPRTL___kmpc_omp_task_complete_if0),
                          TaskArgs);
    RCG.setAction(Action);
    RCG(CGF);
  };

  if (IfCond) {
    // Branch between the deferred and undeferred paths at runtime.
    emitIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen);
  } else {
    // No 'if' clause: always take the deferred path.
    RegionCodeGenTy ThenRCG(ThenCodeGen);
    ThenRCG(CGF);
  }
}
5129 
/// Emit code for an OpenMP 'taskloop'-like directive: the task object is
/// created via emitTaskInit(), its loop-bound/stride/reduction fields are
/// filled in, and __kmpc_taskloop is invoked to run the loop.
///
/// \param CGF Code generation state of the enclosing function.
/// \param Loc Source location used to build the runtime ident_t argument.
/// \param D The loop directive this taskloop was generated for.
/// \param TaskFunction Outlined function containing the taskloop body.
/// \param SharedsTy Type of the record capturing shared variables.
/// \param Shareds Address of the captured shared variables.
/// \param IfCond Condition of the 'if' clause, or nullptr if absent.
/// \param Data Additional task info (schedule, reductions, etc.).
void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPLoopDirective &D,
                                       llvm::Function *TaskFunction,
                                       QualType SharedsTy, Address Shareds,
                                       const Expr *IfCond,
                                       const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint())
    return;
  TaskResultTy Result =
      emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
  // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
  // libcall.
  // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
  // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
  // sched, kmp_uint64 grainsize, void *task_dup);
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
  llvm::Value *IfVal;
  if (IfCond) {
    // if_val is the runtime evaluation of the 'if' clause widened to int.
    IfVal = CGF.Builder.CreateIntCast(CGF.EvaluateExprAsBool(IfCond), CGF.IntTy,
                                      /*isSigned=*/true);
  } else {
    // No 'if' clause: always true.
    IfVal = llvm::ConstantInt::getSigned(CGF.IntTy, /*V=*/1);
  }

  // Initialize the lower-bound field of the task from the directive's LB
  // variable initializer.
  LValue LBLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound));
  const auto *LBVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getLowerBoundVariable())->getDecl());
  CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(CGF),
                       LBLVal.getQuals(),
                       /*IsInitializer=*/true);
  // Same for the upper bound.
  LValue UBLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound));
  const auto *UBVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getUpperBoundVariable())->getDecl());
  CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(CGF),
                       UBLVal.getQuals(),
                       /*IsInitializer=*/true);
  // Same for the stride.
  LValue StLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTStride));
  const auto *StVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl());
  CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(CGF),
                       StLVal.getQuals(),
                       /*IsInitializer=*/true);
  // Store reductions address.
  LValue RedLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTReductions));
  if (Data.Reductions) {
    CGF.EmitStoreOfScalar(Data.Reductions, RedLVal);
  } else {
    // No reductions: zero the pointer field.
    CGF.EmitNullInitialization(RedLVal.getAddress(CGF),
                               CGF.getContext().VoidPtrTy);
  }
  // Encoding of the 'sched' argument expected by __kmpc_taskloop.
  enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 };
  llvm::Value *TaskArgs[] = {
      UpLoc,
      ThreadID,
      Result.NewTask,
      IfVal,
      LBLVal.getPointer(CGF),
      UBLVal.getPointer(CGF),
      CGF.EmitLoadOfScalar(StLVal, Loc),
      llvm::ConstantInt::getSigned(
          CGF.IntTy, 1), // Always 1 because taskgroup emitted by the compiler
      llvm::ConstantInt::getSigned(
          CGF.IntTy, Data.Schedule.getPointer()
                         ? Data.Schedule.getInt() ? NumTasks : Grainsize
                         : NoSchedule),
      Data.Schedule.getPointer()
          ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty,
                                      /*isSigned=*/false)
          : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0),
      Result.TaskDupFn ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                             Result.TaskDupFn, CGF.VoidPtrTy)
                       : llvm::ConstantPointerNull::get(CGF.VoidPtrTy)};
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_taskloop),
                      TaskArgs);
}
5215 
5216 /// Emit reduction operation for each element of array (required for
5217 /// array sections) LHS op = RHS.
5218 /// \param Type Type of array.
5219 /// \param LHSVar Variable on the left side of the reduction operation
5220 /// (references element of array in original variable).
5221 /// \param RHSVar Variable on the right side of the reduction operation
5222 /// (references element of array in original variable).
5223 /// \param RedOpGen Generator of reduction operation with use of LHSVar and
5224 /// RHSVar.
static void EmitOMPAggregateReduction(
    CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar,
    const VarDecl *RHSVar,
    const llvm::function_ref<void(CodeGenFunction &CGF, const Expr *,
                                  const Expr *, const Expr *)> &RedOpGen,
    const Expr *XExpr = nullptr, const Expr *EExpr = nullptr,
    const Expr *UpExpr = nullptr) {
  // Perform element-by-element initialization.
  QualType ElementTy;
  Address LHSAddr = CGF.GetAddrOfLocalVar(LHSVar);
  Address RHSAddr = CGF.GetAddrOfLocalVar(RHSVar);

  // Drill down to the base element type on both arrays.
  const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, LHSAddr);

  llvm::Value *RHSBegin = RHSAddr.getPointer();
  llvm::Value *LHSBegin = LHSAddr.getPointer();
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *LHSEnd =
      CGF.Builder.CreateGEP(LHSAddr.getElementType(), LHSBegin, NumElements);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arraycpy.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arraycpy.done");
  // Skip the loop entirely for zero-length arrays.
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(LHSBegin, LHSEnd, "omp.arraycpy.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);

  // PHI over the current source (RHS) element pointer.
  llvm::PHINode *RHSElementPHI = CGF.Builder.CreatePHI(
      RHSBegin->getType(), 2, "omp.arraycpy.srcElementPast");
  RHSElementPHI->addIncoming(RHSBegin, EntryBB);
  Address RHSElementCurrent(
      RHSElementPHI, RHSAddr.getElementType(),
      RHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // PHI over the current destination (LHS) element pointer.
  llvm::PHINode *LHSElementPHI = CGF.Builder.CreatePHI(
      LHSBegin->getType(), 2, "omp.arraycpy.destElementPast");
  LHSElementPHI->addIncoming(LHSBegin, EntryBB);
  Address LHSElementCurrent(
      LHSElementPHI, LHSAddr.getElementType(),
      LHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  // Temporarily remap LHSVar/RHSVar to the current element addresses so that
  // the generated reduction op operates on a single element pair.
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Scope.addPrivate(LHSVar, LHSElementCurrent);
  Scope.addPrivate(RHSVar, RHSElementCurrent);
  Scope.Privatize();
  RedOpGen(CGF, XExpr, EExpr, UpExpr);
  Scope.ForceCleanup();

  // Shift the address forward by one element.
  llvm::Value *LHSElementNext = CGF.Builder.CreateConstGEP1_32(
      LHSAddr.getElementType(), LHSElementPHI, /*Idx0=*/1,
      "omp.arraycpy.dest.element");
  llvm::Value *RHSElementNext = CGF.Builder.CreateConstGEP1_32(
      RHSAddr.getElementType(), RHSElementPHI, /*Idx0=*/1,
      "omp.arraycpy.src.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      CGF.Builder.CreateICmpEQ(LHSElementNext, LHSEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  // Wire the back-edges of the loop into the PHIs from the current block
  // (RedOpGen may have moved the insert point into a new block).
  LHSElementPHI->addIncoming(LHSElementNext, CGF.Builder.GetInsertBlock());
  RHSElementPHI->addIncoming(RHSElementNext, CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}
5298 
5299 /// Emit reduction combiner. If the combiner is a simple expression emit it as
5300 /// is, otherwise consider it as combiner of UDR decl and emit it as a call of
5301 /// UDR combiner function.
emitReductionCombiner(CodeGenFunction & CGF,const Expr * ReductionOp)5302 static void emitReductionCombiner(CodeGenFunction &CGF,
5303                                   const Expr *ReductionOp) {
5304   if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
5305     if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
5306       if (const auto *DRE =
5307               dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
5308         if (const auto *DRD =
5309                 dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) {
5310           std::pair<llvm::Function *, llvm::Function *> Reduction =
5311               CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
5312           RValue Func = RValue::get(Reduction.first);
5313           CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
5314           CGF.EmitIgnoredExpr(ReductionOp);
5315           return;
5316         }
5317   CGF.EmitIgnoredExpr(ReductionOp);
5318 }
5319 
/// Emit the outlined reduction function
///   void reduction_func(void *LHSArg, void *RHSArg);
/// that the KMP runtime invokes to combine two arrays of reduction items.
/// Both arguments are arrays of void* pointers to the individual items
/// (with extra slots holding VLA sizes where needed); for each item the
/// corresponding reduction op from \p ReductionOps is applied in place:
///   *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]);
llvm::Function *CGOpenMPRuntime::emitReductionFunction(
    SourceLocation Loc, llvm::Type *ArgsElemType,
    ArrayRef<const Expr *> Privates, ArrayRef<const Expr *> LHSExprs,
    ArrayRef<const Expr *> RHSExprs, ArrayRef<const Expr *> ReductionOps) {
  ASTContext &C = CGM.getContext();

  // void reduction_func(void *LHSArg, void *RHSArg);
  FunctionArgList Args;
  ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  Args.push_back(&LHSArg);
  Args.push_back(&RHSArg);
  const auto &CGFI =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  std::string Name = getName({"omp", "reduction", "reduction_func"});
  auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
                                    llvm::GlobalValue::InternalLinkage, Name,
                                    &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);

  // Dst = (void*[n])(LHSArg);
  // Src = (void*[n])(RHSArg);
  Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                  CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
                  ArgsElemType->getPointerTo()),
              ArgsElemType, CGF.getPointerAlign());
  Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                  CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
                  ArgsElemType->getPointerTo()),
              ArgsElemType, CGF.getPointerAlign());

  //  ...
  //  *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]);
  //  ...
  // First pass: remap every LHS/RHS variable to its slot in the argument
  // arrays, and recover VLA sizes from the extra slots.
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  const auto *IPriv = Privates.begin();
  unsigned Idx = 0;
  for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) {
    const auto *RHSVar =
        cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl());
    Scope.addPrivate(RHSVar, emitAddrOfVarFromArray(CGF, RHS, Idx, RHSVar));
    const auto *LHSVar =
        cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl());
    Scope.addPrivate(LHSVar, emitAddrOfVarFromArray(CGF, LHS, Idx, LHSVar));
    QualType PrivTy = (*IPriv)->getType();
    if (PrivTy->isVariablyModifiedType()) {
      // Get array size and emit VLA type.
      // The size was stored (as a pointer-sized integer) in the slot
      // following the item itself; bind it to the VLA's opaque size expr.
      ++Idx;
      Address Elem = CGF.Builder.CreateConstArrayGEP(LHS, Idx);
      llvm::Value *Ptr = CGF.Builder.CreateLoad(Elem);
      const VariableArrayType *VLA =
          CGF.getContext().getAsVariableArrayType(PrivTy);
      const auto *OVE = cast<OpaqueValueExpr>(VLA->getSizeExpr());
      CodeGenFunction::OpaqueValueMapping OpaqueMap(
          CGF, OVE, RValue::get(CGF.Builder.CreatePtrToInt(Ptr, CGF.SizeTy)));
      CGF.EmitVariablyModifiedType(PrivTy);
    }
  }
  Scope.Privatize();
  // Second pass: emit each combiner with the remapped variables in effect.
  IPriv = Privates.begin();
  const auto *ILHS = LHSExprs.begin();
  const auto *IRHS = RHSExprs.begin();
  for (const Expr *E : ReductionOps) {
    if ((*IPriv)->getType()->isArrayType()) {
      // Emit reduction for array section.
      const auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
      const auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
      EmitOMPAggregateReduction(
          CGF, (*IPriv)->getType(), LHSVar, RHSVar,
          [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
            emitReductionCombiner(CGF, E);
          });
    } else {
      // Emit reduction for array subscript or single variable.
      emitReductionCombiner(CGF, E);
    }
    ++IPriv;
    ++ILHS;
    ++IRHS;
  }
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}
5409 
emitSingleReductionCombiner(CodeGenFunction & CGF,const Expr * ReductionOp,const Expr * PrivateRef,const DeclRefExpr * LHS,const DeclRefExpr * RHS)5410 void CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF,
5411                                                   const Expr *ReductionOp,
5412                                                   const Expr *PrivateRef,
5413                                                   const DeclRefExpr *LHS,
5414                                                   const DeclRefExpr *RHS) {
5415   if (PrivateRef->getType()->isArrayType()) {
5416     // Emit reduction for array section.
5417     const auto *LHSVar = cast<VarDecl>(LHS->getDecl());
5418     const auto *RHSVar = cast<VarDecl>(RHS->getDecl());
5419     EmitOMPAggregateReduction(
5420         CGF, PrivateRef->getType(), LHSVar, RHSVar,
5421         [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
5422           emitReductionCombiner(CGF, ReductionOp);
5423         });
5424   } else {
5425     // Emit reduction for array subscript or single variable.
5426     emitReductionCombiner(CGF, ReductionOp);
5427   }
5428 }
5429 
emitReduction(CodeGenFunction & CGF,SourceLocation Loc,ArrayRef<const Expr * > Privates,ArrayRef<const Expr * > LHSExprs,ArrayRef<const Expr * > RHSExprs,ArrayRef<const Expr * > ReductionOps,ReductionOptionsTy Options)5430 void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
5431                                     ArrayRef<const Expr *> Privates,
5432                                     ArrayRef<const Expr *> LHSExprs,
5433                                     ArrayRef<const Expr *> RHSExprs,
5434                                     ArrayRef<const Expr *> ReductionOps,
5435                                     ReductionOptionsTy Options) {
5436   if (!CGF.HaveInsertPoint())
5437     return;
5438 
5439   bool WithNowait = Options.WithNowait;
5440   bool SimpleReduction = Options.SimpleReduction;
5441 
5442   // Next code should be emitted for reduction:
5443   //
5444   // static kmp_critical_name lock = { 0 };
5445   //
5446   // void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
5447   //  *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]);
5448   //  ...
5449   //  *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1],
5450   //  *(Type<n>-1*)rhs[<n>-1]);
5451   // }
5452   //
5453   // ...
5454   // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]};
5455   // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
5456   // RedList, reduce_func, &<lock>)) {
5457   // case 1:
5458   //  ...
5459   //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
5460   //  ...
5461   // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
5462   // break;
5463   // case 2:
5464   //  ...
5465   //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
5466   //  ...
5467   // [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);]
5468   // break;
5469   // default:;
5470   // }
5471   //
5472   // if SimpleReduction is true, only the next code is generated:
5473   //  ...
5474   //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
5475   //  ...
5476 
5477   ASTContext &C = CGM.getContext();
5478 
5479   if (SimpleReduction) {
5480     CodeGenFunction::RunCleanupsScope Scope(CGF);
5481     const auto *IPriv = Privates.begin();
5482     const auto *ILHS = LHSExprs.begin();
5483     const auto *IRHS = RHSExprs.begin();
5484     for (const Expr *E : ReductionOps) {
5485       emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
5486                                   cast<DeclRefExpr>(*IRHS));
5487       ++IPriv;
5488       ++ILHS;
5489       ++IRHS;
5490     }
5491     return;
5492   }
5493 
5494   // 1. Build a list of reduction variables.
5495   // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]};
5496   auto Size = RHSExprs.size();
5497   for (const Expr *E : Privates) {
5498     if (E->getType()->isVariablyModifiedType())
5499       // Reserve place for array size.
5500       ++Size;
5501   }
5502   llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size);
5503   QualType ReductionArrayTy =
5504       C.getConstantArrayType(C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal,
5505                              /*IndexTypeQuals=*/0);
5506   Address ReductionList =
5507       CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list");
5508   const auto *IPriv = Privates.begin();
5509   unsigned Idx = 0;
5510   for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) {
5511     Address Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
5512     CGF.Builder.CreateStore(
5513         CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5514             CGF.EmitLValue(RHSExprs[I]).getPointer(CGF), CGF.VoidPtrTy),
5515         Elem);
5516     if ((*IPriv)->getType()->isVariablyModifiedType()) {
5517       // Store array size.
5518       ++Idx;
5519       Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
5520       llvm::Value *Size = CGF.Builder.CreateIntCast(
5521           CGF.getVLASize(
5522                  CGF.getContext().getAsVariableArrayType((*IPriv)->getType()))
5523               .NumElts,
5524           CGF.SizeTy, /*isSigned=*/false);
5525       CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy),
5526                               Elem);
5527     }
5528   }
5529 
5530   // 2. Emit reduce_func().
5531   llvm::Function *ReductionFn =
5532       emitReductionFunction(Loc, CGF.ConvertTypeForMem(ReductionArrayTy),
5533                             Privates, LHSExprs, RHSExprs, ReductionOps);
5534 
5535   // 3. Create static kmp_critical_name lock = { 0 };
5536   std::string Name = getName({"reduction"});
5537   llvm::Value *Lock = getCriticalRegionLock(Name);
5538 
5539   // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
5540   // RedList, reduce_func, &<lock>);
5541   llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE);
5542   llvm::Value *ThreadId = getThreadID(CGF, Loc);
5543   llvm::Value *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy);
5544   llvm::Value *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5545       ReductionList.getPointer(), CGF.VoidPtrTy);
5546   llvm::Value *Args[] = {
5547       IdentTLoc,                             // ident_t *<loc>
5548       ThreadId,                              // i32 <gtid>
5549       CGF.Builder.getInt32(RHSExprs.size()), // i32 <n>
5550       ReductionArrayTySize,                  // size_type sizeof(RedList)
5551       RL,                                    // void *RedList
5552       ReductionFn, // void (*) (void *, void *) <reduce_func>
5553       Lock         // kmp_critical_name *&<lock>
5554   };
5555   llvm::Value *Res = CGF.EmitRuntimeCall(
5556       OMPBuilder.getOrCreateRuntimeFunction(
5557           CGM.getModule(),
5558           WithNowait ? OMPRTL___kmpc_reduce_nowait : OMPRTL___kmpc_reduce),
5559       Args);
5560 
5561   // 5. Build switch(res)
5562   llvm::BasicBlock *DefaultBB = CGF.createBasicBlock(".omp.reduction.default");
5563   llvm::SwitchInst *SwInst =
5564       CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2);
5565 
5566   // 6. Build case 1:
5567   //  ...
5568   //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
5569   //  ...
5570   // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
5571   // break;
5572   llvm::BasicBlock *Case1BB = CGF.createBasicBlock(".omp.reduction.case1");
5573   SwInst->addCase(CGF.Builder.getInt32(1), Case1BB);
5574   CGF.EmitBlock(Case1BB);
5575 
5576   // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
5577   llvm::Value *EndArgs[] = {
5578       IdentTLoc, // ident_t *<loc>
5579       ThreadId,  // i32 <gtid>
5580       Lock       // kmp_critical_name *&<lock>
5581   };
5582   auto &&CodeGen = [Privates, LHSExprs, RHSExprs, ReductionOps](
5583                        CodeGenFunction &CGF, PrePostActionTy &Action) {
5584     CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
5585     const auto *IPriv = Privates.begin();
5586     const auto *ILHS = LHSExprs.begin();
5587     const auto *IRHS = RHSExprs.begin();
5588     for (const Expr *E : ReductionOps) {
5589       RT.emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
5590                                      cast<DeclRefExpr>(*IRHS));
5591       ++IPriv;
5592       ++ILHS;
5593       ++IRHS;
5594     }
5595   };
5596   RegionCodeGenTy RCG(CodeGen);
5597   CommonActionTy Action(
5598       nullptr, llvm::None,
5599       OMPBuilder.getOrCreateRuntimeFunction(
5600           CGM.getModule(), WithNowait ? OMPRTL___kmpc_end_reduce_nowait
5601                                       : OMPRTL___kmpc_end_reduce),
5602       EndArgs);
5603   RCG.setAction(Action);
5604   RCG(CGF);
5605 
5606   CGF.EmitBranch(DefaultBB);
5607 
5608   // 7. Build case 2:
5609   //  ...
5610   //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
5611   //  ...
5612   // break;
5613   llvm::BasicBlock *Case2BB = CGF.createBasicBlock(".omp.reduction.case2");
5614   SwInst->addCase(CGF.Builder.getInt32(2), Case2BB);
5615   CGF.EmitBlock(Case2BB);
5616 
5617   auto &&AtomicCodeGen = [Loc, Privates, LHSExprs, RHSExprs, ReductionOps](
5618                              CodeGenFunction &CGF, PrePostActionTy &Action) {
5619     const auto *ILHS = LHSExprs.begin();
5620     const auto *IRHS = RHSExprs.begin();
5621     const auto *IPriv = Privates.begin();
5622     for (const Expr *E : ReductionOps) {
5623       const Expr *XExpr = nullptr;
5624       const Expr *EExpr = nullptr;
5625       const Expr *UpExpr = nullptr;
5626       BinaryOperatorKind BO = BO_Comma;
5627       if (const auto *BO = dyn_cast<BinaryOperator>(E)) {
5628         if (BO->getOpcode() == BO_Assign) {
5629           XExpr = BO->getLHS();
5630           UpExpr = BO->getRHS();
5631         }
5632       }
5633       // Try to emit update expression as a simple atomic.
5634       const Expr *RHSExpr = UpExpr;
5635       if (RHSExpr) {
5636         // Analyze RHS part of the whole expression.
5637         if (const auto *ACO = dyn_cast<AbstractConditionalOperator>(
5638                 RHSExpr->IgnoreParenImpCasts())) {
5639           // If this is a conditional operator, analyze its condition for
5640           // min/max reduction operator.
5641           RHSExpr = ACO->getCond();
5642         }
5643         if (const auto *BORHS =
5644                 dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) {
5645           EExpr = BORHS->getRHS();
5646           BO = BORHS->getOpcode();
5647         }
5648       }
5649       if (XExpr) {
5650         const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
5651         auto &&AtomicRedGen = [BO, VD,
5652                                Loc](CodeGenFunction &CGF, const Expr *XExpr,
5653                                     const Expr *EExpr, const Expr *UpExpr) {
5654           LValue X = CGF.EmitLValue(XExpr);
5655           RValue E;
5656           if (EExpr)
5657             E = CGF.EmitAnyExpr(EExpr);
5658           CGF.EmitOMPAtomicSimpleUpdateExpr(
5659               X, E, BO, /*IsXLHSInRHSPart=*/true,
5660               llvm::AtomicOrdering::Monotonic, Loc,
5661               [&CGF, UpExpr, VD, Loc](RValue XRValue) {
5662                 CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
5663                 Address LHSTemp = CGF.CreateMemTemp(VD->getType());
5664                 CGF.emitOMPSimpleStore(
5665                     CGF.MakeAddrLValue(LHSTemp, VD->getType()), XRValue,
5666                     VD->getType().getNonReferenceType(), Loc);
5667                 PrivateScope.addPrivate(VD, LHSTemp);
5668                 (void)PrivateScope.Privatize();
5669                 return CGF.EmitAnyExpr(UpExpr);
5670               });
5671         };
5672         if ((*IPriv)->getType()->isArrayType()) {
5673           // Emit atomic reduction for array section.
5674           const auto *RHSVar =
5675               cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
5676           EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar,
5677                                     AtomicRedGen, XExpr, EExpr, UpExpr);
5678         } else {
5679           // Emit atomic reduction for array subscript or single variable.
5680           AtomicRedGen(CGF, XExpr, EExpr, UpExpr);
5681         }
5682       } else {
5683         // Emit as a critical region.
5684         auto &&CritRedGen = [E, Loc](CodeGenFunction &CGF, const Expr *,
5685                                            const Expr *, const Expr *) {
5686           CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
5687           std::string Name = RT.getName({"atomic_reduction"});
5688           RT.emitCriticalRegion(
5689               CGF, Name,
5690               [=](CodeGenFunction &CGF, PrePostActionTy &Action) {
5691                 Action.Enter(CGF);
5692                 emitReductionCombiner(CGF, E);
5693               },
5694               Loc);
5695         };
5696         if ((*IPriv)->getType()->isArrayType()) {
5697           const auto *LHSVar =
5698               cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
5699           const auto *RHSVar =
5700               cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
5701           EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar,
5702                                     CritRedGen);
5703         } else {
5704           CritRedGen(CGF, nullptr, nullptr, nullptr);
5705         }
5706       }
5707       ++ILHS;
5708       ++IRHS;
5709       ++IPriv;
5710     }
5711   };
5712   RegionCodeGenTy AtomicRCG(AtomicCodeGen);
5713   if (!WithNowait) {
5714     // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>);
5715     llvm::Value *EndArgs[] = {
5716         IdentTLoc, // ident_t *<loc>
5717         ThreadId,  // i32 <gtid>
5718         Lock       // kmp_critical_name *&<lock>
5719     };
5720     CommonActionTy Action(nullptr, llvm::None,
5721                           OMPBuilder.getOrCreateRuntimeFunction(
5722                               CGM.getModule(), OMPRTL___kmpc_end_reduce),
5723                           EndArgs);
5724     AtomicRCG.setAction(Action);
5725     AtomicRCG(CGF);
5726   } else {
5727     AtomicRCG(CGF);
5728   }
5729 
5730   CGF.EmitBranch(DefaultBB);
5731   CGF.EmitBlock(DefaultBB, /*IsFinished=*/true);
5732 }
5733 
5734 /// Generates unique name for artificial threadprivate variables.
5735 /// Format is: <Prefix> "." <Decl_mangled_name> "_" "<Decl_start_loc_raw_enc>"
generateUniqueName(CodeGenModule & CGM,StringRef Prefix,const Expr * Ref)5736 static std::string generateUniqueName(CodeGenModule &CGM, StringRef Prefix,
5737                                       const Expr *Ref) {
5738   SmallString<256> Buffer;
5739   llvm::raw_svector_ostream Out(Buffer);
5740   const clang::DeclRefExpr *DE;
5741   const VarDecl *D = ::getBaseDecl(Ref, DE);
5742   if (!D)
5743     D = cast<VarDecl>(cast<DeclRefExpr>(Ref)->getDecl());
5744   D = D->getCanonicalDecl();
5745   std::string Name = CGM.getOpenMPRuntime().getName(
5746       {D->isLocalVarDeclOrParm() ? D->getName() : CGM.getMangledName(D)});
5747   Out << Prefix << Name << "_"
5748       << D->getCanonicalDecl()->getBeginLoc().getRawEncoding();
5749   return std::string(Out.str());
5750 }
5751 
/// Emits reduction initializer function:
/// \code
/// void @.red_init(void* %arg, void* %orig) {
/// %0 = bitcast void* %arg to <type>*
/// store <type> <init>, <type>* %0
/// ret void
/// }
/// \endcode
static llvm::Value *emitReduceInitFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N) {
  ASTContext &C = CGM.getContext();
  // Both parameters are 'void *restrict'.
  QualType VoidPtrTy = C.VoidPtrTy;
  VoidPtrTy.addRestrict();
  FunctionArgList Args;
  // %arg: pointer to the private copy to be initialized.
  ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
                          ImplicitParamDecl::Other);
  // %orig: pointer to the original reduction item.
  ImplicitParamDecl ParamOrig(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
                              ImplicitParamDecl::Other);
  Args.emplace_back(&Param);
  Args.emplace_back(&ParamOrig);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_init", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  // Cast %arg from 'void *' to a pointer to the private copy's type and load
  // the address of the item to initialize.
  QualType PrivateType = RCG.getPrivateType(N);
  Address PrivateAddr = CGF.EmitLoadOfPointer(
      CGF.Builder.CreateElementBitCast(
          CGF.GetAddrOfLocalVar(&Param),
          CGF.ConvertTypeForMem(PrivateType)->getPointerTo()),
      C.getPointerType(PrivateType)->castAs<PointerType>());
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  Address OrigAddr = Address::invalid();
  // If initializer uses initializer from declare reduction construct, emit a
  // pointer to the address of the original reduction item (required by
  // reduction initializer).
  if (RCG.usesReductionInitializer(N)) {
    Address SharedAddr = CGF.GetAddrOfLocalVar(&ParamOrig);
    OrigAddr = CGF.EmitLoadOfPointer(
        SharedAddr,
        CGM.getContext().VoidPtrTy.castAs<PointerType>()->getTypePtr());
  }
  // Emit the initializer:
  // %0 = bitcast void* %arg to <type>*
  // store <type> <init>, <type>* %0
  RCG.emitInitialization(CGF, N, PrivateAddr, OrigAddr,
                         [](CodeGenFunction &) { return false; });
  CGF.FinishFunction();
  return Fn;
}
5818 
/// Emits reduction combiner function:
/// \code
/// void @.red_comb(void* %arg0, void* %arg1) {
/// %lhs = bitcast void* %arg0 to <type>*
/// %rhs = bitcast void* %arg1 to <type>*
/// %2 = <ReductionOp>(<type>* %lhs, <type>* %rhs)
/// store <type> %2, <type>* %lhs
/// ret void
/// }
/// \endcode
static llvm::Value *emitReduceCombFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N,
                                           const Expr *ReductionOp,
                                           const Expr *LHS, const Expr *RHS,
                                           const Expr *PrivateRef) {
  ASTContext &C = CGM.getContext();
  // LHS/RHS are the placeholder variables used in the reduction operation
  // expression; they get remapped to the function arguments below.
  const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(LHS)->getDecl());
  const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(RHS)->getDecl());
  FunctionArgList Args;
  // %arg0: in/out item (receives the combined value).
  ImplicitParamDecl ParamInOut(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                               C.VoidPtrTy, ImplicitParamDecl::Other);
  // %arg1: in item (combined into %arg0).
  ImplicitParamDecl ParamIn(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                            ImplicitParamDecl::Other);
  Args.emplace_back(&ParamInOut);
  Args.emplace_back(&ParamIn);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_comb", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  // Remap lhs and rhs variables to the addresses of the function arguments.
  // %lhs = bitcast void* %arg0 to <type>*
  // %rhs = bitcast void* %arg1 to <type>*
  CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
  PrivateScope.addPrivate(
      LHSVD,
      // Pull out the pointer to the variable.
      CGF.EmitLoadOfPointer(
          CGF.Builder.CreateElementBitCast(
              CGF.GetAddrOfLocalVar(&ParamInOut),
              CGF.ConvertTypeForMem(LHSVD->getType())->getPointerTo()),
          C.getPointerType(LHSVD->getType())->castAs<PointerType>()));
  PrivateScope.addPrivate(
      RHSVD,
      // Pull out the pointer to the variable.
      CGF.EmitLoadOfPointer(
          CGF.Builder.CreateElementBitCast(
            CGF.GetAddrOfLocalVar(&ParamIn),
            CGF.ConvertTypeForMem(RHSVD->getType())->getPointerTo()),
          C.getPointerType(RHSVD->getType())->castAs<PointerType>()));
  PrivateScope.Privatize();
  // Emit the combiner body:
  // %2 = <ReductionOp>(<type> *%lhs, <type> *%rhs)
  // store <type> %2, <type>* %lhs
  CGM.getOpenMPRuntime().emitSingleReductionCombiner(
      CGF, ReductionOp, PrivateRef, cast<DeclRefExpr>(LHS),
      cast<DeclRefExpr>(RHS));
  CGF.FinishFunction();
  return Fn;
}
5896 
/// Emits reduction finalizer function:
/// \code
/// void @.red_fini(void* %arg) {
/// %0 = bitcast void* %arg to <type>*
/// <destroy>(<type>* %0)
/// ret void
/// }
/// \endcode
/// Returns nullptr when the reduction item needs no cleanup, so no finalizer
/// is emitted at all.
static llvm::Value *emitReduceFiniFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N) {
  // No destructor/cleanups required -> no finalizer function.
  if (!RCG.needCleanups(N))
    return nullptr;
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  // %arg: pointer to the private copy to be destroyed.
  ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                          ImplicitParamDecl::Other);
  Args.emplace_back(&Param);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_fini", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  Address PrivateAddr = CGF.EmitLoadOfPointer(
      CGF.GetAddrOfLocalVar(&Param), C.VoidPtrTy.castAs<PointerType>());
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  // Emit the finalizer body:
  // <destroy>(<type>* %0)
  RCG.emitCleanups(CGF, N, PrivateAddr);
  CGF.FinishFunction(Loc);
  return Fn;
}
5944 
emitTaskReductionInit(CodeGenFunction & CGF,SourceLocation Loc,ArrayRef<const Expr * > LHSExprs,ArrayRef<const Expr * > RHSExprs,const OMPTaskDataTy & Data)5945 llvm::Value *CGOpenMPRuntime::emitTaskReductionInit(
5946     CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
5947     ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
5948   if (!CGF.HaveInsertPoint() || Data.ReductionVars.empty())
5949     return nullptr;
5950 
5951   // Build typedef struct:
5952   // kmp_taskred_input {
5953   //   void *reduce_shar; // shared reduction item
5954   //   void *reduce_orig; // original reduction item used for initialization
5955   //   size_t reduce_size; // size of data item
5956   //   void *reduce_init; // data initialization routine
5957   //   void *reduce_fini; // data finalization routine
5958   //   void *reduce_comb; // data combiner routine
5959   //   kmp_task_red_flags_t flags; // flags for additional info from compiler
5960   // } kmp_taskred_input_t;
5961   ASTContext &C = CGM.getContext();
5962   RecordDecl *RD = C.buildImplicitRecord("kmp_taskred_input_t");
5963   RD->startDefinition();
5964   const FieldDecl *SharedFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
5965   const FieldDecl *OrigFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
5966   const FieldDecl *SizeFD = addFieldToRecordDecl(C, RD, C.getSizeType());
5967   const FieldDecl *InitFD  = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
5968   const FieldDecl *FiniFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
5969   const FieldDecl *CombFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
5970   const FieldDecl *FlagsFD = addFieldToRecordDecl(
5971       C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/false));
5972   RD->completeDefinition();
5973   QualType RDType = C.getRecordType(RD);
5974   unsigned Size = Data.ReductionVars.size();
5975   llvm::APInt ArraySize(/*numBits=*/64, Size);
5976   QualType ArrayRDType = C.getConstantArrayType(
5977       RDType, ArraySize, nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0);
5978   // kmp_task_red_input_t .rd_input.[Size];
5979   Address TaskRedInput = CGF.CreateMemTemp(ArrayRDType, ".rd_input.");
5980   ReductionCodeGen RCG(Data.ReductionVars, Data.ReductionOrigs,
5981                        Data.ReductionCopies, Data.ReductionOps);
5982   for (unsigned Cnt = 0; Cnt < Size; ++Cnt) {
5983     // kmp_task_red_input_t &ElemLVal = .rd_input.[Cnt];
5984     llvm::Value *Idxs[] = {llvm::ConstantInt::get(CGM.SizeTy, /*V=*/0),
5985                            llvm::ConstantInt::get(CGM.SizeTy, Cnt)};
5986     llvm::Value *GEP = CGF.EmitCheckedInBoundsGEP(
5987         TaskRedInput.getElementType(), TaskRedInput.getPointer(), Idxs,
5988         /*SignedIndices=*/false, /*IsSubtraction=*/false, Loc,
5989         ".rd_input.gep.");
5990     LValue ElemLVal = CGF.MakeNaturalAlignAddrLValue(GEP, RDType);
5991     // ElemLVal.reduce_shar = &Shareds[Cnt];
5992     LValue SharedLVal = CGF.EmitLValueForField(ElemLVal, SharedFD);
5993     RCG.emitSharedOrigLValue(CGF, Cnt);
5994     llvm::Value *CastedShared =
5995         CGF.EmitCastToVoidPtr(RCG.getSharedLValue(Cnt).getPointer(CGF));
5996     CGF.EmitStoreOfScalar(CastedShared, SharedLVal);
5997     // ElemLVal.reduce_orig = &Origs[Cnt];
5998     LValue OrigLVal = CGF.EmitLValueForField(ElemLVal, OrigFD);
5999     llvm::Value *CastedOrig =
6000         CGF.EmitCastToVoidPtr(RCG.getOrigLValue(Cnt).getPointer(CGF));
6001     CGF.EmitStoreOfScalar(CastedOrig, OrigLVal);
6002     RCG.emitAggregateType(CGF, Cnt);
6003     llvm::Value *SizeValInChars;
6004     llvm::Value *SizeVal;
6005     std::tie(SizeValInChars, SizeVal) = RCG.getSizes(Cnt);
6006     // We use delayed creation/initialization for VLAs and array sections. It is
6007     // required because runtime does not provide the way to pass the sizes of
6008     // VLAs/array sections to initializer/combiner/finalizer functions. Instead
6009     // threadprivate global variables are used to store these values and use
6010     // them in the functions.
6011     bool DelayedCreation = !!SizeVal;
6012     SizeValInChars = CGF.Builder.CreateIntCast(SizeValInChars, CGM.SizeTy,
6013                                                /*isSigned=*/false);
6014     LValue SizeLVal = CGF.EmitLValueForField(ElemLVal, SizeFD);
6015     CGF.EmitStoreOfScalar(SizeValInChars, SizeLVal);
6016     // ElemLVal.reduce_init = init;
6017     LValue InitLVal = CGF.EmitLValueForField(ElemLVal, InitFD);
6018     llvm::Value *InitAddr =
6019         CGF.EmitCastToVoidPtr(emitReduceInitFunction(CGM, Loc, RCG, Cnt));
6020     CGF.EmitStoreOfScalar(InitAddr, InitLVal);
6021     // ElemLVal.reduce_fini = fini;
6022     LValue FiniLVal = CGF.EmitLValueForField(ElemLVal, FiniFD);
6023     llvm::Value *Fini = emitReduceFiniFunction(CGM, Loc, RCG, Cnt);
6024     llvm::Value *FiniAddr = Fini
6025                                 ? CGF.EmitCastToVoidPtr(Fini)
6026                                 : llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
6027     CGF.EmitStoreOfScalar(FiniAddr, FiniLVal);
6028     // ElemLVal.reduce_comb = comb;
6029     LValue CombLVal = CGF.EmitLValueForField(ElemLVal, CombFD);
6030     llvm::Value *CombAddr = CGF.EmitCastToVoidPtr(emitReduceCombFunction(
6031         CGM, Loc, RCG, Cnt, Data.ReductionOps[Cnt], LHSExprs[Cnt],
6032         RHSExprs[Cnt], Data.ReductionCopies[Cnt]));
6033     CGF.EmitStoreOfScalar(CombAddr, CombLVal);
6034     // ElemLVal.flags = 0;
6035     LValue FlagsLVal = CGF.EmitLValueForField(ElemLVal, FlagsFD);
6036     if (DelayedCreation) {
6037       CGF.EmitStoreOfScalar(
6038           llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/1, /*isSigned=*/true),
6039           FlagsLVal);
6040     } else
6041       CGF.EmitNullInitialization(FlagsLVal.getAddress(CGF),
6042                                  FlagsLVal.getType());
6043   }
6044   if (Data.IsReductionWithTaskMod) {
6045     // Build call void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int
6046     // is_ws, int num, void *data);
6047     llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
6048     llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
6049                                                   CGM.IntTy, /*isSigned=*/true);
6050     llvm::Value *Args[] = {
6051         IdentTLoc, GTid,
6052         llvm::ConstantInt::get(CGM.IntTy, Data.IsWorksharingReduction ? 1 : 0,
6053                                /*isSigned=*/true),
6054         llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
6055         CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
6056             TaskRedInput.getPointer(), CGM.VoidPtrTy)};
6057     return CGF.EmitRuntimeCall(
6058         OMPBuilder.getOrCreateRuntimeFunction(
6059             CGM.getModule(), OMPRTL___kmpc_taskred_modifier_init),
6060         Args);
6061   }
6062   // Build call void *__kmpc_taskred_init(int gtid, int num_data, void *data);
6063   llvm::Value *Args[] = {
6064       CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy,
6065                                 /*isSigned=*/true),
6066       llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
6067       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TaskRedInput.getPointer(),
6068                                                       CGM.VoidPtrTy)};
6069   return CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
6070                                  CGM.getModule(), OMPRTL___kmpc_taskred_init),
6071                              Args);
6072 }
6073 
emitTaskReductionFini(CodeGenFunction & CGF,SourceLocation Loc,bool IsWorksharingReduction)6074 void CGOpenMPRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
6075                                             SourceLocation Loc,
6076                                             bool IsWorksharingReduction) {
6077   // Build call void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int
6078   // is_ws, int num, void *data);
6079   llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
6080   llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
6081                                                 CGM.IntTy, /*isSigned=*/true);
6082   llvm::Value *Args[] = {IdentTLoc, GTid,
6083                          llvm::ConstantInt::get(CGM.IntTy,
6084                                                 IsWorksharingReduction ? 1 : 0,
6085                                                 /*isSigned=*/true)};
6086   (void)CGF.EmitRuntimeCall(
6087       OMPBuilder.getOrCreateRuntimeFunction(
6088           CGM.getModule(), OMPRTL___kmpc_task_reduction_modifier_fini),
6089       Args);
6090 }
6091 
emitTaskReductionFixups(CodeGenFunction & CGF,SourceLocation Loc,ReductionCodeGen & RCG,unsigned N)6092 void CGOpenMPRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
6093                                               SourceLocation Loc,
6094                                               ReductionCodeGen &RCG,
6095                                               unsigned N) {
6096   auto Sizes = RCG.getSizes(N);
6097   // Emit threadprivate global variable if the type is non-constant
6098   // (Sizes.second = nullptr).
6099   if (Sizes.second) {
6100     llvm::Value *SizeVal = CGF.Builder.CreateIntCast(Sizes.second, CGM.SizeTy,
6101                                                      /*isSigned=*/false);
6102     Address SizeAddr = getAddrOfArtificialThreadPrivate(
6103         CGF, CGM.getContext().getSizeType(),
6104         generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
6105     CGF.Builder.CreateStore(SizeVal, SizeAddr, /*IsVolatile=*/false);
6106   }
6107 }
6108 
getTaskReductionItem(CodeGenFunction & CGF,SourceLocation Loc,llvm::Value * ReductionsPtr,LValue SharedLVal)6109 Address CGOpenMPRuntime::getTaskReductionItem(CodeGenFunction &CGF,
6110                                               SourceLocation Loc,
6111                                               llvm::Value *ReductionsPtr,
6112                                               LValue SharedLVal) {
6113   // Build call void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
6114   // *d);
6115   llvm::Value *Args[] = {CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
6116                                                    CGM.IntTy,
6117                                                    /*isSigned=*/true),
6118                          ReductionsPtr,
6119                          CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
6120                              SharedLVal.getPointer(CGF), CGM.VoidPtrTy)};
6121   return Address(
6122       CGF.EmitRuntimeCall(
6123           OMPBuilder.getOrCreateRuntimeFunction(
6124               CGM.getModule(), OMPRTL___kmpc_task_reduction_get_th_data),
6125           Args),
6126       CGF.Int8Ty, SharedLVal.getAlignment());
6127 }
6128 
emitTaskwaitCall(CodeGenFunction & CGF,SourceLocation Loc,const OMPTaskDataTy & Data)6129 void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF, SourceLocation Loc,
6130                                        const OMPTaskDataTy &Data) {
6131   if (!CGF.HaveInsertPoint())
6132     return;
6133 
6134   if (CGF.CGM.getLangOpts().OpenMPIRBuilder && Data.Dependences.empty()) {
6135     // TODO: Need to support taskwait with dependences in the OpenMPIRBuilder.
6136     OMPBuilder.createTaskwait(CGF.Builder);
6137   } else {
6138     llvm::Value *ThreadID = getThreadID(CGF, Loc);
6139     llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
6140     auto &M = CGM.getModule();
6141     Address DependenciesArray = Address::invalid();
6142     llvm::Value *NumOfElements;
6143     std::tie(NumOfElements, DependenciesArray) =
6144         emitDependClause(CGF, Data.Dependences, Loc);
6145     llvm::Value *DepWaitTaskArgs[6];
6146     if (!Data.Dependences.empty()) {
6147       DepWaitTaskArgs[0] = UpLoc;
6148       DepWaitTaskArgs[1] = ThreadID;
6149       DepWaitTaskArgs[2] = NumOfElements;
6150       DepWaitTaskArgs[3] = DependenciesArray.getPointer();
6151       DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
6152       DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
6153 
6154       CodeGenFunction::RunCleanupsScope LocalScope(CGF);
6155 
6156       // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
6157       // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
6158       // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info
6159       // is specified.
6160       CGF.EmitRuntimeCall(
6161           OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_omp_wait_deps),
6162           DepWaitTaskArgs);
6163 
6164     } else {
6165 
6166       // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
6167       // global_tid);
6168       llvm::Value *Args[] = {UpLoc, ThreadID};
6169       // Ignore return result until untied tasks are supported.
6170       CGF.EmitRuntimeCall(
6171           OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_omp_taskwait),
6172           Args);
6173     }
6174   }
6175 
6176   if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
6177     Region->emitUntiedSwitch(CGF);
6178 }
6179 
emitInlinedDirective(CodeGenFunction & CGF,OpenMPDirectiveKind InnerKind,const RegionCodeGenTy & CodeGen,bool HasCancel)6180 void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF,
6181                                            OpenMPDirectiveKind InnerKind,
6182                                            const RegionCodeGenTy &CodeGen,
6183                                            bool HasCancel) {
6184   if (!CGF.HaveInsertPoint())
6185     return;
6186   InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel,
6187                                  InnerKind != OMPD_critical &&
6188                                      InnerKind != OMPD_master &&
6189                                      InnerKind != OMPD_masked);
6190   CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr);
6191 }
6192 
namespace {
/// Cancellation kind values passed as the 'cncl_kind' argument of the
/// __kmpc_cancel / __kmpc_cancellationpoint runtime calls.
enum RTCancelKind {
  CancelNoreq = 0,     // No cancellation requested.
  CancelParallel = 1,  // Cancel 'parallel' region.
  CancelLoop = 2,      // Cancel worksharing loop ('for').
  CancelSections = 3,  // Cancel 'sections' region.
  CancelTaskgroup = 4  // Cancel 'taskgroup' region.
};
} // anonymous namespace
6202 
getCancellationKind(OpenMPDirectiveKind CancelRegion)6203 static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) {
6204   RTCancelKind CancelKind = CancelNoreq;
6205   if (CancelRegion == OMPD_parallel)
6206     CancelKind = CancelParallel;
6207   else if (CancelRegion == OMPD_for)
6208     CancelKind = CancelLoop;
6209   else if (CancelRegion == OMPD_sections)
6210     CancelKind = CancelSections;
6211   else {
6212     assert(CancelRegion == OMPD_taskgroup);
6213     CancelKind = CancelTaskgroup;
6214   }
6215   return CancelKind;
6216 }
6217 
emitCancellationPointCall(CodeGenFunction & CGF,SourceLocation Loc,OpenMPDirectiveKind CancelRegion)6218 void CGOpenMPRuntime::emitCancellationPointCall(
6219     CodeGenFunction &CGF, SourceLocation Loc,
6220     OpenMPDirectiveKind CancelRegion) {
6221   if (!CGF.HaveInsertPoint())
6222     return;
6223   // Build call kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
6224   // global_tid, kmp_int32 cncl_kind);
6225   if (auto *OMPRegionInfo =
6226           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
6227     // For 'cancellation point taskgroup', the task region info may not have a
6228     // cancel. This may instead happen in another adjacent task.
6229     if (CancelRegion == OMPD_taskgroup || OMPRegionInfo->hasCancel()) {
6230       llvm::Value *Args[] = {
6231           emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
6232           CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
6233       // Ignore return result until untied tasks are supported.
6234       llvm::Value *Result = CGF.EmitRuntimeCall(
6235           OMPBuilder.getOrCreateRuntimeFunction(
6236               CGM.getModule(), OMPRTL___kmpc_cancellationpoint),
6237           Args);
6238       // if (__kmpc_cancellationpoint()) {
6239       //   call i32 @__kmpc_cancel_barrier( // for parallel cancellation only
6240       //   exit from construct;
6241       // }
6242       llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
6243       llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
6244       llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
6245       CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
6246       CGF.EmitBlock(ExitBB);
6247       if (CancelRegion == OMPD_parallel)
6248         emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false);
6249       // exit from construct;
6250       CodeGenFunction::JumpDest CancelDest =
6251           CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
6252       CGF.EmitBranchThroughCleanup(CancelDest);
6253       CGF.EmitBlock(ContBB, /*IsFinished=*/true);
6254     }
6255   }
6256 }
6257 
emitCancelCall(CodeGenFunction & CGF,SourceLocation Loc,const Expr * IfCond,OpenMPDirectiveKind CancelRegion)6258 void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc,
6259                                      const Expr *IfCond,
6260                                      OpenMPDirectiveKind CancelRegion) {
6261   if (!CGF.HaveInsertPoint())
6262     return;
6263   // Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
6264   // kmp_int32 cncl_kind);
6265   auto &M = CGM.getModule();
6266   if (auto *OMPRegionInfo =
6267           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
6268     auto &&ThenGen = [this, &M, Loc, CancelRegion,
6269                       OMPRegionInfo](CodeGenFunction &CGF, PrePostActionTy &) {
6270       CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
6271       llvm::Value *Args[] = {
6272           RT.emitUpdateLocation(CGF, Loc), RT.getThreadID(CGF, Loc),
6273           CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
6274       // Ignore return result until untied tasks are supported.
6275       llvm::Value *Result = CGF.EmitRuntimeCall(
6276           OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_cancel), Args);
6277       // if (__kmpc_cancel()) {
6278       //   call i32 @__kmpc_cancel_barrier( // for parallel cancellation only
6279       //   exit from construct;
6280       // }
6281       llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
6282       llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
6283       llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
6284       CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
6285       CGF.EmitBlock(ExitBB);
6286       if (CancelRegion == OMPD_parallel)
6287         RT.emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false);
6288       // exit from construct;
6289       CodeGenFunction::JumpDest CancelDest =
6290           CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
6291       CGF.EmitBranchThroughCleanup(CancelDest);
6292       CGF.EmitBlock(ContBB, /*IsFinished=*/true);
6293     };
6294     if (IfCond) {
6295       emitIfClause(CGF, IfCond, ThenGen,
6296                    [](CodeGenFunction &, PrePostActionTy &) {});
6297     } else {
6298       RegionCodeGenTy ThenRCG(ThenGen);
6299       ThenRCG(CGF);
6300     }
6301   }
6302 }
6303 
6304 namespace {
6305 /// Cleanup action for uses_allocators support.
6306 class OMPUsesAllocatorsActionTy final : public PrePostActionTy {
6307   ArrayRef<std::pair<const Expr *, const Expr *>> Allocators;
6308 
6309 public:
OMPUsesAllocatorsActionTy(ArrayRef<std::pair<const Expr *,const Expr * >> Allocators)6310   OMPUsesAllocatorsActionTy(
6311       ArrayRef<std::pair<const Expr *, const Expr *>> Allocators)
6312       : Allocators(Allocators) {}
Enter(CodeGenFunction & CGF)6313   void Enter(CodeGenFunction &CGF) override {
6314     if (!CGF.HaveInsertPoint())
6315       return;
6316     for (const auto &AllocatorData : Allocators) {
6317       CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsInit(
6318           CGF, AllocatorData.first, AllocatorData.second);
6319     }
6320   }
Exit(CodeGenFunction & CGF)6321   void Exit(CodeGenFunction &CGF) override {
6322     if (!CGF.HaveInsertPoint())
6323       return;
6324     for (const auto &AllocatorData : Allocators) {
6325       CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsFini(CGF,
6326                                                         AllocatorData.first);
6327     }
6328   }
6329 };
6330 } // namespace
6331 
emitTargetOutlinedFunction(const OMPExecutableDirective & D,StringRef ParentName,llvm::Function * & OutlinedFn,llvm::Constant * & OutlinedFnID,bool IsOffloadEntry,const RegionCodeGenTy & CodeGen)6332 void CGOpenMPRuntime::emitTargetOutlinedFunction(
6333     const OMPExecutableDirective &D, StringRef ParentName,
6334     llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
6335     bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
6336   assert(!ParentName.empty() && "Invalid target region parent name!");
6337   HasEmittedTargetRegion = true;
6338   SmallVector<std::pair<const Expr *, const Expr *>, 4> Allocators;
6339   for (const auto *C : D.getClausesOfKind<OMPUsesAllocatorsClause>()) {
6340     for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
6341       const OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
6342       if (!D.AllocatorTraits)
6343         continue;
6344       Allocators.emplace_back(D.Allocator, D.AllocatorTraits);
6345     }
6346   }
6347   OMPUsesAllocatorsActionTy UsesAllocatorAction(Allocators);
6348   CodeGen.setAction(UsesAllocatorAction);
6349   emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID,
6350                                    IsOffloadEntry, CodeGen);
6351 }
6352 
// Emits runtime initialization for one allocator from a 'uses_allocators'
// clause, roughly:
//   <allocator> = __kmpc_init_allocator(<gtid>, /*memspace=*/nullptr,
//                                       <num_traits>, <traits>);
// \param Allocator Expression that (after stripping casts) is a DeclRefExpr
//        naming the allocator variable to define and initialize.
// \param AllocatorTraits Expression of constant-array type with the traits.
void CGOpenMPRuntime::emitUsesAllocatorsInit(CodeGenFunction &CGF,
                                             const Expr *Allocator,
                                             const Expr *AllocatorTraits) {
  // The runtime expects the thread id as a signed 'int'.
  llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc());
  ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true);
  // Use default memspace handle.
  llvm::Value *MemSpaceHandle = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  // The trait count is the extent of the traits constant array.
  llvm::Value *NumTraits = llvm::ConstantInt::get(
      CGF.IntTy, cast<ConstantArrayType>(
                     AllocatorTraits->getType()->getAsArrayTypeUnsafe())
                     ->getSize()
                     .getLimitedValue());
  // Reinterpret the traits array address as a void* so it can be loaded and
  // passed to the runtime call.
  LValue AllocatorTraitsLVal = CGF.EmitLValue(AllocatorTraits);
  Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      AllocatorTraitsLVal.getAddress(CGF), CGF.VoidPtrPtrTy, CGF.VoidPtrTy);
  AllocatorTraitsLVal = CGF.MakeAddrLValue(Addr, CGF.getContext().VoidPtrTy,
                                           AllocatorTraitsLVal.getBaseInfo(),
                                           AllocatorTraitsLVal.getTBAAInfo());
  llvm::Value *Traits =
      CGF.EmitLoadOfScalar(AllocatorTraitsLVal, AllocatorTraits->getExprLoc());

  llvm::Value *AllocatorVal =
      CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_init_allocator),
                          {ThreadId, MemSpaceHandle, NumTraits, Traits});
  // Store to allocator.
  // Emit the allocator variable itself, then store the handle returned by the
  // runtime into it, converted from void* to the variable's declared type.
  CGF.EmitVarDecl(*cast<VarDecl>(
      cast<DeclRefExpr>(Allocator->IgnoreParenImpCasts())->getDecl()));
  LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
  AllocatorVal =
      CGF.EmitScalarConversion(AllocatorVal, CGF.getContext().VoidPtrTy,
                               Allocator->getType(), Allocator->getExprLoc());
  CGF.EmitStoreOfScalar(AllocatorVal, AllocatorLVal);
}
6387 
emitUsesAllocatorsFini(CodeGenFunction & CGF,const Expr * Allocator)6388 void CGOpenMPRuntime::emitUsesAllocatorsFini(CodeGenFunction &CGF,
6389                                              const Expr *Allocator) {
6390   llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc());
6391   ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true);
6392   LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
6393   llvm::Value *AllocatorVal =
6394       CGF.EmitLoadOfScalar(AllocatorLVal, Allocator->getExprLoc());
6395   AllocatorVal = CGF.EmitScalarConversion(AllocatorVal, Allocator->getType(),
6396                                           CGF.getContext().VoidPtrTy,
6397                                           Allocator->getExprLoc());
6398   (void)CGF.EmitRuntimeCall(
6399       OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
6400                                             OMPRTL___kmpc_destroy_allocator),
6401       {ThreadId, AllocatorVal});
6402 }
6403 
// Builds the outlined function for a target region, and if the region is an
// offload entry, creates its region ID and registers both with the offload
// entries table. Also attaches num_teams/thread_limit attributes when they
// are statically known.
// \param OutlinedFn [out] Set only when the outlined function is built (see
//        BuildOutlinedFn below).
// \param OutlinedFnID [out] The region's unique ID: the function itself on
//        the device, an anchor global variable on the host.
void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  // Create a unique name for the entry function using the source location
  // information of the current target region. The name will be something like:
  //
  // __omp_offloading_DD_FFFF_PP_lBB
  //
  // where DD_FFFF is an ID unique to the file (device and file IDs), PP is the
  // mangled name of the function that encloses the target region and BB is the
  // line number of the target region.

  // On the host with mandatory offloading there is no host fallback, so the
  // host version of the outlined function is not built at all.
  const bool BuildOutlinedFn = CGM.getLangOpts().OpenMPIsDevice ||
                               !CGM.getLangOpts().OpenMPOffloadMandatory;
  unsigned DeviceID;
  unsigned FileID;
  unsigned Line;
  getTargetEntryUniqueInfo(CGM.getContext(), D.getBeginLoc(), DeviceID, FileID,
                           Line);
  SmallString<64> EntryFnName;
  {
    llvm::raw_svector_ostream OS(EntryFnName);
    OS << "__omp_offloading" << llvm::format("_%x", DeviceID)
       << llvm::format("_%x_", FileID) << ParentName << "_l" << Line;
  }

  const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);

  // Use a dedicated CodeGenFunction so the outlined function is emitted
  // independently of the enclosing function's state.
  CodeGenFunction CGF(CGM, true);
  CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);

  if (BuildOutlinedFn)
    OutlinedFn = CGF.GenerateOpenMPCapturedStmtFunction(CS, D.getBeginLoc());

  // If this target outline function is not an offload entry, we don't need to
  // register it.
  if (!IsOffloadEntry)
    return;

  // The target region ID is used by the runtime library to identify the current
  // target region, so it only has to be unique and not necessarily point to
  // anything. It could be the pointer to the outlined function that implements
  // the target region, but we aren't using that so that the compiler doesn't
  // need to keep that, and could therefore inline the host function if proven
  // worthwhile during optimization. On the other hand, if emitting code for the
  // device, the ID has to be the function address so that it can retrieved from
  // the offloading entry and launched by the runtime library. We also mark the
  // outlined function to have external linkage in case we are emitting code for
  // the device, because these functions will be entry points to the device.

  if (CGM.getLangOpts().OpenMPIsDevice) {
    OutlinedFnID = llvm::ConstantExpr::getBitCast(OutlinedFn, CGM.Int8PtrTy);
    OutlinedFn->setLinkage(llvm::GlobalValue::WeakODRLinkage);
    OutlinedFn->setDSOLocal(false);
    // AMDGCN kernels must use the dedicated kernel calling convention.
    if (CGM.getTriple().isAMDGCN())
      OutlinedFn->setCallingConv(llvm::CallingConv::AMDGPU_KERNEL);
  } else {
    // Host side: a one-byte constant global serves as the unique region ID.
    std::string Name = getName({EntryFnName, "region_id"});
    OutlinedFnID = new llvm::GlobalVariable(
        CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
        llvm::GlobalValue::WeakAnyLinkage,
        llvm::Constant::getNullValue(CGM.Int8Ty), Name);
  }

  // If we do not allow host fallback we still need a named address to use.
  llvm::Constant *TargetRegionEntryAddr = OutlinedFn;
  if (!BuildOutlinedFn) {
    assert(!CGM.getModule().getGlobalVariable(EntryFnName, true) &&
           "Named kernel already exists?");
    TargetRegionEntryAddr = new llvm::GlobalVariable(
        CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
        llvm::GlobalValue::InternalLinkage,
        llvm::Constant::getNullValue(CGM.Int8Ty), EntryFnName);
  }

  // Register the information for the entry associated with this target region.
  OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
      DeviceID, FileID, ParentName, Line, TargetRegionEntryAddr, OutlinedFnID,
      OffloadEntriesInfoManagerTy::OMPTargetRegionEntryTargetRegion);

  // Add NumTeams and ThreadLimit attributes to the outlined GPU function
  int32_t DefaultValTeams = -1;
  getNumTeamsExprForTargetDirective(CGF, D, DefaultValTeams);
  if (DefaultValTeams > 0 && OutlinedFn) {
    OutlinedFn->addFnAttr("omp_target_num_teams",
                          std::to_string(DefaultValTeams));
  }
  int32_t DefaultValThreads = -1;
  getNumThreadsExprForTargetDirective(CGF, D, DefaultValThreads);
  if (DefaultValThreads > 0 && OutlinedFn) {
    OutlinedFn->addFnAttr("omp_target_thread_limit",
                          std::to_string(DefaultValThreads));
  }

  if (BuildOutlinedFn)
    CGM.getTargetCodeGenInfo().setTargetAttributes(nullptr, OutlinedFn, CGM);
}
6503 
6504 /// Checks if the expression is constant or does not have non-trivial function
6505 /// calls.
isTrivial(ASTContext & Ctx,const Expr * E)6506 static bool isTrivial(ASTContext &Ctx, const Expr * E) {
6507   // We can skip constant expressions.
6508   // We can skip expressions with trivial calls or simple expressions.
6509   return (E->isEvaluatable(Ctx, Expr::SE_AllowUndefinedBehavior) ||
6510           !E->hasNonTrivialCall(Ctx)) &&
6511          !E->HasSideEffects(Ctx, /*IncludePossibleEffects=*/true);
6512 }
6513 
// Returns the single significant child statement of \p Body, descending
// through compound statements and containers, or null when there are several
// significant children (or none). Trivial expressions, asm/null statements,
// flush/barrier/taskyield directives and declarations with no runtime effect
// are skipped during the search.
const Stmt *CGOpenMPRuntime::getSingleCompoundChild(ASTContext &Ctx,
                                                    const Stmt *Body) {
  const Stmt *Child = Body->IgnoreContainers();
  // Keep descending as long as the current candidate is a compound statement;
  // each iteration tries to reduce it to its single significant child.
  while (const auto *C = dyn_cast_or_null<CompoundStmt>(Child)) {
    Child = nullptr;
    for (const Stmt *S : C->body()) {
      if (const auto *E = dyn_cast<Expr>(S)) {
        // Trivial (constant / side-effect-free) expressions can be ignored.
        if (isTrivial(Ctx, E))
          continue;
      }
      // Some of the statements can be ignored.
      if (isa<AsmStmt>(S) || isa<NullStmt>(S) || isa<OMPFlushDirective>(S) ||
          isa<OMPBarrierDirective>(S) || isa<OMPTaskyieldDirective>(S))
        continue;
      // Analyze declarations.
      if (const auto *DS = dyn_cast<DeclStmt>(S)) {
        // A declaration statement is ignorable only if every declaration in
        // it is ignorable.
        if (llvm::all_of(DS->decls(), [](const Decl *D) {
              if (isa<EmptyDecl>(D) || isa<DeclContext>(D) ||
                  isa<TypeDecl>(D) || isa<PragmaCommentDecl>(D) ||
                  isa<PragmaDetectMismatchDecl>(D) || isa<UsingDecl>(D) ||
                  isa<UsingDirectiveDecl>(D) ||
                  isa<OMPDeclareReductionDecl>(D) ||
                  isa<OMPThreadPrivateDecl>(D) || isa<OMPAllocateDecl>(D))
                return true;
              const auto *VD = dyn_cast<VarDecl>(D);
              if (!VD)
                return false;
              // Globals and unused locals have no effect in this region.
              return VD->hasGlobalStorage() || !VD->isUsed();
            }))
          continue;
      }
      // Found multiple children - cannot get the one child only.
      if (Child)
        return nullptr;
      Child = S;
    }
    if (Child)
      Child = Child->IgnoreContainers();
  }
  return Child;
}
6555 
// Returns the expression that determines the number of teams for the given
// target directive, or null when no such expression applies. \p DefaultVal is
// set to the statically known team count when available: the constant value
// of a num_teams clause, 1 when exactly one team is implied, 0 when the
// runtime decides, and -1 for a plain 'target' with no nested directive.
const Expr *CGOpenMPRuntime::getNumTeamsExprForTargetDirective(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    int32_t &DefaultVal) {

  OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
  assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
         "Expected target-based executable directive.");
  switch (DirectiveKind) {
  case OMPD_target: {
    // For a plain 'target', look at the single nested directive (if any) to
    // decide the team count.
    const auto *CS = D.getInnermostCapturedStmt();
    const auto *Body =
        CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
    const Stmt *ChildStmt =
        CGOpenMPRuntime::getSingleCompoundChild(CGF.getContext(), Body);
    if (const auto *NestedDir =
            dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
      if (isOpenMPTeamsDirective(NestedDir->getDirectiveKind())) {
        if (NestedDir->hasClausesOfKind<OMPNumTeamsClause>()) {
          // Record the constant value of num_teams when it folds.
          const Expr *NumTeams =
              NestedDir->getSingleClause<OMPNumTeamsClause>()->getNumTeams();
          if (NumTeams->isIntegerConstantExpr(CGF.getContext()))
            if (auto Constant =
                    NumTeams->getIntegerConstantExpr(CGF.getContext()))
              DefaultVal = Constant->getExtValue();
          return NumTeams;
        }
        // Nested teams without num_teams: runtime chooses the count.
        DefaultVal = 0;
        return nullptr;
      }
      // NOTE(review): this branch and the fallthrough below are identical
      // (both yield DefaultVal = 1); kept separate, presumably for clarity.
      if (isOpenMPParallelDirective(NestedDir->getDirectiveKind()) ||
          isOpenMPSimdDirective(NestedDir->getDirectiveKind())) {
        DefaultVal = 1;
        return nullptr;
      }
      DefaultVal = 1;
      return nullptr;
    }
    // A value of -1 is used to check if we need to emit no teams region
    DefaultVal = -1;
    return nullptr;
  }
  case OMPD_target_teams:
  case OMPD_target_teams_distribute:
  case OMPD_target_teams_distribute_simd:
  case OMPD_target_teams_distribute_parallel_for:
  case OMPD_target_teams_distribute_parallel_for_simd: {
    // Combined target+teams directives carry num_teams directly.
    if (D.hasClausesOfKind<OMPNumTeamsClause>()) {
      const Expr *NumTeams =
          D.getSingleClause<OMPNumTeamsClause>()->getNumTeams();
      if (NumTeams->isIntegerConstantExpr(CGF.getContext()))
        if (auto Constant = NumTeams->getIntegerConstantExpr(CGF.getContext()))
          DefaultVal = Constant->getExtValue();
      return NumTeams;
    }
    DefaultVal = 0;
    return nullptr;
  }
  case OMPD_target_parallel:
  case OMPD_target_parallel_for:
  case OMPD_target_parallel_for_simd:
  case OMPD_target_simd:
    // These imply exactly one team.
    DefaultVal = 1;
    return nullptr;
  // All remaining directive kinds are not target-based and are rejected by
  // the assertion above; they are listed to keep the switch exhaustive.
  case OMPD_parallel:
  case OMPD_for:
  case OMPD_parallel_for:
  case OMPD_parallel_master:
  case OMPD_parallel_sections:
  case OMPD_for_simd:
  case OMPD_parallel_for_simd:
  case OMPD_cancel:
  case OMPD_cancellation_point:
  case OMPD_ordered:
  case OMPD_threadprivate:
  case OMPD_allocate:
  case OMPD_task:
  case OMPD_simd:
  case OMPD_tile:
  case OMPD_unroll:
  case OMPD_sections:
  case OMPD_section:
  case OMPD_single:
  case OMPD_master:
  case OMPD_critical:
  case OMPD_taskyield:
  case OMPD_barrier:
  case OMPD_taskwait:
  case OMPD_taskgroup:
  case OMPD_atomic:
  case OMPD_flush:
  case OMPD_depobj:
  case OMPD_scan:
  case OMPD_teams:
  case OMPD_target_data:
  case OMPD_target_exit_data:
  case OMPD_target_enter_data:
  case OMPD_distribute:
  case OMPD_distribute_simd:
  case OMPD_distribute_parallel_for:
  case OMPD_distribute_parallel_for_simd:
  case OMPD_teams_distribute:
  case OMPD_teams_distribute_simd:
  case OMPD_teams_distribute_parallel_for:
  case OMPD_teams_distribute_parallel_for_simd:
  case OMPD_target_update:
  case OMPD_declare_simd:
  case OMPD_declare_variant:
  case OMPD_begin_declare_variant:
  case OMPD_end_declare_variant:
  case OMPD_declare_target:
  case OMPD_end_declare_target:
  case OMPD_declare_reduction:
  case OMPD_declare_mapper:
  case OMPD_taskloop:
  case OMPD_taskloop_simd:
  case OMPD_master_taskloop:
  case OMPD_master_taskloop_simd:
  case OMPD_parallel_master_taskloop:
  case OMPD_parallel_master_taskloop_simd:
  case OMPD_requires:
  case OMPD_metadirective:
  case OMPD_unknown:
    break;
  default:
    break;
  }
  llvm_unreachable("Unexpected directive kind.");
}
6684 
// Emits the host-side runtime value for the number of teams of a target
// directive: the evaluated num_teams expression (cast to i32) when one
// applies, otherwise the constant default computed by
// getNumTeamsExprForTargetDirective.
llvm::Value *CGOpenMPRuntime::emitNumTeamsForTargetDirective(
    CodeGenFunction &CGF, const OMPExecutableDirective &D) {
  assert(!CGF.getLangOpts().OpenMPIsDevice &&
         "Clauses associated with the teams directive expected to be emitted "
         "only for the host!");
  CGBuilderTy &Bld = CGF.Builder;
  int32_t DefaultNT = -1;
  const Expr *NumTeams = getNumTeamsExprForTargetDirective(CGF, D, DefaultNT);
  if (NumTeams != nullptr) {
    OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();

    switch (DirectiveKind) {
    case OMPD_target: {
      // The num_teams expression comes from a nested teams directive, so it
      // must be evaluated in the captured-statement context.
      const auto *CS = D.getInnermostCapturedStmt();
      CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
      CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
      llvm::Value *NumTeamsVal = CGF.EmitScalarExpr(NumTeams,
                                                  /*IgnoreResultAssign*/ true);
      return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
                             /*isSigned=*/true);
    }
    case OMPD_target_teams:
    case OMPD_target_teams_distribute:
    case OMPD_target_teams_distribute_simd:
    case OMPD_target_teams_distribute_parallel_for:
    case OMPD_target_teams_distribute_parallel_for_simd: {
      // Combined directives: evaluate the clause expression directly, with a
      // cleanup scope for any temporaries it creates.
      CodeGenFunction::RunCleanupsScope NumTeamsScope(CGF);
      llvm::Value *NumTeamsVal = CGF.EmitScalarExpr(NumTeams,
                                                  /*IgnoreResultAssign*/ true);
      return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
                             /*isSigned=*/true);
    }
    default:
      break;
    }
  }

  // No expression to evaluate: use the statically determined default.
  return llvm::ConstantInt::get(CGF.Int32Ty, DefaultNT);
}
6724 
// Computes the runtime number of threads for the (single) directive nested in
// the captured statement \p CS:
//  - for a nested parallel directive, combines its 'if' and 'num_threads'
//    clauses with \p DefaultThreadLimitVal (clamping to the smaller value);
//  - for a nested simd directive, returns 1;
//  - otherwise returns \p DefaultThreadLimitVal (which may be null), or 0
//    ("runtime decides") when there is no nested directive at all.
static llvm::Value *getNumThreads(CodeGenFunction &CGF, const CapturedStmt *CS,
                                  llvm::Value *DefaultThreadLimitVal) {
  const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
      CGF.getContext(), CS->getCapturedStmt());
  if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
    if (isOpenMPParallelDirective(Dir->getDirectiveKind())) {
      llvm::Value *NumThreads = nullptr;
      llvm::Value *CondVal = nullptr;
      // Handle if clause. If if clause present, the number of threads is
      // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
      if (Dir->hasClausesOfKind<OMPIfClause>()) {
        CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
        // Find the 'if' clause that applies to 'parallel' (unmodified or
        // explicitly parallel-modified).
        const OMPIfClause *IfClause = nullptr;
        for (const auto *C : Dir->getClausesOfKind<OMPIfClause>()) {
          if (C->getNameModifier() == OMPD_unknown ||
              C->getNameModifier() == OMPD_parallel) {
            IfClause = C;
            break;
          }
        }
        if (IfClause) {
          const Expr *Cond = IfClause->getCondition();
          bool Result;
          if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
            // Statically false condition: the parallel region runs with one
            // thread.
            if (!Result)
              return CGF.Builder.getInt32(1);
          } else {
            // Emit any pre-init declarations the clause needs, then evaluate
            // the condition at run time.
            CodeGenFunction::LexicalScope Scope(CGF, Cond->getSourceRange());
            if (const auto *PreInit =
                    cast_or_null<DeclStmt>(IfClause->getPreInitStmt())) {
              for (const auto *I : PreInit->decls()) {
                if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
                  CGF.EmitVarDecl(cast<VarDecl>(*I));
                } else {
                  CodeGenFunction::AutoVarEmission Emission =
                      CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
                  CGF.EmitAutoVarCleanups(Emission);
                }
              }
            }
            CondVal = CGF.EvaluateExprAsBool(Cond);
          }
        }
      }
      // Check the value of num_threads clause iff if clause was not specified
      // or is not evaluated to false.
      if (Dir->hasClausesOfKind<OMPNumThreadsClause>()) {
        CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
        const auto *NumThreadsClause =
            Dir->getSingleClause<OMPNumThreadsClause>();
        CodeGenFunction::LexicalScope Scope(
            CGF, NumThreadsClause->getNumThreads()->getSourceRange());
        // Emit the clause's pre-init declarations before evaluating it.
        if (const auto *PreInit =
                cast_or_null<DeclStmt>(NumThreadsClause->getPreInitStmt())) {
          for (const auto *I : PreInit->decls()) {
            if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
              CGF.EmitVarDecl(cast<VarDecl>(*I));
            } else {
              CodeGenFunction::AutoVarEmission Emission =
                  CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
              CGF.EmitAutoVarCleanups(Emission);
            }
          }
        }
        NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads());
        NumThreads = CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty,
                                               /*isSigned=*/false);
        // Clamp num_threads by the surrounding thread limit, if any:
        // min(DefaultThreadLimitVal, NumThreads).
        if (DefaultThreadLimitVal)
          NumThreads = CGF.Builder.CreateSelect(
              CGF.Builder.CreateICmpULT(DefaultThreadLimitVal, NumThreads),
              DefaultThreadLimitVal, NumThreads);
      } else {
        // No num_threads clause: use the limit, or 0 ("runtime decides").
        NumThreads = DefaultThreadLimitVal ? DefaultThreadLimitVal
                                           : CGF.Builder.getInt32(0);
      }
      // Process condition of the if clause.
      if (CondVal) {
        NumThreads = CGF.Builder.CreateSelect(CondVal, NumThreads,
                                              CGF.Builder.getInt32(1));
      }
      return NumThreads;
    }
    // A simd region executes with a single thread.
    if (isOpenMPSimdDirective(Dir->getDirectiveKind()))
      return CGF.Builder.getInt32(1);
    return DefaultThreadLimitVal;
  }
  return DefaultThreadLimitVal ? DefaultThreadLimitVal
                               : CGF.Builder.getInt32(0);
}
6816 
// Returns the expression that bounds the number of threads for the given
// target directive (a thread_limit or num_threads clause expression), or
// null when no bound is known. \p DefaultVal receives the statically known
// bound when the clause expression is an integer constant.
const Expr *CGOpenMPRuntime::getNumThreadsExprForTargetDirective(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    int32_t &DefaultVal) {
  OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
  assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
         "Expected target-based executable directive.");

  switch (DirectiveKind) {
  case OMPD_target:
    // Teams have no clause thread_limit
    return nullptr;
  case OMPD_target_teams:
  case OMPD_target_teams_distribute:
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      // Record the constant value of thread_limit when it folds.
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      const Expr *ThreadLimit = ThreadLimitClause->getThreadLimit();
      if (ThreadLimit->isIntegerConstantExpr(CGF.getContext()))
        if (auto Constant =
                ThreadLimit->getIntegerConstantExpr(CGF.getContext()))
          DefaultVal = Constant->getExtValue();
      return ThreadLimit;
    }
    return nullptr;
  case OMPD_target_parallel:
  case OMPD_target_parallel_for:
  case OMPD_target_parallel_for_simd:
  case OMPD_target_teams_distribute_parallel_for:
  case OMPD_target_teams_distribute_parallel_for_simd: {
    // Combine thread_limit and num_threads, keeping the smaller constant.
    Expr *ThreadLimit = nullptr;
    Expr *NumThreads = nullptr;
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      ThreadLimit = ThreadLimitClause->getThreadLimit();
      if (ThreadLimit->isIntegerConstantExpr(CGF.getContext()))
        if (auto Constant =
                ThreadLimit->getIntegerConstantExpr(CGF.getContext()))
          DefaultVal = Constant->getExtValue();
    }
    if (D.hasClausesOfKind<OMPNumThreadsClause>()) {
      const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>();
      NumThreads = NumThreadsClause->getNumThreads();
      if (NumThreads->isIntegerConstantExpr(CGF.getContext())) {
        if (auto Constant =
                NumThreads->getIntegerConstantExpr(CGF.getContext())) {
          // NOTE(review): when no thread_limit clause is present, DefaultVal
          // is still the caller's incoming value (-1 from
          // emitTargetOutlinedFunctionHelper), so a positive num_threads
          // constant fails this '<' test and is not recorded — confirm this
          // is intended.
          if (Constant->getExtValue() < DefaultVal) {
            DefaultVal = Constant->getExtValue();
            ThreadLimit = NumThreads;
          }
        }
      }
    }
    return ThreadLimit;
  }
  case OMPD_target_teams_distribute_simd:
  case OMPD_target_simd:
    // simd regions execute with a single thread.
    DefaultVal = 1;
    return nullptr;
  // All remaining directive kinds are not target-based and are rejected by
  // the assertion above; they are listed to keep the switch exhaustive.
  case OMPD_parallel:
  case OMPD_for:
  case OMPD_parallel_for:
  case OMPD_parallel_master:
  case OMPD_parallel_sections:
  case OMPD_for_simd:
  case OMPD_parallel_for_simd:
  case OMPD_cancel:
  case OMPD_cancellation_point:
  case OMPD_ordered:
  case OMPD_threadprivate:
  case OMPD_allocate:
  case OMPD_task:
  case OMPD_simd:
  case OMPD_tile:
  case OMPD_unroll:
  case OMPD_sections:
  case OMPD_section:
  case OMPD_single:
  case OMPD_master:
  case OMPD_critical:
  case OMPD_taskyield:
  case OMPD_barrier:
  case OMPD_taskwait:
  case OMPD_taskgroup:
  case OMPD_atomic:
  case OMPD_flush:
  case OMPD_depobj:
  case OMPD_scan:
  case OMPD_teams:
  case OMPD_target_data:
  case OMPD_target_exit_data:
  case OMPD_target_enter_data:
  case OMPD_distribute:
  case OMPD_distribute_simd:
  case OMPD_distribute_parallel_for:
  case OMPD_distribute_parallel_for_simd:
  case OMPD_teams_distribute:
  case OMPD_teams_distribute_simd:
  case OMPD_teams_distribute_parallel_for:
  case OMPD_teams_distribute_parallel_for_simd:
  case OMPD_target_update:
  case OMPD_declare_simd:
  case OMPD_declare_variant:
  case OMPD_begin_declare_variant:
  case OMPD_end_declare_variant:
  case OMPD_declare_target:
  case OMPD_end_declare_target:
  case OMPD_declare_reduction:
  case OMPD_declare_mapper:
  case OMPD_taskloop:
  case OMPD_taskloop_simd:
  case OMPD_master_taskloop:
  case OMPD_master_taskloop_simd:
  case OMPD_parallel_master_taskloop:
  case OMPD_parallel_master_taskloop_simd:
  case OMPD_requires:
  case OMPD_unknown:
    break;
  default:
    break;
  }
  llvm_unreachable("Unsupported directive kind.");
}
6938 
emitNumThreadsForTargetDirective(CodeGenFunction & CGF,const OMPExecutableDirective & D)6939 llvm::Value *CGOpenMPRuntime::emitNumThreadsForTargetDirective(
6940     CodeGenFunction &CGF, const OMPExecutableDirective &D) {
6941   assert(!CGF.getLangOpts().OpenMPIsDevice &&
6942          "Clauses associated with the teams directive expected to be emitted "
6943          "only for the host!");
6944   OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
6945   assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
6946          "Expected target-based executable directive.");
6947   CGBuilderTy &Bld = CGF.Builder;
6948   llvm::Value *ThreadLimitVal = nullptr;
6949   llvm::Value *NumThreadsVal = nullptr;
6950   switch (DirectiveKind) {
6951   case OMPD_target: {
6952     const CapturedStmt *CS = D.getInnermostCapturedStmt();
6953     if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
6954       return NumThreads;
6955     const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
6956         CGF.getContext(), CS->getCapturedStmt());
6957     if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
6958       if (Dir->hasClausesOfKind<OMPThreadLimitClause>()) {
6959         CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6960         CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6961         const auto *ThreadLimitClause =
6962             Dir->getSingleClause<OMPThreadLimitClause>();
6963         CodeGenFunction::LexicalScope Scope(
6964             CGF, ThreadLimitClause->getThreadLimit()->getSourceRange());
6965         if (const auto *PreInit =
6966                 cast_or_null<DeclStmt>(ThreadLimitClause->getPreInitStmt())) {
6967           for (const auto *I : PreInit->decls()) {
6968             if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
6969               CGF.EmitVarDecl(cast<VarDecl>(*I));
6970             } else {
6971               CodeGenFunction::AutoVarEmission Emission =
6972                   CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
6973               CGF.EmitAutoVarCleanups(Emission);
6974             }
6975           }
6976         }
6977         llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
6978             ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
6979         ThreadLimitVal =
6980             Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
6981       }
6982       if (isOpenMPTeamsDirective(Dir->getDirectiveKind()) &&
6983           !isOpenMPDistributeDirective(Dir->getDirectiveKind())) {
6984         CS = Dir->getInnermostCapturedStmt();
6985         const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
6986             CGF.getContext(), CS->getCapturedStmt());
6987         Dir = dyn_cast_or_null<OMPExecutableDirective>(Child);
6988       }
6989       if (Dir && isOpenMPDistributeDirective(Dir->getDirectiveKind()) &&
6990           !isOpenMPSimdDirective(Dir->getDirectiveKind())) {
6991         CS = Dir->getInnermostCapturedStmt();
6992         if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
6993           return NumThreads;
6994       }
6995       if (Dir && isOpenMPSimdDirective(Dir->getDirectiveKind()))
6996         return Bld.getInt32(1);
6997     }
6998     return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0);
6999   }
7000   case OMPD_target_teams: {
7001     if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
7002       CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
7003       const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
7004       llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
7005           ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
7006       ThreadLimitVal =
7007           Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
7008     }
7009     const CapturedStmt *CS = D.getInnermostCapturedStmt();
7010     if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
7011       return NumThreads;
7012     const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
7013         CGF.getContext(), CS->getCapturedStmt());
7014     if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
7015       if (Dir->getDirectiveKind() == OMPD_distribute) {
7016         CS = Dir->getInnermostCapturedStmt();
7017         if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
7018           return NumThreads;
7019       }
7020     }
7021     return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0);
7022   }
7023   case OMPD_target_teams_distribute:
7024     if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
7025       CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
7026       const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
7027       llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
7028           ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
7029       ThreadLimitVal =
7030           Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
7031     }
7032     return getNumThreads(CGF, D.getInnermostCapturedStmt(), ThreadLimitVal);
7033   case OMPD_target_parallel:
7034   case OMPD_target_parallel_for:
7035   case OMPD_target_parallel_for_simd:
7036   case OMPD_target_teams_distribute_parallel_for:
7037   case OMPD_target_teams_distribute_parallel_for_simd: {
7038     llvm::Value *CondVal = nullptr;
7039     // Handle if clause. If if clause present, the number of threads is
7040     // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
7041     if (D.hasClausesOfKind<OMPIfClause>()) {
7042       const OMPIfClause *IfClause = nullptr;
7043       for (const auto *C : D.getClausesOfKind<OMPIfClause>()) {
7044         if (C->getNameModifier() == OMPD_unknown ||
7045             C->getNameModifier() == OMPD_parallel) {
7046           IfClause = C;
7047           break;
7048         }
7049       }
7050       if (IfClause) {
7051         const Expr *Cond = IfClause->getCondition();
7052         bool Result;
7053         if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
7054           if (!Result)
7055             return Bld.getInt32(1);
7056         } else {
7057           CodeGenFunction::RunCleanupsScope Scope(CGF);
7058           CondVal = CGF.EvaluateExprAsBool(Cond);
7059         }
7060       }
7061     }
7062     if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
7063       CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
7064       const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
7065       llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
7066           ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
7067       ThreadLimitVal =
7068           Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
7069     }
7070     if (D.hasClausesOfKind<OMPNumThreadsClause>()) {
7071       CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
7072       const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>();
7073       llvm::Value *NumThreads = CGF.EmitScalarExpr(
7074           NumThreadsClause->getNumThreads(), /*IgnoreResultAssign=*/true);
7075       NumThreadsVal =
7076           Bld.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned=*/false);
7077       ThreadLimitVal = ThreadLimitVal
7078                            ? Bld.CreateSelect(Bld.CreateICmpULT(NumThreadsVal,
7079                                                                 ThreadLimitVal),
7080                                               NumThreadsVal, ThreadLimitVal)
7081                            : NumThreadsVal;
7082     }
7083     if (!ThreadLimitVal)
7084       ThreadLimitVal = Bld.getInt32(0);
7085     if (CondVal)
7086       return Bld.CreateSelect(CondVal, ThreadLimitVal, Bld.getInt32(1));
7087     return ThreadLimitVal;
7088   }
7089   case OMPD_target_teams_distribute_simd:
7090   case OMPD_target_simd:
7091     return Bld.getInt32(1);
7092   case OMPD_parallel:
7093   case OMPD_for:
7094   case OMPD_parallel_for:
7095   case OMPD_parallel_master:
7096   case OMPD_parallel_sections:
7097   case OMPD_for_simd:
7098   case OMPD_parallel_for_simd:
7099   case OMPD_cancel:
7100   case OMPD_cancellation_point:
7101   case OMPD_ordered:
7102   case OMPD_threadprivate:
7103   case OMPD_allocate:
7104   case OMPD_task:
7105   case OMPD_simd:
7106   case OMPD_tile:
7107   case OMPD_unroll:
7108   case OMPD_sections:
7109   case OMPD_section:
7110   case OMPD_single:
7111   case OMPD_master:
7112   case OMPD_critical:
7113   case OMPD_taskyield:
7114   case OMPD_barrier:
7115   case OMPD_taskwait:
7116   case OMPD_taskgroup:
7117   case OMPD_atomic:
7118   case OMPD_flush:
7119   case OMPD_depobj:
7120   case OMPD_scan:
7121   case OMPD_teams:
7122   case OMPD_target_data:
7123   case OMPD_target_exit_data:
7124   case OMPD_target_enter_data:
7125   case OMPD_distribute:
7126   case OMPD_distribute_simd:
7127   case OMPD_distribute_parallel_for:
7128   case OMPD_distribute_parallel_for_simd:
7129   case OMPD_teams_distribute:
7130   case OMPD_teams_distribute_simd:
7131   case OMPD_teams_distribute_parallel_for:
7132   case OMPD_teams_distribute_parallel_for_simd:
7133   case OMPD_target_update:
7134   case OMPD_declare_simd:
7135   case OMPD_declare_variant:
7136   case OMPD_begin_declare_variant:
7137   case OMPD_end_declare_variant:
7138   case OMPD_declare_target:
7139   case OMPD_end_declare_target:
7140   case OMPD_declare_reduction:
7141   case OMPD_declare_mapper:
7142   case OMPD_taskloop:
7143   case OMPD_taskloop_simd:
7144   case OMPD_master_taskloop:
7145   case OMPD_master_taskloop_simd:
7146   case OMPD_parallel_master_taskloop:
7147   case OMPD_parallel_master_taskloop_simd:
7148   case OMPD_requires:
7149   case OMPD_metadirective:
7150   case OMPD_unknown:
7151     break;
7152   default:
7153     break;
7154   }
7155   llvm_unreachable("Unsupported directive kind.");
7156 }
7157 
7158 namespace {
7159 LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
7160 
7161 // Utility to handle information from clauses associated with a given
7162 // construct that use mappable expressions (e.g. 'map' clause, 'to' clause).
7163 // It provides a convenient interface to obtain the information and generate
7164 // code for that information.
7165 class MappableExprsHandler {
7166 public:
  /// Values for bit flags used to specify the mapping type for
  /// offloading. These must stay in sync with the flag values expected by
  /// the offload runtime (libomptarget).
  enum OpenMPOffloadMappingFlags : uint64_t {
    /// No flags
    OMP_MAP_NONE = 0x0,
    /// Allocate memory on the device and move data from host to device.
    OMP_MAP_TO = 0x01,
    /// Allocate memory on the device and move data from device to host.
    OMP_MAP_FROM = 0x02,
    /// Always perform the requested mapping action on the element, even
    /// if it was already mapped before.
    OMP_MAP_ALWAYS = 0x04,
    /// Delete the element from the device environment, ignoring the
    /// current reference count associated with the element.
    OMP_MAP_DELETE = 0x08,
    /// The element being mapped is a pointer-pointee pair; both the
    /// pointer and the pointee should be mapped.
    OMP_MAP_PTR_AND_OBJ = 0x10,
    /// This flag signals that the base address of an entry should be
    /// passed to the target kernel as an argument.
    OMP_MAP_TARGET_PARAM = 0x20,
    /// Signal that the runtime library has to return the device pointer
    /// in the current position for the data being mapped. Used when we have the
    /// use_device_ptr or use_device_addr clause.
    OMP_MAP_RETURN_PARAM = 0x40,
    /// This flag signals that the reference being passed is a pointer to
    /// private data.
    OMP_MAP_PRIVATE = 0x80,
    /// Pass the element to the device by value.
    OMP_MAP_LITERAL = 0x100,
    /// Implicit map
    OMP_MAP_IMPLICIT = 0x200,
    /// Close is a hint to the runtime to allocate memory close to
    /// the target device.
    OMP_MAP_CLOSE = 0x400,
    // 0x800 is reserved for compatibility with XLC.
    /// Produce a runtime error if the data is not already allocated.
    OMP_MAP_PRESENT = 0x1000,
    /// Increment and decrement a separate reference counter so that the data
    /// cannot be unmapped within the associated region.  Thus, this flag is
    /// intended to be used on 'target' and 'target data' directives because
    /// they are inherently structured.  It is not intended to be used on
    /// 'target enter data' and 'target exit data' directives because they are
    /// inherently dynamic.
    /// This is an OpenMP extension for the sake of OpenACC support.
    OMP_MAP_OMPX_HOLD = 0x2000,
    /// Signal that the runtime library should use args as an array of
    /// descriptor_dim pointers and use args_size as dims. Used when we have
    /// non-contiguous list items in target update directive
    OMP_MAP_NON_CONTIG = 0x100000000000,
    /// The 16 MSBs of the flags indicate whether the entry is member of some
    /// struct/class.
    OMP_MAP_MEMBER_OF = 0xffff000000000000,
    LLVM_MARK_AS_BITMASK_ENUM(/* LargestFlag = */ OMP_MAP_MEMBER_OF),
  };
7222 
7223   /// Get the offset of the OMP_MAP_MEMBER_OF field.
getFlagMemberOffset()7224   static unsigned getFlagMemberOffset() {
7225     unsigned Offset = 0;
7226     for (uint64_t Remain = OMP_MAP_MEMBER_OF; !(Remain & 1);
7227          Remain = Remain >> 1)
7228       Offset++;
7229     return Offset;
7230   }
7231 
7232   /// Class that holds debugging information for a data mapping to be passed to
7233   /// the runtime library.
7234   class MappingExprInfo {
7235     /// The variable declaration used for the data mapping.
7236     const ValueDecl *MapDecl = nullptr;
7237     /// The original expression used in the map clause, or null if there is
7238     /// none.
7239     const Expr *MapExpr = nullptr;
7240 
7241   public:
MappingExprInfo(const ValueDecl * MapDecl,const Expr * MapExpr=nullptr)7242     MappingExprInfo(const ValueDecl *MapDecl, const Expr *MapExpr = nullptr)
7243         : MapDecl(MapDecl), MapExpr(MapExpr) {}
7244 
getMapDecl() const7245     const ValueDecl *getMapDecl() const { return MapDecl; }
getMapExpr() const7246     const Expr *getMapExpr() const { return MapExpr; }
7247   };
7248 
7249   /// Class that associates information with a base pointer to be passed to the
7250   /// runtime library.
7251   class BasePointerInfo {
7252     /// The base pointer.
7253     llvm::Value *Ptr = nullptr;
7254     /// The base declaration that refers to this device pointer, or null if
7255     /// there is none.
7256     const ValueDecl *DevPtrDecl = nullptr;
7257 
7258   public:
BasePointerInfo(llvm::Value * Ptr,const ValueDecl * DevPtrDecl=nullptr)7259     BasePointerInfo(llvm::Value *Ptr, const ValueDecl *DevPtrDecl = nullptr)
7260         : Ptr(Ptr), DevPtrDecl(DevPtrDecl) {}
operator *() const7261     llvm::Value *operator*() const { return Ptr; }
getDevicePtrDecl() const7262     const ValueDecl *getDevicePtrDecl() const { return DevPtrDecl; }
setDevicePtrDecl(const ValueDecl * D)7263     void setDevicePtrDecl(const ValueDecl *D) { DevPtrDecl = D; }
7264   };
7265 
  // Aliases for the parallel arrays in which mapping information is collected
  // before being handed to the offload runtime.
  using MapExprsArrayTy = SmallVector<MappingExprInfo, 4>;
  using MapBaseValuesArrayTy = SmallVector<BasePointerInfo, 4>;
  using MapValuesArrayTy = SmallVector<llvm::Value *, 4>;
  using MapFlagsArrayTy = SmallVector<OpenMPOffloadMappingFlags, 4>;
  using MapMappersArrayTy = SmallVector<const ValueDecl *, 4>;
  using MapDimArrayTy = SmallVector<uint64_t, 4>;
  using MapNonContiguousArrayTy = SmallVector<MapValuesArrayTy, 4>;
7273 
7274   /// This structure contains combined information generated for mappable
7275   /// clauses, including base pointers, pointers, sizes, map types, user-defined
7276   /// mappers, and non-contiguous information.
7277   struct MapCombinedInfoTy {
7278     struct StructNonContiguousInfo {
7279       bool IsNonContiguous = false;
7280       MapDimArrayTy Dims;
7281       MapNonContiguousArrayTy Offsets;
7282       MapNonContiguousArrayTy Counts;
7283       MapNonContiguousArrayTy Strides;
7284     };
7285     MapExprsArrayTy Exprs;
7286     MapBaseValuesArrayTy BasePointers;
7287     MapValuesArrayTy Pointers;
7288     MapValuesArrayTy Sizes;
7289     MapFlagsArrayTy Types;
7290     MapMappersArrayTy Mappers;
7291     StructNonContiguousInfo NonContigInfo;
7292 
7293     /// Append arrays in \a CurInfo.
append__anond842882b2d11::MappableExprsHandler::MapCombinedInfoTy7294     void append(MapCombinedInfoTy &CurInfo) {
7295       Exprs.append(CurInfo.Exprs.begin(), CurInfo.Exprs.end());
7296       BasePointers.append(CurInfo.BasePointers.begin(),
7297                           CurInfo.BasePointers.end());
7298       Pointers.append(CurInfo.Pointers.begin(), CurInfo.Pointers.end());
7299       Sizes.append(CurInfo.Sizes.begin(), CurInfo.Sizes.end());
7300       Types.append(CurInfo.Types.begin(), CurInfo.Types.end());
7301       Mappers.append(CurInfo.Mappers.begin(), CurInfo.Mappers.end());
7302       NonContigInfo.Dims.append(CurInfo.NonContigInfo.Dims.begin(),
7303                                  CurInfo.NonContigInfo.Dims.end());
7304       NonContigInfo.Offsets.append(CurInfo.NonContigInfo.Offsets.begin(),
7305                                     CurInfo.NonContigInfo.Offsets.end());
7306       NonContigInfo.Counts.append(CurInfo.NonContigInfo.Counts.begin(),
7307                                    CurInfo.NonContigInfo.Counts.end());
7308       NonContigInfo.Strides.append(CurInfo.NonContigInfo.Strides.begin(),
7309                                     CurInfo.NonContigInfo.Strides.end());
7310     }
7311   };
7312 
  /// Map between a struct and its lowest & highest elements which have been
  /// mapped.
  /// [ValueDecl *] --> {LE(FieldIndex, Pointer),
  ///                    HE(FieldIndex, Pointer)}
  struct StructRangeInfoTy {
    /// Mapping entries gathered for the struct before it is emitted as a
    /// whole.
    MapCombinedInfoTy PreliminaryMapData;
    /// Lowest mapped element: {field index, address of the element}.
    std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> LowestElem = {
        0, Address::invalid()};
    /// Highest mapped element: {field index, address of the element}.
    std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> HighestElem = {
        0, Address::invalid()};
    /// Base address of the struct.
    Address Base = Address::invalid();
    /// Lower-bound address of the mapped range (NOTE(review): inferred from
    /// the name; confirm against the code that fills this struct in).
    Address LB = Address::invalid();
    /// Whether the mapped range involves an array section.
    bool IsArraySection = false;
    /// Whether the complete record has been mapped.
    bool HasCompleteRecord = false;
  };
7328 
7329 private:
  /// Aggregates the clause information needed to generate the mapping entries
  /// for one mappable-expression component list (e.g. one item of a 'map'
  /// clause).
  struct MapInfo {
    /// Components of the mappable expression (base, member accesses,
    /// sections, ...).
    OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
    /// Map type: to / from / tofrom / alloc / release / delete.
    OpenMPMapClauseKind MapType = OMPC_MAP_unknown;
    /// Map-type modifiers (always, close, present, ompx_hold, ...).
    ArrayRef<OpenMPMapModifierKind> MapModifiers;
    /// Motion modifiers from 'to'/'from' clauses (e.g. present).
    ArrayRef<OpenMPMotionModifierKind> MotionModifiers;
    /// Whether the runtime must return the device pointer for this entry
    /// (use_device_ptr / use_device_addr).
    bool ReturnDevicePointer = false;
    /// True when the mapping was generated implicitly rather than written in
    /// the source.
    bool IsImplicit = false;
    /// User-defined mapper associated with this entry, if any.
    const ValueDecl *Mapper = nullptr;
    /// Original variable-reference expression, if any.
    const Expr *VarRef = nullptr;
    /// True when the entry stems from use_device_addr rather than
    /// use_device_ptr.
    bool ForDeviceAddr = false;

    MapInfo() = default;
    MapInfo(
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
        OpenMPMapClauseKind MapType,
        ArrayRef<OpenMPMapModifierKind> MapModifiers,
        ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
        bool ReturnDevicePointer, bool IsImplicit,
        const ValueDecl *Mapper = nullptr, const Expr *VarRef = nullptr,
        bool ForDeviceAddr = false)
        : Components(Components), MapType(MapType), MapModifiers(MapModifiers),
          MotionModifiers(MotionModifiers),
          ReturnDevicePointer(ReturnDevicePointer), IsImplicit(IsImplicit),
          Mapper(Mapper), VarRef(VarRef), ForDeviceAddr(ForDeviceAddr) {}
  };
7356 
  /// If use_device_ptr or use_device_addr is used on a decl which is a struct
  /// member and there is no map information about it, then emission of that
  /// entry is deferred until the whole struct has been processed.
  struct DeferredDevicePtrEntryTy {
    /// Expression associated with the deferred entry.
    const Expr *IE = nullptr;
    /// Declaration named by the use_device_ptr/use_device_addr clause.
    const ValueDecl *VD = nullptr;
    /// True for use_device_addr entries, false for use_device_ptr.
    bool ForDeviceAddr = false;

    DeferredDevicePtrEntryTy(const Expr *IE, const ValueDecl *VD,
                             bool ForDeviceAddr)
        : IE(IE), VD(VD), ForDeviceAddr(ForDeviceAddr) {}
  };
7369 
  /// The target directive from where the mappable clauses were extracted. It
  /// is either an executable directive or a user-defined mapper directive.
  llvm::PointerUnion<const OMPExecutableDirective *,
                     const OMPDeclareMapperDecl *>
      CurDir;

  /// Function the directive is being generated for.
  CodeGenFunction &CGF;

  /// Set of all first private variables in the current directive.
  /// bool data is set to true if the variable is implicitly marked as
  /// firstprivate, false otherwise.
  llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, bool> FirstPrivateDecls;

  /// Map between device pointer declarations and their expression components.
  /// The key value for declarations in 'this' is null.
  llvm::DenseMap<
      const ValueDecl *,
      SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>>
      DevPointersMap;

  /// Map between lambda declarations and their map type (the map clause that
  /// applies to the lambda).
  llvm::DenseMap<const ValueDecl *, const OMPMapClause *> LambdasMap;
7393 
  /// Compute the number of bytes to be mapped for expression \a E as an
  /// llvm::Value of size_t width. Array-shaping expressions and OpenMP array
  /// sections get their size computed from their dimensions/length; every
  /// other expression falls back to the size of its type.
  llvm::Value *getExprTypeSize(const Expr *E) const {
    QualType ExprTy = E->getType().getCanonicalType();

    // Calculate the size for array shaping expression.
    if (const auto *OAE = dyn_cast<OMPArrayShapingExpr>(E)) {
      // Size = sizeof(pointee) * dim0 * dim1 * ...
      llvm::Value *Size =
          CGF.getTypeSize(OAE->getBase()->getType()->getPointeeType());
      for (const Expr *SE : OAE->getDimensions()) {
        llvm::Value *Sz = CGF.EmitScalarExpr(SE);
        // Dimension expressions may have any integer type; convert to
        // size_t before multiplying.
        Sz = CGF.EmitScalarConversion(Sz, SE->getType(),
                                      CGF.getContext().getSizeType(),
                                      SE->getExprLoc());
        Size = CGF.Builder.CreateNUWMul(Size, Sz);
      }
      return Size;
    }

    // Reference types are ignored for mapping purposes.
    if (const auto *RefTy = ExprTy->getAs<ReferenceType>())
      ExprTy = RefTy->getPointeeType().getCanonicalType();

    // Given that an array section is considered a built-in type, we need to
    // do the calculation based on the length of the section instead of relying
    // on CGF.getTypeSize(E->getType()).
    if (const auto *OAE = dyn_cast<OMPArraySectionExpr>(E)) {
      QualType BaseTy = OMPArraySectionExpr::getBaseOriginalType(
                            OAE->getBase()->IgnoreParenImpCasts())
                            .getCanonicalType();

      // If there is no length associated with the expression and lower bound is
      // not specified too, that means we are using the whole length of the
      // base.
      if (!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
          !OAE->getLowerBound())
        return CGF.getTypeSize(BaseTy);

      llvm::Value *ElemSize;
      if (const auto *PTy = BaseTy->getAs<PointerType>()) {
        ElemSize = CGF.getTypeSize(PTy->getPointeeType().getCanonicalType());
      } else {
        // Non-pointer bases must be arrays here; take the element size.
        const auto *ATy = cast<ArrayType>(BaseTy.getTypePtr());
        assert(ATy && "Expecting array type if not a pointer type.");
        ElemSize = CGF.getTypeSize(ATy->getElementType().getCanonicalType());
      }

      // If we don't have a length at this point, that is because we have an
      // array section with a single element.
      if (!OAE->getLength() && OAE->getColonLocFirst().isInvalid())
        return ElemSize;

      // Explicit length: Size = length * sizeof(element).
      if (const Expr *LenExpr = OAE->getLength()) {
        llvm::Value *LengthVal = CGF.EmitScalarExpr(LenExpr);
        LengthVal = CGF.EmitScalarConversion(LengthVal, LenExpr->getType(),
                                             CGF.getContext().getSizeType(),
                                             LenExpr->getExprLoc());
        return CGF.Builder.CreateNUWMul(LengthVal, ElemSize);
      }
      assert(!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
             OAE->getLowerBound() && "expected array_section[lb:].");
      // Size = sizetype - lb * elemtype;
      llvm::Value *LengthVal = CGF.getTypeSize(BaseTy);
      llvm::Value *LBVal = CGF.EmitScalarExpr(OAE->getLowerBound());
      LBVal = CGF.EmitScalarConversion(LBVal, OAE->getLowerBound()->getType(),
                                       CGF.getContext().getSizeType(),
                                       OAE->getLowerBound()->getExprLoc());
      LBVal = CGF.Builder.CreateNUWMul(LBVal, ElemSize);
      // Clamp to zero when lb*elemsize >= sizeof(base) so the NUW subtraction
      // result is only used when it cannot wrap.
      llvm::Value *Cmp = CGF.Builder.CreateICmpUGT(LengthVal, LBVal);
      llvm::Value *TrueVal = CGF.Builder.CreateNUWSub(LengthVal, LBVal);
      LengthVal = CGF.Builder.CreateSelect(
          Cmp, TrueVal, llvm::ConstantInt::get(CGF.SizeTy, 0));
      return LengthVal;
    }
    return CGF.getTypeSize(ExprTy);
  }
7468 
7469   /// Return the corresponding bits for a given map clause modifier. Add
7470   /// a flag marking the map as a pointer if requested. Add a flag marking the
7471   /// map as the first one of a series of maps that relate to the same map
7472   /// expression.
getMapTypeBits(OpenMPMapClauseKind MapType,ArrayRef<OpenMPMapModifierKind> MapModifiers,ArrayRef<OpenMPMotionModifierKind> MotionModifiers,bool IsImplicit,bool AddPtrFlag,bool AddIsTargetParamFlag,bool IsNonContiguous) const7473   OpenMPOffloadMappingFlags getMapTypeBits(
7474       OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
7475       ArrayRef<OpenMPMotionModifierKind> MotionModifiers, bool IsImplicit,
7476       bool AddPtrFlag, bool AddIsTargetParamFlag, bool IsNonContiguous) const {
7477     OpenMPOffloadMappingFlags Bits =
7478         IsImplicit ? OMP_MAP_IMPLICIT : OMP_MAP_NONE;
7479     switch (MapType) {
7480     case OMPC_MAP_alloc:
7481     case OMPC_MAP_release:
7482       // alloc and release is the default behavior in the runtime library,  i.e.
7483       // if we don't pass any bits alloc/release that is what the runtime is
7484       // going to do. Therefore, we don't need to signal anything for these two
7485       // type modifiers.
7486       break;
7487     case OMPC_MAP_to:
7488       Bits |= OMP_MAP_TO;
7489       break;
7490     case OMPC_MAP_from:
7491       Bits |= OMP_MAP_FROM;
7492       break;
7493     case OMPC_MAP_tofrom:
7494       Bits |= OMP_MAP_TO | OMP_MAP_FROM;
7495       break;
7496     case OMPC_MAP_delete:
7497       Bits |= OMP_MAP_DELETE;
7498       break;
7499     case OMPC_MAP_unknown:
7500       llvm_unreachable("Unexpected map type!");
7501     }
7502     if (AddPtrFlag)
7503       Bits |= OMP_MAP_PTR_AND_OBJ;
7504     if (AddIsTargetParamFlag)
7505       Bits |= OMP_MAP_TARGET_PARAM;
7506     if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_always))
7507       Bits |= OMP_MAP_ALWAYS;
7508     if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_close))
7509       Bits |= OMP_MAP_CLOSE;
7510     if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_present) ||
7511         llvm::is_contained(MotionModifiers, OMPC_MOTION_MODIFIER_present))
7512       Bits |= OMP_MAP_PRESENT;
7513     if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_ompx_hold))
7514       Bits |= OMP_MAP_OMPX_HOLD;
7515     if (IsNonContiguous)
7516       Bits |= OMP_MAP_NON_CONTIG;
7517     return Bits;
7518   }
7519 
7520   /// Return true if the provided expression is a final array section. A
7521   /// final array section, is one whose length can't be proved to be one.
isFinalArraySectionExpression(const Expr * E) const7522   bool isFinalArraySectionExpression(const Expr *E) const {
7523     const auto *OASE = dyn_cast<OMPArraySectionExpr>(E);
7524 
7525     // It is not an array section and therefore not a unity-size one.
7526     if (!OASE)
7527       return false;
7528 
7529     // An array section with no colon always refer to a single element.
7530     if (OASE->getColonLocFirst().isInvalid())
7531       return false;
7532 
7533     const Expr *Length = OASE->getLength();
7534 
7535     // If we don't have a length we have to check if the array has size 1
7536     // for this dimension. Also, we should always expect a length if the
7537     // base type is pointer.
7538     if (!Length) {
7539       QualType BaseQTy = OMPArraySectionExpr::getBaseOriginalType(
7540                              OASE->getBase()->IgnoreParenImpCasts())
7541                              .getCanonicalType();
7542       if (const auto *ATy = dyn_cast<ConstantArrayType>(BaseQTy.getTypePtr()))
7543         return ATy->getSize().getSExtValue() != 1;
7544       // If we don't have a constant dimension length, we have to consider
7545       // the current section as having any size, so it is not necessarily
7546       // unitary. If it happen to be unity size, that's user fault.
7547       return true;
7548     }
7549 
7550     // Check if the length evaluates to 1.
7551     Expr::EvalResult Result;
7552     if (!Length->EvaluateAsInt(Result, CGF.getContext()))
7553       return true; // Can have more that size 1.
7554 
7555     llvm::APSInt ConstLength = Result.Val.getInt();
7556     return ConstLength.getSExtValue() != 1;
7557   }
7558 
7559   /// Generate the base pointers, section pointers, sizes, map type bits, and
7560   /// user-defined mappers (all included in \a CombinedInfo) for the provided
7561   /// map type, map or motion modifiers, and expression components.
7562   /// \a IsFirstComponent should be set to true if the provided set of
7563   /// components is the first associated with a capture.
generateInfoForComponentList(OpenMPMapClauseKind MapType,ArrayRef<OpenMPMapModifierKind> MapModifiers,ArrayRef<OpenMPMotionModifierKind> MotionModifiers,OMPClauseMappableExprCommon::MappableExprComponentListRef Components,MapCombinedInfoTy & CombinedInfo,StructRangeInfoTy & PartialStruct,bool IsFirstComponentList,bool IsImplicit,const ValueDecl * Mapper=nullptr,bool ForDeviceAddr=false,const ValueDecl * BaseDecl=nullptr,const Expr * MapExpr=nullptr,ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef> OverlappedElements=llvm::None) const7564   void generateInfoForComponentList(
7565       OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
7566       ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
7567       OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
7568       MapCombinedInfoTy &CombinedInfo, StructRangeInfoTy &PartialStruct,
7569       bool IsFirstComponentList, bool IsImplicit,
7570       const ValueDecl *Mapper = nullptr, bool ForDeviceAddr = false,
7571       const ValueDecl *BaseDecl = nullptr, const Expr *MapExpr = nullptr,
7572       ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
7573           OverlappedElements = llvm::None) const {
7574     // The following summarizes what has to be generated for each map and the
7575     // types below. The generated information is expressed in this order:
7576     // base pointer, section pointer, size, flags
7577     // (to add to the ones that come from the map type and modifier).
7578     //
7579     // double d;
7580     // int i[100];
7581     // float *p;
7582     //
7583     // struct S1 {
7584     //   int i;
7585     //   float f[50];
7586     // }
7587     // struct S2 {
7588     //   int i;
7589     //   float f[50];
7590     //   S1 s;
7591     //   double *p;
7592     //   struct S2 *ps;
7593     //   int &ref;
7594     // }
7595     // S2 s;
7596     // S2 *ps;
7597     //
7598     // map(d)
7599     // &d, &d, sizeof(double), TARGET_PARAM | TO | FROM
7600     //
7601     // map(i)
7602     // &i, &i, 100*sizeof(int), TARGET_PARAM | TO | FROM
7603     //
7604     // map(i[1:23])
7605     // &i(=&i[0]), &i[1], 23*sizeof(int), TARGET_PARAM | TO | FROM
7606     //
7607     // map(p)
7608     // &p, &p, sizeof(float*), TARGET_PARAM | TO | FROM
7609     //
7610     // map(p[1:24])
7611     // &p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM | PTR_AND_OBJ
7612     // in unified shared memory mode or for local pointers
7613     // p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM
7614     //
7615     // map(s)
7616     // &s, &s, sizeof(S2), TARGET_PARAM | TO | FROM
7617     //
7618     // map(s.i)
7619     // &s, &(s.i), sizeof(int), TARGET_PARAM | TO | FROM
7620     //
7621     // map(s.s.f)
7622     // &s, &(s.s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
7623     //
7624     // map(s.p)
7625     // &s, &(s.p), sizeof(double*), TARGET_PARAM | TO | FROM
7626     //
7627     // map(to: s.p[:22])
7628     // &s, &(s.p), sizeof(double*), TARGET_PARAM (*)
7629     // &s, &(s.p), sizeof(double*), MEMBER_OF(1) (**)
7630     // &(s.p), &(s.p[0]), 22*sizeof(double),
7631     //   MEMBER_OF(1) | PTR_AND_OBJ | TO (***)
7632     // (*) alloc space for struct members, only this is a target parameter
7633     // (**) map the pointer (nothing to be mapped in this example) (the compiler
7634     //      optimizes this entry out, same in the examples below)
7635     // (***) map the pointee (map: to)
7636     //
7637     // map(to: s.ref)
7638     // &s, &(s.ref), sizeof(int*), TARGET_PARAM (*)
7639     // &s, &(s.ref), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | TO (***)
7640     // (*) alloc space for struct members, only this is a target parameter
7641     // (**) map the pointer (nothing to be mapped in this example) (the compiler
7642     //      optimizes this entry out, same in the examples below)
7643     // (***) map the pointee (map: to)
7644     //
7645     // map(s.ps)
7646     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM | TO | FROM
7647     //
7648     // map(from: s.ps->s.i)
7649     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7650     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7651     // &(s.ps), &(s.ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ  | FROM
7652     //
7653     // map(to: s.ps->ps)
7654     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7655     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7656     // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ  | TO
7657     //
7658     // map(s.ps->ps->ps)
7659     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7660     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7661     // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7662     // &(s.ps->ps), &(s.ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
7663     //
7664     // map(to: s.ps->ps->s.f[:22])
7665     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7666     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7667     // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7668     // &(s.ps->ps), &(s.ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
7669     //
7670     // map(ps)
7671     // &ps, &ps, sizeof(S2*), TARGET_PARAM | TO | FROM
7672     //
7673     // map(ps->i)
7674     // ps, &(ps->i), sizeof(int), TARGET_PARAM | TO | FROM
7675     //
7676     // map(ps->s.f)
7677     // ps, &(ps->s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
7678     //
7679     // map(from: ps->p)
7680     // ps, &(ps->p), sizeof(double*), TARGET_PARAM | FROM
7681     //
7682     // map(to: ps->p[:22])
7683     // ps, &(ps->p), sizeof(double*), TARGET_PARAM
7684     // ps, &(ps->p), sizeof(double*), MEMBER_OF(1)
7685     // &(ps->p), &(ps->p[0]), 22*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | TO
7686     //
7687     // map(ps->ps)
7688     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM | TO | FROM
7689     //
7690     // map(from: ps->ps->s.i)
7691     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7692     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7693     // &(ps->ps), &(ps->ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7694     //
7695     // map(from: ps->ps->ps)
7696     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7697     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7698     // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7699     //
7700     // map(ps->ps->ps->ps)
7701     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7702     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7703     // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7704     // &(ps->ps->ps), &(ps->ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
7705     //
7706     // map(to: ps->ps->ps->s.f[:22])
7707     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7708     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7709     // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7710     // &(ps->ps->ps), &(ps->ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
7711     //
7712     // map(to: s.f[:22]) map(from: s.p[:33])
7713     // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1) +
7714     //     sizeof(double*) (**), TARGET_PARAM
7715     // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | TO
7716     // &s, &(s.p), sizeof(double*), MEMBER_OF(1)
7717     // &(s.p), &(s.p[0]), 33*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7718     // (*) allocate contiguous space needed to fit all mapped members even if
7719     //     we allocate space for members not mapped (in this example,
7720     //     s.f[22..49] and s.s are not mapped, yet we must allocate space for
7721     //     them as well because they fall between &s.f[0] and &s.p)
7722     //
7723     // map(from: s.f[:22]) map(to: ps->p[:33])
7724     // &s, &(s.f[0]), 22*sizeof(float), TARGET_PARAM | FROM
7725     // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
7726     // ps, &(ps->p), sizeof(double*), MEMBER_OF(2) (*)
7727     // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(2) | PTR_AND_OBJ | TO
7728     // (*) the struct this entry pertains to is the 2nd element in the list of
7729     //     arguments, hence MEMBER_OF(2)
7730     //
7731     // map(from: s.f[:22], s.s) map(to: ps->p[:33])
7732     // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1), TARGET_PARAM
7733     // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | FROM
7734     // &s, &(s.s), sizeof(struct S1), MEMBER_OF(1) | FROM
7735     // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
7736     // ps, &(ps->p), sizeof(double*), MEMBER_OF(4) (*)
7737     // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(4) | PTR_AND_OBJ | TO
7738     // (*) the struct this entry pertains to is the 4th element in the list
7739     //     of arguments, hence MEMBER_OF(4)
7740 
7741     // Track if the map information being generated is the first for a capture.
7742     bool IsCaptureFirstInfo = IsFirstComponentList;
7743     // When the variable is on a declare target link or in a to clause with
7744     // unified memory, a reference is needed to hold the host/device address
7745     // of the variable.
7746     bool RequiresReference = false;
7747 
7748     // Scan the components from the base to the complete expression.
7749     auto CI = Components.rbegin();
7750     auto CE = Components.rend();
7751     auto I = CI;
7752 
7753     // Track if the map information being generated is the first for a list of
7754     // components.
7755     bool IsExpressionFirstInfo = true;
7756     bool FirstPointerInComplexData = false;
7757     Address BP = Address::invalid();
7758     const Expr *AssocExpr = I->getAssociatedExpression();
7759     const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr);
7760     const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);
7761     const auto *OAShE = dyn_cast<OMPArrayShapingExpr>(AssocExpr);
7762 
7763     if (isa<MemberExpr>(AssocExpr)) {
7764       // The base is the 'this' pointer. The content of the pointer is going
7765       // to be the base of the field being mapped.
7766       BP = CGF.LoadCXXThisAddress();
7767     } else if ((AE && isa<CXXThisExpr>(AE->getBase()->IgnoreParenImpCasts())) ||
7768                (OASE &&
7769                 isa<CXXThisExpr>(OASE->getBase()->IgnoreParenImpCasts()))) {
7770       BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF);
7771     } else if (OAShE &&
7772                isa<CXXThisExpr>(OAShE->getBase()->IgnoreParenCasts())) {
7773       BP = Address(
7774           CGF.EmitScalarExpr(OAShE->getBase()),
7775           CGF.ConvertTypeForMem(OAShE->getBase()->getType()->getPointeeType()),
7776           CGF.getContext().getTypeAlignInChars(OAShE->getBase()->getType()));
7777     } else {
7778       // The base is the reference to the variable.
7779       // BP = &Var.
7780       BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF);
7781       if (const auto *VD =
7782               dyn_cast_or_null<VarDecl>(I->getAssociatedDeclaration())) {
7783         if (llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
7784                 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD)) {
7785           if ((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
7786               (*Res == OMPDeclareTargetDeclAttr::MT_To &&
7787                CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())) {
7788             RequiresReference = true;
7789             BP = CGF.CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
7790           }
7791         }
7792       }
7793 
7794       // If the variable is a pointer and is being dereferenced (i.e. is not
7795       // the last component), the base has to be the pointer itself, not its
7796       // reference. References are ignored for mapping purposes.
7797       QualType Ty =
7798           I->getAssociatedDeclaration()->getType().getNonReferenceType();
7799       if (Ty->isAnyPointerType() && std::next(I) != CE) {
7800         // No need to generate individual map information for the pointer, it
7801         // can be associated with the combined storage if shared memory mode is
7802         // active or the base declaration is not global variable.
7803         const auto *VD = dyn_cast<VarDecl>(I->getAssociatedDeclaration());
7804         if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
7805             !VD || VD->hasLocalStorage())
7806           BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
7807         else
7808           FirstPointerInComplexData = true;
7809         ++I;
7810       }
7811     }
7812 
7813     // Track whether a component of the list should be marked as MEMBER_OF some
7814     // combined entry (for partial structs). Only the first PTR_AND_OBJ entry
7815     // in a component list should be marked as MEMBER_OF, all subsequent entries
7816     // do not belong to the base struct. E.g.
7817     // struct S2 s;
7818     // s.ps->ps->ps->f[:]
7819     //   (1) (2) (3) (4)
7820     // ps(1) is a member pointer, ps(2) is a pointee of ps(1), so it is a
7821     // PTR_AND_OBJ entry; the PTR is ps(1), so MEMBER_OF the base struct. ps(3)
7822     // is the pointee of ps(2) which is not member of struct s, so it should not
7823     // be marked as such (it is still PTR_AND_OBJ).
7824     // The variable is initialized to false so that PTR_AND_OBJ entries which
7825     // are not struct members are not considered (e.g. array of pointers to
7826     // data).
7827     bool ShouldBeMemberOf = false;
7828 
7829     // Variable keeping track of whether or not we have encountered a component
7830     // in the component list which is a member expression. Useful when we have a
7831     // pointer or a final array section, in which case it is the previous
7832     // component in the list which tells us whether we have a member expression.
7833     // E.g. X.f[:]
7834     // While processing the final array section "[:]" it is "f" which tells us
7835     // whether we are dealing with a member of a declared struct.
7836     const MemberExpr *EncounteredME = nullptr;
7837 
7838     // Track the total number of dimensions. Start from one for the dummy
7839     // dimension.
7840     uint64_t DimSize = 1;
7841 
7842     bool IsNonContiguous = CombinedInfo.NonContigInfo.IsNonContiguous;
7843     bool IsPrevMemberReference = false;
7844 
7845     for (; I != CE; ++I) {
7846       // If the current component is member of a struct (parent struct) mark it.
7847       if (!EncounteredME) {
7848         EncounteredME = dyn_cast<MemberExpr>(I->getAssociatedExpression());
7849         // If we encounter a PTR_AND_OBJ entry from now on it should be marked
7850         // as MEMBER_OF the parent struct.
7851         if (EncounteredME) {
7852           ShouldBeMemberOf = true;
7853           // Do not emit as complex pointer if this is actually not array-like
7854           // expression.
7855           if (FirstPointerInComplexData) {
7856             QualType Ty = std::prev(I)
7857                               ->getAssociatedDeclaration()
7858                               ->getType()
7859                               .getNonReferenceType();
7860             BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
7861             FirstPointerInComplexData = false;
7862           }
7863         }
7864       }
7865 
7866       auto Next = std::next(I);
7867 
7868       // We need to generate the addresses and sizes if this is the last
7869       // component, if the component is a pointer or if it is an array section
7870       // whose length can't be proved to be one. If this is a pointer, it
7871       // becomes the base address for the following components.
7872 
7873     // A final array section is one whose length can't be proved to be one.
7874       // If the map item is non-contiguous then we don't treat any array section
7875       // as final array section.
7876       bool IsFinalArraySection =
7877           !IsNonContiguous &&
7878           isFinalArraySectionExpression(I->getAssociatedExpression());
7879 
7880       // If we have a declaration for the mapping use that, otherwise use
7881       // the base declaration of the map clause.
7882       const ValueDecl *MapDecl = (I->getAssociatedDeclaration())
7883                                      ? I->getAssociatedDeclaration()
7884                                      : BaseDecl;
7885       MapExpr = (I->getAssociatedExpression()) ? I->getAssociatedExpression()
7886                                                : MapExpr;
7887 
7888       // Get information on whether the element is a pointer. Have to do a
7889       // special treatment for array sections given that they are built-in
7890       // types.
7891       const auto *OASE =
7892           dyn_cast<OMPArraySectionExpr>(I->getAssociatedExpression());
7893       const auto *OAShE =
7894           dyn_cast<OMPArrayShapingExpr>(I->getAssociatedExpression());
7895       const auto *UO = dyn_cast<UnaryOperator>(I->getAssociatedExpression());
7896       const auto *BO = dyn_cast<BinaryOperator>(I->getAssociatedExpression());
7897       bool IsPointer =
7898           OAShE ||
7899           (OASE && OMPArraySectionExpr::getBaseOriginalType(OASE)
7900                        .getCanonicalType()
7901                        ->isAnyPointerType()) ||
7902           I->getAssociatedExpression()->getType()->isAnyPointerType();
7903       bool IsMemberReference = isa<MemberExpr>(I->getAssociatedExpression()) &&
7904                                MapDecl &&
7905                                MapDecl->getType()->isLValueReferenceType();
7906       bool IsNonDerefPointer = IsPointer && !UO && !BO && !IsNonContiguous;
7907 
7908       if (OASE)
7909         ++DimSize;
7910 
7911       if (Next == CE || IsMemberReference || IsNonDerefPointer ||
7912           IsFinalArraySection) {
7913         // If this is not the last component, we expect the pointer to be
7914         // associated with an array expression or member expression.
7915         assert((Next == CE ||
7916                 isa<MemberExpr>(Next->getAssociatedExpression()) ||
7917                 isa<ArraySubscriptExpr>(Next->getAssociatedExpression()) ||
7918                 isa<OMPArraySectionExpr>(Next->getAssociatedExpression()) ||
7919                 isa<OMPArrayShapingExpr>(Next->getAssociatedExpression()) ||
7920                 isa<UnaryOperator>(Next->getAssociatedExpression()) ||
7921                 isa<BinaryOperator>(Next->getAssociatedExpression())) &&
7922                "Unexpected expression");
7923 
7924         Address LB = Address::invalid();
7925         Address LowestElem = Address::invalid();
7926         auto &&EmitMemberExprBase = [](CodeGenFunction &CGF,
7927                                        const MemberExpr *E) {
7928           const Expr *BaseExpr = E->getBase();
7929           // If this is s.x, emit s as an lvalue.  If it is s->x, emit s as a
7930           // scalar.
7931           LValue BaseLV;
7932           if (E->isArrow()) {
7933             LValueBaseInfo BaseInfo;
7934             TBAAAccessInfo TBAAInfo;
7935             Address Addr =
7936                 CGF.EmitPointerWithAlignment(BaseExpr, &BaseInfo, &TBAAInfo);
7937             QualType PtrTy = BaseExpr->getType()->getPointeeType();
7938             BaseLV = CGF.MakeAddrLValue(Addr, PtrTy, BaseInfo, TBAAInfo);
7939           } else {
7940             BaseLV = CGF.EmitOMPSharedLValue(BaseExpr);
7941           }
7942           return BaseLV;
7943         };
7944         if (OAShE) {
7945           LowestElem = LB =
7946               Address(CGF.EmitScalarExpr(OAShE->getBase()),
7947                       CGF.ConvertTypeForMem(
7948                           OAShE->getBase()->getType()->getPointeeType()),
7949                       CGF.getContext().getTypeAlignInChars(
7950                           OAShE->getBase()->getType()));
7951         } else if (IsMemberReference) {
7952           const auto *ME = cast<MemberExpr>(I->getAssociatedExpression());
7953           LValue BaseLVal = EmitMemberExprBase(CGF, ME);
7954           LowestElem = CGF.EmitLValueForFieldInitialization(
7955                               BaseLVal, cast<FieldDecl>(MapDecl))
7956                            .getAddress(CGF);
7957           LB = CGF.EmitLoadOfReferenceLValue(LowestElem, MapDecl->getType())
7958                    .getAddress(CGF);
7959         } else {
7960           LowestElem = LB =
7961               CGF.EmitOMPSharedLValue(I->getAssociatedExpression())
7962                   .getAddress(CGF);
7963         }
7964 
7965         // If this component is a pointer inside the base struct then we don't
7966         // need to create any entry for it - it will be combined with the object
7967         // it is pointing to into a single PTR_AND_OBJ entry.
7968         bool IsMemberPointerOrAddr =
7969             EncounteredME &&
7970             (((IsPointer || ForDeviceAddr) &&
7971               I->getAssociatedExpression() == EncounteredME) ||
7972              (IsPrevMemberReference && !IsPointer) ||
7973              (IsMemberReference && Next != CE &&
7974               !Next->getAssociatedExpression()->getType()->isPointerType()));
7975         if (!OverlappedElements.empty() && Next == CE) {
7976           // Handle base element with the info for overlapped elements.
7977           assert(!PartialStruct.Base.isValid() && "The base element is set.");
7978           assert(!IsPointer &&
7979                  "Unexpected base element with the pointer type.");
7980           // Mark the whole struct as the struct that requires allocation on the
7981           // device.
7982           PartialStruct.LowestElem = {0, LowestElem};
7983           CharUnits TypeSize = CGF.getContext().getTypeSizeInChars(
7984               I->getAssociatedExpression()->getType());
7985           Address HB = CGF.Builder.CreateConstGEP(
7986               CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
7987                   LowestElem, CGF.VoidPtrTy, CGF.Int8Ty),
7988               TypeSize.getQuantity() - 1);
7989           PartialStruct.HighestElem = {
7990               std::numeric_limits<decltype(
7991                   PartialStruct.HighestElem.first)>::max(),
7992               HB};
7993           PartialStruct.Base = BP;
7994           PartialStruct.LB = LB;
7995           assert(
7996               PartialStruct.PreliminaryMapData.BasePointers.empty() &&
7997               "Overlapped elements must be used only once for the variable.");
7998           std::swap(PartialStruct.PreliminaryMapData, CombinedInfo);
7999           // Emit data for non-overlapped data.
8000           OpenMPOffloadMappingFlags Flags =
8001               OMP_MAP_MEMBER_OF |
8002               getMapTypeBits(MapType, MapModifiers, MotionModifiers, IsImplicit,
8003                              /*AddPtrFlag=*/false,
8004                              /*AddIsTargetParamFlag=*/false, IsNonContiguous);
8005           llvm::Value *Size = nullptr;
8006           // Do bitcopy of all non-overlapped structure elements.
8007           for (OMPClauseMappableExprCommon::MappableExprComponentListRef
8008                    Component : OverlappedElements) {
8009             Address ComponentLB = Address::invalid();
8010             for (const OMPClauseMappableExprCommon::MappableComponent &MC :
8011                  Component) {
8012               if (const ValueDecl *VD = MC.getAssociatedDeclaration()) {
8013                 const auto *FD = dyn_cast<FieldDecl>(VD);
8014                 if (FD && FD->getType()->isLValueReferenceType()) {
8015                   const auto *ME =
8016                       cast<MemberExpr>(MC.getAssociatedExpression());
8017                   LValue BaseLVal = EmitMemberExprBase(CGF, ME);
8018                   ComponentLB =
8019                       CGF.EmitLValueForFieldInitialization(BaseLVal, FD)
8020                           .getAddress(CGF);
8021                 } else {
8022                   ComponentLB =
8023                       CGF.EmitOMPSharedLValue(MC.getAssociatedExpression())
8024                           .getAddress(CGF);
8025                 }
8026                 Size = CGF.Builder.CreatePtrDiff(
8027                     CGF.Int8Ty, CGF.EmitCastToVoidPtr(ComponentLB.getPointer()),
8028                     CGF.EmitCastToVoidPtr(LB.getPointer()));
8029                 break;
8030               }
8031             }
8032             assert(Size && "Failed to determine structure size");
8033             CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
8034             CombinedInfo.BasePointers.push_back(BP.getPointer());
8035             CombinedInfo.Pointers.push_back(LB.getPointer());
8036             CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
8037                 Size, CGF.Int64Ty, /*isSigned=*/true));
8038             CombinedInfo.Types.push_back(Flags);
8039             CombinedInfo.Mappers.push_back(nullptr);
8040             CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
8041                                                                       : 1);
8042             LB = CGF.Builder.CreateConstGEP(ComponentLB, 1);
8043           }
8044           CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
8045           CombinedInfo.BasePointers.push_back(BP.getPointer());
8046           CombinedInfo.Pointers.push_back(LB.getPointer());
8047           Size = CGF.Builder.CreatePtrDiff(
8048               CGF.Int8Ty, CGF.Builder.CreateConstGEP(HB, 1).getPointer(),
8049               CGF.EmitCastToVoidPtr(LB.getPointer()));
8050           CombinedInfo.Sizes.push_back(
8051               CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));
8052           CombinedInfo.Types.push_back(Flags);
8053           CombinedInfo.Mappers.push_back(nullptr);
8054           CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
8055                                                                     : 1);
8056           break;
8057         }
8058         llvm::Value *Size = getExprTypeSize(I->getAssociatedExpression());
8059         if (!IsMemberPointerOrAddr ||
8060             (Next == CE && MapType != OMPC_MAP_unknown)) {
8061           CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
8062           CombinedInfo.BasePointers.push_back(BP.getPointer());
8063           CombinedInfo.Pointers.push_back(LB.getPointer());
8064           CombinedInfo.Sizes.push_back(
8065               CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));
8066           CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
8067                                                                     : 1);
8068 
8069           // If Mapper is valid, the last component inherits the mapper.
8070           bool HasMapper = Mapper && Next == CE;
8071           CombinedInfo.Mappers.push_back(HasMapper ? Mapper : nullptr);
8072 
8073           // We need to add a pointer flag for each map that comes from the
8074           // same expression except for the first one. We also need to signal
8075           // this map is the first one that relates with the current capture
8076           // (there is a set of entries for each capture).
8077           OpenMPOffloadMappingFlags Flags = getMapTypeBits(
8078               MapType, MapModifiers, MotionModifiers, IsImplicit,
8079               !IsExpressionFirstInfo || RequiresReference ||
8080                   FirstPointerInComplexData || IsMemberReference,
8081               IsCaptureFirstInfo && !RequiresReference, IsNonContiguous);
8082 
8083           if (!IsExpressionFirstInfo || IsMemberReference) {
8084             // If we have a PTR_AND_OBJ pair where the OBJ is a pointer as well,
8085             // then we reset the TO/FROM/ALWAYS/DELETE/CLOSE flags.
8086             if (IsPointer || (IsMemberReference && Next != CE))
8087               Flags &= ~(OMP_MAP_TO | OMP_MAP_FROM | OMP_MAP_ALWAYS |
8088                          OMP_MAP_DELETE | OMP_MAP_CLOSE);
8089 
8090             if (ShouldBeMemberOf) {
8091               // Set placeholder value MEMBER_OF=FFFF to indicate that the flag
8092               // should be later updated with the correct value of MEMBER_OF.
8093               Flags |= OMP_MAP_MEMBER_OF;
8094               // From now on, all subsequent PTR_AND_OBJ entries should not be
8095               // marked as MEMBER_OF.
8096               ShouldBeMemberOf = false;
8097             }
8098           }
8099 
8100           CombinedInfo.Types.push_back(Flags);
8101         }
8102 
8103         // If we have encountered a member expression so far, keep track of the
8104         // mapped member. If the parent is "*this", then the value declaration
8105         // is nullptr.
8106         if (EncounteredME) {
8107           const auto *FD = cast<FieldDecl>(EncounteredME->getMemberDecl());
8108           unsigned FieldIndex = FD->getFieldIndex();
8109 
8110           // Update info about the lowest and highest elements for this struct
8111           if (!PartialStruct.Base.isValid()) {
8112             PartialStruct.LowestElem = {FieldIndex, LowestElem};
8113             if (IsFinalArraySection) {
8114               Address HB =
8115                   CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false)
8116                       .getAddress(CGF);
8117               PartialStruct.HighestElem = {FieldIndex, HB};
8118             } else {
8119               PartialStruct.HighestElem = {FieldIndex, LowestElem};
8120             }
8121             PartialStruct.Base = BP;
8122             PartialStruct.LB = BP;
8123           } else if (FieldIndex < PartialStruct.LowestElem.first) {
8124             PartialStruct.LowestElem = {FieldIndex, LowestElem};
8125           } else if (FieldIndex > PartialStruct.HighestElem.first) {
8126             PartialStruct.HighestElem = {FieldIndex, LowestElem};
8127           }
8128         }
8129 
8130         // Need to emit combined struct for array sections.
8131         if (IsFinalArraySection || IsNonContiguous)
8132           PartialStruct.IsArraySection = true;
8133 
8134         // If we have a final array section, we are done with this expression.
8135         if (IsFinalArraySection)
8136           break;
8137 
8138         // The pointer becomes the base for the next element.
8139         if (Next != CE)
8140           BP = IsMemberReference ? LowestElem : LB;
8141 
8142         IsExpressionFirstInfo = false;
8143         IsCaptureFirstInfo = false;
8144         FirstPointerInComplexData = false;
8145         IsPrevMemberReference = IsMemberReference;
8146       } else if (FirstPointerInComplexData) {
8147         QualType Ty = Components.rbegin()
8148                           ->getAssociatedDeclaration()
8149                           ->getType()
8150                           .getNonReferenceType();
8151         BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
8152         FirstPointerInComplexData = false;
8153       }
8154     }
8155     // If we ran through the whole component list - allocate the space for the
8156     // whole record.
8157     if (!EncounteredME)
8158       PartialStruct.HasCompleteRecord = true;
8159 
8160     if (!IsNonContiguous)
8161       return;
8162 
8163     const ASTContext &Context = CGF.getContext();
8164 
8165     // For supporting stride in array section, we need to initialize the first
8166     // dimension size as 1, first offset as 0, and first count as 1
8167     MapValuesArrayTy CurOffsets = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 0)};
8168     MapValuesArrayTy CurCounts = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)};
8169     MapValuesArrayTy CurStrides;
8170     MapValuesArrayTy DimSizes{llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)};
8171     uint64_t ElementTypeSize;
8172 
8173     // Collect Size information for each dimension and get the element size as
8174     // the first Stride. For example, for `int arr[10][10]`, the DimSizes
8175     // should be [10, 10] and the first stride is 4 bytes.
8176     for (const OMPClauseMappableExprCommon::MappableComponent &Component :
8177          Components) {
8178       const Expr *AssocExpr = Component.getAssociatedExpression();
8179       const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);
8180 
8181       if (!OASE)
8182         continue;
8183 
8184       QualType Ty = OMPArraySectionExpr::getBaseOriginalType(OASE->getBase());
8185       auto *CAT = Context.getAsConstantArrayType(Ty);
8186       auto *VAT = Context.getAsVariableArrayType(Ty);
8187 
8188       // We need all the dimension sizes except for the last dimension.
8189       assert((VAT || CAT || &Component == &*Components.begin()) &&
8190              "Should be either ConstantArray or VariableArray if not the "
8191              "first Component");
8192 
8193       // Get element size if CurStrides is empty.
8194       if (CurStrides.empty()) {
8195         const Type *ElementType = nullptr;
8196         if (CAT)
8197           ElementType = CAT->getElementType().getTypePtr();
8198         else if (VAT)
8199           ElementType = VAT->getElementType().getTypePtr();
8200         else
8201           assert(&Component == &*Components.begin() &&
8202                  "Only expect pointer (non CAT or VAT) when this is the "
8203                  "first Component");
8204         // If ElementType is null, then it means the base is a pointer
8205         // (neither CAT nor VAT) and we'll attempt to get ElementType again
8206         // for next iteration.
8207         if (ElementType) {
8208           // For the case that having pointer as base, we need to remove one
8209           // level of indirection.
8210           if (&Component != &*Components.begin())
8211             ElementType = ElementType->getPointeeOrArrayElementType();
8212           ElementTypeSize =
8213               Context.getTypeSizeInChars(ElementType).getQuantity();
8214           CurStrides.push_back(
8215               llvm::ConstantInt::get(CGF.Int64Ty, ElementTypeSize));
8216         }
8217       }
8218       // Get dimension value except for the last dimension since we don't need
8219       // it.
8220       if (DimSizes.size() < Components.size() - 1) {
8221         if (CAT)
8222           DimSizes.push_back(llvm::ConstantInt::get(
8223               CGF.Int64Ty, CAT->getSize().getZExtValue()));
8224         else if (VAT)
8225           DimSizes.push_back(CGF.Builder.CreateIntCast(
8226               CGF.EmitScalarExpr(VAT->getSizeExpr()), CGF.Int64Ty,
8227               /*IsSigned=*/false));
8228       }
8229     }
8230 
8231     // Skip the dummy dimension since we already have its information.
8232     auto *DI = DimSizes.begin() + 1;
8233     // Product of dimension.
8234     llvm::Value *DimProd =
8235         llvm::ConstantInt::get(CGF.CGM.Int64Ty, ElementTypeSize);
8236 
8237     // Collect info for non-contiguous. Notice that offset, count, and stride
8238     // are only meaningful for array-section, so we insert a null for anything
8239     // other than array-section.
8240     // Also, the size of offset, count, and stride are not the same as
8241     // pointers, base_pointers, sizes, or dims. Instead, the size of offset,
8242     // count, and stride are the same as the number of non-contiguous
8243     // declaration in target update to/from clause.
8244     for (const OMPClauseMappableExprCommon::MappableComponent &Component :
8245          Components) {
8246       const Expr *AssocExpr = Component.getAssociatedExpression();
8247 
8248       if (const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr)) {
8249         llvm::Value *Offset = CGF.Builder.CreateIntCast(
8250             CGF.EmitScalarExpr(AE->getIdx()), CGF.Int64Ty,
8251             /*isSigned=*/false);
8252         CurOffsets.push_back(Offset);
8253         CurCounts.push_back(llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/1));
8254         CurStrides.push_back(CurStrides.back());
8255         continue;
8256       }
8257 
8258       const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);
8259 
8260       if (!OASE)
8261         continue;
8262 
8263       // Offset
8264       const Expr *OffsetExpr = OASE->getLowerBound();
8265       llvm::Value *Offset = nullptr;
8266       if (!OffsetExpr) {
8267         // If offset is absent, then we just set it to zero.
8268         Offset = llvm::ConstantInt::get(CGF.Int64Ty, 0);
8269       } else {
8270         Offset = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(OffsetExpr),
8271                                            CGF.Int64Ty,
8272                                            /*isSigned=*/false);
8273       }
8274       CurOffsets.push_back(Offset);
8275 
8276       // Count
8277       const Expr *CountExpr = OASE->getLength();
8278       llvm::Value *Count = nullptr;
8279       if (!CountExpr) {
        // In Clang, once a higher dimension is an array section, we construct
        // all the lower dimensions as array sections too; however, for a case
        // like arr[0:2][2], Clang constructs the inner dimension as an array
        // section even though it is not in array-section form per the spec.
8284         if (!OASE->getColonLocFirst().isValid() &&
8285             !OASE->getColonLocSecond().isValid()) {
8286           Count = llvm::ConstantInt::get(CGF.Int64Ty, 1);
8287         } else {
8288           // OpenMP 5.0, 2.1.5 Array Sections, Description.
8289           // When the length is absent it defaults to ⌈(size −
8290           // lower-bound)/stride⌉, where size is the size of the array
8291           // dimension.
8292           const Expr *StrideExpr = OASE->getStride();
8293           llvm::Value *Stride =
8294               StrideExpr
8295                   ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr),
8296                                               CGF.Int64Ty, /*isSigned=*/false)
8297                   : nullptr;
8298           if (Stride)
8299             Count = CGF.Builder.CreateUDiv(
8300                 CGF.Builder.CreateNUWSub(*DI, Offset), Stride);
8301           else
8302             Count = CGF.Builder.CreateNUWSub(*DI, Offset);
8303         }
8304       } else {
8305         Count = CGF.EmitScalarExpr(CountExpr);
8306       }
8307       Count = CGF.Builder.CreateIntCast(Count, CGF.Int64Ty, /*isSigned=*/false);
8308       CurCounts.push_back(Count);
8309 
8310       // Stride_n' = Stride_n * (D_0 * D_1 ... * D_n-1) * Unit size
8311       // Take `int arr[5][5][5]` and `arr[0:2:2][1:2:1][0:2:2]` as an example:
8312       //              Offset      Count     Stride
8313       //    D0          0           1         4    (int)    <- dummy dimension
8314       //    D1          0           2         8    (2 * (1) * 4)
8315       //    D2          1           2         20   (1 * (1 * 5) * 4)
8316       //    D3          0           2         200  (2 * (1 * 5 * 4) * 4)
8317       const Expr *StrideExpr = OASE->getStride();
8318       llvm::Value *Stride =
8319           StrideExpr
8320               ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr),
8321                                           CGF.Int64Ty, /*isSigned=*/false)
8322               : nullptr;
8323       DimProd = CGF.Builder.CreateNUWMul(DimProd, *(DI - 1));
8324       if (Stride)
8325         CurStrides.push_back(CGF.Builder.CreateNUWMul(DimProd, Stride));
8326       else
8327         CurStrides.push_back(DimProd);
8328       if (DI != DimSizes.end())
8329         ++DI;
8330     }
8331 
8332     CombinedInfo.NonContigInfo.Offsets.push_back(CurOffsets);
8333     CombinedInfo.NonContigInfo.Counts.push_back(CurCounts);
8334     CombinedInfo.NonContigInfo.Strides.push_back(CurStrides);
8335   }
8336 
8337   /// Return the adjusted map modifiers if the declaration a capture refers to
8338   /// appears in a first-private clause. This is expected to be used only with
8339   /// directives that start with 'target'.
8340   MappableExprsHandler::OpenMPOffloadMappingFlags
getMapModifiersForPrivateClauses(const CapturedStmt::Capture & Cap) const8341   getMapModifiersForPrivateClauses(const CapturedStmt::Capture &Cap) const {
8342     assert(Cap.capturesVariable() && "Expected capture by reference only!");
8343 
8344     // A first private variable captured by reference will use only the
8345     // 'private ptr' and 'map to' flag. Return the right flags if the captured
8346     // declaration is known as first-private in this handler.
8347     if (FirstPrivateDecls.count(Cap.getCapturedVar())) {
8348       if (Cap.getCapturedVar()->getType()->isAnyPointerType())
8349         return MappableExprsHandler::OMP_MAP_TO |
8350                MappableExprsHandler::OMP_MAP_PTR_AND_OBJ;
8351       return MappableExprsHandler::OMP_MAP_PRIVATE |
8352              MappableExprsHandler::OMP_MAP_TO;
8353     }
8354     auto I = LambdasMap.find(Cap.getCapturedVar()->getCanonicalDecl());
8355     if (I != LambdasMap.end())
8356       // for map(to: lambda): using user specified map type.
8357       return getMapTypeBits(
8358           I->getSecond()->getMapType(), I->getSecond()->getMapTypeModifiers(),
8359           /*MotionModifiers=*/llvm::None, I->getSecond()->isImplicit(),
8360           /*AddPtrFlag=*/false,
8361           /*AddIsTargetParamFlag=*/false,
8362           /*isNonContiguous=*/false);
8363     return MappableExprsHandler::OMP_MAP_TO |
8364            MappableExprsHandler::OMP_MAP_FROM;
8365   }
8366 
getMemberOfFlag(unsigned Position)8367   static OpenMPOffloadMappingFlags getMemberOfFlag(unsigned Position) {
8368     // Rotate by getFlagMemberOffset() bits.
8369     return static_cast<OpenMPOffloadMappingFlags>(((uint64_t)Position + 1)
8370                                                   << getFlagMemberOffset());
8371   }
8372 
setCorrectMemberOfFlag(OpenMPOffloadMappingFlags & Flags,OpenMPOffloadMappingFlags MemberOfFlag)8373   static void setCorrectMemberOfFlag(OpenMPOffloadMappingFlags &Flags,
8374                                      OpenMPOffloadMappingFlags MemberOfFlag) {
8375     // If the entry is PTR_AND_OBJ but has not been marked with the special
8376     // placeholder value 0xFFFF in the MEMBER_OF field, then it should not be
8377     // marked as MEMBER_OF.
8378     if ((Flags & OMP_MAP_PTR_AND_OBJ) &&
8379         ((Flags & OMP_MAP_MEMBER_OF) != OMP_MAP_MEMBER_OF))
8380       return;
8381 
8382     // Reset the placeholder value to prepare the flag for the assignment of the
8383     // proper MEMBER_OF value.
8384     Flags &= ~OMP_MAP_MEMBER_OF;
8385     Flags |= MemberOfFlag;
8386   }
8387 
getPlainLayout(const CXXRecordDecl * RD,llvm::SmallVectorImpl<const FieldDecl * > & Layout,bool AsBase) const8388   void getPlainLayout(const CXXRecordDecl *RD,
8389                       llvm::SmallVectorImpl<const FieldDecl *> &Layout,
8390                       bool AsBase) const {
8391     const CGRecordLayout &RL = CGF.getTypes().getCGRecordLayout(RD);
8392 
8393     llvm::StructType *St =
8394         AsBase ? RL.getBaseSubobjectLLVMType() : RL.getLLVMType();
8395 
8396     unsigned NumElements = St->getNumElements();
8397     llvm::SmallVector<
8398         llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>, 4>
8399         RecordLayout(NumElements);
8400 
8401     // Fill bases.
8402     for (const auto &I : RD->bases()) {
8403       if (I.isVirtual())
8404         continue;
8405       const auto *Base = I.getType()->getAsCXXRecordDecl();
8406       // Ignore empty bases.
8407       if (Base->isEmpty() || CGF.getContext()
8408                                  .getASTRecordLayout(Base)
8409                                  .getNonVirtualSize()
8410                                  .isZero())
8411         continue;
8412 
8413       unsigned FieldIndex = RL.getNonVirtualBaseLLVMFieldNo(Base);
8414       RecordLayout[FieldIndex] = Base;
8415     }
8416     // Fill in virtual bases.
8417     for (const auto &I : RD->vbases()) {
8418       const auto *Base = I.getType()->getAsCXXRecordDecl();
8419       // Ignore empty bases.
8420       if (Base->isEmpty())
8421         continue;
8422       unsigned FieldIndex = RL.getVirtualBaseIndex(Base);
8423       if (RecordLayout[FieldIndex])
8424         continue;
8425       RecordLayout[FieldIndex] = Base;
8426     }
8427     // Fill in all the fields.
8428     assert(!RD->isUnion() && "Unexpected union.");
8429     for (const auto *Field : RD->fields()) {
8430       // Fill in non-bitfields. (Bitfields always use a zero pattern, which we
8431       // will fill in later.)
8432       if (!Field->isBitField() && !Field->isZeroSize(CGF.getContext())) {
8433         unsigned FieldIndex = RL.getLLVMFieldNo(Field);
8434         RecordLayout[FieldIndex] = Field;
8435       }
8436     }
8437     for (const llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>
8438              &Data : RecordLayout) {
8439       if (Data.isNull())
8440         continue;
8441       if (const auto *Base = Data.dyn_cast<const CXXRecordDecl *>())
8442         getPlainLayout(Base, Layout, /*AsBase=*/true);
8443       else
8444         Layout.push_back(Data.get<const FieldDecl *>());
8445     }
8446   }
8447 
8448   /// Generate all the base pointers, section pointers, sizes, map types, and
8449   /// mappers for the extracted mappable expressions (all included in \a
8450   /// CombinedInfo). Also, for each item that relates with a device pointer, a
8451   /// pair of the relevant declaration and index where it occurs is appended to
8452   /// the device pointers info array.
generateAllInfoForClauses(ArrayRef<const OMPClause * > Clauses,MapCombinedInfoTy & CombinedInfo,const llvm::DenseSet<CanonicalDeclPtr<const Decl>> & SkipVarSet=llvm::DenseSet<CanonicalDeclPtr<const Decl>> ()) const8453   void generateAllInfoForClauses(
8454       ArrayRef<const OMPClause *> Clauses, MapCombinedInfoTy &CombinedInfo,
8455       const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet =
8456           llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const {
8457     // We have to process the component lists that relate with the same
8458     // declaration in a single chunk so that we can generate the map flags
8459     // correctly. Therefore, we organize all lists in a map.
8460     enum MapKind { Present, Allocs, Other, Total };
8461     llvm::MapVector<CanonicalDeclPtr<const Decl>,
8462                     SmallVector<SmallVector<MapInfo, 8>, 4>>
8463         Info;
8464 
8465     // Helper function to fill the information map for the different supported
8466     // clauses.
8467     auto &&InfoGen =
8468         [&Info, &SkipVarSet](
8469             const ValueDecl *D, MapKind Kind,
8470             OMPClauseMappableExprCommon::MappableExprComponentListRef L,
8471             OpenMPMapClauseKind MapType,
8472             ArrayRef<OpenMPMapModifierKind> MapModifiers,
8473             ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
8474             bool ReturnDevicePointer, bool IsImplicit, const ValueDecl *Mapper,
8475             const Expr *VarRef = nullptr, bool ForDeviceAddr = false) {
8476           if (SkipVarSet.contains(D))
8477             return;
8478           auto It = Info.find(D);
8479           if (It == Info.end())
8480             It = Info
8481                      .insert(std::make_pair(
8482                          D, SmallVector<SmallVector<MapInfo, 8>, 4>(Total)))
8483                      .first;
8484           It->second[Kind].emplace_back(
8485               L, MapType, MapModifiers, MotionModifiers, ReturnDevicePointer,
8486               IsImplicit, Mapper, VarRef, ForDeviceAddr);
8487         };
8488 
8489     for (const auto *Cl : Clauses) {
8490       const auto *C = dyn_cast<OMPMapClause>(Cl);
8491       if (!C)
8492         continue;
8493       MapKind Kind = Other;
8494       if (llvm::is_contained(C->getMapTypeModifiers(),
8495                              OMPC_MAP_MODIFIER_present))
8496         Kind = Present;
8497       else if (C->getMapType() == OMPC_MAP_alloc)
8498         Kind = Allocs;
8499       const auto *EI = C->getVarRefs().begin();
8500       for (const auto L : C->component_lists()) {
8501         const Expr *E = (C->getMapLoc().isValid()) ? *EI : nullptr;
8502         InfoGen(std::get<0>(L), Kind, std::get<1>(L), C->getMapType(),
8503                 C->getMapTypeModifiers(), llvm::None,
8504                 /*ReturnDevicePointer=*/false, C->isImplicit(), std::get<2>(L),
8505                 E);
8506         ++EI;
8507       }
8508     }
8509     for (const auto *Cl : Clauses) {
8510       const auto *C = dyn_cast<OMPToClause>(Cl);
8511       if (!C)
8512         continue;
8513       MapKind Kind = Other;
8514       if (llvm::is_contained(C->getMotionModifiers(),
8515                              OMPC_MOTION_MODIFIER_present))
8516         Kind = Present;
8517       const auto *EI = C->getVarRefs().begin();
8518       for (const auto L : C->component_lists()) {
8519         InfoGen(std::get<0>(L), Kind, std::get<1>(L), OMPC_MAP_to, llvm::None,
8520                 C->getMotionModifiers(), /*ReturnDevicePointer=*/false,
8521                 C->isImplicit(), std::get<2>(L), *EI);
8522         ++EI;
8523       }
8524     }
8525     for (const auto *Cl : Clauses) {
8526       const auto *C = dyn_cast<OMPFromClause>(Cl);
8527       if (!C)
8528         continue;
8529       MapKind Kind = Other;
8530       if (llvm::is_contained(C->getMotionModifiers(),
8531                              OMPC_MOTION_MODIFIER_present))
8532         Kind = Present;
8533       const auto *EI = C->getVarRefs().begin();
8534       for (const auto L : C->component_lists()) {
8535         InfoGen(std::get<0>(L), Kind, std::get<1>(L), OMPC_MAP_from, llvm::None,
8536                 C->getMotionModifiers(), /*ReturnDevicePointer=*/false,
8537                 C->isImplicit(), std::get<2>(L), *EI);
8538         ++EI;
8539       }
8540     }
8541 
8542     // Look at the use_device_ptr clause information and mark the existing map
8543     // entries as such. If there is no map information for an entry in the
8544     // use_device_ptr list, we create one with map type 'alloc' and zero size
8545     // section. It is the user fault if that was not mapped before. If there is
8546     // no map information and the pointer is a struct member, then we defer the
8547     // emission of that entry until the whole struct has been processed.
8548     llvm::MapVector<CanonicalDeclPtr<const Decl>,
8549                     SmallVector<DeferredDevicePtrEntryTy, 4>>
8550         DeferredInfo;
8551     MapCombinedInfoTy UseDevicePtrCombinedInfo;
8552 
8553     for (const auto *Cl : Clauses) {
8554       const auto *C = dyn_cast<OMPUseDevicePtrClause>(Cl);
8555       if (!C)
8556         continue;
8557       for (const auto L : C->component_lists()) {
8558         OMPClauseMappableExprCommon::MappableExprComponentListRef Components =
8559             std::get<1>(L);
8560         assert(!Components.empty() &&
8561                "Not expecting empty list of components!");
8562         const ValueDecl *VD = Components.back().getAssociatedDeclaration();
8563         VD = cast<ValueDecl>(VD->getCanonicalDecl());
8564         const Expr *IE = Components.back().getAssociatedExpression();
8565         // If the first component is a member expression, we have to look into
8566         // 'this', which maps to null in the map of map information. Otherwise
8567         // look directly for the information.
8568         auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);
8569 
8570         // We potentially have map information for this declaration already.
8571         // Look for the first set of components that refer to it.
8572         if (It != Info.end()) {
8573           bool Found = false;
8574           for (auto &Data : It->second) {
8575             auto *CI = llvm::find_if(Data, [VD](const MapInfo &MI) {
8576               return MI.Components.back().getAssociatedDeclaration() == VD;
8577             });
8578             // If we found a map entry, signal that the pointer has to be
8579             // returned and move on to the next declaration. Exclude cases where
8580             // the base pointer is mapped as array subscript, array section or
8581             // array shaping. The base address is passed as a pointer to base in
8582             // this case and cannot be used as a base for use_device_ptr list
8583             // item.
8584             if (CI != Data.end()) {
8585               auto PrevCI = std::next(CI->Components.rbegin());
8586               const auto *VarD = dyn_cast<VarDecl>(VD);
8587               if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
8588                   isa<MemberExpr>(IE) ||
8589                   !VD->getType().getNonReferenceType()->isPointerType() ||
8590                   PrevCI == CI->Components.rend() ||
8591                   isa<MemberExpr>(PrevCI->getAssociatedExpression()) || !VarD ||
8592                   VarD->hasLocalStorage()) {
8593                 CI->ReturnDevicePointer = true;
8594                 Found = true;
8595                 break;
8596               }
8597             }
8598           }
8599           if (Found)
8600             continue;
8601         }
8602 
8603         // We didn't find any match in our map information - generate a zero
8604         // size array section - if the pointer is a struct member we defer this
8605         // action until the whole struct has been processed.
8606         if (isa<MemberExpr>(IE)) {
8607           // Insert the pointer into Info to be processed by
8608           // generateInfoForComponentList. Because it is a member pointer
8609           // without a pointee, no entry will be generated for it, therefore
8610           // we need to generate one after the whole struct has been processed.
8611           // Nonetheless, generateInfoForComponentList must be called to take
8612           // the pointer into account for the calculation of the range of the
8613           // partial struct.
8614           InfoGen(nullptr, Other, Components, OMPC_MAP_unknown, llvm::None,
8615                   llvm::None, /*ReturnDevicePointer=*/false, C->isImplicit(),
8616                   nullptr);
8617           DeferredInfo[nullptr].emplace_back(IE, VD, /*ForDeviceAddr=*/false);
8618         } else {
8619           llvm::Value *Ptr =
8620               CGF.EmitLoadOfScalar(CGF.EmitLValue(IE), IE->getExprLoc());
8621           UseDevicePtrCombinedInfo.Exprs.push_back(VD);
8622           UseDevicePtrCombinedInfo.BasePointers.emplace_back(Ptr, VD);
8623           UseDevicePtrCombinedInfo.Pointers.push_back(Ptr);
8624           UseDevicePtrCombinedInfo.Sizes.push_back(
8625               llvm::Constant::getNullValue(CGF.Int64Ty));
8626           UseDevicePtrCombinedInfo.Types.push_back(OMP_MAP_RETURN_PARAM);
8627           UseDevicePtrCombinedInfo.Mappers.push_back(nullptr);
8628         }
8629       }
8630     }
8631 
8632     // Look at the use_device_addr clause information and mark the existing map
8633     // entries as such. If there is no map information for an entry in the
8634     // use_device_addr list, we create one with map type 'alloc' and zero size
8635     // section. It is the user fault if that was not mapped before. If there is
8636     // no map information and the pointer is a struct member, then we defer the
8637     // emission of that entry until the whole struct has been processed.
8638     llvm::SmallDenseSet<CanonicalDeclPtr<const Decl>, 4> Processed;
8639     for (const auto *Cl : Clauses) {
8640       const auto *C = dyn_cast<OMPUseDeviceAddrClause>(Cl);
8641       if (!C)
8642         continue;
8643       for (const auto L : C->component_lists()) {
8644         assert(!std::get<1>(L).empty() &&
8645                "Not expecting empty list of components!");
8646         const ValueDecl *VD = std::get<1>(L).back().getAssociatedDeclaration();
8647         if (!Processed.insert(VD).second)
8648           continue;
8649         VD = cast<ValueDecl>(VD->getCanonicalDecl());
8650         const Expr *IE = std::get<1>(L).back().getAssociatedExpression();
8651         // If the first component is a member expression, we have to look into
8652         // 'this', which maps to null in the map of map information. Otherwise
8653         // look directly for the information.
8654         auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);
8655 
8656         // We potentially have map information for this declaration already.
8657         // Look for the first set of components that refer to it.
8658         if (It != Info.end()) {
8659           bool Found = false;
8660           for (auto &Data : It->second) {
8661             auto *CI = llvm::find_if(Data, [VD](const MapInfo &MI) {
8662               return MI.Components.back().getAssociatedDeclaration() == VD;
8663             });
8664             // If we found a map entry, signal that the pointer has to be
8665             // returned and move on to the next declaration.
8666             if (CI != Data.end()) {
8667               CI->ReturnDevicePointer = true;
8668               Found = true;
8669               break;
8670             }
8671           }
8672           if (Found)
8673             continue;
8674         }
8675 
8676         // We didn't find any match in our map information - generate a zero
8677         // size array section - if the pointer is a struct member we defer this
8678         // action until the whole struct has been processed.
8679         if (isa<MemberExpr>(IE)) {
8680           // Insert the pointer into Info to be processed by
8681           // generateInfoForComponentList. Because it is a member pointer
8682           // without a pointee, no entry will be generated for it, therefore
8683           // we need to generate one after the whole struct has been processed.
8684           // Nonetheless, generateInfoForComponentList must be called to take
8685           // the pointer into account for the calculation of the range of the
8686           // partial struct.
8687           InfoGen(nullptr, Other, std::get<1>(L), OMPC_MAP_unknown, llvm::None,
8688                   llvm::None, /*ReturnDevicePointer=*/false, C->isImplicit(),
8689                   nullptr, nullptr, /*ForDeviceAddr=*/true);
8690           DeferredInfo[nullptr].emplace_back(IE, VD, /*ForDeviceAddr=*/true);
8691         } else {
8692           llvm::Value *Ptr;
8693           if (IE->isGLValue())
8694             Ptr = CGF.EmitLValue(IE).getPointer(CGF);
8695           else
8696             Ptr = CGF.EmitScalarExpr(IE);
8697           CombinedInfo.Exprs.push_back(VD);
8698           CombinedInfo.BasePointers.emplace_back(Ptr, VD);
8699           CombinedInfo.Pointers.push_back(Ptr);
8700           CombinedInfo.Sizes.push_back(
8701               llvm::Constant::getNullValue(CGF.Int64Ty));
8702           CombinedInfo.Types.push_back(OMP_MAP_RETURN_PARAM);
8703           CombinedInfo.Mappers.push_back(nullptr);
8704         }
8705       }
8706     }
8707 
8708     for (const auto &Data : Info) {
8709       StructRangeInfoTy PartialStruct;
8710       // Temporary generated information.
8711       MapCombinedInfoTy CurInfo;
8712       const Decl *D = Data.first;
8713       const ValueDecl *VD = cast_or_null<ValueDecl>(D);
8714       for (const auto &M : Data.second) {
8715         for (const MapInfo &L : M) {
8716           assert(!L.Components.empty() &&
8717                  "Not expecting declaration with no component lists.");
8718 
8719           // Remember the current base pointer index.
8720           unsigned CurrentBasePointersIdx = CurInfo.BasePointers.size();
8721           CurInfo.NonContigInfo.IsNonContiguous =
8722               L.Components.back().isNonContiguous();
8723           generateInfoForComponentList(
8724               L.MapType, L.MapModifiers, L.MotionModifiers, L.Components,
8725               CurInfo, PartialStruct, /*IsFirstComponentList=*/false,
8726               L.IsImplicit, L.Mapper, L.ForDeviceAddr, VD, L.VarRef);
8727 
8728           // If this entry relates with a device pointer, set the relevant
8729           // declaration and add the 'return pointer' flag.
8730           if (L.ReturnDevicePointer) {
8731             assert(CurInfo.BasePointers.size() > CurrentBasePointersIdx &&
8732                    "Unexpected number of mapped base pointers.");
8733 
8734             const ValueDecl *RelevantVD =
8735                 L.Components.back().getAssociatedDeclaration();
8736             assert(RelevantVD &&
8737                    "No relevant declaration related with device pointer??");
8738 
8739             CurInfo.BasePointers[CurrentBasePointersIdx].setDevicePtrDecl(
8740                 RelevantVD);
8741             CurInfo.Types[CurrentBasePointersIdx] |= OMP_MAP_RETURN_PARAM;
8742           }
8743         }
8744       }
8745 
8746       // Append any pending zero-length pointers which are struct members and
8747       // used with use_device_ptr or use_device_addr.
8748       auto CI = DeferredInfo.find(Data.first);
8749       if (CI != DeferredInfo.end()) {
8750         for (const DeferredDevicePtrEntryTy &L : CI->second) {
8751           llvm::Value *BasePtr;
8752           llvm::Value *Ptr;
8753           if (L.ForDeviceAddr) {
8754             if (L.IE->isGLValue())
8755               Ptr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
8756             else
8757               Ptr = this->CGF.EmitScalarExpr(L.IE);
8758             BasePtr = Ptr;
8759             // Entry is RETURN_PARAM. Also, set the placeholder value
8760             // MEMBER_OF=FFFF so that the entry is later updated with the
8761             // correct value of MEMBER_OF.
8762             CurInfo.Types.push_back(OMP_MAP_RETURN_PARAM | OMP_MAP_MEMBER_OF);
8763           } else {
8764             BasePtr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
8765             Ptr = this->CGF.EmitLoadOfScalar(this->CGF.EmitLValue(L.IE),
8766                                              L.IE->getExprLoc());
8767             // Entry is PTR_AND_OBJ and RETURN_PARAM. Also, set the
8768             // placeholder value MEMBER_OF=FFFF so that the entry is later
8769             // updated with the correct value of MEMBER_OF.
8770             CurInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_RETURN_PARAM |
8771                                     OMP_MAP_MEMBER_OF);
8772           }
8773           CurInfo.Exprs.push_back(L.VD);
8774           CurInfo.BasePointers.emplace_back(BasePtr, L.VD);
8775           CurInfo.Pointers.push_back(Ptr);
8776           CurInfo.Sizes.push_back(
8777               llvm::Constant::getNullValue(this->CGF.Int64Ty));
8778           CurInfo.Mappers.push_back(nullptr);
8779         }
8780       }
8781       // If there is an entry in PartialStruct it means we have a struct with
8782       // individual members mapped. Emit an extra combined entry.
8783       if (PartialStruct.Base.isValid()) {
8784         CurInfo.NonContigInfo.Dims.push_back(0);
8785         emitCombinedEntry(CombinedInfo, CurInfo.Types, PartialStruct, VD);
8786       }
8787 
8788       // We need to append the results of this capture to what we already
8789       // have.
8790       CombinedInfo.append(CurInfo);
8791     }
8792     // Append data for use_device_ptr clauses.
8793     CombinedInfo.append(UseDevicePtrCombinedInfo);
8794   }
8795 
8796 public:
MappableExprsHandler(const OMPExecutableDirective & Dir,CodeGenFunction & CGF)8797   MappableExprsHandler(const OMPExecutableDirective &Dir, CodeGenFunction &CGF)
8798       : CurDir(&Dir), CGF(CGF) {
8799     // Extract firstprivate clause information.
8800     for (const auto *C : Dir.getClausesOfKind<OMPFirstprivateClause>())
8801       for (const auto *D : C->varlists())
8802         FirstPrivateDecls.try_emplace(
8803             cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl()), C->isImplicit());
8804     // Extract implicit firstprivates from uses_allocators clauses.
8805     for (const auto *C : Dir.getClausesOfKind<OMPUsesAllocatorsClause>()) {
8806       for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
8807         OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
8808         if (const auto *DRE = dyn_cast_or_null<DeclRefExpr>(D.AllocatorTraits))
8809           FirstPrivateDecls.try_emplace(cast<VarDecl>(DRE->getDecl()),
8810                                         /*Implicit=*/true);
8811         else if (const auto *VD = dyn_cast<VarDecl>(
8812                      cast<DeclRefExpr>(D.Allocator->IgnoreParenImpCasts())
8813                          ->getDecl()))
8814           FirstPrivateDecls.try_emplace(VD, /*Implicit=*/true);
8815       }
8816     }
8817     // Extract device pointer clause information.
8818     for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>())
8819       for (auto L : C->component_lists())
8820         DevPointersMap[std::get<0>(L)].push_back(std::get<1>(L));
8821     // Extract map information.
8822     for (const auto *C : Dir.getClausesOfKind<OMPMapClause>()) {
8823       if (C->getMapType() != OMPC_MAP_to)
8824         continue;
8825       for (auto L : C->component_lists()) {
8826         const ValueDecl *VD = std::get<0>(L);
8827         const auto *RD = VD ? VD->getType()
8828                                   .getCanonicalType()
8829                                   .getNonReferenceType()
8830                                   ->getAsCXXRecordDecl()
8831                             : nullptr;
8832         if (RD && RD->isLambda())
8833           LambdasMap.try_emplace(std::get<0>(L), C);
8834       }
8835     }
8836   }
8837 
8838   /// Constructor for the declare mapper directive.
MappableExprsHandler(const OMPDeclareMapperDecl & Dir,CodeGenFunction & CGF)8839   MappableExprsHandler(const OMPDeclareMapperDecl &Dir, CodeGenFunction &CGF)
8840       : CurDir(&Dir), CGF(CGF) {}
8841 
  /// Generate code for the combined entry if we have a partially mapped struct
  /// and take care of the mapping flags of the arguments corresponding to
  /// individual struct members.
  ///
  /// \param CombinedInfo Map info the new combined entry is appended to
  /// (base pointer, pointer, size, type, and a null mapper).
  /// \param CurTypes Map flags of the entries already generated for the
  /// individual members; rewritten here to become MEMBER_OF the combined
  /// entry.
  /// \param PartialStruct Base address plus lowest/highest mapped elements of
  /// the partially mapped struct.
  /// \param VD Declaration the combined entry corresponds to, if any.
  /// \param NotTargetParams If true, the combined entry is not flagged as a
  /// target parameter.
  void emitCombinedEntry(MapCombinedInfoTy &CombinedInfo,
                         MapFlagsArrayTy &CurTypes,
                         const StructRangeInfoTy &PartialStruct,
                         const ValueDecl *VD = nullptr,
                         bool NotTargetParams = true) const {
    // A single entry without the MEMBER_OF placeholder that is not an array
    // section does not need a combined entry at all.
    if (CurTypes.size() == 1 &&
        ((CurTypes.back() & OMP_MAP_MEMBER_OF) != OMP_MAP_MEMBER_OF) &&
        !PartialStruct.IsArraySection)
      return;
    Address LBAddr = PartialStruct.LowestElem.second;
    Address HBAddr = PartialStruct.HighestElem.second;
    // If the whole record is mapped, both bounds start at the record base;
    // the GEP below then makes the size cover exactly one record element.
    if (PartialStruct.HasCompleteRecord) {
      LBAddr = PartialStruct.LB;
      HBAddr = PartialStruct.LB;
    }
    CombinedInfo.Exprs.push_back(VD);
    // Base is the base of the struct
    CombinedInfo.BasePointers.push_back(PartialStruct.Base.getPointer());
    // Pointer is the address of the lowest element
    llvm::Value *LB = LBAddr.getPointer();
    CombinedInfo.Pointers.push_back(LB);
    // There should not be a mapper for a combined entry.
    CombinedInfo.Mappers.push_back(nullptr);
    // Size is (addr of {highest+1} element) - (addr of lowest element)
    llvm::Value *HB = HBAddr.getPointer();
    llvm::Value *HAddr =
        CGF.Builder.CreateConstGEP1_32(HBAddr.getElementType(), HB, /*Idx0=*/1);
    // Cast both bounds to i8* so the pointer difference is in bytes.
    llvm::Value *CLAddr = CGF.Builder.CreatePointerCast(LB, CGF.VoidPtrTy);
    llvm::Value *CHAddr = CGF.Builder.CreatePointerCast(HAddr, CGF.VoidPtrTy);
    llvm::Value *Diff = CGF.Builder.CreatePtrDiff(CGF.Int8Ty, CHAddr, CLAddr);
    llvm::Value *Size = CGF.Builder.CreateIntCast(Diff, CGF.Int64Ty,
                                                  /*isSigned=*/false);
    CombinedInfo.Sizes.push_back(Size);
    // Map type is always TARGET_PARAM, if generate info for captures.
    CombinedInfo.Types.push_back(NotTargetParams ? OMP_MAP_NONE
                                                 : OMP_MAP_TARGET_PARAM);
    // If any element has the present modifier, then make sure the runtime
    // doesn't attempt to allocate the struct.
    if (CurTypes.end() !=
        llvm::find_if(CurTypes, [](OpenMPOffloadMappingFlags Type) {
          return Type & OMP_MAP_PRESENT;
        }))
      CombinedInfo.Types.back() |= OMP_MAP_PRESENT;
    // Remove TARGET_PARAM flag from the first element
    (*CurTypes.begin()) &= ~OMP_MAP_TARGET_PARAM;
    // If any element has the ompx_hold modifier, then make sure the runtime
    // uses the hold reference count for the struct as a whole so that it won't
    // be unmapped by an extra dynamic reference count decrement.  Add it to all
    // elements as well so the runtime knows which reference count to check
    // when determining whether it's time for device-to-host transfers of
    // individual elements.
    if (CurTypes.end() !=
        llvm::find_if(CurTypes, [](OpenMPOffloadMappingFlags Type) {
          return Type & OMP_MAP_OMPX_HOLD;
        })) {
      CombinedInfo.Types.back() |= OMP_MAP_OMPX_HOLD;
      for (auto &M : CurTypes)
        M |= OMP_MAP_OMPX_HOLD;
    }

    // All other current entries will be MEMBER_OF the combined entry
    // (except for PTR_AND_OBJ entries which do not have a placeholder value
    // 0xFFFF in the MEMBER_OF field).
    OpenMPOffloadMappingFlags MemberOfFlag =
        getMemberOfFlag(CombinedInfo.BasePointers.size() - 1);
    for (auto &M : CurTypes)
      setCorrectMemberOfFlag(M, MemberOfFlag);
  }
8913 
8914   /// Generate all the base pointers, section pointers, sizes, map types, and
8915   /// mappers for the extracted mappable expressions (all included in \a
8916   /// CombinedInfo). Also, for each item that relates with a device pointer, a
8917   /// pair of the relevant declaration and index where it occurs is appended to
8918   /// the device pointers info array.
generateAllInfo(MapCombinedInfoTy & CombinedInfo,const llvm::DenseSet<CanonicalDeclPtr<const Decl>> & SkipVarSet=llvm::DenseSet<CanonicalDeclPtr<const Decl>> ()) const8919   void generateAllInfo(
8920       MapCombinedInfoTy &CombinedInfo,
8921       const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet =
8922           llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const {
8923     assert(CurDir.is<const OMPExecutableDirective *>() &&
8924            "Expect a executable directive");
8925     const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
8926     generateAllInfoForClauses(CurExecDir->clauses(), CombinedInfo, SkipVarSet);
8927   }
8928 
8929   /// Generate all the base pointers, section pointers, sizes, map types, and
8930   /// mappers for the extracted map clauses of user-defined mapper (all included
8931   /// in \a CombinedInfo).
generateAllInfoForMapper(MapCombinedInfoTy & CombinedInfo) const8932   void generateAllInfoForMapper(MapCombinedInfoTy &CombinedInfo) const {
8933     assert(CurDir.is<const OMPDeclareMapperDecl *>() &&
8934            "Expect a declare mapper directive");
8935     const auto *CurMapperDir = CurDir.get<const OMPDeclareMapperDecl *>();
8936     generateAllInfoForClauses(CurMapperDir->clauses(), CombinedInfo);
8937   }
8938 
  /// Emit capture info for lambdas for variables captured by reference.
  ///
  /// If \p VD is a lambda object (passed in \p Arg), emit one PTR_AND_OBJ
  /// entry for the captured 'this' (if present) and one for each capture that
  /// is by-reference or of pointer type. For every emitted capture, the pair
  /// (capture field address -> lambda object address) is recorded in
  /// \p LambdaPointers so MEMBER_OF indices can be fixed up afterwards.
  void generateInfoForLambdaCaptures(
      const ValueDecl *VD, llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo,
      llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers) const {
    QualType VDType = VD->getType().getCanonicalType().getNonReferenceType();
    const auto *RD = VDType->getAsCXXRecordDecl();
    // Nothing to emit unless the captured declaration is a lambda object.
    if (!RD || !RD->isLambda())
      return;
    Address VDAddr(Arg, CGF.ConvertTypeForMem(VDType),
                   CGF.getContext().getDeclAlign(VD));
    LValue VDLVal = CGF.MakeAddrLValue(VDAddr, VDType);
    // Map each captured variable to the lambda field that stores it.
    llvm::DenseMap<const VarDecl *, FieldDecl *> Captures;
    FieldDecl *ThisCapture = nullptr;
    RD->getCaptureFields(Captures, ThisCapture);
    if (ThisCapture) {
      // Emit a pointer-sized PTR_AND_OBJ entry for the captured 'this'.
      LValue ThisLVal =
          CGF.EmitLValueForFieldInitialization(VDLVal, ThisCapture);
      LValue ThisLValVal = CGF.EmitLValueForField(VDLVal, ThisCapture);
      LambdaPointers.try_emplace(ThisLVal.getPointer(CGF),
                                 VDLVal.getPointer(CGF));
      CombinedInfo.Exprs.push_back(VD);
      CombinedInfo.BasePointers.push_back(ThisLVal.getPointer(CGF));
      CombinedInfo.Pointers.push_back(ThisLValVal.getPointer(CGF));
      CombinedInfo.Sizes.push_back(
          CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy),
                                    CGF.Int64Ty, /*isSigned=*/true));
      CombinedInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
                                   OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
      CombinedInfo.Mappers.push_back(nullptr);
    }
    for (const LambdaCapture &LC : RD->captures()) {
      if (!LC.capturesVariable())
        continue;
      // Note: shadows the outer VD on purpose; from here on VD is the
      // captured variable.
      const VarDecl *VD = LC.getCapturedVar();
      // Only by-reference captures and captured pointers need entries.
      if (LC.getCaptureKind() != LCK_ByRef && !VD->getType()->isPointerType())
        continue;
      auto It = Captures.find(VD);
      assert(It != Captures.end() && "Found lambda capture without field.");
      LValue VarLVal = CGF.EmitLValueForFieldInitialization(VDLVal, It->second);
      if (LC.getCaptureKind() == LCK_ByRef) {
        // By-reference capture: map the referenced object with its real size.
        LValue VarLValVal = CGF.EmitLValueForField(VDLVal, It->second);
        LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
                                   VDLVal.getPointer(CGF));
        CombinedInfo.Exprs.push_back(VD);
        CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF));
        CombinedInfo.Pointers.push_back(VarLValVal.getPointer(CGF));
        CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
            CGF.getTypeSize(
                VD->getType().getCanonicalType().getNonReferenceType()),
            CGF.Int64Ty, /*isSigned=*/true));
      } else {
        // By-copy pointer capture: map the loaded pointer value with size 0.
        RValue VarRVal = CGF.EmitLoadOfLValue(VarLVal, RD->getLocation());
        LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
                                   VDLVal.getPointer(CGF));
        CombinedInfo.Exprs.push_back(VD);
        CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF));
        CombinedInfo.Pointers.push_back(VarRVal.getScalarVal());
        CombinedInfo.Sizes.push_back(llvm::ConstantInt::get(CGF.Int64Ty, 0));
      }
      CombinedInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
                                   OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
      CombinedInfo.Mappers.push_back(nullptr);
    }
  }
9003 
9004   /// Set correct indices for lambdas captures.
adjustMemberOfForLambdaCaptures(const llvm::DenseMap<llvm::Value *,llvm::Value * > & LambdaPointers,MapBaseValuesArrayTy & BasePointers,MapValuesArrayTy & Pointers,MapFlagsArrayTy & Types) const9005   void adjustMemberOfForLambdaCaptures(
9006       const llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers,
9007       MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
9008       MapFlagsArrayTy &Types) const {
9009     for (unsigned I = 0, E = Types.size(); I < E; ++I) {
9010       // Set correct member_of idx for all implicit lambda captures.
9011       if (Types[I] != (OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
9012                        OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT))
9013         continue;
9014       llvm::Value *BasePtr = LambdaPointers.lookup(*BasePointers[I]);
9015       assert(BasePtr && "Unable to find base lambda address.");
9016       int TgtIdx = -1;
9017       for (unsigned J = I; J > 0; --J) {
9018         unsigned Idx = J - 1;
9019         if (Pointers[Idx] != BasePtr)
9020           continue;
9021         TgtIdx = Idx;
9022         break;
9023       }
9024       assert(TgtIdx != -1 && "Unable to find parent lambda.");
9025       // All other current entries will be MEMBER_OF the combined entry
9026       // (except for PTR_AND_OBJ entries which do not have a placeholder value
9027       // 0xFFFF in the MEMBER_OF field).
9028       OpenMPOffloadMappingFlags MemberOfFlag = getMemberOfFlag(TgtIdx);
9029       setCorrectMemberOfFlag(Types[I], MemberOfFlag);
9030     }
9031   }
9032 
  /// Generate the base pointers, section pointers, sizes, map types, and
  /// mappers associated to a given capture (all included in \a CombinedInfo).
  ///
  /// \param Cap The capture of the target region to generate info for.
  /// \param Arg The value the capture arrives in.
  /// \param CombinedInfo Output arrays the entries are appended to.
  /// \param PartialStruct Filled with the lowest/highest mapped members when
  /// only part of an aggregate is mapped; the caller can later emit a
  /// combined entry from it (see emitCombinedEntry).
  void generateInfoForCapture(const CapturedStmt::Capture *Cap,
                              llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo,
                              StructRangeInfoTy &PartialStruct) const {
    assert(!Cap->capturesVariableArrayType() &&
           "Not expecting to generate map info for a variable array type!");

    // We need to know when we generating information for the first component
    const ValueDecl *VD = Cap->capturesThis()
                              ? nullptr
                              : Cap->getCapturedVar()->getCanonicalDecl();

    // for map(to: lambda): skip here, processing it in
    // generateDefaultMapInfo
    if (LambdasMap.count(VD))
      return;

    // If this declaration appears in a is_device_ptr clause we just have to
    // pass the pointer by value. If it is a reference to a declaration, we just
    // pass its value.
    if (DevPointersMap.count(VD)) {
      CombinedInfo.Exprs.push_back(VD);
      CombinedInfo.BasePointers.emplace_back(Arg, VD);
      CombinedInfo.Pointers.push_back(Arg);
      // The device pointer entry is pointer-sized.
      CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
          CGF.getTypeSize(CGF.getContext().VoidPtrTy), CGF.Int64Ty,
          /*isSigned=*/true));
      CombinedInfo.Types.push_back(
          (Cap->capturesVariable() ? OMP_MAP_TO : OMP_MAP_LITERAL) |
          OMP_MAP_TARGET_PARAM);
      CombinedInfo.Mappers.push_back(nullptr);
      return;
    }

    // Collect, from every map clause of the directive, each component list
    // that refers to this declaration together with its map type, modifiers,
    // implicitness, mapper, and (for explicit maps) the variable reference.
    using MapData =
        std::tuple<OMPClauseMappableExprCommon::MappableExprComponentListRef,
                   OpenMPMapClauseKind, ArrayRef<OpenMPMapModifierKind>, bool,
                   const ValueDecl *, const Expr *>;
    SmallVector<MapData, 4> DeclComponentLists;
    assert(CurDir.is<const OMPExecutableDirective *>() &&
           "Expect a executable directive");
    const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
    for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) {
      const auto *EI = C->getVarRefs().begin();
      for (const auto L : C->decl_component_lists(VD)) {
        const ValueDecl *VDecl, *Mapper;
        // The Expression is not correct if the mapping is implicit
        const Expr *E = (C->getMapLoc().isValid()) ? *EI : nullptr;
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
        std::tie(VDecl, Components, Mapper) = L;
        assert(VDecl == VD && "We got information for the wrong declaration??");
        assert(!Components.empty() &&
               "Not expecting declaration with no component lists.");
        DeclComponentLists.emplace_back(Components, C->getMapType(),
                                        C->getMapTypeModifiers(),
                                        C->isImplicit(), Mapper, E);
        ++EI;
      }
    }
    // Order the lists so that entries with the 'present' modifier come first
    // and 'alloc' entries come last. Note the cross-read is deliberate:
    // HasAllocs is taken from RHS (and HasAllocsR from LHS) so that a
    // non-alloc LHS sorts before an alloc RHS.
    llvm::stable_sort(DeclComponentLists, [](const MapData &LHS,
                                             const MapData &RHS) {
      ArrayRef<OpenMPMapModifierKind> MapModifiers = std::get<2>(LHS);
      OpenMPMapClauseKind MapType = std::get<1>(RHS);
      bool HasPresent =
          llvm::is_contained(MapModifiers, clang::OMPC_MAP_MODIFIER_present);
      bool HasAllocs = MapType == OMPC_MAP_alloc;
      MapModifiers = std::get<2>(RHS);
      MapType = std::get<1>(LHS);
      bool HasPresentR =
          llvm::is_contained(MapModifiers, clang::OMPC_MAP_MODIFIER_present);
      bool HasAllocsR = MapType == OMPC_MAP_alloc;
      return (HasPresent && !HasPresentR) || (HasAllocs && !HasAllocsR);
    });

    // Find overlapping elements (including the offset from the base element).
    llvm::SmallDenseMap<
        const MapData *,
        llvm::SmallVector<
            OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>,
        4>
        OverlappedData;
    size_t Count = 0;
    // Compare every list with all lists after it; component lists are walked
    // back-to-front (rbegin) so matching starts from the base expression.
    for (const MapData &L : DeclComponentLists) {
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      const ValueDecl *Mapper;
      const Expr *VarRef;
      std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
          L;
      ++Count;
      for (const MapData &L1 : makeArrayRef(DeclComponentLists).slice(Count)) {
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components1;
        std::tie(Components1, MapType, MapModifiers, IsImplicit, Mapper,
                 VarRef) = L1;
        auto CI = Components.rbegin();
        auto CE = Components.rend();
        auto SI = Components1.rbegin();
        auto SE = Components1.rend();
        for (; CI != CE && SI != SE; ++CI, ++SI) {
          if (CI->getAssociatedExpression()->getStmtClass() !=
              SI->getAssociatedExpression()->getStmtClass())
            break;
          // Are we dealing with different variables/fields?
          if (CI->getAssociatedDeclaration() != SI->getAssociatedDeclaration())
            break;
        }
        // Found overlapping if, at least for one component, reached the head
        // of the components list.
        if (CI == CE || SI == SE) {
          // Ignore it if it is the same component.
          if (CI == CE && SI == SE)
            continue;
          const auto It = (SI == SE) ? CI : SI;
          // If one component is a pointer and another one is a kind of
          // dereference of this pointer (array subscript, section, dereference,
          // etc.), it is not an overlapping.
          // Same, if one component is a base and another component is a
          // dereferenced pointer memberexpr with the same base.
          if (!isa<MemberExpr>(It->getAssociatedExpression()) ||
              (std::prev(It)->getAssociatedDeclaration() &&
               std::prev(It)
                   ->getAssociatedDeclaration()
                   ->getType()
                   ->isPointerType()) ||
              (It->getAssociatedDeclaration() &&
               It->getAssociatedDeclaration()->getType()->isPointerType() &&
               std::next(It) != CE && std::next(It) != SE))
            continue;
          // The shorter list is the base; the longer one is recorded as an
          // overlapped sub-component of it.
          const MapData &BaseData = CI == CE ? L : L1;
          OMPClauseMappableExprCommon::MappableExprComponentListRef SubData =
              SI == SE ? Components : Components1;
          auto &OverlappedElements = OverlappedData.FindAndConstruct(&BaseData);
          OverlappedElements.getSecond().push_back(SubData);
        }
      }
    }
    // Sort the overlapped elements for each item.
    llvm::SmallVector<const FieldDecl *, 4> Layout;
    if (!OverlappedData.empty()) {
      // Strip pointer/array levels until reaching the underlying element
      // type, whose record layout orders the overlapped members.
      const Type *BaseType = VD->getType().getCanonicalType().getTypePtr();
      const Type *OrigType = BaseType->getPointeeOrArrayElementType();
      while (BaseType != OrigType) {
        BaseType = OrigType->getCanonicalTypeInternal().getTypePtr();
        OrigType = BaseType->getPointeeOrArrayElementType();
      }

      if (const auto *CRD = BaseType->getAsCXXRecordDecl())
        getPlainLayout(CRD, Layout, /*AsBase=*/false);
      else {
        const auto *RD = BaseType->getAsRecordDecl();
        Layout.append(RD->field_begin(), RD->field_end());
      }
    }
    // Order each base's overlapped lists by prefix length and then by the
    // field order of the record layout.
    for (auto &Pair : OverlappedData) {
      llvm::stable_sort(
          Pair.getSecond(),
          [&Layout](
              OMPClauseMappableExprCommon::MappableExprComponentListRef First,
              OMPClauseMappableExprCommon::MappableExprComponentListRef
                  Second) {
            auto CI = First.rbegin();
            auto CE = First.rend();
            auto SI = Second.rbegin();
            auto SE = Second.rend();
            for (; CI != CE && SI != SE; ++CI, ++SI) {
              if (CI->getAssociatedExpression()->getStmtClass() !=
                  SI->getAssociatedExpression()->getStmtClass())
                break;
              // Are we dealing with different variables/fields?
              if (CI->getAssociatedDeclaration() !=
                  SI->getAssociatedDeclaration())
                break;
            }

            // Lists contain the same elements.
            if (CI == CE && SI == SE)
              return false;

            // List with less elements is less than list with more elements.
            if (CI == CE || SI == SE)
              return CI == CE;

            const auto *FD1 = cast<FieldDecl>(CI->getAssociatedDeclaration());
            const auto *FD2 = cast<FieldDecl>(SI->getAssociatedDeclaration());
            if (FD1->getParent() == FD2->getParent())
              return FD1->getFieldIndex() < FD2->getFieldIndex();
            // Different parents: whichever field appears first in the plain
            // layout is the smaller one.
            const auto *It =
                llvm::find_if(Layout, [FD1, FD2](const FieldDecl *FD) {
                  return FD == FD1 || FD == FD2;
                });
            return *It == FD1;
          });
    }

    // Associated with a capture, because the mapping flags depend on it.
    // Go through all of the elements with the overlapped elements.
    bool IsFirstComponentList = true;
    for (const auto &Pair : OverlappedData) {
      const MapData &L = *Pair.getFirst();
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      const ValueDecl *Mapper;
      const Expr *VarRef;
      std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
          L;
      ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
          OverlappedComponents = Pair.getSecond();
      generateInfoForComponentList(
          MapType, MapModifiers, llvm::None, Components, CombinedInfo,
          PartialStruct, IsFirstComponentList, IsImplicit, Mapper,
          /*ForDeviceAddr=*/false, VD, VarRef, OverlappedComponents);
      IsFirstComponentList = false;
    }
    // Go through other elements without overlapped elements.
    for (const MapData &L : DeclComponentLists) {
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      const ValueDecl *Mapper;
      const Expr *VarRef;
      std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
          L;
      auto It = OverlappedData.find(&L);
      if (It == OverlappedData.end())
        generateInfoForComponentList(MapType, MapModifiers, llvm::None,
                                     Components, CombinedInfo, PartialStruct,
                                     IsFirstComponentList, IsImplicit, Mapper,
                                     /*ForDeviceAddr=*/false, VD, VarRef);
      IsFirstComponentList = false;
    }
  }
9270 
  /// Generate the default map information for a given capture \a CI,
  /// record field declaration \a RI and captured value \a CV.
  ///
  /// Exactly one entry is appended to \a CombinedInfo: its flags depend on
  /// whether the capture is 'this', by-copy, or by-reference, and it is
  /// always marked as a target parameter (plus IMPLICIT unless a
  /// firstprivate clause made it explicit).
  void generateDefaultMapInfo(const CapturedStmt::Capture &CI,
                              const FieldDecl &RI, llvm::Value *CV,
                              MapCombinedInfoTy &CombinedInfo) const {
    bool IsImplicit = true;
    // Do the default mapping.
    if (CI.capturesThis()) {
      // Map the whole object 'this' points to, tofrom.
      CombinedInfo.Exprs.push_back(nullptr);
      CombinedInfo.BasePointers.push_back(CV);
      CombinedInfo.Pointers.push_back(CV);
      const auto *PtrTy = cast<PointerType>(RI.getType().getTypePtr());
      CombinedInfo.Sizes.push_back(
          CGF.Builder.CreateIntCast(CGF.getTypeSize(PtrTy->getPointeeType()),
                                    CGF.Int64Ty, /*isSigned=*/true));
      // Default map type.
      CombinedInfo.Types.push_back(OMP_MAP_TO | OMP_MAP_FROM);
    } else if (CI.capturesVariableByCopy()) {
      const VarDecl *VD = CI.getCapturedVar();
      CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
      CombinedInfo.BasePointers.push_back(CV);
      CombinedInfo.Pointers.push_back(CV);
      if (!RI.getType()->isAnyPointerType()) {
        // We have to signal to the runtime captures passed by value that are
        // not pointers.
        CombinedInfo.Types.push_back(OMP_MAP_LITERAL);
        CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
            CGF.getTypeSize(RI.getType()), CGF.Int64Ty, /*isSigned=*/true));
      } else {
        // Pointers are implicitly mapped with a zero size and no flags
        // (other than first map that is added for all implicit maps).
        CombinedInfo.Types.push_back(OMP_MAP_NONE);
        CombinedInfo.Sizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty));
      }
      // A firstprivate clause overrides whether the map stays implicit.
      auto I = FirstPrivateDecls.find(VD);
      if (I != FirstPrivateDecls.end())
        IsImplicit = I->getSecond();
    } else {
      assert(CI.capturesVariable() && "Expected captured reference.");
      const auto *PtrTy = cast<ReferenceType>(RI.getType().getTypePtr());
      QualType ElementType = PtrTy->getPointeeType();
      // Map the referenced object with its full size.
      CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
          CGF.getTypeSize(ElementType), CGF.Int64Ty, /*isSigned=*/true));
      // The default map type for a scalar/complex type is 'to' because by
      // default the value doesn't have to be retrieved. For an aggregate
      // type, the default is 'tofrom'.
      CombinedInfo.Types.push_back(getMapModifiersForPrivateClauses(CI));
      const VarDecl *VD = CI.getCapturedVar();
      auto I = FirstPrivateDecls.find(VD);
      CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
      CombinedInfo.BasePointers.push_back(CV);
      if (I != FirstPrivateDecls.end() && ElementType->isAnyPointerType()) {
        // Firstprivate pointer: map the value the pointer refers to, so load
        // through the reference first.
        Address PtrAddr = CGF.EmitLoadOfReference(CGF.MakeAddrLValue(
            CV, ElementType, CGF.getContext().getDeclAlign(VD),
            AlignmentSource::Decl));
        CombinedInfo.Pointers.push_back(PtrAddr.getPointer());
      } else {
        CombinedInfo.Pointers.push_back(CV);
      }
      if (I != FirstPrivateDecls.end())
        IsImplicit = I->getSecond();
    }
    // Every default map produces a single argument which is a target parameter.
    CombinedInfo.Types.back() |= OMP_MAP_TARGET_PARAM;

    // Add flag stating this is an implicit map.
    if (IsImplicit)
      CombinedInfo.Types.back() |= OMP_MAP_IMPLICIT;

    // No user-defined mapper for default mapping.
    CombinedInfo.Mappers.push_back(nullptr);
  }
9343 };
9344 } // anonymous namespace
9345 
/// Emit the descriptors used by the runtime for non-contiguous target
/// mappings: for each mapped entity with more than one dimension, build an
/// on-stack array of 'descriptor_dim {offset, count, stride}' structs and
/// store its address into the corresponding slot of Info.PointersArray.
static void emitNonContiguousDescriptor(
    CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo,
    CGOpenMPRuntime::TargetDataInfo &Info) {
  CodeGenModule &CGM = CGF.CGM;
  MappableExprsHandler::MapCombinedInfoTy::StructNonContiguousInfo
      &NonContigInfo = CombinedInfo.NonContigInfo;

  // Build an array of struct descriptor_dim and then assign it to
  // offload_args.
  //
  // struct descriptor_dim {
  //  uint64_t offset;
  //  uint64_t count;
  //  uint64_t stride
  // };
  ASTContext &C = CGF.getContext();
  QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
  RecordDecl *RD;
  RD = C.buildImplicitRecord("descriptor_dim");
  RD->startDefinition();
  addFieldToRecordDecl(C, RD, Int64Ty);
  addFieldToRecordDecl(C, RD, Int64Ty);
  addFieldToRecordDecl(C, RD, Int64Ty);
  RD->completeDefinition();
  QualType DimTy = C.getRecordType(RD);

  // Field indices into descriptor_dim, matching the declaration order above.
  enum { OffsetFD = 0, CountFD, StrideFD };
  // We need two index variable here since the size of "Dims" is the same as the
  // size of Components, however, the size of offset, count, and stride is equal
  // to the size of base declaration that is non-contiguous.
  for (unsigned I = 0, L = 0, E = NonContigInfo.Dims.size(); I < E; ++I) {
    // Skip emitting ir if dimension size is 1 since it cannot be
    // non-contiguous.
    if (NonContigInfo.Dims[I] == 1)
      continue;
    llvm::APInt Size(/*numBits=*/32, NonContigInfo.Dims[I]);
    QualType ArrayTy =
        C.getConstantArrayType(DimTy, Size, nullptr, ArrayType::Normal, 0);
    Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims");
    // Fill the dims array; note RevIdx walks the recorded dimensions in
    // reverse order.
    for (unsigned II = 0, EE = NonContigInfo.Dims[I]; II < EE; ++II) {
      unsigned RevIdx = EE - II - 1;
      LValue DimsLVal = CGF.MakeAddrLValue(
          CGF.Builder.CreateConstArrayGEP(DimsAddr, II), DimTy);
      // Offset
      LValue OffsetLVal = CGF.EmitLValueForField(
          DimsLVal, *std::next(RD->field_begin(), OffsetFD));
      CGF.EmitStoreOfScalar(NonContigInfo.Offsets[L][RevIdx], OffsetLVal);
      // Count
      LValue CountLVal = CGF.EmitLValueForField(
          DimsLVal, *std::next(RD->field_begin(), CountFD));
      CGF.EmitStoreOfScalar(NonContigInfo.Counts[L][RevIdx], CountLVal);
      // Stride
      LValue StrideLVal = CGF.EmitLValueForField(
          DimsLVal, *std::next(RD->field_begin(), StrideFD));
      CGF.EmitStoreOfScalar(NonContigInfo.Strides[L][RevIdx], StrideLVal);
    }
    // args[I] = &dims
    Address DAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        DimsAddr, CGM.Int8PtrTy, CGM.Int8Ty);
    llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32(
        llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
        Info.PointersArray, 0, I);
    Address PAddr(P, CGM.VoidPtrTy, CGF.getPointerAlign());
    CGF.Builder.CreateStore(DAddr.getPointer(), PAddr);
    // L only advances for non-contiguous base declarations.
    ++L;
  }
}
9413 
9414 // Try to extract the base declaration from a `this->x` expression if possible.
getDeclFromThisExpr(const Expr * E)9415 static ValueDecl *getDeclFromThisExpr(const Expr *E) {
9416   if (!E)
9417     return nullptr;
9418 
9419   if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E->IgnoreParenCasts()))
9420     if (const MemberExpr *ME =
9421             dyn_cast<MemberExpr>(OASE->getBase()->IgnoreParenImpCasts()))
9422       return ME->getMemberDecl();
9423   return nullptr;
9424 }
9425 
9426 /// Emit a string constant containing the names of the values mapped to the
9427 /// offloading runtime library.
9428 llvm::Constant *
emitMappingInformation(CodeGenFunction & CGF,llvm::OpenMPIRBuilder & OMPBuilder,MappableExprsHandler::MappingExprInfo & MapExprs)9429 emitMappingInformation(CodeGenFunction &CGF, llvm::OpenMPIRBuilder &OMPBuilder,
9430                        MappableExprsHandler::MappingExprInfo &MapExprs) {
9431 
9432   uint32_t SrcLocStrSize;
9433   if (!MapExprs.getMapDecl() && !MapExprs.getMapExpr())
9434     return OMPBuilder.getOrCreateDefaultSrcLocStr(SrcLocStrSize);
9435 
9436   SourceLocation Loc;
9437   if (!MapExprs.getMapDecl() && MapExprs.getMapExpr()) {
9438     if (const ValueDecl *VD = getDeclFromThisExpr(MapExprs.getMapExpr()))
9439       Loc = VD->getLocation();
9440     else
9441       Loc = MapExprs.getMapExpr()->getExprLoc();
9442   } else {
9443     Loc = MapExprs.getMapDecl()->getLocation();
9444   }
9445 
9446   std::string ExprName;
9447   if (MapExprs.getMapExpr()) {
9448     PrintingPolicy P(CGF.getContext().getLangOpts());
9449     llvm::raw_string_ostream OS(ExprName);
9450     MapExprs.getMapExpr()->printPretty(OS, nullptr, P);
9451     OS.flush();
9452   } else {
9453     ExprName = MapExprs.getMapDecl()->getNameAsString();
9454   }
9455 
9456   PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
9457   return OMPBuilder.getOrCreateSrcLocStr(PLoc.getFilename(), ExprName,
9458                                          PLoc.getLine(), PLoc.getColumn(),
9459                                          SrcLocStrSize);
9460 }
9461 
/// Emit the arrays used to pass the captures and map information to the
/// offloading runtime library. If there is no map or capture information,
/// return nullptr by reference.
///
/// \param CombinedInfo Base pointers, pointers, sizes, map types/flags and
///        user-defined mappers gathered for all captured entities.
/// \param Info [out] Receives pointers to the emitted arrays plus bookkeeping
///        (NumberOfPtrs, HasMapper, device-pointer map, ...).
/// \param IsNonContiguous If true, sizes of non-contiguous entries hold the
///        dimension count instead and a descriptor is emitted at the end.
static void emitOffloadingArrays(
    CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo,
    CGOpenMPRuntime::TargetDataInfo &Info, llvm::OpenMPIRBuilder &OMPBuilder,
    bool IsNonContiguous = false) {
  CodeGenModule &CGM = CGF.CGM;
  ASTContext &Ctx = CGF.getContext();

  // Reset the array information.
  Info.clearArrayInfo();
  Info.NumberOfPtrs = CombinedInfo.BasePointers.size();

  if (Info.NumberOfPtrs) {
    // Detect if we have any capture size requiring runtime evaluation of the
    // size so that a constant array could be eventually used.

    llvm::APInt PointerNumAP(32, Info.NumberOfPtrs, /*isSigned=*/true);
    QualType PointerArrayType = Ctx.getConstantArrayType(
        Ctx.VoidPtrTy, PointerNumAP, nullptr, ArrayType::Normal,
        /*IndexTypeQuals=*/0);

    // Stack temporaries holding one slot per captured pointer.
    Info.BasePointersArray =
        CGF.CreateMemTemp(PointerArrayType, ".offload_baseptrs").getPointer();
    Info.PointersArray =
        CGF.CreateMemTemp(PointerArrayType, ".offload_ptrs").getPointer();
    Address MappersArray =
        CGF.CreateMemTemp(PointerArrayType, ".offload_mappers");
    Info.MappersArray = MappersArray.getPointer();

    // If we don't have any VLA types or other types that require runtime
    // evaluation, we can use a constant array for the map sizes, otherwise we
    // need to fill up the arrays as we do for the pointers.
    QualType Int64Ty =
        Ctx.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
    // ConstSizes[I] is meaningful only where RuntimeSizes[I] stays unset;
    // runtime-evaluated slots keep the 0 placeholder.
    SmallVector<llvm::Constant *> ConstSizes(
        CombinedInfo.Sizes.size(), llvm::ConstantInt::get(CGF.Int64Ty, 0));
    llvm::SmallBitVector RuntimeSizes(CombinedInfo.Sizes.size());
    for (unsigned I = 0, E = CombinedInfo.Sizes.size(); I < E; ++I) {
      if (auto *CI = dyn_cast<llvm::Constant>(CombinedInfo.Sizes[I])) {
        // ConstantExpr/GlobalValue are llvm::Constant but not usable in a
        // constant initializer here; treat them as runtime sizes.
        if (!isa<llvm::ConstantExpr>(CI) && !isa<llvm::GlobalValue>(CI)) {
          if (IsNonContiguous && (CombinedInfo.Types[I] &
                                  MappableExprsHandler::OMP_MAP_NON_CONTIG))
            // Non-contiguous entries carry the number of dimensions instead
            // of a byte size.
            ConstSizes[I] = llvm::ConstantInt::get(
                CGF.Int64Ty, CombinedInfo.NonContigInfo.Dims[I]);
          else
            ConstSizes[I] = CI;
          continue;
        }
      }
      RuntimeSizes.set(I);
    }

    // Three cases for the sizes array:
    //  - all runtime: plain stack temp, filled in the store loop below;
    //  - mixed: private constant global memcpy'd into a stack temp, runtime
    //    slots overwritten in the store loop;
    //  - all constant: the constant global is used directly.
    if (RuntimeSizes.all()) {
      QualType SizeArrayType = Ctx.getConstantArrayType(
          Int64Ty, PointerNumAP, nullptr, ArrayType::Normal,
          /*IndexTypeQuals=*/0);
      Info.SizesArray =
          CGF.CreateMemTemp(SizeArrayType, ".offload_sizes").getPointer();
    } else {
      auto *SizesArrayInit = llvm::ConstantArray::get(
          llvm::ArrayType::get(CGM.Int64Ty, ConstSizes.size()), ConstSizes);
      std::string Name = CGM.getOpenMPRuntime().getName({"offload_sizes"});
      auto *SizesArrayGbl = new llvm::GlobalVariable(
          CGM.getModule(), SizesArrayInit->getType(), /*isConstant=*/true,
          llvm::GlobalValue::PrivateLinkage, SizesArrayInit, Name);
      SizesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
      if (RuntimeSizes.any()) {
        QualType SizeArrayType = Ctx.getConstantArrayType(
            Int64Ty, PointerNumAP, nullptr, ArrayType::Normal,
            /*IndexTypeQuals=*/0);
        Address Buffer = CGF.CreateMemTemp(SizeArrayType, ".offload_sizes");
        llvm::Value *GblConstPtr =
            CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                SizesArrayGbl, CGM.Int64Ty->getPointerTo());
        // Seed the buffer with the constant sizes; runtime slots are stored
        // over below.
        CGF.Builder.CreateMemCpy(
            Buffer,
            Address(GblConstPtr, CGM.Int64Ty,
                    CGM.getNaturalTypeAlignment(Ctx.getIntTypeForBitwidth(
                        /*DestWidth=*/64, /*Signed=*/false))),
            CGF.getTypeSize(SizeArrayType));
        Info.SizesArray = Buffer.getPointer();
      } else {
        Info.SizesArray = SizesArrayGbl;
      }
    }

    // The map types are always constant so we don't need to generate code to
    // fill arrays. Instead, we create an array constant.
    SmallVector<uint64_t, 4> Mapping(CombinedInfo.Types.size(), 0);
    llvm::copy(CombinedInfo.Types, Mapping.begin());
    std::string MaptypesName =
        CGM.getOpenMPRuntime().getName({"offload_maptypes"});
    auto *MapTypesArrayGbl =
        OMPBuilder.createOffloadMaptypes(Mapping, MaptypesName);
    Info.MapTypesArray = MapTypesArrayGbl;

    // The information types are only built if there is debug information
    // requested.
    if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo) {
      Info.MapNamesArray = llvm::Constant::getNullValue(
          llvm::Type::getInt8Ty(CGF.Builder.getContext())->getPointerTo());
    } else {
      auto fillInfoMap = [&](MappableExprsHandler::MappingExprInfo &MapExpr) {
        return emitMappingInformation(CGF, OMPBuilder, MapExpr);
      };
      SmallVector<llvm::Constant *, 4> InfoMap(CombinedInfo.Exprs.size());
      llvm::transform(CombinedInfo.Exprs, InfoMap.begin(), fillInfoMap);
      std::string MapnamesName =
          CGM.getOpenMPRuntime().getName({"offload_mapnames"});
      auto *MapNamesArrayGbl =
          OMPBuilder.createOffloadMapnames(InfoMap, MapnamesName);
      Info.MapNamesArray = MapNamesArrayGbl;
    }

    // If there's a present map type modifier, it must not be applied to the end
    // of a region, so generate a separate map type array in that case.
    if (Info.separateBeginEndCalls()) {
      bool EndMapTypesDiffer = false;
      for (uint64_t &Type : Mapping) {
        if (Type & MappableExprsHandler::OMP_MAP_PRESENT) {
          Type &= ~MappableExprsHandler::OMP_MAP_PRESENT;
          EndMapTypesDiffer = true;
        }
      }
      if (EndMapTypesDiffer) {
        MapTypesArrayGbl =
            OMPBuilder.createOffloadMaptypes(Mapping, MaptypesName);
        Info.MapTypesArrayEnd = MapTypesArrayGbl;
      }
    }

    // Fill the per-pointer slots of every array at runtime.
    for (unsigned I = 0; I < Info.NumberOfPtrs; ++I) {
      llvm::Value *BPVal = *CombinedInfo.BasePointers[I];
      llvm::Value *BP = CGF.Builder.CreateConstInBoundsGEP2_32(
          llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
          Info.BasePointersArray, 0, I);
      BP = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          BP, BPVal->getType()->getPointerTo(/*AddrSpace=*/0));
      Address BPAddr(BP, BPVal->getType(),
                     Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
      CGF.Builder.CreateStore(BPVal, BPAddr);

      // Remember where use_device_ptr/use_device_addr captures live so the
      // body can be privatized with the device addresses.
      if (Info.requiresDevicePointerInfo())
        if (const ValueDecl *DevVD =
                CombinedInfo.BasePointers[I].getDevicePtrDecl())
          Info.CaptureDeviceAddrMap.try_emplace(DevVD, BPAddr);

      llvm::Value *PVal = CombinedInfo.Pointers[I];
      llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32(
          llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
          Info.PointersArray, 0, I);
      P = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          P, PVal->getType()->getPointerTo(/*AddrSpace=*/0));
      Address PAddr(P, PVal->getType(), Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
      CGF.Builder.CreateStore(PVal, PAddr);

      // Runtime-evaluated sizes overwrite their (zero/constant) slot.
      if (RuntimeSizes.test(I)) {
        llvm::Value *S = CGF.Builder.CreateConstInBoundsGEP2_32(
            llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs),
            Info.SizesArray,
            /*Idx0=*/0,
            /*Idx1=*/I);
        Address SAddr(S, CGM.Int64Ty, Ctx.getTypeAlignInChars(Int64Ty));
        CGF.Builder.CreateStore(CGF.Builder.CreateIntCast(CombinedInfo.Sizes[I],
                                                          CGM.Int64Ty,
                                                          /*isSigned=*/true),
                                SAddr);
      }

      // Fill up the mapper array.
      llvm::Value *MFunc = llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
      if (CombinedInfo.Mappers[I]) {
        MFunc = CGM.getOpenMPRuntime().getOrCreateUserDefinedMapperFunc(
            cast<OMPDeclareMapperDecl>(CombinedInfo.Mappers[I]));
        MFunc = CGF.Builder.CreatePointerCast(MFunc, CGM.VoidPtrTy);
        Info.HasMapper = true;
      }
      Address MAddr = CGF.Builder.CreateConstArrayGEP(MappersArray, I);
      CGF.Builder.CreateStore(MFunc, MAddr);
    }
  }

  if (!IsNonContiguous || CombinedInfo.NonContigInfo.Offsets.empty() ||
      Info.NumberOfPtrs == 0)
    return;

  emitNonContiguousDescriptor(CGF, CombinedInfo, Info);
}
9652 
namespace {
/// Additional arguments for emitOffloadingArraysArgument function.
struct ArgumentsOptions {
  /// When true, emit the map-type array that applies to the end of the
  /// region rather than the one for its beginning.
  bool ForEndCall = false;
  ArgumentsOptions() = default;
  ArgumentsOptions(bool IsForEndCall) : ForEndCall(IsForEndCall) {}
};
} // namespace
9661 
9662 /// Emit the arguments to be passed to the runtime library based on the
9663 /// arrays of base pointers, pointers, sizes, map types, and mappers.  If
9664 /// ForEndCall, emit map types to be passed for the end of the region instead of
9665 /// the beginning.
emitOffloadingArraysArgument(CodeGenFunction & CGF,llvm::Value * & BasePointersArrayArg,llvm::Value * & PointersArrayArg,llvm::Value * & SizesArrayArg,llvm::Value * & MapTypesArrayArg,llvm::Value * & MapNamesArrayArg,llvm::Value * & MappersArrayArg,CGOpenMPRuntime::TargetDataInfo & Info,const ArgumentsOptions & Options=ArgumentsOptions ())9666 static void emitOffloadingArraysArgument(
9667     CodeGenFunction &CGF, llvm::Value *&BasePointersArrayArg,
9668     llvm::Value *&PointersArrayArg, llvm::Value *&SizesArrayArg,
9669     llvm::Value *&MapTypesArrayArg, llvm::Value *&MapNamesArrayArg,
9670     llvm::Value *&MappersArrayArg, CGOpenMPRuntime::TargetDataInfo &Info,
9671     const ArgumentsOptions &Options = ArgumentsOptions()) {
9672   assert((!Options.ForEndCall || Info.separateBeginEndCalls()) &&
9673          "expected region end call to runtime only when end call is separate");
9674   CodeGenModule &CGM = CGF.CGM;
9675   if (Info.NumberOfPtrs) {
9676     BasePointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
9677         llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
9678         Info.BasePointersArray,
9679         /*Idx0=*/0, /*Idx1=*/0);
9680     PointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
9681         llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
9682         Info.PointersArray,
9683         /*Idx0=*/0,
9684         /*Idx1=*/0);
9685     SizesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
9686         llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs), Info.SizesArray,
9687         /*Idx0=*/0, /*Idx1=*/0);
9688     MapTypesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
9689         llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs),
9690         Options.ForEndCall && Info.MapTypesArrayEnd ? Info.MapTypesArrayEnd
9691                                                     : Info.MapTypesArray,
9692         /*Idx0=*/0,
9693         /*Idx1=*/0);
9694 
9695     // Only emit the mapper information arrays if debug information is
9696     // requested.
9697     if (CGF.CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo)
9698       MapNamesArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
9699     else
9700       MapNamesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
9701           llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
9702           Info.MapNamesArray,
9703           /*Idx0=*/0,
9704           /*Idx1=*/0);
9705     // If there is no user-defined mapper, set the mapper array to nullptr to
9706     // avoid an unnecessary data privatization
9707     if (!Info.HasMapper)
9708       MappersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
9709     else
9710       MappersArrayArg =
9711           CGF.Builder.CreatePointerCast(Info.MappersArray, CGM.VoidPtrPtrTy);
9712   } else {
9713     BasePointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
9714     PointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
9715     SizesArrayArg = llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo());
9716     MapTypesArrayArg =
9717         llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo());
9718     MapNamesArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
9719     MappersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
9720   }
9721 }
9722 
/// Check for inner distribute directive.
///
/// Looks through the captured body of \p D for a nested directive that is a
/// distribute directive (possibly one level below an inner `teams`), and
/// returns it, or nullptr if there is none.  Only target-style directives are
/// expected as \p D; anything else is a codegen invariant violation.
static const OMPExecutableDirective *
getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) {
  const auto *CS = D.getInnermostCapturedStmt();
  const auto *Body =
      CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
  // The nested directive must be the single (compound-wrapped) child.
  const Stmt *ChildStmt =
      CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);

  if (const auto *NestedDir =
          dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
    OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind();
    switch (D.getDirectiveKind()) {
    case OMPD_target:
      // `target` may contain `distribute` directly, or `teams` wrapping a
      // `distribute` one level deeper.
      if (isOpenMPDistributeDirective(DKind))
        return NestedDir;
      if (DKind == OMPD_teams) {
        Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers(
            /*IgnoreCaptured=*/true);
        if (!Body)
          return nullptr;
        ChildStmt = CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);
        if (const auto *NND =
                dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
          DKind = NND->getDirectiveKind();
          if (isOpenMPDistributeDirective(DKind))
            return NND;
        }
      }
      return nullptr;
    case OMPD_target_teams:
      if (isOpenMPDistributeDirective(DKind))
        return NestedDir;
      return nullptr;
    case OMPD_target_parallel:
    case OMPD_target_simd:
    case OMPD_target_parallel_for:
    case OMPD_target_parallel_for_simd:
      // These combined target forms cannot contain a nested distribute.
      return nullptr;
    // Every remaining directive kind is not a valid outer directive here.
    case OMPD_target_teams_distribute:
    case OMPD_target_teams_distribute_simd:
    case OMPD_target_teams_distribute_parallel_for:
    case OMPD_target_teams_distribute_parallel_for_simd:
    case OMPD_parallel:
    case OMPD_for:
    case OMPD_parallel_for:
    case OMPD_parallel_master:
    case OMPD_parallel_sections:
    case OMPD_for_simd:
    case OMPD_parallel_for_simd:
    case OMPD_cancel:
    case OMPD_cancellation_point:
    case OMPD_ordered:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_task:
    case OMPD_simd:
    case OMPD_tile:
    case OMPD_unroll:
    case OMPD_sections:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_taskyield:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_atomic:
    case OMPD_flush:
    case OMPD_depobj:
    case OMPD_scan:
    case OMPD_teams:
    case OMPD_target_data:
    case OMPD_target_exit_data:
    case OMPD_target_enter_data:
    case OMPD_distribute:
    case OMPD_distribute_simd:
    case OMPD_distribute_parallel_for:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_target_update:
    case OMPD_declare_simd:
    case OMPD_declare_variant:
    case OMPD_begin_declare_variant:
    case OMPD_end_declare_variant:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_master_taskloop:
    case OMPD_master_taskloop_simd:
    case OMPD_parallel_master_taskloop:
    case OMPD_parallel_master_taskloop_simd:
    case OMPD_requires:
    case OMPD_metadirective:
    case OMPD_unknown:
    default:
      llvm_unreachable("Unexpected directive.");
    }
  }

  return nullptr;
}
9832 
/// Emit the user-defined mapper function. The code generation follows the
/// pattern in the example below.
/// \code
/// void .omp_mapper.<type_name>.<mapper_id>.(void *rt_mapper_handle,
///                                           void *base, void *begin,
///                                           int64_t size, int64_t type,
///                                           void *name = nullptr) {
///   // Allocate space for an array section first or add a base/begin for
///   // pointer dereference.
///   if ((size > 1 || (base != begin && maptype.IsPtrAndObj)) &&
///       !maptype.IsDelete)
///     __tgt_push_mapper_component(rt_mapper_handle, base, begin,
///                                 size*sizeof(Ty), clearToFromMember(type));
///   // Map members.
///   for (unsigned i = 0; i < size; i++) {
///     // For each component specified by this mapper:
///     for (auto c : begin[i]->all_components) {
///       if (c.hasMapper())
///         (*c.Mapper())(rt_mapper_handle, c.arg_base, c.arg_begin, c.arg_size,
///                       c.arg_type, c.arg_name);
///       else
///         __tgt_push_mapper_component(rt_mapper_handle, c.arg_base,
///                                     c.arg_begin, c.arg_size, c.arg_type,
///                                     c.arg_name);
///     }
///   }
///   // Delete the array section.
///   if (size > 1 && maptype.IsDelete)
///     __tgt_push_mapper_component(rt_mapper_handle, base, begin,
///                                 size*sizeof(Ty), clearToFromMember(type));
/// }
/// \endcode
///
/// \param D The declare-mapper declaration being lowered; the emitted
///        function is cached in UDMMap so each mapper is emitted once.
/// \param CGF If non-null, the declaration is also recorded against the
///        current function in FunctionUDMMap.
void CGOpenMPRuntime::emitUserDefinedMapper(const OMPDeclareMapperDecl *D,
                                            CodeGenFunction *CGF) {
  // Already emitted for this declaration.
  if (UDMMap.count(D) > 0)
    return;
  ASTContext &C = CGM.getContext();
  QualType Ty = D->getType();
  QualType PtrTy = C.getPointerType(Ty).withRestrict();
  QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
  auto *MapperVarDecl =
      cast<VarDecl>(cast<DeclRefExpr>(D->getMapperVarRef())->getDecl());
  SourceLocation Loc = D->getLocation();
  CharUnits ElementSize = C.getTypeSizeInChars(Ty);
  llvm::Type *ElemTy = CGM.getTypes().ConvertTypeForMem(Ty);

  // Prepare mapper function arguments and attributes.
  ImplicitParamDecl HandleArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                              C.VoidPtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl BaseArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl BeginArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                             C.VoidPtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl SizeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl TypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl NameArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                            ImplicitParamDecl::Other);
  FunctionArgList Args;
  Args.push_back(&HandleArg);
  Args.push_back(&BaseArg);
  Args.push_back(&BeginArg);
  Args.push_back(&SizeArg);
  Args.push_back(&TypeArg);
  Args.push_back(&NameArg);
  const CGFunctionInfo &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  // Mangle the mapped type into the function name to keep mappers for
  // distinct types distinct.
  SmallString<64> TyStr;
  llvm::raw_svector_ostream Out(TyStr);
  CGM.getCXXABI().getMangleContext().mangleTypeName(Ty, Out);
  std::string Name = getName({"omp_mapper", TyStr, D->getName()});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  // Always allow the mapper body to be optimized.
  Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
  // Start the mapper function code generation.
  CodeGenFunction MapperCGF(CGM);
  MapperCGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  // Compute the starting and end addresses of array elements.
  llvm::Value *Size = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&SizeArg), /*Volatile=*/false,
      C.getPointerType(Int64Ty), Loc);
  // Prepare common arguments for array initiation and deletion.
  llvm::Value *Handle = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&HandleArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
  llvm::Value *BaseIn = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&BaseArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
  llvm::Value *BeginIn = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&BeginArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
  // Convert the size in bytes into the number of array elements.
  Size = MapperCGF.Builder.CreateExactUDiv(
      Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity()));
  llvm::Value *PtrBegin = MapperCGF.Builder.CreateBitCast(
      BeginIn, CGM.getTypes().ConvertTypeForMem(PtrTy));
  llvm::Value *PtrEnd = MapperCGF.Builder.CreateGEP(ElemTy, PtrBegin, Size);
  llvm::Value *MapType = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&TypeArg), /*Volatile=*/false,
      C.getPointerType(Int64Ty), Loc);
  llvm::Value *MapName = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&NameArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);

  // Emit array initiation if this is an array section and \p MapType indicates
  // that memory allocation is required.
  llvm::BasicBlock *HeadBB = MapperCGF.createBasicBlock("omp.arraymap.head");
  emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
                             MapName, ElementSize, HeadBB, /*IsInit=*/true);

  // Emit a for loop to iterate through SizeArg of elements and map all of them.

  // Emit the loop header block.
  MapperCGF.EmitBlock(HeadBB);
  llvm::BasicBlock *BodyBB = MapperCGF.createBasicBlock("omp.arraymap.body");
  llvm::BasicBlock *DoneBB = MapperCGF.createBasicBlock("omp.done");
  // Evaluate whether the initial condition is satisfied.
  llvm::Value *IsEmpty =
      MapperCGF.Builder.CreateICmpEQ(PtrBegin, PtrEnd, "omp.arraymap.isempty");
  MapperCGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
  llvm::BasicBlock *EntryBB = MapperCGF.Builder.GetInsertBlock();

  // Emit the loop body block.
  MapperCGF.EmitBlock(BodyBB);
  llvm::BasicBlock *LastBB = BodyBB;
  // PHI over the current element pointer: PtrBegin from the entry, PtrNext
  // from the loop back-edge (added after the body is emitted).
  llvm::PHINode *PtrPHI = MapperCGF.Builder.CreatePHI(
      PtrBegin->getType(), 2, "omp.arraymap.ptrcurrent");
  PtrPHI->addIncoming(PtrBegin, EntryBB);
  Address PtrCurrent(PtrPHI, ElemTy,
                     MapperCGF.GetAddrOfLocalVar(&BeginArg)
                         .getAlignment()
                         .alignmentOfArrayElement(ElementSize));
  // Privatize the declared variable of mapper to be the current array element.
  CodeGenFunction::OMPPrivateScope Scope(MapperCGF);
  Scope.addPrivate(MapperVarDecl, PtrCurrent);
  (void)Scope.Privatize();

  // Get map clause information. Fill up the arrays with all mapped variables.
  MappableExprsHandler::MapCombinedInfoTy Info;
  MappableExprsHandler MEHandler(*D, MapperCGF);
  MEHandler.generateAllInfoForMapper(Info);

  // Call the runtime API __tgt_mapper_num_components to get the number of
  // pre-existing components.
  llvm::Value *OffloadingArgs[] = {Handle};
  llvm::Value *PreviousSize = MapperCGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___tgt_mapper_num_components),
      OffloadingArgs);
  // Shift into the MEMBER_OF bit-field position so it can be added to each
  // component's map type below.
  llvm::Value *ShiftedPreviousSize = MapperCGF.Builder.CreateShl(
      PreviousSize,
      MapperCGF.Builder.getInt64(MappableExprsHandler::getFlagMemberOffset()));

  // Fill up the runtime mapper handle for all components.
  for (unsigned I = 0; I < Info.BasePointers.size(); ++I) {
    llvm::Value *CurBaseArg = MapperCGF.Builder.CreateBitCast(
        *Info.BasePointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
    llvm::Value *CurBeginArg = MapperCGF.Builder.CreateBitCast(
        Info.Pointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
    llvm::Value *CurSizeArg = Info.Sizes[I];
    // Mapping names are only materialized when debug info is requested.
    llvm::Value *CurNameArg =
        (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo)
            ? llvm::ConstantPointerNull::get(CGM.VoidPtrTy)
            : emitMappingInformation(MapperCGF, OMPBuilder, Info.Exprs[I]);

    // Extract the MEMBER_OF field from the map type.
    llvm::Value *OriMapType = MapperCGF.Builder.getInt64(Info.Types[I]);
    llvm::Value *MemberMapType =
        MapperCGF.Builder.CreateNUWAdd(OriMapType, ShiftedPreviousSize);

    // Combine the map type inherited from user-defined mapper with that
    // specified in the program. According to the OMP_MAP_TO and OMP_MAP_FROM
    // bits of the \a MapType, which is the input argument of the mapper
    // function, the following code will set the OMP_MAP_TO and OMP_MAP_FROM
    // bits of MemberMapType.
    // [OpenMP 5.0], 1.2.6. map-type decay.
    //        | alloc |  to   | from  | tofrom | release | delete
    // ----------------------------------------------------------
    // alloc  | alloc | alloc | alloc | alloc  | release | delete
    // to     | alloc |  to   | alloc |   to   | release | delete
    // from   | alloc | alloc | from  |  from  | release | delete
    // tofrom | alloc |  to   | from  | tofrom | release | delete
    llvm::Value *LeftToFrom = MapperCGF.Builder.CreateAnd(
        MapType,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO |
                                   MappableExprsHandler::OMP_MAP_FROM));
    llvm::BasicBlock *AllocBB = MapperCGF.createBasicBlock("omp.type.alloc");
    llvm::BasicBlock *AllocElseBB =
        MapperCGF.createBasicBlock("omp.type.alloc.else");
    llvm::BasicBlock *ToBB = MapperCGF.createBasicBlock("omp.type.to");
    llvm::BasicBlock *ToElseBB = MapperCGF.createBasicBlock("omp.type.to.else");
    llvm::BasicBlock *FromBB = MapperCGF.createBasicBlock("omp.type.from");
    llvm::BasicBlock *EndBB = MapperCGF.createBasicBlock("omp.type.end");
    llvm::Value *IsAlloc = MapperCGF.Builder.CreateIsNull(LeftToFrom);
    MapperCGF.Builder.CreateCondBr(IsAlloc, AllocBB, AllocElseBB);
    // In case of alloc, clear OMP_MAP_TO and OMP_MAP_FROM.
    MapperCGF.EmitBlock(AllocBB);
    llvm::Value *AllocMapType = MapperCGF.Builder.CreateAnd(
        MemberMapType,
        MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO |
                                     MappableExprsHandler::OMP_MAP_FROM)));
    MapperCGF.Builder.CreateBr(EndBB);
    MapperCGF.EmitBlock(AllocElseBB);
    llvm::Value *IsTo = MapperCGF.Builder.CreateICmpEQ(
        LeftToFrom,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO));
    MapperCGF.Builder.CreateCondBr(IsTo, ToBB, ToElseBB);
    // In case of to, clear OMP_MAP_FROM.
    MapperCGF.EmitBlock(ToBB);
    llvm::Value *ToMapType = MapperCGF.Builder.CreateAnd(
        MemberMapType,
        MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_FROM));
    MapperCGF.Builder.CreateBr(EndBB);
    MapperCGF.EmitBlock(ToElseBB);
    llvm::Value *IsFrom = MapperCGF.Builder.CreateICmpEQ(
        LeftToFrom,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_FROM));
    MapperCGF.Builder.CreateCondBr(IsFrom, FromBB, EndBB);
    // In case of from, clear OMP_MAP_TO.
    MapperCGF.EmitBlock(FromBB);
    llvm::Value *FromMapType = MapperCGF.Builder.CreateAnd(
        MemberMapType,
        MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_TO));
    // In case of tofrom, do nothing.
    // (FromBB falls through to EndBB via the branch EmitBlock inserts.)
    MapperCGF.EmitBlock(EndBB);
    LastBB = EndBB;
    // Merge the four possible map types; the tofrom case arrives from
    // ToElseBB with MemberMapType unchanged.
    llvm::PHINode *CurMapType =
        MapperCGF.Builder.CreatePHI(CGM.Int64Ty, 4, "omp.maptype");
    CurMapType->addIncoming(AllocMapType, AllocBB);
    CurMapType->addIncoming(ToMapType, ToBB);
    CurMapType->addIncoming(FromMapType, FromBB);
    CurMapType->addIncoming(MemberMapType, ToElseBB);

    llvm::Value *OffloadingArgs[] = {Handle,     CurBaseArg, CurBeginArg,
                                     CurSizeArg, CurMapType, CurNameArg};
    if (Info.Mappers[I]) {
      // Call the corresponding mapper function.
      llvm::Function *MapperFunc = getOrCreateUserDefinedMapperFunc(
          cast<OMPDeclareMapperDecl>(Info.Mappers[I]));
      assert(MapperFunc && "Expect a valid mapper function is available.");
      MapperCGF.EmitNounwindRuntimeCall(MapperFunc, OffloadingArgs);
    } else {
      // Call the runtime API __tgt_push_mapper_component to fill up the runtime
      // data structure.
      MapperCGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___tgt_push_mapper_component),
          OffloadingArgs);
    }
  }

  // Update the pointer to point to the next element that needs to be mapped,
  // and check whether we have mapped all elements.
  llvm::Value *PtrNext = MapperCGF.Builder.CreateConstGEP1_32(
      ElemTy, PtrPHI, /*Idx0=*/1, "omp.arraymap.next");
  PtrPHI->addIncoming(PtrNext, LastBB);
  llvm::Value *IsDone =
      MapperCGF.Builder.CreateICmpEQ(PtrNext, PtrEnd, "omp.arraymap.isdone");
  llvm::BasicBlock *ExitBB = MapperCGF.createBasicBlock("omp.arraymap.exit");
  MapperCGF.Builder.CreateCondBr(IsDone, ExitBB, BodyBB);

  MapperCGF.EmitBlock(ExitBB);
  // Emit array deletion if this is an array section and \p MapType indicates
  // that deletion is required.
  emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
                             MapName, ElementSize, DoneBB, /*IsInit=*/false);

  // Emit the function exit block.
  MapperCGF.EmitBlock(DoneBB, /*IsFinished=*/true);
  MapperCGF.FinishFunction();
  // Cache the emitted function and, if requested, record the declaration
  // against the current function.
  UDMMap.try_emplace(D, Fn);
  if (CGF) {
    auto &Decls = FunctionUDMMap.FindAndConstruct(CGF->CurFn);
    Decls.second.push_back(D);
  }
}
10112 
/// Emit the array initialization or deletion portion for user-defined mapper
/// code generation. First, it evaluates whether an array section is mapped and
/// whether the \a MapType instructs to delete this section. If \a IsInit is
/// true, and \a MapType indicates to not delete this array, array
/// initialization code is generated. If \a IsInit is false, and \a MapType
/// indicates to delete this array, array deletion code is generated.
void CGOpenMPRuntime::emitUDMapperArrayInitOrDel(
    CodeGenFunction &MapperCGF, llvm::Value *Handle, llvm::Value *Base,
    llvm::Value *Begin, llvm::Value *Size, llvm::Value *MapType,
    llvm::Value *MapName, CharUnits ElementSize, llvm::BasicBlock *ExitBB,
    bool IsInit) {
  // Name suffix so the init and delete paths are distinguishable in the IR.
  StringRef Prefix = IsInit ? ".init" : ".del";

  // Evaluate if this is an array section.
  llvm::BasicBlock *BodyBB =
      MapperCGF.createBasicBlock(getName({"omp.array", Prefix}));
  // \p Size is an element count, not a byte count: more than one element
  // means an array section is being mapped.
  llvm::Value *IsArray = MapperCGF.Builder.CreateICmpSGT(
      Size, MapperCGF.Builder.getInt64(1), "omp.arrayinit.isarray");
  // Extract the OMP_MAP_DELETE bit from the map type.
  llvm::Value *DeleteBit = MapperCGF.Builder.CreateAnd(
      MapType,
      MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_DELETE));
  llvm::Value *DeleteCond;
  llvm::Value *Cond;
  if (IsInit) {
    // base != begin?
    llvm::Value *BaseIsBegin = MapperCGF.Builder.CreateICmpNE(Base, Begin);
    // IsPtrAndObj?
    llvm::Value *PtrAndObjBit = MapperCGF.Builder.CreateAnd(
        MapType,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_PTR_AND_OBJ));
    PtrAndObjBit = MapperCGF.Builder.CreateIsNotNull(PtrAndObjBit);
    BaseIsBegin = MapperCGF.Builder.CreateAnd(BaseIsBegin, PtrAndObjBit);
    // Initialization is needed for a real array section, or for a PTR_AND_OBJ
    // entry whose base pointer differs from its begin pointer.
    Cond = MapperCGF.Builder.CreateOr(IsArray, BaseIsBegin);
    // On the init path, the DELETE bit must NOT be set.
    DeleteCond = MapperCGF.Builder.CreateIsNull(
        DeleteBit, getName({"omp.array", Prefix, ".delete"}));
  } else {
    Cond = IsArray;
    // On the delete path, the DELETE bit must be set.
    DeleteCond = MapperCGF.Builder.CreateIsNotNull(
        DeleteBit, getName({"omp.array", Prefix, ".delete"}));
  }
  Cond = MapperCGF.Builder.CreateAnd(Cond, DeleteCond);
  MapperCGF.Builder.CreateCondBr(Cond, BodyBB, ExitBB);

  MapperCGF.EmitBlock(BodyBB);
  // Get the array size by multiplying element size and element number (i.e., \p
  // Size).
  llvm::Value *ArraySize = MapperCGF.Builder.CreateNUWMul(
      Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity()));
  // Remove OMP_MAP_TO and OMP_MAP_FROM from the map type, so that it achieves
  // memory allocation/deletion purpose only.
  llvm::Value *MapTypeArg = MapperCGF.Builder.CreateAnd(
      MapType,
      MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO |
                                   MappableExprsHandler::OMP_MAP_FROM)));
  // Mark the component as implicitly generated by the compiler.
  MapTypeArg = MapperCGF.Builder.CreateOr(
      MapTypeArg,
      MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_IMPLICIT));

  // Call the runtime API __tgt_push_mapper_component to fill up the runtime
  // data structure.
  llvm::Value *OffloadingArgs[] = {Handle,    Base,       Begin,
                                   ArraySize, MapTypeArg, MapName};
  MapperCGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___tgt_push_mapper_component),
      OffloadingArgs);
}
10180 
getOrCreateUserDefinedMapperFunc(const OMPDeclareMapperDecl * D)10181 llvm::Function *CGOpenMPRuntime::getOrCreateUserDefinedMapperFunc(
10182     const OMPDeclareMapperDecl *D) {
10183   auto I = UDMMap.find(D);
10184   if (I != UDMMap.end())
10185     return I->second;
10186   emitUserDefinedMapper(D);
10187   return UDMMap.lookup(D);
10188 }
10189 
emitTargetNumIterationsCall(CodeGenFunction & CGF,const OMPExecutableDirective & D,llvm::function_ref<llvm::Value * (CodeGenFunction & CGF,const OMPLoopDirective & D)> SizeEmitter)10190 llvm::Value *CGOpenMPRuntime::emitTargetNumIterationsCall(
10191     CodeGenFunction &CGF, const OMPExecutableDirective &D,
10192     llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
10193                                      const OMPLoopDirective &D)>
10194         SizeEmitter) {
10195   OpenMPDirectiveKind Kind = D.getDirectiveKind();
10196   const OMPExecutableDirective *TD = &D;
10197   // Get nested teams distribute kind directive, if any.
10198   if (!isOpenMPDistributeDirective(Kind) || !isOpenMPTeamsDirective(Kind))
10199     TD = getNestedDistributeDirective(CGM.getContext(), D);
10200   if (!TD)
10201     return llvm::ConstantInt::get(CGF.Int64Ty, 0);
10202 
10203   const auto *LD = cast<OMPLoopDirective>(TD);
10204   if (llvm::Value *NumIterations = SizeEmitter(CGF, *LD))
10205     return NumIterations;
10206   return llvm::ConstantInt::get(CGF.Int64Ty, 0);
10207 }
10208 
/// Emit the code that launches a target region: materialize captured
/// variables, build the offloading argument arrays, call
/// __tgt_target_kernel via the OpenMPIRBuilder, and branch to a host
/// fallback when offloading fails or is unavailable.
void CGOpenMPRuntime::emitTargetCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
    llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
    llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
                                     const OMPLoopDirective &D)>
        SizeEmitter) {
  if (!CGF.HaveInsertPoint())
    return;

  // With -fopenmp-offload-mandatory on the host, no host fallback exists:
  // a failed offload must trap instead of running OutlinedFn.
  const bool OffloadingMandatory = !CGM.getLangOpts().OpenMPIsDevice &&
                                   CGM.getLangOpts().OpenMPOffloadMandatory;

  assert((OffloadingMandatory || OutlinedFn) && "Invalid outlined function!");

  // depend/nowait/in_reduction clauses require wrapping the target call in
  // an outer task.
  const bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>() ||
                                 D.hasClausesOfKind<OMPNowaitClause>() ||
                                 D.hasClausesOfKind<OMPInReductionClause>();
  llvm::SmallVector<llvm::Value *, 16> CapturedVars;
  const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
  // Materialize the values captured by the target region into CapturedVars.
  auto &&ArgsCodegen = [&CS, &CapturedVars](CodeGenFunction &CGF,
                                            PrePostActionTy &) {
    CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
  };
  emitInlinedDirective(CGF, OMPD_unknown, ArgsCodegen);

  // Filled in by TargetThenGen below and read by ThenGen (possibly from
  // within a task region), so these must outlive both lambdas.
  CodeGenFunction::OMPTargetDataInfo InputInfo;
  llvm::Value *MapTypesArray = nullptr;
  llvm::Value *MapNamesArray = nullptr;
  // Generate code for the host fallback function.
  auto &&FallbackGen = [this, OutlinedFn, &D, &CapturedVars, RequiresOuterTask,
                        &CS, OffloadingMandatory](CodeGenFunction &CGF) {
    if (OffloadingMandatory) {
      CGF.Builder.CreateUnreachable();
    } else {
      if (RequiresOuterTask) {
        // Re-capture inside the task region so the fallback call sees the
        // task-local copies of the captured values.
        CapturedVars.clear();
        CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
      }
      emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
    }
  };
  // Fill up the pointer arrays and transfer execution to the device.
  auto &&ThenGen = [this, Device, OutlinedFnID, &D, &InputInfo, &MapTypesArray,
                    &MapNamesArray, SizeEmitter,
                    FallbackGen](CodeGenFunction &CGF, PrePostActionTy &) {
    if (Device.getInt() == OMPC_DEVICE_ancestor) {
      // Reverse offloading is not supported, so just execute on the host.
      FallbackGen(CGF);
      return;
    }

    // On top of the arrays that were filled up, the target offloading call
    // takes as arguments the device id as well as the host pointer. The host
    // pointer is used by the runtime library to identify the current target
    // region, so it only has to be unique and not necessarily point to
    // anything. It could be the pointer to the outlined function that
    // implements the target region, but we aren't using that so that the
    // compiler doesn't need to keep that, and could therefore inline the host
    // function if proven worthwhile during optimization.

    // From this point on, we need to have an ID of the target region defined.
    assert(OutlinedFnID && "Invalid outlined function ID!");
    (void)OutlinedFnID;

    // Emit device ID if any.
    llvm::Value *DeviceID;
    if (Device.getPointer()) {
      assert((Device.getInt() == OMPC_DEVICE_unknown ||
              Device.getInt() == OMPC_DEVICE_device_num) &&
             "Expected device_num modifier.");
      llvm::Value *DevVal = CGF.EmitScalarExpr(Device.getPointer());
      DeviceID =
          CGF.Builder.CreateIntCast(DevVal, CGF.Int64Ty, /*isSigned=*/true);
    } else {
      // No device clause: let the runtime pick the default device.
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Value *PointerNum =
        CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);

    // Return value of the runtime offloading call.
    llvm::Value *Return;

    llvm::Value *NumTeams = emitNumTeamsForTargetDirective(CGF, D);
    llvm::Value *NumThreads = emitNumThreadsForTargetDirective(CGF, D);

    // Source location for the ident struct
    llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());

    // Get tripcount for the target loop-based directive.
    llvm::Value *NumIterations =
        emitTargetNumIterationsCall(CGF, D, SizeEmitter);

    // Arguments for the target kernel.
    SmallVector<llvm::Value *> KernelArgs{
        CGF.Builder.getInt32(/* Version */ 1),
        PointerNum,
        InputInfo.BasePointersArray.getPointer(),
        InputInfo.PointersArray.getPointer(),
        InputInfo.SizesArray.getPointer(),
        MapTypesArray,
        MapNamesArray,
        InputInfo.MappersArray.getPointer(),
        NumIterations};

    // Arguments passed to the 'nowait' variant (no dependences here; the
    // depend clause, if any, is handled by the enclosing task).
    SmallVector<llvm::Value *> NoWaitKernelArgs{
        CGF.Builder.getInt32(0),
        llvm::ConstantPointerNull::get(CGM.VoidPtrTy),
        CGF.Builder.getInt32(0),
        llvm::ConstantPointerNull::get(CGM.VoidPtrTy),
    };

    bool HasNoWait = D.hasClausesOfKind<OMPNowaitClause>();

    // The target region is an outlined function launched by the runtime
    // via calls to __tgt_target_kernel().
    //
    // Note that on the host and CPU targets, the runtime implementation of
    // these calls simply call the outlined function without forking threads.
    // The outlined functions themselves have runtime calls to
    // __kmpc_fork_teams() and __kmpc_fork() for this purpose, codegen'd by
    // the compiler in emitTeamsCall() and emitParallelCall().
    //
    // In contrast, on the NVPTX target, the implementation of
    // __tgt_target_teams() launches a GPU kernel with the requested number
    // of teams and threads so no additional calls to the runtime are required.
    // Check the error code and execute the host version if required.
    CGF.Builder.restoreIP(
        HasNoWait ? OMPBuilder.emitTargetKernel(
                        CGF.Builder, Return, RTLoc, DeviceID, NumTeams,
                        NumThreads, OutlinedFnID, KernelArgs, NoWaitKernelArgs)
                  : OMPBuilder.emitTargetKernel(CGF.Builder, Return, RTLoc,
                                                DeviceID, NumTeams, NumThreads,
                                                OutlinedFnID, KernelArgs));

    // A non-zero return from the runtime means the offload failed; run the
    // host fallback in that case.
    llvm::BasicBlock *OffloadFailedBlock =
        CGF.createBasicBlock("omp_offload.failed");
    llvm::BasicBlock *OffloadContBlock =
        CGF.createBasicBlock("omp_offload.cont");
    llvm::Value *Failed = CGF.Builder.CreateIsNotNull(Return);
    CGF.Builder.CreateCondBr(Failed, OffloadFailedBlock, OffloadContBlock);

    CGF.EmitBlock(OffloadFailedBlock);
    FallbackGen(CGF);

    CGF.EmitBranch(OffloadContBlock);

    CGF.EmitBlock(OffloadContBlock, /*IsFinished=*/true);
  };

  // Notify that the host version must be executed.
  auto &&ElseGen = [FallbackGen](CodeGenFunction &CGF, PrePostActionTy &) {
    FallbackGen(CGF);
  };

  auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray,
                          &MapNamesArray, &CapturedVars, RequiresOuterTask,
                          &CS](CodeGenFunction &CGF, PrePostActionTy &) {
    // Fill up the arrays with all the captured variables.
    MappableExprsHandler::MapCombinedInfoTy CombinedInfo;

    // Get mappable expression information.
    MappableExprsHandler MEHandler(D, CGF);
    llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers;
    llvm::DenseSet<CanonicalDeclPtr<const Decl>> MappedVarSet;

    // Walk the captures, the captured-record fields, and the captured values
    // in lock step; the three sequences are parallel by construction.
    auto RI = CS.getCapturedRecordDecl()->field_begin();
    auto *CV = CapturedVars.begin();
    for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(),
                                              CE = CS.capture_end();
         CI != CE; ++CI, ++RI, ++CV) {
      MappableExprsHandler::MapCombinedInfoTy CurInfo;
      MappableExprsHandler::StructRangeInfoTy PartialStruct;

      // VLA sizes are passed to the outlined region by copy and do not have map
      // information associated.
      if (CI->capturesVariableArrayType()) {
        CurInfo.Exprs.push_back(nullptr);
        CurInfo.BasePointers.push_back(*CV);
        CurInfo.Pointers.push_back(*CV);
        CurInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
            CGF.getTypeSize(RI->getType()), CGF.Int64Ty, /*isSigned=*/true));
        // Copy to the device as an argument. No need to retrieve it.
        CurInfo.Types.push_back(MappableExprsHandler::OMP_MAP_LITERAL |
                                MappableExprsHandler::OMP_MAP_TARGET_PARAM |
                                MappableExprsHandler::OMP_MAP_IMPLICIT);
        CurInfo.Mappers.push_back(nullptr);
      } else {
        // If we have any information in the map clause, we use it, otherwise we
        // just do a default mapping.
        MEHandler.generateInfoForCapture(CI, *CV, CurInfo, PartialStruct);
        if (!CI->capturesThis())
          MappedVarSet.insert(CI->getCapturedVar());
        else
          MappedVarSet.insert(nullptr);
        if (CurInfo.BasePointers.empty() && !PartialStruct.Base.isValid())
          MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurInfo);
        // Generate correct mapping for variables captured by reference in
        // lambdas.
        if (CI->capturesVariable())
          MEHandler.generateInfoForLambdaCaptures(CI->getCapturedVar(), *CV,
                                                  CurInfo, LambdaPointers);
      }
      // We expect to have at least an element of information for this capture.
      assert((!CurInfo.BasePointers.empty() || PartialStruct.Base.isValid()) &&
             "Non-existing map pointer for capture!");
      assert(CurInfo.BasePointers.size() == CurInfo.Pointers.size() &&
             CurInfo.BasePointers.size() == CurInfo.Sizes.size() &&
             CurInfo.BasePointers.size() == CurInfo.Types.size() &&
             CurInfo.BasePointers.size() == CurInfo.Mappers.size() &&
             "Inconsistent map information sizes!");

      // If there is an entry in PartialStruct it means we have a struct with
      // individual members mapped. Emit an extra combined entry.
      if (PartialStruct.Base.isValid()) {
        CombinedInfo.append(PartialStruct.PreliminaryMapData);
        MEHandler.emitCombinedEntry(
            CombinedInfo, CurInfo.Types, PartialStruct, nullptr,
            !PartialStruct.PreliminaryMapData.BasePointers.empty());
      }

      // We need to append the results of this capture to what we already have.
      CombinedInfo.append(CurInfo);
    }
    // Adjust MEMBER_OF flags for the lambdas captures.
    MEHandler.adjustMemberOfForLambdaCaptures(
        LambdaPointers, CombinedInfo.BasePointers, CombinedInfo.Pointers,
        CombinedInfo.Types);
    // Map any list items in a map clause that were not captures because they
    // weren't referenced within the construct.
    MEHandler.generateAllInfo(CombinedInfo, MappedVarSet);

    TargetDataInfo Info;
    // Fill up the arrays and create the arguments.
    emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder);
    emitOffloadingArraysArgument(
        CGF, Info.BasePointersArray, Info.PointersArray, Info.SizesArray,
        Info.MapTypesArray, Info.MapNamesArray, Info.MappersArray, Info,
        {/*ForEndCall=*/false});

    // Publish the array addresses for ThenGen (and the task-based path).
    InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
    InputInfo.BasePointersArray =
        Address(Info.BasePointersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
    InputInfo.PointersArray =
        Address(Info.PointersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
    InputInfo.SizesArray =
        Address(Info.SizesArray, CGF.Int64Ty, CGM.getPointerAlign());
    InputInfo.MappersArray =
        Address(Info.MappersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
    MapTypesArray = Info.MapTypesArray;
    MapNamesArray = Info.MapNamesArray;
    if (RequiresOuterTask)
      CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
    else
      emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
  };

  auto &&TargetElseGen = [this, &ElseGen, &D, RequiresOuterTask](
                             CodeGenFunction &CGF, PrePostActionTy &) {
    if (RequiresOuterTask) {
      CodeGenFunction::OMPTargetDataInfo InputInfo;
      CGF.EmitOMPTargetTaskBasedDirective(D, ElseGen, InputInfo);
    } else {
      emitInlinedDirective(CGF, D.getDirectiveKind(), ElseGen);
    }
  };

  // If we have a target function ID it means that we need to support
  // offloading, otherwise, just execute on the host. We need to execute on host
  // regardless of the conditional in the if clause if, e.g., the user do not
  // specify target triples.
  if (OutlinedFnID) {
    if (IfCond) {
      emitIfClause(CGF, IfCond, TargetThenGen, TargetElseGen);
    } else {
      RegionCodeGenTy ThenRCG(TargetThenGen);
      ThenRCG(CGF);
    }
  } else {
    RegionCodeGenTy ElseRCG(TargetElseGen);
    ElseRCG(CGF);
  }
}
10495 
/// Recursively scan statement \p S for OpenMP target execution directives and
/// emit the corresponding device functions, using \p ParentName for kernel
/// name mangling.
void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S,
                                                    StringRef ParentName) {
  if (!S)
    return;

  // Codegen OMP target directives that offload compute to the device.
  bool RequiresDeviceCodegen =
      isa<OMPExecutableDirective>(S) &&
      isOpenMPTargetExecutionDirective(
          cast<OMPExecutableDirective>(S)->getDirectiveKind());

  if (RequiresDeviceCodegen) {
    const auto &E = *cast<OMPExecutableDirective>(S);
    // The (device, file, line) triple uniquely identifies the target region
    // entry across host and device compilations.
    unsigned DeviceID;
    unsigned FileID;
    unsigned Line;
    getTargetEntryUniqueInfo(CGM.getContext(), E.getBeginLoc(), DeviceID,
                             FileID, Line);

    // Is this a target region that should not be emitted as an entry point? If
    // so just signal we are done with this target region.
    if (!OffloadEntriesInfoManager.hasTargetRegionEntryInfo(DeviceID, FileID,
                                                            ParentName, Line))
      return;

    // Dispatch to the directive-specific device-function emitter.
    switch (E.getDirectiveKind()) {
    case OMPD_target:
      CodeGenFunction::EmitOMPTargetDeviceFunction(CGM, ParentName,
                                                   cast<OMPTargetDirective>(E));
      break;
    case OMPD_target_parallel:
      CodeGenFunction::EmitOMPTargetParallelDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelDirective>(E));
      break;
    case OMPD_target_teams:
      CodeGenFunction::EmitOMPTargetTeamsDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDirective>(E));
      break;
    case OMPD_target_teams_distribute:
      CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDistributeDirective>(E));
      break;
    case OMPD_target_teams_distribute_simd:
      CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDistributeSimdDirective>(E));
      break;
    case OMPD_target_parallel_for:
      CodeGenFunction::EmitOMPTargetParallelForDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelForDirective>(E));
      break;
    case OMPD_target_parallel_for_simd:
      CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelForSimdDirective>(E));
      break;
    case OMPD_target_simd:
      CodeGenFunction::EmitOMPTargetSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetSimdDirective>(E));
      break;
    case OMPD_target_teams_distribute_parallel_for:
      CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction(
          CGM, ParentName,
          cast<OMPTargetTeamsDistributeParallelForDirective>(E));
      break;
    case OMPD_target_teams_distribute_parallel_for_simd:
      CodeGenFunction::
          EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction(
              CGM, ParentName,
              cast<OMPTargetTeamsDistributeParallelForSimdDirective>(E));
      break;
    // All remaining directive kinds are not target execution directives and
    // cannot reach here (guarded by RequiresDeviceCodegen above).
    case OMPD_parallel:
    case OMPD_for:
    case OMPD_parallel_for:
    case OMPD_parallel_master:
    case OMPD_parallel_sections:
    case OMPD_for_simd:
    case OMPD_parallel_for_simd:
    case OMPD_cancel:
    case OMPD_cancellation_point:
    case OMPD_ordered:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_task:
    case OMPD_simd:
    case OMPD_tile:
    case OMPD_unroll:
    case OMPD_sections:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_taskyield:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_atomic:
    case OMPD_flush:
    case OMPD_depobj:
    case OMPD_scan:
    case OMPD_teams:
    case OMPD_target_data:
    case OMPD_target_exit_data:
    case OMPD_target_enter_data:
    case OMPD_distribute:
    case OMPD_distribute_simd:
    case OMPD_distribute_parallel_for:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_target_update:
    case OMPD_declare_simd:
    case OMPD_declare_variant:
    case OMPD_begin_declare_variant:
    case OMPD_end_declare_variant:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_master_taskloop:
    case OMPD_master_taskloop_simd:
    case OMPD_parallel_master_taskloop:
    case OMPD_parallel_master_taskloop_simd:
    case OMPD_requires:
    case OMPD_metadirective:
    case OMPD_unknown:
    default:
      llvm_unreachable("Unknown target directive for OpenMP device codegen.");
    }
    return;
  }

  // A non-target directive may still contain nested target regions; scan its
  // associated statement.
  if (const auto *E = dyn_cast<OMPExecutableDirective>(S)) {
    if (!E->hasAssociatedStmt() || !E->getAssociatedStmt())
      return;

    scanForTargetRegionsFunctions(E->getRawStmt(), ParentName);
    return;
  }

  // If this is a lambda function, look into its body.
  if (const auto *L = dyn_cast<LambdaExpr>(S))
    S = L->getBody();

  // Keep looking for target regions recursively.
  for (const Stmt *II : S->children())
    scanForTargetRegionsFunctions(II, ParentName);
}
10646 
isAssumedToBeNotEmitted(const ValueDecl * VD,bool IsDevice)10647 static bool isAssumedToBeNotEmitted(const ValueDecl *VD, bool IsDevice) {
10648   Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
10649       OMPDeclareTargetDeclAttr::getDeviceType(VD);
10650   if (!DevTy)
10651     return false;
10652   // Do not emit device_type(nohost) functions for the host.
10653   if (!IsDevice && DevTy == OMPDeclareTargetDeclAttr::DT_NoHost)
10654     return true;
10655   // Do not emit device_type(host) functions for the device.
10656   if (IsDevice && DevTy == OMPDeclareTargetDeclAttr::DT_Host)
10657     return true;
10658   return false;
10659 }
10660 
emitTargetFunctions(GlobalDecl GD)10661 bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) {
10662   // If emitting code for the host, we do not process FD here. Instead we do
10663   // the normal code generation.
10664   if (!CGM.getLangOpts().OpenMPIsDevice) {
10665     if (const auto *FD = dyn_cast<FunctionDecl>(GD.getDecl()))
10666       if (isAssumedToBeNotEmitted(cast<ValueDecl>(FD),
10667                                   CGM.getLangOpts().OpenMPIsDevice))
10668         return true;
10669     return false;
10670   }
10671 
10672   const ValueDecl *VD = cast<ValueDecl>(GD.getDecl());
10673   // Try to detect target regions in the function.
10674   if (const auto *FD = dyn_cast<FunctionDecl>(VD)) {
10675     StringRef Name = CGM.getMangledName(GD);
10676     scanForTargetRegionsFunctions(FD->getBody(), Name);
10677     if (isAssumedToBeNotEmitted(cast<ValueDecl>(FD),
10678                                 CGM.getLangOpts().OpenMPIsDevice))
10679       return true;
10680   }
10681 
10682   // Do not to emit function if it is not marked as declare target.
10683   return !OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD) &&
10684          AlreadyEmittedTargetDecls.count(VD) == 0;
10685 }
10686 
emitTargetGlobalVariable(GlobalDecl GD)10687 bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
10688   if (isAssumedToBeNotEmitted(cast<ValueDecl>(GD.getDecl()),
10689                               CGM.getLangOpts().OpenMPIsDevice))
10690     return true;
10691 
10692   if (!CGM.getLangOpts().OpenMPIsDevice)
10693     return false;
10694 
10695   // Check if there are Ctors/Dtors in this declaration and look for target
10696   // regions in it. We use the complete variant to produce the kernel name
10697   // mangling.
10698   QualType RDTy = cast<VarDecl>(GD.getDecl())->getType();
10699   if (const auto *RD = RDTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl()) {
10700     for (const CXXConstructorDecl *Ctor : RD->ctors()) {
10701       StringRef ParentName =
10702           CGM.getMangledName(GlobalDecl(Ctor, Ctor_Complete));
10703       scanForTargetRegionsFunctions(Ctor->getBody(), ParentName);
10704     }
10705     if (const CXXDestructorDecl *Dtor = RD->getDestructor()) {
10706       StringRef ParentName =
10707           CGM.getMangledName(GlobalDecl(Dtor, Dtor_Complete));
10708       scanForTargetRegionsFunctions(Dtor->getBody(), ParentName);
10709     }
10710   }
10711 
10712   // Do not to emit variable if it is not marked as declare target.
10713   llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
10714       OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(
10715           cast<VarDecl>(GD.getDecl()));
10716   if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
10717       (*Res == OMPDeclareTargetDeclAttr::MT_To &&
10718        HasRequiresUnifiedSharedMemory)) {
10719     DeferredGlobalVariables.insert(cast<VarDecl>(GD.getDecl()));
10720     return true;
10721   }
10722   return false;
10723 }
10724 
/// Registers a declare-target global variable so that an offload entry is
/// produced for it, or records it as a non-target variable emitted in device
/// code. \p Addr is the host/device address of the emitted global.
void CGOpenMPRuntime::registerTargetGlobalVariable(const VarDecl *VD,
                                                   llvm::Constant *Addr) {
  // Nothing to do when there are no offload targets and this is not a device
  // compilation.
  if (CGM.getLangOpts().OMPTargetTriples.empty() &&
      !CGM.getLangOpts().OpenMPIsDevice)
    return;

  // If we have host/nohost variables, they do not need to be registered.
  Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
      OMPDeclareTargetDeclAttr::getDeviceType(VD);
  if (DevTy && *DevTy != OMPDeclareTargetDeclAttr::DT_Any)
    return;

  llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
  if (!Res) {
    if (CGM.getLangOpts().OpenMPIsDevice) {
      // Register non-target variables being emitted in device code (debug info
      // may cause this).
      StringRef VarName = CGM.getMangledName(VD);
      EmittedNonTargetVariables.try_emplace(VarName, Addr);
    }
    return;
  }
  // Register declare target variables.
  OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags;
  StringRef VarName;
  CharUnits VarSize;
  llvm::GlobalValue::LinkageTypes Linkage;

  if (*Res == OMPDeclareTargetDeclAttr::MT_To &&
      !HasRequiresUnifiedSharedMemory) {
    // 'to' without unified shared memory: the variable itself is mapped, so
    // record its real size and linkage.
    Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo;
    VarName = CGM.getMangledName(VD);
    if (VD->hasDefinition(CGM.getContext()) != VarDecl::DeclarationOnly) {
      VarSize = CGM.getContext().getTypeSizeInChars(VD->getType());
      assert(!VarSize.isZero() && "Expected non-zero size of the variable");
    } else {
      // Declaration only: size 0 signals that no definition exists here.
      VarSize = CharUnits::Zero();
    }
    Linkage = CGM.getLLVMLinkageVarDefinition(VD, /*IsConstant=*/false);
    // Temp solution to prevent optimizations of the internal variables.
    if (CGM.getLangOpts().OpenMPIsDevice && !VD->isExternallyVisible()) {
      // Do not create a "ref-variable" if the original is not also available
      // on the host.
      if (!OffloadEntriesInfoManager.hasDeviceGlobalVarEntryInfo(VarName))
        return;
      std::string RefName = getName({VarName, "ref"});
      if (!CGM.GetGlobalValue(RefName)) {
        // Emit an internal constant "<name>_ref" that points at the variable
        // and mark it compiler-used so the variable cannot be optimized away.
        llvm::Constant *AddrRef =
            getOrCreateInternalVariable(Addr->getType(), RefName);
        auto *GVAddrRef = cast<llvm::GlobalVariable>(AddrRef);
        GVAddrRef->setConstant(/*Val=*/true);
        GVAddrRef->setLinkage(llvm::GlobalValue::InternalLinkage);
        GVAddrRef->setInitializer(Addr);
        CGM.addCompilerUsedGlobal(GVAddrRef);
      }
    }
  } else {
    // 'link', or 'to' with unified shared memory: register the pointer-sized
    // indirection variable instead of the variable itself.
    assert(((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
            (*Res == OMPDeclareTargetDeclAttr::MT_To &&
             HasRequiresUnifiedSharedMemory)) &&
           "Declare target attribute must link or to with unified memory.");
    if (*Res == OMPDeclareTargetDeclAttr::MT_Link)
      Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink;
    else
      Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo;

    if (CGM.getLangOpts().OpenMPIsDevice) {
      // On the device the entry refers to the variable by name only.
      VarName = Addr->getName();
      Addr = nullptr;
    } else {
      VarName = getAddrOfDeclareTargetVar(VD).getName();
      Addr = cast<llvm::Constant>(getAddrOfDeclareTargetVar(VD).getPointer());
    }
    VarSize = CGM.getPointerSize();
    Linkage = llvm::GlobalValue::WeakAnyLinkage;
  }

  OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo(
      VarName, Addr, VarSize, Flags, Linkage);
}
10806 
emitTargetGlobal(GlobalDecl GD)10807 bool CGOpenMPRuntime::emitTargetGlobal(GlobalDecl GD) {
10808   if (isa<FunctionDecl>(GD.getDecl()) ||
10809       isa<OMPDeclareReductionDecl>(GD.getDecl()))
10810     return emitTargetFunctions(GD);
10811 
10812   return emitTargetGlobalVariable(GD);
10813 }
10814 
emitDeferredTargetDecls() const10815 void CGOpenMPRuntime::emitDeferredTargetDecls() const {
10816   for (const VarDecl *VD : DeferredGlobalVariables) {
10817     llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
10818         OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
10819     if (!Res)
10820       continue;
10821     if (*Res == OMPDeclareTargetDeclAttr::MT_To &&
10822         !HasRequiresUnifiedSharedMemory) {
10823       CGM.EmitGlobal(VD);
10824     } else {
10825       assert((*Res == OMPDeclareTargetDeclAttr::MT_Link ||
10826               (*Res == OMPDeclareTargetDeclAttr::MT_To &&
10827                HasRequiresUnifiedSharedMemory)) &&
10828              "Expected link clause or to clause with unified memory.");
10829       (void)CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
10830     }
10831   }
10832 }
10833 
adjustTargetSpecificDataForLambdas(CodeGenFunction & CGF,const OMPExecutableDirective & D) const10834 void CGOpenMPRuntime::adjustTargetSpecificDataForLambdas(
10835     CodeGenFunction &CGF, const OMPExecutableDirective &D) const {
10836   assert(isOpenMPTargetExecutionDirective(D.getDirectiveKind()) &&
10837          " Expected target-based directive.");
10838 }
10839 
processRequiresDirective(const OMPRequiresDecl * D)10840 void CGOpenMPRuntime::processRequiresDirective(const OMPRequiresDecl *D) {
10841   for (const OMPClause *Clause : D->clauselists()) {
10842     if (Clause->getClauseKind() == OMPC_unified_shared_memory) {
10843       HasRequiresUnifiedSharedMemory = true;
10844     } else if (const auto *AC =
10845                    dyn_cast<OMPAtomicDefaultMemOrderClause>(Clause)) {
10846       switch (AC->getAtomicDefaultMemOrderKind()) {
10847       case OMPC_ATOMIC_DEFAULT_MEM_ORDER_acq_rel:
10848         RequiresAtomicOrdering = llvm::AtomicOrdering::AcquireRelease;
10849         break;
10850       case OMPC_ATOMIC_DEFAULT_MEM_ORDER_seq_cst:
10851         RequiresAtomicOrdering = llvm::AtomicOrdering::SequentiallyConsistent;
10852         break;
10853       case OMPC_ATOMIC_DEFAULT_MEM_ORDER_relaxed:
10854         RequiresAtomicOrdering = llvm::AtomicOrdering::Monotonic;
10855         break;
10856       case OMPC_ATOMIC_DEFAULT_MEM_ORDER_unknown:
10857         break;
10858       }
10859     }
10860   }
10861 }
10862 
/// Returns the default atomic ordering for this compilation unit, as set by a
/// 'requires atomic_default_mem_order' clause (see processRequiresDirective).
llvm::AtomicOrdering CGOpenMPRuntime::getDefaultMemoryOrdering() const {
  return RequiresAtomicOrdering;
}
10866 
hasAllocateAttributeForGlobalVar(const VarDecl * VD,LangAS & AS)10867 bool CGOpenMPRuntime::hasAllocateAttributeForGlobalVar(const VarDecl *VD,
10868                                                        LangAS &AS) {
10869   if (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())
10870     return false;
10871   const auto *A = VD->getAttr<OMPAllocateDeclAttr>();
10872   switch(A->getAllocatorType()) {
10873   case OMPAllocateDeclAttr::OMPNullMemAlloc:
10874   case OMPAllocateDeclAttr::OMPDefaultMemAlloc:
10875   // Not supported, fallback to the default mem space.
10876   case OMPAllocateDeclAttr::OMPLargeCapMemAlloc:
10877   case OMPAllocateDeclAttr::OMPCGroupMemAlloc:
10878   case OMPAllocateDeclAttr::OMPHighBWMemAlloc:
10879   case OMPAllocateDeclAttr::OMPLowLatMemAlloc:
10880   case OMPAllocateDeclAttr::OMPThreadMemAlloc:
10881   case OMPAllocateDeclAttr::OMPConstMemAlloc:
10882   case OMPAllocateDeclAttr::OMPPTeamMemAlloc:
10883     AS = LangAS::Default;
10884     return true;
10885   case OMPAllocateDeclAttr::OMPUserDefinedMemAlloc:
10886     llvm_unreachable("Expected predefined allocator for the variables with the "
10887                      "static storage.");
10888   }
10889   return false;
10890 }
10891 
/// Returns true if a 'requires unified_shared_memory' clause was seen in this
/// compilation unit (see processRequiresDirective).
bool CGOpenMPRuntime::hasRequiresUnifiedSharedMemory() const {
  return HasRequiresUnifiedSharedMemory;
}
10895 
DisableAutoDeclareTargetRAII(CodeGenModule & CGM)10896 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::DisableAutoDeclareTargetRAII(
10897     CodeGenModule &CGM)
10898     : CGM(CGM) {
10899   if (CGM.getLangOpts().OpenMPIsDevice) {
10900     SavedShouldMarkAsGlobal = CGM.getOpenMPRuntime().ShouldMarkAsGlobal;
10901     CGM.getOpenMPRuntime().ShouldMarkAsGlobal = false;
10902   }
10903 }
10904 
/// RAII destructor: restores the marking mode saved by the constructor (only
/// meaningful for device compilation, mirroring the constructor's guard).
CGOpenMPRuntime::DisableAutoDeclareTargetRAII::~DisableAutoDeclareTargetRAII() {
  if (CGM.getLangOpts().OpenMPIsDevice)
    CGM.getOpenMPRuntime().ShouldMarkAsGlobal = SavedShouldMarkAsGlobal;
}
10909 
/// Tracks device-side emission of function \p GD. Returns true when the
/// function needs no (further) emission for the device; returns false when it
/// should still be emitted. Also records first-time emissions in
/// AlreadyEmittedTargetDecls as a side effect.
bool CGOpenMPRuntime::markAsGlobalTarget(GlobalDecl GD) {
  // Host compilation, or marking disabled (see DisableAutoDeclareTargetRAII):
  // treat as already handled.
  if (!CGM.getLangOpts().OpenMPIsDevice || !ShouldMarkAsGlobal)
    return true;

  const auto *D = cast<FunctionDecl>(GD.getDecl());
  // Do not to emit function if it is marked as declare target as it was already
  // emitted.
  if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(D)) {
    if (D->hasBody() && AlreadyEmittedTargetDecls.count(D) == 0) {
      // NOTE(review): presumably "defined in the module" means the declare
      // target path already emitted it — confirm against emitTargetFunctions.
      if (auto *F = dyn_cast_or_null<llvm::Function>(
              CGM.GetGlobalValue(CGM.getMangledName(GD))))
        return !F->isDeclaration();
      return false;
    }
    return true;
  }

  // Non-declare-target function: record it; the insert fails (returns false
  // .second) if it was seen before, i.e. already emitted.
  return !AlreadyEmittedTargetDecls.insert(D).second;
}
10929 
/// Creates the host-side constructor-like function that registers this
/// translation unit's 'requires' flags with the offload runtime via
/// __tgt_register_requires. Returns nullptr when no registration is needed.
llvm::Function *CGOpenMPRuntime::emitRequiresDirectiveRegFun() {
  // If we don't have entries or if we are emitting code for the device, we
  // don't need to do anything.
  if (CGM.getLangOpts().OMPTargetTriples.empty() ||
      CGM.getLangOpts().OpenMPSimd || CGM.getLangOpts().OpenMPIsDevice ||
      (OffloadEntriesInfoManager.empty() &&
       !HasEmittedDeclareTargetRegion &&
       !HasEmittedTargetRegion))
    return nullptr;

  // Create and register the function that handles the requires directives.
  ASTContext &C = CGM.getContext();

  llvm::Function *RequiresRegFn;
  {
    // Scope the CodeGenFunction so it is torn down before we return.
    CodeGenFunction CGF(CGM);
    const auto &FI = CGM.getTypes().arrangeNullaryFunction();
    llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
    std::string ReqName = getName({"omp_offloading", "requires_reg"});
    RequiresRegFn = CGM.CreateGlobalInitOrCleanUpFunction(FTy, ReqName, FI);
    CGF.StartFunction(GlobalDecl(), C.VoidTy, RequiresRegFn, FI, {});
    OpenMPOffloadingRequiresDirFlags Flags = OMP_REQ_NONE;
    // TODO: check for other requires clauses.
    // The requires directive takes effect only when a target region is
    // present in the compilation unit. Otherwise it is ignored and not
    // passed to the runtime. This avoids the runtime from throwing an error
    // for mismatching requires clauses across compilation units that don't
    // contain at least 1 target region.
    assert((HasEmittedTargetRegion ||
            HasEmittedDeclareTargetRegion ||
            !OffloadEntriesInfoManager.empty()) &&
           "Target or declare target region expected.");
    if (HasRequiresUnifiedSharedMemory)
      Flags = OMP_REQ_UNIFIED_SHARED_MEMORY;
    // Emit: __tgt_register_requires(<flags>)
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___tgt_register_requires),
                        llvm::ConstantInt::get(CGM.Int64Ty, Flags));
    CGF.FinishFunction();
  }
  return RequiresRegFn;
}
10971 
emitTeamsCall(CodeGenFunction & CGF,const OMPExecutableDirective & D,SourceLocation Loc,llvm::Function * OutlinedFn,ArrayRef<llvm::Value * > CapturedVars)10972 void CGOpenMPRuntime::emitTeamsCall(CodeGenFunction &CGF,
10973                                     const OMPExecutableDirective &D,
10974                                     SourceLocation Loc,
10975                                     llvm::Function *OutlinedFn,
10976                                     ArrayRef<llvm::Value *> CapturedVars) {
10977   if (!CGF.HaveInsertPoint())
10978     return;
10979 
10980   llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
10981   CodeGenFunction::RunCleanupsScope Scope(CGF);
10982 
10983   // Build call __kmpc_fork_teams(loc, n, microtask, var1, .., varn);
10984   llvm::Value *Args[] = {
10985       RTLoc,
10986       CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
10987       CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy())};
10988   llvm::SmallVector<llvm::Value *, 16> RealArgs;
10989   RealArgs.append(std::begin(Args), std::end(Args));
10990   RealArgs.append(CapturedVars.begin(), CapturedVars.end());
10991 
10992   llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(
10993       CGM.getModule(), OMPRTL___kmpc_fork_teams);
10994   CGF.EmitRuntimeCall(RTLFn, RealArgs);
10995 }
10996 
/// Emits __kmpc_push_num_teams for 'num_teams'/'thread_limit' clauses; a null
/// clause expression is encoded as 0, which tells the runtime to use its
/// default.
void CGOpenMPRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
                                         const Expr *NumTeams,
                                         const Expr *ThreadLimit,
                                         SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;

  llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);

  // Evaluate the clause expressions (if present) and truncate/extend to i32.
  llvm::Value *NumTeamsVal =
      NumTeams
          ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(NumTeams),
                                      CGF.CGM.Int32Ty, /* isSigned = */ true)
          : CGF.Builder.getInt32(0);

  llvm::Value *ThreadLimitVal =
      ThreadLimit
          ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit),
                                      CGF.CGM.Int32Ty, /* isSigned = */ true)
          : CGF.Builder.getInt32(0);

  // Build call __kmpc_push_num_teams(&loc, global_tid, num_teams, thread_limit)
  llvm::Value *PushNumTeamsArgs[] = {RTLoc, getThreadID(CGF, Loc), NumTeamsVal,
                                     ThreadLimitVal};
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_push_num_teams),
                      PushNumTeamsArgs);
}
11025 
/// Emits the begin/end runtime calls for an OpenMP 'target data' region
/// (__tgt_target_data_begin_mapper / __tgt_target_data_end_mapper) around the
/// region body, honoring optional 'if' and 'device' clauses. The body may be
/// emitted twice (with and without device-pointer privatization) when
/// use_device_ptr captures are present.
void CGOpenMPRuntime::emitTargetDataCalls(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) {
  if (!CGF.HaveInsertPoint())
    return;

  // Action used to replace the default codegen action and turn privatization
  // off.
  PrePostActionTy NoPrivAction;

  // Generate the code for the opening of the data environment. Capture all the
  // arguments of the runtime call by reference because they are used in the
  // closing of the region.
  auto &&BeginThenGen = [this, &D, Device, &Info,
                         &CodeGen](CodeGenFunction &CGF, PrePostActionTy &) {
    // Fill up the arrays with all the mapped variables.
    MappableExprsHandler::MapCombinedInfoTy CombinedInfo;

    // Get map clause information.
    MappableExprsHandler MEHandler(D, CGF);
    MEHandler.generateAllInfo(CombinedInfo);

    // Fill up the arrays and create the arguments.
    emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder,
                         /*IsNonContiguous=*/true);

    llvm::Value *BasePointersArrayArg = nullptr;
    llvm::Value *PointersArrayArg = nullptr;
    llvm::Value *SizesArrayArg = nullptr;
    llvm::Value *MapTypesArrayArg = nullptr;
    llvm::Value *MapNamesArrayArg = nullptr;
    llvm::Value *MappersArrayArg = nullptr;
    emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
                                 SizesArrayArg, MapTypesArrayArg,
                                 MapNamesArrayArg, MappersArrayArg, Info);

    // Emit device ID if any; default to OMP_DEVICEID_UNDEF without a 'device'
    // clause.
    llvm::Value *DeviceID = nullptr;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);

    // Source location for the ident struct
    llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());

    llvm::Value *OffloadingArgs[] = {RTLoc,
                                     DeviceID,
                                     PointerNum,
                                     BasePointersArrayArg,
                                     PointersArrayArg,
                                     SizesArrayArg,
                                     MapTypesArrayArg,
                                     MapNamesArrayArg,
                                     MappersArrayArg};
    CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___tgt_target_data_begin_mapper),
        OffloadingArgs);

    // If device pointer privatization is required, emit the body of the region
    // here. It will have to be duplicated: with and without privatization.
    if (!Info.CaptureDeviceAddrMap.empty())
      CodeGen(CGF);
  };

  // Generate code for the closing of the data region.
  auto &&EndThenGen = [this, Device, &Info, &D](CodeGenFunction &CGF,
                                                PrePostActionTy &) {
    assert(Info.isValid() && "Invalid data environment closing arguments.");

    llvm::Value *BasePointersArrayArg = nullptr;
    llvm::Value *PointersArrayArg = nullptr;
    llvm::Value *SizesArrayArg = nullptr;
    llvm::Value *MapTypesArrayArg = nullptr;
    llvm::Value *MapNamesArrayArg = nullptr;
    llvm::Value *MappersArrayArg = nullptr;
    emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
                                 SizesArrayArg, MapTypesArrayArg,
                                 MapNamesArrayArg, MappersArrayArg, Info,
                                 {/*ForEndCall=*/true});

    // Emit device ID if any.
    llvm::Value *DeviceID = nullptr;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);

    // Source location for the ident struct
    llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());

    llvm::Value *OffloadingArgs[] = {RTLoc,
                                     DeviceID,
                                     PointerNum,
                                     BasePointersArrayArg,
                                     PointersArrayArg,
                                     SizesArrayArg,
                                     MapTypesArrayArg,
                                     MapNamesArrayArg,
                                     MappersArrayArg};
    CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___tgt_target_data_end_mapper),
        OffloadingArgs);
  };

  // If we need device pointer privatization, we need to emit the body of the
  // region with no privatization in the 'else' branch of the conditional.
  // Otherwise, we don't have to do anything.
  auto &&BeginElseGen = [&Info, &CodeGen, &NoPrivAction](CodeGenFunction &CGF,
                                                         PrePostActionTy &) {
    if (!Info.CaptureDeviceAddrMap.empty()) {
      CodeGen.setAction(NoPrivAction);
      CodeGen(CGF);
    }
  };

  // We don't have to do anything to close the region if the if clause evaluates
  // to false.
  auto &&EndElseGen = [](CodeGenFunction &CGF, PrePostActionTy &) {};

  if (IfCond) {
    emitIfClause(CGF, IfCond, BeginThenGen, BeginElseGen);
  } else {
    RegionCodeGenTy RCG(BeginThenGen);
    RCG(CGF);
  }

  // If we don't require privatization of device pointers, we emit the body in
  // between the runtime calls. This avoids duplicating the body code.
  if (Info.CaptureDeviceAddrMap.empty()) {
    CodeGen.setAction(NoPrivAction);
    CodeGen(CGF);
  }

  if (IfCond) {
    emitIfClause(CGF, IfCond, EndThenGen, EndElseGen);
  } else {
    RegionCodeGenTy RCG(EndThenGen);
    RCG(CGF);
  }
}
11179 
/// Emits the single runtime call for the standalone target-data directives
/// ('target enter data', 'target exit data', 'target update'), selecting the
/// (nowait) mapper entry point and honoring optional 'if'/'device' clauses.
/// With 'depend'/'nowait' clauses the call is wrapped in a target task.
void CGOpenMPRuntime::emitTargetDataStandAloneCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device) {
  if (!CGF.HaveInsertPoint())
    return;

  assert((isa<OMPTargetEnterDataDirective>(D) ||
          isa<OMPTargetExitDataDirective>(D) ||
          isa<OMPTargetUpdateDirective>(D)) &&
         "Expecting either target enter, exit data, or update directives.");

  // Captured by reference in the lambdas below: TargetThenGen fills these in
  // before ThenGen consumes them.
  CodeGenFunction::OMPTargetDataInfo InputInfo;
  llvm::Value *MapTypesArray = nullptr;
  llvm::Value *MapNamesArray = nullptr;
  // Generate the code for the opening of the data environment.
  auto &&ThenGen = [this, &D, Device, &InputInfo, &MapTypesArray,
                    &MapNamesArray](CodeGenFunction &CGF, PrePostActionTy &) {
    // Emit device ID if any; default to OMP_DEVICEID_UNDEF without a 'device'
    // clause.
    llvm::Value *DeviceID = nullptr;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Constant *PointerNum =
        CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);

    // Source location for the ident struct
    llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());

    llvm::Value *OffloadingArgs[] = {RTLoc,
                                     DeviceID,
                                     PointerNum,
                                     InputInfo.BasePointersArray.getPointer(),
                                     InputInfo.PointersArray.getPointer(),
                                     InputInfo.SizesArray.getPointer(),
                                     MapTypesArray,
                                     MapNamesArray,
                                     InputInfo.MappersArray.getPointer()};

    // Select the right runtime function call for each standalone
    // directive.
    const bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
    RuntimeFunction RTLFn;
    switch (D.getDirectiveKind()) {
    case OMPD_target_enter_data:
      RTLFn = HasNowait ? OMPRTL___tgt_target_data_begin_nowait_mapper
                        : OMPRTL___tgt_target_data_begin_mapper;
      break;
    case OMPD_target_exit_data:
      RTLFn = HasNowait ? OMPRTL___tgt_target_data_end_nowait_mapper
                        : OMPRTL___tgt_target_data_end_mapper;
      break;
    case OMPD_target_update:
      RTLFn = HasNowait ? OMPRTL___tgt_target_data_update_nowait_mapper
                        : OMPRTL___tgt_target_data_update_mapper;
      break;
    // All remaining directive kinds are rejected by the assert at the top of
    // this function; listed exhaustively so new kinds trigger a compile-time
    // switch warning.
    case OMPD_parallel:
    case OMPD_for:
    case OMPD_parallel_for:
    case OMPD_parallel_master:
    case OMPD_parallel_sections:
    case OMPD_for_simd:
    case OMPD_parallel_for_simd:
    case OMPD_cancel:
    case OMPD_cancellation_point:
    case OMPD_ordered:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_task:
    case OMPD_simd:
    case OMPD_tile:
    case OMPD_unroll:
    case OMPD_sections:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_taskyield:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_atomic:
    case OMPD_flush:
    case OMPD_depobj:
    case OMPD_scan:
    case OMPD_teams:
    case OMPD_target_data:
    case OMPD_distribute:
    case OMPD_distribute_simd:
    case OMPD_distribute_parallel_for:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_declare_simd:
    case OMPD_declare_variant:
    case OMPD_begin_declare_variant:
    case OMPD_end_declare_variant:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_master_taskloop:
    case OMPD_master_taskloop_simd:
    case OMPD_parallel_master_taskloop:
    case OMPD_parallel_master_taskloop_simd:
    case OMPD_target:
    case OMPD_target_simd:
    case OMPD_target_teams_distribute:
    case OMPD_target_teams_distribute_simd:
    case OMPD_target_teams_distribute_parallel_for:
    case OMPD_target_teams_distribute_parallel_for_simd:
    case OMPD_target_teams:
    case OMPD_target_parallel:
    case OMPD_target_parallel_for:
    case OMPD_target_parallel_for_simd:
    case OMPD_requires:
    case OMPD_metadirective:
    case OMPD_unknown:
    default:
      llvm_unreachable("Unexpected standalone target data directive.");
      break;
    }
    CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), RTLFn),
        OffloadingArgs);
  };

  auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray,
                          &MapNamesArray](CodeGenFunction &CGF,
                                          PrePostActionTy &) {
    // Fill up the arrays with all the mapped variables.
    MappableExprsHandler::MapCombinedInfoTy CombinedInfo;

    // Get map clause information.
    MappableExprsHandler MEHandler(D, CGF);
    MEHandler.generateAllInfo(CombinedInfo);

    TargetDataInfo Info;
    // Fill up the arrays and create the arguments.
    emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder,
                         /*IsNonContiguous=*/true);
    bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>() ||
                             D.hasClausesOfKind<OMPNowaitClause>();
    emitOffloadingArraysArgument(
        CGF, Info.BasePointersArray, Info.PointersArray, Info.SizesArray,
        Info.MapTypesArray, Info.MapNamesArray, Info.MappersArray, Info,
        {/*ForEndCall=*/false});
    // Publish the arrays through the captured references for ThenGen.
    InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
    InputInfo.BasePointersArray =
        Address(Info.BasePointersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
    InputInfo.PointersArray =
        Address(Info.PointersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
    InputInfo.SizesArray =
        Address(Info.SizesArray, CGF.Int64Ty, CGM.getPointerAlign());
    InputInfo.MappersArray =
        Address(Info.MappersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
    MapTypesArray = Info.MapTypesArray;
    MapNamesArray = Info.MapNamesArray;
    if (RequiresOuterTask)
      CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
    else
      emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
  };

  if (IfCond) {
    emitIfClause(CGF, IfCond, TargetThenGen,
                 [](CodeGenFunction &CGF, PrePostActionTy &) {});
  } else {
    RegionCodeGenTy ThenRCG(TargetThenGen);
    ThenRCG(CGF);
  }
}
11360 
namespace {
/// Kind of parameter in a function with 'declare simd' directive.
/// The mangled codes below are the ones emitted by mangleVectorParameters().
enum ParamKindTy {
  Linear,     // mangled 'l'
  LinearRef,  // mangled 'R'
  LinearUVal, // mangled 'U'
  LinearVal,  // mangled 'L'
  Uniform,    // mangled 'u'
  Vector,     // mangled 'v'
};
/// Attribute set of the parameter.
struct ParamAttrTy {
  // Classification of the parameter; defaults to Vector.
  ParamKindTy Kind = Vector;
  // Linear step, or the argument position it refers to when HasVarStride.
  llvm::APSInt StrideOrArg;
  // Alignment from an 'aligned' clause; zero means unspecified.
  llvm::APSInt Alignment;
  // True when the stride is variable (given via an argument position).
  bool HasVarStride = false;
};
} // namespace
11379 
evaluateCDTSize(const FunctionDecl * FD,ArrayRef<ParamAttrTy> ParamAttrs)11380 static unsigned evaluateCDTSize(const FunctionDecl *FD,
11381                                 ArrayRef<ParamAttrTy> ParamAttrs) {
11382   // Every vector variant of a SIMD-enabled function has a vector length (VLEN).
11383   // If OpenMP clause "simdlen" is used, the VLEN is the value of the argument
11384   // of that clause. The VLEN value must be power of 2.
11385   // In other case the notion of the function`s "characteristic data type" (CDT)
11386   // is used to compute the vector length.
11387   // CDT is defined in the following order:
11388   //   a) For non-void function, the CDT is the return type.
11389   //   b) If the function has any non-uniform, non-linear parameters, then the
11390   //   CDT is the type of the first such parameter.
11391   //   c) If the CDT determined by a) or b) above is struct, union, or class
11392   //   type which is pass-by-value (except for the type that maps to the
11393   //   built-in complex data type), the characteristic data type is int.
11394   //   d) If none of the above three cases is applicable, the CDT is int.
11395   // The VLEN is then determined based on the CDT and the size of vector
11396   // register of that ISA for which current vector version is generated. The
11397   // VLEN is computed using the formula below:
11398   //   VLEN  = sizeof(vector_register) / sizeof(CDT),
11399   // where vector register size specified in section 3.2.1 Registers and the
11400   // Stack Frame of original AMD64 ABI document.
11401   QualType RetType = FD->getReturnType();
11402   if (RetType.isNull())
11403     return 0;
11404   ASTContext &C = FD->getASTContext();
11405   QualType CDT;
11406   if (!RetType.isNull() && !RetType->isVoidType()) {
11407     CDT = RetType;
11408   } else {
11409     unsigned Offset = 0;
11410     if (const auto *MD = dyn_cast<CXXMethodDecl>(FD)) {
11411       if (ParamAttrs[Offset].Kind == Vector)
11412         CDT = C.getPointerType(C.getRecordType(MD->getParent()));
11413       ++Offset;
11414     }
11415     if (CDT.isNull()) {
11416       for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
11417         if (ParamAttrs[I + Offset].Kind == Vector) {
11418           CDT = FD->getParamDecl(I)->getType();
11419           break;
11420         }
11421       }
11422     }
11423   }
11424   if (CDT.isNull())
11425     CDT = C.IntTy;
11426   CDT = CDT->getCanonicalTypeUnqualified();
11427   if (CDT->isRecordType() || CDT->isUnionType())
11428     CDT = C.IntTy;
11429   return C.getTypeSize(CDT);
11430 }
11431 
11432 /// Mangle the parameter part of the vector function name according to
11433 /// their OpenMP classification. The mangling function is defined in
11434 /// section 4.5 of the AAVFABI(2021Q1).
mangleVectorParameters(ArrayRef<ParamAttrTy> ParamAttrs)11435 static std::string mangleVectorParameters(ArrayRef<ParamAttrTy> ParamAttrs) {
11436   SmallString<256> Buffer;
11437   llvm::raw_svector_ostream Out(Buffer);
11438   for (const auto &ParamAttr : ParamAttrs) {
11439     switch (ParamAttr.Kind) {
11440     case Linear:
11441       Out << 'l';
11442       break;
11443     case LinearRef:
11444       Out << 'R';
11445       break;
11446     case LinearUVal:
11447       Out << 'U';
11448       break;
11449     case LinearVal:
11450       Out << 'L';
11451       break;
11452     case Uniform:
11453       Out << 'u';
11454       break;
11455     case Vector:
11456       Out << 'v';
11457       break;
11458     }
11459     if (ParamAttr.HasVarStride)
11460       Out << "s" << ParamAttr.StrideOrArg;
11461     else if (ParamAttr.Kind == Linear || ParamAttr.Kind == LinearRef ||
11462              ParamAttr.Kind == LinearUVal || ParamAttr.Kind == LinearVal) {
11463       // Don't print the step value if it is not present or if it is
11464       // equal to 1.
11465       if (ParamAttr.StrideOrArg < 0)
11466         Out << 'n' << -ParamAttr.StrideOrArg;
11467       else if (ParamAttr.StrideOrArg != 1)
11468         Out << ParamAttr.StrideOrArg;
11469     }
11470 
11471     if (!!ParamAttr.Alignment)
11472       Out << 'a' << ParamAttr.Alignment;
11473   }
11474 
11475   return std::string(Out.str());
11476 }
11477 
11478 static void
emitX86DeclareSimdFunction(const FunctionDecl * FD,llvm::Function * Fn,const llvm::APSInt & VLENVal,ArrayRef<ParamAttrTy> ParamAttrs,OMPDeclareSimdDeclAttr::BranchStateTy State)11479 emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn,
11480                            const llvm::APSInt &VLENVal,
11481                            ArrayRef<ParamAttrTy> ParamAttrs,
11482                            OMPDeclareSimdDeclAttr::BranchStateTy State) {
11483   struct ISADataTy {
11484     char ISA;
11485     unsigned VecRegSize;
11486   };
11487   ISADataTy ISAData[] = {
11488       {
11489           'b', 128
11490       }, // SSE
11491       {
11492           'c', 256
11493       }, // AVX
11494       {
11495           'd', 256
11496       }, // AVX2
11497       {
11498           'e', 512
11499       }, // AVX512
11500   };
11501   llvm::SmallVector<char, 2> Masked;
11502   switch (State) {
11503   case OMPDeclareSimdDeclAttr::BS_Undefined:
11504     Masked.push_back('N');
11505     Masked.push_back('M');
11506     break;
11507   case OMPDeclareSimdDeclAttr::BS_Notinbranch:
11508     Masked.push_back('N');
11509     break;
11510   case OMPDeclareSimdDeclAttr::BS_Inbranch:
11511     Masked.push_back('M');
11512     break;
11513   }
11514   for (char Mask : Masked) {
11515     for (const ISADataTy &Data : ISAData) {
11516       SmallString<256> Buffer;
11517       llvm::raw_svector_ostream Out(Buffer);
11518       Out << "_ZGV" << Data.ISA << Mask;
11519       if (!VLENVal) {
11520         unsigned NumElts = evaluateCDTSize(FD, ParamAttrs);
11521         assert(NumElts && "Non-zero simdlen/cdtsize expected");
11522         Out << llvm::APSInt::getUnsigned(Data.VecRegSize / NumElts);
11523       } else {
11524         Out << VLENVal;
11525       }
11526       Out << mangleVectorParameters(ParamAttrs);
11527       Out << '_' << Fn->getName();
11528       Fn->addFnAttr(Out.str());
11529     }
11530   }
11531 }
11532 
// These are the functions needed to mangle the names of the
// vector functions generated by the compiler, according to the rules
// defined in the "Vector Function ABI specifications for AArch64",
// available at
// https://developer.arm.com/products/software-development-tools/hpc/arm-compiler-for-hpc/vector-function-abi.
11538 
11539 /// Maps To Vector (MTV), as defined in 4.1.1 of the AAVFABI (2021Q1).
getAArch64MTV(QualType QT,ParamKindTy Kind)11540 static bool getAArch64MTV(QualType QT, ParamKindTy Kind) {
11541   QT = QT.getCanonicalType();
11542 
11543   if (QT->isVoidType())
11544     return false;
11545 
11546   if (Kind == ParamKindTy::Uniform)
11547     return false;
11548 
11549   if (Kind == ParamKindTy::LinearUVal || ParamKindTy::LinearRef)
11550     return false;
11551 
11552   if ((Kind == ParamKindTy::Linear || Kind == ParamKindTy::LinearVal) &&
11553       !QT->isReferenceType())
11554     return false;
11555 
11556   return true;
11557 }
11558 
11559 /// Pass By Value (PBV), as defined in 3.1.2 of the AAVFABI.
getAArch64PBV(QualType QT,ASTContext & C)11560 static bool getAArch64PBV(QualType QT, ASTContext &C) {
11561   QT = QT.getCanonicalType();
11562   unsigned Size = C.getTypeSize(QT);
11563 
11564   // Only scalars and complex within 16 bytes wide set PVB to true.
11565   if (Size != 8 && Size != 16 && Size != 32 && Size != 64 && Size != 128)
11566     return false;
11567 
11568   if (QT->isFloatingType())
11569     return true;
11570 
11571   if (QT->isIntegerType())
11572     return true;
11573 
11574   if (QT->isPointerType())
11575     return true;
11576 
11577   // TODO: Add support for complex types (section 3.1.2, item 2).
11578 
11579   return false;
11580 }
11581 
11582 /// Computes the lane size (LS) of a return type or of an input parameter,
11583 /// as defined by `LS(P)` in 3.2.1 of the AAVFABI.
11584 /// TODO: Add support for references, section 3.2.1, item 1.
getAArch64LS(QualType QT,ParamKindTy Kind,ASTContext & C)11585 static unsigned getAArch64LS(QualType QT, ParamKindTy Kind, ASTContext &C) {
11586   if (!getAArch64MTV(QT, Kind) && QT.getCanonicalType()->isPointerType()) {
11587     QualType PTy = QT.getCanonicalType()->getPointeeType();
11588     if (getAArch64PBV(PTy, C))
11589       return C.getTypeSize(PTy);
11590   }
11591   if (getAArch64PBV(QT, C))
11592     return C.getTypeSize(QT);
11593 
11594   return C.getTypeSize(C.getUIntPtrType());
11595 }
11596 
11597 // Get Narrowest Data Size (NDS) and Widest Data Size (WDS) from the
11598 // signature of the scalar function, as defined in 3.2.2 of the
11599 // AAVFABI.
11600 static std::tuple<unsigned, unsigned, bool>
getNDSWDS(const FunctionDecl * FD,ArrayRef<ParamAttrTy> ParamAttrs)11601 getNDSWDS(const FunctionDecl *FD, ArrayRef<ParamAttrTy> ParamAttrs) {
11602   QualType RetType = FD->getReturnType().getCanonicalType();
11603 
11604   ASTContext &C = FD->getASTContext();
11605 
11606   bool OutputBecomesInput = false;
11607 
11608   llvm::SmallVector<unsigned, 8> Sizes;
11609   if (!RetType->isVoidType()) {
11610     Sizes.push_back(getAArch64LS(RetType, ParamKindTy::Vector, C));
11611     if (!getAArch64PBV(RetType, C) && getAArch64MTV(RetType, {}))
11612       OutputBecomesInput = true;
11613   }
11614   for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
11615     QualType QT = FD->getParamDecl(I)->getType().getCanonicalType();
11616     Sizes.push_back(getAArch64LS(QT, ParamAttrs[I].Kind, C));
11617   }
11618 
11619   assert(!Sizes.empty() && "Unable to determine NDS and WDS.");
11620   // The LS of a function parameter / return value can only be a power
11621   // of 2, starting from 8 bits, up to 128.
11622   assert(llvm::all_of(Sizes,
11623                       [](unsigned Size) {
11624                         return Size == 8 || Size == 16 || Size == 32 ||
11625                                Size == 64 || Size == 128;
11626                       }) &&
11627          "Invalid size");
11628 
11629   return std::make_tuple(*std::min_element(std::begin(Sizes), std::end(Sizes)),
11630                          *std::max_element(std::begin(Sizes), std::end(Sizes)),
11631                          OutputBecomesInput);
11632 }
11633 
11634 // Function used to add the attribute. The parameter `VLEN` is
11635 // templated to allow the use of "x" when targeting scalable functions
11636 // for SVE.
11637 template <typename T>
addAArch64VectorName(T VLEN,StringRef LMask,StringRef Prefix,char ISA,StringRef ParSeq,StringRef MangledName,bool OutputBecomesInput,llvm::Function * Fn)11638 static void addAArch64VectorName(T VLEN, StringRef LMask, StringRef Prefix,
11639                                  char ISA, StringRef ParSeq,
11640                                  StringRef MangledName, bool OutputBecomesInput,
11641                                  llvm::Function *Fn) {
11642   SmallString<256> Buffer;
11643   llvm::raw_svector_ostream Out(Buffer);
11644   Out << Prefix << ISA << LMask << VLEN;
11645   if (OutputBecomesInput)
11646     Out << "v";
11647   Out << ParSeq << "_" << MangledName;
11648   Fn->addFnAttr(Out.str());
11649 }
11650 
11651 // Helper function to generate the Advanced SIMD names depending on
11652 // the value of the NDS when simdlen is not present.
addAArch64AdvSIMDNDSNames(unsigned NDS,StringRef Mask,StringRef Prefix,char ISA,StringRef ParSeq,StringRef MangledName,bool OutputBecomesInput,llvm::Function * Fn)11653 static void addAArch64AdvSIMDNDSNames(unsigned NDS, StringRef Mask,
11654                                       StringRef Prefix, char ISA,
11655                                       StringRef ParSeq, StringRef MangledName,
11656                                       bool OutputBecomesInput,
11657                                       llvm::Function *Fn) {
11658   switch (NDS) {
11659   case 8:
11660     addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
11661                          OutputBecomesInput, Fn);
11662     addAArch64VectorName(16, Mask, Prefix, ISA, ParSeq, MangledName,
11663                          OutputBecomesInput, Fn);
11664     break;
11665   case 16:
11666     addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
11667                          OutputBecomesInput, Fn);
11668     addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
11669                          OutputBecomesInput, Fn);
11670     break;
11671   case 32:
11672     addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
11673                          OutputBecomesInput, Fn);
11674     addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
11675                          OutputBecomesInput, Fn);
11676     break;
11677   case 64:
11678   case 128:
11679     addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
11680                          OutputBecomesInput, Fn);
11681     break;
11682   default:
11683     llvm_unreachable("Scalar type is too wide.");
11684   }
11685 }
11686 
11687 /// Emit vector function attributes for AArch64, as defined in the AAVFABI.
emitAArch64DeclareSimdFunction(CodeGenModule & CGM,const FunctionDecl * FD,unsigned UserVLEN,ArrayRef<ParamAttrTy> ParamAttrs,OMPDeclareSimdDeclAttr::BranchStateTy State,StringRef MangledName,char ISA,unsigned VecRegSize,llvm::Function * Fn,SourceLocation SLoc)11688 static void emitAArch64DeclareSimdFunction(
11689     CodeGenModule &CGM, const FunctionDecl *FD, unsigned UserVLEN,
11690     ArrayRef<ParamAttrTy> ParamAttrs,
11691     OMPDeclareSimdDeclAttr::BranchStateTy State, StringRef MangledName,
11692     char ISA, unsigned VecRegSize, llvm::Function *Fn, SourceLocation SLoc) {
11693 
11694   // Get basic data for building the vector signature.
11695   const auto Data = getNDSWDS(FD, ParamAttrs);
11696   const unsigned NDS = std::get<0>(Data);
11697   const unsigned WDS = std::get<1>(Data);
11698   const bool OutputBecomesInput = std::get<2>(Data);
11699 
11700   // Check the values provided via `simdlen` by the user.
11701   // 1. A `simdlen(1)` doesn't produce vector signatures,
11702   if (UserVLEN == 1) {
11703     unsigned DiagID = CGM.getDiags().getCustomDiagID(
11704         DiagnosticsEngine::Warning,
11705         "The clause simdlen(1) has no effect when targeting aarch64.");
11706     CGM.getDiags().Report(SLoc, DiagID);
11707     return;
11708   }
11709 
11710   // 2. Section 3.3.1, item 1: user input must be a power of 2 for
11711   // Advanced SIMD output.
11712   if (ISA == 'n' && UserVLEN && !llvm::isPowerOf2_32(UserVLEN)) {
11713     unsigned DiagID = CGM.getDiags().getCustomDiagID(
11714         DiagnosticsEngine::Warning, "The value specified in simdlen must be a "
11715                                     "power of 2 when targeting Advanced SIMD.");
11716     CGM.getDiags().Report(SLoc, DiagID);
11717     return;
11718   }
11719 
11720   // 3. Section 3.4.1. SVE fixed lengh must obey the architectural
11721   // limits.
11722   if (ISA == 's' && UserVLEN != 0) {
11723     if ((UserVLEN * WDS > 2048) || (UserVLEN * WDS % 128 != 0)) {
11724       unsigned DiagID = CGM.getDiags().getCustomDiagID(
11725           DiagnosticsEngine::Warning, "The clause simdlen must fit the %0-bit "
11726                                       "lanes in the architectural constraints "
11727                                       "for SVE (min is 128-bit, max is "
11728                                       "2048-bit, by steps of 128-bit)");
11729       CGM.getDiags().Report(SLoc, DiagID) << WDS;
11730       return;
11731     }
11732   }
11733 
11734   // Sort out parameter sequence.
11735   const std::string ParSeq = mangleVectorParameters(ParamAttrs);
11736   StringRef Prefix = "_ZGV";
11737   // Generate simdlen from user input (if any).
11738   if (UserVLEN) {
11739     if (ISA == 's') {
11740       // SVE generates only a masked function.
11741       addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
11742                            OutputBecomesInput, Fn);
11743     } else {
11744       assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
11745       // Advanced SIMD generates one or two functions, depending on
11746       // the `[not]inbranch` clause.
11747       switch (State) {
11748       case OMPDeclareSimdDeclAttr::BS_Undefined:
11749         addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
11750                              OutputBecomesInput, Fn);
11751         addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
11752                              OutputBecomesInput, Fn);
11753         break;
11754       case OMPDeclareSimdDeclAttr::BS_Notinbranch:
11755         addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
11756                              OutputBecomesInput, Fn);
11757         break;
11758       case OMPDeclareSimdDeclAttr::BS_Inbranch:
11759         addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
11760                              OutputBecomesInput, Fn);
11761         break;
11762       }
11763     }
11764   } else {
11765     // If no user simdlen is provided, follow the AAVFABI rules for
11766     // generating the vector length.
11767     if (ISA == 's') {
11768       // SVE, section 3.4.1, item 1.
11769       addAArch64VectorName("x", "M", Prefix, ISA, ParSeq, MangledName,
11770                            OutputBecomesInput, Fn);
11771     } else {
11772       assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
11773       // Advanced SIMD, Section 3.3.1 of the AAVFABI, generates one or
11774       // two vector names depending on the use of the clause
11775       // `[not]inbranch`.
11776       switch (State) {
11777       case OMPDeclareSimdDeclAttr::BS_Undefined:
11778         addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
11779                                   OutputBecomesInput, Fn);
11780         addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
11781                                   OutputBecomesInput, Fn);
11782         break;
11783       case OMPDeclareSimdDeclAttr::BS_Notinbranch:
11784         addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
11785                                   OutputBecomesInput, Fn);
11786         break;
11787       case OMPDeclareSimdDeclAttr::BS_Inbranch:
11788         addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
11789                                   OutputBecomesInput, Fn);
11790         break;
11791       }
11792     }
11793   }
11794 }
11795 
// Attach `declare simd` vector-variant attributes (x86 "_ZGV..." / AArch64
// AAVFABI names) to \p Fn for every OMPDeclareSimdDeclAttr found on any
// redeclaration of \p FD.
void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD,
                                              llvm::Function *Fn) {
  ASTContext &C = CGM.getContext();
  FD = FD->getMostRecentDecl();
  // Walk the redeclaration chain: each redeclaration may carry its own
  // `#pragma omp declare simd` attributes.
  while (FD) {
    // Map params to their positions in function decl.
    llvm::DenseMap<const Decl *, unsigned> ParamPositions;
    // For methods, position 0 is reserved for the implicit `this` parameter
    // (keyed by the FunctionDecl itself).
    if (isa<CXXMethodDecl>(FD))
      ParamPositions.try_emplace(FD, 0);
    unsigned ParamPos = ParamPositions.size();
    for (const ParmVarDecl *P : FD->parameters()) {
      ParamPositions.try_emplace(P->getCanonicalDecl(), ParamPos);
      ++ParamPos;
    }
    // One ParamAttrs vector per `declare simd` attribute on this decl.
    for (const auto *Attr : FD->specific_attrs<OMPDeclareSimdDeclAttr>()) {
      llvm::SmallVector<ParamAttrTy, 8> ParamAttrs(ParamPositions.size());
      // Mark uniform parameters.
      for (const Expr *E : Attr->uniforms()) {
        E = E->IgnoreParenImpCasts();
        unsigned Pos;
        if (isa<CXXThisExpr>(E)) {
          Pos = ParamPositions[FD];
        } else {
          const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
                                ->getCanonicalDecl();
          auto It = ParamPositions.find(PVD);
          assert(It != ParamPositions.end() && "Function parameter not found");
          Pos = It->second;
        }
        ParamAttrs[Pos].Kind = Uniform;
      }
      // Get alignment info.
      auto *NI = Attr->alignments_begin();
      for (const Expr *E : Attr->aligneds()) {
        E = E->IgnoreParenImpCasts();
        unsigned Pos;
        QualType ParmTy;
        if (isa<CXXThisExpr>(E)) {
          Pos = ParamPositions[FD];
          ParmTy = E->getType();
        } else {
          const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
                                ->getCanonicalDecl();
          auto It = ParamPositions.find(PVD);
          assert(It != ParamPositions.end() && "Function parameter not found");
          Pos = It->second;
          ParmTy = PVD->getType();
        }
        // Use the explicit alignment expression if present; otherwise fall
        // back to the OpenMP default SIMD alignment for the parameter type.
        ParamAttrs[Pos].Alignment =
            (*NI)
                ? (*NI)->EvaluateKnownConstInt(C)
                : llvm::APSInt::getUnsigned(
                      C.toCharUnitsFromBits(C.getOpenMPDefaultSimdAlign(ParmTy))
                          .getQuantity());
        ++NI;
      }
      // Mark linear parameters. SI/MI iterate the per-linear-clause step
      // and modifier lists in lockstep with the linears() range.
      auto *SI = Attr->steps_begin();
      auto *MI = Attr->modifiers_begin();
      for (const Expr *E : Attr->linears()) {
        E = E->IgnoreParenImpCasts();
        unsigned Pos;
        bool IsReferenceType = false;
        // Rescaling factor needed to compute the linear parameter
        // value in the mangled name.
        unsigned PtrRescalingFactor = 1;
        if (isa<CXXThisExpr>(E)) {
          Pos = ParamPositions[FD];
          auto *P = cast<PointerType>(E->getType());
          PtrRescalingFactor = CGM.getContext()
                                   .getTypeSizeInChars(P->getPointeeType())
                                   .getQuantity();
        } else {
          const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
                                ->getCanonicalDecl();
          auto It = ParamPositions.find(PVD);
          assert(It != ParamPositions.end() && "Function parameter not found");
          Pos = It->second;
          // Pointers and references rescale the step by the pointee size.
          if (auto *P = dyn_cast<PointerType>(PVD->getType()))
            PtrRescalingFactor = CGM.getContext()
                                     .getTypeSizeInChars(P->getPointeeType())
                                     .getQuantity();
          else if (PVD->getType()->isReferenceType()) {
            IsReferenceType = true;
            PtrRescalingFactor =
                CGM.getContext()
                    .getTypeSizeInChars(PVD->getType().getNonReferenceType())
                    .getQuantity();
          }
        }
        ParamAttrTy &ParamAttr = ParamAttrs[Pos];
        // Classify by linear modifier; a reference without ref/uval is
        // treated as linear-val.
        if (*MI == OMPC_LINEAR_ref)
          ParamAttr.Kind = LinearRef;
        else if (*MI == OMPC_LINEAR_uval)
          ParamAttr.Kind = LinearUVal;
        else if (IsReferenceType)
          ParamAttr.Kind = LinearVal;
        else
          ParamAttr.Kind = Linear;
        // Assuming a stride of 1, for `linear` without modifiers.
        ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(1);
        if (*SI) {
          Expr::EvalResult Result;
          if (!(*SI)->EvaluateAsInt(Result, C, Expr::SE_AllowSideEffects)) {
            // Non-constant step: when it names another parameter, record
            // that parameter's position as a variable stride.
            // NOTE(review): `cast<>` asserts (never returns null) when the
            // step is not a DeclRefExpr — this looks like it was meant to
            // be `dyn_cast<>`; confirm against upstream.
            if (const auto *DRE =
                    cast<DeclRefExpr>((*SI)->IgnoreParenImpCasts())) {
              if (const auto *StridePVD =
                      dyn_cast<ParmVarDecl>(DRE->getDecl())) {
                ParamAttr.HasVarStride = true;
                auto It = ParamPositions.find(StridePVD->getCanonicalDecl());
                assert(It != ParamPositions.end() &&
                       "Function parameter not found");
                ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(It->second);
              }
            }
          } else {
            ParamAttr.StrideOrArg = Result.Val.getInt();
          }
        }
        // If we are using a linear clause on a pointer, we need to
        // rescale the value of linear_step with the byte size of the
        // pointee type.
        if (!ParamAttr.HasVarStride &&
            (ParamAttr.Kind == Linear || ParamAttr.Kind == LinearRef))
          ParamAttr.StrideOrArg = ParamAttr.StrideOrArg * PtrRescalingFactor;
        ++SI;
        ++MI;
      }
      // Evaluate the constant simdlen expression, if present.
      llvm::APSInt VLENVal;
      SourceLocation ExprLoc;
      const Expr *VLENExpr = Attr->getSimdlen();
      if (VLENExpr) {
        VLENVal = VLENExpr->EvaluateKnownConstInt(C);
        ExprLoc = VLENExpr->getExprLoc();
      }
      OMPDeclareSimdDeclAttr::BranchStateTy State = Attr->getBranchState();
      // Emit the target-specific vector-variant attributes.
      if (CGM.getTriple().isX86()) {
        emitX86DeclareSimdFunction(FD, Fn, VLENVal, ParamAttrs, State);
      } else if (CGM.getTriple().getArch() == llvm::Triple::aarch64) {
        unsigned VLEN = VLENVal.getExtValue();
        StringRef MangledName = Fn->getName();
        // SVE and Advanced SIMD variants are emitted independently,
        // gated on the corresponding target feature.
        if (CGM.getTarget().hasFeature("sve"))
          emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
                                         MangledName, 's', 128, Fn, ExprLoc);
        if (CGM.getTarget().hasFeature("neon"))
          emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
                                         MangledName, 'n', 128, Fn, ExprLoc);
      }
    }
    FD = FD->getPreviousDecl();
  }
}
11948 
namespace {
/// Cleanup action for doacross support.
///
/// Pushed onto the EH stack by emitDoacrossInit; when the scope exits
/// (normally or via exception) it emits the stored runtime call
/// (__kmpc_doacross_fini) with the captured arguments.
class DoacrossCleanupTy final : public EHScopeStack::Cleanup {
public:
  // Number of arguments of the fini call: ident_t *loc, kmp_int32 gtid
  // (see the FiniArgs built in emitDoacrossInit).
  static const int DoacrossFinArgs = 2;

private:
  llvm::FunctionCallee RTLFn;         // Runtime fini function to call.
  llvm::Value *Args[DoacrossFinArgs]; // Captured call arguments.

public:
  DoacrossCleanupTy(llvm::FunctionCallee RTLFn,
                    ArrayRef<llvm::Value *> CallArgs)
      : RTLFn(RTLFn) {
    assert(CallArgs.size() == DoacrossFinArgs);
    std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args));
  }
  void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
    // Nothing to emit if the insertion point has already been cleared.
    if (!CGF.HaveInsertPoint())
      return;
    CGF.EmitRuntimeCall(RTLFn, Args);
  }
};
} // namespace
11973 
// Emit the __kmpc_doacross_init call for an ordered(n) loop nest and push a
// cleanup that emits the matching __kmpc_doacross_fini on scope exit.
void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF,
                                       const OMPLoopDirective &D,
                                       ArrayRef<Expr *> NumIterations) {
  if (!CGF.HaveInsertPoint())
    return;

  ASTContext &C = CGM.getContext();
  QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
  RecordDecl *RD;
  // KmpDimTy is built lazily on first use and cached for later calls.
  if (KmpDimTy.isNull()) {
    // Build struct kmp_dim {  // loop bounds info casted to kmp_int64
    //  kmp_int64 lo; // lower
    //  kmp_int64 up; // upper
    //  kmp_int64 st; // stride
    // };
    RD = C.buildImplicitRecord("kmp_dim");
    RD->startDefinition();
    addFieldToRecordDecl(C, RD, Int64Ty);
    addFieldToRecordDecl(C, RD, Int64Ty);
    addFieldToRecordDecl(C, RD, Int64Ty);
    RD->completeDefinition();
    KmpDimTy = C.getRecordType(RD);
  } else {
    RD = cast<RecordDecl>(KmpDimTy->getAsTagDecl());
  }
  llvm::APInt Size(/*numBits=*/32, NumIterations.size());
  QualType ArrayTy =
      C.getConstantArrayType(KmpDimTy, Size, nullptr, ArrayType::Normal, 0);

  // One kmp_dim per loop; zero-initialization leaves `lo` at 0.
  Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims");
  CGF.EmitNullInitialization(DimsAddr, ArrayTy);
  enum { LowerFD = 0, UpperFD, StrideFD };
  // Fill dims with data.
  for (unsigned I = 0, E = NumIterations.size(); I < E; ++I) {
    LValue DimsLVal = CGF.MakeAddrLValue(
        CGF.Builder.CreateConstArrayGEP(DimsAddr, I), KmpDimTy);
    // dims.upper = num_iterations;
    LValue UpperLVal = CGF.EmitLValueForField(
        DimsLVal, *std::next(RD->field_begin(), UpperFD));
    llvm::Value *NumIterVal = CGF.EmitScalarConversion(
        CGF.EmitScalarExpr(NumIterations[I]), NumIterations[I]->getType(),
        Int64Ty, NumIterations[I]->getExprLoc());
    CGF.EmitStoreOfScalar(NumIterVal, UpperLVal);
    // dims.stride = 1;
    LValue StrideLVal = CGF.EmitLValueForField(
        DimsLVal, *std::next(RD->field_begin(), StrideFD));
    CGF.EmitStoreOfScalar(llvm::ConstantInt::getSigned(CGM.Int64Ty, /*V=*/1),
                          StrideLVal);
  }

  // Build call void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid,
  // kmp_int32 num_dims, struct kmp_dim * dims);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, D.getBeginLoc()),
      getThreadID(CGF, D.getBeginLoc()),
      llvm::ConstantInt::getSigned(CGM.Int32Ty, NumIterations.size()),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          CGF.Builder.CreateConstArrayGEP(DimsAddr, 0).getPointer(),
          CGM.VoidPtrTy)};

  llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(
      CGM.getModule(), OMPRTL___kmpc_doacross_init);
  CGF.EmitRuntimeCall(RTLFn, Args);
  // Schedule __kmpc_doacross_fini for scope exit (normal and EH paths).
  llvm::Value *FiniArgs[DoacrossCleanupTy::DoacrossFinArgs] = {
      emitUpdateLocation(CGF, D.getEndLoc()), getThreadID(CGF, D.getEndLoc())};
  llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
      CGM.getModule(), OMPRTL___kmpc_doacross_fini);
  CGF.EHStack.pushCleanup<DoacrossCleanupTy>(NormalAndEHCleanup, FiniRTLFn,
                                             llvm::makeArrayRef(FiniArgs));
}
12044 
emitDoacrossOrdered(CodeGenFunction & CGF,const OMPDependClause * C)12045 void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
12046                                           const OMPDependClause *C) {
12047   QualType Int64Ty =
12048       CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
12049   llvm::APInt Size(/*numBits=*/32, C->getNumLoops());
12050   QualType ArrayTy = CGM.getContext().getConstantArrayType(
12051       Int64Ty, Size, nullptr, ArrayType::Normal, 0);
12052   Address CntAddr = CGF.CreateMemTemp(ArrayTy, ".cnt.addr");
12053   for (unsigned I = 0, E = C->getNumLoops(); I < E; ++I) {
12054     const Expr *CounterVal = C->getLoopData(I);
12055     assert(CounterVal);
12056     llvm::Value *CntVal = CGF.EmitScalarConversion(
12057         CGF.EmitScalarExpr(CounterVal), CounterVal->getType(), Int64Ty,
12058         CounterVal->getExprLoc());
12059     CGF.EmitStoreOfScalar(CntVal, CGF.Builder.CreateConstArrayGEP(CntAddr, I),
12060                           /*Volatile=*/false, Int64Ty);
12061   }
12062   llvm::Value *Args[] = {
12063       emitUpdateLocation(CGF, C->getBeginLoc()),
12064       getThreadID(CGF, C->getBeginLoc()),
12065       CGF.Builder.CreateConstArrayGEP(CntAddr, 0).getPointer()};
12066   llvm::FunctionCallee RTLFn;
12067   if (C->getDependencyKind() == OMPC_DEPEND_source) {
12068     RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
12069                                                   OMPRTL___kmpc_doacross_post);
12070   } else {
12071     assert(C->getDependencyKind() == OMPC_DEPEND_sink);
12072     RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
12073                                                   OMPRTL___kmpc_doacross_wait);
12074   }
12075   CGF.EmitRuntimeCall(RTLFn, Args);
12076 }
12077 
emitCall(CodeGenFunction & CGF,SourceLocation Loc,llvm::FunctionCallee Callee,ArrayRef<llvm::Value * > Args) const12078 void CGOpenMPRuntime::emitCall(CodeGenFunction &CGF, SourceLocation Loc,
12079                                llvm::FunctionCallee Callee,
12080                                ArrayRef<llvm::Value *> Args) const {
12081   assert(Loc.isValid() && "Outlined function call location must be valid.");
12082   auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
12083 
12084   if (auto *Fn = dyn_cast<llvm::Function>(Callee.getCallee())) {
12085     if (Fn->doesNotThrow()) {
12086       CGF.EmitNounwindRuntimeCall(Fn, Args);
12087       return;
12088     }
12089   }
12090   CGF.EmitRuntimeCall(Callee, Args);
12091 }
12092 
// Emit a call to an outlined function. Routes through emitCall so the
// artificial debug location and the nothrow fast path apply uniformly.
void CGOpenMPRuntime::emitOutlinedFunctionCall(
    CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn,
    ArrayRef<llvm::Value *> Args) const {
  emitCall(CGF, Loc, OutlinedFn, Args);
}
12098 
emitFunctionProlog(CodeGenFunction & CGF,const Decl * D)12099 void CGOpenMPRuntime::emitFunctionProlog(CodeGenFunction &CGF, const Decl *D) {
12100   if (const auto *FD = dyn_cast<FunctionDecl>(D))
12101     if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(FD))
12102       HasEmittedDeclareTargetRegion = true;
12103 }
12104 
// Base implementation: the native parameter's local address is used
// directly; TargetParam is unused here.
Address CGOpenMPRuntime::getParameterAddress(CodeGenFunction &CGF,
                                             const VarDecl *NativeParam,
                                             const VarDecl *TargetParam) const {
  return CGF.GetAddrOfLocalVar(NativeParam);
}
12110 
12111 /// Return allocator value from expression, or return a null allocator (default
12112 /// when no allocator specified).
getAllocatorVal(CodeGenFunction & CGF,const Expr * Allocator)12113 static llvm::Value *getAllocatorVal(CodeGenFunction &CGF,
12114                                     const Expr *Allocator) {
12115   llvm::Value *AllocVal;
12116   if (Allocator) {
12117     AllocVal = CGF.EmitScalarExpr(Allocator);
12118     // According to the standard, the original allocator type is a enum
12119     // (integer). Convert to pointer type, if required.
12120     AllocVal = CGF.EmitScalarConversion(AllocVal, Allocator->getType(),
12121                                         CGF.getContext().VoidPtrTy,
12122                                         Allocator->getExprLoc());
12123   } else {
12124     // If no allocator specified, it defaults to the null allocator.
12125     AllocVal = llvm::Constant::getNullValue(
12126         CGF.CGM.getTypes().ConvertType(CGF.getContext().VoidPtrTy));
12127   }
12128   return AllocVal;
12129 }
12130 
12131 /// Return the alignment from an allocate directive if present.
getAlignmentValue(CodeGenModule & CGM,const VarDecl * VD)12132 static llvm::Value *getAlignmentValue(CodeGenModule &CGM, const VarDecl *VD) {
12133   llvm::Optional<CharUnits> AllocateAlignment = CGM.getOMPAllocateAlignment(VD);
12134 
12135   if (!AllocateAlignment)
12136     return nullptr;
12137 
12138   return llvm::ConstantInt::get(CGM.SizeTy, AllocateAlignment->getQuantity());
12139 }
12140 
/// Return the address to use for local variable VD, honoring OpenMP
/// `allocate` directives and untied-task bookkeeping.
///
/// If VD was registered as a local of an untied task for the current
/// function, the previously recorded addresses are reused. If VD carries an
/// OMPAllocateDeclAttr and is allocatable, storage is obtained at runtime
/// via __kmpc_alloc / __kmpc_aligned_alloc and a cleanup releasing it with
/// __kmpc_free is pushed on the EH stack.
Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF,
                                                   const VarDecl *VD) {
  if (!VD)
    return Address::invalid();
  Address UntiedAddr = Address::invalid();
  Address UntiedRealAddr = Address::invalid();
  // Look up the untied-task local-vars map registered for this function.
  auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn);
  if (It != FunctionToUntiedTaskStackMap.end()) {
    const UntiedLocalVarsAddressesMap &UntiedData =
        UntiedLocalVarsStack[It->second];
    auto I = UntiedData.find(VD);
    if (I != UntiedData.end()) {
      UntiedAddr = I->second.first;
      UntiedRealAddr = I->second.second;
    }
  }
  const VarDecl *CVD = VD->getCanonicalDecl();
  if (CVD->hasAttr<OMPAllocateDeclAttr>()) {
    // Use the default allocation.
    if (!isAllocatableDecl(VD))
      return UntiedAddr;
    llvm::Value *Size;
    CharUnits Align = CGM.getContext().getDeclAlign(CVD);
    if (CVD->getType()->isVariablyModifiedType()) {
      // Variably-modified type: size is a runtime value; round it up to a
      // multiple of the alignment.
      Size = CGF.getTypeSize(CVD->getType());
      // Align the size: ((size + align - 1) / align) * align
      Size = CGF.Builder.CreateNUWAdd(
          Size, CGM.getSize(Align - CharUnits::fromQuantity(1)));
      Size = CGF.Builder.CreateUDiv(Size, CGM.getSize(Align));
      Size = CGF.Builder.CreateNUWMul(Size, CGM.getSize(Align));
    } else {
      // Fixed-size type: compute the aligned size at compile time.
      CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType());
      Size = CGM.getSize(Sz.alignTo(Align));
    }
    llvm::Value *ThreadID = getThreadID(CGF, CVD->getBeginLoc());
    const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
    const Expr *Allocator = AA->getAllocator();
    llvm::Value *AllocVal = getAllocatorVal(CGF, Allocator);
    llvm::Value *Alignment = getAlignmentValue(CGM, CVD);
    SmallVector<llvm::Value *, 4> Args;
    Args.push_back(ThreadID);
    // Only __kmpc_aligned_alloc takes an explicit alignment argument.
    if (Alignment)
      Args.push_back(Alignment);
    Args.push_back(Size);
    Args.push_back(AllocVal);
    llvm::omp::RuntimeFunction FnID =
        Alignment ? OMPRTL___kmpc_aligned_alloc : OMPRTL___kmpc_alloc;
    llvm::Value *Addr = CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), FnID), Args,
        getName({CVD->getName(), ".void.addr"}));
    llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
        CGM.getModule(), OMPRTL___kmpc_free);
    QualType Ty = CGM.getContext().getPointerType(CVD->getType());
    // Cast the raw void* returned by the runtime to the variable's type.
    Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        Addr, CGF.ConvertTypeForMem(Ty), getName({CVD->getName(), ".addr"}));
    // For untied tasks, also publish the allocated pointer through the
    // task-recorded slot.
    if (UntiedAddr.isValid())
      CGF.EmitStoreOfScalar(Addr, UntiedAddr, /*Volatile=*/false, Ty);

    // Cleanup action for allocate support.
    class OMPAllocateCleanupTy final : public EHScopeStack::Cleanup {
      llvm::FunctionCallee RTLFn;
      SourceLocation::UIntTy LocEncoding;
      Address Addr;
      const Expr *AllocExpr;

    public:
      OMPAllocateCleanupTy(llvm::FunctionCallee RTLFn,
                           SourceLocation::UIntTy LocEncoding, Address Addr,
                           const Expr *AllocExpr)
          : RTLFn(RTLFn), LocEncoding(LocEncoding), Addr(Addr),
            AllocExpr(AllocExpr) {}
      // Emit: __kmpc_free(<gtid>, <addr cast to void*>, <allocator>)
      void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
        if (!CGF.HaveInsertPoint())
          return;
        llvm::Value *Args[3];
        Args[0] = CGF.CGM.getOpenMPRuntime().getThreadID(
            CGF, SourceLocation::getFromRawEncoding(LocEncoding));
        Args[1] = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            Addr.getPointer(), CGF.VoidPtrTy);
        llvm::Value *AllocVal = getAllocatorVal(CGF, AllocExpr);
        Args[2] = AllocVal;
        CGF.EmitRuntimeCall(RTLFn, Args);
      }
    };
    Address VDAddr =
        UntiedRealAddr.isValid()
            ? UntiedRealAddr
            : Address(Addr, CGF.ConvertTypeForMem(CVD->getType()), Align);
    CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(
        NormalAndEHCleanup, FiniRTLFn, CVD->getLocation().getRawEncoding(),
        VDAddr, Allocator);
    // In untied tasks, emit a switch point after allocation so resumed parts
    // of the task see consistent state.
    if (UntiedRealAddr.isValid())
      if (auto *Region =
              dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
        Region->emitUntiedSwitch(CGF);
    return VDAddr;
  }
  return UntiedAddr;
}
12240 
isLocalVarInUntiedTask(CodeGenFunction & CGF,const VarDecl * VD) const12241 bool CGOpenMPRuntime::isLocalVarInUntiedTask(CodeGenFunction &CGF,
12242                                              const VarDecl *VD) const {
12243   auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn);
12244   if (It == FunctionToUntiedTaskStackMap.end())
12245     return false;
12246   return UntiedLocalVarsStack[It->second].count(VD) > 0;
12247 }
12248 
NontemporalDeclsRAII(CodeGenModule & CGM,const OMPLoopDirective & S)12249 CGOpenMPRuntime::NontemporalDeclsRAII::NontemporalDeclsRAII(
12250     CodeGenModule &CGM, const OMPLoopDirective &S)
12251     : CGM(CGM), NeedToPush(S.hasClausesOfKind<OMPNontemporalClause>()) {
12252   assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
12253   if (!NeedToPush)
12254     return;
12255   NontemporalDeclsSet &DS =
12256       CGM.getOpenMPRuntime().NontemporalDeclsStack.emplace_back();
12257   for (const auto *C : S.getClausesOfKind<OMPNontemporalClause>()) {
12258     for (const Stmt *Ref : C->private_refs()) {
12259       const auto *SimpleRefExpr = cast<Expr>(Ref)->IgnoreParenImpCasts();
12260       const ValueDecl *VD;
12261       if (const auto *DRE = dyn_cast<DeclRefExpr>(SimpleRefExpr)) {
12262         VD = DRE->getDecl();
12263       } else {
12264         const auto *ME = cast<MemberExpr>(SimpleRefExpr);
12265         assert((ME->isImplicitCXXThis() ||
12266                 isa<CXXThisExpr>(ME->getBase()->IgnoreParenImpCasts())) &&
12267                "Expected member of current class.");
12268         VD = ME->getMemberDecl();
12269       }
12270       DS.insert(VD);
12271     }
12272   }
12273 }
12274 
~NontemporalDeclsRAII()12275 CGOpenMPRuntime::NontemporalDeclsRAII::~NontemporalDeclsRAII() {
12276   if (!NeedToPush)
12277     return;
12278   CGM.getOpenMPRuntime().NontemporalDeclsStack.pop_back();
12279 }
12280 
/// Register the local-variable address map of an untied task for the current
/// function, so getAddressOfLocalVariable/isLocalVarInUntiedTask can find it.
/// The matching destructor pops the map again.
CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::UntiedTaskLocalDeclsRAII(
    CodeGenFunction &CGF,
    const llvm::MapVector<CanonicalDeclPtr<const VarDecl>,
                          std::pair<Address, Address>> &LocalVars)
    : CGM(CGF.CGM), NeedToPush(!LocalVars.empty()) {
  if (!NeedToPush)
    return;
  // Record the stack index BEFORE pushing: size() is exactly the index the
  // new entry will occupy.
  CGM.getOpenMPRuntime().FunctionToUntiedTaskStackMap.try_emplace(
      CGF.CurFn, CGM.getOpenMPRuntime().UntiedLocalVarsStack.size());
  CGM.getOpenMPRuntime().UntiedLocalVarsStack.push_back(LocalVars);
}
12292 
~UntiedTaskLocalDeclsRAII()12293 CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::~UntiedTaskLocalDeclsRAII() {
12294   if (!NeedToPush)
12295     return;
12296   CGM.getOpenMPRuntime().UntiedLocalVarsStack.pop_back();
12297 }
12298 
isNontemporalDecl(const ValueDecl * VD) const12299 bool CGOpenMPRuntime::isNontemporalDecl(const ValueDecl *VD) const {
12300   assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
12301 
12302   return llvm::any_of(
12303       CGM.getOpenMPRuntime().NontemporalDeclsStack,
12304       [VD](const NontemporalDeclsSet &Set) { return Set.contains(VD); });
12305 }
12306 
tryToDisableInnerAnalysis(const OMPExecutableDirective & S,llvm::DenseSet<CanonicalDeclPtr<const Decl>> & NeedToAddForLPCsAsDisabled) const12307 void CGOpenMPRuntime::LastprivateConditionalRAII::tryToDisableInnerAnalysis(
12308     const OMPExecutableDirective &S,
12309     llvm::DenseSet<CanonicalDeclPtr<const Decl>> &NeedToAddForLPCsAsDisabled)
12310     const {
12311   llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToCheckForLPCs;
12312   // Vars in target/task regions must be excluded completely.
12313   if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()) ||
12314       isOpenMPTaskingDirective(S.getDirectiveKind())) {
12315     SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
12316     getOpenMPCaptureRegions(CaptureRegions, S.getDirectiveKind());
12317     const CapturedStmt *CS = S.getCapturedStmt(CaptureRegions.front());
12318     for (const CapturedStmt::Capture &Cap : CS->captures()) {
12319       if (Cap.capturesVariable() || Cap.capturesVariableByCopy())
12320         NeedToCheckForLPCs.insert(Cap.getCapturedVar());
12321     }
12322   }
12323   // Exclude vars in private clauses.
12324   for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) {
12325     for (const Expr *Ref : C->varlists()) {
12326       if (!Ref->getType()->isScalarType())
12327         continue;
12328       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
12329       if (!DRE)
12330         continue;
12331       NeedToCheckForLPCs.insert(DRE->getDecl());
12332     }
12333   }
12334   for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
12335     for (const Expr *Ref : C->varlists()) {
12336       if (!Ref->getType()->isScalarType())
12337         continue;
12338       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
12339       if (!DRE)
12340         continue;
12341       NeedToCheckForLPCs.insert(DRE->getDecl());
12342     }
12343   }
12344   for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
12345     for (const Expr *Ref : C->varlists()) {
12346       if (!Ref->getType()->isScalarType())
12347         continue;
12348       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
12349       if (!DRE)
12350         continue;
12351       NeedToCheckForLPCs.insert(DRE->getDecl());
12352     }
12353   }
12354   for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
12355     for (const Expr *Ref : C->varlists()) {
12356       if (!Ref->getType()->isScalarType())
12357         continue;
12358       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
12359       if (!DRE)
12360         continue;
12361       NeedToCheckForLPCs.insert(DRE->getDecl());
12362     }
12363   }
12364   for (const auto *C : S.getClausesOfKind<OMPLinearClause>()) {
12365     for (const Expr *Ref : C->varlists()) {
12366       if (!Ref->getType()->isScalarType())
12367         continue;
12368       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
12369       if (!DRE)
12370         continue;
12371       NeedToCheckForLPCs.insert(DRE->getDecl());
12372     }
12373   }
12374   for (const Decl *VD : NeedToCheckForLPCs) {
12375     for (const LastprivateConditionalData &Data :
12376          llvm::reverse(CGM.getOpenMPRuntime().LastprivateConditionalStack)) {
12377       if (Data.DeclToUniqueName.count(VD) > 0) {
12378         if (!Data.Disabled)
12379           NeedToAddForLPCsAsDisabled.insert(VD);
12380         break;
12381       }
12382     }
12383   }
12384 }
12385 
/// RAII for a region with lastprivate(conditional:) clauses (OpenMP >= 5.0).
/// If any such clause is present, push a new entry on the
/// LastprivateConditionalStack mapping each listed decl to a unique global
/// name (prefix "pl_cond") that identifies its shared last-value storage.
/// The matching destructor pops the entry.
CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
    CodeGenFunction &CGF, const OMPExecutableDirective &S, LValue IVLVal)
    : CGM(CGF.CGM),
      // Decide up front whether this directive needs a push: requires
      // OpenMP 5.0+ and at least one conditional lastprivate clause.
      Action((CGM.getLangOpts().OpenMP >= 50 &&
              llvm::any_of(S.getClausesOfKind<OMPLastprivateClause>(),
                           [](const OMPLastprivateClause *C) {
                             return C->getKind() ==
                                    OMPC_LASTPRIVATE_conditional;
                           }))
                 ? ActionToDo::PushAsLastprivateConditional
                 : ActionToDo::DoNotPush) {
  assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
  if (CGM.getLangOpts().OpenMP < 50 || Action == ActionToDo::DoNotPush)
    return;
  assert(Action == ActionToDo::PushAsLastprivateConditional &&
         "Expected a push action.");
  LastprivateConditionalData &Data =
      CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
  for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
    if (C->getKind() != OMPC_LASTPRIVATE_conditional)
      continue;

    for (const Expr *Ref : C->varlists()) {
      Data.DeclToUniqueName.insert(std::make_pair(
          cast<DeclRefExpr>(Ref->IgnoreParenImpCasts())->getDecl(),
          SmallString<16>(generateUniqueName(CGM, "pl_cond", Ref))));
    }
  }
  Data.IVLVal = IVLVal;
  Data.Fn = CGF.CurFn;
}
12417 
/// Disable-only RAII: if any variable privatized or captured by directive S
/// is tracked by an enclosing lastprivate-conditional region, push a
/// "Disabled" entry listing those variables (empty unique names) so inner
/// analysis skips them. The destructor pops the entry.
CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
    CodeGenFunction &CGF, const OMPExecutableDirective &S)
    : CGM(CGF.CGM), Action(ActionToDo::DoNotPush) {
  assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
  if (CGM.getLangOpts().OpenMP < 50)
    return;
  llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToAddForLPCsAsDisabled;
  tryToDisableInnerAnalysis(S, NeedToAddForLPCsAsDisabled);
  if (!NeedToAddForLPCsAsDisabled.empty()) {
    Action = ActionToDo::DisableLastprivateConditional;
    LastprivateConditionalData &Data =
        CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
    // Empty unique names mark this entry as disable-only.
    for (const Decl *VD : NeedToAddForLPCsAsDisabled)
      Data.DeclToUniqueName.insert(std::make_pair(VD, SmallString<16>()));
    Data.Fn = CGF.CurFn;
    Data.Disabled = true;
  }
}
12436 
/// Named factory for the disable-only constructor above: returns an RAII
/// that suppresses lastprivate-conditional analysis inside S without
/// pushing a tracking entry.
CGOpenMPRuntime::LastprivateConditionalRAII
CGOpenMPRuntime::LastprivateConditionalRAII::disable(
    CodeGenFunction &CGF, const OMPExecutableDirective &S) {
  return LastprivateConditionalRAII(CGF, S);
}
12442 
~LastprivateConditionalRAII()12443 CGOpenMPRuntime::LastprivateConditionalRAII::~LastprivateConditionalRAII() {
12444   if (CGM.getLangOpts().OpenMP < 50)
12445     return;
12446   if (Action == ActionToDo::DisableLastprivateConditional) {
12447     assert(CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
12448            "Expected list of disabled private vars.");
12449     CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
12450   }
12451   if (Action == ActionToDo::PushAsLastprivateConditional) {
12452     assert(
12453         !CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
12454         "Expected list of lastprivate conditional vars.");
12455     CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
12456   }
12457 }
12458 
/// Create (or reuse) the per-function control struct for a lastprivate
/// conditional variable VD:
///   struct { <VD's type> Value; char Fired; };
/// "Fired" records whether the private copy was written in an inner region.
/// Zero-initializes Fired and returns the address of the Value field, which
/// serves as VD's private copy.
Address CGOpenMPRuntime::emitLastprivateConditionalInit(CodeGenFunction &CGF,
                                                        const VarDecl *VD) {
  ASTContext &C = CGM.getContext();
  auto I = LastprivateConditionalToTypes.find(CGF.CurFn);
  if (I == LastprivateConditionalToTypes.end())
    I = LastprivateConditionalToTypes.try_emplace(CGF.CurFn).first;
  QualType NewType;
  const FieldDecl *VDField;
  const FieldDecl *FiredField;
  LValue BaseLVal;
  auto VI = I->getSecond().find(VD);
  if (VI == I->getSecond().end()) {
    // First use in this function: build the implicit record and a stack
    // temporary for it, then cache both. (The record name carries a
    // historical typo, "lasprivate"; it is internal-only, so kept as-is.)
    RecordDecl *RD = C.buildImplicitRecord("lasprivate.conditional");
    RD->startDefinition();
    VDField = addFieldToRecordDecl(C, RD, VD->getType().getNonReferenceType());
    FiredField = addFieldToRecordDecl(C, RD, C.CharTy);
    RD->completeDefinition();
    NewType = C.getRecordType(RD);
    Address Addr = CGF.CreateMemTemp(NewType, C.getDeclAlign(VD), VD->getName());
    BaseLVal = CGF.MakeAddrLValue(Addr, NewType, AlignmentSource::Decl);
    I->getSecond().try_emplace(VD, NewType, VDField, FiredField, BaseLVal);
  } else {
    // Already registered: reuse the cached type, fields and base lvalue.
    NewType = std::get<0>(VI->getSecond());
    VDField = std::get<1>(VI->getSecond());
    FiredField = std::get<2>(VI->getSecond());
    BaseLVal = std::get<3>(VI->getSecond());
  }
  // Fired = 0;
  LValue FiredLVal =
      CGF.EmitLValueForField(BaseLVal, FiredField);
  CGF.EmitStoreOfScalar(
      llvm::ConstantInt::getNullValue(CGF.ConvertTypeForMem(C.CharTy)),
      FiredLVal);
  return CGF.EmitLValueForField(BaseLVal, VDField).getAddress(CGF);
}
12493 
12494 namespace {
12495 /// Checks if the lastprivate conditional variable is referenced in LHS.
12496 class LastprivateConditionalRefChecker final
12497     : public ConstStmtVisitor<LastprivateConditionalRefChecker, bool> {
12498   ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM;
12499   const Expr *FoundE = nullptr;
12500   const Decl *FoundD = nullptr;
12501   StringRef UniqueDeclName;
12502   LValue IVLVal;
12503   llvm::Function *FoundFn = nullptr;
12504   SourceLocation Loc;
12505 
12506 public:
VisitDeclRefExpr(const DeclRefExpr * E)12507   bool VisitDeclRefExpr(const DeclRefExpr *E) {
12508     for (const CGOpenMPRuntime::LastprivateConditionalData &D :
12509          llvm::reverse(LPM)) {
12510       auto It = D.DeclToUniqueName.find(E->getDecl());
12511       if (It == D.DeclToUniqueName.end())
12512         continue;
12513       if (D.Disabled)
12514         return false;
12515       FoundE = E;
12516       FoundD = E->getDecl()->getCanonicalDecl();
12517       UniqueDeclName = It->second;
12518       IVLVal = D.IVLVal;
12519       FoundFn = D.Fn;
12520       break;
12521     }
12522     return FoundE == E;
12523   }
VisitMemberExpr(const MemberExpr * E)12524   bool VisitMemberExpr(const MemberExpr *E) {
12525     if (!CodeGenFunction::IsWrappedCXXThis(E->getBase()))
12526       return false;
12527     for (const CGOpenMPRuntime::LastprivateConditionalData &D :
12528          llvm::reverse(LPM)) {
12529       auto It = D.DeclToUniqueName.find(E->getMemberDecl());
12530       if (It == D.DeclToUniqueName.end())
12531         continue;
12532       if (D.Disabled)
12533         return false;
12534       FoundE = E;
12535       FoundD = E->getMemberDecl()->getCanonicalDecl();
12536       UniqueDeclName = It->second;
12537       IVLVal = D.IVLVal;
12538       FoundFn = D.Fn;
12539       break;
12540     }
12541     return FoundE == E;
12542   }
VisitStmt(const Stmt * S)12543   bool VisitStmt(const Stmt *S) {
12544     for (const Stmt *Child : S->children()) {
12545       if (!Child)
12546         continue;
12547       if (const auto *E = dyn_cast<Expr>(Child))
12548         if (!E->isGLValue())
12549           continue;
12550       if (Visit(Child))
12551         return true;
12552     }
12553     return false;
12554   }
LastprivateConditionalRefChecker(ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM)12555   explicit LastprivateConditionalRefChecker(
12556       ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM)
12557       : LPM(LPM) {}
12558   std::tuple<const Expr *, const Decl *, StringRef, LValue, llvm::Function *>
getFoundData() const12559   getFoundData() const {
12560     return std::make_tuple(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn);
12561   }
12562 };
12563 } // namespace
12564 
/// Merge the current value of a lastprivate conditional variable into its
/// shared last-value storage, guarded by the loop iteration variable.
///
/// Two internal globals keyed by UniqueDeclName are used:
///   int<xx> <name>.iv;       // iteration at which last_a was stored
///   decltype(priv_a) <name>; // the stored last value
/// The update `if (last_iv <= iv) { last_iv = iv; last_a = priv_a; }` is
/// emitted inside a critical region named UniqueDeclName, except in
/// simd-only mode where no parallel region can exist.
void CGOpenMPRuntime::emitLastprivateConditionalUpdate(CodeGenFunction &CGF,
                                                       LValue IVLVal,
                                                       StringRef UniqueDeclName,
                                                       LValue LVal,
                                                       SourceLocation Loc) {
  // Last updated loop counter for the lastprivate conditional var.
  // int<xx> last_iv = 0;
  llvm::Type *LLIVTy = CGF.ConvertTypeForMem(IVLVal.getType());
  llvm::Constant *LastIV =
      getOrCreateInternalVariable(LLIVTy, getName({UniqueDeclName, "iv"}));
  cast<llvm::GlobalVariable>(LastIV)->setAlignment(
      IVLVal.getAlignment().getAsAlign());
  LValue LastIVLVal = CGF.MakeNaturalAlignAddrLValue(LastIV, IVLVal.getType());

  // Last value of the lastprivate conditional.
  // decltype(priv_a) last_a;
  llvm::GlobalVariable *Last = getOrCreateInternalVariable(
      CGF.ConvertTypeForMem(LVal.getType()), UniqueDeclName);
  Last->setAlignment(LVal.getAlignment().getAsAlign());
  LValue LastLVal = CGF.MakeAddrLValue(
      Address(Last, Last->getValueType(), LVal.getAlignment()), LVal.getType());

  // Global loop counter. Required to handle inner parallel-for regions.
  // iv
  llvm::Value *IVVal = CGF.EmitLoadOfScalar(IVLVal, Loc);

  // #pragma omp critical(a)
  // if (last_iv <= iv) {
  //   last_iv = iv;
  //   last_a = priv_a;
  // }
  auto &&CodeGen = [&LastIVLVal, &IVLVal, IVVal, &LVal, &LastLVal,
                    Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    llvm::Value *LastIVVal = CGF.EmitLoadOfScalar(LastIVLVal, Loc);
    // (last_iv <= iv) ? Check if the variable is updated and store new
    // value in global var. Comparison signedness follows the IV's type.
    llvm::Value *CmpRes;
    if (IVLVal.getType()->isSignedIntegerType()) {
      CmpRes = CGF.Builder.CreateICmpSLE(LastIVVal, IVVal);
    } else {
      assert(IVLVal.getType()->isUnsignedIntegerType() &&
             "Loop iteration variable must be integer.");
      CmpRes = CGF.Builder.CreateICmpULE(LastIVVal, IVVal);
    }
    llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lp_cond_then");
    llvm::BasicBlock *ExitBB = CGF.createBasicBlock("lp_cond_exit");
    CGF.Builder.CreateCondBr(CmpRes, ThenBB, ExitBB);
    // {
    CGF.EmitBlock(ThenBB);

    //   last_iv = iv;
    CGF.EmitStoreOfScalar(IVVal, LastIVLVal);

    //   last_a = priv_a;
    switch (CGF.getEvaluationKind(LVal.getType())) {
    case TEK_Scalar: {
      llvm::Value *PrivVal = CGF.EmitLoadOfScalar(LVal, Loc);
      CGF.EmitStoreOfScalar(PrivVal, LastLVal);
      break;
    }
    case TEK_Complex: {
      CodeGenFunction::ComplexPairTy PrivVal = CGF.EmitLoadOfComplex(LVal, Loc);
      CGF.EmitStoreOfComplex(PrivVal, LastLVal, /*isInit=*/false);
      break;
    }
    case TEK_Aggregate:
      llvm_unreachable(
          "Aggregates are not supported in lastprivate conditional.");
    }
    // }
    CGF.EmitBranch(ExitBB);
    // There is no need to emit line number for unconditional branch.
    (void)ApplyDebugLocation::CreateEmpty(CGF);
    CGF.EmitBlock(ExitBB, /*IsFinished=*/true);
  };

  if (CGM.getLangOpts().OpenMPSimd) {
    // Do not emit as a critical region as no parallel region could be emitted.
    RegionCodeGenTy ThenRCG(CodeGen);
    ThenRCG(CGF);
  } else {
    emitCriticalRegion(CGF, UniqueDeclName, CodeGen, Loc);
  }
}
12650 
/// Check whether LHS references a lastprivate conditional variable and, if
/// so, emit the appropriate update: the shared last-value merge when the
/// variable belongs to the current function, or setting the control struct's
/// Fired flag when it belongs to an enclosing function (inner parallel
/// region).
void CGOpenMPRuntime::checkAndEmitLastprivateConditional(CodeGenFunction &CGF,
                                                         const Expr *LHS) {
  if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
    return;
  LastprivateConditionalRefChecker Checker(LastprivateConditionalStack);
  if (!Checker.Visit(LHS))
    return;
  const Expr *FoundE;
  const Decl *FoundD;
  StringRef UniqueDeclName;
  LValue IVLVal;
  llvm::Function *FoundFn;
  std::tie(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn) =
      Checker.getFoundData();
  if (FoundFn != CGF.CurFn) {
    // Special codegen for inner parallel regions.
    // ((struct.lastprivate.conditional*)&priv_a)->Fired = 1;
    auto It = LastprivateConditionalToTypes[FoundFn].find(FoundD);
    assert(It != LastprivateConditionalToTypes[FoundFn].end() &&
           "Lastprivate conditional is not found in outer region.");
    QualType StructTy = std::get<0>(It->getSecond());
    const FieldDecl* FiredDecl = std::get<2>(It->getSecond());
    LValue PrivLVal = CGF.EmitLValue(FoundE);
    // Reinterpret the private copy's address as the control struct so the
    // Fired field is reachable.
    Address StructAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        PrivLVal.getAddress(CGF),
        CGF.ConvertTypeForMem(CGF.getContext().getPointerType(StructTy)),
        CGF.ConvertTypeForMem(StructTy));
    LValue BaseLVal =
        CGF.MakeAddrLValue(StructAddr, StructTy, AlignmentSource::Decl);
    LValue FiredLVal = CGF.EmitLValueForField(BaseLVal, FiredDecl);
    // Atomic volatile store: the flag may be written concurrently from the
    // inner parallel region.
    CGF.EmitAtomicStore(RValue::get(llvm::ConstantInt::get(
                            CGF.ConvertTypeForMem(FiredDecl->getType()), 1)),
                        FiredLVal, llvm::AtomicOrdering::Unordered,
                        /*IsVolatile=*/true, /*isInit=*/false);
    return;
  }

  // Private address of the lastprivate conditional in the current context.
  // priv_a
  LValue LVal = CGF.EmitLValue(FoundE);
  emitLastprivateConditionalUpdate(CGF, IVLVal, UniqueDeclName, LVal,
                                   FoundE->getExprLoc());
}
12694 
checkAndEmitSharedLastprivateConditional(CodeGenFunction & CGF,const OMPExecutableDirective & D,const llvm::DenseSet<CanonicalDeclPtr<const VarDecl>> & IgnoredDecls)12695 void CGOpenMPRuntime::checkAndEmitSharedLastprivateConditional(
12696     CodeGenFunction &CGF, const OMPExecutableDirective &D,
12697     const llvm::DenseSet<CanonicalDeclPtr<const VarDecl>> &IgnoredDecls) {
12698   if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
12699     return;
12700   auto Range = llvm::reverse(LastprivateConditionalStack);
12701   auto It = llvm::find_if(
12702       Range, [](const LastprivateConditionalData &D) { return !D.Disabled; });
12703   if (It == Range.end() || It->Fn != CGF.CurFn)
12704     return;
12705   auto LPCI = LastprivateConditionalToTypes.find(It->Fn);
12706   assert(LPCI != LastprivateConditionalToTypes.end() &&
12707          "Lastprivates must be registered already.");
12708   SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
12709   getOpenMPCaptureRegions(CaptureRegions, D.getDirectiveKind());
12710   const CapturedStmt *CS = D.getCapturedStmt(CaptureRegions.back());
12711   for (const auto &Pair : It->DeclToUniqueName) {
12712     const auto *VD = cast<VarDecl>(Pair.first->getCanonicalDecl());
12713     if (!CS->capturesVariable(VD) || IgnoredDecls.contains(VD))
12714       continue;
12715     auto I = LPCI->getSecond().find(Pair.first);
12716     assert(I != LPCI->getSecond().end() &&
12717            "Lastprivate must be rehistered already.");
12718     // bool Cmp = priv_a.Fired != 0;
12719     LValue BaseLVal = std::get<3>(I->getSecond());
12720     LValue FiredLVal =
12721         CGF.EmitLValueForField(BaseLVal, std::get<2>(I->getSecond()));
12722     llvm::Value *Res = CGF.EmitLoadOfScalar(FiredLVal, D.getBeginLoc());
12723     llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Res);
12724     llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lpc.then");
12725     llvm::BasicBlock *DoneBB = CGF.createBasicBlock("lpc.done");
12726     // if (Cmp) {
12727     CGF.Builder.CreateCondBr(Cmp, ThenBB, DoneBB);
12728     CGF.EmitBlock(ThenBB);
12729     Address Addr = CGF.GetAddrOfLocalVar(VD);
12730     LValue LVal;
12731     if (VD->getType()->isReferenceType())
12732       LVal = CGF.EmitLoadOfReferenceLValue(Addr, VD->getType(),
12733                                            AlignmentSource::Decl);
12734     else
12735       LVal = CGF.MakeAddrLValue(Addr, VD->getType().getNonReferenceType(),
12736                                 AlignmentSource::Decl);
12737     emitLastprivateConditionalUpdate(CGF, It->IVLVal, Pair.second, LVal,
12738                                      D.getBeginLoc());
12739     auto AL = ApplyDebugLocation::CreateArtificial(CGF);
12740     CGF.EmitBlock(DoneBB, /*IsFinal=*/true);
12741     // }
12742   }
12743 }
12744 
emitLastprivateConditionalFinalUpdate(CodeGenFunction & CGF,LValue PrivLVal,const VarDecl * VD,SourceLocation Loc)12745 void CGOpenMPRuntime::emitLastprivateConditionalFinalUpdate(
12746     CodeGenFunction &CGF, LValue PrivLVal, const VarDecl *VD,
12747     SourceLocation Loc) {
12748   if (CGF.getLangOpts().OpenMP < 50)
12749     return;
12750   auto It = LastprivateConditionalStack.back().DeclToUniqueName.find(VD);
12751   assert(It != LastprivateConditionalStack.back().DeclToUniqueName.end() &&
12752          "Unknown lastprivate conditional variable.");
12753   StringRef UniqueName = It->second;
12754   llvm::GlobalVariable *GV = CGM.getModule().getNamedGlobal(UniqueName);
12755   // The variable was not updated in the region - exit.
12756   if (!GV)
12757     return;
12758   LValue LPLVal = CGF.MakeAddrLValue(
12759       Address(GV, GV->getValueType(), PrivLVal.getAlignment()),
12760       PrivLVal.getType().getNonReferenceType());
12761   llvm::Value *Res = CGF.EmitLoadOfScalar(LPLVal, Loc);
12762   CGF.EmitStoreOfScalar(Res, PrivLVal);
12763 }
12764 
/// Not supported in SIMD-only mode; reaching this indicates a front-end bug.
llvm::Function *CGOpenMPSIMDRuntime::emitParallelOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12770 
/// Not supported in SIMD-only mode; reaching this indicates a front-end bug.
llvm::Function *CGOpenMPSIMDRuntime::emitTeamsOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12776 
/// Not supported in SIMD-only mode; reaching this indicates a front-end bug.
llvm::Function *CGOpenMPSIMDRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12784 
emitParallelCall(CodeGenFunction & CGF,SourceLocation Loc,llvm::Function * OutlinedFn,ArrayRef<llvm::Value * > CapturedVars,const Expr * IfCond,llvm::Value * NumThreads)12785 void CGOpenMPSIMDRuntime::emitParallelCall(CodeGenFunction &CGF,
12786                                            SourceLocation Loc,
12787                                            llvm::Function *OutlinedFn,
12788                                            ArrayRef<llvm::Value *> CapturedVars,
12789                                            const Expr *IfCond,
12790                                            llvm::Value *NumThreads) {
12791   llvm_unreachable("Not supported in SIMD-only mode");
12792 }
12793 
emitCriticalRegion(CodeGenFunction & CGF,StringRef CriticalName,const RegionCodeGenTy & CriticalOpGen,SourceLocation Loc,const Expr * Hint)12794 void CGOpenMPSIMDRuntime::emitCriticalRegion(
12795     CodeGenFunction &CGF, StringRef CriticalName,
12796     const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc,
12797     const Expr *Hint) {
12798   llvm_unreachable("Not supported in SIMD-only mode");
12799 }
12800 
emitMasterRegion(CodeGenFunction & CGF,const RegionCodeGenTy & MasterOpGen,SourceLocation Loc)12801 void CGOpenMPSIMDRuntime::emitMasterRegion(CodeGenFunction &CGF,
12802                                            const RegionCodeGenTy &MasterOpGen,
12803                                            SourceLocation Loc) {
12804   llvm_unreachable("Not supported in SIMD-only mode");
12805 }
12806 
emitMaskedRegion(CodeGenFunction & CGF,const RegionCodeGenTy & MasterOpGen,SourceLocation Loc,const Expr * Filter)12807 void CGOpenMPSIMDRuntime::emitMaskedRegion(CodeGenFunction &CGF,
12808                                            const RegionCodeGenTy &MasterOpGen,
12809                                            SourceLocation Loc,
12810                                            const Expr *Filter) {
12811   llvm_unreachable("Not supported in SIMD-only mode");
12812 }
12813 
emitTaskyieldCall(CodeGenFunction & CGF,SourceLocation Loc)12814 void CGOpenMPSIMDRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
12815                                             SourceLocation Loc) {
12816   llvm_unreachable("Not supported in SIMD-only mode");
12817 }
12818 
emitTaskgroupRegion(CodeGenFunction & CGF,const RegionCodeGenTy & TaskgroupOpGen,SourceLocation Loc)12819 void CGOpenMPSIMDRuntime::emitTaskgroupRegion(
12820     CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen,
12821     SourceLocation Loc) {
12822   llvm_unreachable("Not supported in SIMD-only mode");
12823 }
12824 
emitSingleRegion(CodeGenFunction & CGF,const RegionCodeGenTy & SingleOpGen,SourceLocation Loc,ArrayRef<const Expr * > CopyprivateVars,ArrayRef<const Expr * > DestExprs,ArrayRef<const Expr * > SrcExprs,ArrayRef<const Expr * > AssignmentOps)12825 void CGOpenMPSIMDRuntime::emitSingleRegion(
12826     CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen,
12827     SourceLocation Loc, ArrayRef<const Expr *> CopyprivateVars,
12828     ArrayRef<const Expr *> DestExprs, ArrayRef<const Expr *> SrcExprs,
12829     ArrayRef<const Expr *> AssignmentOps) {
12830   llvm_unreachable("Not supported in SIMD-only mode");
12831 }
12832 
emitOrderedRegion(CodeGenFunction & CGF,const RegionCodeGenTy & OrderedOpGen,SourceLocation Loc,bool IsThreads)12833 void CGOpenMPSIMDRuntime::emitOrderedRegion(CodeGenFunction &CGF,
12834                                             const RegionCodeGenTy &OrderedOpGen,
12835                                             SourceLocation Loc,
12836                                             bool IsThreads) {
12837   llvm_unreachable("Not supported in SIMD-only mode");
12838 }
12839 
emitBarrierCall(CodeGenFunction & CGF,SourceLocation Loc,OpenMPDirectiveKind Kind,bool EmitChecks,bool ForceSimpleCall)12840 void CGOpenMPSIMDRuntime::emitBarrierCall(CodeGenFunction &CGF,
12841                                           SourceLocation Loc,
12842                                           OpenMPDirectiveKind Kind,
12843                                           bool EmitChecks,
12844                                           bool ForceSimpleCall) {
12845   llvm_unreachable("Not supported in SIMD-only mode");
12846 }
12847 
emitForDispatchInit(CodeGenFunction & CGF,SourceLocation Loc,const OpenMPScheduleTy & ScheduleKind,unsigned IVSize,bool IVSigned,bool Ordered,const DispatchRTInput & DispatchValues)12848 void CGOpenMPSIMDRuntime::emitForDispatchInit(
12849     CodeGenFunction &CGF, SourceLocation Loc,
12850     const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
12851     bool Ordered, const DispatchRTInput &DispatchValues) {
12852   llvm_unreachable("Not supported in SIMD-only mode");
12853 }
12854 
emitForStaticInit(CodeGenFunction & CGF,SourceLocation Loc,OpenMPDirectiveKind DKind,const OpenMPScheduleTy & ScheduleKind,const StaticRTInput & Values)12855 void CGOpenMPSIMDRuntime::emitForStaticInit(
12856     CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind,
12857     const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values) {
12858   llvm_unreachable("Not supported in SIMD-only mode");
12859 }
12860 
emitDistributeStaticInit(CodeGenFunction & CGF,SourceLocation Loc,OpenMPDistScheduleClauseKind SchedKind,const StaticRTInput & Values)12861 void CGOpenMPSIMDRuntime::emitDistributeStaticInit(
12862     CodeGenFunction &CGF, SourceLocation Loc,
12863     OpenMPDistScheduleClauseKind SchedKind, const StaticRTInput &Values) {
12864   llvm_unreachable("Not supported in SIMD-only mode");
12865 }
12866 
emitForOrderedIterationEnd(CodeGenFunction & CGF,SourceLocation Loc,unsigned IVSize,bool IVSigned)12867 void CGOpenMPSIMDRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
12868                                                      SourceLocation Loc,
12869                                                      unsigned IVSize,
12870                                                      bool IVSigned) {
12871   llvm_unreachable("Not supported in SIMD-only mode");
12872 }
12873 
emitForStaticFinish(CodeGenFunction & CGF,SourceLocation Loc,OpenMPDirectiveKind DKind)12874 void CGOpenMPSIMDRuntime::emitForStaticFinish(CodeGenFunction &CGF,
12875                                               SourceLocation Loc,
12876                                               OpenMPDirectiveKind DKind) {
12877   llvm_unreachable("Not supported in SIMD-only mode");
12878 }
12879 
emitForNext(CodeGenFunction & CGF,SourceLocation Loc,unsigned IVSize,bool IVSigned,Address IL,Address LB,Address UB,Address ST)12880 llvm::Value *CGOpenMPSIMDRuntime::emitForNext(CodeGenFunction &CGF,
12881                                               SourceLocation Loc,
12882                                               unsigned IVSize, bool IVSigned,
12883                                               Address IL, Address LB,
12884                                               Address UB, Address ST) {
12885   llvm_unreachable("Not supported in SIMD-only mode");
12886 }
12887 
emitNumThreadsClause(CodeGenFunction & CGF,llvm::Value * NumThreads,SourceLocation Loc)12888 void CGOpenMPSIMDRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
12889                                                llvm::Value *NumThreads,
12890                                                SourceLocation Loc) {
12891   llvm_unreachable("Not supported in SIMD-only mode");
12892 }
12893 
emitProcBindClause(CodeGenFunction & CGF,ProcBindKind ProcBind,SourceLocation Loc)12894 void CGOpenMPSIMDRuntime::emitProcBindClause(CodeGenFunction &CGF,
12895                                              ProcBindKind ProcBind,
12896                                              SourceLocation Loc) {
12897   llvm_unreachable("Not supported in SIMD-only mode");
12898 }
12899 
getAddrOfThreadPrivate(CodeGenFunction & CGF,const VarDecl * VD,Address VDAddr,SourceLocation Loc)12900 Address CGOpenMPSIMDRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
12901                                                     const VarDecl *VD,
12902                                                     Address VDAddr,
12903                                                     SourceLocation Loc) {
12904   llvm_unreachable("Not supported in SIMD-only mode");
12905 }
12906 
emitThreadPrivateVarDefinition(const VarDecl * VD,Address VDAddr,SourceLocation Loc,bool PerformInit,CodeGenFunction * CGF)12907 llvm::Function *CGOpenMPSIMDRuntime::emitThreadPrivateVarDefinition(
12908     const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit,
12909     CodeGenFunction *CGF) {
12910   llvm_unreachable("Not supported in SIMD-only mode");
12911 }
12912 
getAddrOfArtificialThreadPrivate(CodeGenFunction & CGF,QualType VarType,StringRef Name)12913 Address CGOpenMPSIMDRuntime::getAddrOfArtificialThreadPrivate(
12914     CodeGenFunction &CGF, QualType VarType, StringRef Name) {
12915   llvm_unreachable("Not supported in SIMD-only mode");
12916 }
12917 
emitFlush(CodeGenFunction & CGF,ArrayRef<const Expr * > Vars,SourceLocation Loc,llvm::AtomicOrdering AO)12918 void CGOpenMPSIMDRuntime::emitFlush(CodeGenFunction &CGF,
12919                                     ArrayRef<const Expr *> Vars,
12920                                     SourceLocation Loc,
12921                                     llvm::AtomicOrdering AO) {
12922   llvm_unreachable("Not supported in SIMD-only mode");
12923 }
12924 
emitTaskCall(CodeGenFunction & CGF,SourceLocation Loc,const OMPExecutableDirective & D,llvm::Function * TaskFunction,QualType SharedsTy,Address Shareds,const Expr * IfCond,const OMPTaskDataTy & Data)12925 void CGOpenMPSIMDRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
12926                                        const OMPExecutableDirective &D,
12927                                        llvm::Function *TaskFunction,
12928                                        QualType SharedsTy, Address Shareds,
12929                                        const Expr *IfCond,
12930                                        const OMPTaskDataTy &Data) {
12931   llvm_unreachable("Not supported in SIMD-only mode");
12932 }
12933 
emitTaskLoopCall(CodeGenFunction & CGF,SourceLocation Loc,const OMPLoopDirective & D,llvm::Function * TaskFunction,QualType SharedsTy,Address Shareds,const Expr * IfCond,const OMPTaskDataTy & Data)12934 void CGOpenMPSIMDRuntime::emitTaskLoopCall(
12935     CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D,
12936     llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds,
12937     const Expr *IfCond, const OMPTaskDataTy &Data) {
12938   llvm_unreachable("Not supported in SIMD-only mode");
12939 }
12940 
emitReduction(CodeGenFunction & CGF,SourceLocation Loc,ArrayRef<const Expr * > Privates,ArrayRef<const Expr * > LHSExprs,ArrayRef<const Expr * > RHSExprs,ArrayRef<const Expr * > ReductionOps,ReductionOptionsTy Options)12941 void CGOpenMPSIMDRuntime::emitReduction(
12942     CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> Privates,
12943     ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
12944     ArrayRef<const Expr *> ReductionOps, ReductionOptionsTy Options) {
12945   assert(Options.SimpleReduction && "Only simple reduction is expected.");
12946   CGOpenMPRuntime::emitReduction(CGF, Loc, Privates, LHSExprs, RHSExprs,
12947                                  ReductionOps, Options);
12948 }
12949 
emitTaskReductionInit(CodeGenFunction & CGF,SourceLocation Loc,ArrayRef<const Expr * > LHSExprs,ArrayRef<const Expr * > RHSExprs,const OMPTaskDataTy & Data)12950 llvm::Value *CGOpenMPSIMDRuntime::emitTaskReductionInit(
12951     CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
12952     ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
12953   llvm_unreachable("Not supported in SIMD-only mode");
12954 }
12955 
emitTaskReductionFini(CodeGenFunction & CGF,SourceLocation Loc,bool IsWorksharingReduction)12956 void CGOpenMPSIMDRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
12957                                                 SourceLocation Loc,
12958                                                 bool IsWorksharingReduction) {
12959   llvm_unreachable("Not supported in SIMD-only mode");
12960 }
12961 
emitTaskReductionFixups(CodeGenFunction & CGF,SourceLocation Loc,ReductionCodeGen & RCG,unsigned N)12962 void CGOpenMPSIMDRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
12963                                                   SourceLocation Loc,
12964                                                   ReductionCodeGen &RCG,
12965                                                   unsigned N) {
12966   llvm_unreachable("Not supported in SIMD-only mode");
12967 }
12968 
getTaskReductionItem(CodeGenFunction & CGF,SourceLocation Loc,llvm::Value * ReductionsPtr,LValue SharedLVal)12969 Address CGOpenMPSIMDRuntime::getTaskReductionItem(CodeGenFunction &CGF,
12970                                                   SourceLocation Loc,
12971                                                   llvm::Value *ReductionsPtr,
12972                                                   LValue SharedLVal) {
12973   llvm_unreachable("Not supported in SIMD-only mode");
12974 }
12975 
emitTaskwaitCall(CodeGenFunction & CGF,SourceLocation Loc,const OMPTaskDataTy & Data)12976 void CGOpenMPSIMDRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
12977                                            SourceLocation Loc,
12978                                            const OMPTaskDataTy &Data) {
12979   llvm_unreachable("Not supported in SIMD-only mode");
12980 }
12981 
emitCancellationPointCall(CodeGenFunction & CGF,SourceLocation Loc,OpenMPDirectiveKind CancelRegion)12982 void CGOpenMPSIMDRuntime::emitCancellationPointCall(
12983     CodeGenFunction &CGF, SourceLocation Loc,
12984     OpenMPDirectiveKind CancelRegion) {
12985   llvm_unreachable("Not supported in SIMD-only mode");
12986 }
12987 
emitCancelCall(CodeGenFunction & CGF,SourceLocation Loc,const Expr * IfCond,OpenMPDirectiveKind CancelRegion)12988 void CGOpenMPSIMDRuntime::emitCancelCall(CodeGenFunction &CGF,
12989                                          SourceLocation Loc, const Expr *IfCond,
12990                                          OpenMPDirectiveKind CancelRegion) {
12991   llvm_unreachable("Not supported in SIMD-only mode");
12992 }
12993 
emitTargetOutlinedFunction(const OMPExecutableDirective & D,StringRef ParentName,llvm::Function * & OutlinedFn,llvm::Constant * & OutlinedFnID,bool IsOffloadEntry,const RegionCodeGenTy & CodeGen)12994 void CGOpenMPSIMDRuntime::emitTargetOutlinedFunction(
12995     const OMPExecutableDirective &D, StringRef ParentName,
12996     llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
12997     bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
12998   llvm_unreachable("Not supported in SIMD-only mode");
12999 }
13000 
emitTargetCall(CodeGenFunction & CGF,const OMPExecutableDirective & D,llvm::Function * OutlinedFn,llvm::Value * OutlinedFnID,const Expr * IfCond,llvm::PointerIntPair<const Expr *,2,OpenMPDeviceClauseModifier> Device,llvm::function_ref<llvm::Value * (CodeGenFunction & CGF,const OMPLoopDirective & D)> SizeEmitter)13001 void CGOpenMPSIMDRuntime::emitTargetCall(
13002     CodeGenFunction &CGF, const OMPExecutableDirective &D,
13003     llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
13004     llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
13005     llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
13006                                      const OMPLoopDirective &D)>
13007         SizeEmitter) {
13008   llvm_unreachable("Not supported in SIMD-only mode");
13009 }
13010 
emitTargetFunctions(GlobalDecl GD)13011 bool CGOpenMPSIMDRuntime::emitTargetFunctions(GlobalDecl GD) {
13012   llvm_unreachable("Not supported in SIMD-only mode");
13013 }
13014 
emitTargetGlobalVariable(GlobalDecl GD)13015 bool CGOpenMPSIMDRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
13016   llvm_unreachable("Not supported in SIMD-only mode");
13017 }
13018 
emitTargetGlobal(GlobalDecl GD)13019 bool CGOpenMPSIMDRuntime::emitTargetGlobal(GlobalDecl GD) {
13020   return false;
13021 }
13022 
emitTeamsCall(CodeGenFunction & CGF,const OMPExecutableDirective & D,SourceLocation Loc,llvm::Function * OutlinedFn,ArrayRef<llvm::Value * > CapturedVars)13023 void CGOpenMPSIMDRuntime::emitTeamsCall(CodeGenFunction &CGF,
13024                                         const OMPExecutableDirective &D,
13025                                         SourceLocation Loc,
13026                                         llvm::Function *OutlinedFn,
13027                                         ArrayRef<llvm::Value *> CapturedVars) {
13028   llvm_unreachable("Not supported in SIMD-only mode");
13029 }
13030 
emitNumTeamsClause(CodeGenFunction & CGF,const Expr * NumTeams,const Expr * ThreadLimit,SourceLocation Loc)13031 void CGOpenMPSIMDRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
13032                                              const Expr *NumTeams,
13033                                              const Expr *ThreadLimit,
13034                                              SourceLocation Loc) {
13035   llvm_unreachable("Not supported in SIMD-only mode");
13036 }
13037 
emitTargetDataCalls(CodeGenFunction & CGF,const OMPExecutableDirective & D,const Expr * IfCond,const Expr * Device,const RegionCodeGenTy & CodeGen,TargetDataInfo & Info)13038 void CGOpenMPSIMDRuntime::emitTargetDataCalls(
13039     CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
13040     const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) {
13041   llvm_unreachable("Not supported in SIMD-only mode");
13042 }
13043 
emitTargetDataStandAloneCall(CodeGenFunction & CGF,const OMPExecutableDirective & D,const Expr * IfCond,const Expr * Device)13044 void CGOpenMPSIMDRuntime::emitTargetDataStandAloneCall(
13045     CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
13046     const Expr *Device) {
13047   llvm_unreachable("Not supported in SIMD-only mode");
13048 }
13049 
emitDoacrossInit(CodeGenFunction & CGF,const OMPLoopDirective & D,ArrayRef<Expr * > NumIterations)13050 void CGOpenMPSIMDRuntime::emitDoacrossInit(CodeGenFunction &CGF,
13051                                            const OMPLoopDirective &D,
13052                                            ArrayRef<Expr *> NumIterations) {
13053   llvm_unreachable("Not supported in SIMD-only mode");
13054 }
13055 
emitDoacrossOrdered(CodeGenFunction & CGF,const OMPDependClause * C)13056 void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
13057                                               const OMPDependClause *C) {
13058   llvm_unreachable("Not supported in SIMD-only mode");
13059 }
13060 
13061 const VarDecl *
translateParameter(const FieldDecl * FD,const VarDecl * NativeParam) const13062 CGOpenMPSIMDRuntime::translateParameter(const FieldDecl *FD,
13063                                         const VarDecl *NativeParam) const {
13064   llvm_unreachable("Not supported in SIMD-only mode");
13065 }
13066 
13067 Address
getParameterAddress(CodeGenFunction & CGF,const VarDecl * NativeParam,const VarDecl * TargetParam) const13068 CGOpenMPSIMDRuntime::getParameterAddress(CodeGenFunction &CGF,
13069                                          const VarDecl *NativeParam,
13070                                          const VarDecl *TargetParam) const {
13071   llvm_unreachable("Not supported in SIMD-only mode");
13072 }
13073