1 //===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This provides a class for OpenMP runtime code generation.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "CGCXXABI.h"
15 #include "CGCleanup.h"
16 #include "CGOpenMPRuntime.h"
17 #include "CodeGenFunction.h"
18 #include "clang/AST/Decl.h"
19 #include "clang/AST/StmtOpenMP.h"
20 #include "llvm/ADT/ArrayRef.h"
21 #include "llvm/Bitcode/ReaderWriter.h"
22 #include "llvm/IR/CallSite.h"
23 #include "llvm/IR/DerivedTypes.h"
24 #include "llvm/IR/GlobalValue.h"
25 #include "llvm/IR/Value.h"
26 #include "llvm/Support/Format.h"
27 #include "llvm/Support/raw_ostream.h"
28 #include <cassert>
29 
30 using namespace clang;
31 using namespace CodeGen;
32 
33 namespace {
34 /// \brief Base class for handling code generation inside OpenMP regions.
35 class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
36 public:
37   /// \brief Kinds of OpenMP regions used in codegen.
38   enum CGOpenMPRegionKind {
39     /// \brief Region with outlined function for standalone 'parallel'
40     /// directive.
41     ParallelOutlinedRegion,
42     /// \brief Region with outlined function for standalone 'task' directive.
43     TaskOutlinedRegion,
44     /// \brief Region for constructs that do not require function outlining,
45     /// like 'for', 'sections', 'atomic' etc. directives.
46     InlinedRegion,
47     /// \brief Region with outlined function for standalone 'target' directive.
48     TargetRegion,
49   };
50 
51   CGOpenMPRegionInfo(const CapturedStmt &CS,
52                      const CGOpenMPRegionKind RegionKind,
53                      const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
54                      bool HasCancel)
55       : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind),
56         CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {}
57 
58   CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind,
59                      const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
60                      bool HasCancel)
61       : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen),
62         Kind(Kind), HasCancel(HasCancel) {}
63 
64   /// \brief Get a variable or parameter for storing global thread id
65   /// inside OpenMP construct.
66   virtual const VarDecl *getThreadIDVariable() const = 0;
67 
68   /// \brief Emit the captured statement body.
69   void EmitBody(CodeGenFunction &CGF, const Stmt *S) override;
70 
71   /// \brief Get an LValue for the current ThreadID variable.
72   /// \return LValue for thread id variable. This LValue always has type int32*.
73   virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF);
74 
75   CGOpenMPRegionKind getRegionKind() const { return RegionKind; }
76 
77   OpenMPDirectiveKind getDirectiveKind() const { return Kind; }
78 
79   bool hasCancel() const { return HasCancel; }
80 
81   static bool classof(const CGCapturedStmtInfo *Info) {
82     return Info->getKind() == CR_OpenMP;
83   }
84 
85 protected:
86   CGOpenMPRegionKind RegionKind;
87   RegionCodeGenTy CodeGen;
88   OpenMPDirectiveKind Kind;
89   bool HasCancel;
90 };
91 
92 /// \brief API for captured statement code generation in OpenMP constructs.
93 class CGOpenMPOutlinedRegionInfo : public CGOpenMPRegionInfo {
94 public:
95   CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar,
96                              const RegionCodeGenTy &CodeGen,
97                              OpenMPDirectiveKind Kind, bool HasCancel)
98       : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind,
99                            HasCancel),
100         ThreadIDVar(ThreadIDVar) {
101     assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
102   }
103   /// \brief Get a variable or parameter for storing global thread id
104   /// inside OpenMP construct.
105   const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }
106 
107   /// \brief Get the name of the capture helper.
108   StringRef getHelperName() const override { return ".omp_outlined."; }
109 
110   static bool classof(const CGCapturedStmtInfo *Info) {
111     return CGOpenMPRegionInfo::classof(Info) &&
112            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
113                ParallelOutlinedRegion;
114   }
115 
116 private:
117   /// \brief A variable or parameter storing global thread id for OpenMP
118   /// constructs.
119   const VarDecl *ThreadIDVar;
120 };
121 
122 /// \brief API for captured statement code generation in OpenMP constructs.
123 class CGOpenMPTaskOutlinedRegionInfo : public CGOpenMPRegionInfo {
124 public:
125   CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS,
126                                  const VarDecl *ThreadIDVar,
127                                  const RegionCodeGenTy &CodeGen,
128                                  OpenMPDirectiveKind Kind, bool HasCancel)
129       : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel),
130         ThreadIDVar(ThreadIDVar) {
131     assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
132   }
133   /// \brief Get a variable or parameter for storing global thread id
134   /// inside OpenMP construct.
135   const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }
136 
137   /// \brief Get an LValue for the current ThreadID variable.
138   LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override;
139 
140   /// \brief Get the name of the capture helper.
141   StringRef getHelperName() const override { return ".omp_outlined."; }
142 
143   static bool classof(const CGCapturedStmtInfo *Info) {
144     return CGOpenMPRegionInfo::classof(Info) &&
145            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
146                TaskOutlinedRegion;
147   }
148 
149 private:
150   /// \brief A variable or parameter storing global thread id for OpenMP
151   /// constructs.
152   const VarDecl *ThreadIDVar;
153 };
154 
155 /// \brief API for inlined captured statement code generation in OpenMP
156 /// constructs.
157 class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo {
158 public:
159   CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI,
160                             const RegionCodeGenTy &CodeGen,
161                             OpenMPDirectiveKind Kind, bool HasCancel)
162       : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel),
163         OldCSI(OldCSI),
164         OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {}
165   // \brief Retrieve the value of the context parameter.
166   llvm::Value *getContextValue() const override {
167     if (OuterRegionInfo)
168       return OuterRegionInfo->getContextValue();
169     llvm_unreachable("No context value for inlined OpenMP region");
170   }
171   void setContextValue(llvm::Value *V) override {
172     if (OuterRegionInfo) {
173       OuterRegionInfo->setContextValue(V);
174       return;
175     }
176     llvm_unreachable("No context value for inlined OpenMP region");
177   }
178   /// \brief Lookup the captured field decl for a variable.
179   const FieldDecl *lookup(const VarDecl *VD) const override {
180     if (OuterRegionInfo)
181       return OuterRegionInfo->lookup(VD);
182     // If there is no outer outlined region,no need to lookup in a list of
183     // captured variables, we can use the original one.
184     return nullptr;
185   }
186   FieldDecl *getThisFieldDecl() const override {
187     if (OuterRegionInfo)
188       return OuterRegionInfo->getThisFieldDecl();
189     return nullptr;
190   }
191   /// \brief Get a variable or parameter for storing global thread id
192   /// inside OpenMP construct.
193   const VarDecl *getThreadIDVariable() const override {
194     if (OuterRegionInfo)
195       return OuterRegionInfo->getThreadIDVariable();
196     return nullptr;
197   }
198 
199   /// \brief Get the name of the capture helper.
200   StringRef getHelperName() const override {
201     if (auto *OuterRegionInfo = getOldCSI())
202       return OuterRegionInfo->getHelperName();
203     llvm_unreachable("No helper name for inlined OpenMP construct");
204   }
205 
206   CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; }
207 
208   static bool classof(const CGCapturedStmtInfo *Info) {
209     return CGOpenMPRegionInfo::classof(Info) &&
210            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion;
211   }
212 
213 private:
214   /// \brief CodeGen info about outer OpenMP region.
215   CodeGenFunction::CGCapturedStmtInfo *OldCSI;
216   CGOpenMPRegionInfo *OuterRegionInfo;
217 };
218 
219 /// \brief API for captured statement code generation in OpenMP target
220 /// constructs. For this captures, implicit parameters are used instead of the
221 /// captured fields. The name of the target region has to be unique in a given
222 /// application so it is provided by the client, because only the client has
223 /// the information to generate that.
224 class CGOpenMPTargetRegionInfo : public CGOpenMPRegionInfo {
225 public:
226   CGOpenMPTargetRegionInfo(const CapturedStmt &CS,
227                            const RegionCodeGenTy &CodeGen, StringRef HelperName)
228       : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target,
229                            /*HasCancel=*/false),
230         HelperName(HelperName) {}
231 
232   /// \brief This is unused for target regions because each starts executing
233   /// with a single thread.
234   const VarDecl *getThreadIDVariable() const override { return nullptr; }
235 
236   /// \brief Get the name of the capture helper.
237   StringRef getHelperName() const override { return HelperName; }
238 
239   static bool classof(const CGCapturedStmtInfo *Info) {
240     return CGOpenMPRegionInfo::classof(Info) &&
241            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion;
242   }
243 
244 private:
245   StringRef HelperName;
246 };
247 
248 /// \brief RAII for emitting code of OpenMP constructs.
249 class InlinedOpenMPRegionRAII {
250   CodeGenFunction &CGF;
251 
252 public:
253   /// \brief Constructs region for combined constructs.
254   /// \param CodeGen Code generation sequence for combined directives. Includes
255   /// a list of functions used for code generation of implicitly inlined
256   /// regions.
257   InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen,
258                           OpenMPDirectiveKind Kind, bool HasCancel)
259       : CGF(CGF) {
260     // Start emission for the construct.
261     CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo(
262         CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel);
263   }
264   ~InlinedOpenMPRegionRAII() {
265     // Restore original CapturedStmtInfo only if we're done with code emission.
266     auto *OldCSI =
267         cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI();
268     delete CGF.CapturedStmtInfo;
269     CGF.CapturedStmtInfo = OldCSI;
270   }
271 };
272 
273 } // anonymous namespace
274 
275 static LValue emitLoadOfPointerLValue(CodeGenFunction &CGF, Address PtrAddr,
276                                       QualType Ty) {
277   AlignmentSource Source;
278   CharUnits Align = CGF.getNaturalPointeeTypeAlignment(Ty, &Source);
279   return CGF.MakeAddrLValue(Address(CGF.Builder.CreateLoad(PtrAddr), Align),
280                             Ty->getPointeeType(), Source);
281 }
282 
283 LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) {
284   return emitLoadOfPointerLValue(CGF,
285                                  CGF.GetAddrOfLocalVar(getThreadIDVariable()),
286                                  getThreadIDVariable()->getType());
287 }
288 
289 void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt * /*S*/) {
290   if (!CGF.HaveInsertPoint())
291     return;
292   // 1.2.2 OpenMP Language Terminology
293   // Structured block - An executable statement with a single entry at the
294   // top and a single exit at the bottom.
295   // The point of exit cannot be a branch out of the structured block.
296   // longjmp() and throw() must not violate the entry/exit criteria.
297   CGF.EHStack.pushTerminate();
298   {
299     CodeGenFunction::RunCleanupsScope Scope(CGF);
300     CodeGen(CGF);
301   }
302   CGF.EHStack.popTerminate();
303 }
304 
305 LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue(
306     CodeGenFunction &CGF) {
307   return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()),
308                             getThreadIDVariable()->getType(),
309                             AlignmentSource::Decl);
310 }
311 
312 CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM)
313     : CGM(CGM), DefaultOpenMPPSource(nullptr), KmpRoutineEntryPtrTy(nullptr),
314       OffloadEntriesInfoManager(CGM) {
315   IdentTy = llvm::StructType::create(
316       "ident_t", CGM.Int32Ty /* reserved_1 */, CGM.Int32Ty /* flags */,
317       CGM.Int32Ty /* reserved_2 */, CGM.Int32Ty /* reserved_3 */,
318       CGM.Int8PtrTy /* psource */, nullptr);
319   // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...)
320   llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty),
321                                llvm::PointerType::getUnqual(CGM.Int32Ty)};
322   Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true);
323   KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8);
324 
325   loadOffloadInfoMetadata();
326 }
327 
328 void CGOpenMPRuntime::clear() {
329   InternalVars.clear();
330 }
331 
332 // Layout information for ident_t.
333 static CharUnits getIdentAlign(CodeGenModule &CGM) {
334   return CGM.getPointerAlign();
335 }
336 static CharUnits getIdentSize(CodeGenModule &CGM) {
337   assert((4 * CGM.getPointerSize()).isMultipleOf(CGM.getPointerAlign()));
338   return CharUnits::fromQuantity(16) + CGM.getPointerSize();
339 }
340 static CharUnits getOffsetOfIdentField(CGOpenMPRuntime::IdentFieldIndex Field) {
341   // All the fields except the last are i32, so this works beautifully.
342   return unsigned(Field) * CharUnits::fromQuantity(4);
343 }
344 static Address createIdentFieldGEP(CodeGenFunction &CGF, Address Addr,
345                                    CGOpenMPRuntime::IdentFieldIndex Field,
346                                    const llvm::Twine &Name = "") {
347   auto Offset = getOffsetOfIdentField(Field);
348   return CGF.Builder.CreateStructGEP(Addr, Field, Offset, Name);
349 }
350 
351 llvm::Value *CGOpenMPRuntime::emitParallelOutlinedFunction(
352     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
353     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
354   assert(ThreadIDVar->getType()->isPointerType() &&
355          "thread id variable must be of type kmp_int32 *");
356   const CapturedStmt *CS = cast<CapturedStmt>(D.getAssociatedStmt());
357   CodeGenFunction CGF(CGM, true);
358   bool HasCancel = false;
359   if (auto *OPD = dyn_cast<OMPParallelDirective>(&D))
360     HasCancel = OPD->hasCancel();
361   else if (auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D))
362     HasCancel = OPSD->hasCancel();
363   else if (auto *OPFD = dyn_cast<OMPParallelForDirective>(&D))
364     HasCancel = OPFD->hasCancel();
365   CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind,
366                                     HasCancel);
367   CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
368   return CGF.GenerateOpenMPCapturedStmtFunction(*CS);
369 }
370 
371 llvm::Value *CGOpenMPRuntime::emitTaskOutlinedFunction(
372     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
373     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
374   assert(!ThreadIDVar->getType()->isPointerType() &&
375          "thread id variable must be of type kmp_int32 for tasks");
376   auto *CS = cast<CapturedStmt>(D.getAssociatedStmt());
377   CodeGenFunction CGF(CGM, true);
378   CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen,
379                                         InnermostKind,
380                                         cast<OMPTaskDirective>(D).hasCancel());
381   CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
382   return CGF.GenerateCapturedStmtFunction(*CS);
383 }
384 
385 Address CGOpenMPRuntime::getOrCreateDefaultLocation(OpenMPLocationFlags Flags) {
386   CharUnits Align = getIdentAlign(CGM);
387   llvm::Value *Entry = OpenMPDefaultLocMap.lookup(Flags);
388   if (!Entry) {
389     if (!DefaultOpenMPPSource) {
390       // Initialize default location for psource field of ident_t structure of
391       // all ident_t objects. Format is ";file;function;line;column;;".
392       // Taken from
393       // http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp_str.c
394       DefaultOpenMPPSource =
395           CGM.GetAddrOfConstantCString(";unknown;unknown;0;0;;").getPointer();
396       DefaultOpenMPPSource =
397           llvm::ConstantExpr::getBitCast(DefaultOpenMPPSource, CGM.Int8PtrTy);
398     }
399     auto DefaultOpenMPLocation = new llvm::GlobalVariable(
400         CGM.getModule(), IdentTy, /*isConstant*/ true,
401         llvm::GlobalValue::PrivateLinkage, /*Initializer*/ nullptr);
402     DefaultOpenMPLocation->setUnnamedAddr(true);
403     DefaultOpenMPLocation->setAlignment(Align.getQuantity());
404 
405     llvm::Constant *Zero = llvm::ConstantInt::get(CGM.Int32Ty, 0, true);
406     llvm::Constant *Values[] = {Zero,
407                                 llvm::ConstantInt::get(CGM.Int32Ty, Flags),
408                                 Zero, Zero, DefaultOpenMPPSource};
409     llvm::Constant *Init = llvm::ConstantStruct::get(IdentTy, Values);
410     DefaultOpenMPLocation->setInitializer(Init);
411     OpenMPDefaultLocMap[Flags] = Entry = DefaultOpenMPLocation;
412   }
413   return Address(Entry, Align);
414 }
415 
416 llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF,
417                                                  SourceLocation Loc,
418                                                  OpenMPLocationFlags Flags) {
419   // If no debug info is generated - return global default location.
420   if (CGM.getCodeGenOpts().getDebugInfo() == CodeGenOptions::NoDebugInfo ||
421       Loc.isInvalid())
422     return getOrCreateDefaultLocation(Flags).getPointer();
423 
424   assert(CGF.CurFn && "No function in current CodeGenFunction.");
425 
426   Address LocValue = Address::invalid();
427   auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
428   if (I != OpenMPLocThreadIDMap.end())
429     LocValue = Address(I->second.DebugLoc, getIdentAlign(CGF.CGM));
430 
431   // OpenMPLocThreadIDMap may have null DebugLoc and non-null ThreadID, if
432   // GetOpenMPThreadID was called before this routine.
433   if (!LocValue.isValid()) {
434     // Generate "ident_t .kmpc_loc.addr;"
435     Address AI = CGF.CreateTempAlloca(IdentTy, getIdentAlign(CGF.CGM),
436                                       ".kmpc_loc.addr");
437     auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
438     Elem.second.DebugLoc = AI.getPointer();
439     LocValue = AI;
440 
441     CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
442     CGF.Builder.SetInsertPoint(CGF.AllocaInsertPt);
443     CGF.Builder.CreateMemCpy(LocValue, getOrCreateDefaultLocation(Flags),
444                              CGM.getSize(getIdentSize(CGF.CGM)));
445   }
446 
447   // char **psource = &.kmpc_loc_<flags>.addr.psource;
448   Address PSource = createIdentFieldGEP(CGF, LocValue, IdentField_PSource);
449 
450   auto OMPDebugLoc = OpenMPDebugLocMap.lookup(Loc.getRawEncoding());
451   if (OMPDebugLoc == nullptr) {
452     SmallString<128> Buffer2;
453     llvm::raw_svector_ostream OS2(Buffer2);
454     // Build debug location
455     PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
456     OS2 << ";" << PLoc.getFilename() << ";";
457     if (const FunctionDecl *FD =
458             dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl)) {
459       OS2 << FD->getQualifiedNameAsString();
460     }
461     OS2 << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;";
462     OMPDebugLoc = CGF.Builder.CreateGlobalStringPtr(OS2.str());
463     OpenMPDebugLocMap[Loc.getRawEncoding()] = OMPDebugLoc;
464   }
465   // *psource = ";<File>;<Function>;<Line>;<Column>;;";
466   CGF.Builder.CreateStore(OMPDebugLoc, PSource);
467 
468   // Our callers always pass this to a runtime function, so for
469   // convenience, go ahead and return a naked pointer.
470   return LocValue.getPointer();
471 }
472 
473 llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF,
474                                           SourceLocation Loc) {
475   assert(CGF.CurFn && "No function in current CodeGenFunction.");
476 
477   llvm::Value *ThreadID = nullptr;
478   // Check whether we've already cached a load of the thread id in this
479   // function.
480   auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
481   if (I != OpenMPLocThreadIDMap.end()) {
482     ThreadID = I->second.ThreadID;
483     if (ThreadID != nullptr)
484       return ThreadID;
485   }
486   if (auto *OMPRegionInfo =
487           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
488     if (OMPRegionInfo->getThreadIDVariable()) {
489       // Check if this an outlined function with thread id passed as argument.
490       auto LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF);
491       ThreadID = CGF.EmitLoadOfLValue(LVal, Loc).getScalarVal();
492       // If value loaded in entry block, cache it and use it everywhere in
493       // function.
494       if (CGF.Builder.GetInsertBlock() == CGF.AllocaInsertPt->getParent()) {
495         auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
496         Elem.second.ThreadID = ThreadID;
497       }
498       return ThreadID;
499     }
500   }
501 
502   // This is not an outlined function region - need to call __kmpc_int32
503   // kmpc_global_thread_num(ident_t *loc).
504   // Generate thread id value and cache this value for use across the
505   // function.
506   CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
507   CGF.Builder.SetInsertPoint(CGF.AllocaInsertPt);
508   ThreadID =
509       CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_global_thread_num),
510                           emitUpdateLocation(CGF, Loc));
511   auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
512   Elem.second.ThreadID = ThreadID;
513   return ThreadID;
514 }
515 
516 void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) {
517   assert(CGF.CurFn && "No function in current CodeGenFunction.");
518   if (OpenMPLocThreadIDMap.count(CGF.CurFn))
519     OpenMPLocThreadIDMap.erase(CGF.CurFn);
520 }
521 
522 llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() {
523   return llvm::PointerType::getUnqual(IdentTy);
524 }
525 
526 llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() {
527   return llvm::PointerType::getUnqual(Kmpc_MicroTy);
528 }
529 
530 llvm::Constant *
531 CGOpenMPRuntime::createRuntimeFunction(OpenMPRTLFunction Function) {
532   llvm::Constant *RTLFn = nullptr;
533   switch (Function) {
534   case OMPRTL__kmpc_fork_call: {
535     // Build void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro
536     // microtask, ...);
537     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
538                                 getKmpc_MicroPointerTy()};
539     llvm::FunctionType *FnTy =
540         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ true);
541     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_fork_call");
542     break;
543   }
544   case OMPRTL__kmpc_global_thread_num: {
545     // Build kmp_int32 __kmpc_global_thread_num(ident_t *loc);
546     llvm::Type *TypeParams[] = {getIdentTyPointerTy()};
547     llvm::FunctionType *FnTy =
548         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
549     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_global_thread_num");
550     break;
551   }
552   case OMPRTL__kmpc_threadprivate_cached: {
553     // Build void *__kmpc_threadprivate_cached(ident_t *loc,
554     // kmp_int32 global_tid, void *data, size_t size, void ***cache);
555     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
556                                 CGM.VoidPtrTy, CGM.SizeTy,
557                                 CGM.VoidPtrTy->getPointerTo()->getPointerTo()};
558     llvm::FunctionType *FnTy =
559         llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg*/ false);
560     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_cached");
561     break;
562   }
563   case OMPRTL__kmpc_critical: {
564     // Build void __kmpc_critical(ident_t *loc, kmp_int32 global_tid,
565     // kmp_critical_name *crit);
566     llvm::Type *TypeParams[] = {
567         getIdentTyPointerTy(), CGM.Int32Ty,
568         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
569     llvm::FunctionType *FnTy =
570         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
571     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_critical");
572     break;
573   }
574   case OMPRTL__kmpc_critical_with_hint: {
575     // Build void __kmpc_critical_with_hint(ident_t *loc, kmp_int32 global_tid,
576     // kmp_critical_name *crit, uintptr_t hint);
577     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
578                                 llvm::PointerType::getUnqual(KmpCriticalNameTy),
579                                 CGM.IntPtrTy};
580     llvm::FunctionType *FnTy =
581         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
582     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_critical_with_hint");
583     break;
584   }
585   case OMPRTL__kmpc_threadprivate_register: {
586     // Build void __kmpc_threadprivate_register(ident_t *, void *data,
587     // kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor);
588     // typedef void *(*kmpc_ctor)(void *);
589     auto KmpcCtorTy =
590         llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
591                                 /*isVarArg*/ false)->getPointerTo();
592     // typedef void *(*kmpc_cctor)(void *, void *);
593     llvm::Type *KmpcCopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
594     auto KmpcCopyCtorTy =
595         llvm::FunctionType::get(CGM.VoidPtrTy, KmpcCopyCtorTyArgs,
596                                 /*isVarArg*/ false)->getPointerTo();
597     // typedef void (*kmpc_dtor)(void *);
598     auto KmpcDtorTy =
599         llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy, /*isVarArg*/ false)
600             ->getPointerTo();
601     llvm::Type *FnTyArgs[] = {getIdentTyPointerTy(), CGM.VoidPtrTy, KmpcCtorTy,
602                               KmpcCopyCtorTy, KmpcDtorTy};
603     auto FnTy = llvm::FunctionType::get(CGM.VoidTy, FnTyArgs,
604                                         /*isVarArg*/ false);
605     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_register");
606     break;
607   }
608   case OMPRTL__kmpc_end_critical: {
609     // Build void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid,
610     // kmp_critical_name *crit);
611     llvm::Type *TypeParams[] = {
612         getIdentTyPointerTy(), CGM.Int32Ty,
613         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
614     llvm::FunctionType *FnTy =
615         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
616     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_critical");
617     break;
618   }
619   case OMPRTL__kmpc_cancel_barrier: {
620     // Build kmp_int32 __kmpc_cancel_barrier(ident_t *loc, kmp_int32
621     // global_tid);
622     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
623     llvm::FunctionType *FnTy =
624         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
625     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_cancel_barrier");
626     break;
627   }
628   case OMPRTL__kmpc_barrier: {
629     // Build void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid);
630     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
631     llvm::FunctionType *FnTy =
632         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
633     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_barrier");
634     break;
635   }
636   case OMPRTL__kmpc_for_static_fini: {
637     // Build void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid);
638     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
639     llvm::FunctionType *FnTy =
640         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
641     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_for_static_fini");
642     break;
643   }
644   case OMPRTL__kmpc_push_num_threads: {
645     // Build void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
646     // kmp_int32 num_threads)
647     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
648                                 CGM.Int32Ty};
649     llvm::FunctionType *FnTy =
650         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
651     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_num_threads");
652     break;
653   }
654   case OMPRTL__kmpc_serialized_parallel: {
655     // Build void __kmpc_serialized_parallel(ident_t *loc, kmp_int32
656     // global_tid);
657     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
658     llvm::FunctionType *FnTy =
659         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
660     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_serialized_parallel");
661     break;
662   }
663   case OMPRTL__kmpc_end_serialized_parallel: {
664     // Build void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32
665     // global_tid);
666     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
667     llvm::FunctionType *FnTy =
668         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
669     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_serialized_parallel");
670     break;
671   }
672   case OMPRTL__kmpc_flush: {
673     // Build void __kmpc_flush(ident_t *loc);
674     llvm::Type *TypeParams[] = {getIdentTyPointerTy()};
675     llvm::FunctionType *FnTy =
676         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
677     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_flush");
678     break;
679   }
680   case OMPRTL__kmpc_master: {
681     // Build kmp_int32 __kmpc_master(ident_t *loc, kmp_int32 global_tid);
682     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
683     llvm::FunctionType *FnTy =
684         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
685     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_master");
686     break;
687   }
688   case OMPRTL__kmpc_end_master: {
689     // Build void __kmpc_end_master(ident_t *loc, kmp_int32 global_tid);
690     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
691     llvm::FunctionType *FnTy =
692         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
693     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_master");
694     break;
695   }
696   case OMPRTL__kmpc_omp_taskyield: {
697     // Build kmp_int32 __kmpc_omp_taskyield(ident_t *, kmp_int32 global_tid,
698     // int end_part);
699     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
700     llvm::FunctionType *FnTy =
701         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
702     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_taskyield");
703     break;
704   }
705   case OMPRTL__kmpc_single: {
706     // Build kmp_int32 __kmpc_single(ident_t *loc, kmp_int32 global_tid);
707     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
708     llvm::FunctionType *FnTy =
709         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
710     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_single");
711     break;
712   }
713   case OMPRTL__kmpc_end_single: {
714     // Build void __kmpc_end_single(ident_t *loc, kmp_int32 global_tid);
715     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
716     llvm::FunctionType *FnTy =
717         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
718     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_single");
719     break;
720   }
721   case OMPRTL__kmpc_omp_task_alloc: {
722     // Build kmp_task_t *__kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
723     // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
724     // kmp_routine_entry_t *task_entry);
725     assert(KmpRoutineEntryPtrTy != nullptr &&
726            "Type kmp_routine_entry_t must be created.");
727     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty,
728                                 CGM.SizeTy, CGM.SizeTy, KmpRoutineEntryPtrTy};
729     // Return void * and then cast to particular kmp_task_t type.
730     llvm::FunctionType *FnTy =
731         llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
732     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_alloc");
733     break;
734   }
735   case OMPRTL__kmpc_omp_task: {
736     // Build kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t
737     // *new_task);
738     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
739                                 CGM.VoidPtrTy};
740     llvm::FunctionType *FnTy =
741         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
742     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task");
743     break;
744   }
745   case OMPRTL__kmpc_copyprivate: {
746     // Build void __kmpc_copyprivate(ident_t *loc, kmp_int32 global_tid,
747     // size_t cpy_size, void *cpy_data, void(*cpy_func)(void *, void *),
748     // kmp_int32 didit);
749     llvm::Type *CpyTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
750     auto *CpyFnTy =
751         llvm::FunctionType::get(CGM.VoidTy, CpyTypeParams, /*isVarArg=*/false);
752     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.SizeTy,
753                                 CGM.VoidPtrTy, CpyFnTy->getPointerTo(),
754                                 CGM.Int32Ty};
755     llvm::FunctionType *FnTy =
756         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
757     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_copyprivate");
758     break;
759   }
760   case OMPRTL__kmpc_reduce: {
761     // Build kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid,
762     // kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void
763     // (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name *lck);
764     llvm::Type *ReduceTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
765     auto *ReduceFnTy = llvm::FunctionType::get(CGM.VoidTy, ReduceTypeParams,
766                                                /*isVarArg=*/false);
767     llvm::Type *TypeParams[] = {
768         getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, CGM.SizeTy,
769         CGM.VoidPtrTy, ReduceFnTy->getPointerTo(),
770         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
771     llvm::FunctionType *FnTy =
772         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
773     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_reduce");
774     break;
775   }
776   case OMPRTL__kmpc_reduce_nowait: {
777     // Build kmp_int32 __kmpc_reduce_nowait(ident_t *loc, kmp_int32
778     // global_tid, kmp_int32 num_vars, size_t reduce_size, void *reduce_data,
779     // void (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name
780     // *lck);
781     llvm::Type *ReduceTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
782     auto *ReduceFnTy = llvm::FunctionType::get(CGM.VoidTy, ReduceTypeParams,
783                                                /*isVarArg=*/false);
784     llvm::Type *TypeParams[] = {
785         getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, CGM.SizeTy,
786         CGM.VoidPtrTy, ReduceFnTy->getPointerTo(),
787         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
788     llvm::FunctionType *FnTy =
789         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
790     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_reduce_nowait");
791     break;
792   }
793   case OMPRTL__kmpc_end_reduce: {
794     // Build void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,
795     // kmp_critical_name *lck);
796     llvm::Type *TypeParams[] = {
797         getIdentTyPointerTy(), CGM.Int32Ty,
798         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
799     llvm::FunctionType *FnTy =
800         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
801     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_reduce");
802     break;
803   }
804   case OMPRTL__kmpc_end_reduce_nowait: {
    // Build void __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
    // kmp_critical_name *lck);
807     llvm::Type *TypeParams[] = {
808         getIdentTyPointerTy(), CGM.Int32Ty,
809         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
810     llvm::FunctionType *FnTy =
811         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
812     RTLFn =
813         CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_reduce_nowait");
814     break;
815   }
816   case OMPRTL__kmpc_omp_task_begin_if0: {
    // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
819     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
820                                 CGM.VoidPtrTy};
821     llvm::FunctionType *FnTy =
822         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
823     RTLFn =
824         CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_begin_if0");
825     break;
826   }
827   case OMPRTL__kmpc_omp_task_complete_if0: {
    // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
830     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
831                                 CGM.VoidPtrTy};
832     llvm::FunctionType *FnTy =
833         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
834     RTLFn = CGM.CreateRuntimeFunction(FnTy,
835                                       /*Name=*/"__kmpc_omp_task_complete_if0");
836     break;
837   }
838   case OMPRTL__kmpc_ordered: {
839     // Build void __kmpc_ordered(ident_t *loc, kmp_int32 global_tid);
840     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
841     llvm::FunctionType *FnTy =
842         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
843     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_ordered");
844     break;
845   }
846   case OMPRTL__kmpc_end_ordered: {
847     // Build void __kmpc_end_ordered(ident_t *loc, kmp_int32 global_tid);
848     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
849     llvm::FunctionType *FnTy =
850         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
851     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_ordered");
852     break;
853   }
854   case OMPRTL__kmpc_omp_taskwait: {
855     // Build kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32 global_tid);
856     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
857     llvm::FunctionType *FnTy =
858         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
859     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_omp_taskwait");
860     break;
861   }
862   case OMPRTL__kmpc_taskgroup: {
863     // Build void __kmpc_taskgroup(ident_t *loc, kmp_int32 global_tid);
864     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
865     llvm::FunctionType *FnTy =
866         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
867     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_taskgroup");
868     break;
869   }
870   case OMPRTL__kmpc_end_taskgroup: {
871     // Build void __kmpc_end_taskgroup(ident_t *loc, kmp_int32 global_tid);
872     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
873     llvm::FunctionType *FnTy =
874         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
875     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_taskgroup");
876     break;
877   }
878   case OMPRTL__kmpc_push_proc_bind: {
879     // Build void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
880     // int proc_bind)
881     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
882     llvm::FunctionType *FnTy =
883         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
884     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_proc_bind");
885     break;
886   }
887   case OMPRTL__kmpc_omp_task_with_deps: {
888     // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
889     // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
890     // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list);
891     llvm::Type *TypeParams[] = {
892         getIdentTyPointerTy(), CGM.Int32Ty, CGM.VoidPtrTy, CGM.Int32Ty,
893         CGM.VoidPtrTy,         CGM.Int32Ty, CGM.VoidPtrTy};
894     llvm::FunctionType *FnTy =
895         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
896     RTLFn =
897         CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_with_deps");
898     break;
899   }
900   case OMPRTL__kmpc_omp_wait_deps: {
901     // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
902     // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 ndeps_noalias,
903     // kmp_depend_info_t *noalias_dep_list);
904     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
905                                 CGM.Int32Ty,           CGM.VoidPtrTy,
906                                 CGM.Int32Ty,           CGM.VoidPtrTy};
907     llvm::FunctionType *FnTy =
908         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
909     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_wait_deps");
910     break;
911   }
912   case OMPRTL__kmpc_cancellationpoint: {
913     // Build kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
914     // global_tid, kmp_int32 cncl_kind)
915     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
916     llvm::FunctionType *FnTy =
917         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
918     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_cancellationpoint");
919     break;
920   }
921   case OMPRTL__kmpc_cancel: {
922     // Build kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
923     // kmp_int32 cncl_kind)
924     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
925     llvm::FunctionType *FnTy =
926         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
927     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_cancel");
928     break;
929   }
930   case OMPRTL__tgt_target: {
931     // Build int32_t __tgt_target(int32_t device_id, void *host_ptr, int32_t
932     // arg_num, void** args_base, void **args, size_t *arg_sizes, int32_t
933     // *arg_types);
934     llvm::Type *TypeParams[] = {CGM.Int32Ty,
935                                 CGM.VoidPtrTy,
936                                 CGM.Int32Ty,
937                                 CGM.VoidPtrPtrTy,
938                                 CGM.VoidPtrPtrTy,
939                                 CGM.SizeTy->getPointerTo(),
940                                 CGM.Int32Ty->getPointerTo()};
941     llvm::FunctionType *FnTy =
942         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
943     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target");
944     break;
945   }
946   case OMPRTL__tgt_register_lib: {
947     // Build void __tgt_register_lib(__tgt_bin_desc *desc);
948     QualType ParamTy =
949         CGM.getContext().getPointerType(getTgtBinaryDescriptorQTy());
950     llvm::Type *TypeParams[] = {CGM.getTypes().ConvertTypeForMem(ParamTy)};
951     llvm::FunctionType *FnTy =
952         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
953     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_register_lib");
954     break;
955   }
956   case OMPRTL__tgt_unregister_lib: {
957     // Build void __tgt_unregister_lib(__tgt_bin_desc *desc);
958     QualType ParamTy =
959         CGM.getContext().getPointerType(getTgtBinaryDescriptorQTy());
960     llvm::Type *TypeParams[] = {CGM.getTypes().ConvertTypeForMem(ParamTy)};
961     llvm::FunctionType *FnTy =
962         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
963     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_unregister_lib");
964     break;
965   }
966   }
967   return RTLFn;
968 }
969 
970 llvm::Constant *CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize,
971                                                              bool IVSigned) {
972   assert((IVSize == 32 || IVSize == 64) &&
973          "IV size is not compatible with the omp runtime");
974   auto Name = IVSize == 32 ? (IVSigned ? "__kmpc_for_static_init_4"
975                                        : "__kmpc_for_static_init_4u")
976                            : (IVSigned ? "__kmpc_for_static_init_8"
977                                        : "__kmpc_for_static_init_8u");
978   auto ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
979   auto PtrTy = llvm::PointerType::getUnqual(ITy);
980   llvm::Type *TypeParams[] = {
981     getIdentTyPointerTy(),                     // loc
982     CGM.Int32Ty,                               // tid
983     CGM.Int32Ty,                               // schedtype
984     llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
985     PtrTy,                                     // p_lower
986     PtrTy,                                     // p_upper
987     PtrTy,                                     // p_stride
988     ITy,                                       // incr
989     ITy                                        // chunk
990   };
991   llvm::FunctionType *FnTy =
992       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
993   return CGM.CreateRuntimeFunction(FnTy, Name);
994 }
995 
996 llvm::Constant *CGOpenMPRuntime::createDispatchInitFunction(unsigned IVSize,
997                                                             bool IVSigned) {
998   assert((IVSize == 32 || IVSize == 64) &&
999          "IV size is not compatible with the omp runtime");
1000   auto Name =
1001       IVSize == 32
1002           ? (IVSigned ? "__kmpc_dispatch_init_4" : "__kmpc_dispatch_init_4u")
1003           : (IVSigned ? "__kmpc_dispatch_init_8" : "__kmpc_dispatch_init_8u");
1004   auto ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
1005   llvm::Type *TypeParams[] = { getIdentTyPointerTy(), // loc
1006                                CGM.Int32Ty,           // tid
1007                                CGM.Int32Ty,           // schedtype
1008                                ITy,                   // lower
1009                                ITy,                   // upper
1010                                ITy,                   // stride
1011                                ITy                    // chunk
1012   };
1013   llvm::FunctionType *FnTy =
1014       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1015   return CGM.CreateRuntimeFunction(FnTy, Name);
1016 }
1017 
1018 llvm::Constant *CGOpenMPRuntime::createDispatchFiniFunction(unsigned IVSize,
1019                                                             bool IVSigned) {
1020   assert((IVSize == 32 || IVSize == 64) &&
1021          "IV size is not compatible with the omp runtime");
1022   auto Name =
1023       IVSize == 32
1024           ? (IVSigned ? "__kmpc_dispatch_fini_4" : "__kmpc_dispatch_fini_4u")
1025           : (IVSigned ? "__kmpc_dispatch_fini_8" : "__kmpc_dispatch_fini_8u");
1026   llvm::Type *TypeParams[] = {
1027       getIdentTyPointerTy(), // loc
1028       CGM.Int32Ty,           // tid
1029   };
1030   llvm::FunctionType *FnTy =
1031       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1032   return CGM.CreateRuntimeFunction(FnTy, Name);
1033 }
1034 
1035 llvm::Constant *CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize,
1036                                                             bool IVSigned) {
1037   assert((IVSize == 32 || IVSize == 64) &&
1038          "IV size is not compatible with the omp runtime");
1039   auto Name =
1040       IVSize == 32
1041           ? (IVSigned ? "__kmpc_dispatch_next_4" : "__kmpc_dispatch_next_4u")
1042           : (IVSigned ? "__kmpc_dispatch_next_8" : "__kmpc_dispatch_next_8u");
1043   auto ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
1044   auto PtrTy = llvm::PointerType::getUnqual(ITy);
1045   llvm::Type *TypeParams[] = {
1046     getIdentTyPointerTy(),                     // loc
1047     CGM.Int32Ty,                               // tid
1048     llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
1049     PtrTy,                                     // p_lower
1050     PtrTy,                                     // p_upper
1051     PtrTy                                      // p_stride
1052   };
1053   llvm::FunctionType *FnTy =
1054       llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
1055   return CGM.CreateRuntimeFunction(FnTy, Name);
1056 }
1057 
1058 llvm::Constant *
1059 CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) {
1060   assert(!CGM.getLangOpts().OpenMPUseTLS ||
1061          !CGM.getContext().getTargetInfo().isTLSSupported());
1062   // Lookup the entry, lazily creating it if necessary.
1063   return getOrCreateInternalVariable(CGM.Int8PtrPtrTy,
1064                                      Twine(CGM.getMangledName(VD)) + ".cache.");
1065 }
1066 
1067 Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
1068                                                 const VarDecl *VD,
1069                                                 Address VDAddr,
1070                                                 SourceLocation Loc) {
1071   if (CGM.getLangOpts().OpenMPUseTLS &&
1072       CGM.getContext().getTargetInfo().isTLSSupported())
1073     return VDAddr;
1074 
1075   auto VarTy = VDAddr.getElementType();
1076   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
1077                          CGF.Builder.CreatePointerCast(VDAddr.getPointer(),
1078                                                        CGM.Int8PtrTy),
1079                          CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)),
1080                          getOrCreateThreadPrivateCache(VD)};
1081   return Address(CGF.EmitRuntimeCall(
1082       createRuntimeFunction(OMPRTL__kmpc_threadprivate_cached), Args),
1083                  VDAddr.getAlignment());
1084 }
1085 
1086 void CGOpenMPRuntime::emitThreadPrivateVarInit(
1087     CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor,
1088     llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) {
1089   // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime
1090   // library.
1091   auto OMPLoc = emitUpdateLocation(CGF, Loc);
1092   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_global_thread_num),
1093                       OMPLoc);
1094   // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor)
1095   // to register constructor/destructor for variable.
1096   llvm::Value *Args[] = {OMPLoc,
1097                          CGF.Builder.CreatePointerCast(VDAddr.getPointer(),
1098                                                        CGM.VoidPtrTy),
1099                          Ctor, CopyCtor, Dtor};
1100   CGF.EmitRuntimeCall(
1101       createRuntimeFunction(OMPRTL__kmpc_threadprivate_register), Args);
1102 }
1103 
1104 llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition(
1105     const VarDecl *VD, Address VDAddr, SourceLocation Loc,
1106     bool PerformInit, CodeGenFunction *CGF) {
1107   if (CGM.getLangOpts().OpenMPUseTLS &&
1108       CGM.getContext().getTargetInfo().isTLSSupported())
1109     return nullptr;
1110 
1111   VD = VD->getDefinition(CGM.getContext());
1112   if (VD && ThreadPrivateWithDefinition.count(VD) == 0) {
1113     ThreadPrivateWithDefinition.insert(VD);
1114     QualType ASTTy = VD->getType();
1115 
1116     llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr;
1117     auto Init = VD->getAnyInitializer();
1118     if (CGM.getLangOpts().CPlusPlus && PerformInit) {
1119       // Generate function that re-emits the declaration's initializer into the
1120       // threadprivate copy of the variable VD
1121       CodeGenFunction CtorCGF(CGM);
1122       FunctionArgList Args;
1123       ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, SourceLocation(),
1124                             /*Id=*/nullptr, CGM.getContext().VoidPtrTy);
1125       Args.push_back(&Dst);
1126 
1127       auto &FI = CGM.getTypes().arrangeFreeFunctionDeclaration(
1128           CGM.getContext().VoidPtrTy, Args, FunctionType::ExtInfo(),
1129           /*isVariadic=*/false);
1130       auto FTy = CGM.getTypes().GetFunctionType(FI);
1131       auto Fn = CGM.CreateGlobalInitOrDestructFunction(
1132           FTy, ".__kmpc_global_ctor_.", FI, Loc);
1133       CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI,
1134                             Args, SourceLocation());
1135       auto ArgVal = CtorCGF.EmitLoadOfScalar(
1136           CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
1137           CGM.getContext().VoidPtrTy, Dst.getLocation());
1138       Address Arg = Address(ArgVal, VDAddr.getAlignment());
1139       Arg = CtorCGF.Builder.CreateElementBitCast(Arg,
1140                                              CtorCGF.ConvertTypeForMem(ASTTy));
1141       CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(),
1142                                /*IsInitializer=*/true);
1143       ArgVal = CtorCGF.EmitLoadOfScalar(
1144           CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
1145           CGM.getContext().VoidPtrTy, Dst.getLocation());
1146       CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue);
1147       CtorCGF.FinishFunction();
1148       Ctor = Fn;
1149     }
1150     if (VD->getType().isDestructedType() != QualType::DK_none) {
1151       // Generate function that emits destructor call for the threadprivate copy
1152       // of the variable VD
1153       CodeGenFunction DtorCGF(CGM);
1154       FunctionArgList Args;
1155       ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, SourceLocation(),
1156                             /*Id=*/nullptr, CGM.getContext().VoidPtrTy);
1157       Args.push_back(&Dst);
1158 
1159       auto &FI = CGM.getTypes().arrangeFreeFunctionDeclaration(
1160           CGM.getContext().VoidTy, Args, FunctionType::ExtInfo(),
1161           /*isVariadic=*/false);
1162       auto FTy = CGM.getTypes().GetFunctionType(FI);
1163       auto Fn = CGM.CreateGlobalInitOrDestructFunction(
1164           FTy, ".__kmpc_global_dtor_.", FI, Loc);
1165       DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args,
1166                             SourceLocation());
1167       auto ArgVal = DtorCGF.EmitLoadOfScalar(
1168           DtorCGF.GetAddrOfLocalVar(&Dst),
1169           /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation());
1170       DtorCGF.emitDestroy(Address(ArgVal, VDAddr.getAlignment()), ASTTy,
1171                           DtorCGF.getDestroyer(ASTTy.isDestructedType()),
1172                           DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
1173       DtorCGF.FinishFunction();
1174       Dtor = Fn;
1175     }
1176     // Do not emit init function if it is not required.
1177     if (!Ctor && !Dtor)
1178       return nullptr;
1179 
1180     llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
1181     auto CopyCtorTy =
1182         llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs,
1183                                 /*isVarArg=*/false)->getPointerTo();
1184     // Copying constructor for the threadprivate variable.
1185     // Must be NULL - reserved by runtime, but currently it requires that this
1186     // parameter is always NULL. Otherwise it fires assertion.
1187     CopyCtor = llvm::Constant::getNullValue(CopyCtorTy);
1188     if (Ctor == nullptr) {
1189       auto CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
1190                                             /*isVarArg=*/false)->getPointerTo();
1191       Ctor = llvm::Constant::getNullValue(CtorTy);
1192     }
1193     if (Dtor == nullptr) {
1194       auto DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy,
1195                                             /*isVarArg=*/false)->getPointerTo();
1196       Dtor = llvm::Constant::getNullValue(DtorTy);
1197     }
1198     if (!CGF) {
1199       auto InitFunctionTy =
1200           llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false);
1201       auto InitFunction = CGM.CreateGlobalInitOrDestructFunction(
1202           InitFunctionTy, ".__omp_threadprivate_init_.",
1203           CGM.getTypes().arrangeNullaryFunction());
1204       CodeGenFunction InitCGF(CGM);
1205       FunctionArgList ArgList;
1206       InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction,
1207                             CGM.getTypes().arrangeNullaryFunction(), ArgList,
1208                             Loc);
1209       emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
1210       InitCGF.FinishFunction();
1211       return InitFunction;
1212     }
1213     emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
1214   }
1215   return nullptr;
1216 }
1217 
1218 /// \brief Emits code for OpenMP 'if' clause using specified \a CodeGen
1219 /// function. Here is the logic:
1220 /// if (Cond) {
1221 ///   ThenGen();
1222 /// } else {
1223 ///   ElseGen();
1224 /// }
1225 static void emitOMPIfClause(CodeGenFunction &CGF, const Expr *Cond,
1226                             const RegionCodeGenTy &ThenGen,
1227                             const RegionCodeGenTy &ElseGen) {
1228   CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange());
1229 
1230   // If the condition constant folds and can be elided, try to avoid emitting
1231   // the condition and the dead arm of the if/else.
1232   bool CondConstant;
1233   if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) {
1234     CodeGenFunction::RunCleanupsScope Scope(CGF);
1235     if (CondConstant) {
1236       ThenGen(CGF);
1237     } else {
1238       ElseGen(CGF);
1239     }
1240     return;
1241   }
1242 
1243   // Otherwise, the condition did not fold, or we couldn't elide it.  Just
1244   // emit the conditional branch.
1245   auto ThenBlock = CGF.createBasicBlock("omp_if.then");
1246   auto ElseBlock = CGF.createBasicBlock("omp_if.else");
1247   auto ContBlock = CGF.createBasicBlock("omp_if.end");
1248   CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0);
1249 
1250   // Emit the 'then' code.
1251   CGF.EmitBlock(ThenBlock);
1252   {
1253     CodeGenFunction::RunCleanupsScope ThenScope(CGF);
1254     ThenGen(CGF);
1255   }
1256   CGF.EmitBranch(ContBlock);
1257   // Emit the 'else' code if present.
1258   {
1259     // There is no need to emit line number for unconditional branch.
1260     auto NL = ApplyDebugLocation::CreateEmpty(CGF);
1261     CGF.EmitBlock(ElseBlock);
1262   }
1263   {
1264     CodeGenFunction::RunCleanupsScope ThenScope(CGF);
1265     ElseGen(CGF);
1266   }
1267   {
1268     // There is no need to emit line number for unconditional branch.
1269     auto NL = ApplyDebugLocation::CreateEmpty(CGF);
1270     CGF.EmitBranch(ContBlock);
1271   }
1272   // Emit the continuation block for code after the if.
1273   CGF.EmitBlock(ContBlock, /*IsFinished=*/true);
1274 }
1275 
1276 void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
1277                                        llvm::Value *OutlinedFn,
1278                                        ArrayRef<llvm::Value *> CapturedVars,
1279                                        const Expr *IfCond) {
1280   if (!CGF.HaveInsertPoint())
1281     return;
1282   auto *RTLoc = emitUpdateLocation(CGF, Loc);
1283   auto &&ThenGen = [this, OutlinedFn, CapturedVars,
1284                     RTLoc](CodeGenFunction &CGF) {
1285     // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn);
1286     llvm::Value *Args[] = {
1287         RTLoc,
1288         CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
1289         CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy())};
1290     llvm::SmallVector<llvm::Value *, 16> RealArgs;
1291     RealArgs.append(std::begin(Args), std::end(Args));
1292     RealArgs.append(CapturedVars.begin(), CapturedVars.end());
1293 
1294     auto RTLFn = createRuntimeFunction(OMPRTL__kmpc_fork_call);
1295     CGF.EmitRuntimeCall(RTLFn, RealArgs);
1296   };
1297   auto &&ElseGen = [this, OutlinedFn, CapturedVars, RTLoc,
1298                     Loc](CodeGenFunction &CGF) {
1299     auto ThreadID = getThreadID(CGF, Loc);
1300     // Build calls:
1301     // __kmpc_serialized_parallel(&Loc, GTid);
1302     llvm::Value *Args[] = {RTLoc, ThreadID};
1303     CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_serialized_parallel),
1304                         Args);
1305 
1306     // OutlinedFn(&GTid, &zero, CapturedStruct);
1307     auto ThreadIDAddr = emitThreadIDAddress(CGF, Loc);
1308     Address ZeroAddr =
1309       CGF.CreateTempAlloca(CGF.Int32Ty, CharUnits::fromQuantity(4),
1310                            /*Name*/ ".zero.addr");
1311     CGF.InitTempAlloca(ZeroAddr, CGF.Builder.getInt32(/*C*/ 0));
1312     llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
1313     OutlinedFnArgs.push_back(ThreadIDAddr.getPointer());
1314     OutlinedFnArgs.push_back(ZeroAddr.getPointer());
1315     OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());
1316     CGF.EmitCallOrInvoke(OutlinedFn, OutlinedFnArgs);
1317 
1318     // __kmpc_end_serialized_parallel(&Loc, GTid);
1319     llvm::Value *EndArgs[] = {emitUpdateLocation(CGF, Loc), ThreadID};
1320     CGF.EmitRuntimeCall(
1321         createRuntimeFunction(OMPRTL__kmpc_end_serialized_parallel), EndArgs);
1322   };
1323   if (IfCond) {
1324     emitOMPIfClause(CGF, IfCond, ThenGen, ElseGen);
1325   } else {
1326     CodeGenFunction::RunCleanupsScope Scope(CGF);
1327     ThenGen(CGF);
1328   }
1329 }
1330 
1331 // If we're inside an (outlined) parallel region, use the region info's
1332 // thread-ID variable (it is passed in a first argument of the outlined function
1333 // as "kmp_int32 *gtid"). Otherwise, if we're not inside parallel region, but in
1334 // regular serial code region, get thread ID by calling kmp_int32
1335 // kmpc_global_thread_num(ident_t *loc), stash this thread ID in a temporary and
1336 // return the address of that temp.
1337 Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF,
1338                                              SourceLocation Loc) {
1339   if (auto *OMPRegionInfo =
1340           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
1341     if (OMPRegionInfo->getThreadIDVariable())
1342       return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress();
1343 
1344   auto ThreadID = getThreadID(CGF, Loc);
1345   auto Int32Ty =
1346       CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true);
1347   auto ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp.");
1348   CGF.EmitStoreOfScalar(ThreadID,
1349                         CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty));
1350 
1351   return ThreadIDTemp;
1352 }
1353 
1354 llvm::Constant *
1355 CGOpenMPRuntime::getOrCreateInternalVariable(llvm::Type *Ty,
1356                                              const llvm::Twine &Name) {
1357   SmallString<256> Buffer;
1358   llvm::raw_svector_ostream Out(Buffer);
1359   Out << Name;
1360   auto RuntimeName = Out.str();
1361   auto &Elem = *InternalVars.insert(std::make_pair(RuntimeName, nullptr)).first;
1362   if (Elem.second) {
1363     assert(Elem.second->getType()->getPointerElementType() == Ty &&
1364            "OMP internal variable has different type than requested");
1365     return &*Elem.second;
1366   }
1367 
1368   return Elem.second = new llvm::GlobalVariable(
1369              CGM.getModule(), Ty, /*IsConstant*/ false,
1370              llvm::GlobalValue::CommonLinkage, llvm::Constant::getNullValue(Ty),
1371              Elem.first());
1372 }
1373 
1374 llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) {
1375   llvm::Twine Name(".gomp_critical_user_", CriticalName);
1376   return getOrCreateInternalVariable(KmpCriticalNameTy, Name.concat(".var"));
1377 }
1378 
1379 namespace {
1380 template <size_t N> class CallEndCleanup final : public EHScopeStack::Cleanup {
1381   llvm::Value *Callee;
1382   llvm::Value *Args[N];
1383 
1384 public:
1385   CallEndCleanup(llvm::Value *Callee, ArrayRef<llvm::Value *> CleanupArgs)
1386       : Callee(Callee) {
1387     assert(CleanupArgs.size() == N);
1388     std::copy(CleanupArgs.begin(), CleanupArgs.end(), std::begin(Args));
1389   }
1390   void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
1391     if (!CGF.HaveInsertPoint())
1392       return;
1393     CGF.EmitRuntimeCall(Callee, Args);
1394   }
1395 };
1396 } // anonymous namespace
1397 
1398 void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF,
1399                                          StringRef CriticalName,
1400                                          const RegionCodeGenTy &CriticalOpGen,
1401                                          SourceLocation Loc, const Expr *Hint) {
1402   // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]);
1403   // CriticalOpGen();
1404   // __kmpc_end_critical(ident_t *, gtid, Lock);
1405   // Prepare arguments and build a call to __kmpc_critical
1406   if (!CGF.HaveInsertPoint())
1407     return;
1408   CodeGenFunction::RunCleanupsScope Scope(CGF);
1409   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
1410                          getCriticalRegionLock(CriticalName)};
1411   if (Hint) {
1412     llvm::SmallVector<llvm::Value *, 8> ArgsWithHint(std::begin(Args),
1413                                                      std::end(Args));
1414     auto *HintVal = CGF.EmitScalarExpr(Hint);
1415     ArgsWithHint.push_back(
1416         CGF.Builder.CreateIntCast(HintVal, CGM.IntPtrTy, /*isSigned=*/false));
1417     CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_critical_with_hint),
1418                         ArgsWithHint);
1419   } else
1420     CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_critical), Args);
1421   // Build a call to __kmpc_end_critical
1422   CGF.EHStack.pushCleanup<CallEndCleanup<std::extent<decltype(Args)>::value>>(
1423       NormalAndEHCleanup, createRuntimeFunction(OMPRTL__kmpc_end_critical),
1424       llvm::makeArrayRef(Args));
1425   emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen);
1426 }
1427 
1428 static void emitIfStmt(CodeGenFunction &CGF, llvm::Value *IfCond,
1429                        OpenMPDirectiveKind Kind, SourceLocation Loc,
1430                        const RegionCodeGenTy &BodyOpGen) {
1431   llvm::Value *CallBool = CGF.EmitScalarConversion(
1432       IfCond,
1433       CGF.getContext().getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true),
1434       CGF.getContext().BoolTy, Loc);
1435 
1436   auto *ThenBlock = CGF.createBasicBlock("omp_if.then");
1437   auto *ContBlock = CGF.createBasicBlock("omp_if.end");
1438   // Generate the branch (If-stmt)
1439   CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock);
1440   CGF.EmitBlock(ThenBlock);
1441   CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, Kind, BodyOpGen);
1442   // Emit the rest of bblocks/branches
1443   CGF.EmitBranch(ContBlock);
1444   CGF.EmitBlock(ContBlock, true);
1445 }
1446 
1447 void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF,
1448                                        const RegionCodeGenTy &MasterOpGen,
1449                                        SourceLocation Loc) {
1450   if (!CGF.HaveInsertPoint())
1451     return;
1452   // if(__kmpc_master(ident_t *, gtid)) {
1453   //   MasterOpGen();
1454   //   __kmpc_end_master(ident_t *, gtid);
1455   // }
1456   // Prepare arguments and build a call to __kmpc_master
1457   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
1458   auto *IsMaster =
1459       CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_master), Args);
1460   typedef CallEndCleanup<std::extent<decltype(Args)>::value>
1461       MasterCallEndCleanup;
1462   emitIfStmt(
1463       CGF, IsMaster, OMPD_master, Loc, [&](CodeGenFunction &CGF) -> void {
1464         CodeGenFunction::RunCleanupsScope Scope(CGF);
1465         CGF.EHStack.pushCleanup<MasterCallEndCleanup>(
1466             NormalAndEHCleanup, createRuntimeFunction(OMPRTL__kmpc_end_master),
1467             llvm::makeArrayRef(Args));
1468         MasterOpGen(CGF);
1469       });
1470 }
1471 
1472 void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
1473                                         SourceLocation Loc) {
1474   if (!CGF.HaveInsertPoint())
1475     return;
1476   // Build call __kmpc_omp_taskyield(loc, thread_id, 0);
1477   llvm::Value *Args[] = {
1478       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
1479       llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)};
1480   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_taskyield), Args);
1481 }
1482 
1483 void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF,
1484                                           const RegionCodeGenTy &TaskgroupOpGen,
1485                                           SourceLocation Loc) {
1486   if (!CGF.HaveInsertPoint())
1487     return;
1488   // __kmpc_taskgroup(ident_t *, gtid);
1489   // TaskgroupOpGen();
1490   // __kmpc_end_taskgroup(ident_t *, gtid);
1491   // Prepare arguments and build a call to __kmpc_taskgroup
1492   {
1493     CodeGenFunction::RunCleanupsScope Scope(CGF);
1494     llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
1495     CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_taskgroup), Args);
1496     // Build a call to __kmpc_end_taskgroup
1497     CGF.EHStack.pushCleanup<CallEndCleanup<std::extent<decltype(Args)>::value>>(
1498         NormalAndEHCleanup, createRuntimeFunction(OMPRTL__kmpc_end_taskgroup),
1499         llvm::makeArrayRef(Args));
1500     emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen);
1501   }
1502 }
1503 
1504 /// Given an array of pointers to variables, project the address of a
1505 /// given variable.
1506 static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array,
1507                                       unsigned Index, const VarDecl *Var) {
1508   // Pull out the pointer to the variable.
1509   Address PtrAddr =
1510       CGF.Builder.CreateConstArrayGEP(Array, Index, CGF.getPointerSize());
1511   llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr);
1512 
1513   Address Addr = Address(Ptr, CGF.getContext().getDeclAlign(Var));
1514   Addr = CGF.Builder.CreateElementBitCast(
1515       Addr, CGF.ConvertTypeForMem(Var->getType()));
1516   return Addr;
1517 }
1518 
/// Emit the internal helper ".omp.copyprivate.copy_func" with the signature
///   void copy_func(void *LHSArg, void *RHSArg);
/// Both arguments are cast to \p ArgsType and treated as arrays of pointers to
/// variables; for every index I the helper copies the variable referenced by
/// the I-th slot of RHSArg into the one referenced by the I-th slot of LHSArg.
///
/// \param ArgsType LLVM type both 'void *' arguments are cast to.
/// \param CopyprivateVars DeclRefExprs whose declarations supply the type
/// used for each copy.
/// \param DestExprs DeclRefExprs of the variables bound to the LHSArg slots.
/// \param SrcExprs DeclRefExprs of the variables bound to the RHSArg slots.
/// \param AssignmentOps Per-variable copy expressions handed to
/// EmitOMPCopy(); the number of entries determines how many copies are
/// emitted.
/// \returns The newly created function.
static llvm::Value *emitCopyprivateCopyFunction(
    CodeGenModule &CGM, llvm::Type *ArgsType,
    ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs,
    ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps) {
  auto &C = CGM.getContext();
  // void copy_func(void *LHSArg, void *RHSArg);
  FunctionArgList Args;
  ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, SourceLocation(), /*Id=*/nullptr,
                           C.VoidPtrTy);
  ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, SourceLocation(), /*Id=*/nullptr,
                           C.VoidPtrTy);
  Args.push_back(&LHSArg);
  Args.push_back(&RHSArg);
  FunctionType::ExtInfo EI;
  auto &CGFI = CGM.getTypes().arrangeFreeFunctionDeclaration(
      C.VoidTy, Args, EI, /*isVariadic=*/false);
  // The helper has internal linkage and is added to the current module.
  auto *Fn = llvm::Function::Create(
      CGM.getTypes().GetFunctionType(CGFI), llvm::GlobalValue::InternalLinkage,
      ".omp.copyprivate.copy_func", &CGM.getModule());
  CGM.SetInternalFunctionAttributes(/*D=*/nullptr, Fn, CGFI);
  // A separate CodeGenFunction is used to emit the body of the helper.
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args);
  // Dest = (void*[n])(LHSArg);
  // Src = (void*[n])(RHSArg);
  Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
      ArgsType), CGF.getPointerAlign());
  Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
      ArgsType), CGF.getPointerAlign());
  // *(Type0*)Dst[0] = *(Type0*)Src[0];
  // *(Type1*)Dst[1] = *(Type1*)Src[1];
  // ...
  // *(Typen*)Dst[n] = *(Typen*)Src[n];
  for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) {
    // Address of the destination variable, taken from slot I of LHSArg.
    auto DestVar = cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl());
    Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar);

    // Address of the source variable, taken from slot I of RHSArg.
    auto SrcVar = cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl());
    Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar);

    // The copied type comes from the original copyprivate variable.
    auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl();
    QualType Type = VD->getType();
    CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]);
  }
  CGF.FinishFunction();
  return Fn;
}
1567 
/// Emit a 'single' region, optionally followed by the copyprivate broadcast.
///
/// The region body is guarded by __kmpc_single and terminated by
/// __kmpc_end_single (pushed as a cleanup). When \p CopyprivateVars is
/// non-empty, a 32-bit 'did_it' flag records whether this thread executed the
/// body, and __kmpc_copyprivate is called afterwards with the list of
/// variable addresses and a generated copy function.
///
/// \param SingleOpGen Generator for the body of the region.
/// \param CopyprivateVars Variables named in 'copyprivate' clauses.
/// \param SrcExprs, DstExprs, AssignmentOps Per-variable helper expressions;
/// all four arrays must have the same length (asserted below).
void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF,
                                       const RegionCodeGenTy &SingleOpGen,
                                       SourceLocation Loc,
                                       ArrayRef<const Expr *> CopyprivateVars,
                                       ArrayRef<const Expr *> SrcExprs,
                                       ArrayRef<const Expr *> DstExprs,
                                       ArrayRef<const Expr *> AssignmentOps) {
  if (!CGF.HaveInsertPoint())
    return;
  assert(CopyprivateVars.size() == SrcExprs.size() &&
         CopyprivateVars.size() == DstExprs.size() &&
         CopyprivateVars.size() == AssignmentOps.size());
  auto &C = CGM.getContext();
  // int32 did_it = 0;
  // if(__kmpc_single(ident_t *, gtid)) {
  //   SingleOpGen();
  //   __kmpc_end_single(ident_t *, gtid);
  //   did_it = 1;
  // }
  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
  // <copy_func>, did_it);

  // DidIt stays invalid when there is nothing to broadcast; its validity is
  // what later selects the copyprivate code path.
  Address DidIt = Address::invalid();
  if (!CopyprivateVars.empty()) {
    // int32 did_it = 0;
    auto KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
    DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it");
    CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt);
  }
  // Prepare arguments and build a call to __kmpc_single
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  auto *IsSingle =
      CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_single), Args);
  typedef CallEndCleanup<std::extent<decltype(Args)>::value>
      SingleCallEndCleanup;
  emitIfStmt(
      CGF, IsSingle, OMPD_single, Loc, [&](CodeGenFunction &CGF) -> void {
        CodeGenFunction::RunCleanupsScope Scope(CGF);
        // __kmpc_end_single is a cleanup so that it runs on every exit from
        // the guarded body.
        CGF.EHStack.pushCleanup<SingleCallEndCleanup>(
            NormalAndEHCleanup, createRuntimeFunction(OMPRTL__kmpc_end_single),
            llvm::makeArrayRef(Args));
        SingleOpGen(CGF);
        if (DidIt.isValid()) {
          // did_it = 1;
          CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt);
        }
      });
  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
  // <copy_func>, did_it);
  if (DidIt.isValid()) {
    llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size());
    auto CopyprivateArrayTy =
        C.getConstantArrayType(C.VoidPtrTy, ArraySize, ArrayType::Normal,
                               /*IndexTypeQuals=*/0);
    // Create a list of all private variables for copyprivate.
    Address CopyprivateList =
        CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list");
    for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) {
      // Slot I receives the address of the I-th variable as a 'void *'.
      Address Elem = CGF.Builder.CreateConstArrayGEP(
          CopyprivateList, I, CGF.getPointerSize());
      CGF.Builder.CreateStore(
          CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
              CGF.EmitLValue(CopyprivateVars[I]).getPointer(), CGF.VoidPtrTy),
          Elem);
    }
    // Build function that copies private values from single region to all other
    // threads in the corresponding parallel region.
    // NOTE(review): SrcExprs is passed in the helper's DestExprs position and
    // DstExprs in its SrcExprs position. Confirm against the declaration of
    // emitSingleRegion and its callers whether the parameter names of this
    // definition are swapped or the argument order here is.
    auto *CpyFn = emitCopyprivateCopyFunction(
        CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy)->getPointerTo(),
        CopyprivateVars, SrcExprs, DstExprs, AssignmentOps);
    auto *BufSize = CGF.getTypeSize(CopyprivateArrayTy);
    Address CL =
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(CopyprivateList,
                                                      CGF.VoidPtrTy);
    auto *DidItVal = CGF.Builder.CreateLoad(DidIt);
    llvm::Value *Args[] = {
        emitUpdateLocation(CGF, Loc), // ident_t *<loc>
        getThreadID(CGF, Loc),        // i32 <gtid>
        BufSize,                      // size_t <buf_size>
        CL.getPointer(),              // void *<copyprivate list>
        CpyFn,                        // void (*) (void *, void *) <copy_func>
        DidItVal                      // i32 did_it
    };
    CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_copyprivate), Args);
  }
}
1654 
1655 void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF,
1656                                         const RegionCodeGenTy &OrderedOpGen,
1657                                         SourceLocation Loc, bool IsThreads) {
1658   if (!CGF.HaveInsertPoint())
1659     return;
1660   // __kmpc_ordered(ident_t *, gtid);
1661   // OrderedOpGen();
1662   // __kmpc_end_ordered(ident_t *, gtid);
1663   // Prepare arguments and build a call to __kmpc_ordered
1664   CodeGenFunction::RunCleanupsScope Scope(CGF);
1665   if (IsThreads) {
1666     llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
1667     CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_ordered), Args);
1668     // Build a call to __kmpc_end_ordered
1669     CGF.EHStack.pushCleanup<CallEndCleanup<std::extent<decltype(Args)>::value>>(
1670         NormalAndEHCleanup, createRuntimeFunction(OMPRTL__kmpc_end_ordered),
1671         llvm::makeArrayRef(Args));
1672   }
1673   emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
1674 }
1675 
1676 void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
1677                                       OpenMPDirectiveKind Kind, bool EmitChecks,
1678                                       bool ForceSimpleCall) {
1679   if (!CGF.HaveInsertPoint())
1680     return;
1681   // Build call __kmpc_cancel_barrier(loc, thread_id);
1682   // Build call __kmpc_barrier(loc, thread_id);
1683   OpenMPLocationFlags Flags = OMP_IDENT_KMPC;
1684   if (Kind == OMPD_for) {
1685     Flags =
1686         static_cast<OpenMPLocationFlags>(Flags | OMP_IDENT_BARRIER_IMPL_FOR);
1687   } else if (Kind == OMPD_sections) {
1688     Flags = static_cast<OpenMPLocationFlags>(Flags |
1689                                              OMP_IDENT_BARRIER_IMPL_SECTIONS);
1690   } else if (Kind == OMPD_single) {
1691     Flags =
1692         static_cast<OpenMPLocationFlags>(Flags | OMP_IDENT_BARRIER_IMPL_SINGLE);
1693   } else if (Kind == OMPD_barrier) {
1694     Flags = static_cast<OpenMPLocationFlags>(Flags | OMP_IDENT_BARRIER_EXPL);
1695   } else {
1696     Flags = static_cast<OpenMPLocationFlags>(Flags | OMP_IDENT_BARRIER_IMPL);
1697   }
1698   // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc,
1699   // thread_id);
1700   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags),
1701                          getThreadID(CGF, Loc)};
1702   if (auto *OMPRegionInfo =
1703           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
1704     if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) {
1705       auto *Result = CGF.EmitRuntimeCall(
1706           createRuntimeFunction(OMPRTL__kmpc_cancel_barrier), Args);
1707       if (EmitChecks) {
1708         // if (__kmpc_cancel_barrier()) {
1709         //   exit from construct;
1710         // }
1711         auto *ExitBB = CGF.createBasicBlock(".cancel.exit");
1712         auto *ContBB = CGF.createBasicBlock(".cancel.continue");
1713         auto *Cmp = CGF.Builder.CreateIsNotNull(Result);
1714         CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
1715         CGF.EmitBlock(ExitBB);
1716         //   exit from construct;
1717         auto CancelDestination =
1718             CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
1719         CGF.EmitBranchThroughCleanup(CancelDestination);
1720         CGF.EmitBlock(ContBB, /*IsFinished=*/true);
1721       }
1722       return;
1723     }
1724   }
1725   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_barrier), Args);
1726 }
1727 
/// \brief Schedule types for 'omp for' loops (these enumerators are taken from
/// the enum sched_type in kmp.h).
///
/// Every OMP_ord_* value equals the OMP_sch_* value of the same name plus 32.
/// The numeric values are passed to the runtime as the schedule argument, so
/// they must not be changed independently of kmp.h.
enum OpenMPSchedType {
  /// \brief Lower bound for default (unordered) versions.
  OMP_sch_lower = 32,
  OMP_sch_static_chunked = 33,
  OMP_sch_static = 34,
  OMP_sch_dynamic_chunked = 35,
  OMP_sch_guided_chunked = 36,
  OMP_sch_runtime = 37,
  OMP_sch_auto = 38,
  /// \brief Lower bound for 'ordered' versions.
  OMP_ord_lower = 64,
  OMP_ord_static_chunked = 65,
  OMP_ord_static = 66,
  OMP_ord_dynamic_chunked = 67,
  OMP_ord_guided_chunked = 68,
  OMP_ord_runtime = 69,
  OMP_ord_auto = 70,
  /// \brief Default schedule: an alias of the non-chunked static schedule.
  OMP_sch_default = OMP_sch_static,
};
1749 
1750 /// \brief Map the OpenMP loop schedule to the runtime enumeration.
1751 static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind,
1752                                           bool Chunked, bool Ordered) {
1753   switch (ScheduleKind) {
1754   case OMPC_SCHEDULE_static:
1755     return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked)
1756                    : (Ordered ? OMP_ord_static : OMP_sch_static);
1757   case OMPC_SCHEDULE_dynamic:
1758     return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked;
1759   case OMPC_SCHEDULE_guided:
1760     return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked;
1761   case OMPC_SCHEDULE_runtime:
1762     return Ordered ? OMP_ord_runtime : OMP_sch_runtime;
1763   case OMPC_SCHEDULE_auto:
1764     return Ordered ? OMP_ord_auto : OMP_sch_auto;
1765   case OMPC_SCHEDULE_unknown:
1766     assert(!Chunked && "chunk was specified but schedule kind not known");
1767     return Ordered ? OMP_ord_static : OMP_sch_static;
1768   }
1769   llvm_unreachable("Unexpected runtime schedule");
1770 }
1771 
1772 bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind,
1773                                          bool Chunked) const {
1774   auto Schedule = getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
1775   return Schedule == OMP_sch_static;
1776 }
1777 
1778 bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const {
1779   auto Schedule =
1780       getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false);
1781   assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here");
1782   return Schedule != OMP_sch_static;
1783 }
1784 
1785 void CGOpenMPRuntime::emitForDispatchInit(CodeGenFunction &CGF,
1786                                           SourceLocation Loc,
1787                                           OpenMPScheduleClauseKind ScheduleKind,
1788                                           unsigned IVSize, bool IVSigned,
1789                                           bool Ordered, llvm::Value *UB,
1790                                           llvm::Value *Chunk) {
1791   if (!CGF.HaveInsertPoint())
1792     return;
1793   OpenMPSchedType Schedule =
1794       getRuntimeSchedule(ScheduleKind, Chunk != nullptr, Ordered);
1795   assert(Ordered ||
1796          (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked &&
1797           Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked));
1798   // Call __kmpc_dispatch_init(
1799   //          ident_t *loc, kmp_int32 tid, kmp_int32 schedule,
1800   //          kmp_int[32|64] lower, kmp_int[32|64] upper,
1801   //          kmp_int[32|64] stride, kmp_int[32|64] chunk);
1802 
1803   // If the Chunk was not specified in the clause - use default value 1.
1804   if (Chunk == nullptr)
1805     Chunk = CGF.Builder.getIntN(IVSize, 1);
1806   llvm::Value *Args[] = {
1807     emitUpdateLocation(CGF, Loc, OMP_IDENT_KMPC),
1808     getThreadID(CGF, Loc),
1809     CGF.Builder.getInt32(Schedule), // Schedule type
1810     CGF.Builder.getIntN(IVSize, 0), // Lower
1811     UB,                             // Upper
1812     CGF.Builder.getIntN(IVSize, 1), // Stride
1813     Chunk                           // Chunk
1814   };
1815   CGF.EmitRuntimeCall(createDispatchInitFunction(IVSize, IVSigned), Args);
1816 }
1817 
1818 void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF,
1819                                         SourceLocation Loc,
1820                                         OpenMPScheduleClauseKind ScheduleKind,
1821                                         unsigned IVSize, bool IVSigned,
1822                                         bool Ordered, Address IL, Address LB,
1823                                         Address UB, Address ST,
1824                                         llvm::Value *Chunk) {
1825   if (!CGF.HaveInsertPoint())
1826     return;
1827   OpenMPSchedType Schedule =
1828     getRuntimeSchedule(ScheduleKind, Chunk != nullptr, Ordered);
1829   assert(!Ordered);
1830   assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked ||
1831          Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked);
1832 
1833   // Call __kmpc_for_static_init(
1834   //          ident_t *loc, kmp_int32 tid, kmp_int32 schedtype,
1835   //          kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower,
1836   //          kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
1837   //          kmp_int[32|64] incr, kmp_int[32|64] chunk);
1838   if (Chunk == nullptr) {
1839     assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static) &&
1840            "expected static non-chunked schedule");
1841     // If the Chunk was not specified in the clause - use default value 1.
1842       Chunk = CGF.Builder.getIntN(IVSize, 1);
1843   } else {
1844     assert((Schedule == OMP_sch_static_chunked ||
1845             Schedule == OMP_ord_static_chunked) &&
1846            "expected static chunked schedule");
1847   }
1848   llvm::Value *Args[] = {
1849     emitUpdateLocation(CGF, Loc, OMP_IDENT_KMPC),
1850     getThreadID(CGF, Loc),
1851     CGF.Builder.getInt32(Schedule), // Schedule type
1852     IL.getPointer(),                // &isLastIter
1853     LB.getPointer(),                // &LB
1854     UB.getPointer(),                // &UB
1855     ST.getPointer(),                // &Stride
1856     CGF.Builder.getIntN(IVSize, 1), // Incr
1857     Chunk                           // Chunk
1858   };
1859   CGF.EmitRuntimeCall(createForStaticInitFunction(IVSize, IVSigned), Args);
1860 }
1861 
1862 void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF,
1863                                           SourceLocation Loc) {
1864   if (!CGF.HaveInsertPoint())
1865     return;
1866   // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid);
1867   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, OMP_IDENT_KMPC),
1868                          getThreadID(CGF, Loc)};
1869   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_for_static_fini),
1870                       Args);
1871 }
1872 
1873 void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
1874                                                  SourceLocation Loc,
1875                                                  unsigned IVSize,
1876                                                  bool IVSigned) {
1877   if (!CGF.HaveInsertPoint())
1878     return;
1879   // Call __kmpc_for_dynamic_fini_(4|8)[u](ident_t *loc, kmp_int32 tid);
1880   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, OMP_IDENT_KMPC),
1881                          getThreadID(CGF, Loc)};
1882   CGF.EmitRuntimeCall(createDispatchFiniFunction(IVSize, IVSigned), Args);
1883 }
1884 
1885 llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF,
1886                                           SourceLocation Loc, unsigned IVSize,
1887                                           bool IVSigned, Address IL,
1888                                           Address LB, Address UB,
1889                                           Address ST) {
1890   // Call __kmpc_dispatch_next(
1891   //          ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter,
1892   //          kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper,
1893   //          kmp_int[32|64] *p_stride);
1894   llvm::Value *Args[] = {
1895       emitUpdateLocation(CGF, Loc, OMP_IDENT_KMPC), getThreadID(CGF, Loc),
1896       IL.getPointer(), // &isLastIter
1897       LB.getPointer(), // &Lower
1898       UB.getPointer(), // &Upper
1899       ST.getPointer()  // &Stride
1900   };
1901   llvm::Value *Call =
1902       CGF.EmitRuntimeCall(createDispatchNextFunction(IVSize, IVSigned), Args);
1903   return CGF.EmitScalarConversion(
1904       Call, CGF.getContext().getIntTypeForBitwidth(32, /* Signed */ true),
1905       CGF.getContext().BoolTy, Loc);
1906 }
1907 
1908 void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
1909                                            llvm::Value *NumThreads,
1910                                            SourceLocation Loc) {
1911   if (!CGF.HaveInsertPoint())
1912     return;
1913   // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads)
1914   llvm::Value *Args[] = {
1915       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
1916       CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)};
1917   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_num_threads),
1918                       Args);
1919 }
1920 
1921 void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF,
1922                                          OpenMPProcBindClauseKind ProcBind,
1923                                          SourceLocation Loc) {
1924   if (!CGF.HaveInsertPoint())
1925     return;
1926   // Constants for proc bind value accepted by the runtime.
1927   enum ProcBindTy {
1928     ProcBindFalse = 0,
1929     ProcBindTrue,
1930     ProcBindMaster,
1931     ProcBindClose,
1932     ProcBindSpread,
1933     ProcBindIntel,
1934     ProcBindDefault
1935   } RuntimeProcBind;
1936   switch (ProcBind) {
1937   case OMPC_PROC_BIND_master:
1938     RuntimeProcBind = ProcBindMaster;
1939     break;
1940   case OMPC_PROC_BIND_close:
1941     RuntimeProcBind = ProcBindClose;
1942     break;
1943   case OMPC_PROC_BIND_spread:
1944     RuntimeProcBind = ProcBindSpread;
1945     break;
1946   case OMPC_PROC_BIND_unknown:
1947     llvm_unreachable("Unsupported proc_bind value.");
1948   }
1949   // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind)
1950   llvm::Value *Args[] = {
1951       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
1952       llvm::ConstantInt::get(CGM.IntTy, RuntimeProcBind, /*isSigned=*/true)};
1953   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_proc_bind), Args);
1954 }
1955 
1956 void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>,
1957                                 SourceLocation Loc) {
1958   if (!CGF.HaveInsertPoint())
1959     return;
1960   // Build call void __kmpc_flush(ident_t *loc)
1961   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_flush),
1962                       emitUpdateLocation(CGF, Loc));
1963 }
1964 
namespace {
/// \brief Indexes of fields for type kmp_task_t.
///
/// The enumerators have no explicit values, so they are numbered 0, 1, 2, 3 in
/// declaration order; reordering them changes which field each name denotes.
enum KmpTaskTFields {
  /// \brief List of shared variables.
  KmpTaskTShareds,
  /// \brief Task routine.
  KmpTaskTRoutine,
  /// \brief Partition id for the untied tasks.
  KmpTaskTPartId,
  /// \brief Function with call of destructors for private variables.
  KmpTaskTDestructors,
};
} // anonymous namespace
1978 
1979 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::empty() const {
1980   // FIXME: Add other entries type when they become supported.
1981   return OffloadEntriesTargetRegion.empty();
1982 }
1983 
1984 /// \brief Initialize target region entry.
1985 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
1986     initializeTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
1987                                     StringRef ParentName, unsigned LineNum,
1988                                     unsigned ColNum, unsigned Order) {
1989   assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
1990                                              "only required for the device "
1991                                              "code generation.");
1992   OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum][ColNum] =
1993       OffloadEntryInfoTargetRegion(Order, /*Addr=*/nullptr, /*ID=*/nullptr);
1994   ++OffloadingEntriesNum;
1995 }
1996 
1997 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
1998     registerTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
1999                                   StringRef ParentName, unsigned LineNum,
2000                                   unsigned ColNum, llvm::Constant *Addr,
2001                                   llvm::Constant *ID) {
2002   // If we are emitting code for a target, the entry is already initialized,
2003   // only has to be registered.
2004   if (CGM.getLangOpts().OpenMPIsDevice) {
2005     assert(hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum,
2006                                     ColNum) &&
2007            "Entry must exist.");
2008     auto &Entry = OffloadEntriesTargetRegion[DeviceID][FileID][ParentName]
2009                                             [LineNum][ColNum];
2010     assert(Entry.isValid() && "Entry not initialized!");
2011     Entry.setAddress(Addr);
2012     Entry.setID(ID);
2013     return;
2014   } else {
2015     OffloadEntryInfoTargetRegion Entry(OffloadingEntriesNum++, Addr, ID);
2016     OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum][ColNum] =
2017         Entry;
2018   }
2019 }
2020 
2021 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::hasTargetRegionEntryInfo(
2022     unsigned DeviceID, unsigned FileID, StringRef ParentName, unsigned LineNum,
2023     unsigned ColNum) const {
2024   auto PerDevice = OffloadEntriesTargetRegion.find(DeviceID);
2025   if (PerDevice == OffloadEntriesTargetRegion.end())
2026     return false;
2027   auto PerFile = PerDevice->second.find(FileID);
2028   if (PerFile == PerDevice->second.end())
2029     return false;
2030   auto PerParentName = PerFile->second.find(ParentName);
2031   if (PerParentName == PerFile->second.end())
2032     return false;
2033   auto PerLine = PerParentName->second.find(LineNum);
2034   if (PerLine == PerParentName->second.end())
2035     return false;
2036   auto PerColumn = PerLine->second.find(ColNum);
2037   if (PerColumn == PerLine->second.end())
2038     return false;
2039   // Fail if this entry is already registered.
2040   if (PerColumn->second.getAddress() || PerColumn->second.getID())
2041     return false;
2042   return true;
2043 }
2044 
2045 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::actOnTargetRegionEntriesInfo(
2046     const OffloadTargetRegionEntryInfoActTy &Action) {
2047   // Scan all target region entries and perform the provided action.
2048   for (auto &D : OffloadEntriesTargetRegion)
2049     for (auto &F : D.second)
2050       for (auto &P : F.second)
2051         for (auto &L : P.second)
2052           for (auto &C : L.second)
2053             Action(D.first, F.first, P.first(), L.first, C.first, C.second);
2054 }
2055 
2056 /// \brief Create a Ctor/Dtor-like function whose body is emitted through
2057 /// \a Codegen. This is used to emit the two functions that register and
2058 /// unregister the descriptor of the current compilation unit.
2059 static llvm::Function *
2060 createOffloadingBinaryDescriptorFunction(CodeGenModule &CGM, StringRef Name,
2061                                          const RegionCodeGenTy &Codegen) {
2062   auto &C = CGM.getContext();
2063   FunctionArgList Args;
2064   ImplicitParamDecl DummyPtr(C, /*DC=*/nullptr, SourceLocation(),
2065                              /*Id=*/nullptr, C.VoidPtrTy);
2066   Args.push_back(&DummyPtr);
2067 
2068   CodeGenFunction CGF(CGM);
2069   GlobalDecl();
2070   auto &FI = CGM.getTypes().arrangeFreeFunctionDeclaration(
2071       C.VoidTy, Args, FunctionType::ExtInfo(),
2072       /*isVariadic=*/false);
2073   auto FTy = CGM.getTypes().GetFunctionType(FI);
2074   auto *Fn =
2075       CGM.CreateGlobalInitOrDestructFunction(FTy, Name, FI, SourceLocation());
2076   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FI, Args, SourceLocation());
2077   Codegen(CGF);
2078   CGF.FinishFunction();
2079   return Fn;
2080 }
2081 
2082 llvm::Function *
2083 CGOpenMPRuntime::createOffloadingBinaryDescriptorRegistration() {
2084 
2085   // If we don't have entries or if we are emitting code for the device, we
2086   // don't need to do anything.
2087   if (CGM.getLangOpts().OpenMPIsDevice || OffloadEntriesInfoManager.empty())
2088     return nullptr;
2089 
2090   auto &M = CGM.getModule();
2091   auto &C = CGM.getContext();
2092 
2093   // Get list of devices we care about
2094   auto &Devices = CGM.getLangOpts().OMPTargetTriples;
2095 
2096   // We should be creating an offloading descriptor only if there are devices
2097   // specified.
2098   assert(!Devices.empty() && "No OpenMP offloading devices??");
2099 
2100   // Create the external variables that will point to the begin and end of the
2101   // host entries section. These will be defined by the linker.
2102   auto *OffloadEntryTy =
2103       CGM.getTypes().ConvertTypeForMem(getTgtOffloadEntryQTy());
2104   llvm::GlobalVariable *HostEntriesBegin = new llvm::GlobalVariable(
2105       M, OffloadEntryTy, /*isConstant=*/true,
2106       llvm::GlobalValue::ExternalLinkage, /*Initializer=*/nullptr,
2107       ".omp_offloading.entries_begin");
2108   llvm::GlobalVariable *HostEntriesEnd = new llvm::GlobalVariable(
2109       M, OffloadEntryTy, /*isConstant=*/true,
2110       llvm::GlobalValue::ExternalLinkage, /*Initializer=*/nullptr,
2111       ".omp_offloading.entries_end");
2112 
2113   // Create all device images
2114   llvm::SmallVector<llvm::Constant *, 4> DeviceImagesEntires;
2115   auto *DeviceImageTy = cast<llvm::StructType>(
2116       CGM.getTypes().ConvertTypeForMem(getTgtDeviceImageQTy()));
2117 
2118   for (unsigned i = 0; i < Devices.size(); ++i) {
2119     StringRef T = Devices[i].getTriple();
2120     auto *ImgBegin = new llvm::GlobalVariable(
2121         M, CGM.Int8Ty, /*isConstant=*/true, llvm::GlobalValue::ExternalLinkage,
2122         /*Initializer=*/nullptr,
2123         Twine(".omp_offloading.img_start.") + Twine(T));
2124     auto *ImgEnd = new llvm::GlobalVariable(
2125         M, CGM.Int8Ty, /*isConstant=*/true, llvm::GlobalValue::ExternalLinkage,
2126         /*Initializer=*/nullptr, Twine(".omp_offloading.img_end.") + Twine(T));
2127 
2128     llvm::Constant *Dev =
2129         llvm::ConstantStruct::get(DeviceImageTy, ImgBegin, ImgEnd,
2130                                   HostEntriesBegin, HostEntriesEnd, nullptr);
2131     DeviceImagesEntires.push_back(Dev);
2132   }
2133 
2134   // Create device images global array.
2135   llvm::ArrayType *DeviceImagesInitTy =
2136       llvm::ArrayType::get(DeviceImageTy, DeviceImagesEntires.size());
2137   llvm::Constant *DeviceImagesInit =
2138       llvm::ConstantArray::get(DeviceImagesInitTy, DeviceImagesEntires);
2139 
2140   llvm::GlobalVariable *DeviceImages = new llvm::GlobalVariable(
2141       M, DeviceImagesInitTy, /*isConstant=*/true,
2142       llvm::GlobalValue::InternalLinkage, DeviceImagesInit,
2143       ".omp_offloading.device_images");
2144   DeviceImages->setUnnamedAddr(true);
2145 
2146   // This is a Zero array to be used in the creation of the constant expressions
2147   llvm::Constant *Index[] = {llvm::Constant::getNullValue(CGM.Int32Ty),
2148                              llvm::Constant::getNullValue(CGM.Int32Ty)};
2149 
2150   // Create the target region descriptor.
2151   auto *BinaryDescriptorTy = cast<llvm::StructType>(
2152       CGM.getTypes().ConvertTypeForMem(getTgtBinaryDescriptorQTy()));
2153   llvm::Constant *TargetRegionsDescriptorInit = llvm::ConstantStruct::get(
2154       BinaryDescriptorTy, llvm::ConstantInt::get(CGM.Int32Ty, Devices.size()),
2155       llvm::ConstantExpr::getGetElementPtr(DeviceImagesInitTy, DeviceImages,
2156                                            Index),
2157       HostEntriesBegin, HostEntriesEnd, nullptr);
2158 
2159   auto *Desc = new llvm::GlobalVariable(
2160       M, BinaryDescriptorTy, /*isConstant=*/true,
2161       llvm::GlobalValue::InternalLinkage, TargetRegionsDescriptorInit,
2162       ".omp_offloading.descriptor");
2163 
2164   // Emit code to register or unregister the descriptor at execution
2165   // startup or closing, respectively.
2166 
2167   // Create a variable to drive the registration and unregistration of the
2168   // descriptor, so we can reuse the logic that emits Ctors and Dtors.
2169   auto *IdentInfo = &C.Idents.get(".omp_offloading.reg_unreg_var");
2170   ImplicitParamDecl RegUnregVar(C, C.getTranslationUnitDecl(), SourceLocation(),
2171                                 IdentInfo, C.CharTy);
2172 
2173   auto *UnRegFn = createOffloadingBinaryDescriptorFunction(
2174       CGM, ".omp_offloading.descriptor_unreg", [&](CodeGenFunction &CGF) {
2175         CGF.EmitCallOrInvoke(createRuntimeFunction(OMPRTL__tgt_unregister_lib),
2176                              Desc);
2177       });
2178   auto *RegFn = createOffloadingBinaryDescriptorFunction(
2179       CGM, ".omp_offloading.descriptor_reg", [&](CodeGenFunction &CGF) {
2180         CGF.EmitCallOrInvoke(createRuntimeFunction(OMPRTL__tgt_register_lib),
2181                              Desc);
2182         CGM.getCXXABI().registerGlobalDtor(CGF, RegUnregVar, UnRegFn, Desc);
2183       });
2184   return RegFn;
2185 }
2186 
2187 void CGOpenMPRuntime::createOffloadEntry(llvm::Constant *Addr, StringRef Name,
2188                                          uint64_t Size) {
2189   auto *TgtOffloadEntryType = cast<llvm::StructType>(
2190       CGM.getTypes().ConvertTypeForMem(getTgtOffloadEntryQTy()));
2191   llvm::LLVMContext &C = CGM.getModule().getContext();
2192   llvm::Module &M = CGM.getModule();
2193 
2194   // Make sure the address has the right type.
2195   llvm::Constant *AddrPtr = llvm::ConstantExpr::getBitCast(Addr, CGM.VoidPtrTy);
2196 
2197   // Create constant string with the name.
2198   llvm::Constant *StrPtrInit = llvm::ConstantDataArray::getString(C, Name);
2199 
2200   llvm::GlobalVariable *Str =
2201       new llvm::GlobalVariable(M, StrPtrInit->getType(), /*isConstant=*/true,
2202                                llvm::GlobalValue::InternalLinkage, StrPtrInit,
2203                                ".omp_offloading.entry_name");
2204   Str->setUnnamedAddr(true);
2205   llvm::Constant *StrPtr = llvm::ConstantExpr::getBitCast(Str, CGM.Int8PtrTy);
2206 
2207   // Create the entry struct.
2208   llvm::Constant *EntryInit = llvm::ConstantStruct::get(
2209       TgtOffloadEntryType, AddrPtr, StrPtr,
2210       llvm::ConstantInt::get(CGM.SizeTy, Size), nullptr);
2211   llvm::GlobalVariable *Entry = new llvm::GlobalVariable(
2212       M, TgtOffloadEntryType, true, llvm::GlobalValue::ExternalLinkage,
2213       EntryInit, ".omp_offloading.entry");
2214 
2215   // The entry has to be created in the section the linker expects it to be.
2216   Entry->setSection(".omp_offloading.entries");
2217   // We can't have any padding between symbols, so we need to have 1-byte
2218   // alignment.
2219   Entry->setAlignment(1);
2220   return;
2221 }
2222 
2223 void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() {
2224   // Emit the offloading entries and metadata so that the device codegen side
2225   // can
2226   // easily figure out what to emit. The produced metadata looks like this:
2227   //
2228   // !omp_offload.info = !{!1, ...}
2229   //
2230   // Right now we only generate metadata for function that contain target
2231   // regions.
2232 
2233   // If we do not have entries, we dont need to do anything.
2234   if (OffloadEntriesInfoManager.empty())
2235     return;
2236 
2237   llvm::Module &M = CGM.getModule();
2238   llvm::LLVMContext &C = M.getContext();
2239   SmallVector<OffloadEntriesInfoManagerTy::OffloadEntryInfo *, 16>
2240       OrderedEntries(OffloadEntriesInfoManager.size());
2241 
2242   // Create the offloading info metadata node.
2243   llvm::NamedMDNode *MD = M.getOrInsertNamedMetadata("omp_offload.info");
2244 
2245   // Auxiliar methods to create metadata values and strings.
2246   auto getMDInt = [&](unsigned v) {
2247     return llvm::ConstantAsMetadata::get(
2248         llvm::ConstantInt::get(llvm::Type::getInt32Ty(C), v));
2249   };
2250 
2251   auto getMDString = [&](StringRef v) { return llvm::MDString::get(C, v); };
2252 
2253   // Create function that emits metadata for each target region entry;
2254   auto &&TargetRegionMetadataEmitter = [&](
2255       unsigned DeviceID, unsigned FileID, StringRef ParentName, unsigned Line,
2256       unsigned Column,
2257       OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion &E) {
2258     llvm::SmallVector<llvm::Metadata *, 32> Ops;
2259     // Generate metadata for target regions. Each entry of this metadata
2260     // contains:
2261     // - Entry 0 -> Kind of this type of metadata (0).
2262     // - Entry 1 -> Device ID of the file where the entry was identified.
2263     // - Entry 2 -> File ID of the file where the entry was identified.
2264     // - Entry 3 -> Mangled name of the function where the entry was identified.
2265     // - Entry 4 -> Line in the file where the entry was identified.
2266     // - Entry 5 -> Column in the file where the entry was identified.
2267     // - Entry 6 -> Order the entry was created.
2268     // The first element of the metadata node is the kind.
2269     Ops.push_back(getMDInt(E.getKind()));
2270     Ops.push_back(getMDInt(DeviceID));
2271     Ops.push_back(getMDInt(FileID));
2272     Ops.push_back(getMDString(ParentName));
2273     Ops.push_back(getMDInt(Line));
2274     Ops.push_back(getMDInt(Column));
2275     Ops.push_back(getMDInt(E.getOrder()));
2276 
2277     // Save this entry in the right position of the ordered entries array.
2278     OrderedEntries[E.getOrder()] = &E;
2279 
2280     // Add metadata to the named metadata node.
2281     MD->addOperand(llvm::MDNode::get(C, Ops));
2282   };
2283 
2284   OffloadEntriesInfoManager.actOnTargetRegionEntriesInfo(
2285       TargetRegionMetadataEmitter);
2286 
2287   for (auto *E : OrderedEntries) {
2288     assert(E && "All ordered entries must exist!");
2289     if (auto *CE =
2290             dyn_cast<OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion>(
2291                 E)) {
2292       assert(CE->getID() && CE->getAddress() &&
2293              "Entry ID and Addr are invalid!");
2294       createOffloadEntry(CE->getID(), CE->getAddress()->getName(), /*Size=*/0);
2295     } else
2296       llvm_unreachable("Unsupported entry kind.");
2297   }
2298 }
2299 
2300 /// \brief Loads all the offload entries information from the host IR
2301 /// metadata.
2302 void CGOpenMPRuntime::loadOffloadInfoMetadata() {
2303   // If we are in target mode, load the metadata from the host IR. This code has
2304   // to match the metadaata creation in createOffloadEntriesAndInfoMetadata().
2305 
2306   if (!CGM.getLangOpts().OpenMPIsDevice)
2307     return;
2308 
2309   if (CGM.getLangOpts().OMPHostIRFile.empty())
2310     return;
2311 
2312   auto Buf = llvm::MemoryBuffer::getFile(CGM.getLangOpts().OMPHostIRFile);
2313   if (Buf.getError())
2314     return;
2315 
2316   llvm::LLVMContext C;
2317   auto ME = llvm::parseBitcodeFile(Buf.get()->getMemBufferRef(), C);
2318 
2319   if (ME.getError())
2320     return;
2321 
2322   llvm::NamedMDNode *MD = ME.get()->getNamedMetadata("omp_offload.info");
2323   if (!MD)
2324     return;
2325 
2326   for (auto I : MD->operands()) {
2327     llvm::MDNode *MN = cast<llvm::MDNode>(I);
2328 
2329     auto getMDInt = [&](unsigned Idx) {
2330       llvm::ConstantAsMetadata *V =
2331           cast<llvm::ConstantAsMetadata>(MN->getOperand(Idx));
2332       return cast<llvm::ConstantInt>(V->getValue())->getZExtValue();
2333     };
2334 
2335     auto getMDString = [&](unsigned Idx) {
2336       llvm::MDString *V = cast<llvm::MDString>(MN->getOperand(Idx));
2337       return V->getString();
2338     };
2339 
2340     switch (getMDInt(0)) {
2341     default:
2342       llvm_unreachable("Unexpected metadata!");
2343       break;
2344     case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
2345         OFFLOAD_ENTRY_INFO_TARGET_REGION:
2346       OffloadEntriesInfoManager.initializeTargetRegionEntryInfo(
2347           /*DeviceID=*/getMDInt(1), /*FileID=*/getMDInt(2),
2348           /*ParentName=*/getMDString(3), /*Line=*/getMDInt(4),
2349           /*Column=*/getMDInt(5), /*Order=*/getMDInt(6));
2350       break;
2351     }
2352   }
2353 }
2354 
2355 void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) {
2356   if (!KmpRoutineEntryPtrTy) {
2357     // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type.
2358     auto &C = CGM.getContext();
2359     QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy};
2360     FunctionProtoType::ExtProtoInfo EPI;
2361     KmpRoutineEntryPtrQTy = C.getPointerType(
2362         C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI));
2363     KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy);
2364   }
2365 }
2366 
2367 static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC,
2368                                        QualType FieldTy) {
2369   auto *Field = FieldDecl::Create(
2370       C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy,
2371       C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()),
2372       /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit);
2373   Field->setAccess(AS_public);
2374   DC->addDecl(Field);
2375   return Field;
2376 }
2377 
2378 QualType CGOpenMPRuntime::getTgtOffloadEntryQTy() {
2379 
2380   // Make sure the type of the entry is already created. This is the type we
2381   // have to create:
2382   // struct __tgt_offload_entry{
2383   //   void      *addr;       // Pointer to the offload entry info.
2384   //                          // (function or global)
2385   //   char      *name;       // Name of the function or global.
2386   //   size_t     size;       // Size of the entry info (0 if it a function).
2387   // };
2388   if (TgtOffloadEntryQTy.isNull()) {
2389     ASTContext &C = CGM.getContext();
2390     auto *RD = C.buildImplicitRecord("__tgt_offload_entry");
2391     RD->startDefinition();
2392     addFieldToRecordDecl(C, RD, C.VoidPtrTy);
2393     addFieldToRecordDecl(C, RD, C.getPointerType(C.CharTy));
2394     addFieldToRecordDecl(C, RD, C.getSizeType());
2395     RD->completeDefinition();
2396     TgtOffloadEntryQTy = C.getRecordType(RD);
2397   }
2398   return TgtOffloadEntryQTy;
2399 }
2400 
2401 QualType CGOpenMPRuntime::getTgtDeviceImageQTy() {
2402   // These are the types we need to build:
2403   // struct __tgt_device_image{
2404   // void   *ImageStart;       // Pointer to the target code start.
2405   // void   *ImageEnd;         // Pointer to the target code end.
2406   // // We also add the host entries to the device image, as it may be useful
2407   // // for the target runtime to have access to that information.
2408   // __tgt_offload_entry  *EntriesBegin;   // Begin of the table with all
2409   //                                       // the entries.
2410   // __tgt_offload_entry  *EntriesEnd;     // End of the table with all the
2411   //                                       // entries (non inclusive).
2412   // };
2413   if (TgtDeviceImageQTy.isNull()) {
2414     ASTContext &C = CGM.getContext();
2415     auto *RD = C.buildImplicitRecord("__tgt_device_image");
2416     RD->startDefinition();
2417     addFieldToRecordDecl(C, RD, C.VoidPtrTy);
2418     addFieldToRecordDecl(C, RD, C.VoidPtrTy);
2419     addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy()));
2420     addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy()));
2421     RD->completeDefinition();
2422     TgtDeviceImageQTy = C.getRecordType(RD);
2423   }
2424   return TgtDeviceImageQTy;
2425 }
2426 
2427 QualType CGOpenMPRuntime::getTgtBinaryDescriptorQTy() {
2428   // struct __tgt_bin_desc{
2429   //   int32_t              NumDevices;      // Number of devices supported.
2430   //   __tgt_device_image   *DeviceImages;   // Arrays of device images
2431   //                                         // (one per device).
2432   //   __tgt_offload_entry  *EntriesBegin;   // Begin of the table with all the
2433   //                                         // entries.
2434   //   __tgt_offload_entry  *EntriesEnd;     // End of the table with all the
2435   //                                         // entries (non inclusive).
2436   // };
2437   if (TgtBinaryDescriptorQTy.isNull()) {
2438     ASTContext &C = CGM.getContext();
2439     auto *RD = C.buildImplicitRecord("__tgt_bin_desc");
2440     RD->startDefinition();
2441     addFieldToRecordDecl(
2442         C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
2443     addFieldToRecordDecl(C, RD, C.getPointerType(getTgtDeviceImageQTy()));
2444     addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy()));
2445     addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy()));
2446     RD->completeDefinition();
2447     TgtBinaryDescriptorQTy = C.getRecordType(RD);
2448   }
2449   return TgtBinaryDescriptorQTy;
2450 }
2451 
2452 namespace {
2453 struct PrivateHelpersTy {
2454   PrivateHelpersTy(const VarDecl *Original, const VarDecl *PrivateCopy,
2455                    const VarDecl *PrivateElemInit)
2456       : Original(Original), PrivateCopy(PrivateCopy),
2457         PrivateElemInit(PrivateElemInit) {}
2458   const VarDecl *Original;
2459   const VarDecl *PrivateCopy;
2460   const VarDecl *PrivateElemInit;
2461 };
2462 typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy;
2463 } // anonymous namespace
2464 
2465 static RecordDecl *
2466 createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) {
2467   if (!Privates.empty()) {
2468     auto &C = CGM.getContext();
2469     // Build struct .kmp_privates_t. {
2470     //         /*  private vars  */
2471     //       };
2472     auto *RD = C.buildImplicitRecord(".kmp_privates.t");
2473     RD->startDefinition();
2474     for (auto &&Pair : Privates) {
2475       auto *VD = Pair.second.Original;
2476       auto Type = VD->getType();
2477       Type = Type.getNonReferenceType();
2478       auto *FD = addFieldToRecordDecl(C, RD, Type);
2479       if (VD->hasAttrs()) {
2480         for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()),
2481              E(VD->getAttrs().end());
2482              I != E; ++I)
2483           FD->addAttr(*I);
2484       }
2485     }
2486     RD->completeDefinition();
2487     return RD;
2488   }
2489   return nullptr;
2490 }
2491 
2492 static RecordDecl *
2493 createKmpTaskTRecordDecl(CodeGenModule &CGM, QualType KmpInt32Ty,
2494                          QualType KmpRoutineEntryPointerQTy) {
2495   auto &C = CGM.getContext();
2496   // Build struct kmp_task_t {
2497   //         void *              shareds;
2498   //         kmp_routine_entry_t routine;
2499   //         kmp_int32           part_id;
2500   //         kmp_routine_entry_t destructors;
2501   //       };
2502   auto *RD = C.buildImplicitRecord("kmp_task_t");
2503   RD->startDefinition();
2504   addFieldToRecordDecl(C, RD, C.VoidPtrTy);
2505   addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy);
2506   addFieldToRecordDecl(C, RD, KmpInt32Ty);
2507   addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy);
2508   RD->completeDefinition();
2509   return RD;
2510 }
2511 
2512 static RecordDecl *
2513 createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy,
2514                                      ArrayRef<PrivateDataTy> Privates) {
2515   auto &C = CGM.getContext();
2516   // Build struct kmp_task_t_with_privates {
2517   //         kmp_task_t task_data;
2518   //         .kmp_privates_t. privates;
2519   //       };
2520   auto *RD = C.buildImplicitRecord("kmp_task_t_with_privates");
2521   RD->startDefinition();
2522   addFieldToRecordDecl(C, RD, KmpTaskTQTy);
2523   if (auto *PrivateRD = createPrivatesRecordDecl(CGM, Privates)) {
2524     addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD));
2525   }
2526   RD->completeDefinition();
2527   return RD;
2528 }
2529 
2530 /// \brief Emit a proxy function which accepts kmp_task_t as the second
2531 /// argument.
2532 /// \code
2533 /// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
2534 ///   TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map,
2535 ///   tt->shareds);
2536 ///   return 0;
2537 /// }
2538 /// \endcode
2539 static llvm::Value *
2540 emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc,
2541                       QualType KmpInt32Ty, QualType KmpTaskTWithPrivatesPtrQTy,
2542                       QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy,
2543                       QualType SharedsPtrTy, llvm::Value *TaskFunction,
2544                       llvm::Value *TaskPrivatesMap) {
2545   auto &C = CGM.getContext();
2546   FunctionArgList Args;
2547   ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty);
2548   ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc,
2549                                 /*Id=*/nullptr,
2550                                 KmpTaskTWithPrivatesPtrQTy.withRestrict());
2551   Args.push_back(&GtidArg);
2552   Args.push_back(&TaskTypeArg);
2553   FunctionType::ExtInfo Info;
2554   auto &TaskEntryFnInfo =
2555       CGM.getTypes().arrangeFreeFunctionDeclaration(KmpInt32Ty, Args, Info,
2556                                                     /*isVariadic=*/false);
2557   auto *TaskEntryTy = CGM.getTypes().GetFunctionType(TaskEntryFnInfo);
2558   auto *TaskEntry =
2559       llvm::Function::Create(TaskEntryTy, llvm::GlobalValue::InternalLinkage,
2560                              ".omp_task_entry.", &CGM.getModule());
2561   CGM.SetInternalFunctionAttributes(/*D=*/nullptr, TaskEntry, TaskEntryFnInfo);
2562   CodeGenFunction CGF(CGM);
2563   CGF.disableDebugInfo();
2564   CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args);
2565 
2566   // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map,
2567   // tt->task_data.shareds);
2568   auto *GtidParam = CGF.EmitLoadOfScalar(
2569       CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc);
2570   LValue TDBase = emitLoadOfPointerLValue(
2571       CGF, CGF.GetAddrOfLocalVar(&TaskTypeArg), KmpTaskTWithPrivatesPtrQTy);
2572   auto *KmpTaskTWithPrivatesQTyRD =
2573       cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
2574   LValue Base =
2575       CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
2576   auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
2577   auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
2578   auto PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI);
2579   auto *PartidParam = CGF.EmitLoadOfLValue(PartIdLVal, Loc).getScalarVal();
2580 
2581   auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds);
2582   auto SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI);
2583   auto *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2584       CGF.EmitLoadOfLValue(SharedsLVal, Loc).getScalarVal(),
2585       CGF.ConvertTypeForMem(SharedsPtrTy));
2586 
2587   auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
2588   llvm::Value *PrivatesParam;
2589   if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) {
2590     auto PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI);
2591     PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2592         PrivatesLVal.getPointer(), CGF.VoidPtrTy);
2593   } else {
2594     PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
2595   }
2596 
2597   llvm::Value *CallArgs[] = {GtidParam, PartidParam, PrivatesParam,
2598                              TaskPrivatesMap, SharedsParam};
2599   CGF.EmitCallOrInvoke(TaskFunction, CallArgs);
2600   CGF.EmitStoreThroughLValue(
2601       RValue::get(CGF.Builder.getInt32(/*C=*/0)),
2602       CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty));
2603   CGF.FinishFunction();
2604   return TaskEntry;
2605 }
2606 
2607 static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM,
2608                                             SourceLocation Loc,
2609                                             QualType KmpInt32Ty,
2610                                             QualType KmpTaskTWithPrivatesPtrQTy,
2611                                             QualType KmpTaskTWithPrivatesQTy) {
2612   auto &C = CGM.getContext();
2613   FunctionArgList Args;
2614   ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty);
2615   ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc,
2616                                 /*Id=*/nullptr,
2617                                 KmpTaskTWithPrivatesPtrQTy.withRestrict());
2618   Args.push_back(&GtidArg);
2619   Args.push_back(&TaskTypeArg);
2620   FunctionType::ExtInfo Info;
2621   auto &DestructorFnInfo =
2622       CGM.getTypes().arrangeFreeFunctionDeclaration(KmpInt32Ty, Args, Info,
2623                                                     /*isVariadic=*/false);
2624   auto *DestructorFnTy = CGM.getTypes().GetFunctionType(DestructorFnInfo);
2625   auto *DestructorFn =
2626       llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage,
2627                              ".omp_task_destructor.", &CGM.getModule());
2628   CGM.SetInternalFunctionAttributes(/*D=*/nullptr, DestructorFn,
2629                                     DestructorFnInfo);
2630   CodeGenFunction CGF(CGM);
2631   CGF.disableDebugInfo();
2632   CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo,
2633                     Args);
2634 
2635   LValue Base = emitLoadOfPointerLValue(
2636       CGF, CGF.GetAddrOfLocalVar(&TaskTypeArg), KmpTaskTWithPrivatesPtrQTy);
2637   auto *KmpTaskTWithPrivatesQTyRD =
2638       cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
2639   auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
2640   Base = CGF.EmitLValueForField(Base, *FI);
2641   for (auto *Field :
2642        cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) {
2643     if (auto DtorKind = Field->getType().isDestructedType()) {
2644       auto FieldLValue = CGF.EmitLValueForField(Base, Field);
2645       CGF.pushDestroy(DtorKind, FieldLValue.getAddress(), Field->getType());
2646     }
2647   }
2648   CGF.FinishFunction();
2649   return DestructorFn;
2650 }
2651 
2652 /// \brief Emit a privates mapping function for correct handling of private and
2653 /// firstprivate variables.
2654 /// \code
2655 /// void .omp_task_privates_map.(const .privates. *noalias privs, <ty1>
2656 /// **noalias priv1,...,  <tyn> **noalias privn) {
2657 ///   *priv1 = &.privates.priv1;
2658 ///   ...;
2659 ///   *privn = &.privates.privn;
2660 /// }
2661 /// \endcode
2662 static llvm::Value *
2663 emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc,
2664                                ArrayRef<const Expr *> PrivateVars,
2665                                ArrayRef<const Expr *> FirstprivateVars,
2666                                QualType PrivatesQTy,
2667                                ArrayRef<PrivateDataTy> Privates) {
2668   auto &C = CGM.getContext();
2669   FunctionArgList Args;
2670   ImplicitParamDecl TaskPrivatesArg(
2671       C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
2672       C.getPointerType(PrivatesQTy).withConst().withRestrict());
2673   Args.push_back(&TaskPrivatesArg);
2674   llvm::DenseMap<const VarDecl *, unsigned> PrivateVarsPos;
2675   unsigned Counter = 1;
2676   for (auto *E: PrivateVars) {
2677     Args.push_back(ImplicitParamDecl::Create(
2678         C, /*DC=*/nullptr, Loc,
2679         /*Id=*/nullptr, C.getPointerType(C.getPointerType(E->getType()))
2680                             .withConst()
2681                             .withRestrict()));
2682     auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
2683     PrivateVarsPos[VD] = Counter;
2684     ++Counter;
2685   }
2686   for (auto *E : FirstprivateVars) {
2687     Args.push_back(ImplicitParamDecl::Create(
2688         C, /*DC=*/nullptr, Loc,
2689         /*Id=*/nullptr, C.getPointerType(C.getPointerType(E->getType()))
2690                             .withConst()
2691                             .withRestrict()));
2692     auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
2693     PrivateVarsPos[VD] = Counter;
2694     ++Counter;
2695   }
2696   FunctionType::ExtInfo Info;
2697   auto &TaskPrivatesMapFnInfo =
2698       CGM.getTypes().arrangeFreeFunctionDeclaration(C.VoidTy, Args, Info,
2699                                                     /*isVariadic=*/false);
2700   auto *TaskPrivatesMapTy =
2701       CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo);
2702   auto *TaskPrivatesMap = llvm::Function::Create(
2703       TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage,
2704       ".omp_task_privates_map.", &CGM.getModule());
2705   CGM.SetInternalFunctionAttributes(/*D=*/nullptr, TaskPrivatesMap,
2706                                     TaskPrivatesMapFnInfo);
2707   TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline);
2708   CodeGenFunction CGF(CGM);
2709   CGF.disableDebugInfo();
2710   CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap,
2711                     TaskPrivatesMapFnInfo, Args);
2712 
2713   // *privi = &.privates.privi;
2714   LValue Base = emitLoadOfPointerLValue(
2715       CGF, CGF.GetAddrOfLocalVar(&TaskPrivatesArg), TaskPrivatesArg.getType());
2716   auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl());
2717   Counter = 0;
2718   for (auto *Field : PrivatesQTyRD->fields()) {
2719     auto FieldLVal = CGF.EmitLValueForField(Base, Field);
2720     auto *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]];
2721     auto RefLVal = CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType());
2722     auto RefLoadLVal =
2723         emitLoadOfPointerLValue(CGF, RefLVal.getAddress(), RefLVal.getType());
2724     CGF.EmitStoreOfScalar(FieldLVal.getPointer(), RefLoadLVal);
2725     ++Counter;
2726   }
2727   CGF.FinishFunction();
2728   return TaskPrivatesMap;
2729 }
2730 
2731 static int array_pod_sort_comparator(const PrivateDataTy *P1,
2732                                      const PrivateDataTy *P2) {
2733   return P1->first < P2->first ? 1 : (P2->first < P1->first ? -1 : 0);
2734 }
2735 
/// \brief Emit the code that creates and launches a task.
///
/// The emitted sequence allocates the task object with
/// __kmpc_omp_task_alloc, copies the shareds into it, initializes the private
/// and firstprivate copies, stores the destructors thunk, builds the array of
/// dependences (if any) and finally either hands the task to
/// __kmpc_omp_task / __kmpc_omp_task_with_deps or, on the alternative path of
/// the 'if' clause, calls the task entry directly between
/// __kmpc_omp_task_begin_if0 and __kmpc_omp_task_complete_if0.
/// \param CGF Function the code is emitted into; nothing is emitted if it has
/// no insert point.
/// \param Loc Source location used for the runtime calls.
/// \param D Directive whose associated captured statement is used to find the
/// shared fields that initialize firstprivate copies.
/// \param Tied If true, the 'tied' bit (0x1) is set in the task flags.
/// \param Final Selects the 'final' bit (0x2) of the task flags: the pointer
/// part, when non-null, is a runtime condition; otherwise the integer part is
/// used as a compile-time constant.
/// \param TaskFunction Outlined task function. The type of its fourth
/// parameter is used as the type of the privates mapping function.
/// \param SharedsTy Record type of the shared variables.
/// \param Shareds Address of the shareds record that is copied into the task.
/// \param IfCond Condition of the 'if' clause, or null if there is none.
/// \param PrivateVars References to the 'private' variables.
/// \param PrivateCopies References to the private copies, parallel to
/// \a PrivateVars.
/// \param FirstprivateVars References to the 'firstprivate' variables.
/// \param FirstprivateCopies References to the firstprivate copies, parallel
/// to \a FirstprivateVars.
/// \param FirstprivateInits References to the helper variables used by the
/// initializers of the firstprivate copies, parallel to \a FirstprivateVars.
/// \param Dependences List of (dependence kind, expression) pairs.
void CGOpenMPRuntime::emitTaskCall(
    CodeGenFunction &CGF, SourceLocation Loc, const OMPExecutableDirective &D,
    bool Tied, llvm::PointerIntPair<llvm::Value *, 1, bool> Final,
    llvm::Value *TaskFunction, QualType SharedsTy, Address Shareds,
    const Expr *IfCond, ArrayRef<const Expr *> PrivateVars,
    ArrayRef<const Expr *> PrivateCopies,
    ArrayRef<const Expr *> FirstprivateVars,
    ArrayRef<const Expr *> FirstprivateCopies,
    ArrayRef<const Expr *> FirstprivateInits,
    ArrayRef<std::pair<OpenMPDependClauseKind, const Expr *>> Dependences) {
  if (!CGF.HaveInsertPoint())
    return;
  auto &C = CGM.getContext();
  llvm::SmallVector<PrivateDataTy, 8> Privates;
  // Aggregate privates and sort them by the alignment.
  auto I = PrivateCopies.begin();
  for (auto *E : PrivateVars) {
    auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.push_back(std::make_pair(
        C.getDeclAlign(VD),
        PrivateHelpersTy(VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
                         /*PrivateElemInit=*/nullptr)));
    ++I;
  }
  I = FirstprivateCopies.begin();
  auto IElemInitRef = FirstprivateInits.begin();
  for (auto *E : FirstprivateVars) {
    auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.push_back(std::make_pair(
        C.getDeclAlign(VD),
        PrivateHelpersTy(
            VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
            cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl()))));
    ++I, ++IElemInitRef;
  }
  llvm::array_pod_sort(Privates.begin(), Privates.end(),
                       array_pod_sort_comparator);
  auto KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
  // Build type kmp_routine_entry_t (if not built yet).
  emitKmpRoutineEntryT(KmpInt32Ty);
  // Build type kmp_task_t (if not built yet).
  if (KmpTaskTQTy.isNull()) {
    KmpTaskTQTy = C.getRecordType(
        createKmpTaskTRecordDecl(CGM, KmpInt32Ty, KmpRoutineEntryPtrQTy));
  }
  auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
  // Build particular struct kmp_task_t for the given task.
  auto *KmpTaskTWithPrivatesQTyRD =
      createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates);
  auto KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD);
  QualType KmpTaskTWithPrivatesPtrQTy =
      C.getPointerType(KmpTaskTWithPrivatesQTy);
  auto *KmpTaskTWithPrivatesTy = CGF.ConvertType(KmpTaskTWithPrivatesQTy);
  auto *KmpTaskTWithPrivatesPtrTy = KmpTaskTWithPrivatesTy->getPointerTo();
  auto *KmpTaskTWithPrivatesTySize = CGF.getTypeSize(KmpTaskTWithPrivatesQTy);
  QualType SharedsPtrTy = C.getPointerType(SharedsTy);

  // Emit the privates mapping function (if there are any privates); otherwise
  // use a null pointer. Either way the value has the type of the fourth
  // parameter of the outlined task function.
  llvm::Value *TaskPrivatesMap = nullptr;
  auto *TaskPrivatesMapTy =
      std::next(cast<llvm::Function>(TaskFunction)->getArgumentList().begin(),
                3)
          ->getType();
  if (!Privates.empty()) {
    // The second field of the record is the one that holds the private copies.
    auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
    TaskPrivatesMap = emitTaskPrivateMappingFunction(
        CGM, Loc, PrivateVars, FirstprivateVars, FI->getType(), Privates);
    TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        TaskPrivatesMap, TaskPrivatesMapTy);
  } else {
    TaskPrivatesMap = llvm::ConstantPointerNull::get(
        cast<llvm::PointerType>(TaskPrivatesMapTy));
  }
  // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid,
  // kmp_task_t *tt);
  auto *TaskEntry = emitProxyTaskFunction(
      CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTy,
      KmpTaskTQTy, SharedsPtrTy, TaskFunction, TaskPrivatesMap);

  // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
  // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
  // kmp_routine_entry_t *task_entry);
  // Task flags. Format is taken from
  // http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp.h,
  // description of kmp_tasking_flags struct.
  const unsigned TiedFlag = 0x1;
  const unsigned FinalFlag = 0x2;
  unsigned Flags = Tied ? TiedFlag : 0;
  // The 'final' bit is selected at run time if a condition value is available
  // and is folded to a constant otherwise.
  auto *TaskFlags =
      Final.getPointer()
          ? CGF.Builder.CreateSelect(Final.getPointer(),
                                     CGF.Builder.getInt32(FinalFlag),
                                     CGF.Builder.getInt32(/*C=*/0))
          : CGF.Builder.getInt32(Final.getInt() ? FinalFlag : 0);
  TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags));
  auto *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy));
  llvm::Value *AllocArgs[] = {emitUpdateLocation(CGF, Loc),
                              getThreadID(CGF, Loc), TaskFlags,
                              KmpTaskTWithPrivatesTySize, SharedsSize,
                              CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                                  TaskEntry, KmpRoutineEntryPtrTy)};
  auto *NewTask = CGF.EmitRuntimeCall(
      createRuntimeFunction(OMPRTL__kmpc_omp_task_alloc), AllocArgs);
  // View the allocated task as the task-specific record with privates.
  auto *NewTaskNewTaskTTy = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      NewTask, KmpTaskTWithPrivatesPtrTy);
  LValue Base = CGF.MakeNaturalAlignAddrLValue(NewTaskNewTaskTTy,
                                               KmpTaskTWithPrivatesQTy);
  // The first field of the record is the kmp_task_t part.
  LValue TDBase =
      CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin());
  // Fill the data in the resulting kmp_task_t record.
  // Copy shareds if there are any.
  Address KmpTaskSharedsPtr = Address::invalid();
  if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) {
    KmpTaskSharedsPtr =
        Address(CGF.EmitLoadOfScalar(
                    CGF.EmitLValueForField(
                        TDBase, *std::next(KmpTaskTQTyRD->field_begin(),
                                           KmpTaskTShareds)),
                    Loc),
                CGF.getNaturalTypeAlignment(SharedsTy));
    CGF.EmitAggregateCopy(KmpTaskSharedsPtr, Shareds, SharedsTy);
  }
  // Emit initial values for private copies (if any).
  bool NeedsCleanup = false;
  if (!Privates.empty()) {
    auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
    auto PrivatesBase = CGF.EmitLValueForField(Base, *FI);
    // From here on FI walks the fields of the privates record in step with the
    // (sorted) Privates list.
    FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin();
    LValue SharedsBase;
    if (!FirstprivateVars.empty()) {
      SharedsBase = CGF.MakeAddrLValue(
          CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
              KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy)),
          SharedsTy);
    }
    CodeGenFunction::CGCapturedStmtInfo CapturesInfo(
        cast<CapturedStmt>(*D.getAssociatedStmt()));
    for (auto &&Pair : Privates) {
      auto *VD = Pair.second.PrivateCopy;
      auto *Init = VD->getAnyInitializer();
      LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI);
      if (Init) {
        // A non-null PrivateElemInit marks a firstprivate copy, which is
        // initialized from the corresponding field of the shareds record.
        if (auto *Elem = Pair.second.PrivateElemInit) {
          auto *OriginalVD = Pair.second.Original;
          auto *SharedField = CapturesInfo.lookup(OriginalVD);
          auto SharedRefLValue =
              CGF.EmitLValueForField(SharedsBase, SharedField);
          SharedRefLValue = CGF.MakeAddrLValue(
              Address(SharedRefLValue.getPointer(), C.getDeclAlign(OriginalVD)),
              SharedRefLValue.getType(), AlignmentSource::Decl);
          QualType Type = OriginalVD->getType();
          if (Type->isArrayType()) {
            // Initialize firstprivate array.
            if (!isa<CXXConstructExpr>(Init) ||
                CGF.isTrivialInitializer(Init)) {
              // Perform simple memcpy.
              CGF.EmitAggregateAssign(PrivateLValue.getAddress(),
                                      SharedRefLValue.getAddress(), Type);
            } else {
              // Initialize firstprivate array using element-by-element
              // initialization.
              CGF.EmitOMPAggregateAssign(
                  PrivateLValue.getAddress(), SharedRefLValue.getAddress(),
                  Type, [&CGF, Elem, Init, &CapturesInfo](
                            Address DestElement, Address SrcElement) {
                    // Clean up any temporaries needed by the initialization.
                    CodeGenFunction::OMPPrivateScope InitScope(CGF);
                    InitScope.addPrivate(Elem, [SrcElement]() -> Address {
                      return SrcElement;
                    });
                    (void)InitScope.Privatize();
                    // Emit initialization for single element.
                    CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(
                        CGF, &CapturesInfo);
                    CGF.EmitAnyExprToMem(Init, DestElement,
                                         Init->getType().getQualifiers(),
                                         /*IsInitializer=*/false);
                  });
            }
          } else {
            // Non-array firstprivate: make the helper variable refer to the
            // shared original while the initializer is emitted.
            CodeGenFunction::OMPPrivateScope InitScope(CGF);
            InitScope.addPrivate(Elem, [SharedRefLValue]() -> Address {
              return SharedRefLValue.getAddress();
            });
            (void)InitScope.Privatize();
            CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo);
            CGF.EmitExprAsInit(Init, VD, PrivateLValue,
                               /*capturedByInit=*/false);
          }
        } else {
          // Plain private copy: just emit its own initializer.
          CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false);
        }
      }
      // A destructors function is required as soon as one private field has a
      // destructed type.
      NeedsCleanup = NeedsCleanup || FI->getType().isDestructedType();
      ++FI;
    }
  }
  // Provide pointer to function with destructors for privates.
  llvm::Value *DestructorFn =
      NeedsCleanup ? emitDestructorsFunction(CGM, Loc, KmpInt32Ty,
                                             KmpTaskTWithPrivatesPtrQTy,
                                             KmpTaskTWithPrivatesQTy)
                   : llvm::ConstantPointerNull::get(
                         cast<llvm::PointerType>(KmpRoutineEntryPtrTy));
  LValue Destructor = CGF.EmitLValueForField(
      TDBase, *std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTDestructors));
  CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                            DestructorFn, KmpRoutineEntryPtrTy),
                        Destructor);

  // Process list of dependences.
  Address DependenciesArray = Address::invalid();
  unsigned NumDependencies = Dependences.size();
  if (NumDependencies) {
    // Dependence kind for RTL.
    enum RTLDependenceKindTy { DepIn = 0x01, DepInOut = 0x3 };
    // Indices of the fields of kmp_depend_info, in declaration order.
    enum RTLDependInfoFieldsTy { BaseAddr, Len, Flags };
    RecordDecl *KmpDependInfoRD;
    QualType FlagsTy =
        C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false);
    llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
    // Build type kmp_depend_info (if not built yet).
    if (KmpDependInfoTy.isNull()) {
      KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info");
      KmpDependInfoRD->startDefinition();
      addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType());
      addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType());
      addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy);
      KmpDependInfoRD->completeDefinition();
      KmpDependInfoTy = C.getRecordType(KmpDependInfoRD);
    } else {
      KmpDependInfoRD = cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
    }
    CharUnits DependencySize = C.getTypeSizeInChars(KmpDependInfoTy);
    // Define type kmp_depend_info[<Dependences.size()>];
    QualType KmpDependInfoArrayTy = C.getConstantArrayType(
        KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies),
        ArrayType::Normal, /*IndexTypeQuals=*/0);
    // kmp_depend_info[<Dependences.size()>] deps;
    DependenciesArray = CGF.CreateMemTemp(KmpDependInfoArrayTy);
    for (unsigned i = 0; i < NumDependencies; ++i) {
      const Expr *E = Dependences[i].second;
      auto Addr = CGF.EmitLValue(E);
      llvm::Value *Size;
      QualType Ty = E->getType();
      if (auto *ASE = dyn_cast<OMPArraySectionExpr>(E->IgnoreParenImpCasts())) {
        // For an array section the length is the distance in bytes between the
        // address of the section and the address one element past the
        // non-lower-bound end of the section.
        LValue UpAddrLVal =
            CGF.EmitOMPArraySectionExpr(ASE, /*LowerBound=*/false);
        llvm::Value *UpAddr =
            CGF.Builder.CreateConstGEP1_32(UpAddrLVal.getPointer(), /*Idx0=*/1);
        llvm::Value *LowIntPtr =
            CGF.Builder.CreatePtrToInt(Addr.getPointer(), CGM.SizeTy);
        llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGM.SizeTy);
        Size = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr);
      } else
        Size = CGF.getTypeSize(Ty);
      auto Base = CGF.MakeAddrLValue(
          CGF.Builder.CreateConstArrayGEP(DependenciesArray, i, DependencySize),
          KmpDependInfoTy);
      // deps[i].base_addr = &<Dependences[i].second>;
      auto BaseAddrLVal = CGF.EmitLValueForField(
          Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
      CGF.EmitStoreOfScalar(
          CGF.Builder.CreatePtrToInt(Addr.getPointer(), CGF.IntPtrTy),
          BaseAddrLVal);
      // deps[i].len = sizeof(<Dependences[i].second>);
      auto LenLVal = CGF.EmitLValueForField(
          Base, *std::next(KmpDependInfoRD->field_begin(), Len));
      CGF.EmitStoreOfScalar(Size, LenLVal);
      // deps[i].flags = <Dependences[i].first>;
      RTLDependenceKindTy DepKind;
      switch (Dependences[i].first) {
      case OMPC_DEPEND_in:
        DepKind = DepIn;
        break;
      // Out and InOut dependencies must use the same code.
      case OMPC_DEPEND_out:
      case OMPC_DEPEND_inout:
        DepKind = DepInOut;
        break;
      case OMPC_DEPEND_source:
      case OMPC_DEPEND_sink:
      case OMPC_DEPEND_unknown:
        llvm_unreachable("Unknown task dependence type");
      }
      auto FlagsLVal = CGF.EmitLValueForField(
          Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
      CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
                            FlagsLVal);
    }
    // The runtime calls below receive the array as a void pointer to its
    // first element.
    DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        CGF.Builder.CreateStructGEP(DependenciesArray, 0, CharUnits::Zero()),
        CGF.VoidPtrTy);
  }

  // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
  // libcall.
  // Build kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t
  // *new_task);
  // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
  // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
  // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if dependence
  // list is not empty
  auto *ThreadID = getThreadID(CGF, Loc);
  auto *UpLoc = emitUpdateLocation(CGF, Loc);
  llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask };
  // DepTaskArgs is filled in (and later read) only if there are dependences.
  llvm::Value *DepTaskArgs[7];
  if (NumDependencies) {
    DepTaskArgs[0] = UpLoc;
    DepTaskArgs[1] = ThreadID;
    DepTaskArgs[2] = NewTask;
    DepTaskArgs[3] = CGF.Builder.getInt32(NumDependencies);
    DepTaskArgs[4] = DependenciesArray.getPointer();
    DepTaskArgs[5] = CGF.Builder.getInt32(0);
    DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }
  // Code for the case when the task is handed over to the runtime.
  auto &&ThenCodeGen = [this, NumDependencies,
                        &TaskArgs, &DepTaskArgs](CodeGenFunction &CGF) {
    // TODO: add check for untied tasks.
    if (NumDependencies) {
      CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task_with_deps),
                          DepTaskArgs);
    } else {
      CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task),
                          TaskArgs);
    }
  };
  typedef CallEndCleanup<std::extent<decltype(TaskArgs)>::value>
      IfCallEndCleanup;

  // DepWaitTaskArgs is filled in (and later read) only if there are
  // dependences.
  llvm::Value *DepWaitTaskArgs[6];
  if (NumDependencies) {
    DepWaitTaskArgs[0] = UpLoc;
    DepWaitTaskArgs[1] = ThreadID;
    DepWaitTaskArgs[2] = CGF.Builder.getInt32(NumDependencies);
    DepWaitTaskArgs[3] = DependenciesArray.getPointer();
    DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
    DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }
  // Code for the alternative branch of the 'if' clause: the task entry is
  // called directly instead of being passed to __kmpc_omp_task.
  auto &&ElseCodeGen = [this, &TaskArgs, ThreadID, NewTaskNewTaskTTy, TaskEntry,
                        NumDependencies, &DepWaitTaskArgs](CodeGenFunction &CGF) {
    CodeGenFunction::RunCleanupsScope LocalScope(CGF);
    // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
    // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
    // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info
    // is specified.
    if (NumDependencies)
      CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_wait_deps),
                          DepWaitTaskArgs);
    // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
    CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task_begin_if0),
                        TaskArgs);
    // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
    // It is registered as a normal-and-EH cleanup in LocalScope rather than
    // being emitted right after the call to the task entry.
    CGF.EHStack.pushCleanup<IfCallEndCleanup>(
        NormalAndEHCleanup,
        createRuntimeFunction(OMPRTL__kmpc_omp_task_complete_if0),
        llvm::makeArrayRef(TaskArgs));

    // Call proxy_task_entry(gtid, new_task);
    llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy};
    CGF.EmitCallOrInvoke(TaskEntry, OutlinedFnArgs);
  };

  if (IfCond) {
    emitOMPIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen);
  } else {
    // No 'if' clause: only the code that passes the task to the runtime is
    // emitted.
    CodeGenFunction::RunCleanupsScope Scope(CGF);
    ThenCodeGen(CGF);
  }
}
3107 
3108 /// \brief Emit reduction operation for each element of array (required for
3109 /// array sections) LHS op = RHS.
3110 /// \param Type Type of array.
3111 /// \param LHSVar Variable on the left side of the reduction operation
3112 /// (references element of array in original variable).
3113 /// \param RHSVar Variable on the right side of the reduction operation
3114 /// (references element of array in original variable).
3115 /// \param RedOpGen Generator of reduction operation with use of LHSVar and
3116 /// RHSVar.
3117 static void EmitOMPAggregateReduction(
3118     CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar,
3119     const VarDecl *RHSVar,
3120     const llvm::function_ref<void(CodeGenFunction &CGF, const Expr *,
3121                                   const Expr *, const Expr *)> &RedOpGen,
3122     const Expr *XExpr = nullptr, const Expr *EExpr = nullptr,
3123     const Expr *UpExpr = nullptr) {
3124   // Perform element-by-element initialization.
3125   QualType ElementTy;
3126   Address LHSAddr = CGF.GetAddrOfLocalVar(LHSVar);
3127   Address RHSAddr = CGF.GetAddrOfLocalVar(RHSVar);
3128 
3129   // Drill down to the base element type on both arrays.
3130   auto ArrayTy = Type->getAsArrayTypeUnsafe();
3131   auto NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, LHSAddr);
3132 
3133   auto RHSBegin = RHSAddr.getPointer();
3134   auto LHSBegin = LHSAddr.getPointer();
3135   // Cast from pointer to array type to pointer to single element.
3136   auto LHSEnd = CGF.Builder.CreateGEP(LHSBegin, NumElements);
3137   // The basic structure here is a while-do loop.
3138   auto BodyBB = CGF.createBasicBlock("omp.arraycpy.body");
3139   auto DoneBB = CGF.createBasicBlock("omp.arraycpy.done");
3140   auto IsEmpty =
3141       CGF.Builder.CreateICmpEQ(LHSBegin, LHSEnd, "omp.arraycpy.isempty");
3142   CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
3143 
3144   // Enter the loop body, making that address the current address.
3145   auto EntryBB = CGF.Builder.GetInsertBlock();
3146   CGF.EmitBlock(BodyBB);
3147 
3148   CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);
3149 
3150   llvm::PHINode *RHSElementPHI = CGF.Builder.CreatePHI(
3151       RHSBegin->getType(), 2, "omp.arraycpy.srcElementPast");
3152   RHSElementPHI->addIncoming(RHSBegin, EntryBB);
3153   Address RHSElementCurrent =
3154       Address(RHSElementPHI,
3155               RHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));
3156 
3157   llvm::PHINode *LHSElementPHI = CGF.Builder.CreatePHI(
3158       LHSBegin->getType(), 2, "omp.arraycpy.destElementPast");
3159   LHSElementPHI->addIncoming(LHSBegin, EntryBB);
3160   Address LHSElementCurrent =
3161       Address(LHSElementPHI,
3162               LHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));
3163 
3164   // Emit copy.
3165   CodeGenFunction::OMPPrivateScope Scope(CGF);
3166   Scope.addPrivate(LHSVar, [=]() -> Address { return LHSElementCurrent; });
3167   Scope.addPrivate(RHSVar, [=]() -> Address { return RHSElementCurrent; });
3168   Scope.Privatize();
3169   RedOpGen(CGF, XExpr, EExpr, UpExpr);
3170   Scope.ForceCleanup();
3171 
3172   // Shift the address forward by one element.
3173   auto LHSElementNext = CGF.Builder.CreateConstGEP1_32(
3174       LHSElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
3175   auto RHSElementNext = CGF.Builder.CreateConstGEP1_32(
3176       RHSElementPHI, /*Idx0=*/1, "omp.arraycpy.src.element");
3177   // Check whether we've reached the end.
3178   auto Done =
3179       CGF.Builder.CreateICmpEQ(LHSElementNext, LHSEnd, "omp.arraycpy.done");
3180   CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
3181   LHSElementPHI->addIncoming(LHSElementNext, CGF.Builder.GetInsertBlock());
3182   RHSElementPHI->addIncoming(RHSElementNext, CGF.Builder.GetInsertBlock());
3183 
3184   // Done.
3185   CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
3186 }
3187 
3188 static llvm::Value *emitReductionFunction(CodeGenModule &CGM,
3189                                           llvm::Type *ArgsType,
3190                                           ArrayRef<const Expr *> Privates,
3191                                           ArrayRef<const Expr *> LHSExprs,
3192                                           ArrayRef<const Expr *> RHSExprs,
3193                                           ArrayRef<const Expr *> ReductionOps) {
3194   auto &C = CGM.getContext();
3195 
3196   // void reduction_func(void *LHSArg, void *RHSArg);
3197   FunctionArgList Args;
3198   ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, SourceLocation(), /*Id=*/nullptr,
3199                            C.VoidPtrTy);
3200   ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, SourceLocation(), /*Id=*/nullptr,
3201                            C.VoidPtrTy);
3202   Args.push_back(&LHSArg);
3203   Args.push_back(&RHSArg);
3204   FunctionType::ExtInfo EI;
3205   auto &CGFI = CGM.getTypes().arrangeFreeFunctionDeclaration(
3206       C.VoidTy, Args, EI, /*isVariadic=*/false);
3207   auto *Fn = llvm::Function::Create(
3208       CGM.getTypes().GetFunctionType(CGFI), llvm::GlobalValue::InternalLinkage,
3209       ".omp.reduction.reduction_func", &CGM.getModule());
3210   CGM.SetInternalFunctionAttributes(/*D=*/nullptr, Fn, CGFI);
3211   CodeGenFunction CGF(CGM);
3212   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args);
3213 
3214   // Dst = (void*[n])(LHSArg);
3215   // Src = (void*[n])(RHSArg);
3216   Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3217       CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
3218       ArgsType), CGF.getPointerAlign());
3219   Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3220       CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
3221       ArgsType), CGF.getPointerAlign());
3222 
3223   //  ...
3224   //  *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]);
3225   //  ...
3226   CodeGenFunction::OMPPrivateScope Scope(CGF);
3227   auto IPriv = Privates.begin();
3228   unsigned Idx = 0;
3229   for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) {
3230     auto RHSVar = cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl());
3231     Scope.addPrivate(RHSVar, [&]() -> Address {
3232       return emitAddrOfVarFromArray(CGF, RHS, Idx, RHSVar);
3233     });
3234     auto LHSVar = cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl());
3235     Scope.addPrivate(LHSVar, [&]() -> Address {
3236       return emitAddrOfVarFromArray(CGF, LHS, Idx, LHSVar);
3237     });
3238     QualType PrivTy = (*IPriv)->getType();
3239     if (PrivTy->isVariablyModifiedType()) {
3240       // Get array size and emit VLA type.
3241       ++Idx;
3242       Address Elem =
3243           CGF.Builder.CreateConstArrayGEP(LHS, Idx, CGF.getPointerSize());
3244       llvm::Value *Ptr = CGF.Builder.CreateLoad(Elem);
3245       auto *VLA = CGF.getContext().getAsVariableArrayType(PrivTy);
3246       auto *OVE = cast<OpaqueValueExpr>(VLA->getSizeExpr());
3247       CodeGenFunction::OpaqueValueMapping OpaqueMap(
3248           CGF, OVE, RValue::get(CGF.Builder.CreatePtrToInt(Ptr, CGF.SizeTy)));
3249       CGF.EmitVariablyModifiedType(PrivTy);
3250     }
3251   }
3252   Scope.Privatize();
3253   IPriv = Privates.begin();
3254   auto ILHS = LHSExprs.begin();
3255   auto IRHS = RHSExprs.begin();
3256   for (auto *E : ReductionOps) {
3257     if ((*IPriv)->getType()->isArrayType()) {
3258       // Emit reduction for array section.
3259       auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
3260       auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
3261       EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar,
3262                                 [=](CodeGenFunction &CGF, const Expr *,
3263                                     const Expr *,
3264                                     const Expr *) { CGF.EmitIgnoredExpr(E); });
3265     } else
3266       // Emit reduction for array subscript or single variable.
3267       CGF.EmitIgnoredExpr(E);
3268     ++IPriv, ++ILHS, ++IRHS;
3269   }
3270   Scope.ForceCleanup();
3271   CGF.FinishFunction();
3272   return Fn;
3273 }
3274 
3275 void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
3276                                     ArrayRef<const Expr *> Privates,
3277                                     ArrayRef<const Expr *> LHSExprs,
3278                                     ArrayRef<const Expr *> RHSExprs,
3279                                     ArrayRef<const Expr *> ReductionOps,
3280                                     bool WithNowait, bool SimpleReduction) {
3281   if (!CGF.HaveInsertPoint())
3282     return;
3283   // Next code should be emitted for reduction:
3284   //
3285   // static kmp_critical_name lock = { 0 };
3286   //
3287   // void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
3288   //  *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]);
3289   //  ...
3290   //  *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1],
3291   //  *(Type<n>-1*)rhs[<n>-1]);
3292   // }
3293   //
3294   // ...
3295   // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]};
3296   // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
3297   // RedList, reduce_func, &<lock>)) {
3298   // case 1:
3299   //  ...
3300   //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
3301   //  ...
3302   // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
3303   // break;
3304   // case 2:
3305   //  ...
3306   //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
3307   //  ...
3308   // [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);]
3309   // break;
3310   // default:;
3311   // }
3312   //
3313   // if SimpleReduction is true, only the next code is generated:
3314   //  ...
3315   //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
3316   //  ...
3317 
3318   auto &C = CGM.getContext();
3319 
3320   if (SimpleReduction) {
3321     CodeGenFunction::RunCleanupsScope Scope(CGF);
3322     auto IPriv = Privates.begin();
3323     auto ILHS = LHSExprs.begin();
3324     auto IRHS = RHSExprs.begin();
3325     for (auto *E : ReductionOps) {
3326       if ((*IPriv)->getType()->isArrayType()) {
3327         auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
3328         auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
3329         EmitOMPAggregateReduction(
3330             CGF, (*IPriv)->getType(), LHSVar, RHSVar,
3331             [=](CodeGenFunction &CGF, const Expr *, const Expr *,
3332                 const Expr *) { CGF.EmitIgnoredExpr(E); });
3333       } else
3334         CGF.EmitIgnoredExpr(E);
3335       ++IPriv, ++ILHS, ++IRHS;
3336     }
3337     return;
3338   }
3339 
3340   // 1. Build a list of reduction variables.
3341   // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]};
3342   auto Size = RHSExprs.size();
3343   for (auto *E : Privates) {
3344     if (E->getType()->isVariablyModifiedType())
3345       // Reserve place for array size.
3346       ++Size;
3347   }
3348   llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size);
3349   QualType ReductionArrayTy =
3350       C.getConstantArrayType(C.VoidPtrTy, ArraySize, ArrayType::Normal,
3351                              /*IndexTypeQuals=*/0);
3352   Address ReductionList =
3353       CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list");
3354   auto IPriv = Privates.begin();
3355   unsigned Idx = 0;
3356   for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) {
3357     Address Elem =
3358       CGF.Builder.CreateConstArrayGEP(ReductionList, Idx, CGF.getPointerSize());
3359     CGF.Builder.CreateStore(
3360         CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3361             CGF.EmitLValue(RHSExprs[I]).getPointer(), CGF.VoidPtrTy),
3362         Elem);
3363     if ((*IPriv)->getType()->isVariablyModifiedType()) {
3364       // Store array size.
3365       ++Idx;
3366       Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx,
3367                                              CGF.getPointerSize());
3368       llvm::Value *Size = CGF.Builder.CreateIntCast(
3369           CGF.getVLASize(
3370                  CGF.getContext().getAsVariableArrayType((*IPriv)->getType()))
3371               .first,
3372           CGF.SizeTy, /*isSigned=*/false);
3373       CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy),
3374                               Elem);
3375     }
3376   }
3377 
3378   // 2. Emit reduce_func().
3379   auto *ReductionFn = emitReductionFunction(
3380       CGM, CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo(), Privates,
3381       LHSExprs, RHSExprs, ReductionOps);
3382 
3383   // 3. Create static kmp_critical_name lock = { 0 };
3384   auto *Lock = getCriticalRegionLock(".reduction");
3385 
3386   // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
3387   // RedList, reduce_func, &<lock>);
3388   auto *IdentTLoc = emitUpdateLocation(
3389       CGF, Loc,
3390       static_cast<OpenMPLocationFlags>(OMP_IDENT_KMPC | OMP_ATOMIC_REDUCE));
3391   auto *ThreadId = getThreadID(CGF, Loc);
3392   auto *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy);
3393   auto *RL =
3394     CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(ReductionList.getPointer(),
3395                                                     CGF.VoidPtrTy);
3396   llvm::Value *Args[] = {
3397       IdentTLoc,                             // ident_t *<loc>
3398       ThreadId,                              // i32 <gtid>
3399       CGF.Builder.getInt32(RHSExprs.size()), // i32 <n>
3400       ReductionArrayTySize,                  // size_type sizeof(RedList)
3401       RL,                                    // void *RedList
3402       ReductionFn, // void (*) (void *, void *) <reduce_func>
3403       Lock         // kmp_critical_name *&<lock>
3404   };
3405   auto Res = CGF.EmitRuntimeCall(
3406       createRuntimeFunction(WithNowait ? OMPRTL__kmpc_reduce_nowait
3407                                        : OMPRTL__kmpc_reduce),
3408       Args);
3409 
3410   // 5. Build switch(res)
3411   auto *DefaultBB = CGF.createBasicBlock(".omp.reduction.default");
3412   auto *SwInst = CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2);
3413 
3414   // 6. Build case 1:
3415   //  ...
3416   //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
3417   //  ...
3418   // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
3419   // break;
3420   auto *Case1BB = CGF.createBasicBlock(".omp.reduction.case1");
3421   SwInst->addCase(CGF.Builder.getInt32(1), Case1BB);
3422   CGF.EmitBlock(Case1BB);
3423 
3424   {
3425     CodeGenFunction::RunCleanupsScope Scope(CGF);
3426     // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
3427     llvm::Value *EndArgs[] = {
3428         IdentTLoc, // ident_t *<loc>
3429         ThreadId,  // i32 <gtid>
3430         Lock       // kmp_critical_name *&<lock>
3431     };
3432     CGF.EHStack
3433         .pushCleanup<CallEndCleanup<std::extent<decltype(EndArgs)>::value>>(
3434             NormalAndEHCleanup,
3435             createRuntimeFunction(WithNowait ? OMPRTL__kmpc_end_reduce_nowait
3436                                              : OMPRTL__kmpc_end_reduce),
3437             llvm::makeArrayRef(EndArgs));
3438     auto IPriv = Privates.begin();
3439     auto ILHS = LHSExprs.begin();
3440     auto IRHS = RHSExprs.begin();
3441     for (auto *E : ReductionOps) {
3442       if ((*IPriv)->getType()->isArrayType()) {
3443         // Emit reduction for array section.
3444         auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
3445         auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
3446         EmitOMPAggregateReduction(
3447             CGF, (*IPriv)->getType(), LHSVar, RHSVar,
3448             [=](CodeGenFunction &CGF, const Expr *, const Expr *,
3449                 const Expr *) { CGF.EmitIgnoredExpr(E); });
3450       } else
3451         // Emit reduction for array subscript or single variable.
3452         CGF.EmitIgnoredExpr(E);
3453       ++IPriv, ++ILHS, ++IRHS;
3454     }
3455   }
3456 
3457   CGF.EmitBranch(DefaultBB);
3458 
3459   // 7. Build case 2:
3460   //  ...
3461   //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
3462   //  ...
3463   // break;
3464   auto *Case2BB = CGF.createBasicBlock(".omp.reduction.case2");
3465   SwInst->addCase(CGF.Builder.getInt32(2), Case2BB);
3466   CGF.EmitBlock(Case2BB);
3467 
3468   {
3469     CodeGenFunction::RunCleanupsScope Scope(CGF);
3470     if (!WithNowait) {
3471       // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>);
3472       llvm::Value *EndArgs[] = {
3473           IdentTLoc, // ident_t *<loc>
3474           ThreadId,  // i32 <gtid>
3475           Lock       // kmp_critical_name *&<lock>
3476       };
3477       CGF.EHStack
3478           .pushCleanup<CallEndCleanup<std::extent<decltype(EndArgs)>::value>>(
3479               NormalAndEHCleanup,
3480               createRuntimeFunction(OMPRTL__kmpc_end_reduce),
3481               llvm::makeArrayRef(EndArgs));
3482     }
3483     auto ILHS = LHSExprs.begin();
3484     auto IRHS = RHSExprs.begin();
3485     auto IPriv = Privates.begin();
3486     for (auto *E : ReductionOps) {
3487         const Expr *XExpr = nullptr;
3488         const Expr *EExpr = nullptr;
3489         const Expr *UpExpr = nullptr;
3490         BinaryOperatorKind BO = BO_Comma;
3491         if (auto *BO = dyn_cast<BinaryOperator>(E)) {
3492           if (BO->getOpcode() == BO_Assign) {
3493             XExpr = BO->getLHS();
3494             UpExpr = BO->getRHS();
3495           }
3496         }
3497         // Try to emit update expression as a simple atomic.
3498         auto *RHSExpr = UpExpr;
3499         if (RHSExpr) {
3500           // Analyze RHS part of the whole expression.
3501           if (auto *ACO = dyn_cast<AbstractConditionalOperator>(
3502                   RHSExpr->IgnoreParenImpCasts())) {
3503             // If this is a conditional operator, analyze its condition for
3504             // min/max reduction operator.
3505             RHSExpr = ACO->getCond();
3506           }
3507           if (auto *BORHS =
3508                   dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) {
3509             EExpr = BORHS->getRHS();
3510             BO = BORHS->getOpcode();
3511           }
3512         }
3513         if (XExpr) {
3514           auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
3515           auto &&AtomicRedGen = [this, BO, VD, IPriv,
3516                                  Loc](CodeGenFunction &CGF, const Expr *XExpr,
3517                                       const Expr *EExpr, const Expr *UpExpr) {
3518             LValue X = CGF.EmitLValue(XExpr);
3519             RValue E;
3520             if (EExpr)
3521               E = CGF.EmitAnyExpr(EExpr);
3522             CGF.EmitOMPAtomicSimpleUpdateExpr(
3523                 X, E, BO, /*IsXLHSInRHSPart=*/true, llvm::Monotonic, Loc,
3524                 [&CGF, UpExpr, VD, IPriv, Loc](RValue XRValue) {
3525                   CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
3526                   PrivateScope.addPrivate(
3527                       VD, [&CGF, VD, XRValue, Loc]() -> Address {
3528                         Address LHSTemp = CGF.CreateMemTemp(VD->getType());
3529                         CGF.emitOMPSimpleStore(
3530                             CGF.MakeAddrLValue(LHSTemp, VD->getType()), XRValue,
3531                             VD->getType().getNonReferenceType(), Loc);
3532                         return LHSTemp;
3533                       });
3534                   (void)PrivateScope.Privatize();
3535                   return CGF.EmitAnyExpr(UpExpr);
3536                 });
3537           };
3538           if ((*IPriv)->getType()->isArrayType()) {
3539             // Emit atomic reduction for array section.
3540             auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
3541             EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar,
3542                                       AtomicRedGen, XExpr, EExpr, UpExpr);
3543           } else
3544             // Emit atomic reduction for array subscript or single variable.
3545             AtomicRedGen(CGF, XExpr, EExpr, UpExpr);
3546         } else {
3547           // Emit as a critical region.
3548           auto &&CritRedGen = [this, E, Loc](CodeGenFunction &CGF, const Expr *,
3549                                              const Expr *, const Expr *) {
3550             emitCriticalRegion(
3551                 CGF, ".atomic_reduction",
3552                 [E](CodeGenFunction &CGF) { CGF.EmitIgnoredExpr(E); }, Loc);
3553           };
3554           if ((*IPriv)->getType()->isArrayType()) {
3555             auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
3556             auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
3557             EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar,
3558                                       CritRedGen);
3559           } else
3560             CritRedGen(CGF, nullptr, nullptr, nullptr);
3561         }
3562       ++ILHS, ++IRHS, ++IPriv;
3563     }
3564   }
3565 
3566   CGF.EmitBranch(DefaultBB);
3567   CGF.EmitBlock(DefaultBB, /*IsFinished=*/true);
3568 }
3569 
3570 void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
3571                                        SourceLocation Loc) {
3572   if (!CGF.HaveInsertPoint())
3573     return;
3574   // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
3575   // global_tid);
3576   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
3577   // Ignore return result until untied tasks are supported.
3578   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_taskwait), Args);
3579 }
3580 
3581 void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF,
3582                                            OpenMPDirectiveKind InnerKind,
3583                                            const RegionCodeGenTy &CodeGen,
3584                                            bool HasCancel) {
3585   if (!CGF.HaveInsertPoint())
3586     return;
3587   InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel);
3588   CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr);
3589 }
3590 
namespace {
/// \brief Cancellation kinds handed to the runtime as the 'cncl_kind'
/// argument of the __kmpc_cancel and __kmpc_cancellationpoint calls emitted
/// in this file. The numeric values are part of that interface, so they are
/// spelled out explicitly.
enum RTCancelKind {
  /// Initial value only; getCancellationKind() never returns it.
  CancelNoreq = 0,
  /// Used for the OMPD_parallel region.
  CancelParallel = 1,
  /// Used for the OMPD_for region.
  CancelLoop = 2,
  /// Used for the OMPD_sections region.
  CancelSections = 3,
  /// Used for the OMPD_taskgroup region.
  CancelTaskgroup = 4
};
} // end anonymous namespace
3600 
3601 static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) {
3602   RTCancelKind CancelKind = CancelNoreq;
3603   if (CancelRegion == OMPD_parallel)
3604     CancelKind = CancelParallel;
3605   else if (CancelRegion == OMPD_for)
3606     CancelKind = CancelLoop;
3607   else if (CancelRegion == OMPD_sections)
3608     CancelKind = CancelSections;
3609   else {
3610     assert(CancelRegion == OMPD_taskgroup);
3611     CancelKind = CancelTaskgroup;
3612   }
3613   return CancelKind;
3614 }
3615 
3616 void CGOpenMPRuntime::emitCancellationPointCall(
3617     CodeGenFunction &CGF, SourceLocation Loc,
3618     OpenMPDirectiveKind CancelRegion) {
3619   if (!CGF.HaveInsertPoint())
3620     return;
3621   // Build call kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
3622   // global_tid, kmp_int32 cncl_kind);
3623   if (auto *OMPRegionInfo =
3624           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
3625     if (OMPRegionInfo->hasCancel()) {
3626       llvm::Value *Args[] = {
3627           emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
3628           CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
3629       // Ignore return result until untied tasks are supported.
3630       auto *Result = CGF.EmitRuntimeCall(
3631           createRuntimeFunction(OMPRTL__kmpc_cancellationpoint), Args);
3632       // if (__kmpc_cancellationpoint()) {
3633       //  __kmpc_cancel_barrier();
3634       //   exit from construct;
3635       // }
3636       auto *ExitBB = CGF.createBasicBlock(".cancel.exit");
3637       auto *ContBB = CGF.createBasicBlock(".cancel.continue");
3638       auto *Cmp = CGF.Builder.CreateIsNotNull(Result);
3639       CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
3640       CGF.EmitBlock(ExitBB);
3641       // __kmpc_cancel_barrier();
3642       emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false);
3643       // exit from construct;
3644       auto CancelDest =
3645           CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
3646       CGF.EmitBranchThroughCleanup(CancelDest);
3647       CGF.EmitBlock(ContBB, /*IsFinished=*/true);
3648     }
3649   }
3650 }
3651 
3652 void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc,
3653                                      const Expr *IfCond,
3654                                      OpenMPDirectiveKind CancelRegion) {
3655   if (!CGF.HaveInsertPoint())
3656     return;
3657   // Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
3658   // kmp_int32 cncl_kind);
3659   if (auto *OMPRegionInfo =
3660           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
3661     auto &&ThenGen = [this, Loc, CancelRegion,
3662                       OMPRegionInfo](CodeGenFunction &CGF) {
3663       llvm::Value *Args[] = {
3664           emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
3665           CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
3666       // Ignore return result until untied tasks are supported.
3667       auto *Result =
3668           CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_cancel), Args);
3669       // if (__kmpc_cancel()) {
3670       //  __kmpc_cancel_barrier();
3671       //   exit from construct;
3672       // }
3673       auto *ExitBB = CGF.createBasicBlock(".cancel.exit");
3674       auto *ContBB = CGF.createBasicBlock(".cancel.continue");
3675       auto *Cmp = CGF.Builder.CreateIsNotNull(Result);
3676       CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
3677       CGF.EmitBlock(ExitBB);
3678       // __kmpc_cancel_barrier();
3679       emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false);
3680       // exit from construct;
3681       auto CancelDest =
3682           CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
3683       CGF.EmitBranchThroughCleanup(CancelDest);
3684       CGF.EmitBlock(ContBB, /*IsFinished=*/true);
3685     };
3686     if (IfCond)
3687       emitOMPIfClause(CGF, IfCond, ThenGen, [](CodeGenFunction &) {});
3688     else
3689       ThenGen(CGF);
3690   }
3691 }
3692 
3693 /// \brief Obtain information that uniquely identifies a target entry. This
3694 /// consists of the file and device IDs as well as line and column numbers
3695 /// associated with the relevant entry source location.
3696 static void getTargetEntryUniqueInfo(ASTContext &C, SourceLocation Loc,
3697                                      unsigned &DeviceID, unsigned &FileID,
3698                                      unsigned &LineNum, unsigned &ColumnNum) {
3699 
3700   auto &SM = C.getSourceManager();
3701 
3702   // The loc should be always valid and have a file ID (the user cannot use
3703   // #pragma directives in macros)
3704 
3705   assert(Loc.isValid() && "Source location is expected to be always valid.");
3706   assert(Loc.isFileID() && "Source location is expected to refer to a file.");
3707 
3708   PresumedLoc PLoc = SM.getPresumedLoc(Loc);
3709   assert(PLoc.isValid() && "Source location is expected to be always valid.");
3710 
3711   llvm::sys::fs::UniqueID ID;
3712   if (llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID))
3713     llvm_unreachable("Source file with target region no longer exists!");
3714 
3715   DeviceID = ID.getDevice();
3716   FileID = ID.getFile();
3717   LineNum = PLoc.getLine();
3718   ColumnNum = PLoc.getColumn();
3719   return;
3720 }
3721 
3722 void CGOpenMPRuntime::emitTargetOutlinedFunction(
3723     const OMPExecutableDirective &D, StringRef ParentName,
3724     llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
3725     bool IsOffloadEntry) {
3726 
3727   assert(!ParentName.empty() && "Invalid target region parent name!");
3728 
3729   const CapturedStmt &CS = *cast<CapturedStmt>(D.getAssociatedStmt());
3730 
3731   // Emit target region as a standalone region.
3732   auto &&CodeGen = [&CS](CodeGenFunction &CGF) {
3733     CGF.EmitStmt(CS.getCapturedStmt());
3734   };
3735 
3736   // Create a unique name for the proxy/entry function that using the source
3737   // location information of the current target region. The name will be
3738   // something like:
3739   //
3740   // .omp_offloading.DD_FFFF.PP.lBB.cCC
3741   //
3742   // where DD_FFFF is an ID unique to the file (device and file IDs), PP is the
3743   // mangled name of the function that encloses the target region, BB is the
3744   // line number of the target region, and CC is the column number of the target
3745   // region.
3746 
3747   unsigned DeviceID;
3748   unsigned FileID;
3749   unsigned Line;
3750   unsigned Column;
3751   getTargetEntryUniqueInfo(CGM.getContext(), D.getLocStart(), DeviceID, FileID,
3752                            Line, Column);
3753   SmallString<64> EntryFnName;
3754   {
3755     llvm::raw_svector_ostream OS(EntryFnName);
3756     OS << ".omp_offloading" << llvm::format(".%x", DeviceID)
3757        << llvm::format(".%x.", FileID) << ParentName << ".l" << Line << ".c"
3758        << Column;
3759   }
3760 
3761   CodeGenFunction CGF(CGM, true);
3762   CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName);
3763   CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
3764 
3765   OutlinedFn = CGF.GenerateOpenMPCapturedStmtFunction(CS);
3766 
3767   // If this target outline function is not an offload entry, we don't need to
3768   // register it.
3769   if (!IsOffloadEntry)
3770     return;
3771 
3772   // The target region ID is used by the runtime library to identify the current
3773   // target region, so it only has to be unique and not necessarily point to
3774   // anything. It could be the pointer to the outlined function that implements
3775   // the target region, but we aren't using that so that the compiler doesn't
3776   // need to keep that, and could therefore inline the host function if proven
3777   // worthwhile during optimization. In the other hand, if emitting code for the
3778   // device, the ID has to be the function address so that it can retrieved from
3779   // the offloading entry and launched by the runtime library. We also mark the
3780   // outlined function to have external linkage in case we are emitting code for
3781   // the device, because these functions will be entry points to the device.
3782 
3783   if (CGM.getLangOpts().OpenMPIsDevice) {
3784     OutlinedFnID = llvm::ConstantExpr::getBitCast(OutlinedFn, CGM.Int8PtrTy);
3785     OutlinedFn->setLinkage(llvm::GlobalValue::ExternalLinkage);
3786   } else
3787     OutlinedFnID = new llvm::GlobalVariable(
3788         CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
3789         llvm::GlobalValue::PrivateLinkage,
3790         llvm::Constant::getNullValue(CGM.Int8Ty), ".omp_offload.region_id");
3791 
3792   // Register the information for the entry associated with this target region.
3793   OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
3794       DeviceID, FileID, ParentName, Line, Column, OutlinedFn, OutlinedFnID);
3795   return;
3796 }
3797 
3798 void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF,
3799                                      const OMPExecutableDirective &D,
3800                                      llvm::Value *OutlinedFn,
3801                                      llvm::Value *OutlinedFnID,
3802                                      const Expr *IfCond, const Expr *Device,
3803                                      ArrayRef<llvm::Value *> CapturedVars) {
3804   if (!CGF.HaveInsertPoint())
3805     return;
3806   /// \brief Values for bit flags used to specify the mapping type for
3807   /// offloading.
3808   enum OpenMPOffloadMappingFlags {
3809     /// \brief Allocate memory on the device and move data from host to device.
3810     OMP_MAP_TO = 0x01,
3811     /// \brief Allocate memory on the device and move data from device to host.
3812     OMP_MAP_FROM = 0x02,
3813     /// \brief The element passed to the device is a pointer.
3814     OMP_MAP_PTR = 0x20,
3815     /// \brief Pass the element to the device by value.
3816     OMP_MAP_BYCOPY = 0x80,
3817   };
3818 
3819   enum OpenMPOffloadingReservedDeviceIDs {
3820     /// \brief Device ID if the device was not defined, runtime should get it
3821     /// from environment variables in the spec.
3822     OMP_DEVICEID_UNDEF = -1,
3823   };
3824 
3825   assert(OutlinedFn && "Invalid outlined function!");
3826 
3827   auto &Ctx = CGF.getContext();
3828 
3829   // Fill up the arrays with the all the captured variables.
3830   SmallVector<llvm::Value *, 16> BasePointers;
3831   SmallVector<llvm::Value *, 16> Pointers;
3832   SmallVector<llvm::Value *, 16> Sizes;
3833   SmallVector<unsigned, 16> MapTypes;
3834 
3835   bool hasVLACaptures = false;
3836 
3837   const CapturedStmt &CS = *cast<CapturedStmt>(D.getAssociatedStmt());
3838   auto RI = CS.getCapturedRecordDecl()->field_begin();
3839   // auto II = CS.capture_init_begin();
3840   auto CV = CapturedVars.begin();
3841   for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(),
3842                                             CE = CS.capture_end();
3843        CI != CE; ++CI, ++RI, ++CV) {
3844     StringRef Name;
3845     QualType Ty;
3846     llvm::Value *BasePointer;
3847     llvm::Value *Pointer;
3848     llvm::Value *Size;
3849     unsigned MapType;
3850 
3851     // VLA sizes are passed to the outlined region by copy.
3852     if (CI->capturesVariableArrayType()) {
3853       BasePointer = Pointer = *CV;
3854       Size = CGF.getTypeSize(RI->getType());
3855       // Copy to the device as an argument. No need to retrieve it.
3856       MapType = OMP_MAP_BYCOPY;
3857       hasVLACaptures = true;
3858     } else if (CI->capturesThis()) {
3859       BasePointer = Pointer = *CV;
3860       const PointerType *PtrTy = cast<PointerType>(RI->getType().getTypePtr());
3861       Size = CGF.getTypeSize(PtrTy->getPointeeType());
3862       // Default map type.
3863       MapType = OMP_MAP_TO | OMP_MAP_FROM;
3864     } else if (CI->capturesVariableByCopy()) {
3865       MapType = OMP_MAP_BYCOPY;
3866       if (!RI->getType()->isAnyPointerType()) {
3867         // If the field is not a pointer, we need to save the actual value and
3868         // load it as a void pointer.
3869         auto DstAddr = CGF.CreateMemTemp(
3870             Ctx.getUIntPtrType(),
3871             Twine(CI->getCapturedVar()->getName()) + ".casted");
3872         LValue DstLV = CGF.MakeAddrLValue(DstAddr, Ctx.getUIntPtrType());
3873 
3874         auto *SrcAddrVal = CGF.EmitScalarConversion(
3875             DstAddr.getPointer(), Ctx.getPointerType(Ctx.getUIntPtrType()),
3876             Ctx.getPointerType(RI->getType()), SourceLocation());
3877         LValue SrcLV =
3878             CGF.MakeNaturalAlignAddrLValue(SrcAddrVal, RI->getType());
3879 
3880         // Store the value using the source type pointer.
3881         CGF.EmitStoreThroughLValue(RValue::get(*CV), SrcLV);
3882 
3883         // Load the value using the destination type pointer.
3884         BasePointer = Pointer =
3885             CGF.EmitLoadOfLValue(DstLV, SourceLocation()).getScalarVal();
3886       } else {
3887         MapType |= OMP_MAP_PTR;
3888         BasePointer = Pointer = *CV;
3889       }
3890       Size = CGF.getTypeSize(RI->getType());
3891     } else {
3892       assert(CI->capturesVariable() && "Expected captured reference.");
3893       BasePointer = Pointer = *CV;
3894 
3895       const ReferenceType *PtrTy =
3896           cast<ReferenceType>(RI->getType().getTypePtr());
3897       QualType ElementType = PtrTy->getPointeeType();
3898       Size = CGF.getTypeSize(ElementType);
3899       // The default map type for a scalar/complex type is 'to' because by
3900       // default the value doesn't have to be retrieved. For an aggregate type,
3901       // the default is 'tofrom'.
3902       MapType = ElementType->isAggregateType() ? (OMP_MAP_TO | OMP_MAP_FROM)
3903                                                : OMP_MAP_TO;
3904       if (ElementType->isAnyPointerType())
3905         MapType |= OMP_MAP_PTR;
3906     }
3907 
3908     BasePointers.push_back(BasePointer);
3909     Pointers.push_back(Pointer);
3910     Sizes.push_back(Size);
3911     MapTypes.push_back(MapType);
3912   }
3913 
3914   // Keep track on whether the host function has to be executed.
3915   auto OffloadErrorQType =
3916       Ctx.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true);
3917   auto OffloadError = CGF.MakeAddrLValue(
3918       CGF.CreateMemTemp(OffloadErrorQType, ".run_host_version"),
3919       OffloadErrorQType);
3920   CGF.EmitStoreOfScalar(llvm::Constant::getNullValue(CGM.Int32Ty),
3921                         OffloadError);
3922 
3923   // Fill up the pointer arrays and transfer execution to the device.
3924   auto &&ThenGen = [this, &Ctx, &BasePointers, &Pointers, &Sizes, &MapTypes,
3925                     hasVLACaptures, Device, OutlinedFnID, OffloadError,
3926                     OffloadErrorQType](CodeGenFunction &CGF) {
3927     unsigned PointerNumVal = BasePointers.size();
3928     llvm::Value *PointerNum = CGF.Builder.getInt32(PointerNumVal);
3929     llvm::Value *BasePointersArray;
3930     llvm::Value *PointersArray;
3931     llvm::Value *SizesArray;
3932     llvm::Value *MapTypesArray;
3933 
3934     if (PointerNumVal) {
3935       llvm::APInt PointerNumAP(32, PointerNumVal, /*isSigned=*/true);
3936       QualType PointerArrayType = Ctx.getConstantArrayType(
3937           Ctx.VoidPtrTy, PointerNumAP, ArrayType::Normal,
3938           /*IndexTypeQuals=*/0);
3939 
3940       BasePointersArray =
3941           CGF.CreateMemTemp(PointerArrayType, ".offload_baseptrs").getPointer();
3942       PointersArray =
3943           CGF.CreateMemTemp(PointerArrayType, ".offload_ptrs").getPointer();
3944 
3945       // If we don't have any VLA types, we can use a constant array for the map
3946       // sizes, otherwise we need to fill up the arrays as we do for the
3947       // pointers.
3948       if (hasVLACaptures) {
3949         QualType SizeArrayType = Ctx.getConstantArrayType(
3950             Ctx.getSizeType(), PointerNumAP, ArrayType::Normal,
3951             /*IndexTypeQuals=*/0);
3952         SizesArray =
3953             CGF.CreateMemTemp(SizeArrayType, ".offload_sizes").getPointer();
3954       } else {
3955         // We expect all the sizes to be constant, so we collect them to create
3956         // a constant array.
3957         SmallVector<llvm::Constant *, 16> ConstSizes;
3958         for (auto S : Sizes)
3959           ConstSizes.push_back(cast<llvm::Constant>(S));
3960 
3961         auto *SizesArrayInit = llvm::ConstantArray::get(
3962             llvm::ArrayType::get(CGM.SizeTy, ConstSizes.size()), ConstSizes);
3963         auto *SizesArrayGbl = new llvm::GlobalVariable(
3964             CGM.getModule(), SizesArrayInit->getType(),
3965             /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
3966             SizesArrayInit, ".offload_sizes");
3967         SizesArrayGbl->setUnnamedAddr(true);
3968         SizesArray = SizesArrayGbl;
3969       }
3970 
3971       // The map types are always constant so we don't need to generate code to
3972       // fill arrays. Instead, we create an array constant.
3973       llvm::Constant *MapTypesArrayInit =
3974           llvm::ConstantDataArray::get(CGF.Builder.getContext(), MapTypes);
3975       auto *MapTypesArrayGbl = new llvm::GlobalVariable(
3976           CGM.getModule(), MapTypesArrayInit->getType(),
3977           /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
3978           MapTypesArrayInit, ".offload_maptypes");
3979       MapTypesArrayGbl->setUnnamedAddr(true);
3980       MapTypesArray = MapTypesArrayGbl;
3981 
3982       for (unsigned i = 0; i < PointerNumVal; ++i) {
3983 
3984         llvm::Value *BPVal = BasePointers[i];
3985         if (BPVal->getType()->isPointerTy())
3986           BPVal = CGF.Builder.CreateBitCast(BPVal, CGM.VoidPtrTy);
3987         else {
3988           assert(BPVal->getType()->isIntegerTy() &&
3989                  "If not a pointer, the value type must be an integer.");
3990           BPVal = CGF.Builder.CreateIntToPtr(BPVal, CGM.VoidPtrTy);
3991         }
3992         llvm::Value *BP = CGF.Builder.CreateConstInBoundsGEP2_32(
3993             llvm::ArrayType::get(CGM.VoidPtrTy, PointerNumVal),
3994             BasePointersArray, 0, i);
3995         Address BPAddr(BP, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
3996         CGF.Builder.CreateStore(BPVal, BPAddr);
3997 
3998         llvm::Value *PVal = Pointers[i];
3999         if (PVal->getType()->isPointerTy())
4000           PVal = CGF.Builder.CreateBitCast(PVal, CGM.VoidPtrTy);
4001         else {
4002           assert(PVal->getType()->isIntegerTy() &&
4003                  "If not a pointer, the value type must be an integer.");
4004           PVal = CGF.Builder.CreateIntToPtr(PVal, CGM.VoidPtrTy);
4005         }
4006         llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32(
4007             llvm::ArrayType::get(CGM.VoidPtrTy, PointerNumVal), PointersArray,
4008             0, i);
4009         Address PAddr(P, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
4010         CGF.Builder.CreateStore(PVal, PAddr);
4011 
4012         if (hasVLACaptures) {
4013           llvm::Value *S = CGF.Builder.CreateConstInBoundsGEP2_32(
4014               llvm::ArrayType::get(CGM.SizeTy, PointerNumVal), SizesArray,
4015               /*Idx0=*/0,
4016               /*Idx1=*/i);
4017           Address SAddr(S, Ctx.getTypeAlignInChars(Ctx.getSizeType()));
4018           CGF.Builder.CreateStore(CGF.Builder.CreateIntCast(
4019                                       Sizes[i], CGM.SizeTy, /*isSigned=*/true),
4020                                   SAddr);
4021         }
4022       }
4023 
4024       BasePointersArray = CGF.Builder.CreateConstInBoundsGEP2_32(
4025           llvm::ArrayType::get(CGM.VoidPtrTy, PointerNumVal), BasePointersArray,
4026           /*Idx0=*/0, /*Idx1=*/0);
4027       PointersArray = CGF.Builder.CreateConstInBoundsGEP2_32(
4028           llvm::ArrayType::get(CGM.VoidPtrTy, PointerNumVal), PointersArray,
4029           /*Idx0=*/0,
4030           /*Idx1=*/0);
4031       SizesArray = CGF.Builder.CreateConstInBoundsGEP2_32(
4032           llvm::ArrayType::get(CGM.SizeTy, PointerNumVal), SizesArray,
4033           /*Idx0=*/0, /*Idx1=*/0);
4034       MapTypesArray = CGF.Builder.CreateConstInBoundsGEP2_32(
4035           llvm::ArrayType::get(CGM.Int32Ty, PointerNumVal), MapTypesArray,
4036           /*Idx0=*/0,
4037           /*Idx1=*/0);
4038 
4039     } else {
4040       BasePointersArray = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
4041       PointersArray = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
4042       SizesArray = llvm::ConstantPointerNull::get(CGM.SizeTy->getPointerTo());
4043       MapTypesArray =
4044           llvm::ConstantPointerNull::get(CGM.Int32Ty->getPointerTo());
4045     }
4046 
4047     // On top of the arrays that were filled up, the target offloading call
4048     // takes as arguments the device id as well as the host pointer. The host
4049     // pointer is used by the runtime library to identify the current target
4050     // region, so it only has to be unique and not necessarily point to
4051     // anything. It could be the pointer to the outlined function that
4052     // implements the target region, but we aren't using that so that the
4053     // compiler doesn't need to keep that, and could therefore inline the host
4054     // function if proven worthwhile during optimization.
4055 
4056     // From this point on, we need to have an ID of the target region defined.
4057     assert(OutlinedFnID && "Invalid outlined function ID!");
4058 
4059     // Emit device ID if any.
4060     llvm::Value *DeviceID;
4061     if (Device)
4062       DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
4063                                            CGM.Int32Ty, /*isSigned=*/true);
4064     else
4065       DeviceID = CGF.Builder.getInt32(OMP_DEVICEID_UNDEF);
4066 
4067     llvm::Value *OffloadingArgs[] = {
4068         DeviceID,      OutlinedFnID, PointerNum,   BasePointersArray,
4069         PointersArray, SizesArray,   MapTypesArray};
4070     auto Return = CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_target),
4071                                       OffloadingArgs);
4072 
4073     CGF.EmitStoreOfScalar(Return, OffloadError);
4074   };
4075 
4076   // Notify that the host version must be executed.
4077   auto &&ElseGen = [this, OffloadError,
4078                     OffloadErrorQType](CodeGenFunction &CGF) {
4079     CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/-1u),
4080                           OffloadError);
4081   };
4082 
4083   // If we have a target function ID it means that we need to support
4084   // offloading, otherwise, just execute on the host. We need to execute on host
4085   // regardless of the conditional in the if clause if, e.g., the user do not
4086   // specify target triples.
4087   if (OutlinedFnID) {
4088     if (IfCond) {
4089       emitOMPIfClause(CGF, IfCond, ThenGen, ElseGen);
4090     } else {
4091       CodeGenFunction::RunCleanupsScope Scope(CGF);
4092       ThenGen(CGF);
4093     }
4094   } else {
4095     CodeGenFunction::RunCleanupsScope Scope(CGF);
4096     ElseGen(CGF);
4097   }
4098 
4099   // Check the error code and execute the host version if required.
4100   auto OffloadFailedBlock = CGF.createBasicBlock("omp_offload.failed");
4101   auto OffloadContBlock = CGF.createBasicBlock("omp_offload.cont");
4102   auto OffloadErrorVal = CGF.EmitLoadOfScalar(OffloadError, SourceLocation());
4103   auto Failed = CGF.Builder.CreateIsNotNull(OffloadErrorVal);
4104   CGF.Builder.CreateCondBr(Failed, OffloadFailedBlock, OffloadContBlock);
4105 
4106   CGF.EmitBlock(OffloadFailedBlock);
4107   CGF.Builder.CreateCall(OutlinedFn, BasePointers);
4108   CGF.EmitBranch(OffloadContBlock);
4109 
4110   CGF.EmitBlock(OffloadContBlock, /*IsFinished=*/true);
4111   return;
4112 }
4113 
4114 void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S,
4115                                                     StringRef ParentName) {
4116   if (!S)
4117     return;
4118 
4119   // If we find a OMP target directive, codegen the outline function and
4120   // register the result.
4121   // FIXME: Add other directives with target when they become supported.
4122   bool isTargetDirective = isa<OMPTargetDirective>(S);
4123 
4124   if (isTargetDirective) {
4125     auto *E = cast<OMPExecutableDirective>(S);
4126     unsigned DeviceID;
4127     unsigned FileID;
4128     unsigned Line;
4129     unsigned Column;
4130     getTargetEntryUniqueInfo(CGM.getContext(), E->getLocStart(), DeviceID,
4131                              FileID, Line, Column);
4132 
4133     // Is this a target region that should not be emitted as an entry point? If
4134     // so just signal we are done with this target region.
4135     if (!OffloadEntriesInfoManager.hasTargetRegionEntryInfo(
4136             DeviceID, FileID, ParentName, Line, Column))
4137       return;
4138 
4139     llvm::Function *Fn;
4140     llvm::Constant *Addr;
4141     emitTargetOutlinedFunction(*E, ParentName, Fn, Addr,
4142                                /*isOffloadEntry=*/true);
4143     assert(Fn && Addr && "Target region emission failed.");
4144     return;
4145   }
4146 
4147   if (const OMPExecutableDirective *E = dyn_cast<OMPExecutableDirective>(S)) {
4148     if (!E->getAssociatedStmt())
4149       return;
4150 
4151     scanForTargetRegionsFunctions(
4152         cast<CapturedStmt>(E->getAssociatedStmt())->getCapturedStmt(),
4153         ParentName);
4154     return;
4155   }
4156 
4157   // If this is a lambda function, look into its body.
4158   if (auto *L = dyn_cast<LambdaExpr>(S))
4159     S = L->getBody();
4160 
4161   // Keep looking for target regions recursively.
4162   for (auto *II : S->children())
4163     scanForTargetRegionsFunctions(II, ParentName);
4164 
4165   return;
4166 }
4167 
4168 bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) {
4169   auto &FD = *cast<FunctionDecl>(GD.getDecl());
4170 
4171   // If emitting code for the host, we do not process FD here. Instead we do
4172   // the normal code generation.
4173   if (!CGM.getLangOpts().OpenMPIsDevice)
4174     return false;
4175 
4176   // Try to detect target regions in the function.
4177   scanForTargetRegionsFunctions(FD.getBody(), CGM.getMangledName(GD));
4178 
4179   // We should not emit any function othen that the ones created during the
4180   // scanning. Therefore, we signal that this function is completely dealt
4181   // with.
4182   return true;
4183 }
4184 
4185 bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
4186   if (!CGM.getLangOpts().OpenMPIsDevice)
4187     return false;
4188 
4189   // Check if there are Ctors/Dtors in this declaration and look for target
4190   // regions in it. We use the complete variant to produce the kernel name
4191   // mangling.
4192   QualType RDTy = cast<VarDecl>(GD.getDecl())->getType();
4193   if (auto *RD = RDTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl()) {
4194     for (auto *Ctor : RD->ctors()) {
4195       StringRef ParentName =
4196           CGM.getMangledName(GlobalDecl(Ctor, Ctor_Complete));
4197       scanForTargetRegionsFunctions(Ctor->getBody(), ParentName);
4198     }
4199     auto *Dtor = RD->getDestructor();
4200     if (Dtor) {
4201       StringRef ParentName =
4202           CGM.getMangledName(GlobalDecl(Dtor, Dtor_Complete));
4203       scanForTargetRegionsFunctions(Dtor->getBody(), ParentName);
4204     }
4205   }
4206 
4207   // If we are in target mode we do not emit any global (declare target is not
4208   // implemented yet). Therefore we signal that GD was processed in this case.
4209   return true;
4210 }
4211 
4212 bool CGOpenMPRuntime::emitTargetGlobal(GlobalDecl GD) {
4213   auto *VD = GD.getDecl();
4214   if (isa<FunctionDecl>(VD))
4215     return emitTargetFunctions(GD);
4216 
4217   return emitTargetGlobalVariable(GD);
4218 }
4219 
4220 llvm::Function *CGOpenMPRuntime::emitRegistrationFunction() {
4221   // If we have offloading in the current module, we need to emit the entries
4222   // now and register the offloading descriptor.
4223   createOffloadEntriesAndInfoMetadata();
4224 
4225   // Create and register the offloading binary descriptors. This is the main
4226   // entity that captures all the information about offloading in the current
4227   // compilation unit.
4228   return createOffloadingBinaryDescriptorRegistration();
4229 }
4230