1 //===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This provides a class for OpenMP runtime code generation.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "CGOpenMPRuntime.h"
15 #include "CodeGenFunction.h"
16 #include "CGCleanup.h"
17 #include "clang/AST/Decl.h"
18 #include "clang/AST/StmtOpenMP.h"
19 #include "llvm/ADT/ArrayRef.h"
20 #include "llvm/IR/CallSite.h"
21 #include "llvm/IR/DerivedTypes.h"
22 #include "llvm/IR/GlobalValue.h"
23 #include "llvm/IR/Value.h"
24 #include "llvm/Support/raw_ostream.h"
25 #include <cassert>
26 
27 using namespace clang;
28 using namespace CodeGen;
29 
30 namespace {
31 /// \brief Base class for handling code generation inside OpenMP regions.
32 class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
33 public:
34   /// \brief Kinds of OpenMP regions used in codegen.
35   enum CGOpenMPRegionKind {
36     /// \brief Region with outlined function for standalone 'parallel'
37     /// directive.
38     ParallelOutlinedRegion,
39     /// \brief Region with outlined function for standalone 'task' directive.
40     TaskOutlinedRegion,
41     /// \brief Region for constructs that do not require function outlining,
42     /// like 'for', 'sections', 'atomic' etc. directives.
43     InlinedRegion,
44     /// \brief Region with outlined function for standalone 'target' directive.
45     TargetRegion,
46   };
47 
48   CGOpenMPRegionInfo(const CapturedStmt &CS,
49                      const CGOpenMPRegionKind RegionKind,
50                      const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
51                      bool HasCancel)
52       : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind),
53         CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {}
54 
55   CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind,
56                      const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
57                      bool HasCancel)
58       : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen),
59         Kind(Kind), HasCancel(HasCancel) {}
60 
61   /// \brief Get a variable or parameter for storing global thread id
62   /// inside OpenMP construct.
63   virtual const VarDecl *getThreadIDVariable() const = 0;
64 
65   /// \brief Emit the captured statement body.
66   void EmitBody(CodeGenFunction &CGF, const Stmt *S) override;
67 
68   /// \brief Get an LValue for the current ThreadID variable.
69   /// \return LValue for thread id variable. This LValue always has type int32*.
70   virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF);
71 
72   CGOpenMPRegionKind getRegionKind() const { return RegionKind; }
73 
74   OpenMPDirectiveKind getDirectiveKind() const { return Kind; }
75 
76   bool hasCancel() const { return HasCancel; }
77 
78   static bool classof(const CGCapturedStmtInfo *Info) {
79     return Info->getKind() == CR_OpenMP;
80   }
81 
82 protected:
83   CGOpenMPRegionKind RegionKind;
84   const RegionCodeGenTy &CodeGen;
85   OpenMPDirectiveKind Kind;
86   bool HasCancel;
87 };
88 
89 /// \brief API for captured statement code generation in OpenMP constructs.
90 class CGOpenMPOutlinedRegionInfo : public CGOpenMPRegionInfo {
91 public:
92   CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar,
93                              const RegionCodeGenTy &CodeGen,
94                              OpenMPDirectiveKind Kind, bool HasCancel)
95       : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind,
96                            HasCancel),
97         ThreadIDVar(ThreadIDVar) {
98     assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
99   }
100   /// \brief Get a variable or parameter for storing global thread id
101   /// inside OpenMP construct.
102   const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }
103 
104   /// \brief Get the name of the capture helper.
105   StringRef getHelperName() const override { return ".omp_outlined."; }
106 
107   static bool classof(const CGCapturedStmtInfo *Info) {
108     return CGOpenMPRegionInfo::classof(Info) &&
109            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
110                ParallelOutlinedRegion;
111   }
112 
113 private:
114   /// \brief A variable or parameter storing global thread id for OpenMP
115   /// constructs.
116   const VarDecl *ThreadIDVar;
117 };
118 
119 /// \brief API for captured statement code generation in OpenMP constructs.
120 class CGOpenMPTaskOutlinedRegionInfo : public CGOpenMPRegionInfo {
121 public:
122   CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS,
123                                  const VarDecl *ThreadIDVar,
124                                  const RegionCodeGenTy &CodeGen,
125                                  OpenMPDirectiveKind Kind, bool HasCancel)
126       : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel),
127         ThreadIDVar(ThreadIDVar) {
128     assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
129   }
130   /// \brief Get a variable or parameter for storing global thread id
131   /// inside OpenMP construct.
132   const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }
133 
134   /// \brief Get an LValue for the current ThreadID variable.
135   LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override;
136 
137   /// \brief Get the name of the capture helper.
138   StringRef getHelperName() const override { return ".omp_outlined."; }
139 
140   static bool classof(const CGCapturedStmtInfo *Info) {
141     return CGOpenMPRegionInfo::classof(Info) &&
142            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
143                TaskOutlinedRegion;
144   }
145 
146 private:
147   /// \brief A variable or parameter storing global thread id for OpenMP
148   /// constructs.
149   const VarDecl *ThreadIDVar;
150 };
151 
152 /// \brief API for inlined captured statement code generation in OpenMP
153 /// constructs.
154 class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo {
155 public:
156   CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI,
157                             const RegionCodeGenTy &CodeGen,
158                             OpenMPDirectiveKind Kind, bool HasCancel)
159       : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel),
160         OldCSI(OldCSI),
161         OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {}
162   // \brief Retrieve the value of the context parameter.
163   llvm::Value *getContextValue() const override {
164     if (OuterRegionInfo)
165       return OuterRegionInfo->getContextValue();
166     llvm_unreachable("No context value for inlined OpenMP region");
167   }
168   void setContextValue(llvm::Value *V) override {
169     if (OuterRegionInfo) {
170       OuterRegionInfo->setContextValue(V);
171       return;
172     }
173     llvm_unreachable("No context value for inlined OpenMP region");
174   }
175   /// \brief Lookup the captured field decl for a variable.
176   const FieldDecl *lookup(const VarDecl *VD) const override {
177     if (OuterRegionInfo)
178       return OuterRegionInfo->lookup(VD);
179     // If there is no outer outlined region,no need to lookup in a list of
180     // captured variables, we can use the original one.
181     return nullptr;
182   }
183   FieldDecl *getThisFieldDecl() const override {
184     if (OuterRegionInfo)
185       return OuterRegionInfo->getThisFieldDecl();
186     return nullptr;
187   }
188   /// \brief Get a variable or parameter for storing global thread id
189   /// inside OpenMP construct.
190   const VarDecl *getThreadIDVariable() const override {
191     if (OuterRegionInfo)
192       return OuterRegionInfo->getThreadIDVariable();
193     return nullptr;
194   }
195 
196   /// \brief Get the name of the capture helper.
197   StringRef getHelperName() const override {
198     if (auto *OuterRegionInfo = getOldCSI())
199       return OuterRegionInfo->getHelperName();
200     llvm_unreachable("No helper name for inlined OpenMP construct");
201   }
202 
203   CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; }
204 
205   static bool classof(const CGCapturedStmtInfo *Info) {
206     return CGOpenMPRegionInfo::classof(Info) &&
207            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion;
208   }
209 
210 private:
211   /// \brief CodeGen info about outer OpenMP region.
212   CodeGenFunction::CGCapturedStmtInfo *OldCSI;
213   CGOpenMPRegionInfo *OuterRegionInfo;
214 };
215 
216 /// \brief API for captured statement code generation in OpenMP target
217 /// constructs. For this captures, implicit parameters are used instead of the
218 /// captured fields.
219 class CGOpenMPTargetRegionInfo : public CGOpenMPRegionInfo {
220 public:
221   CGOpenMPTargetRegionInfo(const CapturedStmt &CS,
222                            const RegionCodeGenTy &CodeGen)
223       : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target,
224                            /*HasCancel = */ false) {}
225 
226   /// \brief This is unused for target regions because each starts executing
227   /// with a single thread.
228   const VarDecl *getThreadIDVariable() const override { return nullptr; }
229 
230   /// \brief Get the name of the capture helper.
231   StringRef getHelperName() const override { return ".omp_offloading."; }
232 
233   static bool classof(const CGCapturedStmtInfo *Info) {
234     return CGOpenMPRegionInfo::classof(Info) &&
235            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion;
236   }
237 };
238 
239 /// \brief RAII for emitting code of OpenMP constructs.
240 class InlinedOpenMPRegionRAII {
241   CodeGenFunction &CGF;
242 
243 public:
244   /// \brief Constructs region for combined constructs.
245   /// \param CodeGen Code generation sequence for combined directives. Includes
246   /// a list of functions used for code generation of implicitly inlined
247   /// regions.
248   InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen,
249                           OpenMPDirectiveKind Kind, bool HasCancel)
250       : CGF(CGF) {
251     // Start emission for the construct.
252     CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo(
253         CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel);
254   }
255   ~InlinedOpenMPRegionRAII() {
256     // Restore original CapturedStmtInfo only if we're done with code emission.
257     auto *OldCSI =
258         cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI();
259     delete CGF.CapturedStmtInfo;
260     CGF.CapturedStmtInfo = OldCSI;
261   }
262 };
263 
264 } // anonymous namespace
265 
266 static LValue emitLoadOfPointerLValue(CodeGenFunction &CGF, Address PtrAddr,
267                                       QualType Ty) {
268   AlignmentSource Source;
269   CharUnits Align = CGF.getNaturalPointeeTypeAlignment(Ty, &Source);
270   return CGF.MakeAddrLValue(Address(CGF.Builder.CreateLoad(PtrAddr), Align),
271                             Ty->getPointeeType(), Source);
272 }
273 
274 LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) {
275   return emitLoadOfPointerLValue(CGF,
276                                  CGF.GetAddrOfLocalVar(getThreadIDVariable()),
277                                  getThreadIDVariable()->getType());
278 }
279 
280 void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt * /*S*/) {
281   // 1.2.2 OpenMP Language Terminology
282   // Structured block - An executable statement with a single entry at the
283   // top and a single exit at the bottom.
284   // The point of exit cannot be a branch out of the structured block.
285   // longjmp() and throw() must not violate the entry/exit criteria.
286   CGF.EHStack.pushTerminate();
287   {
288     CodeGenFunction::RunCleanupsScope Scope(CGF);
289     CodeGen(CGF);
290   }
291   CGF.EHStack.popTerminate();
292 }
293 
294 LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue(
295     CodeGenFunction &CGF) {
296   return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()),
297                             getThreadIDVariable()->getType(),
298                             AlignmentSource::Decl);
299 }
300 
301 CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM)
302     : CGM(CGM), DefaultOpenMPPSource(nullptr), KmpRoutineEntryPtrTy(nullptr) {
303   IdentTy = llvm::StructType::create(
304       "ident_t", CGM.Int32Ty /* reserved_1 */, CGM.Int32Ty /* flags */,
305       CGM.Int32Ty /* reserved_2 */, CGM.Int32Ty /* reserved_3 */,
306       CGM.Int8PtrTy /* psource */, nullptr);
307   // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...)
308   llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty),
309                                llvm::PointerType::getUnqual(CGM.Int32Ty)};
310   Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true);
311   KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8);
312 }
313 
314 void CGOpenMPRuntime::clear() {
315   InternalVars.clear();
316 }
317 
318 // Layout information for ident_t.
319 static CharUnits getIdentAlign(CodeGenModule &CGM) {
320   return CGM.getPointerAlign();
321 }
322 static CharUnits getIdentSize(CodeGenModule &CGM) {
323   assert((4 * CGM.getPointerSize()).isMultipleOf(CGM.getPointerAlign()));
324   return CharUnits::fromQuantity(16) + CGM.getPointerSize();
325 }
326 static CharUnits getOffsetOfIdentField(CGOpenMPRuntime::IdentFieldIndex Field) {
327   // All the fields except the last are i32, so this works beautifully.
328   return unsigned(Field) * CharUnits::fromQuantity(4);
329 }
330 static Address createIdentFieldGEP(CodeGenFunction &CGF, Address Addr,
331                                    CGOpenMPRuntime::IdentFieldIndex Field,
332                                    const llvm::Twine &Name = "") {
333   auto Offset = getOffsetOfIdentField(Field);
334   return CGF.Builder.CreateStructGEP(Addr, Field, Offset, Name);
335 }
336 
337 llvm::Value *CGOpenMPRuntime::emitParallelOutlinedFunction(
338     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
339     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
340   assert(ThreadIDVar->getType()->isPointerType() &&
341          "thread id variable must be of type kmp_int32 *");
342   const CapturedStmt *CS = cast<CapturedStmt>(D.getAssociatedStmt());
343   CodeGenFunction CGF(CGM, true);
344   bool HasCancel = false;
345   if (auto *OPD = dyn_cast<OMPParallelDirective>(&D))
346     HasCancel = OPD->hasCancel();
347   else if (auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D))
348     HasCancel = OPSD->hasCancel();
349   else if (auto *OPFD = dyn_cast<OMPParallelForDirective>(&D))
350     HasCancel = OPFD->hasCancel();
351   CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind,
352                                     HasCancel);
353   CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
354   return CGF.GenerateOpenMPCapturedStmtFunction(*CS);
355 }
356 
357 llvm::Value *CGOpenMPRuntime::emitTaskOutlinedFunction(
358     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
359     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
360   assert(!ThreadIDVar->getType()->isPointerType() &&
361          "thread id variable must be of type kmp_int32 for tasks");
362   auto *CS = cast<CapturedStmt>(D.getAssociatedStmt());
363   CodeGenFunction CGF(CGM, true);
364   CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen,
365                                         InnermostKind,
366                                         cast<OMPTaskDirective>(D).hasCancel());
367   CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
368   return CGF.GenerateCapturedStmtFunction(*CS);
369 }
370 
371 Address CGOpenMPRuntime::getOrCreateDefaultLocation(OpenMPLocationFlags Flags) {
372   CharUnits Align = getIdentAlign(CGM);
373   llvm::Value *Entry = OpenMPDefaultLocMap.lookup(Flags);
374   if (!Entry) {
375     if (!DefaultOpenMPPSource) {
376       // Initialize default location for psource field of ident_t structure of
377       // all ident_t objects. Format is ";file;function;line;column;;".
378       // Taken from
379       // http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp_str.c
380       DefaultOpenMPPSource =
381           CGM.GetAddrOfConstantCString(";unknown;unknown;0;0;;").getPointer();
382       DefaultOpenMPPSource =
383           llvm::ConstantExpr::getBitCast(DefaultOpenMPPSource, CGM.Int8PtrTy);
384     }
385     auto DefaultOpenMPLocation = new llvm::GlobalVariable(
386         CGM.getModule(), IdentTy, /*isConstant*/ true,
387         llvm::GlobalValue::PrivateLinkage, /*Initializer*/ nullptr);
388     DefaultOpenMPLocation->setUnnamedAddr(true);
389     DefaultOpenMPLocation->setAlignment(Align.getQuantity());
390 
391     llvm::Constant *Zero = llvm::ConstantInt::get(CGM.Int32Ty, 0, true);
392     llvm::Constant *Values[] = {Zero,
393                                 llvm::ConstantInt::get(CGM.Int32Ty, Flags),
394                                 Zero, Zero, DefaultOpenMPPSource};
395     llvm::Constant *Init = llvm::ConstantStruct::get(IdentTy, Values);
396     DefaultOpenMPLocation->setInitializer(Init);
397     OpenMPDefaultLocMap[Flags] = Entry = DefaultOpenMPLocation;
398   }
399   return Address(Entry, Align);
400 }
401 
402 llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF,
403                                                  SourceLocation Loc,
404                                                  OpenMPLocationFlags Flags) {
405   // If no debug info is generated - return global default location.
406   if (CGM.getCodeGenOpts().getDebugInfo() == CodeGenOptions::NoDebugInfo ||
407       Loc.isInvalid())
408     return getOrCreateDefaultLocation(Flags).getPointer();
409 
410   assert(CGF.CurFn && "No function in current CodeGenFunction.");
411 
412   Address LocValue = Address::invalid();
413   auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
414   if (I != OpenMPLocThreadIDMap.end())
415     LocValue = Address(I->second.DebugLoc, getIdentAlign(CGF.CGM));
416 
417   // OpenMPLocThreadIDMap may have null DebugLoc and non-null ThreadID, if
418   // GetOpenMPThreadID was called before this routine.
419   if (!LocValue.isValid()) {
420     // Generate "ident_t .kmpc_loc.addr;"
421     Address AI = CGF.CreateTempAlloca(IdentTy, getIdentAlign(CGF.CGM),
422                                       ".kmpc_loc.addr");
423     auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
424     Elem.second.DebugLoc = AI.getPointer();
425     LocValue = AI;
426 
427     CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
428     CGF.Builder.SetInsertPoint(CGF.AllocaInsertPt);
429     CGF.Builder.CreateMemCpy(LocValue, getOrCreateDefaultLocation(Flags),
430                              CGM.getSize(getIdentSize(CGF.CGM)));
431   }
432 
433   // char **psource = &.kmpc_loc_<flags>.addr.psource;
434   Address PSource = createIdentFieldGEP(CGF, LocValue, IdentField_PSource);
435 
436   auto OMPDebugLoc = OpenMPDebugLocMap.lookup(Loc.getRawEncoding());
437   if (OMPDebugLoc == nullptr) {
438     SmallString<128> Buffer2;
439     llvm::raw_svector_ostream OS2(Buffer2);
440     // Build debug location
441     PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
442     OS2 << ";" << PLoc.getFilename() << ";";
443     if (const FunctionDecl *FD =
444             dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl)) {
445       OS2 << FD->getQualifiedNameAsString();
446     }
447     OS2 << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;";
448     OMPDebugLoc = CGF.Builder.CreateGlobalStringPtr(OS2.str());
449     OpenMPDebugLocMap[Loc.getRawEncoding()] = OMPDebugLoc;
450   }
451   // *psource = ";<File>;<Function>;<Line>;<Column>;;";
452   CGF.Builder.CreateStore(OMPDebugLoc, PSource);
453 
454   // Our callers always pass this to a runtime function, so for
455   // convenience, go ahead and return a naked pointer.
456   return LocValue.getPointer();
457 }
458 
459 llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF,
460                                           SourceLocation Loc) {
461   assert(CGF.CurFn && "No function in current CodeGenFunction.");
462 
463   llvm::Value *ThreadID = nullptr;
464   // Check whether we've already cached a load of the thread id in this
465   // function.
466   auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
467   if (I != OpenMPLocThreadIDMap.end()) {
468     ThreadID = I->second.ThreadID;
469     if (ThreadID != nullptr)
470       return ThreadID;
471   }
472   if (auto OMPRegionInfo =
473           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
474     if (OMPRegionInfo->getThreadIDVariable()) {
475       // Check if this an outlined function with thread id passed as argument.
476       auto LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF);
477       ThreadID = CGF.EmitLoadOfLValue(LVal, Loc).getScalarVal();
478       // If value loaded in entry block, cache it and use it everywhere in
479       // function.
480       if (CGF.Builder.GetInsertBlock() == CGF.AllocaInsertPt->getParent()) {
481         auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
482         Elem.second.ThreadID = ThreadID;
483       }
484       return ThreadID;
485     }
486   }
487 
488   // This is not an outlined function region - need to call __kmpc_int32
489   // kmpc_global_thread_num(ident_t *loc).
490   // Generate thread id value and cache this value for use across the
491   // function.
492   CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
493   CGF.Builder.SetInsertPoint(CGF.AllocaInsertPt);
494   ThreadID =
495       CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_global_thread_num),
496                           emitUpdateLocation(CGF, Loc));
497   auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
498   Elem.second.ThreadID = ThreadID;
499   return ThreadID;
500 }
501 
502 void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) {
503   assert(CGF.CurFn && "No function in current CodeGenFunction.");
504   if (OpenMPLocThreadIDMap.count(CGF.CurFn))
505     OpenMPLocThreadIDMap.erase(CGF.CurFn);
506 }
507 
508 llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() {
509   return llvm::PointerType::getUnqual(IdentTy);
510 }
511 
512 llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() {
513   return llvm::PointerType::getUnqual(Kmpc_MicroTy);
514 }
515 
516 llvm::Constant *
517 CGOpenMPRuntime::createRuntimeFunction(OpenMPRTLFunction Function) {
518   llvm::Constant *RTLFn = nullptr;
519   switch (Function) {
520   case OMPRTL__kmpc_fork_call: {
521     // Build void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro
522     // microtask, ...);
523     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
524                                 getKmpc_MicroPointerTy()};
525     llvm::FunctionType *FnTy =
526         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ true);
527     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_fork_call");
528     break;
529   }
530   case OMPRTL__kmpc_global_thread_num: {
531     // Build kmp_int32 __kmpc_global_thread_num(ident_t *loc);
532     llvm::Type *TypeParams[] = {getIdentTyPointerTy()};
533     llvm::FunctionType *FnTy =
534         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
535     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_global_thread_num");
536     break;
537   }
538   case OMPRTL__kmpc_threadprivate_cached: {
539     // Build void *__kmpc_threadprivate_cached(ident_t *loc,
540     // kmp_int32 global_tid, void *data, size_t size, void ***cache);
541     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
542                                 CGM.VoidPtrTy, CGM.SizeTy,
543                                 CGM.VoidPtrTy->getPointerTo()->getPointerTo()};
544     llvm::FunctionType *FnTy =
545         llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg*/ false);
546     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_cached");
547     break;
548   }
549   case OMPRTL__kmpc_critical: {
550     // Build void __kmpc_critical(ident_t *loc, kmp_int32 global_tid,
551     // kmp_critical_name *crit);
552     llvm::Type *TypeParams[] = {
553         getIdentTyPointerTy(), CGM.Int32Ty,
554         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
555     llvm::FunctionType *FnTy =
556         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
557     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_critical");
558     break;
559   }
560   case OMPRTL__kmpc_threadprivate_register: {
561     // Build void __kmpc_threadprivate_register(ident_t *, void *data,
562     // kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor);
563     // typedef void *(*kmpc_ctor)(void *);
564     auto KmpcCtorTy =
565         llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
566                                 /*isVarArg*/ false)->getPointerTo();
567     // typedef void *(*kmpc_cctor)(void *, void *);
568     llvm::Type *KmpcCopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
569     auto KmpcCopyCtorTy =
570         llvm::FunctionType::get(CGM.VoidPtrTy, KmpcCopyCtorTyArgs,
571                                 /*isVarArg*/ false)->getPointerTo();
572     // typedef void (*kmpc_dtor)(void *);
573     auto KmpcDtorTy =
574         llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy, /*isVarArg*/ false)
575             ->getPointerTo();
576     llvm::Type *FnTyArgs[] = {getIdentTyPointerTy(), CGM.VoidPtrTy, KmpcCtorTy,
577                               KmpcCopyCtorTy, KmpcDtorTy};
578     auto FnTy = llvm::FunctionType::get(CGM.VoidTy, FnTyArgs,
579                                         /*isVarArg*/ false);
580     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_register");
581     break;
582   }
583   case OMPRTL__kmpc_end_critical: {
584     // Build void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid,
585     // kmp_critical_name *crit);
586     llvm::Type *TypeParams[] = {
587         getIdentTyPointerTy(), CGM.Int32Ty,
588         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
589     llvm::FunctionType *FnTy =
590         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
591     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_critical");
592     break;
593   }
594   case OMPRTL__kmpc_cancel_barrier: {
595     // Build kmp_int32 __kmpc_cancel_barrier(ident_t *loc, kmp_int32
596     // global_tid);
597     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
598     llvm::FunctionType *FnTy =
599         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
600     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_cancel_barrier");
601     break;
602   }
603   case OMPRTL__kmpc_barrier: {
604     // Build void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid);
605     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
606     llvm::FunctionType *FnTy =
607         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
608     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_barrier");
609     break;
610   }
611   case OMPRTL__kmpc_for_static_fini: {
612     // Build void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid);
613     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
614     llvm::FunctionType *FnTy =
615         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
616     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_for_static_fini");
617     break;
618   }
619   case OMPRTL__kmpc_push_num_threads: {
620     // Build void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
621     // kmp_int32 num_threads)
622     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
623                                 CGM.Int32Ty};
624     llvm::FunctionType *FnTy =
625         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
626     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_num_threads");
627     break;
628   }
629   case OMPRTL__kmpc_serialized_parallel: {
630     // Build void __kmpc_serialized_parallel(ident_t *loc, kmp_int32
631     // global_tid);
632     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
633     llvm::FunctionType *FnTy =
634         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
635     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_serialized_parallel");
636     break;
637   }
638   case OMPRTL__kmpc_end_serialized_parallel: {
639     // Build void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32
640     // global_tid);
641     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
642     llvm::FunctionType *FnTy =
643         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
644     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_serialized_parallel");
645     break;
646   }
647   case OMPRTL__kmpc_flush: {
648     // Build void __kmpc_flush(ident_t *loc);
649     llvm::Type *TypeParams[] = {getIdentTyPointerTy()};
650     llvm::FunctionType *FnTy =
651         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
652     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_flush");
653     break;
654   }
655   case OMPRTL__kmpc_master: {
656     // Build kmp_int32 __kmpc_master(ident_t *loc, kmp_int32 global_tid);
657     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
658     llvm::FunctionType *FnTy =
659         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
660     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_master");
661     break;
662   }
663   case OMPRTL__kmpc_end_master: {
664     // Build void __kmpc_end_master(ident_t *loc, kmp_int32 global_tid);
665     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
666     llvm::FunctionType *FnTy =
667         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
668     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_master");
669     break;
670   }
671   case OMPRTL__kmpc_omp_taskyield: {
672     // Build kmp_int32 __kmpc_omp_taskyield(ident_t *, kmp_int32 global_tid,
673     // int end_part);
674     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
675     llvm::FunctionType *FnTy =
676         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
677     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_taskyield");
678     break;
679   }
680   case OMPRTL__kmpc_single: {
681     // Build kmp_int32 __kmpc_single(ident_t *loc, kmp_int32 global_tid);
682     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
683     llvm::FunctionType *FnTy =
684         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
685     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_single");
686     break;
687   }
688   case OMPRTL__kmpc_end_single: {
689     // Build void __kmpc_end_single(ident_t *loc, kmp_int32 global_tid);
690     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
691     llvm::FunctionType *FnTy =
692         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
693     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_single");
694     break;
695   }
696   case OMPRTL__kmpc_omp_task_alloc: {
697     // Build kmp_task_t *__kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
698     // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
699     // kmp_routine_entry_t *task_entry);
700     assert(KmpRoutineEntryPtrTy != nullptr &&
701            "Type kmp_routine_entry_t must be created.");
702     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty,
703                                 CGM.SizeTy, CGM.SizeTy, KmpRoutineEntryPtrTy};
704     // Return void * and then cast to particular kmp_task_t type.
705     llvm::FunctionType *FnTy =
706         llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
707     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_alloc");
708     break;
709   }
710   case OMPRTL__kmpc_omp_task: {
711     // Build kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t
712     // *new_task);
713     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
714                                 CGM.VoidPtrTy};
715     llvm::FunctionType *FnTy =
716         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
717     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task");
718     break;
719   }
720   case OMPRTL__kmpc_copyprivate: {
721     // Build void __kmpc_copyprivate(ident_t *loc, kmp_int32 global_tid,
722     // size_t cpy_size, void *cpy_data, void(*cpy_func)(void *, void *),
723     // kmp_int32 didit);
724     llvm::Type *CpyTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
725     auto *CpyFnTy =
726         llvm::FunctionType::get(CGM.VoidTy, CpyTypeParams, /*isVarArg=*/false);
727     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.SizeTy,
728                                 CGM.VoidPtrTy, CpyFnTy->getPointerTo(),
729                                 CGM.Int32Ty};
730     llvm::FunctionType *FnTy =
731         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
732     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_copyprivate");
733     break;
734   }
735   case OMPRTL__kmpc_reduce: {
736     // Build kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid,
737     // kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void
738     // (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name *lck);
739     llvm::Type *ReduceTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
740     auto *ReduceFnTy = llvm::FunctionType::get(CGM.VoidTy, ReduceTypeParams,
741                                                /*isVarArg=*/false);
742     llvm::Type *TypeParams[] = {
743         getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, CGM.SizeTy,
744         CGM.VoidPtrTy, ReduceFnTy->getPointerTo(),
745         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
746     llvm::FunctionType *FnTy =
747         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
748     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_reduce");
749     break;
750   }
751   case OMPRTL__kmpc_reduce_nowait: {
752     // Build kmp_int32 __kmpc_reduce_nowait(ident_t *loc, kmp_int32
753     // global_tid, kmp_int32 num_vars, size_t reduce_size, void *reduce_data,
754     // void (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name
755     // *lck);
756     llvm::Type *ReduceTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
757     auto *ReduceFnTy = llvm::FunctionType::get(CGM.VoidTy, ReduceTypeParams,
758                                                /*isVarArg=*/false);
759     llvm::Type *TypeParams[] = {
760         getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, CGM.SizeTy,
761         CGM.VoidPtrTy, ReduceFnTy->getPointerTo(),
762         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
763     llvm::FunctionType *FnTy =
764         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
765     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_reduce_nowait");
766     break;
767   }
768   case OMPRTL__kmpc_end_reduce: {
769     // Build void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,
770     // kmp_critical_name *lck);
771     llvm::Type *TypeParams[] = {
772         getIdentTyPointerTy(), CGM.Int32Ty,
773         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
774     llvm::FunctionType *FnTy =
775         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
776     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_reduce");
777     break;
778   }
779   case OMPRTL__kmpc_end_reduce_nowait: {
780     // Build __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
781     // kmp_critical_name *lck);
782     llvm::Type *TypeParams[] = {
783         getIdentTyPointerTy(), CGM.Int32Ty,
784         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
785     llvm::FunctionType *FnTy =
786         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
787     RTLFn =
788         CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_reduce_nowait");
789     break;
790   }
791   case OMPRTL__kmpc_omp_task_begin_if0: {
792     // Build void __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t
793     // *new_task);
794     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
795                                 CGM.VoidPtrTy};
796     llvm::FunctionType *FnTy =
797         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
798     RTLFn =
799         CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_begin_if0");
800     break;
801   }
802   case OMPRTL__kmpc_omp_task_complete_if0: {
803     // Build void __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t
804     // *new_task);
805     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
806                                 CGM.VoidPtrTy};
807     llvm::FunctionType *FnTy =
808         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
809     RTLFn = CGM.CreateRuntimeFunction(FnTy,
810                                       /*Name=*/"__kmpc_omp_task_complete_if0");
811     break;
812   }
813   case OMPRTL__kmpc_ordered: {
814     // Build void __kmpc_ordered(ident_t *loc, kmp_int32 global_tid);
815     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
816     llvm::FunctionType *FnTy =
817         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
818     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_ordered");
819     break;
820   }
821   case OMPRTL__kmpc_end_ordered: {
822     // Build void __kmpc_end_ordered(ident_t *loc, kmp_int32 global_tid);
823     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
824     llvm::FunctionType *FnTy =
825         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
826     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_ordered");
827     break;
828   }
829   case OMPRTL__kmpc_omp_taskwait: {
830     // Build kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32 global_tid);
831     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
832     llvm::FunctionType *FnTy =
833         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
834     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_omp_taskwait");
835     break;
836   }
837   case OMPRTL__kmpc_taskgroup: {
838     // Build void __kmpc_taskgroup(ident_t *loc, kmp_int32 global_tid);
839     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
840     llvm::FunctionType *FnTy =
841         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
842     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_taskgroup");
843     break;
844   }
845   case OMPRTL__kmpc_end_taskgroup: {
846     // Build void __kmpc_end_taskgroup(ident_t *loc, kmp_int32 global_tid);
847     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
848     llvm::FunctionType *FnTy =
849         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
850     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_taskgroup");
851     break;
852   }
853   case OMPRTL__kmpc_push_proc_bind: {
854     // Build void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
855     // int proc_bind)
856     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
857     llvm::FunctionType *FnTy =
858         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
859     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_proc_bind");
860     break;
861   }
862   case OMPRTL__kmpc_omp_task_with_deps: {
863     // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
864     // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
865     // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list);
866     llvm::Type *TypeParams[] = {
867         getIdentTyPointerTy(), CGM.Int32Ty, CGM.VoidPtrTy, CGM.Int32Ty,
868         CGM.VoidPtrTy,         CGM.Int32Ty, CGM.VoidPtrTy};
869     llvm::FunctionType *FnTy =
870         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
871     RTLFn =
872         CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_with_deps");
873     break;
874   }
875   case OMPRTL__kmpc_omp_wait_deps: {
876     // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
877     // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 ndeps_noalias,
878     // kmp_depend_info_t *noalias_dep_list);
879     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
880                                 CGM.Int32Ty,           CGM.VoidPtrTy,
881                                 CGM.Int32Ty,           CGM.VoidPtrTy};
882     llvm::FunctionType *FnTy =
883         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
884     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_wait_deps");
885     break;
886   }
887   case OMPRTL__kmpc_cancellationpoint: {
888     // Build kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
889     // global_tid, kmp_int32 cncl_kind)
890     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
891     llvm::FunctionType *FnTy =
892         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
893     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_cancellationpoint");
894     break;
895   }
896   case OMPRTL__kmpc_cancel: {
897     // Build kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
898     // kmp_int32 cncl_kind)
899     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
900     llvm::FunctionType *FnTy =
901         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
902     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_cancel");
903     break;
904   }
905   case OMPRTL__tgt_target: {
906     // Build int32_t __tgt_target(int32_t device_id, void *host_ptr, int32_t
907     // arg_num, void** args_base, void **args, size_t *arg_sizes, int32_t
908     // *arg_types);
909     llvm::Type *TypeParams[] = {CGM.Int32Ty,
910                                 CGM.VoidPtrTy,
911                                 CGM.Int32Ty,
912                                 CGM.VoidPtrPtrTy,
913                                 CGM.VoidPtrPtrTy,
914                                 CGM.SizeTy->getPointerTo(),
915                                 CGM.Int32Ty->getPointerTo()};
916     llvm::FunctionType *FnTy =
917         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
918     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target");
919     break;
920   }
921   }
922   return RTLFn;
923 }
924 
925 llvm::Constant *CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize,
926                                                              bool IVSigned) {
927   assert((IVSize == 32 || IVSize == 64) &&
928          "IV size is not compatible with the omp runtime");
929   auto Name = IVSize == 32 ? (IVSigned ? "__kmpc_for_static_init_4"
930                                        : "__kmpc_for_static_init_4u")
931                            : (IVSigned ? "__kmpc_for_static_init_8"
932                                        : "__kmpc_for_static_init_8u");
933   auto ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
934   auto PtrTy = llvm::PointerType::getUnqual(ITy);
935   llvm::Type *TypeParams[] = {
936     getIdentTyPointerTy(),                     // loc
937     CGM.Int32Ty,                               // tid
938     CGM.Int32Ty,                               // schedtype
939     llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
940     PtrTy,                                     // p_lower
941     PtrTy,                                     // p_upper
942     PtrTy,                                     // p_stride
943     ITy,                                       // incr
944     ITy                                        // chunk
945   };
946   llvm::FunctionType *FnTy =
947       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
948   return CGM.CreateRuntimeFunction(FnTy, Name);
949 }
950 
951 llvm::Constant *CGOpenMPRuntime::createDispatchInitFunction(unsigned IVSize,
952                                                             bool IVSigned) {
953   assert((IVSize == 32 || IVSize == 64) &&
954          "IV size is not compatible with the omp runtime");
955   auto Name =
956       IVSize == 32
957           ? (IVSigned ? "__kmpc_dispatch_init_4" : "__kmpc_dispatch_init_4u")
958           : (IVSigned ? "__kmpc_dispatch_init_8" : "__kmpc_dispatch_init_8u");
959   auto ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
960   llvm::Type *TypeParams[] = { getIdentTyPointerTy(), // loc
961                                CGM.Int32Ty,           // tid
962                                CGM.Int32Ty,           // schedtype
963                                ITy,                   // lower
964                                ITy,                   // upper
965                                ITy,                   // stride
966                                ITy                    // chunk
967   };
968   llvm::FunctionType *FnTy =
969       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
970   return CGM.CreateRuntimeFunction(FnTy, Name);
971 }
972 
973 llvm::Constant *CGOpenMPRuntime::createDispatchFiniFunction(unsigned IVSize,
974                                                             bool IVSigned) {
975   assert((IVSize == 32 || IVSize == 64) &&
976          "IV size is not compatible with the omp runtime");
977   auto Name =
978       IVSize == 32
979           ? (IVSigned ? "__kmpc_dispatch_fini_4" : "__kmpc_dispatch_fini_4u")
980           : (IVSigned ? "__kmpc_dispatch_fini_8" : "__kmpc_dispatch_fini_8u");
981   llvm::Type *TypeParams[] = {
982       getIdentTyPointerTy(), // loc
983       CGM.Int32Ty,           // tid
984   };
985   llvm::FunctionType *FnTy =
986       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
987   return CGM.CreateRuntimeFunction(FnTy, Name);
988 }
989 
990 llvm::Constant *CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize,
991                                                             bool IVSigned) {
992   assert((IVSize == 32 || IVSize == 64) &&
993          "IV size is not compatible with the omp runtime");
994   auto Name =
995       IVSize == 32
996           ? (IVSigned ? "__kmpc_dispatch_next_4" : "__kmpc_dispatch_next_4u")
997           : (IVSigned ? "__kmpc_dispatch_next_8" : "__kmpc_dispatch_next_8u");
998   auto ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
999   auto PtrTy = llvm::PointerType::getUnqual(ITy);
1000   llvm::Type *TypeParams[] = {
1001     getIdentTyPointerTy(),                     // loc
1002     CGM.Int32Ty,                               // tid
1003     llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
1004     PtrTy,                                     // p_lower
1005     PtrTy,                                     // p_upper
1006     PtrTy                                      // p_stride
1007   };
1008   llvm::FunctionType *FnTy =
1009       llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
1010   return CGM.CreateRuntimeFunction(FnTy, Name);
1011 }
1012 
1013 llvm::Constant *
1014 CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) {
1015   assert(!CGM.getLangOpts().OpenMPUseTLS ||
1016          !CGM.getContext().getTargetInfo().isTLSSupported());
1017   // Lookup the entry, lazily creating it if necessary.
1018   return getOrCreateInternalVariable(CGM.Int8PtrPtrTy,
1019                                      Twine(CGM.getMangledName(VD)) + ".cache.");
1020 }
1021 
1022 Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
1023                                                 const VarDecl *VD,
1024                                                 Address VDAddr,
1025                                                 SourceLocation Loc) {
1026   if (CGM.getLangOpts().OpenMPUseTLS &&
1027       CGM.getContext().getTargetInfo().isTLSSupported())
1028     return VDAddr;
1029 
1030   auto VarTy = VDAddr.getElementType();
1031   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
1032                          CGF.Builder.CreatePointerCast(VDAddr.getPointer(),
1033                                                        CGM.Int8PtrTy),
1034                          CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)),
1035                          getOrCreateThreadPrivateCache(VD)};
1036   return Address(CGF.EmitRuntimeCall(
1037       createRuntimeFunction(OMPRTL__kmpc_threadprivate_cached), Args),
1038                  VDAddr.getAlignment());
1039 }
1040 
1041 void CGOpenMPRuntime::emitThreadPrivateVarInit(
1042     CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor,
1043     llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) {
1044   // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime
1045   // library.
1046   auto OMPLoc = emitUpdateLocation(CGF, Loc);
1047   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_global_thread_num),
1048                       OMPLoc);
1049   // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor)
1050   // to register constructor/destructor for variable.
1051   llvm::Value *Args[] = {OMPLoc,
1052                          CGF.Builder.CreatePointerCast(VDAddr.getPointer(),
1053                                                        CGM.VoidPtrTy),
1054                          Ctor, CopyCtor, Dtor};
1055   CGF.EmitRuntimeCall(
1056       createRuntimeFunction(OMPRTL__kmpc_threadprivate_register), Args);
1057 }
1058 
/// \brief Emits the registration of a threadprivate variable with the OpenMP
/// runtime for the case where the variable is not handled through TLS.
///
/// Generates, as needed, a constructor thunk (re-emits the initializer into
/// the memory it is given) and a destructor thunk, then passes them to
/// emitThreadPrivateVarInit().
///
/// \param VD Threadprivate variable. Its definition is looked up and each
/// definition is processed at most once (tracked in
/// ThreadPrivateWithDefinition).
/// \param VDAddr Address of the original variable.
/// \param Loc Source location used for the generated functions and calls.
/// \param PerformInit If true (and the language is C++), a constructor thunk
/// is generated from the variable's initializer.
/// \param CGF If non-null, the registration is emitted into this function;
/// otherwise a standalone init function is created.
/// \return The newly created init function when \a CGF is null and a
/// constructor or destructor thunk was generated; nullptr in all other cases.
llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition(
    const VarDecl *VD, Address VDAddr, SourceLocation Loc,
    bool PerformInit, CodeGenFunction *CGF) {
  // No runtime registration is emitted when OpenMPUseTLS is set and the target
  // supports TLS.
  if (CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getContext().getTargetInfo().isTLSSupported())
    return nullptr;

  // Work on the defining declaration; skip if there is none or if it has
  // already been processed.
  VD = VD->getDefinition(CGM.getContext());
  if (VD && ThreadPrivateWithDefinition.count(VD) == 0) {
    ThreadPrivateWithDefinition.insert(VD);
    QualType ASTTy = VD->getType();

    llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr;
    // NOTE(review): Init is dereferenced below without a null check;
    // presumably PerformInit implies that an initializer exists - confirm
    // against the callers.
    auto Init = VD->getAnyInitializer();
    if (CGM.getLangOpts().CPlusPlus && PerformInit) {
      // Generate function that re-emits the declaration's initializer into the
      // threadprivate copy of the variable VD
      // The thunk has the shape: void *ctor(void *dst).
      CodeGenFunction CtorCGF(CGM);
      FunctionArgList Args;
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, SourceLocation(),
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy);
      Args.push_back(&Dst);

      auto &FI = CGM.getTypes().arrangeFreeFunctionDeclaration(
          CGM.getContext().VoidPtrTy, Args, FunctionType::ExtInfo(),
          /*isVariadic=*/false);
      auto FTy = CGM.getTypes().GetFunctionType(FI);
      auto Fn = CGM.CreateGlobalInitOrDestructFunction(
          FTy, ".__kmpc_global_ctor_.", Loc);
      CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI,
                            Args, SourceLocation());
      auto ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      // The incoming pointer is given the alignment of the original variable
      // and retyped to the variable's memory type before initialization.
      Address Arg = Address(ArgVal, VDAddr.getAlignment());
      Arg = CtorCGF.Builder.CreateElementBitCast(Arg,
                                             CtorCGF.ConvertTypeForMem(ASTTy));
      CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(),
                               /*IsInitializer=*/true);
      // Reload the destination pointer and store it as the thunk's return
      // value.
      ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue);
      CtorCGF.FinishFunction();
      Ctor = Fn;
    }
    if (VD->getType().isDestructedType() != QualType::DK_none) {
      // Generate function that emits destructor call for the threadprivate copy
      // of the variable VD
      // The thunk has the shape: void dtor(void *dst).
      CodeGenFunction DtorCGF(CGM);
      FunctionArgList Args;
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, SourceLocation(),
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy);
      Args.push_back(&Dst);

      auto &FI = CGM.getTypes().arrangeFreeFunctionDeclaration(
          CGM.getContext().VoidTy, Args, FunctionType::ExtInfo(),
          /*isVariadic=*/false);
      auto FTy = CGM.getTypes().GetFunctionType(FI);
      auto Fn = CGM.CreateGlobalInitOrDestructFunction(
          FTy, ".__kmpc_global_dtor_.", Loc);
      DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args,
                            SourceLocation());
      auto ArgVal = DtorCGF.EmitLoadOfScalar(
          DtorCGF.GetAddrOfLocalVar(&Dst),
          /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation());
      DtorCGF.emitDestroy(Address(ArgVal, VDAddr.getAlignment()), ASTTy,
                          DtorCGF.getDestroyer(ASTTy.isDestructedType()),
                          DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
      DtorCGF.FinishFunction();
      Dtor = Fn;
    }
    // Do not emit init function if it is not required.
    if (!Ctor && !Dtor)
      return nullptr;

    llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
    auto CopyCtorTy =
        llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs,
                                /*isVarArg=*/false)->getPointerTo();
    // Copying constructor for the threadprivate variable.
    // Must be NULL - reserved by runtime, but currently it requires that this
    // parameter is always NULL. Otherwise it fires assertion.
    CopyCtor = llvm::Constant::getNullValue(CopyCtorTy);
    // Substitute a typed null function pointer for whichever thunk was not
    // generated.
    if (Ctor == nullptr) {
      auto CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
                                            /*isVarArg=*/false)->getPointerTo();
      Ctor = llvm::Constant::getNullValue(CtorTy);
    }
    if (Dtor == nullptr) {
      auto DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy,
                                            /*isVarArg=*/false)->getPointerTo();
      Dtor = llvm::Constant::getNullValue(DtorTy);
    }
    // No enclosing function was supplied: wrap the registration in its own
    // nullary init function and hand that function back to the caller.
    if (!CGF) {
      auto InitFunctionTy =
          llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false);
      auto InitFunction = CGM.CreateGlobalInitOrDestructFunction(
          InitFunctionTy, ".__omp_threadprivate_init_.");
      CodeGenFunction InitCGF(CGM);
      FunctionArgList ArgList;
      InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction,
                            CGM.getTypes().arrangeNullaryFunction(), ArgList,
                            Loc);
      emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
      InitCGF.FinishFunction();
      return InitFunction;
    }
    // Otherwise emit the registration directly into the caller's function.
    emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
  }
  return nullptr;
}
1171 
1172 /// \brief Emits code for OpenMP 'if' clause using specified \a CodeGen
1173 /// function. Here is the logic:
1174 /// if (Cond) {
1175 ///   ThenGen();
1176 /// } else {
1177 ///   ElseGen();
1178 /// }
1179 static void emitOMPIfClause(CodeGenFunction &CGF, const Expr *Cond,
1180                             const RegionCodeGenTy &ThenGen,
1181                             const RegionCodeGenTy &ElseGen) {
1182   CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange());
1183 
1184   // If the condition constant folds and can be elided, try to avoid emitting
1185   // the condition and the dead arm of the if/else.
1186   bool CondConstant;
1187   if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) {
1188     CodeGenFunction::RunCleanupsScope Scope(CGF);
1189     if (CondConstant) {
1190       ThenGen(CGF);
1191     } else {
1192       ElseGen(CGF);
1193     }
1194     return;
1195   }
1196 
1197   // Otherwise, the condition did not fold, or we couldn't elide it.  Just
1198   // emit the conditional branch.
1199   auto ThenBlock = CGF.createBasicBlock("omp_if.then");
1200   auto ElseBlock = CGF.createBasicBlock("omp_if.else");
1201   auto ContBlock = CGF.createBasicBlock("omp_if.end");
1202   CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0);
1203 
1204   // Emit the 'then' code.
1205   CGF.EmitBlock(ThenBlock);
1206   {
1207     CodeGenFunction::RunCleanupsScope ThenScope(CGF);
1208     ThenGen(CGF);
1209   }
1210   CGF.EmitBranch(ContBlock);
1211   // Emit the 'else' code if present.
1212   {
1213     // There is no need to emit line number for unconditional branch.
1214     auto NL = ApplyDebugLocation::CreateEmpty(CGF);
1215     CGF.EmitBlock(ElseBlock);
1216   }
1217   {
1218     CodeGenFunction::RunCleanupsScope ThenScope(CGF);
1219     ElseGen(CGF);
1220   }
1221   {
1222     // There is no need to emit line number for unconditional branch.
1223     auto NL = ApplyDebugLocation::CreateEmpty(CGF);
1224     CGF.EmitBranch(ContBlock);
1225   }
1226   // Emit the continuation block for code after the if.
1227   CGF.EmitBlock(ContBlock, /*IsFinished=*/true);
1228 }
1229 
1230 void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
1231                                        llvm::Value *OutlinedFn,
1232                                        ArrayRef<llvm::Value *> CapturedVars,
1233                                        const Expr *IfCond) {
1234   auto *RTLoc = emitUpdateLocation(CGF, Loc);
1235   auto &&ThenGen = [this, OutlinedFn, CapturedVars,
1236                     RTLoc](CodeGenFunction &CGF) {
1237     // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn);
1238     llvm::Value *Args[] = {
1239         RTLoc,
1240         CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
1241         CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy())};
1242     llvm::SmallVector<llvm::Value *, 16> RealArgs;
1243     RealArgs.append(std::begin(Args), std::end(Args));
1244     RealArgs.append(CapturedVars.begin(), CapturedVars.end());
1245 
1246     auto RTLFn = createRuntimeFunction(OMPRTL__kmpc_fork_call);
1247     CGF.EmitRuntimeCall(RTLFn, RealArgs);
1248   };
1249   auto &&ElseGen = [this, OutlinedFn, CapturedVars, RTLoc,
1250                     Loc](CodeGenFunction &CGF) {
1251     auto ThreadID = getThreadID(CGF, Loc);
1252     // Build calls:
1253     // __kmpc_serialized_parallel(&Loc, GTid);
1254     llvm::Value *Args[] = {RTLoc, ThreadID};
1255     CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_serialized_parallel),
1256                         Args);
1257 
1258     // OutlinedFn(&GTid, &zero, CapturedStruct);
1259     auto ThreadIDAddr = emitThreadIDAddress(CGF, Loc);
1260     Address ZeroAddr =
1261       CGF.CreateTempAlloca(CGF.Int32Ty, CharUnits::fromQuantity(4),
1262                            /*Name*/ ".zero.addr");
1263     CGF.InitTempAlloca(ZeroAddr, CGF.Builder.getInt32(/*C*/ 0));
1264     llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
1265     OutlinedFnArgs.push_back(ThreadIDAddr.getPointer());
1266     OutlinedFnArgs.push_back(ZeroAddr.getPointer());
1267     OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());
1268     CGF.EmitCallOrInvoke(OutlinedFn, OutlinedFnArgs);
1269 
1270     // __kmpc_end_serialized_parallel(&Loc, GTid);
1271     llvm::Value *EndArgs[] = {emitUpdateLocation(CGF, Loc), ThreadID};
1272     CGF.EmitRuntimeCall(
1273         createRuntimeFunction(OMPRTL__kmpc_end_serialized_parallel), EndArgs);
1274   };
1275   if (IfCond) {
1276     emitOMPIfClause(CGF, IfCond, ThenGen, ElseGen);
1277   } else {
1278     CodeGenFunction::RunCleanupsScope Scope(CGF);
1279     ThenGen(CGF);
1280   }
1281 }
1282 
1283 // If we're inside an (outlined) parallel region, use the region info's
1284 // thread-ID variable (it is passed in a first argument of the outlined function
1285 // as "kmp_int32 *gtid"). Otherwise, if we're not inside parallel region, but in
1286 // regular serial code region, get thread ID by calling kmp_int32
1287 // kmpc_global_thread_num(ident_t *loc), stash this thread ID in a temporary and
1288 // return the address of that temp.
1289 Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF,
1290                                              SourceLocation Loc) {
1291   if (auto OMPRegionInfo =
1292           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
1293     if (OMPRegionInfo->getThreadIDVariable())
1294       return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress();
1295 
1296   auto ThreadID = getThreadID(CGF, Loc);
1297   auto Int32Ty =
1298       CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true);
1299   auto ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp.");
1300   CGF.EmitStoreOfScalar(ThreadID,
1301                         CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty));
1302 
1303   return ThreadIDTemp;
1304 }
1305 
1306 llvm::Constant *
1307 CGOpenMPRuntime::getOrCreateInternalVariable(llvm::Type *Ty,
1308                                              const llvm::Twine &Name) {
1309   SmallString<256> Buffer;
1310   llvm::raw_svector_ostream Out(Buffer);
1311   Out << Name;
1312   auto RuntimeName = Out.str();
1313   auto &Elem = *InternalVars.insert(std::make_pair(RuntimeName, nullptr)).first;
1314   if (Elem.second) {
1315     assert(Elem.second->getType()->getPointerElementType() == Ty &&
1316            "OMP internal variable has different type than requested");
1317     return &*Elem.second;
1318   }
1319 
1320   return Elem.second = new llvm::GlobalVariable(
1321              CGM.getModule(), Ty, /*IsConstant*/ false,
1322              llvm::GlobalValue::CommonLinkage, llvm::Constant::getNullValue(Ty),
1323              Elem.first());
1324 }
1325 
1326 llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) {
1327   llvm::Twine Name(".gomp_critical_user_", CriticalName);
1328   return getOrCreateInternalVariable(KmpCriticalNameTy, Name.concat(".var"));
1329 }
1330 
1331 namespace {
1332 template <size_t N> class CallEndCleanup final : public EHScopeStack::Cleanup {
1333   llvm::Value *Callee;
1334   llvm::Value *Args[N];
1335 
1336 public:
1337   CallEndCleanup(llvm::Value *Callee, ArrayRef<llvm::Value *> CleanupArgs)
1338       : Callee(Callee) {
1339     assert(CleanupArgs.size() == N);
1340     std::copy(CleanupArgs.begin(), CleanupArgs.end(), std::begin(Args));
1341   }
1342   void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
1343     CGF.EmitRuntimeCall(Callee, Args);
1344   }
1345 };
1346 } // anonymous namespace
1347 
1348 void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF,
1349                                          StringRef CriticalName,
1350                                          const RegionCodeGenTy &CriticalOpGen,
1351                                          SourceLocation Loc) {
1352   // __kmpc_critical(ident_t *, gtid, Lock);
1353   // CriticalOpGen();
1354   // __kmpc_end_critical(ident_t *, gtid, Lock);
1355   // Prepare arguments and build a call to __kmpc_critical
1356   {
1357     CodeGenFunction::RunCleanupsScope Scope(CGF);
1358     llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
1359                            getCriticalRegionLock(CriticalName)};
1360     CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_critical), Args);
1361     // Build a call to __kmpc_end_critical
1362     CGF.EHStack.pushCleanup<CallEndCleanup<std::extent<decltype(Args)>::value>>(
1363         NormalAndEHCleanup, createRuntimeFunction(OMPRTL__kmpc_end_critical),
1364         llvm::makeArrayRef(Args));
1365     emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen);
1366   }
1367 }
1368 
1369 static void emitIfStmt(CodeGenFunction &CGF, llvm::Value *IfCond,
1370                        OpenMPDirectiveKind Kind, SourceLocation Loc,
1371                        const RegionCodeGenTy &BodyOpGen) {
1372   llvm::Value *CallBool = CGF.EmitScalarConversion(
1373       IfCond,
1374       CGF.getContext().getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true),
1375       CGF.getContext().BoolTy, Loc);
1376 
1377   auto *ThenBlock = CGF.createBasicBlock("omp_if.then");
1378   auto *ContBlock = CGF.createBasicBlock("omp_if.end");
1379   // Generate the branch (If-stmt)
1380   CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock);
1381   CGF.EmitBlock(ThenBlock);
1382   CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, Kind, BodyOpGen);
1383   // Emit the rest of bblocks/branches
1384   CGF.EmitBranch(ContBlock);
1385   CGF.EmitBlock(ContBlock, true);
1386 }
1387 
1388 void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF,
1389                                        const RegionCodeGenTy &MasterOpGen,
1390                                        SourceLocation Loc) {
1391   // if(__kmpc_master(ident_t *, gtid)) {
1392   //   MasterOpGen();
1393   //   __kmpc_end_master(ident_t *, gtid);
1394   // }
1395   // Prepare arguments and build a call to __kmpc_master
1396   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
1397   auto *IsMaster =
1398       CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_master), Args);
1399   typedef CallEndCleanup<std::extent<decltype(Args)>::value>
1400       MasterCallEndCleanup;
1401   emitIfStmt(
1402       CGF, IsMaster, OMPD_master, Loc, [&](CodeGenFunction &CGF) -> void {
1403         CodeGenFunction::RunCleanupsScope Scope(CGF);
1404         CGF.EHStack.pushCleanup<MasterCallEndCleanup>(
1405             NormalAndEHCleanup, createRuntimeFunction(OMPRTL__kmpc_end_master),
1406             llvm::makeArrayRef(Args));
1407         MasterOpGen(CGF);
1408       });
1409 }
1410 
1411 void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
1412                                         SourceLocation Loc) {
1413   // Build call __kmpc_omp_taskyield(loc, thread_id, 0);
1414   llvm::Value *Args[] = {
1415       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
1416       llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)};
1417   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_taskyield), Args);
1418 }
1419 
1420 void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF,
1421                                           const RegionCodeGenTy &TaskgroupOpGen,
1422                                           SourceLocation Loc) {
1423   // __kmpc_taskgroup(ident_t *, gtid);
1424   // TaskgroupOpGen();
1425   // __kmpc_end_taskgroup(ident_t *, gtid);
1426   // Prepare arguments and build a call to __kmpc_taskgroup
1427   {
1428     CodeGenFunction::RunCleanupsScope Scope(CGF);
1429     llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
1430     CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_taskgroup), Args);
1431     // Build a call to __kmpc_end_taskgroup
1432     CGF.EHStack.pushCleanup<CallEndCleanup<std::extent<decltype(Args)>::value>>(
1433         NormalAndEHCleanup, createRuntimeFunction(OMPRTL__kmpc_end_taskgroup),
1434         llvm::makeArrayRef(Args));
1435     emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen);
1436   }
1437 }
1438 
1439 /// Given an array of pointers to variables, project the address of a
1440 /// given variable.
1441 static Address emitAddrOfVarFromArray(CodeGenFunction &CGF,
1442                                       Address Array, unsigned Index,
1443                                       const VarDecl *Var) {
1444   // Pull out the pointer to the variable.
1445   Address PtrAddr =
1446     CGF.Builder.CreateConstArrayGEP(Array, Index, CGF.getPointerSize());
1447   llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr);
1448 
1449   Address Addr = Address(Ptr, CGF.getContext().getDeclAlign(Var));
1450   Addr = CGF.Builder.CreateElementBitCast(Addr,
1451                                       CGF.ConvertTypeForMem(Var->getType()));
1452   return Addr;
1453 }
1454 
1455 static llvm::Value *emitCopyprivateCopyFunction(
1456     CodeGenModule &CGM, llvm::Type *ArgsType,
1457     ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs,
1458     ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps) {
1459   auto &C = CGM.getContext();
1460   // void copy_func(void *LHSArg, void *RHSArg);
1461   FunctionArgList Args;
1462   ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, SourceLocation(), /*Id=*/nullptr,
1463                            C.VoidPtrTy);
1464   ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, SourceLocation(), /*Id=*/nullptr,
1465                            C.VoidPtrTy);
1466   Args.push_back(&LHSArg);
1467   Args.push_back(&RHSArg);
1468   FunctionType::ExtInfo EI;
1469   auto &CGFI = CGM.getTypes().arrangeFreeFunctionDeclaration(
1470       C.VoidTy, Args, EI, /*isVariadic=*/false);
1471   auto *Fn = llvm::Function::Create(
1472       CGM.getTypes().GetFunctionType(CGFI), llvm::GlobalValue::InternalLinkage,
1473       ".omp.copyprivate.copy_func", &CGM.getModule());
1474   CGM.SetLLVMFunctionAttributes(/*D=*/nullptr, CGFI, Fn);
1475   CodeGenFunction CGF(CGM);
1476   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args);
1477   // Dest = (void*[n])(LHSArg);
1478   // Src = (void*[n])(RHSArg);
1479   Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
1480       CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
1481       ArgsType), CGF.getPointerAlign());
1482   Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
1483       CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
1484       ArgsType), CGF.getPointerAlign());
1485   // *(Type0*)Dst[0] = *(Type0*)Src[0];
1486   // *(Type1*)Dst[1] = *(Type1*)Src[1];
1487   // ...
1488   // *(Typen*)Dst[n] = *(Typen*)Src[n];
1489   for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) {
1490     auto DestVar = cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl());
1491     Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar);
1492 
1493     auto SrcVar = cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl());
1494     Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar);
1495 
1496     auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl();
1497     QualType Type = VD->getType();
1498     CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]);
1499   }
1500   CGF.FinishFunction();
1501   return Fn;
1502 }
1503 
1504 void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF,
1505                                        const RegionCodeGenTy &SingleOpGen,
1506                                        SourceLocation Loc,
1507                                        ArrayRef<const Expr *> CopyprivateVars,
1508                                        ArrayRef<const Expr *> SrcExprs,
1509                                        ArrayRef<const Expr *> DstExprs,
1510                                        ArrayRef<const Expr *> AssignmentOps) {
1511   assert(CopyprivateVars.size() == SrcExprs.size() &&
1512          CopyprivateVars.size() == DstExprs.size() &&
1513          CopyprivateVars.size() == AssignmentOps.size());
1514   auto &C = CGM.getContext();
1515   // int32 did_it = 0;
1516   // if(__kmpc_single(ident_t *, gtid)) {
1517   //   SingleOpGen();
1518   //   __kmpc_end_single(ident_t *, gtid);
1519   //   did_it = 1;
1520   // }
1521   // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
1522   // <copy_func>, did_it);
1523 
1524   Address DidIt = Address::invalid();
1525   if (!CopyprivateVars.empty()) {
1526     // int32 did_it = 0;
1527     auto KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
1528     DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it");
1529     CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt);
1530   }
1531   // Prepare arguments and build a call to __kmpc_single
1532   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
1533   auto *IsSingle =
1534       CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_single), Args);
1535   typedef CallEndCleanup<std::extent<decltype(Args)>::value>
1536       SingleCallEndCleanup;
1537   emitIfStmt(
1538       CGF, IsSingle, OMPD_single, Loc, [&](CodeGenFunction &CGF) -> void {
1539         CodeGenFunction::RunCleanupsScope Scope(CGF);
1540         CGF.EHStack.pushCleanup<SingleCallEndCleanup>(
1541             NormalAndEHCleanup, createRuntimeFunction(OMPRTL__kmpc_end_single),
1542             llvm::makeArrayRef(Args));
1543         SingleOpGen(CGF);
1544         if (DidIt.isValid()) {
1545           // did_it = 1;
1546           CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt);
1547         }
1548       });
1549   // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
1550   // <copy_func>, did_it);
1551   if (DidIt.isValid()) {
1552     llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size());
1553     auto CopyprivateArrayTy =
1554         C.getConstantArrayType(C.VoidPtrTy, ArraySize, ArrayType::Normal,
1555                                /*IndexTypeQuals=*/0);
1556     // Create a list of all private variables for copyprivate.
1557     Address CopyprivateList =
1558         CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list");
1559     for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) {
1560       Address Elem = CGF.Builder.CreateConstArrayGEP(
1561           CopyprivateList, I, CGF.getPointerSize());
1562       CGF.Builder.CreateStore(
1563           CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
1564               CGF.EmitLValue(CopyprivateVars[I]).getPointer(), CGF.VoidPtrTy),
1565           Elem);
1566     }
1567     // Build function that copies private values from single region to all other
1568     // threads in the corresponding parallel region.
1569     auto *CpyFn = emitCopyprivateCopyFunction(
1570         CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy)->getPointerTo(),
1571         CopyprivateVars, SrcExprs, DstExprs, AssignmentOps);
1572     auto *BufSize = llvm::ConstantInt::get(
1573         CGM.SizeTy, C.getTypeSizeInChars(CopyprivateArrayTy).getQuantity());
1574     Address CL =
1575       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(CopyprivateList,
1576                                                       CGF.VoidPtrTy);
1577     auto *DidItVal = CGF.Builder.CreateLoad(DidIt);
1578     llvm::Value *Args[] = {
1579         emitUpdateLocation(CGF, Loc), // ident_t *<loc>
1580         getThreadID(CGF, Loc),        // i32 <gtid>
1581         BufSize,                      // size_t <buf_size>
1582         CL.getPointer(),              // void *<copyprivate list>
1583         CpyFn,                        // void (*) (void *, void *) <copy_func>
1584         DidItVal                      // i32 did_it
1585     };
1586     CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_copyprivate), Args);
1587   }
1588 }
1589 
1590 void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF,
1591                                         const RegionCodeGenTy &OrderedOpGen,
1592                                         SourceLocation Loc, bool IsThreads) {
1593   // __kmpc_ordered(ident_t *, gtid);
1594   // OrderedOpGen();
1595   // __kmpc_end_ordered(ident_t *, gtid);
1596   // Prepare arguments and build a call to __kmpc_ordered
1597   CodeGenFunction::RunCleanupsScope Scope(CGF);
1598   if (IsThreads) {
1599     llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
1600     CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_ordered), Args);
1601     // Build a call to __kmpc_end_ordered
1602     CGF.EHStack.pushCleanup<CallEndCleanup<std::extent<decltype(Args)>::value>>(
1603         NormalAndEHCleanup, createRuntimeFunction(OMPRTL__kmpc_end_ordered),
1604         llvm::makeArrayRef(Args));
1605   }
1606   emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
1607 }
1608 
1609 void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
1610                                       OpenMPDirectiveKind Kind, bool EmitChecks,
1611                                       bool ForceSimpleCall) {
1612   // Build call __kmpc_cancel_barrier(loc, thread_id);
1613   // Build call __kmpc_barrier(loc, thread_id);
1614   OpenMPLocationFlags Flags = OMP_IDENT_KMPC;
1615   if (Kind == OMPD_for) {
1616     Flags =
1617         static_cast<OpenMPLocationFlags>(Flags | OMP_IDENT_BARRIER_IMPL_FOR);
1618   } else if (Kind == OMPD_sections) {
1619     Flags = static_cast<OpenMPLocationFlags>(Flags |
1620                                              OMP_IDENT_BARRIER_IMPL_SECTIONS);
1621   } else if (Kind == OMPD_single) {
1622     Flags =
1623         static_cast<OpenMPLocationFlags>(Flags | OMP_IDENT_BARRIER_IMPL_SINGLE);
1624   } else if (Kind == OMPD_barrier) {
1625     Flags = static_cast<OpenMPLocationFlags>(Flags | OMP_IDENT_BARRIER_EXPL);
1626   } else {
1627     Flags = static_cast<OpenMPLocationFlags>(Flags | OMP_IDENT_BARRIER_IMPL);
1628   }
1629   // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc,
1630   // thread_id);
1631   auto *OMPRegionInfo =
1632       dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo);
1633   // Do not emit barrier call in the single directive emitted in some rare cases
1634   // for sections directives.
1635   if (OMPRegionInfo && OMPRegionInfo->getDirectiveKind() == OMPD_single)
1636     return;
1637   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags),
1638                          getThreadID(CGF, Loc)};
1639   if (OMPRegionInfo) {
1640     if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) {
1641       auto *Result = CGF.EmitRuntimeCall(
1642           createRuntimeFunction(OMPRTL__kmpc_cancel_barrier), Args);
1643       if (EmitChecks) {
1644         // if (__kmpc_cancel_barrier()) {
1645         //   exit from construct;
1646         // }
1647         auto *ExitBB = CGF.createBasicBlock(".cancel.exit");
1648         auto *ContBB = CGF.createBasicBlock(".cancel.continue");
1649         auto *Cmp = CGF.Builder.CreateIsNotNull(Result);
1650         CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
1651         CGF.EmitBlock(ExitBB);
1652         //   exit from construct;
1653         auto CancelDestination =
1654             CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
1655         CGF.EmitBranchThroughCleanup(CancelDestination);
1656         CGF.EmitBlock(ContBB, /*IsFinished=*/true);
1657       }
1658       return;
1659     }
1660   }
1661   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_barrier), Args);
1662 }
1663 
/// \brief Schedule types for 'omp for' loops (these enumerators are taken from
/// the enum sched_type in kmp.h).
///
/// The values are passed to the runtime as the "schedule type" argument of the
/// loop init calls, so they must stay in sync with kmp.h. Each 'ordered'
/// enumerator is the matching unordered one shifted from OMP_sch_lower to
/// OMP_ord_lower.
enum OpenMPSchedType {
  /// \brief Lower bound for default (unordered) versions.
  OMP_sch_lower = 32,
  // static schedule with a chunk size.
  OMP_sch_static_chunked = 33,
  // static schedule without a chunk size.
  OMP_sch_static = 34,
  // dynamic schedule.
  OMP_sch_dynamic_chunked = 35,
  // guided schedule.
  OMP_sch_guided_chunked = 36,
  // runtime schedule.
  OMP_sch_runtime = 37,
  // auto schedule.
  OMP_sch_auto = 38,
  /// \brief Lower bound for 'ordered' versions.
  OMP_ord_lower = 64,
  OMP_ord_static_chunked = 65,
  OMP_ord_static = 66,
  OMP_ord_dynamic_chunked = 67,
  OMP_ord_guided_chunked = 68,
  OMP_ord_runtime = 69,
  OMP_ord_auto = 70,
  // Alias for the schedule treated as the default.
  OMP_sch_default = OMP_sch_static,
};
1685 
1686 /// \brief Map the OpenMP loop schedule to the runtime enumeration.
1687 static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind,
1688                                           bool Chunked, bool Ordered) {
1689   switch (ScheduleKind) {
1690   case OMPC_SCHEDULE_static:
1691     return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked)
1692                    : (Ordered ? OMP_ord_static : OMP_sch_static);
1693   case OMPC_SCHEDULE_dynamic:
1694     return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked;
1695   case OMPC_SCHEDULE_guided:
1696     return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked;
1697   case OMPC_SCHEDULE_runtime:
1698     return Ordered ? OMP_ord_runtime : OMP_sch_runtime;
1699   case OMPC_SCHEDULE_auto:
1700     return Ordered ? OMP_ord_auto : OMP_sch_auto;
1701   case OMPC_SCHEDULE_unknown:
1702     assert(!Chunked && "chunk was specified but schedule kind not known");
1703     return Ordered ? OMP_ord_static : OMP_sch_static;
1704   }
1705   llvm_unreachable("Unexpected runtime schedule");
1706 }
1707 
1708 bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind,
1709                                          bool Chunked) const {
1710   auto Schedule = getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
1711   return Schedule == OMP_sch_static;
1712 }
1713 
1714 bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const {
1715   auto Schedule =
1716       getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false);
1717   assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here");
1718   return Schedule != OMP_sch_static;
1719 }
1720 
1721 void CGOpenMPRuntime::emitForDispatchInit(CodeGenFunction &CGF,
1722                                           SourceLocation Loc,
1723                                           OpenMPScheduleClauseKind ScheduleKind,
1724                                           unsigned IVSize, bool IVSigned,
1725                                           bool Ordered, llvm::Value *UB,
1726                                           llvm::Value *Chunk) {
1727   OpenMPSchedType Schedule =
1728       getRuntimeSchedule(ScheduleKind, Chunk != nullptr, Ordered);
1729   assert(Ordered ||
1730          (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked &&
1731           Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked));
1732   // Call __kmpc_dispatch_init(
1733   //          ident_t *loc, kmp_int32 tid, kmp_int32 schedule,
1734   //          kmp_int[32|64] lower, kmp_int[32|64] upper,
1735   //          kmp_int[32|64] stride, kmp_int[32|64] chunk);
1736 
1737   // If the Chunk was not specified in the clause - use default value 1.
1738   if (Chunk == nullptr)
1739     Chunk = CGF.Builder.getIntN(IVSize, 1);
1740   llvm::Value *Args[] = {
1741     emitUpdateLocation(CGF, Loc, OMP_IDENT_KMPC),
1742     getThreadID(CGF, Loc),
1743     CGF.Builder.getInt32(Schedule), // Schedule type
1744     CGF.Builder.getIntN(IVSize, 0), // Lower
1745     UB,                             // Upper
1746     CGF.Builder.getIntN(IVSize, 1), // Stride
1747     Chunk                           // Chunk
1748   };
1749   CGF.EmitRuntimeCall(createDispatchInitFunction(IVSize, IVSigned), Args);
1750 }
1751 
1752 void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF,
1753                                         SourceLocation Loc,
1754                                         OpenMPScheduleClauseKind ScheduleKind,
1755                                         unsigned IVSize, bool IVSigned,
1756                                         bool Ordered, Address IL, Address LB,
1757                                         Address UB, Address ST,
1758                                         llvm::Value *Chunk) {
1759   OpenMPSchedType Schedule =
1760     getRuntimeSchedule(ScheduleKind, Chunk != nullptr, Ordered);
1761   assert(!Ordered);
1762   assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked ||
1763          Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked);
1764 
1765   // Call __kmpc_for_static_init(
1766   //          ident_t *loc, kmp_int32 tid, kmp_int32 schedtype,
1767   //          kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower,
1768   //          kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
1769   //          kmp_int[32|64] incr, kmp_int[32|64] chunk);
1770   if (Chunk == nullptr) {
1771     assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static) &&
1772            "expected static non-chunked schedule");
1773     // If the Chunk was not specified in the clause - use default value 1.
1774       Chunk = CGF.Builder.getIntN(IVSize, 1);
1775   } else {
1776     assert((Schedule == OMP_sch_static_chunked ||
1777             Schedule == OMP_ord_static_chunked) &&
1778            "expected static chunked schedule");
1779   }
1780   llvm::Value *Args[] = {
1781     emitUpdateLocation(CGF, Loc, OMP_IDENT_KMPC),
1782     getThreadID(CGF, Loc),
1783     CGF.Builder.getInt32(Schedule), // Schedule type
1784     IL.getPointer(),                // &isLastIter
1785     LB.getPointer(),                // &LB
1786     UB.getPointer(),                // &UB
1787     ST.getPointer(),                // &Stride
1788     CGF.Builder.getIntN(IVSize, 1), // Incr
1789     Chunk                           // Chunk
1790   };
1791   CGF.EmitRuntimeCall(createForStaticInitFunction(IVSize, IVSigned), Args);
1792 }
1793 
1794 void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF,
1795                                           SourceLocation Loc) {
1796   // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid);
1797   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, OMP_IDENT_KMPC),
1798                          getThreadID(CGF, Loc)};
1799   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_for_static_fini),
1800                       Args);
1801 }
1802 
1803 void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
1804                                                  SourceLocation Loc,
1805                                                  unsigned IVSize,
1806                                                  bool IVSigned) {
1807   // Call __kmpc_for_dynamic_fini_(4|8)[u](ident_t *loc, kmp_int32 tid);
1808   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, OMP_IDENT_KMPC),
1809                          getThreadID(CGF, Loc)};
1810   CGF.EmitRuntimeCall(createDispatchFiniFunction(IVSize, IVSigned), Args);
1811 }
1812 
1813 llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF,
1814                                           SourceLocation Loc, unsigned IVSize,
1815                                           bool IVSigned, Address IL,
1816                                           Address LB, Address UB,
1817                                           Address ST) {
1818   // Call __kmpc_dispatch_next(
1819   //          ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter,
1820   //          kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper,
1821   //          kmp_int[32|64] *p_stride);
1822   llvm::Value *Args[] = {
1823       emitUpdateLocation(CGF, Loc, OMP_IDENT_KMPC), getThreadID(CGF, Loc),
1824       IL.getPointer(), // &isLastIter
1825       LB.getPointer(), // &Lower
1826       UB.getPointer(), // &Upper
1827       ST.getPointer()  // &Stride
1828   };
1829   llvm::Value *Call =
1830       CGF.EmitRuntimeCall(createDispatchNextFunction(IVSize, IVSigned), Args);
1831   return CGF.EmitScalarConversion(
1832       Call, CGF.getContext().getIntTypeForBitwidth(32, /* Signed */ true),
1833       CGF.getContext().BoolTy, Loc);
1834 }
1835 
1836 void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
1837                                            llvm::Value *NumThreads,
1838                                            SourceLocation Loc) {
1839   // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads)
1840   llvm::Value *Args[] = {
1841       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
1842       CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)};
1843   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_num_threads),
1844                       Args);
1845 }
1846 
1847 void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF,
1848                                          OpenMPProcBindClauseKind ProcBind,
1849                                          SourceLocation Loc) {
1850   // Constants for proc bind value accepted by the runtime.
1851   enum ProcBindTy {
1852     ProcBindFalse = 0,
1853     ProcBindTrue,
1854     ProcBindMaster,
1855     ProcBindClose,
1856     ProcBindSpread,
1857     ProcBindIntel,
1858     ProcBindDefault
1859   } RuntimeProcBind;
1860   switch (ProcBind) {
1861   case OMPC_PROC_BIND_master:
1862     RuntimeProcBind = ProcBindMaster;
1863     break;
1864   case OMPC_PROC_BIND_close:
1865     RuntimeProcBind = ProcBindClose;
1866     break;
1867   case OMPC_PROC_BIND_spread:
1868     RuntimeProcBind = ProcBindSpread;
1869     break;
1870   case OMPC_PROC_BIND_unknown:
1871     llvm_unreachable("Unsupported proc_bind value.");
1872   }
1873   // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind)
1874   llvm::Value *Args[] = {
1875       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
1876       llvm::ConstantInt::get(CGM.IntTy, RuntimeProcBind, /*isSigned=*/true)};
1877   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_proc_bind), Args);
1878 }
1879 
/// \brief Emits the runtime call for a 'flush' directive.
///
/// The list of expressions (the unnamed second parameter) is not used; the
/// call takes only the location descriptor built from \a Loc.
void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>,
                                SourceLocation Loc) {
  // Build call void __kmpc_flush(ident_t *loc)
  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_flush),
                      emitUpdateLocation(CGF, Loc));
}
1886 
namespace {
/// \brief Indexes of fields for type kmp_task_t.
///
/// The enumerators are unnumbered, so their values are their positions
/// (0, 1, 2, 3); the order has to match the order in which the fields of
/// kmp_task_t are added in createKmpTaskTRecordDecl.
enum KmpTaskTFields {
  /// \brief List of shared variables.
  KmpTaskTShareds,
  /// \brief Task routine.
  KmpTaskTRoutine,
  /// \brief Partition id for the untied tasks.
  KmpTaskTPartId,
  /// \brief Function with call of destructors for private variables.
  KmpTaskTDestructors,
};
} // anonymous namespace
1900 
1901 void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) {
1902   if (!KmpRoutineEntryPtrTy) {
1903     // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type.
1904     auto &C = CGM.getContext();
1905     QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy};
1906     FunctionProtoType::ExtProtoInfo EPI;
1907     KmpRoutineEntryPtrQTy = C.getPointerType(
1908         C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI));
1909     KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy);
1910   }
1911 }
1912 
1913 static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC,
1914                                        QualType FieldTy) {
1915   auto *Field = FieldDecl::Create(
1916       C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy,
1917       C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()),
1918       /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit);
1919   Field->setAccess(AS_public);
1920   DC->addDecl(Field);
1921   return Field;
1922 }
1923 
1924 namespace {
1925 struct PrivateHelpersTy {
1926   PrivateHelpersTy(const VarDecl *Original, const VarDecl *PrivateCopy,
1927                    const VarDecl *PrivateElemInit)
1928       : Original(Original), PrivateCopy(PrivateCopy),
1929         PrivateElemInit(PrivateElemInit) {}
1930   const VarDecl *Original;
1931   const VarDecl *PrivateCopy;
1932   const VarDecl *PrivateElemInit;
1933 };
1934 typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy;
1935 } // anonymous namespace
1936 
1937 static RecordDecl *
1938 createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) {
1939   if (!Privates.empty()) {
1940     auto &C = CGM.getContext();
1941     // Build struct .kmp_privates_t. {
1942     //         /*  private vars  */
1943     //       };
1944     auto *RD = C.buildImplicitRecord(".kmp_privates.t");
1945     RD->startDefinition();
1946     for (auto &&Pair : Privates) {
1947       auto *VD = Pair.second.Original;
1948       auto Type = VD->getType();
1949       Type = Type.getNonReferenceType();
1950       auto *FD = addFieldToRecordDecl(C, RD, Type);
1951       if (VD->hasAttrs()) {
1952         for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()),
1953              E(VD->getAttrs().end());
1954              I != E; ++I)
1955           FD->addAttr(*I);
1956       }
1957     }
1958     RD->completeDefinition();
1959     return RD;
1960   }
1961   return nullptr;
1962 }
1963 
1964 static RecordDecl *
1965 createKmpTaskTRecordDecl(CodeGenModule &CGM, QualType KmpInt32Ty,
1966                          QualType KmpRoutineEntryPointerQTy) {
1967   auto &C = CGM.getContext();
1968   // Build struct kmp_task_t {
1969   //         void *              shareds;
1970   //         kmp_routine_entry_t routine;
1971   //         kmp_int32           part_id;
1972   //         kmp_routine_entry_t destructors;
1973   //       };
1974   auto *RD = C.buildImplicitRecord("kmp_task_t");
1975   RD->startDefinition();
1976   addFieldToRecordDecl(C, RD, C.VoidPtrTy);
1977   addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy);
1978   addFieldToRecordDecl(C, RD, KmpInt32Ty);
1979   addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy);
1980   RD->completeDefinition();
1981   return RD;
1982 }
1983 
1984 static RecordDecl *
1985 createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy,
1986                                      ArrayRef<PrivateDataTy> Privates) {
1987   auto &C = CGM.getContext();
1988   // Build struct kmp_task_t_with_privates {
1989   //         kmp_task_t task_data;
1990   //         .kmp_privates_t. privates;
1991   //       };
1992   auto *RD = C.buildImplicitRecord("kmp_task_t_with_privates");
1993   RD->startDefinition();
1994   addFieldToRecordDecl(C, RD, KmpTaskTQTy);
1995   if (auto *PrivateRD = createPrivatesRecordDecl(CGM, Privates)) {
1996     addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD));
1997   }
1998   RD->completeDefinition();
1999   return RD;
2000 }
2001 
2002 /// \brief Emit a proxy function which accepts kmp_task_t as the second
2003 /// argument.
2004 /// \code
2005 /// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
2006 ///   TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map,
2007 ///   tt->shareds);
2008 ///   return 0;
2009 /// }
2010 /// \endcode
2011 static llvm::Value *
2012 emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc,
2013                       QualType KmpInt32Ty, QualType KmpTaskTWithPrivatesPtrQTy,
2014                       QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy,
2015                       QualType SharedsPtrTy, llvm::Value *TaskFunction,
2016                       llvm::Value *TaskPrivatesMap) {
2017   auto &C = CGM.getContext();
2018   FunctionArgList Args;
2019   ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty);
2020   ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc,
2021                                 /*Id=*/nullptr,
2022                                 KmpTaskTWithPrivatesPtrQTy.withRestrict());
2023   Args.push_back(&GtidArg);
2024   Args.push_back(&TaskTypeArg);
2025   FunctionType::ExtInfo Info;
2026   auto &TaskEntryFnInfo =
2027       CGM.getTypes().arrangeFreeFunctionDeclaration(KmpInt32Ty, Args, Info,
2028                                                     /*isVariadic=*/false);
2029   auto *TaskEntryTy = CGM.getTypes().GetFunctionType(TaskEntryFnInfo);
2030   auto *TaskEntry =
2031       llvm::Function::Create(TaskEntryTy, llvm::GlobalValue::InternalLinkage,
2032                              ".omp_task_entry.", &CGM.getModule());
2033   CGM.SetLLVMFunctionAttributes(/*D=*/nullptr, TaskEntryFnInfo, TaskEntry);
2034   CodeGenFunction CGF(CGM);
2035   CGF.disableDebugInfo();
2036   CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args);
2037 
2038   // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map,
2039   // tt->task_data.shareds);
2040   auto *GtidParam = CGF.EmitLoadOfScalar(
2041       CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc);
2042   LValue TDBase = emitLoadOfPointerLValue(
2043       CGF, CGF.GetAddrOfLocalVar(&TaskTypeArg), KmpTaskTWithPrivatesPtrQTy);
2044   auto *KmpTaskTWithPrivatesQTyRD =
2045       cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
2046   LValue Base =
2047       CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
2048   auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
2049   auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
2050   auto PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI);
2051   auto *PartidParam = CGF.EmitLoadOfLValue(PartIdLVal, Loc).getScalarVal();
2052 
2053   auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds);
2054   auto SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI);
2055   auto *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2056       CGF.EmitLoadOfLValue(SharedsLVal, Loc).getScalarVal(),
2057       CGF.ConvertTypeForMem(SharedsPtrTy));
2058 
2059   auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
2060   llvm::Value *PrivatesParam;
2061   if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) {
2062     auto PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI);
2063     PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2064         PrivatesLVal.getPointer(), CGF.VoidPtrTy);
2065   } else {
2066     PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
2067   }
2068 
2069   llvm::Value *CallArgs[] = {GtidParam, PartidParam, PrivatesParam,
2070                              TaskPrivatesMap, SharedsParam};
2071   CGF.EmitCallOrInvoke(TaskFunction, CallArgs);
2072   CGF.EmitStoreThroughLValue(
2073       RValue::get(CGF.Builder.getInt32(/*C=*/0)),
2074       CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty));
2075   CGF.FinishFunction();
2076   return TaskEntry;
2077 }
2078 
2079 static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM,
2080                                             SourceLocation Loc,
2081                                             QualType KmpInt32Ty,
2082                                             QualType KmpTaskTWithPrivatesPtrQTy,
2083                                             QualType KmpTaskTWithPrivatesQTy) {
2084   auto &C = CGM.getContext();
2085   FunctionArgList Args;
2086   ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty);
2087   ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc,
2088                                 /*Id=*/nullptr,
2089                                 KmpTaskTWithPrivatesPtrQTy.withRestrict());
2090   Args.push_back(&GtidArg);
2091   Args.push_back(&TaskTypeArg);
2092   FunctionType::ExtInfo Info;
2093   auto &DestructorFnInfo =
2094       CGM.getTypes().arrangeFreeFunctionDeclaration(KmpInt32Ty, Args, Info,
2095                                                     /*isVariadic=*/false);
2096   auto *DestructorFnTy = CGM.getTypes().GetFunctionType(DestructorFnInfo);
2097   auto *DestructorFn =
2098       llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage,
2099                              ".omp_task_destructor.", &CGM.getModule());
2100   CGM.SetLLVMFunctionAttributes(/*D=*/nullptr, DestructorFnInfo, DestructorFn);
2101   CodeGenFunction CGF(CGM);
2102   CGF.disableDebugInfo();
2103   CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo,
2104                     Args);
2105 
2106   LValue Base = emitLoadOfPointerLValue(
2107       CGF, CGF.GetAddrOfLocalVar(&TaskTypeArg), KmpTaskTWithPrivatesPtrQTy);
2108   auto *KmpTaskTWithPrivatesQTyRD =
2109       cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
2110   auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
2111   Base = CGF.EmitLValueForField(Base, *FI);
2112   for (auto *Field :
2113        cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) {
2114     if (auto DtorKind = Field->getType().isDestructedType()) {
2115       auto FieldLValue = CGF.EmitLValueForField(Base, Field);
2116       CGF.pushDestroy(DtorKind, FieldLValue.getAddress(), Field->getType());
2117     }
2118   }
2119   CGF.FinishFunction();
2120   return DestructorFn;
2121 }
2122 
2123 /// \brief Emit a privates mapping function for correct handling of private and
2124 /// firstprivate variables.
2125 /// \code
2126 /// void .omp_task_privates_map.(const .privates. *noalias privs, <ty1>
2127 /// **noalias priv1,...,  <tyn> **noalias privn) {
2128 ///   *priv1 = &.privates.priv1;
2129 ///   ...;
2130 ///   *privn = &.privates.privn;
2131 /// }
2132 /// \endcode
2133 static llvm::Value *
2134 emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc,
2135                                ArrayRef<const Expr *> PrivateVars,
2136                                ArrayRef<const Expr *> FirstprivateVars,
2137                                QualType PrivatesQTy,
2138                                ArrayRef<PrivateDataTy> Privates) {
2139   auto &C = CGM.getContext();
2140   FunctionArgList Args;
2141   ImplicitParamDecl TaskPrivatesArg(
2142       C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
2143       C.getPointerType(PrivatesQTy).withConst().withRestrict());
2144   Args.push_back(&TaskPrivatesArg);
2145   llvm::DenseMap<const VarDecl *, unsigned> PrivateVarsPos;
2146   unsigned Counter = 1;
2147   for (auto *E: PrivateVars) {
2148     Args.push_back(ImplicitParamDecl::Create(
2149         C, /*DC=*/nullptr, Loc,
2150         /*Id=*/nullptr, C.getPointerType(C.getPointerType(E->getType()))
2151                             .withConst()
2152                             .withRestrict()));
2153     auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
2154     PrivateVarsPos[VD] = Counter;
2155     ++Counter;
2156   }
2157   for (auto *E : FirstprivateVars) {
2158     Args.push_back(ImplicitParamDecl::Create(
2159         C, /*DC=*/nullptr, Loc,
2160         /*Id=*/nullptr, C.getPointerType(C.getPointerType(E->getType()))
2161                             .withConst()
2162                             .withRestrict()));
2163     auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
2164     PrivateVarsPos[VD] = Counter;
2165     ++Counter;
2166   }
2167   FunctionType::ExtInfo Info;
2168   auto &TaskPrivatesMapFnInfo =
2169       CGM.getTypes().arrangeFreeFunctionDeclaration(C.VoidTy, Args, Info,
2170                                                     /*isVariadic=*/false);
2171   auto *TaskPrivatesMapTy =
2172       CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo);
2173   auto *TaskPrivatesMap = llvm::Function::Create(
2174       TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage,
2175       ".omp_task_privates_map.", &CGM.getModule());
2176   CGM.SetLLVMFunctionAttributes(/*D=*/nullptr, TaskPrivatesMapFnInfo,
2177                                 TaskPrivatesMap);
2178   TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline);
2179   CodeGenFunction CGF(CGM);
2180   CGF.disableDebugInfo();
2181   CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap,
2182                     TaskPrivatesMapFnInfo, Args);
2183 
2184   // *privi = &.privates.privi;
2185   LValue Base = emitLoadOfPointerLValue(
2186       CGF, CGF.GetAddrOfLocalVar(&TaskPrivatesArg), TaskPrivatesArg.getType());
2187   auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl());
2188   Counter = 0;
2189   for (auto *Field : PrivatesQTyRD->fields()) {
2190     auto FieldLVal = CGF.EmitLValueForField(Base, Field);
2191     auto *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]];
2192     auto RefLVal = CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType());
2193     auto RefLoadLVal =
2194         emitLoadOfPointerLValue(CGF, RefLVal.getAddress(), RefLVal.getType());
2195     CGF.EmitStoreOfScalar(FieldLVal.getPointer(), RefLoadLVal);
2196     ++Counter;
2197   }
2198   CGF.FinishFunction();
2199   return TaskPrivatesMap;
2200 }
2201 
2202 static llvm::Value *getTypeSize(CodeGenFunction &CGF, QualType Ty) {
2203   auto &C = CGF.getContext();
2204   llvm::Value *Size;
2205   auto SizeInChars = C.getTypeSizeInChars(Ty);
2206   if (SizeInChars.isZero()) {
2207     // getTypeSizeInChars() returns 0 for a VLA.
2208     Size = nullptr;
2209     while (auto *VAT = C.getAsVariableArrayType(Ty)) {
2210       llvm::Value *ArraySize;
2211       std::tie(ArraySize, Ty) = CGF.getVLASize(VAT);
2212       Size = Size ? CGF.Builder.CreateNUWMul(Size, ArraySize) : ArraySize;
2213     }
2214     SizeInChars = C.getTypeSizeInChars(Ty);
2215     assert(!SizeInChars.isZero());
2216     Size = CGF.Builder.CreateNUWMul(
2217         Size, llvm::ConstantInt::get(CGF.SizeTy, SizeInChars.getQuantity()));
2218   } else
2219     Size = llvm::ConstantInt::get(CGF.SizeTy, SizeInChars.getQuantity());
2220   return Size;
2221 }
2222 
2223 static int array_pod_sort_comparator(const PrivateDataTy *P1,
2224                                      const PrivateDataTy *P2) {
2225   return P1->first < P2->first ? 1 : (P2->first < P1->first ? -1 : 0);
2226 }
2227 
/// \brief Emit the code that allocates, initializes and launches an OpenMP
/// task.
///
/// The steps, in emission order, are:
///  1. Collect the private and firstprivate copies and sort them by
///     alignment (see array_pod_sort_comparator).
///  2. Build kmp_task_t (once per module object) and the per-task
///     kmp_task_t_with_privates record.
///  3. Emit the privates mapping function (when there are privates) and the
///     proxy task entry.
///  4. Call __kmpc_omp_task_alloc, then fill in the shareds, the private
///     copies and the destructors field of the new task.
///  5. Build the kmp_depend_info array when dependences are present.
///  6. Emit __kmpc_omp_task or __kmpc_omp_task_with_deps; when \a IfCond is
///     given, the 'else' path instead waits for dependences and runs the
///     proxy entry directly between __kmpc_omp_task_begin_if0 and
///     __kmpc_omp_task_complete_if0.
///
/// \param Tied When true, the tied flag (0x1) is set in the task flags.
/// \param Final Either a runtime condition (pointer part) that selects the
/// final flag (0x2), or a compile-time boolean (int part) used when the
/// pointer part is null.
/// \param TaskFunction Outlined task function; its fourth parameter type is
/// used as the type of the privates mapping function pointer.
/// \param SharedsTy Record type of the captured shared variables.
/// \param Shareds Address of the captured shared variables to copy from.
/// \param IfCond Condition of the 'if' clause, or null if there is none.
/// \param PrivateVars, PrivateCopies Parallel lists for 'private' variables.
/// \param FirstprivateVars, FirstprivateCopies, FirstprivateInits Parallel
/// lists for 'firstprivate' variables.
/// \param Dependences (kind, expression) pairs from 'depend' clauses.
void CGOpenMPRuntime::emitTaskCall(
    CodeGenFunction &CGF, SourceLocation Loc, const OMPExecutableDirective &D,
    bool Tied, llvm::PointerIntPair<llvm::Value *, 1, bool> Final,
    llvm::Value *TaskFunction, QualType SharedsTy, Address Shareds,
    const Expr *IfCond, ArrayRef<const Expr *> PrivateVars,
    ArrayRef<const Expr *> PrivateCopies,
    ArrayRef<const Expr *> FirstprivateVars,
    ArrayRef<const Expr *> FirstprivateCopies,
    ArrayRef<const Expr *> FirstprivateInits,
    ArrayRef<std::pair<OpenMPDependClauseKind, const Expr *>> Dependences) {
  auto &C = CGM.getContext();
  llvm::SmallVector<PrivateDataTy, 8> Privates;
  // Aggregate privates and sort them by the alignment.
  auto I = PrivateCopies.begin();
  for (auto *E : PrivateVars) {
    auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.push_back(std::make_pair(
        C.getDeclAlign(VD),
        PrivateHelpersTy(VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
                         /*PrivateElemInit=*/nullptr)));
    ++I;
  }
  // Firstprivates additionally carry the helper variable used by their
  // initializer expression.
  I = FirstprivateCopies.begin();
  auto IElemInitRef = FirstprivateInits.begin();
  for (auto *E : FirstprivateVars) {
    auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.push_back(std::make_pair(
        C.getDeclAlign(VD),
        PrivateHelpersTy(
            VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
            cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl()))));
    ++I, ++IElemInitRef;
  }
  llvm::array_pod_sort(Privates.begin(), Privates.end(),
                       array_pod_sort_comparator);
  auto KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
  // Build type kmp_routine_entry_t (if not built yet).
  emitKmpRoutineEntryT(KmpInt32Ty);
  // Build type kmp_task_t (if not built yet).
  if (KmpTaskTQTy.isNull()) {
    KmpTaskTQTy = C.getRecordType(
        createKmpTaskTRecordDecl(CGM, KmpInt32Ty, KmpRoutineEntryPtrQTy));
  }
  auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
  // Build particular struct kmp_task_t for the given task.
  auto *KmpTaskTWithPrivatesQTyRD =
      createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates);
  auto KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD);
  QualType KmpTaskTWithPrivatesPtrQTy =
      C.getPointerType(KmpTaskTWithPrivatesQTy);
  auto *KmpTaskTWithPrivatesTy = CGF.ConvertType(KmpTaskTWithPrivatesQTy);
  auto *KmpTaskTWithPrivatesPtrTy = KmpTaskTWithPrivatesTy->getPointerTo();
  auto KmpTaskTWithPrivatesTySize =
      CGM.getSize(C.getTypeSizeInChars(KmpTaskTWithPrivatesQTy));
  QualType SharedsPtrTy = C.getPointerType(SharedsTy);

  // Emit the privates mapping function (if any privates exist); otherwise
  // pass a null pointer of the type the task function expects.
  llvm::Value *TaskPrivatesMap = nullptr;
  // The mapping function is the fourth parameter (index 3) of TaskFunction.
  auto *TaskPrivatesMapTy =
      std::next(cast<llvm::Function>(TaskFunction)->getArgumentList().begin(),
                3)
          ->getType();
  if (!Privates.empty()) {
    auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
    TaskPrivatesMap = emitTaskPrivateMappingFunction(
        CGM, Loc, PrivateVars, FirstprivateVars, FI->getType(), Privates);
    TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        TaskPrivatesMap, TaskPrivatesMapTy);
  } else {
    TaskPrivatesMap = llvm::ConstantPointerNull::get(
        cast<llvm::PointerType>(TaskPrivatesMapTy));
  }
  // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid,
  // kmp_task_t *tt);
  auto *TaskEntry = emitProxyTaskFunction(
      CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTy,
      KmpTaskTQTy, SharedsPtrTy, TaskFunction, TaskPrivatesMap);

  // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
  // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
  // kmp_routine_entry_t *task_entry);
  // Task flags. Format is taken from
  // http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp.h,
  // description of kmp_tasking_flags struct.
  const unsigned TiedFlag = 0x1;
  const unsigned FinalFlag = 0x2;
  unsigned Flags = Tied ? TiedFlag : 0;
  // The final flag is either selected at run time (pointer part of Final is
  // set) or folded to a constant from the int part.
  auto *TaskFlags =
      Final.getPointer()
          ? CGF.Builder.CreateSelect(Final.getPointer(),
                                     CGF.Builder.getInt32(FinalFlag),
                                     CGF.Builder.getInt32(/*C=*/0))
          : CGF.Builder.getInt32(Final.getInt() ? FinalFlag : 0);
  TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags));
  auto SharedsSize = C.getTypeSizeInChars(SharedsTy);
  llvm::Value *AllocArgs[] = {
      emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), TaskFlags,
      KmpTaskTWithPrivatesTySize, CGM.getSize(SharedsSize),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TaskEntry,
                                                      KmpRoutineEntryPtrTy)};
  auto *NewTask = CGF.EmitRuntimeCall(
      createRuntimeFunction(OMPRTL__kmpc_omp_task_alloc), AllocArgs);
  // View the returned task through the per-task record type.
  auto *NewTaskNewTaskTTy = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      NewTask, KmpTaskTWithPrivatesPtrTy);
  LValue Base = CGF.MakeNaturalAlignAddrLValue(NewTaskNewTaskTTy,
                                               KmpTaskTWithPrivatesQTy);
  // TDBase designates the embedded kmp_task_t (first field of the record).
  LValue TDBase =
      CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin());
  // Fill the data in the resulting kmp_task_t record.
  // Copy shareds if there are any.
  Address KmpTaskSharedsPtr = Address::invalid();
  if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) {
    KmpTaskSharedsPtr =
        Address(CGF.EmitLoadOfScalar(
                    CGF.EmitLValueForField(
                        TDBase, *std::next(KmpTaskTQTyRD->field_begin(),
                                           KmpTaskTShareds)),
                    Loc),
                CGF.getNaturalTypeAlignment(SharedsTy));
    CGF.EmitAggregateCopy(KmpTaskSharedsPtr, Shareds, SharedsTy);
  }
  // Emit initial values for private copies (if any).
  bool NeedsCleanup = false;
  if (!Privates.empty()) {
    auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
    auto PrivatesBase = CGF.EmitLValueForField(Base, *FI);
    // From here on FI walks the fields of the privates record, in step with
    // the sorted Privates list.
    FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin();
    LValue SharedsBase;
    // Firstprivates are initialized from the copy of shareds stored in the
    // task.
    if (!FirstprivateVars.empty()) {
      SharedsBase = CGF.MakeAddrLValue(
          CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
              KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy)),
          SharedsTy);
    }
    CodeGenFunction::CGCapturedStmtInfo CapturesInfo(
        cast<CapturedStmt>(*D.getAssociatedStmt()));
    for (auto &&Pair : Privates) {
      auto *VD = Pair.second.PrivateCopy;
      auto *Init = VD->getAnyInitializer();
      LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI);
      if (Init) {
        // A non-null PrivateElemInit marks a firstprivate variable.
        if (auto *Elem = Pair.second.PrivateElemInit) {
          auto *OriginalVD = Pair.second.Original;
          auto *SharedField = CapturesInfo.lookup(OriginalVD);
          auto SharedRefLValue =
              CGF.EmitLValueForField(SharedsBase, SharedField);
          // Re-wrap the shared field address using the declared alignment of
          // the original variable.
          SharedRefLValue = CGF.MakeAddrLValue(
              Address(SharedRefLValue.getPointer(), C.getDeclAlign(OriginalVD)),
              SharedRefLValue.getType(), AlignmentSource::Decl);
          QualType Type = OriginalVD->getType();
          if (Type->isArrayType()) {
            // Initialize firstprivate array.
            if (!isa<CXXConstructExpr>(Init) ||
                CGF.isTrivialInitializer(Init)) {
              // Perform simple memcpy.
              CGF.EmitAggregateAssign(PrivateLValue.getAddress(),
                                      SharedRefLValue.getAddress(), Type);
            } else {
              // Initialize firstprivate array using element-by-element
              // initialization.
              CGF.EmitOMPAggregateAssign(
                  PrivateLValue.getAddress(), SharedRefLValue.getAddress(),
                  Type, [&CGF, Elem, Init, &CapturesInfo](
                            Address DestElement, Address SrcElement) {
                    // Clean up any temporaries needed by the initialization.
                    CodeGenFunction::OMPPrivateScope InitScope(CGF);
                    // Redirect the helper variable to the current source
                    // element.
                    InitScope.addPrivate(Elem, [SrcElement]() -> Address {
                      return SrcElement;
                    });
                    (void)InitScope.Privatize();
                    // Emit initialization for single element.
                    CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(
                        CGF, &CapturesInfo);
                    CGF.EmitAnyExprToMem(Init, DestElement,
                                         Init->getType().getQualifiers(),
                                         /*IsInitializer=*/false);
                  });
            }
          } else {
            // Non-array firstprivate: redirect the helper variable to the
            // shared original and emit the initializer.
            CodeGenFunction::OMPPrivateScope InitScope(CGF);
            InitScope.addPrivate(Elem, [SharedRefLValue]() -> Address {
              return SharedRefLValue.getAddress();
            });
            (void)InitScope.Privatize();
            CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo);
            CGF.EmitExprAsInit(Init, VD, PrivateLValue,
                               /*capturedByInit=*/false);
          }
        } else {
          // Plain private variable with an initializer.
          CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false);
        }
      }
      // A destructors function is needed as soon as one private field has a
      // type that requires destruction.
      NeedsCleanup = NeedsCleanup || FI->getType().isDestructedType();
      ++FI;
    }
  }
  // Provide pointer to function with destructors for privates.
  llvm::Value *DestructorFn =
      NeedsCleanup ? emitDestructorsFunction(CGM, Loc, KmpInt32Ty,
                                             KmpTaskTWithPrivatesPtrQTy,
                                             KmpTaskTWithPrivatesQTy)
                   : llvm::ConstantPointerNull::get(
                         cast<llvm::PointerType>(KmpRoutineEntryPtrTy));
  LValue Destructor = CGF.EmitLValueForField(
      TDBase, *std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTDestructors));
  CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                            DestructorFn, KmpRoutineEntryPtrTy),
                        Destructor);

  // Process list of dependences.
  Address DependenciesArray = Address::invalid();
  unsigned NumDependencies = Dependences.size();
  if (NumDependencies) {
    // Dependence kind for RTL.
    enum RTLDependenceKindTy { DepIn = 1, DepOut = 2, DepInOut = 3 };
    // Field indices inside kmp_depend_info.
    enum RTLDependInfoFieldsTy { BaseAddr, Len, Flags };
    RecordDecl *KmpDependInfoRD;
    // The flags field is an unsigned integer as wide as 'bool'.
    QualType FlagsTy = C.getIntTypeForBitwidth(
        C.toBits(C.getTypeSizeInChars(C.BoolTy)), /*Signed=*/false);
    llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
    // Build kmp_depend_info { intptr_t base_addr; size_t len; <flags> } once
    // and cache it in KmpDependInfoTy.
    if (KmpDependInfoTy.isNull()) {
      KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info");
      KmpDependInfoRD->startDefinition();
      addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType());
      addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType());
      addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy);
      KmpDependInfoRD->completeDefinition();
      KmpDependInfoTy = C.getRecordType(KmpDependInfoRD);
    } else {
      KmpDependInfoRD = cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
    }
    CharUnits DependencySize = C.getTypeSizeInChars(KmpDependInfoTy);
    // Define type kmp_depend_info[<Dependences.size()>];
    QualType KmpDependInfoArrayTy = C.getConstantArrayType(
        KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies),
        ArrayType::Normal, /*IndexTypeQuals=*/0);
    // kmp_depend_info[<Dependences.size()>] deps;
    DependenciesArray = CGF.CreateMemTemp(KmpDependInfoArrayTy);
    for (unsigned i = 0; i < NumDependencies; ++i) {
      const Expr *E = Dependences[i].second;
      auto Addr = CGF.EmitLValue(E);
      llvm::Value *Size;
      QualType Ty = E->getType();
      if (auto *ASE = dyn_cast<OMPArraySectionExpr>(E->IgnoreParenImpCasts())) {
        // Array section: the length is the byte distance from the section's
        // lower-bound address to one element past its upper-bound address.
        LValue UpAddrLVal =
            CGF.EmitOMPArraySectionExpr(ASE, /*LowerBound=*/false);
        llvm::Value *UpAddr =
            CGF.Builder.CreateConstGEP1_32(UpAddrLVal.getPointer(), /*Idx0=*/1);
        llvm::Value *LowIntPtr =
            CGF.Builder.CreatePtrToInt(Addr.getPointer(), CGM.SizeTy);
        llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGM.SizeTy);
        Size = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr);
      } else {
        Size = getTypeSize(CGF, Ty);
      }
      auto Base = CGF.MakeAddrLValue(
          CGF.Builder.CreateConstArrayGEP(DependenciesArray, i, DependencySize),
          KmpDependInfoTy);
      // deps[i].base_addr = &<Dependences[i].second>;
      auto BaseAddrLVal = CGF.EmitLValueForField(
          Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
      CGF.EmitStoreOfScalar(
          CGF.Builder.CreatePtrToInt(Addr.getPointer(), CGF.IntPtrTy),
          BaseAddrLVal);
      // deps[i].len = sizeof(<Dependences[i].second>);
      auto LenLVal = CGF.EmitLValueForField(
          Base, *std::next(KmpDependInfoRD->field_begin(), Len));
      CGF.EmitStoreOfScalar(Size, LenLVal);
      // deps[i].flags = <Dependences[i].first>;
      RTLDependenceKindTy DepKind;
      switch (Dependences[i].first) {
      case OMPC_DEPEND_in:
        DepKind = DepIn;
        break;
      case OMPC_DEPEND_out:
        DepKind = DepOut;
        break;
      case OMPC_DEPEND_inout:
        DepKind = DepInOut;
        break;
      case OMPC_DEPEND_unknown:
        llvm_unreachable("Unknown task dependence type");
      }
      auto FlagsLVal = CGF.EmitLValueForField(
          Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
      CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
                            FlagsLVal);
    }
    // Pass the array to the runtime as a void * to its first element.
    DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        CGF.Builder.CreateStructGEP(DependenciesArray, 0, CharUnits::Zero()),
        CGF.VoidPtrTy);
  }

  // NOTE: routine and part_id fields are initialized by
  // __kmpc_omp_task_alloc() libcall.
  // Build kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t
  // *new_task);
  // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
  // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
  // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if dependence
  // list is not empty
  auto *ThreadID = getThreadID(CGF, Loc);
  auto *UpLoc = emitUpdateLocation(CGF, Loc);
  llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask };
  // Filled only when there are dependences; the lambdas below read it only
  // in that case.
  llvm::Value *DepTaskArgs[7];
  if (NumDependencies) {
    DepTaskArgs[0] = UpLoc;
    DepTaskArgs[1] = ThreadID;
    DepTaskArgs[2] = NewTask;
    DepTaskArgs[3] = CGF.Builder.getInt32(NumDependencies);
    DepTaskArgs[4] = DependenciesArray.getPointer();
    DepTaskArgs[5] = CGF.Builder.getInt32(0);
    DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }
  // 'Then' path: hand the task over to the runtime.
  auto &&ThenCodeGen = [this, NumDependencies,
                        &TaskArgs, &DepTaskArgs](CodeGenFunction &CGF) {
    // TODO: add check for untied tasks.
    if (NumDependencies) {
      CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task_with_deps),
                          DepTaskArgs);
    } else {
      CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task),
                          TaskArgs);
    }
  };
  // Cleanup type parameterized by the number of TaskArgs elements.
  typedef CallEndCleanup<std::extent<decltype(TaskArgs)>::value>
      IfCallEndCleanup;

  // Arguments for __kmpc_omp_wait_deps; filled only when there are
  // dependences.
  llvm::Value *DepWaitTaskArgs[6];
  if (NumDependencies) {
    DepWaitTaskArgs[0] = UpLoc;
    DepWaitTaskArgs[1] = ThreadID;
    DepWaitTaskArgs[2] = CGF.Builder.getInt32(NumDependencies);
    DepWaitTaskArgs[3] = DependenciesArray.getPointer();
    DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
    DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }
  // 'Else' path (the 'if' clause evaluated to false): wait for dependences
  // and call the proxy entry directly.
  auto &&ElseCodeGen = [this, &TaskArgs, ThreadID, NewTaskNewTaskTTy, TaskEntry,
                        NumDependencies, &DepWaitTaskArgs](CodeGenFunction &CGF) {
    CodeGenFunction::RunCleanupsScope LocalScope(CGF);
    // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
    // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
    // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info
    // is specified.
    if (NumDependencies)
      CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_wait_deps),
                          DepWaitTaskArgs);
    // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
    CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task_begin_if0),
                        TaskArgs);
    // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
    // Pushed as a normal-and-EH cleanup on the cleanup stack.
    CGF.EHStack.pushCleanup<IfCallEndCleanup>(
        NormalAndEHCleanup,
        createRuntimeFunction(OMPRTL__kmpc_omp_task_complete_if0),
        llvm::makeArrayRef(TaskArgs));

    // Call proxy_task_entry(gtid, new_task);
    llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy};
    CGF.EmitCallOrInvoke(TaskEntry, OutlinedFnArgs);
  };

  // With an 'if' clause both paths are emitted under the condition; without
  // one only the 'then' path is emitted.
  if (IfCond) {
    emitOMPIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen);
  } else {
    CodeGenFunction::RunCleanupsScope Scope(CGF);
    ThenCodeGen(CGF);
  }
}
2598 
2599 static llvm::Value *emitReductionFunction(CodeGenModule &CGM,
2600                                           llvm::Type *ArgsType,
2601                                           ArrayRef<const Expr *> LHSExprs,
2602                                           ArrayRef<const Expr *> RHSExprs,
2603                                           ArrayRef<const Expr *> ReductionOps) {
2604   auto &C = CGM.getContext();
2605 
2606   // void reduction_func(void *LHSArg, void *RHSArg);
2607   FunctionArgList Args;
2608   ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, SourceLocation(), /*Id=*/nullptr,
2609                            C.VoidPtrTy);
2610   ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, SourceLocation(), /*Id=*/nullptr,
2611                            C.VoidPtrTy);
2612   Args.push_back(&LHSArg);
2613   Args.push_back(&RHSArg);
2614   FunctionType::ExtInfo EI;
2615   auto &CGFI = CGM.getTypes().arrangeFreeFunctionDeclaration(
2616       C.VoidTy, Args, EI, /*isVariadic=*/false);
2617   auto *Fn = llvm::Function::Create(
2618       CGM.getTypes().GetFunctionType(CGFI), llvm::GlobalValue::InternalLinkage,
2619       ".omp.reduction.reduction_func", &CGM.getModule());
2620   CGM.SetLLVMFunctionAttributes(/*D=*/nullptr, CGFI, Fn);
2621   CodeGenFunction CGF(CGM);
2622   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args);
2623 
2624   // Dst = (void*[n])(LHSArg);
2625   // Src = (void*[n])(RHSArg);
2626   Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2627       CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
2628       ArgsType), CGF.getPointerAlign());
2629   Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2630       CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
2631       ArgsType), CGF.getPointerAlign());
2632 
2633   //  ...
2634   //  *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]);
2635   //  ...
2636   CodeGenFunction::OMPPrivateScope Scope(CGF);
2637   for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I) {
2638     auto RHSVar = cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl());
2639     Scope.addPrivate(RHSVar, [&]() -> Address {
2640       return emitAddrOfVarFromArray(CGF, RHS, I, RHSVar);
2641     });
2642     auto LHSVar = cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl());
2643     Scope.addPrivate(LHSVar, [&]() -> Address {
2644       return emitAddrOfVarFromArray(CGF, LHS, I, LHSVar);
2645     });
2646   }
2647   Scope.Privatize();
2648   for (auto *E : ReductionOps) {
2649     CGF.EmitIgnoredExpr(E);
2650   }
2651   Scope.ForceCleanup();
2652   CGF.FinishFunction();
2653   return Fn;
2654 }
2655 
2656 void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
2657                                     ArrayRef<const Expr *> LHSExprs,
2658                                     ArrayRef<const Expr *> RHSExprs,
2659                                     ArrayRef<const Expr *> ReductionOps,
2660                                     bool WithNowait, bool SimpleReduction) {
2661   // Next code should be emitted for reduction:
2662   //
2663   // static kmp_critical_name lock = { 0 };
2664   //
2665   // void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
2666   //  *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]);
2667   //  ...
2668   //  *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1],
2669   //  *(Type<n>-1*)rhs[<n>-1]);
2670   // }
2671   //
2672   // ...
2673   // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]};
2674   // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
2675   // RedList, reduce_func, &<lock>)) {
2676   // case 1:
2677   //  ...
2678   //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
2679   //  ...
2680   // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
2681   // break;
2682   // case 2:
2683   //  ...
2684   //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
2685   //  ...
2686   // [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);]
2687   // break;
2688   // default:;
2689   // }
2690   //
2691   // if SimpleReduction is true, only the next code is generated:
2692   //  ...
2693   //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
2694   //  ...
2695 
2696   auto &C = CGM.getContext();
2697 
2698   if (SimpleReduction) {
2699     CodeGenFunction::RunCleanupsScope Scope(CGF);
2700     for (auto *E : ReductionOps) {
2701       CGF.EmitIgnoredExpr(E);
2702     }
2703     return;
2704   }
2705 
2706   // 1. Build a list of reduction variables.
2707   // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]};
2708   llvm::APInt ArraySize(/*unsigned int numBits=*/32, RHSExprs.size());
2709   QualType ReductionArrayTy =
2710       C.getConstantArrayType(C.VoidPtrTy, ArraySize, ArrayType::Normal,
2711                              /*IndexTypeQuals=*/0);
2712   Address ReductionList =
2713       CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list");
2714   for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I) {
2715     Address Elem =
2716       CGF.Builder.CreateConstArrayGEP(ReductionList, I, CGF.getPointerSize());
2717     CGF.Builder.CreateStore(
2718         CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2719             CGF.EmitLValue(RHSExprs[I]).getPointer(), CGF.VoidPtrTy),
2720         Elem);
2721   }
2722 
2723   // 2. Emit reduce_func().
2724   auto *ReductionFn = emitReductionFunction(
2725       CGM, CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo(), LHSExprs,
2726       RHSExprs, ReductionOps);
2727 
2728   // 3. Create static kmp_critical_name lock = { 0 };
2729   auto *Lock = getCriticalRegionLock(".reduction");
2730 
2731   // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
2732   // RedList, reduce_func, &<lock>);
2733   auto *IdentTLoc = emitUpdateLocation(
2734       CGF, Loc,
2735       static_cast<OpenMPLocationFlags>(OMP_IDENT_KMPC | OMP_ATOMIC_REDUCE));
2736   auto *ThreadId = getThreadID(CGF, Loc);
2737   auto *ReductionArrayTySize = llvm::ConstantInt::get(
2738       CGM.SizeTy, C.getTypeSizeInChars(ReductionArrayTy).getQuantity());
2739   auto *RL =
2740     CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(ReductionList.getPointer(),
2741                                                     CGF.VoidPtrTy);
2742   llvm::Value *Args[] = {
2743       IdentTLoc,                             // ident_t *<loc>
2744       ThreadId,                              // i32 <gtid>
2745       CGF.Builder.getInt32(RHSExprs.size()), // i32 <n>
2746       ReductionArrayTySize,                  // size_type sizeof(RedList)
2747       RL,                                    // void *RedList
2748       ReductionFn, // void (*) (void *, void *) <reduce_func>
2749       Lock         // kmp_critical_name *&<lock>
2750   };
2751   auto Res = CGF.EmitRuntimeCall(
2752       createRuntimeFunction(WithNowait ? OMPRTL__kmpc_reduce_nowait
2753                                        : OMPRTL__kmpc_reduce),
2754       Args);
2755 
2756   // 5. Build switch(res)
2757   auto *DefaultBB = CGF.createBasicBlock(".omp.reduction.default");
2758   auto *SwInst = CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2);
2759 
2760   // 6. Build case 1:
2761   //  ...
2762   //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
2763   //  ...
2764   // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
2765   // break;
2766   auto *Case1BB = CGF.createBasicBlock(".omp.reduction.case1");
2767   SwInst->addCase(CGF.Builder.getInt32(1), Case1BB);
2768   CGF.EmitBlock(Case1BB);
2769 
2770   {
2771     CodeGenFunction::RunCleanupsScope Scope(CGF);
2772     // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
2773     llvm::Value *EndArgs[] = {
2774         IdentTLoc, // ident_t *<loc>
2775         ThreadId,  // i32 <gtid>
2776         Lock       // kmp_critical_name *&<lock>
2777     };
2778     CGF.EHStack
2779         .pushCleanup<CallEndCleanup<std::extent<decltype(EndArgs)>::value>>(
2780             NormalAndEHCleanup,
2781             createRuntimeFunction(WithNowait ? OMPRTL__kmpc_end_reduce_nowait
2782                                              : OMPRTL__kmpc_end_reduce),
2783             llvm::makeArrayRef(EndArgs));
2784     for (auto *E : ReductionOps) {
2785       CGF.EmitIgnoredExpr(E);
2786     }
2787   }
2788 
2789   CGF.EmitBranch(DefaultBB);
2790 
2791   // 7. Build case 2:
2792   //  ...
2793   //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
2794   //  ...
2795   // break;
2796   auto *Case2BB = CGF.createBasicBlock(".omp.reduction.case2");
2797   SwInst->addCase(CGF.Builder.getInt32(2), Case2BB);
2798   CGF.EmitBlock(Case2BB);
2799 
2800   {
2801     CodeGenFunction::RunCleanupsScope Scope(CGF);
2802     if (!WithNowait) {
2803       // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>);
2804       llvm::Value *EndArgs[] = {
2805           IdentTLoc, // ident_t *<loc>
2806           ThreadId,  // i32 <gtid>
2807           Lock       // kmp_critical_name *&<lock>
2808       };
2809       CGF.EHStack
2810           .pushCleanup<CallEndCleanup<std::extent<decltype(EndArgs)>::value>>(
2811               NormalAndEHCleanup,
2812               createRuntimeFunction(OMPRTL__kmpc_end_reduce),
2813               llvm::makeArrayRef(EndArgs));
2814     }
2815     auto I = LHSExprs.begin();
2816     for (auto *E : ReductionOps) {
2817       const Expr *XExpr = nullptr;
2818       const Expr *EExpr = nullptr;
2819       const Expr *UpExpr = nullptr;
2820       BinaryOperatorKind BO = BO_Comma;
2821       if (auto *BO = dyn_cast<BinaryOperator>(E)) {
2822         if (BO->getOpcode() == BO_Assign) {
2823           XExpr = BO->getLHS();
2824           UpExpr = BO->getRHS();
2825         }
2826       }
2827       // Try to emit update expression as a simple atomic.
2828       auto *RHSExpr = UpExpr;
2829       if (RHSExpr) {
2830         // Analyze RHS part of the whole expression.
2831         if (auto *ACO = dyn_cast<AbstractConditionalOperator>(
2832                 RHSExpr->IgnoreParenImpCasts())) {
2833           // If this is a conditional operator, analyze its condition for
2834           // min/max reduction operator.
2835           RHSExpr = ACO->getCond();
2836         }
2837         if (auto *BORHS =
2838                 dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) {
2839           EExpr = BORHS->getRHS();
2840           BO = BORHS->getOpcode();
2841         }
2842       }
2843       if (XExpr) {
2844         auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl());
2845         LValue X = CGF.EmitLValue(XExpr);
2846         RValue E;
2847         if (EExpr)
2848           E = CGF.EmitAnyExpr(EExpr);
2849         CGF.EmitOMPAtomicSimpleUpdateExpr(
2850             X, E, BO, /*IsXLHSInRHSPart=*/true, llvm::Monotonic, Loc,
2851             [&CGF, UpExpr, VD](RValue XRValue) {
2852               CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
2853               PrivateScope.addPrivate(
2854                   VD, [&CGF, VD, XRValue]() -> Address {
2855                     Address LHSTemp = CGF.CreateMemTemp(VD->getType());
2856                     CGF.EmitStoreThroughLValue(
2857                         XRValue,
2858                         CGF.MakeAddrLValue(LHSTemp, VD->getType()));
2859                     return LHSTemp;
2860                   });
2861               (void)PrivateScope.Privatize();
2862               return CGF.EmitAnyExpr(UpExpr);
2863             });
2864       } else {
2865         // Emit as a critical region.
2866         emitCriticalRegion(CGF, ".atomic_reduction", [E](CodeGenFunction &CGF) {
2867           CGF.EmitIgnoredExpr(E);
2868         }, Loc);
2869       }
2870       ++I;
2871     }
2872   }
2873 
2874   CGF.EmitBranch(DefaultBB);
2875   CGF.EmitBlock(DefaultBB, /*IsFinished=*/true);
2876 }
2877 
2878 void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
2879                                        SourceLocation Loc) {
2880   // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
2881   // global_tid);
2882   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2883   // Ignore return result until untied tasks are supported.
2884   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_taskwait), Args);
2885 }
2886 
2887 void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF,
2888                                            OpenMPDirectiveKind InnerKind,
2889                                            const RegionCodeGenTy &CodeGen,
2890                                            bool HasCancel) {
2891   InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel);
2892   CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr);
2893 }
2894 
namespace {
/// \brief Integer codes produced by getCancellationKind() and passed as the
/// 'cncl_kind' argument of the cancellation runtime calls emitted in this
/// file.
enum RTCancelKind {
  /// \brief Initial placeholder; getCancellationKind() never returns it.
  CancelNoreq = 0,
  /// \brief Code used for OMPD_parallel.
  CancelParallel = 1,
  /// \brief Code used for OMPD_for.
  CancelLoop = 2,
  /// \brief Code used for OMPD_sections.
  CancelSections = 3,
  /// \brief Code used for OMPD_taskgroup.
  CancelTaskgroup = 4,
};
} // end anonymous namespace
2904 
2905 static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) {
2906   RTCancelKind CancelKind = CancelNoreq;
2907   if (CancelRegion == OMPD_parallel)
2908     CancelKind = CancelParallel;
2909   else if (CancelRegion == OMPD_for)
2910     CancelKind = CancelLoop;
2911   else if (CancelRegion == OMPD_sections)
2912     CancelKind = CancelSections;
2913   else {
2914     assert(CancelRegion == OMPD_taskgroup);
2915     CancelKind = CancelTaskgroup;
2916   }
2917   return CancelKind;
2918 }
2919 
2920 void CGOpenMPRuntime::emitCancellationPointCall(
2921     CodeGenFunction &CGF, SourceLocation Loc,
2922     OpenMPDirectiveKind CancelRegion) {
2923   // Build call kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
2924   // global_tid, kmp_int32 cncl_kind);
2925   if (auto *OMPRegionInfo =
2926           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
2927     if (OMPRegionInfo->getDirectiveKind() == OMPD_single)
2928       return;
2929     if (OMPRegionInfo->hasCancel()) {
2930       llvm::Value *Args[] = {
2931           emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2932           CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
2933       // Ignore return result until untied tasks are supported.
2934       auto *Result = CGF.EmitRuntimeCall(
2935           createRuntimeFunction(OMPRTL__kmpc_cancellationpoint), Args);
2936       // if (__kmpc_cancellationpoint()) {
2937       //  __kmpc_cancel_barrier();
2938       //   exit from construct;
2939       // }
2940       auto *ExitBB = CGF.createBasicBlock(".cancel.exit");
2941       auto *ContBB = CGF.createBasicBlock(".cancel.continue");
2942       auto *Cmp = CGF.Builder.CreateIsNotNull(Result);
2943       CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
2944       CGF.EmitBlock(ExitBB);
2945       // __kmpc_cancel_barrier();
2946       emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false);
2947       // exit from construct;
2948       auto CancelDest =
2949           CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
2950       CGF.EmitBranchThroughCleanup(CancelDest);
2951       CGF.EmitBlock(ContBB, /*IsFinished=*/true);
2952     }
2953   }
2954 }
2955 
2956 void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc,
2957                                      const Expr *IfCond,
2958                                      OpenMPDirectiveKind CancelRegion) {
2959   // Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
2960   // kmp_int32 cncl_kind);
2961   if (auto *OMPRegionInfo =
2962           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
2963     if (OMPRegionInfo->getDirectiveKind() == OMPD_single)
2964       return;
2965     auto &&ThenGen = [this, Loc, CancelRegion,
2966                       OMPRegionInfo](CodeGenFunction &CGF) {
2967       llvm::Value *Args[] = {
2968           emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2969           CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
2970       // Ignore return result until untied tasks are supported.
2971       auto *Result =
2972           CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_cancel), Args);
2973       // if (__kmpc_cancel()) {
2974       //  __kmpc_cancel_barrier();
2975       //   exit from construct;
2976       // }
2977       auto *ExitBB = CGF.createBasicBlock(".cancel.exit");
2978       auto *ContBB = CGF.createBasicBlock(".cancel.continue");
2979       auto *Cmp = CGF.Builder.CreateIsNotNull(Result);
2980       CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
2981       CGF.EmitBlock(ExitBB);
2982       // __kmpc_cancel_barrier();
2983       emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false);
2984       // exit from construct;
2985       auto CancelDest =
2986           CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
2987       CGF.EmitBranchThroughCleanup(CancelDest);
2988       CGF.EmitBlock(ContBB, /*IsFinished=*/true);
2989     };
2990     if (IfCond)
2991       emitOMPIfClause(CGF, IfCond, ThenGen, [](CodeGenFunction &) {});
2992     else
2993       ThenGen(CGF);
2994   }
2995 }
2996 
2997 llvm::Value *
2998 CGOpenMPRuntime::emitTargetOutlinedFunction(const OMPExecutableDirective &D,
2999                                             const RegionCodeGenTy &CodeGen) {
3000   const CapturedStmt &CS = *cast<CapturedStmt>(D.getAssociatedStmt());
3001 
3002   CodeGenFunction CGF(CGM, true);
3003   CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen);
3004   CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
3005   return CGF.GenerateOpenMPCapturedStmtFunction(CS, /*UseOnlyReferences=*/true);
3006 }
3007 
3008 void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF,
3009                                      const OMPExecutableDirective &D,
3010                                      llvm::Value *OutlinedFn,
3011                                      const Expr *IfCond, const Expr *Device,
3012                                      ArrayRef<llvm::Value *> CapturedVars) {
3013   /// \brief Values for bit flags used to specify the mapping type for
3014   /// offloading.
3015   enum OpenMPOffloadMappingFlags {
3016     /// \brief Allocate memory on the device and move data from host to device.
3017     OMP_MAP_TO = 0x01,
3018     /// \brief Allocate memory on the device and move data from device to host.
3019     OMP_MAP_FROM = 0x02,
3020   };
3021 
3022   enum OpenMPOffloadingReservedDeviceIDs {
3023     /// \brief Device ID if the device was not defined, runtime should get it
3024     /// from environment variables in the spec.
3025     OMP_DEVICEID_UNDEF = -1,
3026   };
3027 
3028   // Fill up the arrays with the all the captured variables.
3029   SmallVector<llvm::Value *, 16> BasePointers;
3030   SmallVector<llvm::Value *, 16> Pointers;
3031   SmallVector<llvm::Value *, 16> Sizes;
3032   SmallVector<unsigned, 16> MapTypes;
3033 
3034   bool hasVLACaptures = false;
3035 
3036   const CapturedStmt &CS = *cast<CapturedStmt>(D.getAssociatedStmt());
3037   auto RI = CS.getCapturedRecordDecl()->field_begin();
3038   // auto II = CS.capture_init_begin();
3039   auto CV = CapturedVars.begin();
3040   for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(),
3041                                             CE = CS.capture_end();
3042        CI != CE; ++CI, ++RI, ++CV) {
3043     StringRef Name;
3044     QualType Ty;
3045     llvm::Value *BasePointer;
3046     llvm::Value *Pointer;
3047     llvm::Value *Size;
3048     unsigned MapType;
3049 
3050     if (CI->capturesVariableArrayType()) {
3051       BasePointer = Pointer = *CV;
3052       Size = getTypeSize(CGF, RI->getType());
3053       hasVLACaptures = true;
3054       // VLA sizes don't need to be copied back from the device.
3055       MapType = OMP_MAP_TO;
3056     } else if (CI->capturesThis()) {
3057       BasePointer = Pointer = *CV;
3058       const PointerType *PtrTy = cast<PointerType>(RI->getType().getTypePtr());
3059       Size = getTypeSize(CGF, PtrTy->getPointeeType());
3060       // Default map type.
3061       MapType = OMP_MAP_TO | OMP_MAP_FROM;
3062     } else {
3063       BasePointer = Pointer = *CV;
3064 
3065       const ReferenceType *PtrTy =
3066           cast<ReferenceType>(RI->getType().getTypePtr());
3067       QualType ElementType = PtrTy->getPointeeType();
3068       Size = getTypeSize(CGF, ElementType);
3069       // Default map type.
3070       MapType = OMP_MAP_TO | OMP_MAP_FROM;
3071     }
3072 
3073     BasePointers.push_back(BasePointer);
3074     Pointers.push_back(Pointer);
3075     Sizes.push_back(Size);
3076     MapTypes.push_back(MapType);
3077   }
3078 
3079   // Keep track on whether the host function has to be executed.
3080   auto OffloadErrorQType =
3081       CGF.getContext().getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true);
3082   auto OffloadError = CGF.MakeAddrLValue(
3083       CGF.CreateMemTemp(OffloadErrorQType, ".run_host_version"),
3084       OffloadErrorQType);
3085   CGF.EmitStoreOfScalar(llvm::Constant::getNullValue(CGM.Int32Ty),
3086                         OffloadError);
3087 
3088   // Fill up the pointer arrays and transfer execution to the device.
3089   auto &&ThenGen = [this, &BasePointers, &Pointers, &Sizes, &MapTypes,
3090                     hasVLACaptures, Device, OffloadError,
3091                     OffloadErrorQType](CodeGenFunction &CGF) {
3092     unsigned PointerNumVal = BasePointers.size();
3093     llvm::Value *PointerNum = CGF.Builder.getInt32(PointerNumVal);
3094     llvm::Value *BasePointersArray;
3095     llvm::Value *PointersArray;
3096     llvm::Value *SizesArray;
3097     llvm::Value *MapTypesArray;
3098 
3099     if (PointerNumVal) {
3100       llvm::APInt PointerNumAP(32, PointerNumVal, /*isSigned=*/true);
3101       QualType PointerArrayType = CGF.getContext().getConstantArrayType(
3102           CGF.getContext().VoidPtrTy, PointerNumAP, ArrayType::Normal,
3103           /*IndexTypeQuals=*/0);
3104 
3105       BasePointersArray =
3106           CGF.CreateMemTemp(PointerArrayType, ".offload_baseptrs").getPointer();
3107       PointersArray =
3108           CGF.CreateMemTemp(PointerArrayType, ".offload_ptrs").getPointer();
3109 
3110       // If we don't have any VLA types, we can use a constant array for the map
3111       // sizes, otherwise we need to fill up the arrays as we do for the
3112       // pointers.
3113       if (hasVLACaptures) {
3114         QualType SizeArrayType = CGF.getContext().getConstantArrayType(
3115             CGF.getContext().getSizeType(), PointerNumAP, ArrayType::Normal,
3116             /*IndexTypeQuals=*/0);
3117         SizesArray =
3118             CGF.CreateMemTemp(SizeArrayType, ".offload_sizes").getPointer();
3119       } else {
3120         // We expect all the sizes to be constant, so we collect them to create
3121         // a constant array.
3122         SmallVector<llvm::Constant *, 16> ConstSizes;
3123         for (auto S : Sizes)
3124           ConstSizes.push_back(cast<llvm::Constant>(S));
3125 
3126         auto *SizesArrayInit = llvm::ConstantArray::get(
3127             llvm::ArrayType::get(CGM.SizeTy, ConstSizes.size()), ConstSizes);
3128         auto *SizesArrayGbl = new llvm::GlobalVariable(
3129             CGM.getModule(), SizesArrayInit->getType(),
3130             /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
3131             SizesArrayInit, ".offload_sizes");
3132         SizesArrayGbl->setUnnamedAddr(true);
3133         SizesArray = SizesArrayGbl;
3134       }
3135 
3136       // The map types are always constant so we don't need to generate code to
3137       // fill arrays. Instead, we create an array constant.
3138       llvm::Constant *MapTypesArrayInit =
3139           llvm::ConstantDataArray::get(CGF.Builder.getContext(), MapTypes);
3140       auto *MapTypesArrayGbl = new llvm::GlobalVariable(
3141           CGM.getModule(), MapTypesArrayInit->getType(),
3142           /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
3143           MapTypesArrayInit, ".offload_maptypes");
3144       MapTypesArrayGbl->setUnnamedAddr(true);
3145       MapTypesArray = MapTypesArrayGbl;
3146 
3147       for (unsigned i = 0; i < PointerNumVal; ++i) {
3148         llvm::Value *BP = CGF.Builder.CreateConstInBoundsGEP2_32(
3149             llvm::ArrayType::get(CGM.VoidPtrTy, PointerNumVal),
3150             BasePointersArray, 0, i);
3151         Address BPAddr(BP, CGM.getContext().getTypeAlignInChars(
3152                                CGM.getContext().VoidPtrTy));
3153         CGF.Builder.CreateStore(
3154             CGF.Builder.CreateBitCast(BasePointers[i], CGM.VoidPtrTy), BPAddr);
3155 
3156         llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32(
3157             llvm::ArrayType::get(CGM.VoidPtrTy, PointerNumVal), PointersArray,
3158             0, i);
3159         Address PAddr(P, CGM.getContext().getTypeAlignInChars(
3160                              CGM.getContext().VoidPtrTy));
3161         CGF.Builder.CreateStore(
3162             CGF.Builder.CreateBitCast(Pointers[i], CGM.VoidPtrTy), PAddr);
3163 
3164         if (hasVLACaptures) {
3165           llvm::Value *S = CGF.Builder.CreateConstInBoundsGEP2_32(
3166               llvm::ArrayType::get(CGM.SizeTy, PointerNumVal), SizesArray,
3167               /*Idx0=*/0,
3168               /*Idx1=*/i);
3169           Address SAddr(S, CGM.getContext().getTypeAlignInChars(
3170                                CGM.getContext().getSizeType()));
3171           CGF.Builder.CreateStore(CGF.Builder.CreateIntCast(
3172                                       Sizes[i], CGM.SizeTy, /*isSigned=*/true),
3173                                   SAddr);
3174         }
3175       }
3176 
3177       BasePointersArray = CGF.Builder.CreateConstInBoundsGEP2_32(
3178           llvm::ArrayType::get(CGM.VoidPtrTy, PointerNumVal), BasePointersArray,
3179           /*Idx0=*/0, /*Idx1=*/0);
3180       PointersArray = CGF.Builder.CreateConstInBoundsGEP2_32(
3181           llvm::ArrayType::get(CGM.VoidPtrTy, PointerNumVal), PointersArray,
3182           /*Idx0=*/0,
3183           /*Idx1=*/0);
3184       SizesArray = CGF.Builder.CreateConstInBoundsGEP2_32(
3185           llvm::ArrayType::get(CGM.SizeTy, PointerNumVal), SizesArray,
3186           /*Idx0=*/0, /*Idx1=*/0);
3187       MapTypesArray = CGF.Builder.CreateConstInBoundsGEP2_32(
3188           llvm::ArrayType::get(CGM.Int32Ty, PointerNumVal), MapTypesArray,
3189           /*Idx0=*/0,
3190           /*Idx1=*/0);
3191 
3192     } else {
3193       BasePointersArray = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
3194       PointersArray = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
3195       SizesArray = llvm::ConstantPointerNull::get(CGM.SizeTy->getPointerTo());
3196       MapTypesArray =
3197           llvm::ConstantPointerNull::get(CGM.Int32Ty->getPointerTo());
3198     }
3199 
3200     // On top of the arrays that were filled up, the target offloading call
3201     // takes as arguments the device id as well as the host pointer. The host
3202     // pointer is used by the runtime library to identify the current target
3203     // region, so it only has to be unique and not necessarily point to
3204     // anything. It could be the pointer to the outlined function that
3205     // implements the target region, but we aren't using that so that the
3206     // compiler doesn't need to keep that, and could therefore inline the host
3207     // function if proven worthwhile during optimization.
3208 
3209     llvm::Value *HostPtr = new llvm::GlobalVariable(
3210         CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
3211         llvm::GlobalValue::PrivateLinkage,
3212         llvm::Constant::getNullValue(CGM.Int8Ty), ".offload_hstptr");
3213 
3214     // Emit device ID if any.
3215     llvm::Value *DeviceID;
3216     if (Device)
3217       DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
3218                                            CGM.Int32Ty, /*isSigned=*/true);
3219     else
3220       DeviceID = CGF.Builder.getInt32(OMP_DEVICEID_UNDEF);
3221 
3222     llvm::Value *OffloadingArgs[] = {
3223         DeviceID,      HostPtr,    PointerNum,   BasePointersArray,
3224         PointersArray, SizesArray, MapTypesArray};
3225     auto Return = CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_target),
3226                                       OffloadingArgs);
3227 
3228     CGF.EmitStoreOfScalar(Return, OffloadError);
3229   };
3230 
3231   if (IfCond) {
3232     // Notify that the host version must be executed.
3233     auto &&ElseGen = [this, OffloadError,
3234                       OffloadErrorQType](CodeGenFunction &CGF) {
3235       CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/-1u),
3236                             OffloadError);
3237     };
3238     emitOMPIfClause(CGF, IfCond, ThenGen, ElseGen);
3239   } else {
3240     CodeGenFunction::RunCleanupsScope Scope(CGF);
3241     ThenGen(CGF);
3242   }
3243 
3244   // Check the error code and execute the host version if required.
3245   auto OffloadFailedBlock = CGF.createBasicBlock("omp_offload.failed");
3246   auto OffloadContBlock = CGF.createBasicBlock("omp_offload.cont");
3247   auto OffloadErrorVal = CGF.EmitLoadOfScalar(OffloadError, SourceLocation());
3248   auto Failed = CGF.Builder.CreateIsNotNull(OffloadErrorVal);
3249   CGF.Builder.CreateCondBr(Failed, OffloadFailedBlock, OffloadContBlock);
3250 
3251   CGF.EmitBlock(OffloadFailedBlock);
3252   CGF.Builder.CreateCall(OutlinedFn, BasePointers);
3253   CGF.EmitBranch(OffloadContBlock);
3254 
3255   CGF.EmitBlock(OffloadContBlock, /*IsFinished=*/true);
3256   return;
3257 }
3258