1 //===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This provides a class for OpenMP runtime code generation.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "CGOpenMPRuntime.h"
15 #include "CodeGenFunction.h"
16 #include "CGCleanup.h"
17 #include "clang/AST/Decl.h"
18 #include "clang/AST/StmtOpenMP.h"
19 #include "llvm/ADT/ArrayRef.h"
20 #include "llvm/IR/CallSite.h"
21 #include "llvm/IR/DerivedTypes.h"
22 #include "llvm/IR/GlobalValue.h"
23 #include "llvm/IR/Value.h"
24 #include "llvm/Support/raw_ostream.h"
25 #include <cassert>
26 
27 using namespace clang;
28 using namespace CodeGen;
29 
30 namespace {
31 /// \brief Base class for handling code generation inside OpenMP regions.
32 class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
33 public:
34   /// \brief Kinds of OpenMP regions used in codegen.
35   enum CGOpenMPRegionKind {
36     /// \brief Region with outlined function for standalone 'parallel'
37     /// directive.
38     ParallelOutlinedRegion,
39     /// \brief Region with outlined function for standalone 'task' directive.
40     TaskOutlinedRegion,
41     /// \brief Region for constructs that do not require function outlining,
42     /// like 'for', 'sections', 'atomic' etc. directives.
43     InlinedRegion,
44   };
45 
46   CGOpenMPRegionInfo(const CapturedStmt &CS,
47                      const CGOpenMPRegionKind RegionKind,
48                      const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
49                      bool HasCancel)
50       : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind),
51         CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {}
52 
53   CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind,
54                      const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
55                      bool HasCancel)
56       : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen),
57         Kind(Kind), HasCancel(HasCancel) {}
58 
59   /// \brief Get a variable or parameter for storing global thread id
60   /// inside OpenMP construct.
61   virtual const VarDecl *getThreadIDVariable() const = 0;
62 
63   /// \brief Emit the captured statement body.
64   void EmitBody(CodeGenFunction &CGF, const Stmt *S) override;
65 
66   /// \brief Get an LValue for the current ThreadID variable.
67   /// \return LValue for thread id variable. This LValue always has type int32*.
68   virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF);
69 
70   CGOpenMPRegionKind getRegionKind() const { return RegionKind; }
71 
72   OpenMPDirectiveKind getDirectiveKind() const { return Kind; }
73 
74   bool hasCancel() const { return HasCancel; }
75 
76   static bool classof(const CGCapturedStmtInfo *Info) {
77     return Info->getKind() == CR_OpenMP;
78   }
79 
80 protected:
81   CGOpenMPRegionKind RegionKind;
82   const RegionCodeGenTy &CodeGen;
83   OpenMPDirectiveKind Kind;
84   bool HasCancel;
85 };
86 
87 /// \brief API for captured statement code generation in OpenMP constructs.
88 class CGOpenMPOutlinedRegionInfo : public CGOpenMPRegionInfo {
89 public:
90   CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar,
91                              const RegionCodeGenTy &CodeGen,
92                              OpenMPDirectiveKind Kind, bool HasCancel)
93       : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind,
94                            HasCancel),
95         ThreadIDVar(ThreadIDVar) {
96     assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
97   }
98   /// \brief Get a variable or parameter for storing global thread id
99   /// inside OpenMP construct.
100   const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }
101 
102   /// \brief Get the name of the capture helper.
103   StringRef getHelperName() const override { return ".omp_outlined."; }
104 
105   static bool classof(const CGCapturedStmtInfo *Info) {
106     return CGOpenMPRegionInfo::classof(Info) &&
107            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
108                ParallelOutlinedRegion;
109   }
110 
111 private:
112   /// \brief A variable or parameter storing global thread id for OpenMP
113   /// constructs.
114   const VarDecl *ThreadIDVar;
115 };
116 
117 /// \brief API for captured statement code generation in OpenMP constructs.
118 class CGOpenMPTaskOutlinedRegionInfo : public CGOpenMPRegionInfo {
119 public:
120   CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS,
121                                  const VarDecl *ThreadIDVar,
122                                  const RegionCodeGenTy &CodeGen,
123                                  OpenMPDirectiveKind Kind, bool HasCancel)
124       : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel),
125         ThreadIDVar(ThreadIDVar) {
126     assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
127   }
128   /// \brief Get a variable or parameter for storing global thread id
129   /// inside OpenMP construct.
130   const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }
131 
132   /// \brief Get an LValue for the current ThreadID variable.
133   LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override;
134 
135   /// \brief Get the name of the capture helper.
136   StringRef getHelperName() const override { return ".omp_outlined."; }
137 
138   static bool classof(const CGCapturedStmtInfo *Info) {
139     return CGOpenMPRegionInfo::classof(Info) &&
140            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
141                TaskOutlinedRegion;
142   }
143 
144 private:
145   /// \brief A variable or parameter storing global thread id for OpenMP
146   /// constructs.
147   const VarDecl *ThreadIDVar;
148 };
149 
150 /// \brief API for inlined captured statement code generation in OpenMP
151 /// constructs.
152 class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo {
153 public:
154   CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI,
155                             const RegionCodeGenTy &CodeGen,
156                             OpenMPDirectiveKind Kind, bool HasCancel)
157       : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel),
158         OldCSI(OldCSI),
159         OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {}
160   // \brief Retrieve the value of the context parameter.
161   llvm::Value *getContextValue() const override {
162     if (OuterRegionInfo)
163       return OuterRegionInfo->getContextValue();
164     llvm_unreachable("No context value for inlined OpenMP region");
165   }
166   void setContextValue(llvm::Value *V) override {
167     if (OuterRegionInfo) {
168       OuterRegionInfo->setContextValue(V);
169       return;
170     }
171     llvm_unreachable("No context value for inlined OpenMP region");
172   }
173   /// \brief Lookup the captured field decl for a variable.
174   const FieldDecl *lookup(const VarDecl *VD) const override {
175     if (OuterRegionInfo)
176       return OuterRegionInfo->lookup(VD);
177     // If there is no outer outlined region,no need to lookup in a list of
178     // captured variables, we can use the original one.
179     return nullptr;
180   }
181   FieldDecl *getThisFieldDecl() const override {
182     if (OuterRegionInfo)
183       return OuterRegionInfo->getThisFieldDecl();
184     return nullptr;
185   }
186   /// \brief Get a variable or parameter for storing global thread id
187   /// inside OpenMP construct.
188   const VarDecl *getThreadIDVariable() const override {
189     if (OuterRegionInfo)
190       return OuterRegionInfo->getThreadIDVariable();
191     return nullptr;
192   }
193 
194   /// \brief Get the name of the capture helper.
195   StringRef getHelperName() const override {
196     if (auto *OuterRegionInfo = getOldCSI())
197       return OuterRegionInfo->getHelperName();
198     llvm_unreachable("No helper name for inlined OpenMP construct");
199   }
200 
201   CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; }
202 
203   static bool classof(const CGCapturedStmtInfo *Info) {
204     return CGOpenMPRegionInfo::classof(Info) &&
205            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion;
206   }
207 
208 private:
209   /// \brief CodeGen info about outer OpenMP region.
210   CodeGenFunction::CGCapturedStmtInfo *OldCSI;
211   CGOpenMPRegionInfo *OuterRegionInfo;
212 };
213 
214 /// \brief RAII for emitting code of OpenMP constructs.
215 class InlinedOpenMPRegionRAII {
216   CodeGenFunction &CGF;
217 
218 public:
219   /// \brief Constructs region for combined constructs.
220   /// \param CodeGen Code generation sequence for combined directives. Includes
221   /// a list of functions used for code generation of implicitly inlined
222   /// regions.
223   InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen,
224                           OpenMPDirectiveKind Kind, bool HasCancel)
225       : CGF(CGF) {
226     // Start emission for the construct.
227     CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo(
228         CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel);
229   }
230   ~InlinedOpenMPRegionRAII() {
231     // Restore original CapturedStmtInfo only if we're done with code emission.
232     auto *OldCSI =
233         cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI();
234     delete CGF.CapturedStmtInfo;
235     CGF.CapturedStmtInfo = OldCSI;
236   }
237 };
238 
239 } // anonymous namespace
240 
241 static LValue emitLoadOfPointerLValue(CodeGenFunction &CGF, Address PtrAddr,
242                                       QualType Ty) {
243   AlignmentSource Source;
244   CharUnits Align = CGF.getNaturalPointeeTypeAlignment(Ty, &Source);
245   return CGF.MakeAddrLValue(Address(CGF.Builder.CreateLoad(PtrAddr), Align),
246                             Ty->getPointeeType(), Source);
247 }
248 
249 LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) {
250   return emitLoadOfPointerLValue(CGF,
251                                  CGF.GetAddrOfLocalVar(getThreadIDVariable()),
252                                  getThreadIDVariable()->getType());
253 }
254 
255 void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt * /*S*/) {
256   // 1.2.2 OpenMP Language Terminology
257   // Structured block - An executable statement with a single entry at the
258   // top and a single exit at the bottom.
259   // The point of exit cannot be a branch out of the structured block.
260   // longjmp() and throw() must not violate the entry/exit criteria.
261   CGF.EHStack.pushTerminate();
262   {
263     CodeGenFunction::RunCleanupsScope Scope(CGF);
264     CodeGen(CGF);
265   }
266   CGF.EHStack.popTerminate();
267 }
268 
269 LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue(
270     CodeGenFunction &CGF) {
271   return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()),
272                             getThreadIDVariable()->getType(),
273                             AlignmentSource::Decl);
274 }
275 
276 CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM)
277     : CGM(CGM), DefaultOpenMPPSource(nullptr), KmpRoutineEntryPtrTy(nullptr) {
278   IdentTy = llvm::StructType::create(
279       "ident_t", CGM.Int32Ty /* reserved_1 */, CGM.Int32Ty /* flags */,
280       CGM.Int32Ty /* reserved_2 */, CGM.Int32Ty /* reserved_3 */,
281       CGM.Int8PtrTy /* psource */, nullptr);
282   // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...)
283   llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty),
284                                llvm::PointerType::getUnqual(CGM.Int32Ty)};
285   Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true);
286   KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8);
287 }
288 
289 void CGOpenMPRuntime::clear() {
290   InternalVars.clear();
291 }
292 
293 // Layout information for ident_t.
294 static CharUnits getIdentAlign(CodeGenModule &CGM) {
295   return CGM.getPointerAlign();
296 }
297 static CharUnits getIdentSize(CodeGenModule &CGM) {
298   assert((4 * CGM.getPointerSize()).isMultipleOf(CGM.getPointerAlign()));
299   return CharUnits::fromQuantity(16) + CGM.getPointerSize();
300 }
301 static CharUnits getOffsetOfIdentField(CGOpenMPRuntime::IdentFieldIndex Field) {
302   // All the fields except the last are i32, so this works beautifully.
303   return unsigned(Field) * CharUnits::fromQuantity(4);
304 }
305 static Address createIdentFieldGEP(CodeGenFunction &CGF, Address Addr,
306                                    CGOpenMPRuntime::IdentFieldIndex Field,
307                                    const llvm::Twine &Name = "") {
308   auto Offset = getOffsetOfIdentField(Field);
309   return CGF.Builder.CreateStructGEP(Addr, Field, Offset, Name);
310 }
311 
312 llvm::Value *CGOpenMPRuntime::emitParallelOutlinedFunction(
313     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
314     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
315   assert(ThreadIDVar->getType()->isPointerType() &&
316          "thread id variable must be of type kmp_int32 *");
317   const CapturedStmt *CS = cast<CapturedStmt>(D.getAssociatedStmt());
318   CodeGenFunction CGF(CGM, true);
319   bool HasCancel = false;
320   if (auto *OPD = dyn_cast<OMPParallelDirective>(&D))
321     HasCancel = OPD->hasCancel();
322   else if (auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D))
323     HasCancel = OPSD->hasCancel();
324   else if (auto *OPFD = dyn_cast<OMPParallelForDirective>(&D))
325     HasCancel = OPFD->hasCancel();
326   CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind,
327                                     HasCancel);
328   CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
329   return CGF.GenerateOpenMPCapturedStmtFunction(*CS);
330 }
331 
332 llvm::Value *CGOpenMPRuntime::emitTaskOutlinedFunction(
333     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
334     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
335   assert(!ThreadIDVar->getType()->isPointerType() &&
336          "thread id variable must be of type kmp_int32 for tasks");
337   auto *CS = cast<CapturedStmt>(D.getAssociatedStmt());
338   CodeGenFunction CGF(CGM, true);
339   CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen,
340                                         InnermostKind,
341                                         cast<OMPTaskDirective>(D).hasCancel());
342   CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
343   return CGF.GenerateCapturedStmtFunction(*CS);
344 }
345 
346 Address CGOpenMPRuntime::getOrCreateDefaultLocation(OpenMPLocationFlags Flags) {
347   CharUnits Align = getIdentAlign(CGM);
348   llvm::Value *Entry = OpenMPDefaultLocMap.lookup(Flags);
349   if (!Entry) {
350     if (!DefaultOpenMPPSource) {
351       // Initialize default location for psource field of ident_t structure of
352       // all ident_t objects. Format is ";file;function;line;column;;".
353       // Taken from
354       // http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp_str.c
355       DefaultOpenMPPSource =
356           CGM.GetAddrOfConstantCString(";unknown;unknown;0;0;;").getPointer();
357       DefaultOpenMPPSource =
358           llvm::ConstantExpr::getBitCast(DefaultOpenMPPSource, CGM.Int8PtrTy);
359     }
360     auto DefaultOpenMPLocation = new llvm::GlobalVariable(
361         CGM.getModule(), IdentTy, /*isConstant*/ true,
362         llvm::GlobalValue::PrivateLinkage, /*Initializer*/ nullptr);
363     DefaultOpenMPLocation->setUnnamedAddr(true);
364     DefaultOpenMPLocation->setAlignment(Align.getQuantity());
365 
366     llvm::Constant *Zero = llvm::ConstantInt::get(CGM.Int32Ty, 0, true);
367     llvm::Constant *Values[] = {Zero,
368                                 llvm::ConstantInt::get(CGM.Int32Ty, Flags),
369                                 Zero, Zero, DefaultOpenMPPSource};
370     llvm::Constant *Init = llvm::ConstantStruct::get(IdentTy, Values);
371     DefaultOpenMPLocation->setInitializer(Init);
372     OpenMPDefaultLocMap[Flags] = Entry = DefaultOpenMPLocation;
373   }
374   return Address(Entry, Align);
375 }
376 
377 llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF,
378                                                  SourceLocation Loc,
379                                                  OpenMPLocationFlags Flags) {
380   // If no debug info is generated - return global default location.
381   if (CGM.getCodeGenOpts().getDebugInfo() == CodeGenOptions::NoDebugInfo ||
382       Loc.isInvalid())
383     return getOrCreateDefaultLocation(Flags).getPointer();
384 
385   assert(CGF.CurFn && "No function in current CodeGenFunction.");
386 
387   Address LocValue = Address::invalid();
388   auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
389   if (I != OpenMPLocThreadIDMap.end())
390     LocValue = Address(I->second.DebugLoc, getIdentAlign(CGF.CGM));
391 
392   // OpenMPLocThreadIDMap may have null DebugLoc and non-null ThreadID, if
393   // GetOpenMPThreadID was called before this routine.
394   if (!LocValue.isValid()) {
395     // Generate "ident_t .kmpc_loc.addr;"
396     Address AI = CGF.CreateTempAlloca(IdentTy, getIdentAlign(CGF.CGM),
397                                       ".kmpc_loc.addr");
398     auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
399     Elem.second.DebugLoc = AI.getPointer();
400     LocValue = AI;
401 
402     CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
403     CGF.Builder.SetInsertPoint(CGF.AllocaInsertPt);
404     CGF.Builder.CreateMemCpy(LocValue, getOrCreateDefaultLocation(Flags),
405                              CGM.getSize(getIdentSize(CGF.CGM)));
406   }
407 
408   // char **psource = &.kmpc_loc_<flags>.addr.psource;
409   Address PSource = createIdentFieldGEP(CGF, LocValue, IdentField_PSource);
410 
411   auto OMPDebugLoc = OpenMPDebugLocMap.lookup(Loc.getRawEncoding());
412   if (OMPDebugLoc == nullptr) {
413     SmallString<128> Buffer2;
414     llvm::raw_svector_ostream OS2(Buffer2);
415     // Build debug location
416     PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
417     OS2 << ";" << PLoc.getFilename() << ";";
418     if (const FunctionDecl *FD =
419             dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl)) {
420       OS2 << FD->getQualifiedNameAsString();
421     }
422     OS2 << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;";
423     OMPDebugLoc = CGF.Builder.CreateGlobalStringPtr(OS2.str());
424     OpenMPDebugLocMap[Loc.getRawEncoding()] = OMPDebugLoc;
425   }
426   // *psource = ";<File>;<Function>;<Line>;<Column>;;";
427   CGF.Builder.CreateStore(OMPDebugLoc, PSource);
428 
429   // Our callers always pass this to a runtime function, so for
430   // convenience, go ahead and return a naked pointer.
431   return LocValue.getPointer();
432 }
433 
434 llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF,
435                                           SourceLocation Loc) {
436   assert(CGF.CurFn && "No function in current CodeGenFunction.");
437 
438   llvm::Value *ThreadID = nullptr;
439   // Check whether we've already cached a load of the thread id in this
440   // function.
441   auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
442   if (I != OpenMPLocThreadIDMap.end()) {
443     ThreadID = I->second.ThreadID;
444     if (ThreadID != nullptr)
445       return ThreadID;
446   }
447   if (auto OMPRegionInfo =
448           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
449     if (OMPRegionInfo->getThreadIDVariable()) {
450       // Check if this an outlined function with thread id passed as argument.
451       auto LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF);
452       ThreadID = CGF.EmitLoadOfLValue(LVal, Loc).getScalarVal();
453       // If value loaded in entry block, cache it and use it everywhere in
454       // function.
455       if (CGF.Builder.GetInsertBlock() == CGF.AllocaInsertPt->getParent()) {
456         auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
457         Elem.second.ThreadID = ThreadID;
458       }
459       return ThreadID;
460     }
461   }
462 
463   // This is not an outlined function region - need to call __kmpc_int32
464   // kmpc_global_thread_num(ident_t *loc).
465   // Generate thread id value and cache this value for use across the
466   // function.
467   CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
468   CGF.Builder.SetInsertPoint(CGF.AllocaInsertPt);
469   ThreadID =
470       CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_global_thread_num),
471                           emitUpdateLocation(CGF, Loc));
472   auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
473   Elem.second.ThreadID = ThreadID;
474   return ThreadID;
475 }
476 
477 void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) {
478   assert(CGF.CurFn && "No function in current CodeGenFunction.");
479   if (OpenMPLocThreadIDMap.count(CGF.CurFn))
480     OpenMPLocThreadIDMap.erase(CGF.CurFn);
481 }
482 
483 llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() {
484   return llvm::PointerType::getUnqual(IdentTy);
485 }
486 
487 llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() {
488   return llvm::PointerType::getUnqual(Kmpc_MicroTy);
489 }
490 
491 llvm::Constant *
492 CGOpenMPRuntime::createRuntimeFunction(OpenMPRTLFunction Function) {
493   llvm::Constant *RTLFn = nullptr;
494   switch (Function) {
495   case OMPRTL__kmpc_fork_call: {
496     // Build void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro
497     // microtask, ...);
498     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
499                                 getKmpc_MicroPointerTy()};
500     llvm::FunctionType *FnTy =
501         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ true);
502     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_fork_call");
503     break;
504   }
505   case OMPRTL__kmpc_global_thread_num: {
506     // Build kmp_int32 __kmpc_global_thread_num(ident_t *loc);
507     llvm::Type *TypeParams[] = {getIdentTyPointerTy()};
508     llvm::FunctionType *FnTy =
509         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
510     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_global_thread_num");
511     break;
512   }
513   case OMPRTL__kmpc_threadprivate_cached: {
514     // Build void *__kmpc_threadprivate_cached(ident_t *loc,
515     // kmp_int32 global_tid, void *data, size_t size, void ***cache);
516     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
517                                 CGM.VoidPtrTy, CGM.SizeTy,
518                                 CGM.VoidPtrTy->getPointerTo()->getPointerTo()};
519     llvm::FunctionType *FnTy =
520         llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg*/ false);
521     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_cached");
522     break;
523   }
524   case OMPRTL__kmpc_critical: {
525     // Build void __kmpc_critical(ident_t *loc, kmp_int32 global_tid,
526     // kmp_critical_name *crit);
527     llvm::Type *TypeParams[] = {
528         getIdentTyPointerTy(), CGM.Int32Ty,
529         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
530     llvm::FunctionType *FnTy =
531         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
532     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_critical");
533     break;
534   }
535   case OMPRTL__kmpc_threadprivate_register: {
536     // Build void __kmpc_threadprivate_register(ident_t *, void *data,
537     // kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor);
538     // typedef void *(*kmpc_ctor)(void *);
539     auto KmpcCtorTy =
540         llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
541                                 /*isVarArg*/ false)->getPointerTo();
542     // typedef void *(*kmpc_cctor)(void *, void *);
543     llvm::Type *KmpcCopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
544     auto KmpcCopyCtorTy =
545         llvm::FunctionType::get(CGM.VoidPtrTy, KmpcCopyCtorTyArgs,
546                                 /*isVarArg*/ false)->getPointerTo();
547     // typedef void (*kmpc_dtor)(void *);
548     auto KmpcDtorTy =
549         llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy, /*isVarArg*/ false)
550             ->getPointerTo();
551     llvm::Type *FnTyArgs[] = {getIdentTyPointerTy(), CGM.VoidPtrTy, KmpcCtorTy,
552                               KmpcCopyCtorTy, KmpcDtorTy};
553     auto FnTy = llvm::FunctionType::get(CGM.VoidTy, FnTyArgs,
554                                         /*isVarArg*/ false);
555     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_register");
556     break;
557   }
558   case OMPRTL__kmpc_end_critical: {
559     // Build void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid,
560     // kmp_critical_name *crit);
561     llvm::Type *TypeParams[] = {
562         getIdentTyPointerTy(), CGM.Int32Ty,
563         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
564     llvm::FunctionType *FnTy =
565         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
566     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_critical");
567     break;
568   }
569   case OMPRTL__kmpc_cancel_barrier: {
570     // Build kmp_int32 __kmpc_cancel_barrier(ident_t *loc, kmp_int32
571     // global_tid);
572     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
573     llvm::FunctionType *FnTy =
574         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
575     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_cancel_barrier");
576     break;
577   }
578   case OMPRTL__kmpc_barrier: {
579     // Build void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid);
580     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
581     llvm::FunctionType *FnTy =
582         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
583     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_barrier");
584     break;
585   }
586   case OMPRTL__kmpc_for_static_fini: {
587     // Build void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid);
588     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
589     llvm::FunctionType *FnTy =
590         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
591     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_for_static_fini");
592     break;
593   }
594   case OMPRTL__kmpc_push_num_threads: {
595     // Build void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
596     // kmp_int32 num_threads)
597     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
598                                 CGM.Int32Ty};
599     llvm::FunctionType *FnTy =
600         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
601     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_num_threads");
602     break;
603   }
604   case OMPRTL__kmpc_serialized_parallel: {
605     // Build void __kmpc_serialized_parallel(ident_t *loc, kmp_int32
606     // global_tid);
607     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
608     llvm::FunctionType *FnTy =
609         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
610     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_serialized_parallel");
611     break;
612   }
613   case OMPRTL__kmpc_end_serialized_parallel: {
614     // Build void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32
615     // global_tid);
616     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
617     llvm::FunctionType *FnTy =
618         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
619     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_serialized_parallel");
620     break;
621   }
622   case OMPRTL__kmpc_flush: {
623     // Build void __kmpc_flush(ident_t *loc);
624     llvm::Type *TypeParams[] = {getIdentTyPointerTy()};
625     llvm::FunctionType *FnTy =
626         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
627     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_flush");
628     break;
629   }
630   case OMPRTL__kmpc_master: {
631     // Build kmp_int32 __kmpc_master(ident_t *loc, kmp_int32 global_tid);
632     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
633     llvm::FunctionType *FnTy =
634         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
635     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_master");
636     break;
637   }
638   case OMPRTL__kmpc_end_master: {
639     // Build void __kmpc_end_master(ident_t *loc, kmp_int32 global_tid);
640     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
641     llvm::FunctionType *FnTy =
642         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
643     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_master");
644     break;
645   }
646   case OMPRTL__kmpc_omp_taskyield: {
647     // Build kmp_int32 __kmpc_omp_taskyield(ident_t *, kmp_int32 global_tid,
648     // int end_part);
649     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
650     llvm::FunctionType *FnTy =
651         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
652     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_taskyield");
653     break;
654   }
655   case OMPRTL__kmpc_single: {
656     // Build kmp_int32 __kmpc_single(ident_t *loc, kmp_int32 global_tid);
657     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
658     llvm::FunctionType *FnTy =
659         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
660     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_single");
661     break;
662   }
663   case OMPRTL__kmpc_end_single: {
664     // Build void __kmpc_end_single(ident_t *loc, kmp_int32 global_tid);
665     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
666     llvm::FunctionType *FnTy =
667         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
668     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_single");
669     break;
670   }
671   case OMPRTL__kmpc_omp_task_alloc: {
672     // Build kmp_task_t *__kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
673     // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
674     // kmp_routine_entry_t *task_entry);
675     assert(KmpRoutineEntryPtrTy != nullptr &&
676            "Type kmp_routine_entry_t must be created.");
677     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty,
678                                 CGM.SizeTy, CGM.SizeTy, KmpRoutineEntryPtrTy};
679     // Return void * and then cast to particular kmp_task_t type.
680     llvm::FunctionType *FnTy =
681         llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
682     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_alloc");
683     break;
684   }
685   case OMPRTL__kmpc_omp_task: {
686     // Build kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t
687     // *new_task);
688     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
689                                 CGM.VoidPtrTy};
690     llvm::FunctionType *FnTy =
691         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
692     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task");
693     break;
694   }
695   case OMPRTL__kmpc_copyprivate: {
696     // Build void __kmpc_copyprivate(ident_t *loc, kmp_int32 global_tid,
697     // size_t cpy_size, void *cpy_data, void(*cpy_func)(void *, void *),
698     // kmp_int32 didit);
699     llvm::Type *CpyTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
700     auto *CpyFnTy =
701         llvm::FunctionType::get(CGM.VoidTy, CpyTypeParams, /*isVarArg=*/false);
702     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.SizeTy,
703                                 CGM.VoidPtrTy, CpyFnTy->getPointerTo(),
704                                 CGM.Int32Ty};
705     llvm::FunctionType *FnTy =
706         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
707     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_copyprivate");
708     break;
709   }
710   case OMPRTL__kmpc_reduce: {
711     // Build kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid,
712     // kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void
713     // (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name *lck);
714     llvm::Type *ReduceTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
715     auto *ReduceFnTy = llvm::FunctionType::get(CGM.VoidTy, ReduceTypeParams,
716                                                /*isVarArg=*/false);
717     llvm::Type *TypeParams[] = {
718         getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, CGM.SizeTy,
719         CGM.VoidPtrTy, ReduceFnTy->getPointerTo(),
720         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
721     llvm::FunctionType *FnTy =
722         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
723     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_reduce");
724     break;
725   }
726   case OMPRTL__kmpc_reduce_nowait: {
727     // Build kmp_int32 __kmpc_reduce_nowait(ident_t *loc, kmp_int32
728     // global_tid, kmp_int32 num_vars, size_t reduce_size, void *reduce_data,
729     // void (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name
730     // *lck);
731     llvm::Type *ReduceTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
732     auto *ReduceFnTy = llvm::FunctionType::get(CGM.VoidTy, ReduceTypeParams,
733                                                /*isVarArg=*/false);
734     llvm::Type *TypeParams[] = {
735         getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, CGM.SizeTy,
736         CGM.VoidPtrTy, ReduceFnTy->getPointerTo(),
737         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
738     llvm::FunctionType *FnTy =
739         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
740     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_reduce_nowait");
741     break;
742   }
743   case OMPRTL__kmpc_end_reduce: {
744     // Build void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,
745     // kmp_critical_name *lck);
746     llvm::Type *TypeParams[] = {
747         getIdentTyPointerTy(), CGM.Int32Ty,
748         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
749     llvm::FunctionType *FnTy =
750         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
751     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_reduce");
752     break;
753   }
754   case OMPRTL__kmpc_end_reduce_nowait: {
    // Build void __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
    // kmp_critical_name *lck);
757     llvm::Type *TypeParams[] = {
758         getIdentTyPointerTy(), CGM.Int32Ty,
759         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
760     llvm::FunctionType *FnTy =
761         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
762     RTLFn =
763         CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_reduce_nowait");
764     break;
765   }
766   case OMPRTL__kmpc_omp_task_begin_if0: {
    // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
769     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
770                                 CGM.VoidPtrTy};
771     llvm::FunctionType *FnTy =
772         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
773     RTLFn =
774         CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_begin_if0");
775     break;
776   }
777   case OMPRTL__kmpc_omp_task_complete_if0: {
    // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
780     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
781                                 CGM.VoidPtrTy};
782     llvm::FunctionType *FnTy =
783         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
784     RTLFn = CGM.CreateRuntimeFunction(FnTy,
785                                       /*Name=*/"__kmpc_omp_task_complete_if0");
786     break;
787   }
788   case OMPRTL__kmpc_ordered: {
789     // Build void __kmpc_ordered(ident_t *loc, kmp_int32 global_tid);
790     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
791     llvm::FunctionType *FnTy =
792         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
793     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_ordered");
794     break;
795   }
796   case OMPRTL__kmpc_end_ordered: {
797     // Build void __kmpc_end_ordered(ident_t *loc, kmp_int32 global_tid);
798     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
799     llvm::FunctionType *FnTy =
800         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
801     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_ordered");
802     break;
803   }
804   case OMPRTL__kmpc_omp_taskwait: {
805     // Build kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32 global_tid);
806     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
807     llvm::FunctionType *FnTy =
808         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
809     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_omp_taskwait");
810     break;
811   }
812   case OMPRTL__kmpc_taskgroup: {
813     // Build void __kmpc_taskgroup(ident_t *loc, kmp_int32 global_tid);
814     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
815     llvm::FunctionType *FnTy =
816         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
817     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_taskgroup");
818     break;
819   }
820   case OMPRTL__kmpc_end_taskgroup: {
821     // Build void __kmpc_end_taskgroup(ident_t *loc, kmp_int32 global_tid);
822     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
823     llvm::FunctionType *FnTy =
824         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
825     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_taskgroup");
826     break;
827   }
828   case OMPRTL__kmpc_push_proc_bind: {
829     // Build void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
830     // int proc_bind)
831     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
832     llvm::FunctionType *FnTy =
833         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
834     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_proc_bind");
835     break;
836   }
837   case OMPRTL__kmpc_omp_task_with_deps: {
838     // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
839     // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
840     // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list);
841     llvm::Type *TypeParams[] = {
842         getIdentTyPointerTy(), CGM.Int32Ty, CGM.VoidPtrTy, CGM.Int32Ty,
843         CGM.VoidPtrTy,         CGM.Int32Ty, CGM.VoidPtrTy};
844     llvm::FunctionType *FnTy =
845         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
846     RTLFn =
847         CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_with_deps");
848     break;
849   }
850   case OMPRTL__kmpc_omp_wait_deps: {
851     // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
852     // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 ndeps_noalias,
853     // kmp_depend_info_t *noalias_dep_list);
854     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
855                                 CGM.Int32Ty,           CGM.VoidPtrTy,
856                                 CGM.Int32Ty,           CGM.VoidPtrTy};
857     llvm::FunctionType *FnTy =
858         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
859     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_wait_deps");
860     break;
861   }
862   case OMPRTL__kmpc_cancellationpoint: {
863     // Build kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
864     // global_tid, kmp_int32 cncl_kind)
865     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
866     llvm::FunctionType *FnTy =
867         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
868     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_cancellationpoint");
869     break;
870   }
871   case OMPRTL__kmpc_cancel: {
872     // Build kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
873     // kmp_int32 cncl_kind)
874     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
875     llvm::FunctionType *FnTy =
876         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
877     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_cancel");
878     break;
879   }
880   }
881   return RTLFn;
882 }
883 
884 llvm::Constant *CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize,
885                                                              bool IVSigned) {
886   assert((IVSize == 32 || IVSize == 64) &&
887          "IV size is not compatible with the omp runtime");
888   auto Name = IVSize == 32 ? (IVSigned ? "__kmpc_for_static_init_4"
889                                        : "__kmpc_for_static_init_4u")
890                            : (IVSigned ? "__kmpc_for_static_init_8"
891                                        : "__kmpc_for_static_init_8u");
892   auto ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
893   auto PtrTy = llvm::PointerType::getUnqual(ITy);
894   llvm::Type *TypeParams[] = {
895     getIdentTyPointerTy(),                     // loc
896     CGM.Int32Ty,                               // tid
897     CGM.Int32Ty,                               // schedtype
898     llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
899     PtrTy,                                     // p_lower
900     PtrTy,                                     // p_upper
901     PtrTy,                                     // p_stride
902     ITy,                                       // incr
903     ITy                                        // chunk
904   };
905   llvm::FunctionType *FnTy =
906       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
907   return CGM.CreateRuntimeFunction(FnTy, Name);
908 }
909 
910 llvm::Constant *CGOpenMPRuntime::createDispatchInitFunction(unsigned IVSize,
911                                                             bool IVSigned) {
912   assert((IVSize == 32 || IVSize == 64) &&
913          "IV size is not compatible with the omp runtime");
914   auto Name =
915       IVSize == 32
916           ? (IVSigned ? "__kmpc_dispatch_init_4" : "__kmpc_dispatch_init_4u")
917           : (IVSigned ? "__kmpc_dispatch_init_8" : "__kmpc_dispatch_init_8u");
918   auto ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
919   llvm::Type *TypeParams[] = { getIdentTyPointerTy(), // loc
920                                CGM.Int32Ty,           // tid
921                                CGM.Int32Ty,           // schedtype
922                                ITy,                   // lower
923                                ITy,                   // upper
924                                ITy,                   // stride
925                                ITy                    // chunk
926   };
927   llvm::FunctionType *FnTy =
928       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
929   return CGM.CreateRuntimeFunction(FnTy, Name);
930 }
931 
932 llvm::Constant *CGOpenMPRuntime::createDispatchFiniFunction(unsigned IVSize,
933                                                             bool IVSigned) {
934   assert((IVSize == 32 || IVSize == 64) &&
935          "IV size is not compatible with the omp runtime");
936   auto Name =
937       IVSize == 32
938           ? (IVSigned ? "__kmpc_dispatch_fini_4" : "__kmpc_dispatch_fini_4u")
939           : (IVSigned ? "__kmpc_dispatch_fini_8" : "__kmpc_dispatch_fini_8u");
940   llvm::Type *TypeParams[] = {
941       getIdentTyPointerTy(), // loc
942       CGM.Int32Ty,           // tid
943   };
944   llvm::FunctionType *FnTy =
945       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
946   return CGM.CreateRuntimeFunction(FnTy, Name);
947 }
948 
949 llvm::Constant *CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize,
950                                                             bool IVSigned) {
951   assert((IVSize == 32 || IVSize == 64) &&
952          "IV size is not compatible with the omp runtime");
953   auto Name =
954       IVSize == 32
955           ? (IVSigned ? "__kmpc_dispatch_next_4" : "__kmpc_dispatch_next_4u")
956           : (IVSigned ? "__kmpc_dispatch_next_8" : "__kmpc_dispatch_next_8u");
957   auto ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
958   auto PtrTy = llvm::PointerType::getUnqual(ITy);
959   llvm::Type *TypeParams[] = {
960     getIdentTyPointerTy(),                     // loc
961     CGM.Int32Ty,                               // tid
962     llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
963     PtrTy,                                     // p_lower
964     PtrTy,                                     // p_upper
965     PtrTy                                      // p_stride
966   };
967   llvm::FunctionType *FnTy =
968       llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
969   return CGM.CreateRuntimeFunction(FnTy, Name);
970 }
971 
972 llvm::Constant *
973 CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) {
974   assert(!CGM.getLangOpts().OpenMPUseTLS ||
975          !CGM.getContext().getTargetInfo().isTLSSupported());
976   // Lookup the entry, lazily creating it if necessary.
977   return getOrCreateInternalVariable(CGM.Int8PtrPtrTy,
978                                      Twine(CGM.getMangledName(VD)) + ".cache.");
979 }
980 
981 Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
982                                                 const VarDecl *VD,
983                                                 Address VDAddr,
984                                                 SourceLocation Loc) {
985   if (CGM.getLangOpts().OpenMPUseTLS &&
986       CGM.getContext().getTargetInfo().isTLSSupported())
987     return VDAddr;
988 
989   auto VarTy = VDAddr.getElementType();
990   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
991                          CGF.Builder.CreatePointerCast(VDAddr.getPointer(),
992                                                        CGM.Int8PtrTy),
993                          CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)),
994                          getOrCreateThreadPrivateCache(VD)};
995   return Address(CGF.EmitRuntimeCall(
996       createRuntimeFunction(OMPRTL__kmpc_threadprivate_cached), Args),
997                  VDAddr.getAlignment());
998 }
999 
1000 void CGOpenMPRuntime::emitThreadPrivateVarInit(
1001     CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor,
1002     llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) {
1003   // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime
1004   // library.
1005   auto OMPLoc = emitUpdateLocation(CGF, Loc);
1006   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_global_thread_num),
1007                       OMPLoc);
1008   // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor)
1009   // to register constructor/destructor for variable.
1010   llvm::Value *Args[] = {OMPLoc,
1011                          CGF.Builder.CreatePointerCast(VDAddr.getPointer(),
1012                                                        CGM.VoidPtrTy),
1013                          Ctor, CopyCtor, Dtor};
1014   CGF.EmitRuntimeCall(
1015       createRuntimeFunction(OMPRTL__kmpc_threadprivate_register), Args);
1016 }
1017 
1018 llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition(
1019     const VarDecl *VD, Address VDAddr, SourceLocation Loc,
1020     bool PerformInit, CodeGenFunction *CGF) {
1021   if (CGM.getLangOpts().OpenMPUseTLS &&
1022       CGM.getContext().getTargetInfo().isTLSSupported())
1023     return nullptr;
1024 
1025   VD = VD->getDefinition(CGM.getContext());
1026   if (VD && ThreadPrivateWithDefinition.count(VD) == 0) {
1027     ThreadPrivateWithDefinition.insert(VD);
1028     QualType ASTTy = VD->getType();
1029 
1030     llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr;
1031     auto Init = VD->getAnyInitializer();
1032     if (CGM.getLangOpts().CPlusPlus && PerformInit) {
1033       // Generate function that re-emits the declaration's initializer into the
1034       // threadprivate copy of the variable VD
1035       CodeGenFunction CtorCGF(CGM);
1036       FunctionArgList Args;
1037       ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, SourceLocation(),
1038                             /*Id=*/nullptr, CGM.getContext().VoidPtrTy);
1039       Args.push_back(&Dst);
1040 
1041       auto &FI = CGM.getTypes().arrangeFreeFunctionDeclaration(
1042           CGM.getContext().VoidPtrTy, Args, FunctionType::ExtInfo(),
1043           /*isVariadic=*/false);
1044       auto FTy = CGM.getTypes().GetFunctionType(FI);
1045       auto Fn = CGM.CreateGlobalInitOrDestructFunction(
1046           FTy, ".__kmpc_global_ctor_.", Loc);
1047       CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI,
1048                             Args, SourceLocation());
1049       auto ArgVal = CtorCGF.EmitLoadOfScalar(
1050           CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
1051           CGM.getContext().VoidPtrTy, Dst.getLocation());
1052       Address Arg = Address(ArgVal, VDAddr.getAlignment());
1053       Arg = CtorCGF.Builder.CreateElementBitCast(Arg,
1054                                              CtorCGF.ConvertTypeForMem(ASTTy));
1055       CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(),
1056                                /*IsInitializer=*/true);
1057       ArgVal = CtorCGF.EmitLoadOfScalar(
1058           CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
1059           CGM.getContext().VoidPtrTy, Dst.getLocation());
1060       CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue);
1061       CtorCGF.FinishFunction();
1062       Ctor = Fn;
1063     }
1064     if (VD->getType().isDestructedType() != QualType::DK_none) {
1065       // Generate function that emits destructor call for the threadprivate copy
1066       // of the variable VD
1067       CodeGenFunction DtorCGF(CGM);
1068       FunctionArgList Args;
1069       ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, SourceLocation(),
1070                             /*Id=*/nullptr, CGM.getContext().VoidPtrTy);
1071       Args.push_back(&Dst);
1072 
1073       auto &FI = CGM.getTypes().arrangeFreeFunctionDeclaration(
1074           CGM.getContext().VoidTy, Args, FunctionType::ExtInfo(),
1075           /*isVariadic=*/false);
1076       auto FTy = CGM.getTypes().GetFunctionType(FI);
1077       auto Fn = CGM.CreateGlobalInitOrDestructFunction(
1078           FTy, ".__kmpc_global_dtor_.", Loc);
1079       DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args,
1080                             SourceLocation());
1081       auto ArgVal = DtorCGF.EmitLoadOfScalar(
1082           DtorCGF.GetAddrOfLocalVar(&Dst),
1083           /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation());
1084       DtorCGF.emitDestroy(Address(ArgVal, VDAddr.getAlignment()), ASTTy,
1085                           DtorCGF.getDestroyer(ASTTy.isDestructedType()),
1086                           DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
1087       DtorCGF.FinishFunction();
1088       Dtor = Fn;
1089     }
1090     // Do not emit init function if it is not required.
1091     if (!Ctor && !Dtor)
1092       return nullptr;
1093 
1094     llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
1095     auto CopyCtorTy =
1096         llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs,
1097                                 /*isVarArg=*/false)->getPointerTo();
1098     // Copying constructor for the threadprivate variable.
1099     // Must be NULL - reserved by runtime, but currently it requires that this
1100     // parameter is always NULL. Otherwise it fires assertion.
1101     CopyCtor = llvm::Constant::getNullValue(CopyCtorTy);
1102     if (Ctor == nullptr) {
1103       auto CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
1104                                             /*isVarArg=*/false)->getPointerTo();
1105       Ctor = llvm::Constant::getNullValue(CtorTy);
1106     }
1107     if (Dtor == nullptr) {
1108       auto DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy,
1109                                             /*isVarArg=*/false)->getPointerTo();
1110       Dtor = llvm::Constant::getNullValue(DtorTy);
1111     }
1112     if (!CGF) {
1113       auto InitFunctionTy =
1114           llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false);
1115       auto InitFunction = CGM.CreateGlobalInitOrDestructFunction(
1116           InitFunctionTy, ".__omp_threadprivate_init_.");
1117       CodeGenFunction InitCGF(CGM);
1118       FunctionArgList ArgList;
1119       InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction,
1120                             CGM.getTypes().arrangeNullaryFunction(), ArgList,
1121                             Loc);
1122       emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
1123       InitCGF.FinishFunction();
1124       return InitFunction;
1125     }
1126     emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
1127   }
1128   return nullptr;
1129 }
1130 
1131 /// \brief Emits code for OpenMP 'if' clause using specified \a CodeGen
1132 /// function. Here is the logic:
1133 /// if (Cond) {
1134 ///   ThenGen();
1135 /// } else {
1136 ///   ElseGen();
1137 /// }
1138 static void emitOMPIfClause(CodeGenFunction &CGF, const Expr *Cond,
1139                             const RegionCodeGenTy &ThenGen,
1140                             const RegionCodeGenTy &ElseGen) {
1141   CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange());
1142 
1143   // If the condition constant folds and can be elided, try to avoid emitting
1144   // the condition and the dead arm of the if/else.
1145   bool CondConstant;
1146   if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) {
1147     CodeGenFunction::RunCleanupsScope Scope(CGF);
1148     if (CondConstant) {
1149       ThenGen(CGF);
1150     } else {
1151       ElseGen(CGF);
1152     }
1153     return;
1154   }
1155 
1156   // Otherwise, the condition did not fold, or we couldn't elide it.  Just
1157   // emit the conditional branch.
1158   auto ThenBlock = CGF.createBasicBlock("omp_if.then");
1159   auto ElseBlock = CGF.createBasicBlock("omp_if.else");
1160   auto ContBlock = CGF.createBasicBlock("omp_if.end");
1161   CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0);
1162 
1163   // Emit the 'then' code.
1164   CGF.EmitBlock(ThenBlock);
1165   {
1166     CodeGenFunction::RunCleanupsScope ThenScope(CGF);
1167     ThenGen(CGF);
1168   }
1169   CGF.EmitBranch(ContBlock);
1170   // Emit the 'else' code if present.
1171   {
1172     // There is no need to emit line number for unconditional branch.
1173     auto NL = ApplyDebugLocation::CreateEmpty(CGF);
1174     CGF.EmitBlock(ElseBlock);
1175   }
1176   {
1177     CodeGenFunction::RunCleanupsScope ThenScope(CGF);
1178     ElseGen(CGF);
1179   }
1180   {
1181     // There is no need to emit line number for unconditional branch.
1182     auto NL = ApplyDebugLocation::CreateEmpty(CGF);
1183     CGF.EmitBranch(ContBlock);
1184   }
1185   // Emit the continuation block for code after the if.
1186   CGF.EmitBlock(ContBlock, /*IsFinished=*/true);
1187 }
1188 
1189 void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
1190                                        llvm::Value *OutlinedFn,
1191                                        ArrayRef<llvm::Value *> CapturedVars,
1192                                        const Expr *IfCond) {
1193   auto *RTLoc = emitUpdateLocation(CGF, Loc);
1194   auto &&ThenGen = [this, OutlinedFn, CapturedVars,
1195                     RTLoc](CodeGenFunction &CGF) {
1196     // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn);
1197     llvm::Value *Args[] = {
1198         RTLoc,
1199         CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
1200         CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy())};
1201     llvm::SmallVector<llvm::Value *, 16> RealArgs;
1202     RealArgs.append(std::begin(Args), std::end(Args));
1203     RealArgs.append(CapturedVars.begin(), CapturedVars.end());
1204 
1205     auto RTLFn = createRuntimeFunction(OMPRTL__kmpc_fork_call);
1206     CGF.EmitRuntimeCall(RTLFn, RealArgs);
1207   };
1208   auto &&ElseGen = [this, OutlinedFn, CapturedVars, RTLoc,
1209                     Loc](CodeGenFunction &CGF) {
1210     auto ThreadID = getThreadID(CGF, Loc);
1211     // Build calls:
1212     // __kmpc_serialized_parallel(&Loc, GTid);
1213     llvm::Value *Args[] = {RTLoc, ThreadID};
1214     CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_serialized_parallel),
1215                         Args);
1216 
1217     // OutlinedFn(&GTid, &zero, CapturedStruct);
1218     auto ThreadIDAddr = emitThreadIDAddress(CGF, Loc);
1219     Address ZeroAddr =
1220       CGF.CreateTempAlloca(CGF.Int32Ty, CharUnits::fromQuantity(4),
1221                            /*Name*/ ".zero.addr");
1222     CGF.InitTempAlloca(ZeroAddr, CGF.Builder.getInt32(/*C*/ 0));
1223     llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
1224     OutlinedFnArgs.push_back(ThreadIDAddr.getPointer());
1225     OutlinedFnArgs.push_back(ZeroAddr.getPointer());
1226     OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());
1227     CGF.EmitCallOrInvoke(OutlinedFn, OutlinedFnArgs);
1228 
1229     // __kmpc_end_serialized_parallel(&Loc, GTid);
1230     llvm::Value *EndArgs[] = {emitUpdateLocation(CGF, Loc), ThreadID};
1231     CGF.EmitRuntimeCall(
1232         createRuntimeFunction(OMPRTL__kmpc_end_serialized_parallel), EndArgs);
1233   };
1234   if (IfCond) {
1235     emitOMPIfClause(CGF, IfCond, ThenGen, ElseGen);
1236   } else {
1237     CodeGenFunction::RunCleanupsScope Scope(CGF);
1238     ThenGen(CGF);
1239   }
1240 }
1241 
1242 // If we're inside an (outlined) parallel region, use the region info's
1243 // thread-ID variable (it is passed in a first argument of the outlined function
1244 // as "kmp_int32 *gtid"). Otherwise, if we're not inside parallel region, but in
1245 // regular serial code region, get thread ID by calling kmp_int32
1246 // kmpc_global_thread_num(ident_t *loc), stash this thread ID in a temporary and
1247 // return the address of that temp.
1248 Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF,
1249                                              SourceLocation Loc) {
1250   if (auto OMPRegionInfo =
1251           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
1252     if (OMPRegionInfo->getThreadIDVariable())
1253       return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress();
1254 
1255   auto ThreadID = getThreadID(CGF, Loc);
1256   auto Int32Ty =
1257       CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true);
1258   auto ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp.");
1259   CGF.EmitStoreOfScalar(ThreadID,
1260                         CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty));
1261 
1262   return ThreadIDTemp;
1263 }
1264 
1265 llvm::Constant *
1266 CGOpenMPRuntime::getOrCreateInternalVariable(llvm::Type *Ty,
1267                                              const llvm::Twine &Name) {
1268   SmallString<256> Buffer;
1269   llvm::raw_svector_ostream Out(Buffer);
1270   Out << Name;
1271   auto RuntimeName = Out.str();
1272   auto &Elem = *InternalVars.insert(std::make_pair(RuntimeName, nullptr)).first;
1273   if (Elem.second) {
1274     assert(Elem.second->getType()->getPointerElementType() == Ty &&
1275            "OMP internal variable has different type than requested");
1276     return &*Elem.second;
1277   }
1278 
1279   return Elem.second = new llvm::GlobalVariable(
1280              CGM.getModule(), Ty, /*IsConstant*/ false,
1281              llvm::GlobalValue::CommonLinkage, llvm::Constant::getNullValue(Ty),
1282              Elem.first());
1283 }
1284 
1285 llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) {
1286   llvm::Twine Name(".gomp_critical_user_", CriticalName);
1287   return getOrCreateInternalVariable(KmpCriticalNameTy, Name.concat(".var"));
1288 }
1289 
1290 namespace {
1291 template <size_t N> class CallEndCleanup final : public EHScopeStack::Cleanup {
1292   llvm::Value *Callee;
1293   llvm::Value *Args[N];
1294 
1295 public:
1296   CallEndCleanup(llvm::Value *Callee, ArrayRef<llvm::Value *> CleanupArgs)
1297       : Callee(Callee) {
1298     assert(CleanupArgs.size() == N);
1299     std::copy(CleanupArgs.begin(), CleanupArgs.end(), std::begin(Args));
1300   }
1301   void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
1302     CGF.EmitRuntimeCall(Callee, Args);
1303   }
1304 };
1305 } // anonymous namespace
1306 
1307 void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF,
1308                                          StringRef CriticalName,
1309                                          const RegionCodeGenTy &CriticalOpGen,
1310                                          SourceLocation Loc) {
1311   // __kmpc_critical(ident_t *, gtid, Lock);
1312   // CriticalOpGen();
1313   // __kmpc_end_critical(ident_t *, gtid, Lock);
1314   // Prepare arguments and build a call to __kmpc_critical
1315   {
1316     CodeGenFunction::RunCleanupsScope Scope(CGF);
1317     llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
1318                            getCriticalRegionLock(CriticalName)};
1319     CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_critical), Args);
1320     // Build a call to __kmpc_end_critical
1321     CGF.EHStack.pushCleanup<CallEndCleanup<std::extent<decltype(Args)>::value>>(
1322         NormalAndEHCleanup, createRuntimeFunction(OMPRTL__kmpc_end_critical),
1323         llvm::makeArrayRef(Args));
1324     emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen);
1325   }
1326 }
1327 
1328 static void emitIfStmt(CodeGenFunction &CGF, llvm::Value *IfCond,
1329                        OpenMPDirectiveKind Kind, SourceLocation Loc,
1330                        const RegionCodeGenTy &BodyOpGen) {
1331   llvm::Value *CallBool = CGF.EmitScalarConversion(
1332       IfCond,
1333       CGF.getContext().getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true),
1334       CGF.getContext().BoolTy, Loc);
1335 
1336   auto *ThenBlock = CGF.createBasicBlock("omp_if.then");
1337   auto *ContBlock = CGF.createBasicBlock("omp_if.end");
1338   // Generate the branch (If-stmt)
1339   CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock);
1340   CGF.EmitBlock(ThenBlock);
1341   CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, Kind, BodyOpGen);
1342   // Emit the rest of bblocks/branches
1343   CGF.EmitBranch(ContBlock);
1344   CGF.EmitBlock(ContBlock, true);
1345 }
1346 
1347 void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF,
1348                                        const RegionCodeGenTy &MasterOpGen,
1349                                        SourceLocation Loc) {
1350   // if(__kmpc_master(ident_t *, gtid)) {
1351   //   MasterOpGen();
1352   //   __kmpc_end_master(ident_t *, gtid);
1353   // }
1354   // Prepare arguments and build a call to __kmpc_master
1355   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
1356   auto *IsMaster =
1357       CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_master), Args);
1358   typedef CallEndCleanup<std::extent<decltype(Args)>::value>
1359       MasterCallEndCleanup;
1360   emitIfStmt(
1361       CGF, IsMaster, OMPD_master, Loc, [&](CodeGenFunction &CGF) -> void {
1362         CodeGenFunction::RunCleanupsScope Scope(CGF);
1363         CGF.EHStack.pushCleanup<MasterCallEndCleanup>(
1364             NormalAndEHCleanup, createRuntimeFunction(OMPRTL__kmpc_end_master),
1365             llvm::makeArrayRef(Args));
1366         MasterOpGen(CGF);
1367       });
1368 }
1369 
1370 void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
1371                                         SourceLocation Loc) {
1372   // Build call __kmpc_omp_taskyield(loc, thread_id, 0);
1373   llvm::Value *Args[] = {
1374       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
1375       llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)};
1376   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_taskyield), Args);
1377 }
1378 
1379 void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF,
1380                                           const RegionCodeGenTy &TaskgroupOpGen,
1381                                           SourceLocation Loc) {
1382   // __kmpc_taskgroup(ident_t *, gtid);
1383   // TaskgroupOpGen();
1384   // __kmpc_end_taskgroup(ident_t *, gtid);
1385   // Prepare arguments and build a call to __kmpc_taskgroup
1386   {
1387     CodeGenFunction::RunCleanupsScope Scope(CGF);
1388     llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
1389     CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_taskgroup), Args);
1390     // Build a call to __kmpc_end_taskgroup
1391     CGF.EHStack.pushCleanup<CallEndCleanup<std::extent<decltype(Args)>::value>>(
1392         NormalAndEHCleanup, createRuntimeFunction(OMPRTL__kmpc_end_taskgroup),
1393         llvm::makeArrayRef(Args));
1394     emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen);
1395   }
1396 }
1397 
1398 /// Given an array of pointers to variables, project the address of a
1399 /// given variable.
1400 static Address emitAddrOfVarFromArray(CodeGenFunction &CGF,
1401                                       Address Array, unsigned Index,
1402                                       const VarDecl *Var) {
1403   // Pull out the pointer to the variable.
1404   Address PtrAddr =
1405     CGF.Builder.CreateConstArrayGEP(Array, Index, CGF.getPointerSize());
1406   llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr);
1407 
1408   Address Addr = Address(Ptr, CGF.getContext().getDeclAlign(Var));
1409   Addr = CGF.Builder.CreateElementBitCast(Addr,
1410                                       CGF.ConvertTypeForMem(Var->getType()));
1411   return Addr;
1412 }
1413 
1414 static llvm::Value *emitCopyprivateCopyFunction(
1415     CodeGenModule &CGM, llvm::Type *ArgsType,
1416     ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs,
1417     ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps) {
1418   auto &C = CGM.getContext();
1419   // void copy_func(void *LHSArg, void *RHSArg);
1420   FunctionArgList Args;
1421   ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, SourceLocation(), /*Id=*/nullptr,
1422                            C.VoidPtrTy);
1423   ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, SourceLocation(), /*Id=*/nullptr,
1424                            C.VoidPtrTy);
1425   Args.push_back(&LHSArg);
1426   Args.push_back(&RHSArg);
1427   FunctionType::ExtInfo EI;
1428   auto &CGFI = CGM.getTypes().arrangeFreeFunctionDeclaration(
1429       C.VoidTy, Args, EI, /*isVariadic=*/false);
1430   auto *Fn = llvm::Function::Create(
1431       CGM.getTypes().GetFunctionType(CGFI), llvm::GlobalValue::InternalLinkage,
1432       ".omp.copyprivate.copy_func", &CGM.getModule());
1433   CGM.SetLLVMFunctionAttributes(/*D=*/nullptr, CGFI, Fn);
1434   CodeGenFunction CGF(CGM);
1435   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args);
1436   // Dest = (void*[n])(LHSArg);
1437   // Src = (void*[n])(RHSArg);
1438   Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
1439       CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
1440       ArgsType), CGF.getPointerAlign());
1441   Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
1442       CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
1443       ArgsType), CGF.getPointerAlign());
1444   // *(Type0*)Dst[0] = *(Type0*)Src[0];
1445   // *(Type1*)Dst[1] = *(Type1*)Src[1];
1446   // ...
1447   // *(Typen*)Dst[n] = *(Typen*)Src[n];
1448   for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) {
1449     auto DestVar = cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl());
1450     Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar);
1451 
1452     auto SrcVar = cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl());
1453     Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar);
1454 
1455     auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl();
1456     QualType Type = VD->getType();
1457     CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]);
1458   }
1459   CGF.FinishFunction();
1460   return Fn;
1461 }
1462 
1463 void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF,
1464                                        const RegionCodeGenTy &SingleOpGen,
1465                                        SourceLocation Loc,
1466                                        ArrayRef<const Expr *> CopyprivateVars,
1467                                        ArrayRef<const Expr *> SrcExprs,
1468                                        ArrayRef<const Expr *> DstExprs,
1469                                        ArrayRef<const Expr *> AssignmentOps) {
1470   assert(CopyprivateVars.size() == SrcExprs.size() &&
1471          CopyprivateVars.size() == DstExprs.size() &&
1472          CopyprivateVars.size() == AssignmentOps.size());
1473   auto &C = CGM.getContext();
1474   // int32 did_it = 0;
1475   // if(__kmpc_single(ident_t *, gtid)) {
1476   //   SingleOpGen();
1477   //   __kmpc_end_single(ident_t *, gtid);
1478   //   did_it = 1;
1479   // }
1480   // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
1481   // <copy_func>, did_it);
1482 
1483   Address DidIt = Address::invalid();
1484   if (!CopyprivateVars.empty()) {
1485     // int32 did_it = 0;
1486     auto KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
1487     DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it");
1488     CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt);
1489   }
1490   // Prepare arguments and build a call to __kmpc_single
1491   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
1492   auto *IsSingle =
1493       CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_single), Args);
1494   typedef CallEndCleanup<std::extent<decltype(Args)>::value>
1495       SingleCallEndCleanup;
1496   emitIfStmt(
1497       CGF, IsSingle, OMPD_single, Loc, [&](CodeGenFunction &CGF) -> void {
1498         CodeGenFunction::RunCleanupsScope Scope(CGF);
1499         CGF.EHStack.pushCleanup<SingleCallEndCleanup>(
1500             NormalAndEHCleanup, createRuntimeFunction(OMPRTL__kmpc_end_single),
1501             llvm::makeArrayRef(Args));
1502         SingleOpGen(CGF);
1503         if (DidIt.isValid()) {
1504           // did_it = 1;
1505           CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt);
1506         }
1507       });
1508   // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
1509   // <copy_func>, did_it);
1510   if (DidIt.isValid()) {
1511     llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size());
1512     auto CopyprivateArrayTy =
1513         C.getConstantArrayType(C.VoidPtrTy, ArraySize, ArrayType::Normal,
1514                                /*IndexTypeQuals=*/0);
1515     // Create a list of all private variables for copyprivate.
1516     Address CopyprivateList =
1517         CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list");
1518     for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) {
1519       Address Elem = CGF.Builder.CreateConstArrayGEP(
1520           CopyprivateList, I, CGF.getPointerSize());
1521       CGF.Builder.CreateStore(
1522           CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
1523               CGF.EmitLValue(CopyprivateVars[I]).getPointer(), CGF.VoidPtrTy),
1524           Elem);
1525     }
1526     // Build function that copies private values from single region to all other
1527     // threads in the corresponding parallel region.
1528     auto *CpyFn = emitCopyprivateCopyFunction(
1529         CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy)->getPointerTo(),
1530         CopyprivateVars, SrcExprs, DstExprs, AssignmentOps);
1531     auto *BufSize = llvm::ConstantInt::get(
1532         CGM.SizeTy, C.getTypeSizeInChars(CopyprivateArrayTy).getQuantity());
1533     Address CL =
1534       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(CopyprivateList,
1535                                                       CGF.VoidPtrTy);
1536     auto *DidItVal = CGF.Builder.CreateLoad(DidIt);
1537     llvm::Value *Args[] = {
1538         emitUpdateLocation(CGF, Loc), // ident_t *<loc>
1539         getThreadID(CGF, Loc),        // i32 <gtid>
1540         BufSize,                      // size_t <buf_size>
1541         CL.getPointer(),              // void *<copyprivate list>
1542         CpyFn,                        // void (*) (void *, void *) <copy_func>
1543         DidItVal                      // i32 did_it
1544     };
1545     CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_copyprivate), Args);
1546   }
1547 }
1548 
1549 void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF,
1550                                         const RegionCodeGenTy &OrderedOpGen,
1551                                         SourceLocation Loc) {
1552   // __kmpc_ordered(ident_t *, gtid);
1553   // OrderedOpGen();
1554   // __kmpc_end_ordered(ident_t *, gtid);
1555   // Prepare arguments and build a call to __kmpc_ordered
1556   {
1557     CodeGenFunction::RunCleanupsScope Scope(CGF);
1558     llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
1559     CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_ordered), Args);
1560     // Build a call to __kmpc_end_ordered
1561     CGF.EHStack.pushCleanup<CallEndCleanup<std::extent<decltype(Args)>::value>>(
1562         NormalAndEHCleanup, createRuntimeFunction(OMPRTL__kmpc_end_ordered),
1563         llvm::makeArrayRef(Args));
1564     emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
1565   }
1566 }
1567 
1568 void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
1569                                       OpenMPDirectiveKind Kind, bool EmitChecks,
1570                                       bool ForceSimpleCall) {
1571   // Build call __kmpc_cancel_barrier(loc, thread_id);
1572   // Build call __kmpc_barrier(loc, thread_id);
1573   OpenMPLocationFlags Flags = OMP_IDENT_KMPC;
1574   if (Kind == OMPD_for) {
1575     Flags =
1576         static_cast<OpenMPLocationFlags>(Flags | OMP_IDENT_BARRIER_IMPL_FOR);
1577   } else if (Kind == OMPD_sections) {
1578     Flags = static_cast<OpenMPLocationFlags>(Flags |
1579                                              OMP_IDENT_BARRIER_IMPL_SECTIONS);
1580   } else if (Kind == OMPD_single) {
1581     Flags =
1582         static_cast<OpenMPLocationFlags>(Flags | OMP_IDENT_BARRIER_IMPL_SINGLE);
1583   } else if (Kind == OMPD_barrier) {
1584     Flags = static_cast<OpenMPLocationFlags>(Flags | OMP_IDENT_BARRIER_EXPL);
1585   } else {
1586     Flags = static_cast<OpenMPLocationFlags>(Flags | OMP_IDENT_BARRIER_IMPL);
1587   }
1588   // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc,
1589   // thread_id);
1590   auto *OMPRegionInfo =
1591       dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo);
1592   // Do not emit barrier call in the single directive emitted in some rare cases
1593   // for sections directives.
1594   if (OMPRegionInfo && OMPRegionInfo->getDirectiveKind() == OMPD_single)
1595     return;
1596   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags),
1597                          getThreadID(CGF, Loc)};
1598   if (OMPRegionInfo) {
1599     if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) {
1600       auto *Result = CGF.EmitRuntimeCall(
1601           createRuntimeFunction(OMPRTL__kmpc_cancel_barrier), Args);
1602       if (EmitChecks) {
1603         // if (__kmpc_cancel_barrier()) {
1604         //   exit from construct;
1605         // }
1606         auto *ExitBB = CGF.createBasicBlock(".cancel.exit");
1607         auto *ContBB = CGF.createBasicBlock(".cancel.continue");
1608         auto *Cmp = CGF.Builder.CreateIsNotNull(Result);
1609         CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
1610         CGF.EmitBlock(ExitBB);
1611         //   exit from construct;
1612         auto CancelDestination =
1613             CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
1614         CGF.EmitBranchThroughCleanup(CancelDestination);
1615         CGF.EmitBlock(ContBB, /*IsFinished=*/true);
1616       }
1617       return;
1618     }
1619   }
1620   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_barrier), Args);
1621 }
1622 
/// \brief Loop schedule kinds passed to the runtime for 'omp for' loops. The
/// enumerators are taken from the enum sched_type in kmp.h.
enum OpenMPSchedType {
  /// \brief Lower bound for default (unordered) versions.
  OMP_sch_lower = 32,
  /// \brief Unordered static schedule with a chunk.
  OMP_sch_static_chunked = 33,
  /// \brief Unordered static schedule without a chunk.
  OMP_sch_static = 34,
  /// \brief Unordered dynamic schedule.
  OMP_sch_dynamic_chunked = 35,
  /// \brief Unordered guided schedule.
  OMP_sch_guided_chunked = 36,
  /// \brief Unordered schedule selected at run time.
  OMP_sch_runtime = 37,
  /// \brief Unordered 'auto' schedule.
  OMP_sch_auto = 38,
  /// \brief Lower bound for 'ordered' versions.
  OMP_ord_lower = 64,
  /// \brief Ordered static schedule with a chunk.
  OMP_ord_static_chunked = 65,
  /// \brief Ordered static schedule without a chunk.
  OMP_ord_static = 66,
  /// \brief Ordered dynamic schedule.
  OMP_ord_dynamic_chunked = 67,
  /// \brief Ordered guided schedule.
  OMP_ord_guided_chunked = 68,
  /// \brief Ordered schedule selected at run time.
  OMP_ord_runtime = 69,
  /// \brief Ordered 'auto' schedule.
  OMP_ord_auto = 70,
  /// \brief Default schedule: an alias of the unordered, non-chunked static
  /// schedule.
  OMP_sch_default = OMP_sch_static,
};
1644 
1645 /// \brief Map the OpenMP loop schedule to the runtime enumeration.
1646 static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind,
1647                                           bool Chunked, bool Ordered) {
1648   switch (ScheduleKind) {
1649   case OMPC_SCHEDULE_static:
1650     return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked)
1651                    : (Ordered ? OMP_ord_static : OMP_sch_static);
1652   case OMPC_SCHEDULE_dynamic:
1653     return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked;
1654   case OMPC_SCHEDULE_guided:
1655     return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked;
1656   case OMPC_SCHEDULE_runtime:
1657     return Ordered ? OMP_ord_runtime : OMP_sch_runtime;
1658   case OMPC_SCHEDULE_auto:
1659     return Ordered ? OMP_ord_auto : OMP_sch_auto;
1660   case OMPC_SCHEDULE_unknown:
1661     assert(!Chunked && "chunk was specified but schedule kind not known");
1662     return Ordered ? OMP_ord_static : OMP_sch_static;
1663   }
1664   llvm_unreachable("Unexpected runtime schedule");
1665 }
1666 
1667 bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind,
1668                                          bool Chunked) const {
1669   auto Schedule = getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
1670   return Schedule == OMP_sch_static;
1671 }
1672 
1673 bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const {
1674   auto Schedule =
1675       getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false);
1676   assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here");
1677   return Schedule != OMP_sch_static;
1678 }
1679 
1680 void CGOpenMPRuntime::emitForDispatchInit(CodeGenFunction &CGF,
1681                                           SourceLocation Loc,
1682                                           OpenMPScheduleClauseKind ScheduleKind,
1683                                           unsigned IVSize, bool IVSigned,
1684                                           bool Ordered, llvm::Value *UB,
1685                                           llvm::Value *Chunk) {
1686   OpenMPSchedType Schedule =
1687       getRuntimeSchedule(ScheduleKind, Chunk != nullptr, Ordered);
1688   assert(Ordered ||
1689          (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked &&
1690           Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked));
1691   // Call __kmpc_dispatch_init(
1692   //          ident_t *loc, kmp_int32 tid, kmp_int32 schedule,
1693   //          kmp_int[32|64] lower, kmp_int[32|64] upper,
1694   //          kmp_int[32|64] stride, kmp_int[32|64] chunk);
1695 
1696   // If the Chunk was not specified in the clause - use default value 1.
1697   if (Chunk == nullptr)
1698     Chunk = CGF.Builder.getIntN(IVSize, 1);
1699   llvm::Value *Args[] = {
1700     emitUpdateLocation(CGF, Loc, OMP_IDENT_KMPC),
1701     getThreadID(CGF, Loc),
1702     CGF.Builder.getInt32(Schedule), // Schedule type
1703     CGF.Builder.getIntN(IVSize, 0), // Lower
1704     UB,                             // Upper
1705     CGF.Builder.getIntN(IVSize, 1), // Stride
1706     Chunk                           // Chunk
1707   };
1708   CGF.EmitRuntimeCall(createDispatchInitFunction(IVSize, IVSigned), Args);
1709 }
1710 
1711 void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF,
1712                                         SourceLocation Loc,
1713                                         OpenMPScheduleClauseKind ScheduleKind,
1714                                         unsigned IVSize, bool IVSigned,
1715                                         bool Ordered, Address IL, Address LB,
1716                                         Address UB, Address ST,
1717                                         llvm::Value *Chunk) {
1718   OpenMPSchedType Schedule =
1719     getRuntimeSchedule(ScheduleKind, Chunk != nullptr, Ordered);
1720   assert(!Ordered);
1721   assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked ||
1722          Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked);
1723 
1724   // Call __kmpc_for_static_init(
1725   //          ident_t *loc, kmp_int32 tid, kmp_int32 schedtype,
1726   //          kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower,
1727   //          kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
1728   //          kmp_int[32|64] incr, kmp_int[32|64] chunk);
1729   if (Chunk == nullptr) {
1730     assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static) &&
1731            "expected static non-chunked schedule");
1732     // If the Chunk was not specified in the clause - use default value 1.
1733       Chunk = CGF.Builder.getIntN(IVSize, 1);
1734   } else {
1735     assert((Schedule == OMP_sch_static_chunked ||
1736             Schedule == OMP_ord_static_chunked) &&
1737            "expected static chunked schedule");
1738   }
1739   llvm::Value *Args[] = {
1740     emitUpdateLocation(CGF, Loc, OMP_IDENT_KMPC),
1741     getThreadID(CGF, Loc),
1742     CGF.Builder.getInt32(Schedule), // Schedule type
1743     IL.getPointer(),                // &isLastIter
1744     LB.getPointer(),                // &LB
1745     UB.getPointer(),                // &UB
1746     ST.getPointer(),                // &Stride
1747     CGF.Builder.getIntN(IVSize, 1), // Incr
1748     Chunk                           // Chunk
1749   };
1750   CGF.EmitRuntimeCall(createForStaticInitFunction(IVSize, IVSigned), Args);
1751 }
1752 
1753 void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF,
1754                                           SourceLocation Loc) {
1755   // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid);
1756   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, OMP_IDENT_KMPC),
1757                          getThreadID(CGF, Loc)};
1758   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_for_static_fini),
1759                       Args);
1760 }
1761 
1762 void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
1763                                                  SourceLocation Loc,
1764                                                  unsigned IVSize,
1765                                                  bool IVSigned) {
1766   // Call __kmpc_for_dynamic_fini_(4|8)[u](ident_t *loc, kmp_int32 tid);
1767   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, OMP_IDENT_KMPC),
1768                          getThreadID(CGF, Loc)};
1769   CGF.EmitRuntimeCall(createDispatchFiniFunction(IVSize, IVSigned), Args);
1770 }
1771 
1772 llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF,
1773                                           SourceLocation Loc, unsigned IVSize,
1774                                           bool IVSigned, Address IL,
1775                                           Address LB, Address UB,
1776                                           Address ST) {
1777   // Call __kmpc_dispatch_next(
1778   //          ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter,
1779   //          kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper,
1780   //          kmp_int[32|64] *p_stride);
1781   llvm::Value *Args[] = {
1782       emitUpdateLocation(CGF, Loc, OMP_IDENT_KMPC), getThreadID(CGF, Loc),
1783       IL.getPointer(), // &isLastIter
1784       LB.getPointer(), // &Lower
1785       UB.getPointer(), // &Upper
1786       ST.getPointer()  // &Stride
1787   };
1788   llvm::Value *Call =
1789       CGF.EmitRuntimeCall(createDispatchNextFunction(IVSize, IVSigned), Args);
1790   return CGF.EmitScalarConversion(
1791       Call, CGF.getContext().getIntTypeForBitwidth(32, /* Signed */ true),
1792       CGF.getContext().BoolTy, Loc);
1793 }
1794 
1795 void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
1796                                            llvm::Value *NumThreads,
1797                                            SourceLocation Loc) {
1798   // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads)
1799   llvm::Value *Args[] = {
1800       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
1801       CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)};
1802   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_num_threads),
1803                       Args);
1804 }
1805 
1806 void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF,
1807                                          OpenMPProcBindClauseKind ProcBind,
1808                                          SourceLocation Loc) {
1809   // Constants for proc bind value accepted by the runtime.
1810   enum ProcBindTy {
1811     ProcBindFalse = 0,
1812     ProcBindTrue,
1813     ProcBindMaster,
1814     ProcBindClose,
1815     ProcBindSpread,
1816     ProcBindIntel,
1817     ProcBindDefault
1818   } RuntimeProcBind;
1819   switch (ProcBind) {
1820   case OMPC_PROC_BIND_master:
1821     RuntimeProcBind = ProcBindMaster;
1822     break;
1823   case OMPC_PROC_BIND_close:
1824     RuntimeProcBind = ProcBindClose;
1825     break;
1826   case OMPC_PROC_BIND_spread:
1827     RuntimeProcBind = ProcBindSpread;
1828     break;
1829   case OMPC_PROC_BIND_unknown:
1830     llvm_unreachable("Unsupported proc_bind value.");
1831   }
1832   // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind)
1833   llvm::Value *Args[] = {
1834       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
1835       llvm::ConstantInt::get(CGM.IntTy, RuntimeProcBind, /*isSigned=*/true)};
1836   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_proc_bind), Args);
1837 }
1838 
1839 void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>,
1840                                 SourceLocation Loc) {
1841   // Build call void __kmpc_flush(ident_t *loc)
1842   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_flush),
1843                       emitUpdateLocation(CGF, Loc));
1844 }
1845 
namespace {
/// \brief Positions of the fields inside the kmp_task_t record. The order
/// matches the order in which the fields are added to that record.
enum KmpTaskTFields {
  /// \brief 'shareds': list of shared variables.
  KmpTaskTShareds,
  /// \brief 'routine': task routine.
  KmpTaskTRoutine,
  /// \brief 'part_id': partition id for the untied tasks.
  KmpTaskTPartId,
  /// \brief 'destructors': function with call of destructors for private
  /// variables.
  KmpTaskTDestructors,
};
} // anonymous namespace
1859 
1860 void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) {
1861   if (!KmpRoutineEntryPtrTy) {
1862     // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type.
1863     auto &C = CGM.getContext();
1864     QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy};
1865     FunctionProtoType::ExtProtoInfo EPI;
1866     KmpRoutineEntryPtrQTy = C.getPointerType(
1867         C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI));
1868     KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy);
1869   }
1870 }
1871 
1872 static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC,
1873                                        QualType FieldTy) {
1874   auto *Field = FieldDecl::Create(
1875       C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy,
1876       C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()),
1877       /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit);
1878   Field->setAccess(AS_public);
1879   DC->addDecl(Field);
1880   return Field;
1881 }
1882 
1883 namespace {
1884 struct PrivateHelpersTy {
1885   PrivateHelpersTy(const VarDecl *Original, const VarDecl *PrivateCopy,
1886                    const VarDecl *PrivateElemInit)
1887       : Original(Original), PrivateCopy(PrivateCopy),
1888         PrivateElemInit(PrivateElemInit) {}
1889   const VarDecl *Original;
1890   const VarDecl *PrivateCopy;
1891   const VarDecl *PrivateElemInit;
1892 };
1893 typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy;
1894 } // anonymous namespace
1895 
1896 static RecordDecl *
1897 createPrivatesRecordDecl(CodeGenModule &CGM,
1898                          const ArrayRef<PrivateDataTy> Privates) {
1899   if (!Privates.empty()) {
1900     auto &C = CGM.getContext();
1901     // Build struct .kmp_privates_t. {
1902     //         /*  private vars  */
1903     //       };
1904     auto *RD = C.buildImplicitRecord(".kmp_privates.t");
1905     RD->startDefinition();
1906     for (auto &&Pair : Privates) {
1907       auto *VD = Pair.second.Original;
1908       auto Type = VD->getType();
1909       Type = Type.getNonReferenceType();
1910       auto *FD = addFieldToRecordDecl(C, RD, Type);
1911       if (VD->hasAttrs()) {
1912         for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()),
1913              E(VD->getAttrs().end());
1914              I != E; ++I)
1915           FD->addAttr(*I);
1916       }
1917     }
1918     RD->completeDefinition();
1919     return RD;
1920   }
1921   return nullptr;
1922 }
1923 
1924 static RecordDecl *
1925 createKmpTaskTRecordDecl(CodeGenModule &CGM, QualType KmpInt32Ty,
1926                          QualType KmpRoutineEntryPointerQTy) {
1927   auto &C = CGM.getContext();
1928   // Build struct kmp_task_t {
1929   //         void *              shareds;
1930   //         kmp_routine_entry_t routine;
1931   //         kmp_int32           part_id;
1932   //         kmp_routine_entry_t destructors;
1933   //       };
1934   auto *RD = C.buildImplicitRecord("kmp_task_t");
1935   RD->startDefinition();
1936   addFieldToRecordDecl(C, RD, C.VoidPtrTy);
1937   addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy);
1938   addFieldToRecordDecl(C, RD, KmpInt32Ty);
1939   addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy);
1940   RD->completeDefinition();
1941   return RD;
1942 }
1943 
1944 static RecordDecl *
1945 createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy,
1946                                      const ArrayRef<PrivateDataTy> Privates) {
1947   auto &C = CGM.getContext();
1948   // Build struct kmp_task_t_with_privates {
1949   //         kmp_task_t task_data;
1950   //         .kmp_privates_t. privates;
1951   //       };
1952   auto *RD = C.buildImplicitRecord("kmp_task_t_with_privates");
1953   RD->startDefinition();
1954   addFieldToRecordDecl(C, RD, KmpTaskTQTy);
1955   if (auto *PrivateRD = createPrivatesRecordDecl(CGM, Privates)) {
1956     addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD));
1957   }
1958   RD->completeDefinition();
1959   return RD;
1960 }
1961 
1962 /// \brief Emit a proxy function which accepts kmp_task_t as the second
1963 /// argument.
1964 /// \code
1965 /// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
1966 ///   TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map,
1967 ///   tt->shareds);
1968 ///   return 0;
1969 /// }
1970 /// \endcode
1971 static llvm::Value *
1972 emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc,
1973                       QualType KmpInt32Ty, QualType KmpTaskTWithPrivatesPtrQTy,
1974                       QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy,
1975                       QualType SharedsPtrTy, llvm::Value *TaskFunction,
1976                       llvm::Value *TaskPrivatesMap) {
1977   auto &C = CGM.getContext();
1978   FunctionArgList Args;
1979   ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty);
1980   ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc,
1981                                 /*Id=*/nullptr,
1982                                 KmpTaskTWithPrivatesPtrQTy.withRestrict());
1983   Args.push_back(&GtidArg);
1984   Args.push_back(&TaskTypeArg);
1985   FunctionType::ExtInfo Info;
1986   auto &TaskEntryFnInfo =
1987       CGM.getTypes().arrangeFreeFunctionDeclaration(KmpInt32Ty, Args, Info,
1988                                                     /*isVariadic=*/false);
1989   auto *TaskEntryTy = CGM.getTypes().GetFunctionType(TaskEntryFnInfo);
1990   auto *TaskEntry =
1991       llvm::Function::Create(TaskEntryTy, llvm::GlobalValue::InternalLinkage,
1992                              ".omp_task_entry.", &CGM.getModule());
1993   CGM.SetLLVMFunctionAttributes(/*D=*/nullptr, TaskEntryFnInfo, TaskEntry);
1994   CodeGenFunction CGF(CGM);
1995   CGF.disableDebugInfo();
1996   CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args);
1997 
1998   // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map,
1999   // tt->task_data.shareds);
2000   auto *GtidParam = CGF.EmitLoadOfScalar(
2001       CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc);
2002   LValue TDBase = emitLoadOfPointerLValue(
2003       CGF, CGF.GetAddrOfLocalVar(&TaskTypeArg), KmpTaskTWithPrivatesPtrQTy);
2004   auto *KmpTaskTWithPrivatesQTyRD =
2005       cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
2006   LValue Base =
2007       CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
2008   auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
2009   auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
2010   auto PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI);
2011   auto *PartidParam = CGF.EmitLoadOfLValue(PartIdLVal, Loc).getScalarVal();
2012 
2013   auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds);
2014   auto SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI);
2015   auto *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2016       CGF.EmitLoadOfLValue(SharedsLVal, Loc).getScalarVal(),
2017       CGF.ConvertTypeForMem(SharedsPtrTy));
2018 
2019   auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
2020   llvm::Value *PrivatesParam;
2021   if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) {
2022     auto PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI);
2023     PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2024         PrivatesLVal.getPointer(), CGF.VoidPtrTy);
2025   } else {
2026     PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
2027   }
2028 
2029   llvm::Value *CallArgs[] = {GtidParam, PartidParam, PrivatesParam,
2030                              TaskPrivatesMap, SharedsParam};
2031   CGF.EmitCallOrInvoke(TaskFunction, CallArgs);
2032   CGF.EmitStoreThroughLValue(
2033       RValue::get(CGF.Builder.getInt32(/*C=*/0)),
2034       CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty));
2035   CGF.FinishFunction();
2036   return TaskEntry;
2037 }
2038 
2039 static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM,
2040                                             SourceLocation Loc,
2041                                             QualType KmpInt32Ty,
2042                                             QualType KmpTaskTWithPrivatesPtrQTy,
2043                                             QualType KmpTaskTWithPrivatesQTy) {
2044   auto &C = CGM.getContext();
2045   FunctionArgList Args;
2046   ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty);
2047   ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc,
2048                                 /*Id=*/nullptr,
2049                                 KmpTaskTWithPrivatesPtrQTy.withRestrict());
2050   Args.push_back(&GtidArg);
2051   Args.push_back(&TaskTypeArg);
2052   FunctionType::ExtInfo Info;
2053   auto &DestructorFnInfo =
2054       CGM.getTypes().arrangeFreeFunctionDeclaration(KmpInt32Ty, Args, Info,
2055                                                     /*isVariadic=*/false);
2056   auto *DestructorFnTy = CGM.getTypes().GetFunctionType(DestructorFnInfo);
2057   auto *DestructorFn =
2058       llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage,
2059                              ".omp_task_destructor.", &CGM.getModule());
2060   CGM.SetLLVMFunctionAttributes(/*D=*/nullptr, DestructorFnInfo, DestructorFn);
2061   CodeGenFunction CGF(CGM);
2062   CGF.disableDebugInfo();
2063   CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo,
2064                     Args);
2065 
2066   LValue Base = emitLoadOfPointerLValue(
2067       CGF, CGF.GetAddrOfLocalVar(&TaskTypeArg), KmpTaskTWithPrivatesPtrQTy);
2068   auto *KmpTaskTWithPrivatesQTyRD =
2069       cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
2070   auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
2071   Base = CGF.EmitLValueForField(Base, *FI);
2072   for (auto *Field :
2073        cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) {
2074     if (auto DtorKind = Field->getType().isDestructedType()) {
2075       auto FieldLValue = CGF.EmitLValueForField(Base, Field);
2076       CGF.pushDestroy(DtorKind, FieldLValue.getAddress(), Field->getType());
2077     }
2078   }
2079   CGF.FinishFunction();
2080   return DestructorFn;
2081 }
2082 
2083 /// \brief Emit a privates mapping function for correct handling of private and
2084 /// firstprivate variables.
2085 /// \code
2086 /// void .omp_task_privates_map.(const .privates. *noalias privs, <ty1>
2087 /// **noalias priv1,...,  <tyn> **noalias privn) {
2088 ///   *priv1 = &.privates.priv1;
2089 ///   ...;
2090 ///   *privn = &.privates.privn;
2091 /// }
2092 /// \endcode
2093 static llvm::Value *
2094 emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc,
2095                                const ArrayRef<const Expr *> PrivateVars,
2096                                const ArrayRef<const Expr *> FirstprivateVars,
2097                                QualType PrivatesQTy,
2098                                const ArrayRef<PrivateDataTy> Privates) {
2099   auto &C = CGM.getContext();
2100   FunctionArgList Args;
2101   ImplicitParamDecl TaskPrivatesArg(
2102       C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
2103       C.getPointerType(PrivatesQTy).withConst().withRestrict());
2104   Args.push_back(&TaskPrivatesArg);
2105   llvm::DenseMap<const VarDecl *, unsigned> PrivateVarsPos;
2106   unsigned Counter = 1;
2107   for (auto *E: PrivateVars) {
2108     Args.push_back(ImplicitParamDecl::Create(
2109         C, /*DC=*/nullptr, Loc,
2110         /*Id=*/nullptr, C.getPointerType(C.getPointerType(E->getType()))
2111                             .withConst()
2112                             .withRestrict()));
2113     auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
2114     PrivateVarsPos[VD] = Counter;
2115     ++Counter;
2116   }
2117   for (auto *E : FirstprivateVars) {
2118     Args.push_back(ImplicitParamDecl::Create(
2119         C, /*DC=*/nullptr, Loc,
2120         /*Id=*/nullptr, C.getPointerType(C.getPointerType(E->getType()))
2121                             .withConst()
2122                             .withRestrict()));
2123     auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
2124     PrivateVarsPos[VD] = Counter;
2125     ++Counter;
2126   }
2127   FunctionType::ExtInfo Info;
2128   auto &TaskPrivatesMapFnInfo =
2129       CGM.getTypes().arrangeFreeFunctionDeclaration(C.VoidTy, Args, Info,
2130                                                     /*isVariadic=*/false);
2131   auto *TaskPrivatesMapTy =
2132       CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo);
2133   auto *TaskPrivatesMap = llvm::Function::Create(
2134       TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage,
2135       ".omp_task_privates_map.", &CGM.getModule());
2136   CGM.SetLLVMFunctionAttributes(/*D=*/nullptr, TaskPrivatesMapFnInfo,
2137                                 TaskPrivatesMap);
2138   TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline);
2139   CodeGenFunction CGF(CGM);
2140   CGF.disableDebugInfo();
2141   CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap,
2142                     TaskPrivatesMapFnInfo, Args);
2143 
2144   // *privi = &.privates.privi;
2145   LValue Base = emitLoadOfPointerLValue(
2146       CGF, CGF.GetAddrOfLocalVar(&TaskPrivatesArg), TaskPrivatesArg.getType());
2147   auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl());
2148   Counter = 0;
2149   for (auto *Field : PrivatesQTyRD->fields()) {
2150     auto FieldLVal = CGF.EmitLValueForField(Base, Field);
2151     auto *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]];
2152     auto RefLVal = CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType());
2153     auto RefLoadLVal =
2154         emitLoadOfPointerLValue(CGF, RefLVal.getAddress(), RefLVal.getType());
2155     CGF.EmitStoreOfScalar(FieldLVal.getPointer(), RefLoadLVal);
2156     ++Counter;
2157   }
2158   CGF.FinishFunction();
2159   return TaskPrivatesMap;
2160 }
2161 
2162 static llvm::Value *getTypeSize(CodeGenFunction &CGF, QualType Ty) {
2163   auto &C = CGF.getContext();
2164   llvm::Value *Size;
2165   auto SizeInChars = C.getTypeSizeInChars(Ty);
2166   if (SizeInChars.isZero()) {
2167     // getTypeSizeInChars() returns 0 for a VLA.
2168     Size = nullptr;
2169     while (auto *VAT = C.getAsVariableArrayType(Ty)) {
2170       llvm::Value *ArraySize;
2171       std::tie(ArraySize, Ty) = CGF.getVLASize(VAT);
2172       Size = Size ? CGF.Builder.CreateNUWMul(Size, ArraySize) : ArraySize;
2173     }
2174     SizeInChars = C.getTypeSizeInChars(Ty);
2175     assert(!SizeInChars.isZero());
2176     Size = CGF.Builder.CreateNUWMul(
2177         Size, llvm::ConstantInt::get(CGF.SizeTy, SizeInChars.getQuantity()));
2178   } else
2179     Size = llvm::ConstantInt::get(CGF.SizeTy, SizeInChars.getQuantity());
2180   return Size;
2181 }
2182 
2183 static int array_pod_sort_comparator(const PrivateDataTy *P1,
2184                                      const PrivateDataTy *P2) {
2185   return P1->first < P2->first ? 1 : (P2->first < P1->first ? -1 : 0);
2186 }
2187 
/// \brief Emit the code that creates and launches a task.
///
/// The emitted sequence allocates a task with __kmpc_omp_task_alloc, copies
/// the shareds into it, initializes the private/firstprivate copies, stores
/// the destructors routine (or null), builds the kmp_depend_info array for
/// the dependences and finally either
///  - calls __kmpc_omp_task / __kmpc_omp_task_with_deps, or
///  - (when \a IfCond is given and evaluates to false) waits for the
///    dependences and calls the proxy task entry directly between
///    __kmpc_omp_task_begin_if0 and __kmpc_omp_task_complete_if0.
///
/// \param CGF Function into which the code is emitted.
/// \param Loc Location used for the runtime calls and helper functions.
/// \param D Directive whose associated captured statement is used to look up
/// the captured fields of firstprivate variables.
/// \param Tied If true, the tied flag (0x1) is set in the task flags.
/// \param Final Selects the final flag (0x2): a non-null pointer part is used
/// as the condition of a select, otherwise the int part decides statically.
/// \param TaskFunction Outlined task function called by the proxy task entry;
/// the type of its 4th parameter is used as the type of the privates map.
/// \param SharedsTy Record type of the shareds.
/// \param Shareds Address of the shareds copied into the allocated task.
/// \param IfCond Optional 'if' condition; null means the task is always
/// passed to the runtime.
/// \param PrivateVars References to the original private variables.
/// \param PrivateCopies References to the private copies, parallel to
/// \a PrivateVars.
/// \param FirstprivateVars References to the original firstprivate variables.
/// \param FirstprivateCopies References to the firstprivate copies, parallel
/// to \a FirstprivateVars.
/// \param FirstprivateInits References to the helper variables used in the
/// initializers of the copies, parallel to \a FirstprivateVars.
/// \param Dependences Pairs of dependence kind and dependence expression.
void CGOpenMPRuntime::emitTaskCall(
    CodeGenFunction &CGF, SourceLocation Loc, const OMPExecutableDirective &D,
    bool Tied, llvm::PointerIntPair<llvm::Value *, 1, bool> Final,
    llvm::Value *TaskFunction, QualType SharedsTy, Address Shareds,
    const Expr *IfCond, ArrayRef<const Expr *> PrivateVars,
    ArrayRef<const Expr *> PrivateCopies,
    ArrayRef<const Expr *> FirstprivateVars,
    ArrayRef<const Expr *> FirstprivateCopies,
    ArrayRef<const Expr *> FirstprivateInits,
    ArrayRef<std::pair<OpenMPDependClauseKind, const Expr *>> Dependences) {
  auto &C = CGM.getContext();
  llvm::SmallVector<PrivateDataTy, 8> Privates;
  // Aggregate privates and sort them by the alignment. Each entry is keyed by
  // the declared alignment of the original variable; array_pod_sort_comparator
  // orders larger keys first.
  auto I = PrivateCopies.begin();
  for (auto *E : PrivateVars) {
    auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.push_back(std::make_pair(
        C.getDeclAlign(VD),
        PrivateHelpersTy(VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
                         /*PrivateElemInit=*/nullptr)));
    ++I;
  }
  // Firstprivates additionally carry the helper variable referenced by the
  // initializer of the copy.
  I = FirstprivateCopies.begin();
  auto IElemInitRef = FirstprivateInits.begin();
  for (auto *E : FirstprivateVars) {
    auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.push_back(std::make_pair(
        C.getDeclAlign(VD),
        PrivateHelpersTy(
            VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
            cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl()))));
    ++I, ++IElemInitRef;
  }
  llvm::array_pod_sort(Privates.begin(), Privates.end(),
                       array_pod_sort_comparator);
  auto KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
  // Build type kmp_routine_entry_t (if not built yet).
  emitKmpRoutineEntryT(KmpInt32Ty);
  // Build type kmp_task_t (if not built yet).
  if (KmpTaskTQTy.isNull()) {
    KmpTaskTQTy = C.getRecordType(
        createKmpTaskTRecordDecl(CGM, KmpInt32Ty, KmpRoutineEntryPtrQTy));
  }
  auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
  // Build particular struct kmp_task_t for the given task.
  auto *KmpTaskTWithPrivatesQTyRD =
      createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates);
  auto KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD);
  QualType KmpTaskTWithPrivatesPtrQTy =
      C.getPointerType(KmpTaskTWithPrivatesQTy);
  auto *KmpTaskTWithPrivatesTy = CGF.ConvertType(KmpTaskTWithPrivatesQTy);
  auto *KmpTaskTWithPrivatesPtrTy = KmpTaskTWithPrivatesTy->getPointerTo();
  auto KmpTaskTWithPrivatesTySize =
      CGM.getSize(C.getTypeSizeInChars(KmpTaskTWithPrivatesQTy));
  QualType SharedsPtrTy = C.getPointerType(SharedsTy);

  // Emit the privates mapping function (if there are any privates). Its value
  // is cast to the type of the 4th parameter of TaskFunction; without privates
  // a null pointer of that type is passed instead.
  llvm::Value *TaskPrivatesMap = nullptr;
  auto *TaskPrivatesMapTy =
      std::next(cast<llvm::Function>(TaskFunction)->getArgumentList().begin(),
                3)
          ->getType();
  if (!Privates.empty()) {
    auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
    TaskPrivatesMap = emitTaskPrivateMappingFunction(
        CGM, Loc, PrivateVars, FirstprivateVars, FI->getType(), Privates);
    TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        TaskPrivatesMap, TaskPrivatesMapTy);
  } else {
    TaskPrivatesMap = llvm::ConstantPointerNull::get(
        cast<llvm::PointerType>(TaskPrivatesMapTy));
  }
  // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid,
  // kmp_task_t *tt);
  auto *TaskEntry = emitProxyTaskFunction(
      CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTy,
      KmpTaskTQTy, SharedsPtrTy, TaskFunction, TaskPrivatesMap);

  // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
  // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
  // kmp_routine_entry_t *task_entry);
  // Task flags. Format is taken from
  // http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp.h,
  // description of kmp_tasking_flags struct.
  const unsigned TiedFlag = 0x1;
  const unsigned FinalFlag = 0x2;
  unsigned Flags = Tied ? TiedFlag : 0;
  // The final flag is either selected at run time (pointer part of Final is
  // set) or folded to a constant from the int part of Final.
  auto *TaskFlags =
      Final.getPointer()
          ? CGF.Builder.CreateSelect(Final.getPointer(),
                                     CGF.Builder.getInt32(FinalFlag),
                                     CGF.Builder.getInt32(/*C=*/0))
          : CGF.Builder.getInt32(Final.getInt() ? FinalFlag : 0);
  TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags));
  auto SharedsSize = C.getTypeSizeInChars(SharedsTy);
  llvm::Value *AllocArgs[] = {
      emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), TaskFlags,
      KmpTaskTWithPrivatesTySize, CGM.getSize(SharedsSize),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TaskEntry,
                                                      KmpRoutineEntryPtrTy)};
  auto *NewTask = CGF.EmitRuntimeCall(
      createRuntimeFunction(OMPRTL__kmpc_omp_task_alloc), AllocArgs);
  // View the allocated task as the task-specific record with privates; TDBase
  // is its first field, i.e. the embedded kmp_task_t.
  auto *NewTaskNewTaskTTy = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      NewTask, KmpTaskTWithPrivatesPtrTy);
  LValue Base = CGF.MakeNaturalAlignAddrLValue(NewTaskNewTaskTTy,
                                               KmpTaskTWithPrivatesQTy);
  LValue TDBase =
      CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin());
  // Fill the data in the resulting kmp_task_t record.
  // Copy shareds if there are any.
  Address KmpTaskSharedsPtr = Address::invalid();
  if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) {
    KmpTaskSharedsPtr =
        Address(CGF.EmitLoadOfScalar(
                    CGF.EmitLValueForField(
                        TDBase, *std::next(KmpTaskTQTyRD->field_begin(),
                                           KmpTaskTShareds)),
                    Loc),
                CGF.getNaturalTypeAlignment(SharedsTy));
    CGF.EmitAggregateCopy(KmpTaskSharedsPtr, Shareds, SharedsTy);
  }
  // Emit initial values for private copies (if any).
  bool NeedsCleanup = false;
  if (!Privates.empty()) {
    auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
    auto PrivatesBase = CGF.EmitLValueForField(Base, *FI);
    // From here on FI walks the fields of the privates record in step with
    // the (sorted) Privates list.
    FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin();
    LValue SharedsBase;
    if (!FirstprivateVars.empty()) {
      // Firstprivate copies are initialized from the shareds copied into the
      // task above.
      SharedsBase = CGF.MakeAddrLValue(
          CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
              KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy)),
          SharedsTy);
    }
    CodeGenFunction::CGCapturedStmtInfo CapturesInfo(
        cast<CapturedStmt>(*D.getAssociatedStmt()));
    for (auto &&Pair : Privates) {
      auto *VD = Pair.second.PrivateCopy;
      auto *Init = VD->getAnyInitializer();
      LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI);
      if (Init) {
        // A non-null PrivateElemInit marks a firstprivate entry (see how
        // Privates is filled above); plain privates take the else branch.
        if (auto *Elem = Pair.second.PrivateElemInit) {
          auto *OriginalVD = Pair.second.Original;
          auto *SharedField = CapturesInfo.lookup(OriginalVD);
          auto SharedRefLValue =
              CGF.EmitLValueForField(SharedsBase, SharedField);
          // Rebuild the lvalue with the declared alignment of the original
          // variable.
          SharedRefLValue = CGF.MakeAddrLValue(
              Address(SharedRefLValue.getPointer(), C.getDeclAlign(OriginalVD)),
              SharedRefLValue.getType(), AlignmentSource::Decl);
          QualType Type = OriginalVD->getType();
          if (Type->isArrayType()) {
            // Initialize firstprivate array.
            if (!isa<CXXConstructExpr>(Init) ||
                CGF.isTrivialInitializer(Init)) {
              // Perform simple memcpy.
              CGF.EmitAggregateAssign(PrivateLValue.getAddress(),
                                      SharedRefLValue.getAddress(), Type);
            } else {
              // Initialize firstprivate array using element-by-element
              // initialization.
              CGF.EmitOMPAggregateAssign(
                  PrivateLValue.getAddress(), SharedRefLValue.getAddress(),
                  Type, [&CGF, Elem, Init, &CapturesInfo](
                            Address DestElement, Address SrcElement) {
                    // Clean up any temporaries needed by the initialization.
                    CodeGenFunction::OMPPrivateScope InitScope(CGF);
                    // Map the helper variable used by the initializer onto
                    // the current source element.
                    InitScope.addPrivate(Elem, [SrcElement]() -> Address {
                      return SrcElement;
                    });
                    (void)InitScope.Privatize();
                    // Emit initialization for single element.
                    CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(
                        CGF, &CapturesInfo);
                    CGF.EmitAnyExprToMem(Init, DestElement,
                                         Init->getType().getQualifiers(),
                                         /*IsInitializer=*/false);
                  });
            }
          } else {
            // Non-array firstprivate: map the helper variable onto the shared
            // original and emit the initializer of the copy.
            CodeGenFunction::OMPPrivateScope InitScope(CGF);
            InitScope.addPrivate(Elem, [SharedRefLValue]() -> Address {
              return SharedRefLValue.getAddress();
            });
            (void)InitScope.Privatize();
            CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo);
            CGF.EmitExprAsInit(Init, VD, PrivateLValue,
                               /*capturedByInit=*/false);
          }
        } else {
          CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false);
        }
      }
      // A destructors routine is needed as soon as one private field has a
      // type that requires destruction.
      NeedsCleanup = NeedsCleanup || FI->getType().isDestructedType();
      ++FI;
    }
  }
  // Provide pointer to function with destructors for privates.
  llvm::Value *DestructorFn =
      NeedsCleanup ? emitDestructorsFunction(CGM, Loc, KmpInt32Ty,
                                             KmpTaskTWithPrivatesPtrQTy,
                                             KmpTaskTWithPrivatesQTy)
                   : llvm::ConstantPointerNull::get(
                         cast<llvm::PointerType>(KmpRoutineEntryPtrTy));
  LValue Destructor = CGF.EmitLValueForField(
      TDBase, *std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTDestructors));
  CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                            DestructorFn, KmpRoutineEntryPtrTy),
                        Destructor);

  // Process list of dependences.
  Address DependenciesArray = Address::invalid();
  unsigned NumDependencies = Dependences.size();
  if (NumDependencies) {
    // Dependence kind for RTL.
    enum RTLDependenceKindTy { DepIn = 1, DepOut = 2, DepInOut = 3 };
    // Field indices of the kmp_depend_info record built below.
    enum RTLDependInfoFieldsTy { BaseAddr, Len, Flags };
    RecordDecl *KmpDependInfoRD;
    // The flags field is an unsigned integer as wide as 'bool'.
    QualType FlagsTy = C.getIntTypeForBitwidth(
        C.toBits(C.getTypeSizeInChars(C.BoolTy)), /*Signed=*/false);
    llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
    // Build type kmp_depend_info (if not built yet):
    // { intptr_t base_addr; size_t len; <FlagsTy> flags; }
    if (KmpDependInfoTy.isNull()) {
      KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info");
      KmpDependInfoRD->startDefinition();
      addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType());
      addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType());
      addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy);
      KmpDependInfoRD->completeDefinition();
      KmpDependInfoTy = C.getRecordType(KmpDependInfoRD);
    } else {
      KmpDependInfoRD = cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
    }
    CharUnits DependencySize = C.getTypeSizeInChars(KmpDependInfoTy);
    // Define type kmp_depend_info[<Dependences.size()>];
    QualType KmpDependInfoArrayTy = C.getConstantArrayType(
        KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies),
        ArrayType::Normal, /*IndexTypeQuals=*/0);
    // kmp_depend_info[<Dependences.size()>] deps;
    DependenciesArray = CGF.CreateMemTemp(KmpDependInfoArrayTy);
    for (unsigned i = 0; i < NumDependencies; ++i) {
      const Expr *E = Dependences[i].second;
      auto Addr = CGF.EmitLValue(E);
      llvm::Value *Size;
      QualType Ty = E->getType();
      if (auto *ASE = dyn_cast<OMPArraySectionExpr>(E->IgnoreParenImpCasts())) {
        // Array section: the length is the distance from the address of the
        // section expression to one element past the address emitted with
        // LowerBound=false.
        LValue UpAddrLVal =
            CGF.EmitOMPArraySectionExpr(ASE, /*LowerBound=*/false);
        llvm::Value *UpAddr =
            CGF.Builder.CreateConstGEP1_32(UpAddrLVal.getPointer(), /*Idx0=*/1);
        llvm::Value *LowIntPtr =
            CGF.Builder.CreatePtrToInt(Addr.getPointer(), CGM.SizeTy);
        llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGM.SizeTy);
        Size = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr);
      } else {
        Size = getTypeSize(CGF, Ty);
      }
      auto Base = CGF.MakeAddrLValue(
          CGF.Builder.CreateConstArrayGEP(DependenciesArray, i, DependencySize),
          KmpDependInfoTy);
      // deps[i].base_addr = &<Dependences[i].second>;
      auto BaseAddrLVal = CGF.EmitLValueForField(
          Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
      CGF.EmitStoreOfScalar(
          CGF.Builder.CreatePtrToInt(Addr.getPointer(), CGF.IntPtrTy),
          BaseAddrLVal);
      // deps[i].len = sizeof(<Dependences[i].second>);
      auto LenLVal = CGF.EmitLValueForField(
          Base, *std::next(KmpDependInfoRD->field_begin(), Len));
      CGF.EmitStoreOfScalar(Size, LenLVal);
      // deps[i].flags = <Dependences[i].first>;
      RTLDependenceKindTy DepKind;
      switch (Dependences[i].first) {
      case OMPC_DEPEND_in:
        DepKind = DepIn;
        break;
      case OMPC_DEPEND_out:
        DepKind = DepOut;
        break;
      case OMPC_DEPEND_inout:
        DepKind = DepInOut;
        break;
      case OMPC_DEPEND_unknown:
        llvm_unreachable("Unknown task dependence type");
      }
      auto FlagsLVal = CGF.EmitLValueForField(
          Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
      CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
                            FlagsLVal);
    }
    // Pass the array to the runtime as a void pointer to its first element.
    DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        CGF.Builder.CreateStructGEP(DependenciesArray, 0, CharUnits::Zero()),
        CGF.VoidPtrTy);
  }

  // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
  // libcall.
  // Build kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t
  // *new_task);
  // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
  // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
  // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if dependence
  // list is not empty
  auto *ThreadID = getThreadID(CGF, Loc);
  auto *UpLoc = emitUpdateLocation(CGF, Loc);
  llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask };
  // DepTaskArgs is only filled in (and only read by ThenCodeGen) when there
  // are dependences; the noalias list is always passed as empty.
  llvm::Value *DepTaskArgs[7];
  if (NumDependencies) {
    DepTaskArgs[0] = UpLoc;
    DepTaskArgs[1] = ThreadID;
    DepTaskArgs[2] = NewTask;
    DepTaskArgs[3] = CGF.Builder.getInt32(NumDependencies);
    DepTaskArgs[4] = DependenciesArray.getPointer();
    DepTaskArgs[5] = CGF.Builder.getInt32(0);
    DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }
  // 'Then' part: hand the task over to the runtime.
  auto &&ThenCodeGen = [this, NumDependencies,
                        &TaskArgs, &DepTaskArgs](CodeGenFunction &CGF) {
    // TODO: add check for untied tasks.
    if (NumDependencies) {
      CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task_with_deps),
                          DepTaskArgs);
    } else {
      CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task),
                          TaskArgs);
    }
  };
  // Cleanup type that calls a runtime function with as many arguments as
  // TaskArgs has elements.
  typedef CallEndCleanup<std::extent<decltype(TaskArgs)>::value>
      IfCallEndCleanup;

  // As with DepTaskArgs, only filled in and read when there are dependences.
  llvm::Value *DepWaitTaskArgs[6];
  if (NumDependencies) {
    DepWaitTaskArgs[0] = UpLoc;
    DepWaitTaskArgs[1] = ThreadID;
    DepWaitTaskArgs[2] = CGF.Builder.getInt32(NumDependencies);
    DepWaitTaskArgs[3] = DependenciesArray.getPointer();
    DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
    DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }
  // 'Else' part: wait for the dependences (if any) and call the proxy task
  // entry directly, bracketed by the begin_if0/complete_if0 runtime calls.
  auto &&ElseCodeGen = [this, &TaskArgs, ThreadID, NewTaskNewTaskTTy, TaskEntry,
                        NumDependencies, &DepWaitTaskArgs](CodeGenFunction &CGF) {
    CodeGenFunction::RunCleanupsScope LocalScope(CGF);
    // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
    // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
    // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info
    // is specified.
    if (NumDependencies)
      CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_wait_deps),
                          DepWaitTaskArgs);
    // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
    CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task_begin_if0),
                        TaskArgs);
    // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
    // It is registered as a normal-and-EH cleanup rather than emitted as a
    // plain call after the task entry.
    CGF.EHStack.pushCleanup<IfCallEndCleanup>(
        NormalAndEHCleanup,
        createRuntimeFunction(OMPRTL__kmpc_omp_task_complete_if0),
        llvm::makeArrayRef(TaskArgs));

    // Call proxy_task_entry(gtid, new_task);
    llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy};
    CGF.EmitCallOrInvoke(TaskEntry, OutlinedFnArgs);
  };

  if (IfCond) {
    emitOMPIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen);
  } else {
    // Without an 'if' clause only the 'then' part is emitted.
    CodeGenFunction::RunCleanupsScope Scope(CGF);
    ThenCodeGen(CGF);
  }
}
2558 
2559 static llvm::Value *emitReductionFunction(CodeGenModule &CGM,
2560                                           llvm::Type *ArgsType,
2561                                           ArrayRef<const Expr *> LHSExprs,
2562                                           ArrayRef<const Expr *> RHSExprs,
2563                                           ArrayRef<const Expr *> ReductionOps) {
2564   auto &C = CGM.getContext();
2565 
2566   // void reduction_func(void *LHSArg, void *RHSArg);
2567   FunctionArgList Args;
2568   ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, SourceLocation(), /*Id=*/nullptr,
2569                            C.VoidPtrTy);
2570   ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, SourceLocation(), /*Id=*/nullptr,
2571                            C.VoidPtrTy);
2572   Args.push_back(&LHSArg);
2573   Args.push_back(&RHSArg);
2574   FunctionType::ExtInfo EI;
2575   auto &CGFI = CGM.getTypes().arrangeFreeFunctionDeclaration(
2576       C.VoidTy, Args, EI, /*isVariadic=*/false);
2577   auto *Fn = llvm::Function::Create(
2578       CGM.getTypes().GetFunctionType(CGFI), llvm::GlobalValue::InternalLinkage,
2579       ".omp.reduction.reduction_func", &CGM.getModule());
2580   CGM.SetLLVMFunctionAttributes(/*D=*/nullptr, CGFI, Fn);
2581   CodeGenFunction CGF(CGM);
2582   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args);
2583 
2584   // Dst = (void*[n])(LHSArg);
2585   // Src = (void*[n])(RHSArg);
2586   Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2587       CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
2588       ArgsType), CGF.getPointerAlign());
2589   Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2590       CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
2591       ArgsType), CGF.getPointerAlign());
2592 
2593   //  ...
2594   //  *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]);
2595   //  ...
2596   CodeGenFunction::OMPPrivateScope Scope(CGF);
2597   for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I) {
2598     auto RHSVar = cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl());
2599     Scope.addPrivate(RHSVar, [&]() -> Address {
2600       return emitAddrOfVarFromArray(CGF, RHS, I, RHSVar);
2601     });
2602     auto LHSVar = cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl());
2603     Scope.addPrivate(LHSVar, [&]() -> Address {
2604       return emitAddrOfVarFromArray(CGF, LHS, I, LHSVar);
2605     });
2606   }
2607   Scope.Privatize();
2608   for (auto *E : ReductionOps) {
2609     CGF.EmitIgnoredExpr(E);
2610   }
2611   Scope.ForceCleanup();
2612   CGF.FinishFunction();
2613   return Fn;
2614 }
2615 
2616 void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
2617                                     ArrayRef<const Expr *> LHSExprs,
2618                                     ArrayRef<const Expr *> RHSExprs,
2619                                     ArrayRef<const Expr *> ReductionOps,
2620                                     bool WithNowait, bool SimpleReduction) {
2621   // Next code should be emitted for reduction:
2622   //
2623   // static kmp_critical_name lock = { 0 };
2624   //
2625   // void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
2626   //  *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]);
2627   //  ...
2628   //  *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1],
2629   //  *(Type<n>-1*)rhs[<n>-1]);
2630   // }
2631   //
2632   // ...
2633   // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]};
2634   // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
2635   // RedList, reduce_func, &<lock>)) {
2636   // case 1:
2637   //  ...
2638   //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
2639   //  ...
2640   // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
2641   // break;
2642   // case 2:
2643   //  ...
2644   //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
2645   //  ...
2646   // [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);]
2647   // break;
2648   // default:;
2649   // }
2650   //
2651   // if SimpleReduction is true, only the next code is generated:
2652   //  ...
2653   //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
2654   //  ...
2655 
2656   auto &C = CGM.getContext();
2657 
2658   if (SimpleReduction) {
2659     CodeGenFunction::RunCleanupsScope Scope(CGF);
2660     for (auto *E : ReductionOps) {
2661       CGF.EmitIgnoredExpr(E);
2662     }
2663     return;
2664   }
2665 
2666   // 1. Build a list of reduction variables.
2667   // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]};
2668   llvm::APInt ArraySize(/*unsigned int numBits=*/32, RHSExprs.size());
2669   QualType ReductionArrayTy =
2670       C.getConstantArrayType(C.VoidPtrTy, ArraySize, ArrayType::Normal,
2671                              /*IndexTypeQuals=*/0);
2672   Address ReductionList =
2673       CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list");
2674   for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I) {
2675     Address Elem =
2676       CGF.Builder.CreateConstArrayGEP(ReductionList, I, CGF.getPointerSize());
2677     CGF.Builder.CreateStore(
2678         CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2679             CGF.EmitLValue(RHSExprs[I]).getPointer(), CGF.VoidPtrTy),
2680         Elem);
2681   }
2682 
2683   // 2. Emit reduce_func().
2684   auto *ReductionFn = emitReductionFunction(
2685       CGM, CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo(), LHSExprs,
2686       RHSExprs, ReductionOps);
2687 
2688   // 3. Create static kmp_critical_name lock = { 0 };
2689   auto *Lock = getCriticalRegionLock(".reduction");
2690 
2691   // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
2692   // RedList, reduce_func, &<lock>);
2693   auto *IdentTLoc = emitUpdateLocation(
2694       CGF, Loc,
2695       static_cast<OpenMPLocationFlags>(OMP_IDENT_KMPC | OMP_ATOMIC_REDUCE));
2696   auto *ThreadId = getThreadID(CGF, Loc);
2697   auto *ReductionArrayTySize = llvm::ConstantInt::get(
2698       CGM.SizeTy, C.getTypeSizeInChars(ReductionArrayTy).getQuantity());
2699   auto *RL =
2700     CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(ReductionList.getPointer(),
2701                                                     CGF.VoidPtrTy);
2702   llvm::Value *Args[] = {
2703       IdentTLoc,                             // ident_t *<loc>
2704       ThreadId,                              // i32 <gtid>
2705       CGF.Builder.getInt32(RHSExprs.size()), // i32 <n>
2706       ReductionArrayTySize,                  // size_type sizeof(RedList)
2707       RL,                                    // void *RedList
2708       ReductionFn, // void (*) (void *, void *) <reduce_func>
2709       Lock         // kmp_critical_name *&<lock>
2710   };
2711   auto Res = CGF.EmitRuntimeCall(
2712       createRuntimeFunction(WithNowait ? OMPRTL__kmpc_reduce_nowait
2713                                        : OMPRTL__kmpc_reduce),
2714       Args);
2715 
2716   // 5. Build switch(res)
2717   auto *DefaultBB = CGF.createBasicBlock(".omp.reduction.default");
2718   auto *SwInst = CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2);
2719 
2720   // 6. Build case 1:
2721   //  ...
2722   //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
2723   //  ...
2724   // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
2725   // break;
2726   auto *Case1BB = CGF.createBasicBlock(".omp.reduction.case1");
2727   SwInst->addCase(CGF.Builder.getInt32(1), Case1BB);
2728   CGF.EmitBlock(Case1BB);
2729 
2730   {
2731     CodeGenFunction::RunCleanupsScope Scope(CGF);
2732     // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
2733     llvm::Value *EndArgs[] = {
2734         IdentTLoc, // ident_t *<loc>
2735         ThreadId,  // i32 <gtid>
2736         Lock       // kmp_critical_name *&<lock>
2737     };
2738     CGF.EHStack
2739         .pushCleanup<CallEndCleanup<std::extent<decltype(EndArgs)>::value>>(
2740             NormalAndEHCleanup,
2741             createRuntimeFunction(WithNowait ? OMPRTL__kmpc_end_reduce_nowait
2742                                              : OMPRTL__kmpc_end_reduce),
2743             llvm::makeArrayRef(EndArgs));
2744     for (auto *E : ReductionOps) {
2745       CGF.EmitIgnoredExpr(E);
2746     }
2747   }
2748 
2749   CGF.EmitBranch(DefaultBB);
2750 
2751   // 7. Build case 2:
2752   //  ...
2753   //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
2754   //  ...
2755   // break;
2756   auto *Case2BB = CGF.createBasicBlock(".omp.reduction.case2");
2757   SwInst->addCase(CGF.Builder.getInt32(2), Case2BB);
2758   CGF.EmitBlock(Case2BB);
2759 
2760   {
2761     CodeGenFunction::RunCleanupsScope Scope(CGF);
2762     if (!WithNowait) {
2763       // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>);
2764       llvm::Value *EndArgs[] = {
2765           IdentTLoc, // ident_t *<loc>
2766           ThreadId,  // i32 <gtid>
2767           Lock       // kmp_critical_name *&<lock>
2768       };
2769       CGF.EHStack
2770           .pushCleanup<CallEndCleanup<std::extent<decltype(EndArgs)>::value>>(
2771               NormalAndEHCleanup,
2772               createRuntimeFunction(OMPRTL__kmpc_end_reduce),
2773               llvm::makeArrayRef(EndArgs));
2774     }
2775     auto I = LHSExprs.begin();
2776     for (auto *E : ReductionOps) {
2777       const Expr *XExpr = nullptr;
2778       const Expr *EExpr = nullptr;
2779       const Expr *UpExpr = nullptr;
2780       BinaryOperatorKind BO = BO_Comma;
2781       if (auto *BO = dyn_cast<BinaryOperator>(E)) {
2782         if (BO->getOpcode() == BO_Assign) {
2783           XExpr = BO->getLHS();
2784           UpExpr = BO->getRHS();
2785         }
2786       }
2787       // Try to emit update expression as a simple atomic.
2788       auto *RHSExpr = UpExpr;
2789       if (RHSExpr) {
2790         // Analyze RHS part of the whole expression.
2791         if (auto *ACO = dyn_cast<AbstractConditionalOperator>(
2792                 RHSExpr->IgnoreParenImpCasts())) {
2793           // If this is a conditional operator, analyze its condition for
2794           // min/max reduction operator.
2795           RHSExpr = ACO->getCond();
2796         }
2797         if (auto *BORHS =
2798                 dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) {
2799           EExpr = BORHS->getRHS();
2800           BO = BORHS->getOpcode();
2801         }
2802       }
2803       if (XExpr) {
2804         auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl());
2805         LValue X = CGF.EmitLValue(XExpr);
2806         RValue E;
2807         if (EExpr)
2808           E = CGF.EmitAnyExpr(EExpr);
2809         CGF.EmitOMPAtomicSimpleUpdateExpr(
2810             X, E, BO, /*IsXLHSInRHSPart=*/true, llvm::Monotonic, Loc,
2811             [&CGF, UpExpr, VD](RValue XRValue) {
2812               CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
2813               PrivateScope.addPrivate(
2814                   VD, [&CGF, VD, XRValue]() -> Address {
2815                     Address LHSTemp = CGF.CreateMemTemp(VD->getType());
2816                     CGF.EmitStoreThroughLValue(
2817                         XRValue,
2818                         CGF.MakeAddrLValue(LHSTemp, VD->getType()));
2819                     return LHSTemp;
2820                   });
2821               (void)PrivateScope.Privatize();
2822               return CGF.EmitAnyExpr(UpExpr);
2823             });
2824       } else {
2825         // Emit as a critical region.
2826         emitCriticalRegion(CGF, ".atomic_reduction", [E](CodeGenFunction &CGF) {
2827           CGF.EmitIgnoredExpr(E);
2828         }, Loc);
2829       }
2830       ++I;
2831     }
2832   }
2833 
2834   CGF.EmitBranch(DefaultBB);
2835   CGF.EmitBlock(DefaultBB, /*IsFinished=*/true);
2836 }
2837 
2838 void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
2839                                        SourceLocation Loc) {
2840   // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
2841   // global_tid);
2842   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2843   // Ignore return result until untied tasks are supported.
2844   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_taskwait), Args);
2845 }
2846 
2847 void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF,
2848                                            OpenMPDirectiveKind InnerKind,
2849                                            const RegionCodeGenTy &CodeGen,
2850                                            bool HasCancel) {
2851   InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel);
2852   CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr);
2853 }
2854 
namespace {
/// \brief Cancellation kinds, passed as the 'cncl_kind' argument of the
/// __kmpc_cancel and __kmpc_cancellationpoint runtime calls.
enum RTCancelKind {
  /// \brief No cancellation kind selected; only used as an initial value.
  CancelNoreq = 0,
  /// \brief Cancellation of a 'parallel' region.
  CancelParallel = 1,
  /// \brief Cancellation of a 'for' (loop) region.
  CancelLoop = 2,
  /// \brief Cancellation of a 'sections' region.
  CancelSections = 3,
  /// \brief Cancellation of a 'taskgroup' region.
  CancelTaskgroup = 4
};
} // end anonymous namespace
2864 
2865 static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) {
2866   RTCancelKind CancelKind = CancelNoreq;
2867   if (CancelRegion == OMPD_parallel)
2868     CancelKind = CancelParallel;
2869   else if (CancelRegion == OMPD_for)
2870     CancelKind = CancelLoop;
2871   else if (CancelRegion == OMPD_sections)
2872     CancelKind = CancelSections;
2873   else {
2874     assert(CancelRegion == OMPD_taskgroup);
2875     CancelKind = CancelTaskgroup;
2876   }
2877   return CancelKind;
2878 }
2879 
2880 void CGOpenMPRuntime::emitCancellationPointCall(
2881     CodeGenFunction &CGF, SourceLocation Loc,
2882     OpenMPDirectiveKind CancelRegion) {
2883   // Build call kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
2884   // global_tid, kmp_int32 cncl_kind);
2885   if (auto *OMPRegionInfo =
2886           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
2887     if (OMPRegionInfo->getDirectiveKind() == OMPD_single)
2888       return;
2889     if (OMPRegionInfo->hasCancel()) {
2890       llvm::Value *Args[] = {
2891           emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2892           CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
2893       // Ignore return result until untied tasks are supported.
2894       auto *Result = CGF.EmitRuntimeCall(
2895           createRuntimeFunction(OMPRTL__kmpc_cancellationpoint), Args);
2896       // if (__kmpc_cancellationpoint()) {
2897       //  __kmpc_cancel_barrier();
2898       //   exit from construct;
2899       // }
2900       auto *ExitBB = CGF.createBasicBlock(".cancel.exit");
2901       auto *ContBB = CGF.createBasicBlock(".cancel.continue");
2902       auto *Cmp = CGF.Builder.CreateIsNotNull(Result);
2903       CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
2904       CGF.EmitBlock(ExitBB);
2905       // __kmpc_cancel_barrier();
2906       emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false);
2907       // exit from construct;
2908       auto CancelDest =
2909           CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
2910       CGF.EmitBranchThroughCleanup(CancelDest);
2911       CGF.EmitBlock(ContBB, /*IsFinished=*/true);
2912     }
2913   }
2914 }
2915 
2916 void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc,
2917                                      const Expr *IfCond,
2918                                      OpenMPDirectiveKind CancelRegion) {
2919   // Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
2920   // kmp_int32 cncl_kind);
2921   if (auto *OMPRegionInfo =
2922           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
2923     if (OMPRegionInfo->getDirectiveKind() == OMPD_single)
2924       return;
2925     auto &&ThenGen = [this, Loc, CancelRegion,
2926                       OMPRegionInfo](CodeGenFunction &CGF) {
2927       llvm::Value *Args[] = {
2928           emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2929           CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
2930       // Ignore return result until untied tasks are supported.
2931       auto *Result =
2932           CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_cancel), Args);
2933       // if (__kmpc_cancel()) {
2934       //  __kmpc_cancel_barrier();
2935       //   exit from construct;
2936       // }
2937       auto *ExitBB = CGF.createBasicBlock(".cancel.exit");
2938       auto *ContBB = CGF.createBasicBlock(".cancel.continue");
2939       auto *Cmp = CGF.Builder.CreateIsNotNull(Result);
2940       CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
2941       CGF.EmitBlock(ExitBB);
2942       // __kmpc_cancel_barrier();
2943       emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false);
2944       // exit from construct;
2945       auto CancelDest =
2946           CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
2947       CGF.EmitBranchThroughCleanup(CancelDest);
2948       CGF.EmitBlock(ContBB, /*IsFinished=*/true);
2949     };
2950     if (IfCond)
2951       emitOMPIfClause(CGF, IfCond, ThenGen, [](CodeGenFunction &) {});
2952     else
2953       ThenGen(CGF);
2954   }
2955 }
2956