1 //===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This provides a class for OpenMP runtime code generation.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "CGOpenMPRuntime.h"
15 #include "CodeGenFunction.h"
16 #include "CGCleanup.h"
17 #include "clang/AST/Decl.h"
18 #include "clang/AST/StmtOpenMP.h"
19 #include "llvm/ADT/ArrayRef.h"
20 #include "llvm/IR/CallSite.h"
21 #include "llvm/IR/DerivedTypes.h"
22 #include "llvm/IR/GlobalValue.h"
23 #include "llvm/IR/Value.h"
24 #include "llvm/Support/raw_ostream.h"
25 #include <cassert>
26 
27 using namespace clang;
28 using namespace CodeGen;
29 
30 namespace {
31 /// \brief Base class for handling code generation inside OpenMP regions.
32 class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
33 public:
34   /// \brief Kinds of OpenMP regions used in codegen.
35   enum CGOpenMPRegionKind {
36     /// \brief Region with outlined function for standalone 'parallel'
37     /// directive.
38     ParallelOutlinedRegion,
39     /// \brief Region with outlined function for standalone 'task' directive.
40     TaskOutlinedRegion,
41     /// \brief Region for constructs that do not require function outlining,
42     /// like 'for', 'sections', 'atomic' etc. directives.
43     InlinedRegion,
44   };
45 
46   CGOpenMPRegionInfo(const CapturedStmt &CS,
47                      const CGOpenMPRegionKind RegionKind,
48                      const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind)
49       : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind),
50         CodeGen(CodeGen), Kind(Kind) {}
51 
52   CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind,
53                      const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind)
54       : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen),
55         Kind(Kind) {}
56 
57   /// \brief Get a variable or parameter for storing global thread id
58   /// inside OpenMP construct.
59   virtual const VarDecl *getThreadIDVariable() const = 0;
60 
61   /// \brief Emit the captured statement body.
62   virtual void EmitBody(CodeGenFunction &CGF, const Stmt *S) override;
63 
64   /// \brief Get an LValue for the current ThreadID variable.
65   /// \return LValue for thread id variable. This LValue always has type int32*.
66   virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF);
67 
68   CGOpenMPRegionKind getRegionKind() const { return RegionKind; }
69 
70   OpenMPDirectiveKind getDirectiveKind() const { return Kind; }
71 
72   static bool classof(const CGCapturedStmtInfo *Info) {
73     return Info->getKind() == CR_OpenMP;
74   }
75 
76 protected:
77   CGOpenMPRegionKind RegionKind;
78   const RegionCodeGenTy &CodeGen;
79   OpenMPDirectiveKind Kind;
80 };
81 
82 /// \brief API for captured statement code generation in OpenMP constructs.
83 class CGOpenMPOutlinedRegionInfo : public CGOpenMPRegionInfo {
84 public:
85   CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar,
86                              const RegionCodeGenTy &CodeGen,
87                              OpenMPDirectiveKind Kind)
88       : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind),
89         ThreadIDVar(ThreadIDVar) {
90     assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
91   }
92   /// \brief Get a variable or parameter for storing global thread id
93   /// inside OpenMP construct.
94   const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }
95 
96   /// \brief Get the name of the capture helper.
97   StringRef getHelperName() const override { return ".omp_outlined."; }
98 
99   static bool classof(const CGCapturedStmtInfo *Info) {
100     return CGOpenMPRegionInfo::classof(Info) &&
101            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
102                ParallelOutlinedRegion;
103   }
104 
105 private:
106   /// \brief A variable or parameter storing global thread id for OpenMP
107   /// constructs.
108   const VarDecl *ThreadIDVar;
109 };
110 
111 /// \brief API for captured statement code generation in OpenMP constructs.
112 class CGOpenMPTaskOutlinedRegionInfo : public CGOpenMPRegionInfo {
113 public:
114   CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS,
115                                  const VarDecl *ThreadIDVar,
116                                  const RegionCodeGenTy &CodeGen,
117                                  OpenMPDirectiveKind Kind)
118       : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind),
119         ThreadIDVar(ThreadIDVar) {
120     assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
121   }
122   /// \brief Get a variable or parameter for storing global thread id
123   /// inside OpenMP construct.
124   const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }
125 
126   /// \brief Get an LValue for the current ThreadID variable.
127   LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override;
128 
129   /// \brief Get the name of the capture helper.
130   StringRef getHelperName() const override { return ".omp_outlined."; }
131 
132   static bool classof(const CGCapturedStmtInfo *Info) {
133     return CGOpenMPRegionInfo::classof(Info) &&
134            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
135                TaskOutlinedRegion;
136   }
137 
138 private:
139   /// \brief A variable or parameter storing global thread id for OpenMP
140   /// constructs.
141   const VarDecl *ThreadIDVar;
142 };
143 
144 /// \brief API for inlined captured statement code generation in OpenMP
145 /// constructs.
146 class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo {
147 public:
148   CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI,
149                             const RegionCodeGenTy &CodeGen,
150                             OpenMPDirectiveKind Kind)
151       : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind), OldCSI(OldCSI),
152         OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {}
153   // \brief Retrieve the value of the context parameter.
154   llvm::Value *getContextValue() const override {
155     if (OuterRegionInfo)
156       return OuterRegionInfo->getContextValue();
157     llvm_unreachable("No context value for inlined OpenMP region");
158   }
159   virtual void setContextValue(llvm::Value *V) override {
160     if (OuterRegionInfo) {
161       OuterRegionInfo->setContextValue(V);
162       return;
163     }
164     llvm_unreachable("No context value for inlined OpenMP region");
165   }
166   /// \brief Lookup the captured field decl for a variable.
167   const FieldDecl *lookup(const VarDecl *VD) const override {
168     if (OuterRegionInfo)
169       return OuterRegionInfo->lookup(VD);
170     // If there is no outer outlined region,no need to lookup in a list of
171     // captured variables, we can use the original one.
172     return nullptr;
173   }
174   FieldDecl *getThisFieldDecl() const override {
175     if (OuterRegionInfo)
176       return OuterRegionInfo->getThisFieldDecl();
177     return nullptr;
178   }
179   /// \brief Get a variable or parameter for storing global thread id
180   /// inside OpenMP construct.
181   const VarDecl *getThreadIDVariable() const override {
182     if (OuterRegionInfo)
183       return OuterRegionInfo->getThreadIDVariable();
184     return nullptr;
185   }
186 
187   /// \brief Get the name of the capture helper.
188   StringRef getHelperName() const override {
189     if (auto *OuterRegionInfo = getOldCSI())
190       return OuterRegionInfo->getHelperName();
191     llvm_unreachable("No helper name for inlined OpenMP construct");
192   }
193 
194   CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; }
195 
196   static bool classof(const CGCapturedStmtInfo *Info) {
197     return CGOpenMPRegionInfo::classof(Info) &&
198            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion;
199   }
200 
201 private:
202   /// \brief CodeGen info about outer OpenMP region.
203   CodeGenFunction::CGCapturedStmtInfo *OldCSI;
204   CGOpenMPRegionInfo *OuterRegionInfo;
205 };
206 
207 /// \brief RAII for emitting code of OpenMP constructs.
208 class InlinedOpenMPRegionRAII {
209   CodeGenFunction &CGF;
210 
211 public:
212   /// \brief Constructs region for combined constructs.
213   /// \param CodeGen Code generation sequence for combined directives. Includes
214   /// a list of functions used for code generation of implicitly inlined
215   /// regions.
216   InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen,
217                           OpenMPDirectiveKind Kind)
218       : CGF(CGF) {
219     // Start emission for the construct.
220     CGF.CapturedStmtInfo =
221         new CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, CodeGen, Kind);
222   }
223   ~InlinedOpenMPRegionRAII() {
224     // Restore original CapturedStmtInfo only if we're done with code emission.
225     auto *OldCSI =
226         cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI();
227     delete CGF.CapturedStmtInfo;
228     CGF.CapturedStmtInfo = OldCSI;
229   }
230 };
231 
232 } // namespace
233 
234 LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) {
235   return CGF.MakeNaturalAlignAddrLValue(
236       CGF.Builder.CreateAlignedLoad(
237           CGF.GetAddrOfLocalVar(getThreadIDVariable()),
238           CGF.PointerAlignInBytes),
239       getThreadIDVariable()
240           ->getType()
241           ->castAs<PointerType>()
242           ->getPointeeType());
243 }
244 
245 void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt * /*S*/) {
246   // 1.2.2 OpenMP Language Terminology
247   // Structured block - An executable statement with a single entry at the
248   // top and a single exit at the bottom.
249   // The point of exit cannot be a branch out of the structured block.
250   // longjmp() and throw() must not violate the entry/exit criteria.
251   CGF.EHStack.pushTerminate();
252   {
253     CodeGenFunction::RunCleanupsScope Scope(CGF);
254     CodeGen(CGF);
255   }
256   CGF.EHStack.popTerminate();
257 }
258 
259 LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue(
260     CodeGenFunction &CGF) {
261   return CGF.MakeNaturalAlignAddrLValue(
262       CGF.GetAddrOfLocalVar(getThreadIDVariable()),
263       getThreadIDVariable()->getType());
264 }
265 
266 CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM)
267     : CGM(CGM), DefaultOpenMPPSource(nullptr), KmpRoutineEntryPtrTy(nullptr) {
268   IdentTy = llvm::StructType::create(
269       "ident_t", CGM.Int32Ty /* reserved_1 */, CGM.Int32Ty /* flags */,
270       CGM.Int32Ty /* reserved_2 */, CGM.Int32Ty /* reserved_3 */,
271       CGM.Int8PtrTy /* psource */, nullptr);
272   // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...)
273   llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty),
274                                llvm::PointerType::getUnqual(CGM.Int32Ty)};
275   Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true);
276   KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8);
277 }
278 
279 void CGOpenMPRuntime::clear() {
280   InternalVars.clear();
281 }
282 
283 llvm::Value *CGOpenMPRuntime::emitParallelOutlinedFunction(
284     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
285     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
286   assert(ThreadIDVar->getType()->isPointerType() &&
287          "thread id variable must be of type kmp_int32 *");
288   const CapturedStmt *CS = cast<CapturedStmt>(D.getAssociatedStmt());
289   CodeGenFunction CGF(CGM, true);
290   CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind);
291   CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
292   return CGF.GenerateCapturedStmtFunction(*CS);
293 }
294 
295 llvm::Value *CGOpenMPRuntime::emitTaskOutlinedFunction(
296     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
297     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
298   assert(!ThreadIDVar->getType()->isPointerType() &&
299          "thread id variable must be of type kmp_int32 for tasks");
300   auto *CS = cast<CapturedStmt>(D.getAssociatedStmt());
301   CodeGenFunction CGF(CGM, true);
302   CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen,
303                                         InnermostKind);
304   CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
305   return CGF.GenerateCapturedStmtFunction(*CS);
306 }
307 
308 llvm::Value *
309 CGOpenMPRuntime::getOrCreateDefaultLocation(OpenMPLocationFlags Flags) {
310   llvm::Value *Entry = OpenMPDefaultLocMap.lookup(Flags);
311   if (!Entry) {
312     if (!DefaultOpenMPPSource) {
313       // Initialize default location for psource field of ident_t structure of
314       // all ident_t objects. Format is ";file;function;line;column;;".
315       // Taken from
316       // http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp_str.c
317       DefaultOpenMPPSource =
318           CGM.GetAddrOfConstantCString(";unknown;unknown;0;0;;");
319       DefaultOpenMPPSource =
320           llvm::ConstantExpr::getBitCast(DefaultOpenMPPSource, CGM.Int8PtrTy);
321     }
322     auto DefaultOpenMPLocation = new llvm::GlobalVariable(
323         CGM.getModule(), IdentTy, /*isConstant*/ true,
324         llvm::GlobalValue::PrivateLinkage, /*Initializer*/ nullptr);
325     DefaultOpenMPLocation->setUnnamedAddr(true);
326 
327     llvm::Constant *Zero = llvm::ConstantInt::get(CGM.Int32Ty, 0, true);
328     llvm::Constant *Values[] = {Zero,
329                                 llvm::ConstantInt::get(CGM.Int32Ty, Flags),
330                                 Zero, Zero, DefaultOpenMPPSource};
331     llvm::Constant *Init = llvm::ConstantStruct::get(IdentTy, Values);
332     DefaultOpenMPLocation->setInitializer(Init);
333     OpenMPDefaultLocMap[Flags] = DefaultOpenMPLocation;
334     return DefaultOpenMPLocation;
335   }
336   return Entry;
337 }
338 
339 llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF,
340                                                  SourceLocation Loc,
341                                                  OpenMPLocationFlags Flags) {
342   // If no debug info is generated - return global default location.
343   if (CGM.getCodeGenOpts().getDebugInfo() == CodeGenOptions::NoDebugInfo ||
344       Loc.isInvalid())
345     return getOrCreateDefaultLocation(Flags);
346 
347   assert(CGF.CurFn && "No function in current CodeGenFunction.");
348 
349   llvm::Value *LocValue = nullptr;
350   auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
351   if (I != OpenMPLocThreadIDMap.end())
352     LocValue = I->second.DebugLoc;
353   // OpenMPLocThreadIDMap may have null DebugLoc and non-null ThreadID, if
354   // GetOpenMPThreadID was called before this routine.
355   if (LocValue == nullptr) {
356     // Generate "ident_t .kmpc_loc.addr;"
357     llvm::AllocaInst *AI = CGF.CreateTempAlloca(IdentTy, ".kmpc_loc.addr");
358     AI->setAlignment(CGM.getDataLayout().getPrefTypeAlignment(IdentTy));
359     auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
360     Elem.second.DebugLoc = AI;
361     LocValue = AI;
362 
363     CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
364     CGF.Builder.SetInsertPoint(CGF.AllocaInsertPt);
365     CGF.Builder.CreateMemCpy(LocValue, getOrCreateDefaultLocation(Flags),
366                              llvm::ConstantExpr::getSizeOf(IdentTy),
367                              CGM.PointerAlignInBytes);
368   }
369 
370   // char **psource = &.kmpc_loc_<flags>.addr.psource;
371   auto *PSource = CGF.Builder.CreateConstInBoundsGEP2_32(IdentTy, LocValue, 0,
372                                                          IdentField_PSource);
373 
374   auto OMPDebugLoc = OpenMPDebugLocMap.lookup(Loc.getRawEncoding());
375   if (OMPDebugLoc == nullptr) {
376     SmallString<128> Buffer2;
377     llvm::raw_svector_ostream OS2(Buffer2);
378     // Build debug location
379     PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
380     OS2 << ";" << PLoc.getFilename() << ";";
381     if (const FunctionDecl *FD =
382             dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl)) {
383       OS2 << FD->getQualifiedNameAsString();
384     }
385     OS2 << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;";
386     OMPDebugLoc = CGF.Builder.CreateGlobalStringPtr(OS2.str());
387     OpenMPDebugLocMap[Loc.getRawEncoding()] = OMPDebugLoc;
388   }
389   // *psource = ";<File>;<Function>;<Line>;<Column>;;";
390   CGF.Builder.CreateStore(OMPDebugLoc, PSource);
391 
392   return LocValue;
393 }
394 
395 llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF,
396                                           SourceLocation Loc) {
397   assert(CGF.CurFn && "No function in current CodeGenFunction.");
398 
399   llvm::Value *ThreadID = nullptr;
400   // Check whether we've already cached a load of the thread id in this
401   // function.
402   auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
403   if (I != OpenMPLocThreadIDMap.end()) {
404     ThreadID = I->second.ThreadID;
405     if (ThreadID != nullptr)
406       return ThreadID;
407   }
408   if (auto OMPRegionInfo =
409           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
410     if (OMPRegionInfo->getThreadIDVariable()) {
411       // Check if this an outlined function with thread id passed as argument.
412       auto LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF);
413       ThreadID = CGF.EmitLoadOfLValue(LVal, Loc).getScalarVal();
414       // If value loaded in entry block, cache it and use it everywhere in
415       // function.
416       if (CGF.Builder.GetInsertBlock() == CGF.AllocaInsertPt->getParent()) {
417         auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
418         Elem.second.ThreadID = ThreadID;
419       }
420       return ThreadID;
421     }
422   }
423 
424   // This is not an outlined function region - need to call __kmpc_int32
425   // kmpc_global_thread_num(ident_t *loc).
426   // Generate thread id value and cache this value for use across the
427   // function.
428   CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
429   CGF.Builder.SetInsertPoint(CGF.AllocaInsertPt);
430   ThreadID =
431       CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_global_thread_num),
432                           emitUpdateLocation(CGF, Loc));
433   auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
434   Elem.second.ThreadID = ThreadID;
435   return ThreadID;
436 }
437 
438 void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) {
439   assert(CGF.CurFn && "No function in current CodeGenFunction.");
440   if (OpenMPLocThreadIDMap.count(CGF.CurFn))
441     OpenMPLocThreadIDMap.erase(CGF.CurFn);
442 }
443 
444 llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() {
445   return llvm::PointerType::getUnqual(IdentTy);
446 }
447 
448 llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() {
449   return llvm::PointerType::getUnqual(Kmpc_MicroTy);
450 }
451 
452 llvm::Constant *
453 CGOpenMPRuntime::createRuntimeFunction(OpenMPRTLFunction Function) {
454   llvm::Constant *RTLFn = nullptr;
455   switch (Function) {
456   case OMPRTL__kmpc_fork_call: {
457     // Build void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro
458     // microtask, ...);
459     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
460                                 getKmpc_MicroPointerTy()};
461     llvm::FunctionType *FnTy =
462         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ true);
463     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_fork_call");
464     break;
465   }
466   case OMPRTL__kmpc_global_thread_num: {
467     // Build kmp_int32 __kmpc_global_thread_num(ident_t *loc);
468     llvm::Type *TypeParams[] = {getIdentTyPointerTy()};
469     llvm::FunctionType *FnTy =
470         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
471     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_global_thread_num");
472     break;
473   }
474   case OMPRTL__kmpc_threadprivate_cached: {
475     // Build void *__kmpc_threadprivate_cached(ident_t *loc,
476     // kmp_int32 global_tid, void *data, size_t size, void ***cache);
477     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
478                                 CGM.VoidPtrTy, CGM.SizeTy,
479                                 CGM.VoidPtrTy->getPointerTo()->getPointerTo()};
480     llvm::FunctionType *FnTy =
481         llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg*/ false);
482     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_cached");
483     break;
484   }
485   case OMPRTL__kmpc_critical: {
486     // Build void __kmpc_critical(ident_t *loc, kmp_int32 global_tid,
487     // kmp_critical_name *crit);
488     llvm::Type *TypeParams[] = {
489         getIdentTyPointerTy(), CGM.Int32Ty,
490         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
491     llvm::FunctionType *FnTy =
492         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
493     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_critical");
494     break;
495   }
496   case OMPRTL__kmpc_threadprivate_register: {
497     // Build void __kmpc_threadprivate_register(ident_t *, void *data,
498     // kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor);
499     // typedef void *(*kmpc_ctor)(void *);
500     auto KmpcCtorTy =
501         llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
502                                 /*isVarArg*/ false)->getPointerTo();
503     // typedef void *(*kmpc_cctor)(void *, void *);
504     llvm::Type *KmpcCopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
505     auto KmpcCopyCtorTy =
506         llvm::FunctionType::get(CGM.VoidPtrTy, KmpcCopyCtorTyArgs,
507                                 /*isVarArg*/ false)->getPointerTo();
508     // typedef void (*kmpc_dtor)(void *);
509     auto KmpcDtorTy =
510         llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy, /*isVarArg*/ false)
511             ->getPointerTo();
512     llvm::Type *FnTyArgs[] = {getIdentTyPointerTy(), CGM.VoidPtrTy, KmpcCtorTy,
513                               KmpcCopyCtorTy, KmpcDtorTy};
514     auto FnTy = llvm::FunctionType::get(CGM.VoidTy, FnTyArgs,
515                                         /*isVarArg*/ false);
516     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_register");
517     break;
518   }
519   case OMPRTL__kmpc_end_critical: {
520     // Build void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid,
521     // kmp_critical_name *crit);
522     llvm::Type *TypeParams[] = {
523         getIdentTyPointerTy(), CGM.Int32Ty,
524         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
525     llvm::FunctionType *FnTy =
526         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
527     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_critical");
528     break;
529   }
530   case OMPRTL__kmpc_cancel_barrier: {
531     // Build kmp_int32 __kmpc_cancel_barrier(ident_t *loc, kmp_int32
532     // global_tid);
533     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
534     llvm::FunctionType *FnTy =
535         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
536     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_cancel_barrier");
537     break;
538   }
539   case OMPRTL__kmpc_barrier: {
540     // Build void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid);
541     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
542     llvm::FunctionType *FnTy =
543         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
544     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_barrier");
545     break;
546   }
547   case OMPRTL__kmpc_for_static_fini: {
548     // Build void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid);
549     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
550     llvm::FunctionType *FnTy =
551         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
552     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_for_static_fini");
553     break;
554   }
555   case OMPRTL__kmpc_push_num_threads: {
556     // Build void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
557     // kmp_int32 num_threads)
558     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
559                                 CGM.Int32Ty};
560     llvm::FunctionType *FnTy =
561         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
562     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_num_threads");
563     break;
564   }
565   case OMPRTL__kmpc_serialized_parallel: {
566     // Build void __kmpc_serialized_parallel(ident_t *loc, kmp_int32
567     // global_tid);
568     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
569     llvm::FunctionType *FnTy =
570         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
571     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_serialized_parallel");
572     break;
573   }
574   case OMPRTL__kmpc_end_serialized_parallel: {
575     // Build void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32
576     // global_tid);
577     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
578     llvm::FunctionType *FnTy =
579         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
580     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_serialized_parallel");
581     break;
582   }
583   case OMPRTL__kmpc_flush: {
584     // Build void __kmpc_flush(ident_t *loc);
585     llvm::Type *TypeParams[] = {getIdentTyPointerTy()};
586     llvm::FunctionType *FnTy =
587         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
588     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_flush");
589     break;
590   }
591   case OMPRTL__kmpc_master: {
592     // Build kmp_int32 __kmpc_master(ident_t *loc, kmp_int32 global_tid);
593     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
594     llvm::FunctionType *FnTy =
595         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
596     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_master");
597     break;
598   }
599   case OMPRTL__kmpc_end_master: {
600     // Build void __kmpc_end_master(ident_t *loc, kmp_int32 global_tid);
601     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
602     llvm::FunctionType *FnTy =
603         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
604     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_master");
605     break;
606   }
607   case OMPRTL__kmpc_omp_taskyield: {
608     // Build kmp_int32 __kmpc_omp_taskyield(ident_t *, kmp_int32 global_tid,
609     // int end_part);
610     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
611     llvm::FunctionType *FnTy =
612         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
613     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_taskyield");
614     break;
615   }
616   case OMPRTL__kmpc_single: {
617     // Build kmp_int32 __kmpc_single(ident_t *loc, kmp_int32 global_tid);
618     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
619     llvm::FunctionType *FnTy =
620         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
621     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_single");
622     break;
623   }
624   case OMPRTL__kmpc_end_single: {
625     // Build void __kmpc_end_single(ident_t *loc, kmp_int32 global_tid);
626     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
627     llvm::FunctionType *FnTy =
628         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
629     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_single");
630     break;
631   }
632   case OMPRTL__kmpc_omp_task_alloc: {
633     // Build kmp_task_t *__kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
634     // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
635     // kmp_routine_entry_t *task_entry);
636     assert(KmpRoutineEntryPtrTy != nullptr &&
637            "Type kmp_routine_entry_t must be created.");
638     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty,
639                                 CGM.SizeTy, CGM.SizeTy, KmpRoutineEntryPtrTy};
640     // Return void * and then cast to particular kmp_task_t type.
641     llvm::FunctionType *FnTy =
642         llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
643     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_alloc");
644     break;
645   }
646   case OMPRTL__kmpc_omp_task: {
647     // Build kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t
648     // *new_task);
649     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
650                                 CGM.VoidPtrTy};
651     llvm::FunctionType *FnTy =
652         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
653     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task");
654     break;
655   }
656   case OMPRTL__kmpc_copyprivate: {
657     // Build void __kmpc_copyprivate(ident_t *loc, kmp_int32 global_tid,
658     // size_t cpy_size, void *cpy_data, void(*cpy_func)(void *, void *),
659     // kmp_int32 didit);
660     llvm::Type *CpyTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
661     auto *CpyFnTy =
662         llvm::FunctionType::get(CGM.VoidTy, CpyTypeParams, /*isVarArg=*/false);
663     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.SizeTy,
664                                 CGM.VoidPtrTy, CpyFnTy->getPointerTo(),
665                                 CGM.Int32Ty};
666     llvm::FunctionType *FnTy =
667         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
668     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_copyprivate");
669     break;
670   }
671   case OMPRTL__kmpc_reduce: {
672     // Build kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid,
673     // kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void
674     // (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name *lck);
675     llvm::Type *ReduceTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
676     auto *ReduceFnTy = llvm::FunctionType::get(CGM.VoidTy, ReduceTypeParams,
677                                                /*isVarArg=*/false);
678     llvm::Type *TypeParams[] = {
679         getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, CGM.SizeTy,
680         CGM.VoidPtrTy, ReduceFnTy->getPointerTo(),
681         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
682     llvm::FunctionType *FnTy =
683         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
684     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_reduce");
685     break;
686   }
687   case OMPRTL__kmpc_reduce_nowait: {
688     // Build kmp_int32 __kmpc_reduce_nowait(ident_t *loc, kmp_int32
689     // global_tid, kmp_int32 num_vars, size_t reduce_size, void *reduce_data,
690     // void (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name
691     // *lck);
692     llvm::Type *ReduceTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
693     auto *ReduceFnTy = llvm::FunctionType::get(CGM.VoidTy, ReduceTypeParams,
694                                                /*isVarArg=*/false);
695     llvm::Type *TypeParams[] = {
696         getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, CGM.SizeTy,
697         CGM.VoidPtrTy, ReduceFnTy->getPointerTo(),
698         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
699     llvm::FunctionType *FnTy =
700         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
701     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_reduce_nowait");
702     break;
703   }
704   case OMPRTL__kmpc_end_reduce: {
705     // Build void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,
706     // kmp_critical_name *lck);
707     llvm::Type *TypeParams[] = {
708         getIdentTyPointerTy(), CGM.Int32Ty,
709         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
710     llvm::FunctionType *FnTy =
711         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
712     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_reduce");
713     break;
714   }
715   case OMPRTL__kmpc_end_reduce_nowait: {
716     // Build __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
717     // kmp_critical_name *lck);
718     llvm::Type *TypeParams[] = {
719         getIdentTyPointerTy(), CGM.Int32Ty,
720         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
721     llvm::FunctionType *FnTy =
722         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
723     RTLFn =
724         CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_reduce_nowait");
725     break;
726   }
727   case OMPRTL__kmpc_omp_task_begin_if0: {
    // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
730     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
731                                 CGM.VoidPtrTy};
732     llvm::FunctionType *FnTy =
733         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
734     RTLFn =
735         CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_begin_if0");
736     break;
737   }
738   case OMPRTL__kmpc_omp_task_complete_if0: {
    // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
741     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
742                                 CGM.VoidPtrTy};
743     llvm::FunctionType *FnTy =
744         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
745     RTLFn = CGM.CreateRuntimeFunction(FnTy,
746                                       /*Name=*/"__kmpc_omp_task_complete_if0");
747     break;
748   }
749   case OMPRTL__kmpc_ordered: {
750     // Build void __kmpc_ordered(ident_t *loc, kmp_int32 global_tid);
751     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
752     llvm::FunctionType *FnTy =
753         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
754     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_ordered");
755     break;
756   }
757   case OMPRTL__kmpc_end_ordered: {
758     // Build void __kmpc_end_ordered(ident_t *loc, kmp_int32 global_tid);
759     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
760     llvm::FunctionType *FnTy =
761         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
762     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_ordered");
763     break;
764   }
765   case OMPRTL__kmpc_omp_taskwait: {
766     // Build kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32 global_tid);
767     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
768     llvm::FunctionType *FnTy =
769         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
770     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_omp_taskwait");
771     break;
772   }
773   case OMPRTL__kmpc_taskgroup: {
774     // Build void __kmpc_taskgroup(ident_t *loc, kmp_int32 global_tid);
775     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
776     llvm::FunctionType *FnTy =
777         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
778     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_taskgroup");
779     break;
780   }
781   case OMPRTL__kmpc_end_taskgroup: {
782     // Build void __kmpc_end_taskgroup(ident_t *loc, kmp_int32 global_tid);
783     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
784     llvm::FunctionType *FnTy =
785         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
786     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_taskgroup");
787     break;
788   }
789   case OMPRTL__kmpc_push_proc_bind: {
790     // Build void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
791     // int proc_bind)
792     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
793     llvm::FunctionType *FnTy =
794         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
795     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_proc_bind");
796     break;
797   }
798   case OMPRTL__kmpc_omp_task_with_deps: {
799     // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
800     // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
801     // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list);
802     llvm::Type *TypeParams[] = {
803         getIdentTyPointerTy(), CGM.Int32Ty, CGM.VoidPtrTy, CGM.Int32Ty,
804         CGM.VoidPtrTy,         CGM.Int32Ty, CGM.VoidPtrTy};
805     llvm::FunctionType *FnTy =
806         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
807     RTLFn =
808         CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_with_deps");
809     break;
810   }
811   case OMPRTL__kmpc_omp_wait_deps: {
812     // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
813     // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 ndeps_noalias,
814     // kmp_depend_info_t *noalias_dep_list);
815     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
816                                 CGM.Int32Ty,           CGM.VoidPtrTy,
817                                 CGM.Int32Ty,           CGM.VoidPtrTy};
818     llvm::FunctionType *FnTy =
819         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
820     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_wait_deps");
821     break;
822   }
823   case OMPRTL__kmpc_cancellationpoint: {
824     // Build kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
825     // global_tid, kmp_int32 cncl_kind)
826     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
827     llvm::FunctionType *FnTy =
828         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
829     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_cancellationpoint");
830     break;
831   }
832   case OMPRTL__kmpc_cancel: {
833     // Build kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
834     // kmp_int32 cncl_kind)
835     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
836     llvm::FunctionType *FnTy =
837         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
838     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_cancel");
839     break;
840   }
841   }
842   return RTLFn;
843 }
844 
845 llvm::Constant *CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize,
846                                                              bool IVSigned) {
847   assert((IVSize == 32 || IVSize == 64) &&
848          "IV size is not compatible with the omp runtime");
849   auto Name = IVSize == 32 ? (IVSigned ? "__kmpc_for_static_init_4"
850                                        : "__kmpc_for_static_init_4u")
851                            : (IVSigned ? "__kmpc_for_static_init_8"
852                                        : "__kmpc_for_static_init_8u");
853   auto ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
854   auto PtrTy = llvm::PointerType::getUnqual(ITy);
855   llvm::Type *TypeParams[] = {
856     getIdentTyPointerTy(),                     // loc
857     CGM.Int32Ty,                               // tid
858     CGM.Int32Ty,                               // schedtype
859     llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
860     PtrTy,                                     // p_lower
861     PtrTy,                                     // p_upper
862     PtrTy,                                     // p_stride
863     ITy,                                       // incr
864     ITy                                        // chunk
865   };
866   llvm::FunctionType *FnTy =
867       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
868   return CGM.CreateRuntimeFunction(FnTy, Name);
869 }
870 
871 llvm::Constant *CGOpenMPRuntime::createDispatchInitFunction(unsigned IVSize,
872                                                             bool IVSigned) {
873   assert((IVSize == 32 || IVSize == 64) &&
874          "IV size is not compatible with the omp runtime");
875   auto Name =
876       IVSize == 32
877           ? (IVSigned ? "__kmpc_dispatch_init_4" : "__kmpc_dispatch_init_4u")
878           : (IVSigned ? "__kmpc_dispatch_init_8" : "__kmpc_dispatch_init_8u");
879   auto ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
880   llvm::Type *TypeParams[] = { getIdentTyPointerTy(), // loc
881                                CGM.Int32Ty,           // tid
882                                CGM.Int32Ty,           // schedtype
883                                ITy,                   // lower
884                                ITy,                   // upper
885                                ITy,                   // stride
886                                ITy                    // chunk
887   };
888   llvm::FunctionType *FnTy =
889       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
890   return CGM.CreateRuntimeFunction(FnTy, Name);
891 }
892 
893 llvm::Constant *CGOpenMPRuntime::createDispatchFiniFunction(unsigned IVSize,
894                                                             bool IVSigned) {
895   assert((IVSize == 32 || IVSize == 64) &&
896          "IV size is not compatible with the omp runtime");
897   auto Name =
898       IVSize == 32
899           ? (IVSigned ? "__kmpc_dispatch_fini_4" : "__kmpc_dispatch_fini_4u")
900           : (IVSigned ? "__kmpc_dispatch_fini_8" : "__kmpc_dispatch_fini_8u");
901   llvm::Type *TypeParams[] = {
902       getIdentTyPointerTy(), // loc
903       CGM.Int32Ty,           // tid
904   };
905   llvm::FunctionType *FnTy =
906       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
907   return CGM.CreateRuntimeFunction(FnTy, Name);
908 }
909 
910 llvm::Constant *CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize,
911                                                             bool IVSigned) {
912   assert((IVSize == 32 || IVSize == 64) &&
913          "IV size is not compatible with the omp runtime");
914   auto Name =
915       IVSize == 32
916           ? (IVSigned ? "__kmpc_dispatch_next_4" : "__kmpc_dispatch_next_4u")
917           : (IVSigned ? "__kmpc_dispatch_next_8" : "__kmpc_dispatch_next_8u");
918   auto ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
919   auto PtrTy = llvm::PointerType::getUnqual(ITy);
920   llvm::Type *TypeParams[] = {
921     getIdentTyPointerTy(),                     // loc
922     CGM.Int32Ty,                               // tid
923     llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
924     PtrTy,                                     // p_lower
925     PtrTy,                                     // p_upper
926     PtrTy                                      // p_stride
927   };
928   llvm::FunctionType *FnTy =
929       llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
930   return CGM.CreateRuntimeFunction(FnTy, Name);
931 }
932 
933 llvm::Constant *
934 CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) {
935   assert(!CGM.getLangOpts().OpenMPUseTLS ||
936          !CGM.getContext().getTargetInfo().isTLSSupported());
937   // Lookup the entry, lazily creating it if necessary.
938   return getOrCreateInternalVariable(CGM.Int8PtrPtrTy,
939                                      Twine(CGM.getMangledName(VD)) + ".cache.");
940 }
941 
942 llvm::Value *CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
943                                                      const VarDecl *VD,
944                                                      llvm::Value *VDAddr,
945                                                      SourceLocation Loc) {
946   if (CGM.getLangOpts().OpenMPUseTLS &&
947       CGM.getContext().getTargetInfo().isTLSSupported())
948     return VDAddr;
949 
950   auto VarTy = VDAddr->getType()->getPointerElementType();
951   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
952                          CGF.Builder.CreatePointerCast(VDAddr, CGM.Int8PtrTy),
953                          CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)),
954                          getOrCreateThreadPrivateCache(VD)};
955   return CGF.EmitRuntimeCall(
956       createRuntimeFunction(OMPRTL__kmpc_threadprivate_cached), Args);
957 }
958 
959 void CGOpenMPRuntime::emitThreadPrivateVarInit(
960     CodeGenFunction &CGF, llvm::Value *VDAddr, llvm::Value *Ctor,
961     llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) {
962   // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime
963   // library.
964   auto OMPLoc = emitUpdateLocation(CGF, Loc);
965   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_global_thread_num),
966                       OMPLoc);
967   // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor)
968   // to register constructor/destructor for variable.
969   llvm::Value *Args[] = {OMPLoc,
970                          CGF.Builder.CreatePointerCast(VDAddr, CGM.VoidPtrTy),
971                          Ctor, CopyCtor, Dtor};
972   CGF.EmitRuntimeCall(
973       createRuntimeFunction(OMPRTL__kmpc_threadprivate_register), Args);
974 }
975 
976 llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition(
977     const VarDecl *VD, llvm::Value *VDAddr, SourceLocation Loc,
978     bool PerformInit, CodeGenFunction *CGF) {
979   if (CGM.getLangOpts().OpenMPUseTLS &&
980       CGM.getContext().getTargetInfo().isTLSSupported())
981     return nullptr;
982 
983   VD = VD->getDefinition(CGM.getContext());
984   if (VD && ThreadPrivateWithDefinition.count(VD) == 0) {
985     ThreadPrivateWithDefinition.insert(VD);
986     QualType ASTTy = VD->getType();
987 
988     llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr;
989     auto Init = VD->getAnyInitializer();
990     if (CGM.getLangOpts().CPlusPlus && PerformInit) {
991       // Generate function that re-emits the declaration's initializer into the
992       // threadprivate copy of the variable VD
993       CodeGenFunction CtorCGF(CGM);
994       FunctionArgList Args;
995       ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, SourceLocation(),
996                             /*Id=*/nullptr, CGM.getContext().VoidPtrTy);
997       Args.push_back(&Dst);
998 
999       auto &FI = CGM.getTypes().arrangeFreeFunctionDeclaration(
1000           CGM.getContext().VoidPtrTy, Args, FunctionType::ExtInfo(),
1001           /*isVariadic=*/false);
1002       auto FTy = CGM.getTypes().GetFunctionType(FI);
1003       auto Fn = CGM.CreateGlobalInitOrDestructFunction(
1004           FTy, ".__kmpc_global_ctor_.", Loc);
1005       CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI,
1006                             Args, SourceLocation());
1007       auto ArgVal = CtorCGF.EmitLoadOfScalar(
1008           CtorCGF.GetAddrOfLocalVar(&Dst),
1009           /*Volatile=*/false, CGM.PointerAlignInBytes,
1010           CGM.getContext().VoidPtrTy, Dst.getLocation());
1011       auto Arg = CtorCGF.Builder.CreatePointerCast(
1012           ArgVal,
1013           CtorCGF.ConvertTypeForMem(CGM.getContext().getPointerType(ASTTy)));
1014       CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(),
1015                                /*IsInitializer=*/true);
1016       ArgVal = CtorCGF.EmitLoadOfScalar(
1017           CtorCGF.GetAddrOfLocalVar(&Dst),
1018           /*Volatile=*/false, CGM.PointerAlignInBytes,
1019           CGM.getContext().VoidPtrTy, Dst.getLocation());
1020       CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue);
1021       CtorCGF.FinishFunction();
1022       Ctor = Fn;
1023     }
1024     if (VD->getType().isDestructedType() != QualType::DK_none) {
1025       // Generate function that emits destructor call for the threadprivate copy
1026       // of the variable VD
1027       CodeGenFunction DtorCGF(CGM);
1028       FunctionArgList Args;
1029       ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, SourceLocation(),
1030                             /*Id=*/nullptr, CGM.getContext().VoidPtrTy);
1031       Args.push_back(&Dst);
1032 
1033       auto &FI = CGM.getTypes().arrangeFreeFunctionDeclaration(
1034           CGM.getContext().VoidTy, Args, FunctionType::ExtInfo(),
1035           /*isVariadic=*/false);
1036       auto FTy = CGM.getTypes().GetFunctionType(FI);
1037       auto Fn = CGM.CreateGlobalInitOrDestructFunction(
1038           FTy, ".__kmpc_global_dtor_.", Loc);
1039       DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args,
1040                             SourceLocation());
1041       auto ArgVal = DtorCGF.EmitLoadOfScalar(
1042           DtorCGF.GetAddrOfLocalVar(&Dst),
1043           /*Volatile=*/false, CGM.PointerAlignInBytes,
1044           CGM.getContext().VoidPtrTy, Dst.getLocation());
1045       DtorCGF.emitDestroy(ArgVal, ASTTy,
1046                           DtorCGF.getDestroyer(ASTTy.isDestructedType()),
1047                           DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
1048       DtorCGF.FinishFunction();
1049       Dtor = Fn;
1050     }
1051     // Do not emit init function if it is not required.
1052     if (!Ctor && !Dtor)
1053       return nullptr;
1054 
1055     llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
1056     auto CopyCtorTy =
1057         llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs,
1058                                 /*isVarArg=*/false)->getPointerTo();
1059     // Copying constructor for the threadprivate variable.
1060     // Must be NULL - reserved by runtime, but currently it requires that this
1061     // parameter is always NULL. Otherwise it fires assertion.
1062     CopyCtor = llvm::Constant::getNullValue(CopyCtorTy);
1063     if (Ctor == nullptr) {
1064       auto CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
1065                                             /*isVarArg=*/false)->getPointerTo();
1066       Ctor = llvm::Constant::getNullValue(CtorTy);
1067     }
1068     if (Dtor == nullptr) {
1069       auto DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy,
1070                                             /*isVarArg=*/false)->getPointerTo();
1071       Dtor = llvm::Constant::getNullValue(DtorTy);
1072     }
1073     if (!CGF) {
1074       auto InitFunctionTy =
1075           llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false);
1076       auto InitFunction = CGM.CreateGlobalInitOrDestructFunction(
1077           InitFunctionTy, ".__omp_threadprivate_init_.");
1078       CodeGenFunction InitCGF(CGM);
1079       FunctionArgList ArgList;
1080       InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction,
1081                             CGM.getTypes().arrangeNullaryFunction(), ArgList,
1082                             Loc);
1083       emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
1084       InitCGF.FinishFunction();
1085       return InitFunction;
1086     }
1087     emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
1088   }
1089   return nullptr;
1090 }
1091 
1092 /// \brief Emits code for OpenMP 'if' clause using specified \a CodeGen
1093 /// function. Here is the logic:
1094 /// if (Cond) {
1095 ///   ThenGen();
1096 /// } else {
1097 ///   ElseGen();
1098 /// }
1099 static void emitOMPIfClause(CodeGenFunction &CGF, const Expr *Cond,
1100                             const RegionCodeGenTy &ThenGen,
1101                             const RegionCodeGenTy &ElseGen) {
1102   CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange());
1103 
1104   // If the condition constant folds and can be elided, try to avoid emitting
1105   // the condition and the dead arm of the if/else.
1106   bool CondConstant;
1107   if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) {
1108     CodeGenFunction::RunCleanupsScope Scope(CGF);
1109     if (CondConstant) {
1110       ThenGen(CGF);
1111     } else {
1112       ElseGen(CGF);
1113     }
1114     return;
1115   }
1116 
1117   // Otherwise, the condition did not fold, or we couldn't elide it.  Just
1118   // emit the conditional branch.
1119   auto ThenBlock = CGF.createBasicBlock("omp_if.then");
1120   auto ElseBlock = CGF.createBasicBlock("omp_if.else");
1121   auto ContBlock = CGF.createBasicBlock("omp_if.end");
1122   CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0);
1123 
1124   // Emit the 'then' code.
1125   CGF.EmitBlock(ThenBlock);
1126   {
1127     CodeGenFunction::RunCleanupsScope ThenScope(CGF);
1128     ThenGen(CGF);
1129   }
1130   CGF.EmitBranch(ContBlock);
1131   // Emit the 'else' code if present.
1132   {
1133     // There is no need to emit line number for unconditional branch.
1134     auto NL = ApplyDebugLocation::CreateEmpty(CGF);
1135     CGF.EmitBlock(ElseBlock);
1136   }
1137   {
1138     CodeGenFunction::RunCleanupsScope ThenScope(CGF);
1139     ElseGen(CGF);
1140   }
1141   {
1142     // There is no need to emit line number for unconditional branch.
1143     auto NL = ApplyDebugLocation::CreateEmpty(CGF);
1144     CGF.EmitBranch(ContBlock);
1145   }
1146   // Emit the continuation block for code after the if.
1147   CGF.EmitBlock(ContBlock, /*IsFinished=*/true);
1148 }
1149 
1150 void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
1151                                        llvm::Value *OutlinedFn,
1152                                        llvm::Value *CapturedStruct,
1153                                        const Expr *IfCond) {
1154   auto *RTLoc = emitUpdateLocation(CGF, Loc);
1155   auto &&ThenGen =
1156       [this, OutlinedFn, CapturedStruct, RTLoc](CodeGenFunction &CGF) {
1157         // Build call __kmpc_fork_call(loc, 1, microtask,
1158         // captured_struct/*context*/)
1159         llvm::Value *Args[] = {
1160             RTLoc,
1161             CGF.Builder.getInt32(
1162                 1), // Number of arguments after 'microtask' argument
1163             // (there is only one additional argument - 'context')
1164             CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy()),
1165             CGF.EmitCastToVoidPtr(CapturedStruct)};
1166         auto RTLFn = createRuntimeFunction(OMPRTL__kmpc_fork_call);
1167         CGF.EmitRuntimeCall(RTLFn, Args);
1168       };
1169   auto &&ElseGen = [this, OutlinedFn, CapturedStruct, RTLoc, Loc](
1170       CodeGenFunction &CGF) {
1171     auto ThreadID = getThreadID(CGF, Loc);
1172     // Build calls:
1173     // __kmpc_serialized_parallel(&Loc, GTid);
1174     llvm::Value *Args[] = {RTLoc, ThreadID};
1175     CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_serialized_parallel),
1176                         Args);
1177 
1178     // OutlinedFn(&GTid, &zero, CapturedStruct);
1179     auto ThreadIDAddr = emitThreadIDAddress(CGF, Loc);
1180     auto Int32Ty = CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32,
1181                                                           /*Signed*/ true);
1182     auto ZeroAddr = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".zero.addr");
1183     CGF.InitTempAlloca(ZeroAddr, CGF.Builder.getInt32(/*C*/ 0));
1184     llvm::Value *OutlinedFnArgs[] = {ThreadIDAddr, ZeroAddr, CapturedStruct};
1185     CGF.EmitCallOrInvoke(OutlinedFn, OutlinedFnArgs);
1186 
1187     // __kmpc_end_serialized_parallel(&Loc, GTid);
1188     llvm::Value *EndArgs[] = {emitUpdateLocation(CGF, Loc), ThreadID};
1189     CGF.EmitRuntimeCall(
1190         createRuntimeFunction(OMPRTL__kmpc_end_serialized_parallel), EndArgs);
1191   };
1192   if (IfCond) {
1193     emitOMPIfClause(CGF, IfCond, ThenGen, ElseGen);
1194   } else {
1195     CodeGenFunction::RunCleanupsScope Scope(CGF);
1196     ThenGen(CGF);
1197   }
1198 }
1199 
1200 // If we're inside an (outlined) parallel region, use the region info's
1201 // thread-ID variable (it is passed in a first argument of the outlined function
1202 // as "kmp_int32 *gtid"). Otherwise, if we're not inside parallel region, but in
1203 // regular serial code region, get thread ID by calling kmp_int32
1204 // kmpc_global_thread_num(ident_t *loc), stash this thread ID in a temporary and
1205 // return the address of that temp.
1206 llvm::Value *CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF,
1207                                                   SourceLocation Loc) {
1208   if (auto OMPRegionInfo =
1209           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
1210     if (OMPRegionInfo->getThreadIDVariable())
1211       return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress();
1212 
1213   auto ThreadID = getThreadID(CGF, Loc);
1214   auto Int32Ty =
1215       CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true);
1216   auto ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp.");
1217   CGF.EmitStoreOfScalar(ThreadID,
1218                         CGF.MakeNaturalAlignAddrLValue(ThreadIDTemp, Int32Ty));
1219 
1220   return ThreadIDTemp;
1221 }
1222 
1223 llvm::Constant *
1224 CGOpenMPRuntime::getOrCreateInternalVariable(llvm::Type *Ty,
1225                                              const llvm::Twine &Name) {
1226   SmallString<256> Buffer;
1227   llvm::raw_svector_ostream Out(Buffer);
1228   Out << Name;
1229   auto RuntimeName = Out.str();
1230   auto &Elem = *InternalVars.insert(std::make_pair(RuntimeName, nullptr)).first;
1231   if (Elem.second) {
1232     assert(Elem.second->getType()->getPointerElementType() == Ty &&
1233            "OMP internal variable has different type than requested");
1234     return &*Elem.second;
1235   }
1236 
1237   return Elem.second = new llvm::GlobalVariable(
1238              CGM.getModule(), Ty, /*IsConstant*/ false,
1239              llvm::GlobalValue::CommonLinkage, llvm::Constant::getNullValue(Ty),
1240              Elem.first());
1241 }
1242 
1243 llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) {
1244   llvm::Twine Name(".gomp_critical_user_", CriticalName);
1245   return getOrCreateInternalVariable(KmpCriticalNameTy, Name.concat(".var"));
1246 }
1247 
1248 namespace {
1249 template <size_t N> class CallEndCleanup final : public EHScopeStack::Cleanup {
1250   llvm::Value *Callee;
1251   llvm::Value *Args[N];
1252 
1253 public:
1254   CallEndCleanup(llvm::Value *Callee, ArrayRef<llvm::Value *> CleanupArgs)
1255       : Callee(Callee) {
1256     assert(CleanupArgs.size() == N);
1257     std::copy(CleanupArgs.begin(), CleanupArgs.end(), std::begin(Args));
1258   }
1259   void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
1260     CGF.EmitRuntimeCall(Callee, Args);
1261   }
1262 };
1263 } // namespace
1264 
1265 void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF,
1266                                          StringRef CriticalName,
1267                                          const RegionCodeGenTy &CriticalOpGen,
1268                                          SourceLocation Loc) {
1269   // __kmpc_critical(ident_t *, gtid, Lock);
1270   // CriticalOpGen();
1271   // __kmpc_end_critical(ident_t *, gtid, Lock);
1272   // Prepare arguments and build a call to __kmpc_critical
1273   {
1274     CodeGenFunction::RunCleanupsScope Scope(CGF);
1275     llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
1276                            getCriticalRegionLock(CriticalName)};
1277     CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_critical), Args);
1278     // Build a call to __kmpc_end_critical
1279     CGF.EHStack.pushCleanup<CallEndCleanup<std::extent<decltype(Args)>::value>>(
1280         NormalAndEHCleanup, createRuntimeFunction(OMPRTL__kmpc_end_critical),
1281         llvm::makeArrayRef(Args));
1282     emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen);
1283   }
1284 }
1285 
1286 static void emitIfStmt(CodeGenFunction &CGF, llvm::Value *IfCond,
1287                        OpenMPDirectiveKind Kind, SourceLocation Loc,
1288                        const RegionCodeGenTy &BodyOpGen) {
1289   llvm::Value *CallBool = CGF.EmitScalarConversion(
1290       IfCond,
1291       CGF.getContext().getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true),
1292       CGF.getContext().BoolTy, Loc);
1293 
1294   auto *ThenBlock = CGF.createBasicBlock("omp_if.then");
1295   auto *ContBlock = CGF.createBasicBlock("omp_if.end");
1296   // Generate the branch (If-stmt)
1297   CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock);
1298   CGF.EmitBlock(ThenBlock);
1299   CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, Kind, BodyOpGen);
1300   // Emit the rest of bblocks/branches
1301   CGF.EmitBranch(ContBlock);
1302   CGF.EmitBlock(ContBlock, true);
1303 }
1304 
1305 void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF,
1306                                        const RegionCodeGenTy &MasterOpGen,
1307                                        SourceLocation Loc) {
1308   // if(__kmpc_master(ident_t *, gtid)) {
1309   //   MasterOpGen();
1310   //   __kmpc_end_master(ident_t *, gtid);
1311   // }
1312   // Prepare arguments and build a call to __kmpc_master
1313   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
1314   auto *IsMaster =
1315       CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_master), Args);
1316   typedef CallEndCleanup<std::extent<decltype(Args)>::value>
1317       MasterCallEndCleanup;
1318   emitIfStmt(
1319       CGF, IsMaster, OMPD_master, Loc, [&](CodeGenFunction &CGF) -> void {
1320         CodeGenFunction::RunCleanupsScope Scope(CGF);
1321         CGF.EHStack.pushCleanup<MasterCallEndCleanup>(
1322             NormalAndEHCleanup, createRuntimeFunction(OMPRTL__kmpc_end_master),
1323             llvm::makeArrayRef(Args));
1324         MasterOpGen(CGF);
1325       });
1326 }
1327 
1328 void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
1329                                         SourceLocation Loc) {
1330   // Build call __kmpc_omp_taskyield(loc, thread_id, 0);
1331   llvm::Value *Args[] = {
1332       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
1333       llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)};
1334   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_taskyield), Args);
1335 }
1336 
1337 void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF,
1338                                           const RegionCodeGenTy &TaskgroupOpGen,
1339                                           SourceLocation Loc) {
1340   // __kmpc_taskgroup(ident_t *, gtid);
1341   // TaskgroupOpGen();
1342   // __kmpc_end_taskgroup(ident_t *, gtid);
1343   // Prepare arguments and build a call to __kmpc_taskgroup
1344   {
1345     CodeGenFunction::RunCleanupsScope Scope(CGF);
1346     llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
1347     CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_taskgroup), Args);
1348     // Build a call to __kmpc_end_taskgroup
1349     CGF.EHStack.pushCleanup<CallEndCleanup<std::extent<decltype(Args)>::value>>(
1350         NormalAndEHCleanup, createRuntimeFunction(OMPRTL__kmpc_end_taskgroup),
1351         llvm::makeArrayRef(Args));
1352     emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen);
1353   }
1354 }
1355 
1356 static llvm::Value *emitCopyprivateCopyFunction(
1357     CodeGenModule &CGM, llvm::Type *ArgsType,
1358     ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs,
1359     ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps) {
1360   auto &C = CGM.getContext();
1361   // void copy_func(void *LHSArg, void *RHSArg);
1362   FunctionArgList Args;
1363   ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, SourceLocation(), /*Id=*/nullptr,
1364                            C.VoidPtrTy);
1365   ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, SourceLocation(), /*Id=*/nullptr,
1366                            C.VoidPtrTy);
1367   Args.push_back(&LHSArg);
1368   Args.push_back(&RHSArg);
1369   FunctionType::ExtInfo EI;
1370   auto &CGFI = CGM.getTypes().arrangeFreeFunctionDeclaration(
1371       C.VoidTy, Args, EI, /*isVariadic=*/false);
1372   auto *Fn = llvm::Function::Create(
1373       CGM.getTypes().GetFunctionType(CGFI), llvm::GlobalValue::InternalLinkage,
1374       ".omp.copyprivate.copy_func", &CGM.getModule());
1375   CGM.SetLLVMFunctionAttributes(/*D=*/nullptr, CGFI, Fn);
1376   CodeGenFunction CGF(CGM);
1377   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args);
1378   // Dest = (void*[n])(LHSArg);
1379   // Src = (void*[n])(RHSArg);
1380   auto *LHS = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
1381       CGF.Builder.CreateAlignedLoad(CGF.GetAddrOfLocalVar(&LHSArg),
1382                                     CGF.PointerAlignInBytes),
1383       ArgsType);
1384   auto *RHS = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
1385       CGF.Builder.CreateAlignedLoad(CGF.GetAddrOfLocalVar(&RHSArg),
1386                                     CGF.PointerAlignInBytes),
1387       ArgsType);
1388   // *(Type0*)Dst[0] = *(Type0*)Src[0];
1389   // *(Type1*)Dst[1] = *(Type1*)Src[1];
1390   // ...
1391   // *(Typen*)Dst[n] = *(Typen*)Src[n];
1392   for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) {
1393     auto *DestAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
1394         CGF.Builder.CreateAlignedLoad(
1395             CGF.Builder.CreateStructGEP(nullptr, LHS, I),
1396             CGM.PointerAlignInBytes),
1397         CGF.ConvertTypeForMem(C.getPointerType(SrcExprs[I]->getType())));
1398     auto *SrcAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
1399         CGF.Builder.CreateAlignedLoad(
1400             CGF.Builder.CreateStructGEP(nullptr, RHS, I),
1401             CGM.PointerAlignInBytes),
1402         CGF.ConvertTypeForMem(C.getPointerType(SrcExprs[I]->getType())));
1403     auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl();
1404     QualType Type = VD->getType();
1405     CGF.EmitOMPCopy(CGF, Type, DestAddr, SrcAddr,
1406                     cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl()),
1407                     cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl()),
1408                     AssignmentOps[I]);
1409   }
1410   CGF.FinishFunction();
1411   return Fn;
1412 }
1413 
1414 void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF,
1415                                        const RegionCodeGenTy &SingleOpGen,
1416                                        SourceLocation Loc,
1417                                        ArrayRef<const Expr *> CopyprivateVars,
1418                                        ArrayRef<const Expr *> SrcExprs,
1419                                        ArrayRef<const Expr *> DstExprs,
1420                                        ArrayRef<const Expr *> AssignmentOps) {
1421   assert(CopyprivateVars.size() == SrcExprs.size() &&
1422          CopyprivateVars.size() == DstExprs.size() &&
1423          CopyprivateVars.size() == AssignmentOps.size());
1424   auto &C = CGM.getContext();
1425   // int32 did_it = 0;
1426   // if(__kmpc_single(ident_t *, gtid)) {
1427   //   SingleOpGen();
1428   //   __kmpc_end_single(ident_t *, gtid);
1429   //   did_it = 1;
1430   // }
1431   // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
1432   // <copy_func>, did_it);
1433 
1434   llvm::AllocaInst *DidIt = nullptr;
1435   if (!CopyprivateVars.empty()) {
1436     // int32 did_it = 0;
1437     auto KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
1438     DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it");
1439     CGF.Builder.CreateAlignedStore(CGF.Builder.getInt32(0), DidIt,
1440                                    DidIt->getAlignment());
1441   }
1442   // Prepare arguments and build a call to __kmpc_single
1443   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
1444   auto *IsSingle =
1445       CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_single), Args);
1446   typedef CallEndCleanup<std::extent<decltype(Args)>::value>
1447       SingleCallEndCleanup;
1448   emitIfStmt(
1449       CGF, IsSingle, OMPD_single, Loc, [&](CodeGenFunction &CGF) -> void {
1450         CodeGenFunction::RunCleanupsScope Scope(CGF);
1451         CGF.EHStack.pushCleanup<SingleCallEndCleanup>(
1452             NormalAndEHCleanup, createRuntimeFunction(OMPRTL__kmpc_end_single),
1453             llvm::makeArrayRef(Args));
1454         SingleOpGen(CGF);
1455         if (DidIt) {
1456           // did_it = 1;
1457           CGF.Builder.CreateAlignedStore(CGF.Builder.getInt32(1), DidIt,
1458                                          DidIt->getAlignment());
1459         }
1460       });
1461   // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
1462   // <copy_func>, did_it);
1463   if (DidIt) {
1464     llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size());
1465     auto CopyprivateArrayTy =
1466         C.getConstantArrayType(C.VoidPtrTy, ArraySize, ArrayType::Normal,
1467                                /*IndexTypeQuals=*/0);
1468     // Create a list of all private variables for copyprivate.
1469     auto *CopyprivateList =
1470         CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list");
1471     for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) {
1472       auto *Elem = CGF.Builder.CreateStructGEP(
1473           CopyprivateList->getAllocatedType(), CopyprivateList, I);
1474       CGF.Builder.CreateAlignedStore(
1475           CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
1476               CGF.EmitLValue(CopyprivateVars[I]).getAddress(), CGF.VoidPtrTy),
1477           Elem, CGM.PointerAlignInBytes);
1478     }
1479     // Build function that copies private values from single region to all other
1480     // threads in the corresponding parallel region.
1481     auto *CpyFn = emitCopyprivateCopyFunction(
1482         CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy)->getPointerTo(),
1483         CopyprivateVars, SrcExprs, DstExprs, AssignmentOps);
1484     auto *BufSize = llvm::ConstantInt::get(
1485         CGM.SizeTy, C.getTypeSizeInChars(CopyprivateArrayTy).getQuantity());
1486     auto *CL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(CopyprivateList,
1487                                                                CGF.VoidPtrTy);
1488     auto *DidItVal =
1489         CGF.Builder.CreateAlignedLoad(DidIt, CGF.PointerAlignInBytes);
1490     llvm::Value *Args[] = {
1491         emitUpdateLocation(CGF, Loc), // ident_t *<loc>
1492         getThreadID(CGF, Loc),        // i32 <gtid>
1493         BufSize,                      // size_t <buf_size>
1494         CL,                           // void *<copyprivate list>
1495         CpyFn,                        // void (*) (void *, void *) <copy_func>
1496         DidItVal                      // i32 did_it
1497     };
1498     CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_copyprivate), Args);
1499   }
1500 }
1501 
1502 void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF,
1503                                         const RegionCodeGenTy &OrderedOpGen,
1504                                         SourceLocation Loc) {
1505   // __kmpc_ordered(ident_t *, gtid);
1506   // OrderedOpGen();
1507   // __kmpc_end_ordered(ident_t *, gtid);
1508   // Prepare arguments and build a call to __kmpc_ordered
1509   {
1510     CodeGenFunction::RunCleanupsScope Scope(CGF);
1511     llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
1512     CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_ordered), Args);
1513     // Build a call to __kmpc_end_ordered
1514     CGF.EHStack.pushCleanup<CallEndCleanup<std::extent<decltype(Args)>::value>>(
1515         NormalAndEHCleanup, createRuntimeFunction(OMPRTL__kmpc_end_ordered),
1516         llvm::makeArrayRef(Args));
1517     emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
1518   }
1519 }
1520 
1521 void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
1522                                       OpenMPDirectiveKind Kind,
1523                                       bool CheckForCancel) {
1524   // Build call __kmpc_cancel_barrier(loc, thread_id);
1525   // Build call __kmpc_barrier(loc, thread_id);
1526   OpenMPLocationFlags Flags = OMP_IDENT_KMPC;
1527   if (Kind == OMPD_for) {
1528     Flags =
1529         static_cast<OpenMPLocationFlags>(Flags | OMP_IDENT_BARRIER_IMPL_FOR);
1530   } else if (Kind == OMPD_sections) {
1531     Flags = static_cast<OpenMPLocationFlags>(Flags |
1532                                              OMP_IDENT_BARRIER_IMPL_SECTIONS);
1533   } else if (Kind == OMPD_single) {
1534     Flags =
1535         static_cast<OpenMPLocationFlags>(Flags | OMP_IDENT_BARRIER_IMPL_SINGLE);
1536   } else if (Kind == OMPD_barrier) {
1537     Flags = static_cast<OpenMPLocationFlags>(Flags | OMP_IDENT_BARRIER_EXPL);
1538   } else {
1539     Flags = static_cast<OpenMPLocationFlags>(Flags | OMP_IDENT_BARRIER_IMPL);
1540   }
1541   // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc,
1542   // thread_id);
1543   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags),
1544                          getThreadID(CGF, Loc)};
1545   if (auto *OMPRegionInfo =
1546           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
1547     auto CancelDestination =
1548         CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
1549     if (CancelDestination.isValid()) {
1550       auto *Result = CGF.EmitRuntimeCall(
1551           createRuntimeFunction(OMPRTL__kmpc_cancel_barrier), Args);
1552       if (CheckForCancel) {
1553         // if (__kmpc_cancel_barrier()) {
1554         //   exit from construct;
1555         // }
1556         auto *ExitBB = CGF.createBasicBlock(".cancel.exit");
1557         auto *ContBB = CGF.createBasicBlock(".cancel.continue");
1558         auto *Cmp = CGF.Builder.CreateIsNotNull(Result);
1559         CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
1560         CGF.EmitBlock(ExitBB);
1561         //   exit from construct;
1562         CGF.EmitBranchThroughCleanup(CancelDestination);
1563         CGF.EmitBlock(ContBB, /*IsFinished=*/true);
1564       }
1565       return;
1566     }
1567   }
1568   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_barrier), Args);
1569 }
1570 
/// \brief Schedule types for 'omp for' loops (these enumerators are taken from
/// the enum sched_type in kmp.h).
///
/// Values are written relative to the lower bound of their group; the
/// resulting numeric value is given in the trailing comment.
enum OpenMPSchedType {
  /// \brief Lower bound for default (unordered) versions.
  OMP_sch_lower = 32,
  OMP_sch_static_chunked = OMP_sch_lower + 1,  // 33
  OMP_sch_static = OMP_sch_lower + 2,          // 34
  OMP_sch_dynamic_chunked = OMP_sch_lower + 3, // 35
  OMP_sch_guided_chunked = OMP_sch_lower + 4,  // 36
  OMP_sch_runtime = OMP_sch_lower + 5,         // 37
  OMP_sch_auto = OMP_sch_lower + 6,            // 38
  /// \brief Lower bound for 'ordered' versions.
  OMP_ord_lower = 64,
  OMP_ord_static_chunked = OMP_ord_lower + 1,  // 65
  OMP_ord_static = OMP_ord_lower + 2,          // 66
  OMP_ord_dynamic_chunked = OMP_ord_lower + 3, // 67
  OMP_ord_guided_chunked = OMP_ord_lower + 4,  // 68
  OMP_ord_runtime = OMP_ord_lower + 5,         // 69
  OMP_ord_auto = OMP_ord_lower + 6,            // 70
  /// \brief Schedule used by default; an alias of the static schedule.
  OMP_sch_default = OMP_sch_static,
};
1592 
1593 /// \brief Map the OpenMP loop schedule to the runtime enumeration.
1594 static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind,
1595                                           bool Chunked, bool Ordered) {
1596   switch (ScheduleKind) {
1597   case OMPC_SCHEDULE_static:
1598     return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked)
1599                    : (Ordered ? OMP_ord_static : OMP_sch_static);
1600   case OMPC_SCHEDULE_dynamic:
1601     return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked;
1602   case OMPC_SCHEDULE_guided:
1603     return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked;
1604   case OMPC_SCHEDULE_runtime:
1605     return Ordered ? OMP_ord_runtime : OMP_sch_runtime;
1606   case OMPC_SCHEDULE_auto:
1607     return Ordered ? OMP_ord_auto : OMP_sch_auto;
1608   case OMPC_SCHEDULE_unknown:
1609     assert(!Chunked && "chunk was specified but schedule kind not known");
1610     return Ordered ? OMP_ord_static : OMP_sch_static;
1611   }
1612   llvm_unreachable("Unexpected runtime schedule");
1613 }
1614 
1615 bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind,
1616                                          bool Chunked) const {
1617   auto Schedule = getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
1618   return Schedule == OMP_sch_static;
1619 }
1620 
1621 bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const {
1622   auto Schedule =
1623       getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false);
1624   assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here");
1625   return Schedule != OMP_sch_static;
1626 }
1627 
1628 void CGOpenMPRuntime::emitForInit(CodeGenFunction &CGF, SourceLocation Loc,
1629                                   OpenMPScheduleClauseKind ScheduleKind,
1630                                   unsigned IVSize, bool IVSigned, bool Ordered,
1631                                   llvm::Value *IL, llvm::Value *LB,
1632                                   llvm::Value *UB, llvm::Value *ST,
1633                                   llvm::Value *Chunk) {
1634   OpenMPSchedType Schedule =
1635       getRuntimeSchedule(ScheduleKind, Chunk != nullptr, Ordered);
1636   if (Ordered ||
1637       (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked &&
1638        Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked)) {
1639     // Call __kmpc_dispatch_init(
1640     //          ident_t *loc, kmp_int32 tid, kmp_int32 schedule,
1641     //          kmp_int[32|64] lower, kmp_int[32|64] upper,
1642     //          kmp_int[32|64] stride, kmp_int[32|64] chunk);
1643 
1644     // If the Chunk was not specified in the clause - use default value 1.
1645     if (Chunk == nullptr)
1646       Chunk = CGF.Builder.getIntN(IVSize, 1);
1647     llvm::Value *Args[] = { emitUpdateLocation(CGF, Loc, OMP_IDENT_KMPC),
1648                             getThreadID(CGF, Loc),
1649                             CGF.Builder.getInt32(Schedule), // Schedule type
1650                             CGF.Builder.getIntN(IVSize, 0), // Lower
1651                             UB,                             // Upper
1652                             CGF.Builder.getIntN(IVSize, 1), // Stride
1653                             Chunk                           // Chunk
1654     };
1655     CGF.EmitRuntimeCall(createDispatchInitFunction(IVSize, IVSigned), Args);
1656   } else {
1657     // Call __kmpc_for_static_init(
1658     //          ident_t *loc, kmp_int32 tid, kmp_int32 schedtype,
1659     //          kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower,
1660     //          kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
1661     //          kmp_int[32|64] incr, kmp_int[32|64] chunk);
1662     if (Chunk == nullptr) {
1663       assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static) &&
1664              "expected static non-chunked schedule");
1665       // If the Chunk was not specified in the clause - use default value 1.
1666       Chunk = CGF.Builder.getIntN(IVSize, 1);
1667     } else
1668       assert((Schedule == OMP_sch_static_chunked ||
1669               Schedule == OMP_ord_static_chunked) &&
1670              "expected static chunked schedule");
1671     llvm::Value *Args[] = { emitUpdateLocation(CGF, Loc, OMP_IDENT_KMPC),
1672                             getThreadID(CGF, Loc),
1673                             CGF.Builder.getInt32(Schedule), // Schedule type
1674                             IL,                             // &isLastIter
1675                             LB,                             // &LB
1676                             UB,                             // &UB
1677                             ST,                             // &Stride
1678                             CGF.Builder.getIntN(IVSize, 1), // Incr
1679                             Chunk                           // Chunk
1680     };
1681     CGF.EmitRuntimeCall(createForStaticInitFunction(IVSize, IVSigned), Args);
1682   }
1683 }
1684 
1685 void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF,
1686                                           SourceLocation Loc) {
1687   // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid);
1688   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, OMP_IDENT_KMPC),
1689                          getThreadID(CGF, Loc)};
1690   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_for_static_fini),
1691                       Args);
1692 }
1693 
1694 void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
1695                                                  SourceLocation Loc,
1696                                                  unsigned IVSize,
1697                                                  bool IVSigned) {
1698   // Call __kmpc_for_dynamic_fini_(4|8)[u](ident_t *loc, kmp_int32 tid);
1699   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, OMP_IDENT_KMPC),
1700                          getThreadID(CGF, Loc)};
1701   CGF.EmitRuntimeCall(createDispatchFiniFunction(IVSize, IVSigned), Args);
1702 }
1703 
1704 llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF,
1705                                           SourceLocation Loc, unsigned IVSize,
1706                                           bool IVSigned, llvm::Value *IL,
1707                                           llvm::Value *LB, llvm::Value *UB,
1708                                           llvm::Value *ST) {
1709   // Call __kmpc_dispatch_next(
1710   //          ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter,
1711   //          kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper,
1712   //          kmp_int[32|64] *p_stride);
1713   llvm::Value *Args[] = {
1714       emitUpdateLocation(CGF, Loc, OMP_IDENT_KMPC), getThreadID(CGF, Loc),
1715       IL, // &isLastIter
1716       LB, // &Lower
1717       UB, // &Upper
1718       ST  // &Stride
1719   };
1720   llvm::Value *Call =
1721       CGF.EmitRuntimeCall(createDispatchNextFunction(IVSize, IVSigned), Args);
1722   return CGF.EmitScalarConversion(
1723       Call, CGF.getContext().getIntTypeForBitwidth(32, /* Signed */ true),
1724       CGF.getContext().BoolTy, Loc);
1725 }
1726 
1727 void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
1728                                            llvm::Value *NumThreads,
1729                                            SourceLocation Loc) {
1730   // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads)
1731   llvm::Value *Args[] = {
1732       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
1733       CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)};
1734   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_num_threads),
1735                       Args);
1736 }
1737 
1738 void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF,
1739                                          OpenMPProcBindClauseKind ProcBind,
1740                                          SourceLocation Loc) {
1741   // Constants for proc bind value accepted by the runtime.
1742   enum ProcBindTy {
1743     ProcBindFalse = 0,
1744     ProcBindTrue,
1745     ProcBindMaster,
1746     ProcBindClose,
1747     ProcBindSpread,
1748     ProcBindIntel,
1749     ProcBindDefault
1750   } RuntimeProcBind;
1751   switch (ProcBind) {
1752   case OMPC_PROC_BIND_master:
1753     RuntimeProcBind = ProcBindMaster;
1754     break;
1755   case OMPC_PROC_BIND_close:
1756     RuntimeProcBind = ProcBindClose;
1757     break;
1758   case OMPC_PROC_BIND_spread:
1759     RuntimeProcBind = ProcBindSpread;
1760     break;
1761   case OMPC_PROC_BIND_unknown:
1762     llvm_unreachable("Unsupported proc_bind value.");
1763   }
1764   // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind)
1765   llvm::Value *Args[] = {
1766       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
1767       llvm::ConstantInt::get(CGM.IntTy, RuntimeProcBind, /*isSigned=*/true)};
1768   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_proc_bind), Args);
1769 }
1770 
1771 void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>,
1772                                 SourceLocation Loc) {
1773   // Build call void __kmpc_flush(ident_t *loc)
1774   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_flush),
1775                       emitUpdateLocation(CGF, Loc));
1776 }
1777 
namespace {
/// \brief Indexes of fields for type kmp_task_t.
///
/// The values are spelled out explicitly; they are used as positions when
/// stepping through the fields of the kmp_task_t record.
enum KmpTaskTFields {
  /// \brief List of shared variables.
  KmpTaskTShareds = 0,
  /// \brief Task routine.
  KmpTaskTRoutine = 1,
  /// \brief Partition id for the untied tasks.
  KmpTaskTPartId = 2,
  /// \brief Function with call of destructors for private variables.
  KmpTaskTDestructors = 3,
};
} // namespace
1791 
1792 void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) {
1793   if (!KmpRoutineEntryPtrTy) {
1794     // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type.
1795     auto &C = CGM.getContext();
1796     QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy};
1797     FunctionProtoType::ExtProtoInfo EPI;
1798     KmpRoutineEntryPtrQTy = C.getPointerType(
1799         C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI));
1800     KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy);
1801   }
1802 }
1803 
1804 static void addFieldToRecordDecl(ASTContext &C, DeclContext *DC,
1805                                  QualType FieldTy) {
1806   auto *Field = FieldDecl::Create(
1807       C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy,
1808       C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()),
1809       /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit);
1810   Field->setAccess(AS_public);
1811   DC->addDecl(Field);
1812 }
1813 
1814 namespace {
1815 struct PrivateHelpersTy {
1816   PrivateHelpersTy(const VarDecl *Original, const VarDecl *PrivateCopy,
1817                    const VarDecl *PrivateElemInit)
1818       : Original(Original), PrivateCopy(PrivateCopy),
1819         PrivateElemInit(PrivateElemInit) {}
1820   const VarDecl *Original;
1821   const VarDecl *PrivateCopy;
1822   const VarDecl *PrivateElemInit;
1823 };
1824 typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy;
1825 } // namespace
1826 
1827 static RecordDecl *
1828 createPrivatesRecordDecl(CodeGenModule &CGM,
1829                          const ArrayRef<PrivateDataTy> Privates) {
1830   if (!Privates.empty()) {
1831     auto &C = CGM.getContext();
1832     // Build struct .kmp_privates_t. {
1833     //         /*  private vars  */
1834     //       };
1835     auto *RD = C.buildImplicitRecord(".kmp_privates.t");
1836     RD->startDefinition();
1837     for (auto &&Pair : Privates) {
1838       auto Type = Pair.second.Original->getType();
1839       Type = Type.getNonReferenceType();
1840       addFieldToRecordDecl(C, RD, Type);
1841     }
1842     RD->completeDefinition();
1843     return RD;
1844   }
1845   return nullptr;
1846 }
1847 
1848 static RecordDecl *
1849 createKmpTaskTRecordDecl(CodeGenModule &CGM, QualType KmpInt32Ty,
1850                          QualType KmpRoutineEntryPointerQTy) {
1851   auto &C = CGM.getContext();
1852   // Build struct kmp_task_t {
1853   //         void *              shareds;
1854   //         kmp_routine_entry_t routine;
1855   //         kmp_int32           part_id;
1856   //         kmp_routine_entry_t destructors;
1857   //       };
1858   auto *RD = C.buildImplicitRecord("kmp_task_t");
1859   RD->startDefinition();
1860   addFieldToRecordDecl(C, RD, C.VoidPtrTy);
1861   addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy);
1862   addFieldToRecordDecl(C, RD, KmpInt32Ty);
1863   addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy);
1864   RD->completeDefinition();
1865   return RD;
1866 }
1867 
1868 static RecordDecl *
1869 createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy,
1870                                      const ArrayRef<PrivateDataTy> Privates) {
1871   auto &C = CGM.getContext();
1872   // Build struct kmp_task_t_with_privates {
1873   //         kmp_task_t task_data;
1874   //         .kmp_privates_t. privates;
1875   //       };
1876   auto *RD = C.buildImplicitRecord("kmp_task_t_with_privates");
1877   RD->startDefinition();
1878   addFieldToRecordDecl(C, RD, KmpTaskTQTy);
1879   if (auto *PrivateRD = createPrivatesRecordDecl(CGM, Privates)) {
1880     addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD));
1881   }
1882   RD->completeDefinition();
1883   return RD;
1884 }
1885 
1886 /// \brief Emit a proxy function which accepts kmp_task_t as the second
1887 /// argument.
1888 /// \code
1889 /// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
1890 ///   TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map,
1891 ///   tt->shareds);
1892 ///   return 0;
1893 /// }
1894 /// \endcode
1895 static llvm::Value *
1896 emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc,
1897                       QualType KmpInt32Ty, QualType KmpTaskTWithPrivatesPtrQTy,
1898                       QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy,
1899                       QualType SharedsPtrTy, llvm::Value *TaskFunction,
1900                       llvm::Value *TaskPrivatesMap) {
1901   auto &C = CGM.getContext();
1902   FunctionArgList Args;
1903   ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty);
1904   ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc,
1905                                 /*Id=*/nullptr, KmpTaskTWithPrivatesPtrQTy);
1906   Args.push_back(&GtidArg);
1907   Args.push_back(&TaskTypeArg);
1908   FunctionType::ExtInfo Info;
1909   auto &TaskEntryFnInfo =
1910       CGM.getTypes().arrangeFreeFunctionDeclaration(KmpInt32Ty, Args, Info,
1911                                                     /*isVariadic=*/false);
1912   auto *TaskEntryTy = CGM.getTypes().GetFunctionType(TaskEntryFnInfo);
1913   auto *TaskEntry =
1914       llvm::Function::Create(TaskEntryTy, llvm::GlobalValue::InternalLinkage,
1915                              ".omp_task_entry.", &CGM.getModule());
1916   CGM.SetLLVMFunctionAttributes(/*D=*/nullptr, TaskEntryFnInfo, TaskEntry);
1917   CodeGenFunction CGF(CGM);
1918   CGF.disableDebugInfo();
1919   CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args);
1920 
1921   // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map,
1922   // tt->task_data.shareds);
1923   auto *GtidParam = CGF.EmitLoadOfScalar(
1924       CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false,
1925       C.getTypeAlignInChars(KmpInt32Ty).getQuantity(), KmpInt32Ty, Loc);
1926   auto *TaskTypeArgAddr = CGF.Builder.CreateAlignedLoad(
1927       CGF.GetAddrOfLocalVar(&TaskTypeArg), CGM.PointerAlignInBytes);
1928   LValue TDBase =
1929       CGF.MakeNaturalAlignAddrLValue(TaskTypeArgAddr, KmpTaskTWithPrivatesQTy);
1930   auto *KmpTaskTWithPrivatesQTyRD =
1931       cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
1932   LValue Base =
1933       CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
1934   auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
1935   auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
1936   auto PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI);
1937   auto *PartidParam = CGF.EmitLoadOfLValue(PartIdLVal, Loc).getScalarVal();
1938 
1939   auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds);
1940   auto SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI);
1941   auto *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
1942       CGF.EmitLoadOfLValue(SharedsLVal, Loc).getScalarVal(),
1943       CGF.ConvertTypeForMem(SharedsPtrTy));
1944 
1945   auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
1946   llvm::Value *PrivatesParam;
1947   if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) {
1948     auto PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI);
1949     PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
1950         PrivatesLVal.getAddress(), CGF.VoidPtrTy);
1951   } else {
1952     PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
1953   }
1954 
1955   llvm::Value *CallArgs[] = {GtidParam, PartidParam, PrivatesParam,
1956                              TaskPrivatesMap, SharedsParam};
1957   CGF.EmitCallOrInvoke(TaskFunction, CallArgs);
1958   CGF.EmitStoreThroughLValue(
1959       RValue::get(CGF.Builder.getInt32(/*C=*/0)),
1960       CGF.MakeNaturalAlignAddrLValue(CGF.ReturnValue, KmpInt32Ty));
1961   CGF.FinishFunction();
1962   return TaskEntry;
1963 }
1964 
1965 static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM,
1966                                             SourceLocation Loc,
1967                                             QualType KmpInt32Ty,
1968                                             QualType KmpTaskTWithPrivatesPtrQTy,
1969                                             QualType KmpTaskTWithPrivatesQTy) {
1970   auto &C = CGM.getContext();
1971   FunctionArgList Args;
1972   ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty);
1973   ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc,
1974                                 /*Id=*/nullptr, KmpTaskTWithPrivatesPtrQTy);
1975   Args.push_back(&GtidArg);
1976   Args.push_back(&TaskTypeArg);
1977   FunctionType::ExtInfo Info;
1978   auto &DestructorFnInfo =
1979       CGM.getTypes().arrangeFreeFunctionDeclaration(KmpInt32Ty, Args, Info,
1980                                                     /*isVariadic=*/false);
1981   auto *DestructorFnTy = CGM.getTypes().GetFunctionType(DestructorFnInfo);
1982   auto *DestructorFn =
1983       llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage,
1984                              ".omp_task_destructor.", &CGM.getModule());
1985   CGM.SetLLVMFunctionAttributes(/*D=*/nullptr, DestructorFnInfo, DestructorFn);
1986   CodeGenFunction CGF(CGM);
1987   CGF.disableDebugInfo();
1988   CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo,
1989                     Args);
1990 
1991   auto *TaskTypeArgAddr = CGF.Builder.CreateAlignedLoad(
1992       CGF.GetAddrOfLocalVar(&TaskTypeArg), CGM.PointerAlignInBytes);
1993   LValue Base =
1994       CGF.MakeNaturalAlignAddrLValue(TaskTypeArgAddr, KmpTaskTWithPrivatesQTy);
1995   auto *KmpTaskTWithPrivatesQTyRD =
1996       cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
1997   auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
1998   Base = CGF.EmitLValueForField(Base, *FI);
1999   for (auto *Field :
2000        cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) {
2001     if (auto DtorKind = Field->getType().isDestructedType()) {
2002       auto FieldLValue = CGF.EmitLValueForField(Base, Field);
2003       CGF.pushDestroy(DtorKind, FieldLValue.getAddress(), Field->getType());
2004     }
2005   }
2006   CGF.FinishFunction();
2007   return DestructorFn;
2008 }
2009 
2010 /// \brief Emit a privates mapping function for correct handling of private and
2011 /// firstprivate variables.
2012 /// \code
2013 /// void .omp_task_privates_map.(const .privates. *noalias privs, <ty1>
2014 /// **noalias priv1,...,  <tyn> **noalias privn) {
2015 ///   *priv1 = &.privates.priv1;
2016 ///   ...;
2017 ///   *privn = &.privates.privn;
2018 /// }
2019 /// \endcode
2020 static llvm::Value *
2021 emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc,
2022                                const ArrayRef<const Expr *> PrivateVars,
2023                                const ArrayRef<const Expr *> FirstprivateVars,
2024                                QualType PrivatesQTy,
2025                                const ArrayRef<PrivateDataTy> Privates) {
2026   auto &C = CGM.getContext();
2027   FunctionArgList Args;
2028   ImplicitParamDecl TaskPrivatesArg(
2029       C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
2030       C.getPointerType(PrivatesQTy).withConst().withRestrict());
2031   Args.push_back(&TaskPrivatesArg);
2032   llvm::DenseMap<const VarDecl *, unsigned> PrivateVarsPos;
2033   unsigned Counter = 1;
2034   for (auto *E: PrivateVars) {
2035     Args.push_back(ImplicitParamDecl::Create(
2036         C, /*DC=*/nullptr, Loc,
2037         /*Id=*/nullptr, C.getPointerType(C.getPointerType(E->getType()))
2038                             .withConst()
2039                             .withRestrict()));
2040     auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
2041     PrivateVarsPos[VD] = Counter;
2042     ++Counter;
2043   }
2044   for (auto *E : FirstprivateVars) {
2045     Args.push_back(ImplicitParamDecl::Create(
2046         C, /*DC=*/nullptr, Loc,
2047         /*Id=*/nullptr, C.getPointerType(C.getPointerType(E->getType()))
2048                             .withConst()
2049                             .withRestrict()));
2050     auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
2051     PrivateVarsPos[VD] = Counter;
2052     ++Counter;
2053   }
2054   FunctionType::ExtInfo Info;
2055   auto &TaskPrivatesMapFnInfo =
2056       CGM.getTypes().arrangeFreeFunctionDeclaration(C.VoidTy, Args, Info,
2057                                                     /*isVariadic=*/false);
2058   auto *TaskPrivatesMapTy =
2059       CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo);
2060   auto *TaskPrivatesMap = llvm::Function::Create(
2061       TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage,
2062       ".omp_task_privates_map.", &CGM.getModule());
2063   CGM.SetLLVMFunctionAttributes(/*D=*/nullptr, TaskPrivatesMapFnInfo,
2064                                 TaskPrivatesMap);
2065   TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline);
2066   CodeGenFunction CGF(CGM);
2067   CGF.disableDebugInfo();
2068   CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap,
2069                     TaskPrivatesMapFnInfo, Args);
2070 
2071   // *privi = &.privates.privi;
2072   auto *TaskPrivatesArgAddr = CGF.Builder.CreateAlignedLoad(
2073       CGF.GetAddrOfLocalVar(&TaskPrivatesArg), CGM.PointerAlignInBytes);
2074   LValue Base =
2075       CGF.MakeNaturalAlignAddrLValue(TaskPrivatesArgAddr, PrivatesQTy);
2076   auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl());
2077   Counter = 0;
2078   for (auto *Field : PrivatesQTyRD->fields()) {
2079     auto FieldLVal = CGF.EmitLValueForField(Base, Field);
2080     auto *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]];
2081     auto RefLVal = CGF.MakeNaturalAlignAddrLValue(CGF.GetAddrOfLocalVar(VD),
2082                                                   VD->getType());
2083     auto RefLoadRVal = CGF.EmitLoadOfLValue(RefLVal, Loc);
2084     CGF.EmitStoreOfScalar(
2085         FieldLVal.getAddress(),
2086         CGF.MakeNaturalAlignAddrLValue(RefLoadRVal.getScalarVal(),
2087                                        RefLVal.getType()->getPointeeType()));
2088     ++Counter;
2089   }
2090   CGF.FinishFunction();
2091   return TaskPrivatesMap;
2092 }
2093 
2094 static int array_pod_sort_comparator(const PrivateDataTy *P1,
2095                                      const PrivateDataTy *P2) {
2096   return P1->first < P2->first ? 1 : (P2->first < P1->first ? -1 : 0);
2097 }
2098 
2099 void CGOpenMPRuntime::emitTaskCall(
2100     CodeGenFunction &CGF, SourceLocation Loc, const OMPExecutableDirective &D,
2101     bool Tied, llvm::PointerIntPair<llvm::Value *, 1, bool> Final,
2102     llvm::Value *TaskFunction, QualType SharedsTy, llvm::Value *Shareds,
2103     const Expr *IfCond, ArrayRef<const Expr *> PrivateVars,
2104     ArrayRef<const Expr *> PrivateCopies,
2105     ArrayRef<const Expr *> FirstprivateVars,
2106     ArrayRef<const Expr *> FirstprivateCopies,
2107     ArrayRef<const Expr *> FirstprivateInits,
2108     ArrayRef<std::pair<OpenMPDependClauseKind, const Expr *>> Dependences) {
2109   auto &C = CGM.getContext();
2110   llvm::SmallVector<PrivateDataTy, 8> Privates;
2111   // Aggregate privates and sort them by the alignment.
2112   auto I = PrivateCopies.begin();
2113   for (auto *E : PrivateVars) {
2114     auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
2115     Privates.push_back(std::make_pair(
2116         C.getTypeAlignInChars(VD->getType()),
2117         PrivateHelpersTy(VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
2118                          /*PrivateElemInit=*/nullptr)));
2119     ++I;
2120   }
2121   I = FirstprivateCopies.begin();
2122   auto IElemInitRef = FirstprivateInits.begin();
2123   for (auto *E : FirstprivateVars) {
2124     auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
2125     Privates.push_back(std::make_pair(
2126         C.getTypeAlignInChars(VD->getType()),
2127         PrivateHelpersTy(
2128             VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
2129             cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl()))));
2130     ++I, ++IElemInitRef;
2131   }
2132   llvm::array_pod_sort(Privates.begin(), Privates.end(),
2133                        array_pod_sort_comparator);
2134   auto KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
2135   // Build type kmp_routine_entry_t (if not built yet).
2136   emitKmpRoutineEntryT(KmpInt32Ty);
2137   // Build type kmp_task_t (if not built yet).
2138   if (KmpTaskTQTy.isNull()) {
2139     KmpTaskTQTy = C.getRecordType(
2140         createKmpTaskTRecordDecl(CGM, KmpInt32Ty, KmpRoutineEntryPtrQTy));
2141   }
2142   auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
2143   // Build particular struct kmp_task_t for the given task.
2144   auto *KmpTaskTWithPrivatesQTyRD =
2145       createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates);
2146   auto KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD);
2147   QualType KmpTaskTWithPrivatesPtrQTy =
2148       C.getPointerType(KmpTaskTWithPrivatesQTy);
2149   auto *KmpTaskTWithPrivatesTy = CGF.ConvertType(KmpTaskTWithPrivatesQTy);
2150   auto *KmpTaskTWithPrivatesPtrTy = KmpTaskTWithPrivatesTy->getPointerTo();
2151   auto KmpTaskTWithPrivatesTySize =
2152       CGM.getSize(C.getTypeSizeInChars(KmpTaskTWithPrivatesQTy));
2153   QualType SharedsPtrTy = C.getPointerType(SharedsTy);
2154 
2155   // Emit initial values for private copies (if any).
2156   llvm::Value *TaskPrivatesMap = nullptr;
2157   auto *TaskPrivatesMapTy =
2158       std::next(cast<llvm::Function>(TaskFunction)->getArgumentList().begin(),
2159                 3)
2160           ->getType();
2161   if (!Privates.empty()) {
2162     auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
2163     TaskPrivatesMap = emitTaskPrivateMappingFunction(
2164         CGM, Loc, PrivateVars, FirstprivateVars, FI->getType(), Privates);
2165     TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2166         TaskPrivatesMap, TaskPrivatesMapTy);
2167   } else {
2168     TaskPrivatesMap = llvm::ConstantPointerNull::get(
2169         cast<llvm::PointerType>(TaskPrivatesMapTy));
2170   }
2171   // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid,
2172   // kmp_task_t *tt);
2173   auto *TaskEntry = emitProxyTaskFunction(
2174       CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTy,
2175       KmpTaskTQTy, SharedsPtrTy, TaskFunction, TaskPrivatesMap);
2176 
2177   // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
2178   // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
2179   // kmp_routine_entry_t *task_entry);
2180   // Task flags. Format is taken from
2181   // http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp.h,
2182   // description of kmp_tasking_flags struct.
2183   const unsigned TiedFlag = 0x1;
2184   const unsigned FinalFlag = 0x2;
2185   unsigned Flags = Tied ? TiedFlag : 0;
2186   auto *TaskFlags =
2187       Final.getPointer()
2188           ? CGF.Builder.CreateSelect(Final.getPointer(),
2189                                      CGF.Builder.getInt32(FinalFlag),
2190                                      CGF.Builder.getInt32(/*C=*/0))
2191           : CGF.Builder.getInt32(Final.getInt() ? FinalFlag : 0);
2192   TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags));
2193   auto SharedsSize = C.getTypeSizeInChars(SharedsTy);
2194   llvm::Value *AllocArgs[] = {
2195       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), TaskFlags,
2196       KmpTaskTWithPrivatesTySize, CGM.getSize(SharedsSize),
2197       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TaskEntry,
2198                                                       KmpRoutineEntryPtrTy)};
2199   auto *NewTask = CGF.EmitRuntimeCall(
2200       createRuntimeFunction(OMPRTL__kmpc_omp_task_alloc), AllocArgs);
2201   auto *NewTaskNewTaskTTy = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2202       NewTask, KmpTaskTWithPrivatesPtrTy);
2203   LValue Base = CGF.MakeNaturalAlignAddrLValue(NewTaskNewTaskTTy,
2204                                                KmpTaskTWithPrivatesQTy);
2205   LValue TDBase =
2206       CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin());
2207   // Fill the data in the resulting kmp_task_t record.
2208   // Copy shareds if there are any.
2209   llvm::Value *KmpTaskSharedsPtr = nullptr;
2210   if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) {
2211     KmpTaskSharedsPtr = CGF.EmitLoadOfScalar(
2212         CGF.EmitLValueForField(
2213             TDBase, *std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds)),
2214         Loc);
2215     CGF.EmitAggregateCopy(KmpTaskSharedsPtr, Shareds, SharedsTy);
2216   }
2217   // Emit initial values for private copies (if any).
2218   bool NeedsCleanup = false;
2219   if (!Privates.empty()) {
2220     auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
2221     auto PrivatesBase = CGF.EmitLValueForField(Base, *FI);
2222     FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin();
2223     LValue SharedsBase;
2224     if (!FirstprivateVars.empty()) {
2225       SharedsBase = CGF.MakeNaturalAlignAddrLValue(
2226           CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2227               KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy)),
2228           SharedsTy);
2229     }
2230     CodeGenFunction::CGCapturedStmtInfo CapturesInfo(
2231         cast<CapturedStmt>(*D.getAssociatedStmt()));
2232     for (auto &&Pair : Privates) {
2233       auto *VD = Pair.second.PrivateCopy;
2234       auto *Init = VD->getAnyInitializer();
2235       LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI);
2236       if (Init) {
2237         if (auto *Elem = Pair.second.PrivateElemInit) {
2238           auto *OriginalVD = Pair.second.Original;
2239           auto *SharedField = CapturesInfo.lookup(OriginalVD);
2240           auto SharedRefLValue =
2241               CGF.EmitLValueForField(SharedsBase, SharedField);
2242           QualType Type = OriginalVD->getType();
2243           if (Type->isArrayType()) {
2244             // Initialize firstprivate array.
2245             if (!isa<CXXConstructExpr>(Init) ||
2246                 CGF.isTrivialInitializer(Init)) {
2247               // Perform simple memcpy.
2248               CGF.EmitAggregateAssign(PrivateLValue.getAddress(),
2249                                       SharedRefLValue.getAddress(), Type);
2250             } else {
2251               // Initialize firstprivate array using element-by-element
2252               // intialization.
2253               CGF.EmitOMPAggregateAssign(
2254                   PrivateLValue.getAddress(), SharedRefLValue.getAddress(),
2255                   Type, [&CGF, Elem, Init, &CapturesInfo](
2256                             llvm::Value *DestElement, llvm::Value *SrcElement) {
2257                     // Clean up any temporaries needed by the initialization.
2258                     CodeGenFunction::OMPPrivateScope InitScope(CGF);
2259                     InitScope.addPrivate(Elem, [SrcElement]() -> llvm::Value *{
2260                       return SrcElement;
2261                     });
2262                     (void)InitScope.Privatize();
2263                     // Emit initialization for single element.
2264                     CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(
2265                         CGF, &CapturesInfo);
2266                     CGF.EmitAnyExprToMem(Init, DestElement,
2267                                          Init->getType().getQualifiers(),
2268                                          /*IsInitializer=*/false);
2269                   });
2270             }
2271           } else {
2272             CodeGenFunction::OMPPrivateScope InitScope(CGF);
2273             InitScope.addPrivate(Elem, [SharedRefLValue]() -> llvm::Value *{
2274               return SharedRefLValue.getAddress();
2275             });
2276             (void)InitScope.Privatize();
2277             CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo);
2278             CGF.EmitExprAsInit(Init, VD, PrivateLValue,
2279                                /*capturedByInit=*/false);
2280           }
2281         } else {
2282           CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false);
2283         }
2284       }
2285       NeedsCleanup = NeedsCleanup || FI->getType().isDestructedType();
2286       ++FI;
2287     }
2288   }
2289   // Provide pointer to function with destructors for privates.
2290   llvm::Value *DestructorFn =
2291       NeedsCleanup ? emitDestructorsFunction(CGM, Loc, KmpInt32Ty,
2292                                              KmpTaskTWithPrivatesPtrQTy,
2293                                              KmpTaskTWithPrivatesQTy)
2294                    : llvm::ConstantPointerNull::get(
2295                          cast<llvm::PointerType>(KmpRoutineEntryPtrTy));
2296   LValue Destructor = CGF.EmitLValueForField(
2297       TDBase, *std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTDestructors));
2298   CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2299                             DestructorFn, KmpRoutineEntryPtrTy),
2300                         Destructor);
2301 
2302   // Process list of dependences.
2303   llvm::Value *DependInfo = nullptr;
2304   unsigned DependencesNumber = Dependences.size();
2305   if (!Dependences.empty()) {
2306     // Dependence kind for RTL.
2307     enum RTLDependenceKindTy { DepIn = 1, DepOut = 2, DepInOut = 3 };
2308     enum RTLDependInfoFieldsTy { BaseAddr, Len, Flags };
2309     RecordDecl *KmpDependInfoRD;
2310     QualType FlagsTy = C.getIntTypeForBitwidth(
2311         C.toBits(C.getTypeSizeInChars(C.BoolTy)), /*Signed=*/false);
2312     llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
2313     if (KmpDependInfoTy.isNull()) {
2314       KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info");
2315       KmpDependInfoRD->startDefinition();
2316       addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType());
2317       addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType());
2318       addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy);
2319       KmpDependInfoRD->completeDefinition();
2320       KmpDependInfoTy = C.getRecordType(KmpDependInfoRD);
2321     } else {
2322       KmpDependInfoRD = cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
2323     }
2324     // Define type kmp_depend_info[<Dependences.size()>];
2325     QualType KmpDependInfoArrayTy = C.getConstantArrayType(
2326         KmpDependInfoTy, llvm::APInt(/*numBits=*/64, Dependences.size()),
2327         ArrayType::Normal, /*IndexTypeQuals=*/0);
2328     // kmp_depend_info[<Dependences.size()>] deps;
2329     DependInfo = CGF.CreateMemTemp(KmpDependInfoArrayTy);
2330     for (unsigned i = 0; i < DependencesNumber; ++i) {
2331       auto Addr = CGF.EmitLValue(Dependences[i].second);
2332       auto *Size = llvm::ConstantInt::get(
2333           CGF.SizeTy,
2334           C.getTypeSizeInChars(Dependences[i].second->getType()).getQuantity());
2335       auto Base = CGF.MakeNaturalAlignAddrLValue(
2336           CGF.Builder.CreateStructGEP(/*Ty=*/nullptr, DependInfo, i),
2337           KmpDependInfoTy);
2338       // deps[i].base_addr = &<Dependences[i].second>;
2339       auto BaseAddrLVal = CGF.EmitLValueForField(
2340           Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
2341       CGF.EmitStoreOfScalar(
2342           CGF.Builder.CreatePtrToInt(Addr.getAddress(), CGF.IntPtrTy),
2343           BaseAddrLVal);
2344       // deps[i].len = sizeof(<Dependences[i].second>);
2345       auto LenLVal = CGF.EmitLValueForField(
2346           Base, *std::next(KmpDependInfoRD->field_begin(), Len));
2347       CGF.EmitStoreOfScalar(Size, LenLVal);
2348       // deps[i].flags = <Dependences[i].first>;
2349       RTLDependenceKindTy DepKind;
2350       switch (Dependences[i].first) {
2351       case OMPC_DEPEND_in:
2352         DepKind = DepIn;
2353         break;
2354       case OMPC_DEPEND_out:
2355         DepKind = DepOut;
2356         break;
2357       case OMPC_DEPEND_inout:
2358         DepKind = DepInOut;
2359         break;
2360       case OMPC_DEPEND_unknown:
2361         llvm_unreachable("Unknown task dependence type");
2362       }
2363       auto FlagsLVal = CGF.EmitLValueForField(
2364           Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
2365       CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
2366                             FlagsLVal);
2367     }
2368     DependInfo = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2369         CGF.Builder.CreateStructGEP(/*Ty=*/nullptr, DependInfo, 0),
2370         CGF.VoidPtrTy);
2371   }
2372 
2373   // NOTE: routine and part_id fields are intialized by __kmpc_omp_task_alloc()
2374   // libcall.
2375   // Build kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t
2376   // *new_task);
2377   // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
2378   // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
2379   // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if dependence
2380   // list is not empty
2381   auto *ThreadID = getThreadID(CGF, Loc);
2382   auto *UpLoc = emitUpdateLocation(CGF, Loc);
2383   llvm::Value *TaskArgs[] = {UpLoc, ThreadID, NewTask};
2384   llvm::Value *DepTaskArgs[] = {
2385       UpLoc,
2386       ThreadID,
2387       NewTask,
2388       DependInfo ? CGF.Builder.getInt32(DependencesNumber) : nullptr,
2389       DependInfo,
2390       DependInfo ? CGF.Builder.getInt32(0) : nullptr,
2391       DependInfo ? llvm::ConstantPointerNull::get(CGF.VoidPtrTy) : nullptr};
2392   auto &&ThenCodeGen = [this, DependInfo, &TaskArgs,
2393                         &DepTaskArgs](CodeGenFunction &CGF) {
2394     // TODO: add check for untied tasks.
2395     CGF.EmitRuntimeCall(
2396         createRuntimeFunction(DependInfo ? OMPRTL__kmpc_omp_task_with_deps
2397                                          : OMPRTL__kmpc_omp_task),
2398         DependInfo ? makeArrayRef(DepTaskArgs) : makeArrayRef(TaskArgs));
2399   };
2400   typedef CallEndCleanup<std::extent<decltype(TaskArgs)>::value>
2401       IfCallEndCleanup;
2402   llvm::Value *DepWaitTaskArgs[] = {
2403       UpLoc,
2404       ThreadID,
2405       DependInfo ? CGF.Builder.getInt32(DependencesNumber) : nullptr,
2406       DependInfo,
2407       DependInfo ? CGF.Builder.getInt32(0) : nullptr,
2408       DependInfo ? llvm::ConstantPointerNull::get(CGF.VoidPtrTy) : nullptr};
2409   auto &&ElseCodeGen = [this, &TaskArgs, ThreadID, NewTaskNewTaskTTy, TaskEntry,
2410                         DependInfo, &DepWaitTaskArgs](CodeGenFunction &CGF) {
2411     CodeGenFunction::RunCleanupsScope LocalScope(CGF);
2412     // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
2413     // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
2414     // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info
2415     // is specified.
2416     if (DependInfo)
2417       CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_wait_deps),
2418                           DepWaitTaskArgs);
2419     // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
2420     // kmp_task_t *new_task);
2421     CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task_begin_if0),
2422                         TaskArgs);
2423     // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
2424     // kmp_task_t *new_task);
2425     CGF.EHStack.pushCleanup<IfCallEndCleanup>(
2426         NormalAndEHCleanup,
2427         createRuntimeFunction(OMPRTL__kmpc_omp_task_complete_if0),
2428         llvm::makeArrayRef(TaskArgs));
2429 
2430     // Call proxy_task_entry(gtid, new_task);
2431     llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy};
2432     CGF.EmitCallOrInvoke(TaskEntry, OutlinedFnArgs);
2433   };
2434   if (IfCond) {
2435     emitOMPIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen);
2436   } else {
2437     CodeGenFunction::RunCleanupsScope Scope(CGF);
2438     ThenCodeGen(CGF);
2439   }
2440 }
2441 
2442 static llvm::Value *emitReductionFunction(CodeGenModule &CGM,
2443                                           llvm::Type *ArgsType,
2444                                           ArrayRef<const Expr *> LHSExprs,
2445                                           ArrayRef<const Expr *> RHSExprs,
2446                                           ArrayRef<const Expr *> ReductionOps) {
2447   auto &C = CGM.getContext();
2448 
2449   // void reduction_func(void *LHSArg, void *RHSArg);
2450   FunctionArgList Args;
2451   ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, SourceLocation(), /*Id=*/nullptr,
2452                            C.VoidPtrTy);
2453   ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, SourceLocation(), /*Id=*/nullptr,
2454                            C.VoidPtrTy);
2455   Args.push_back(&LHSArg);
2456   Args.push_back(&RHSArg);
2457   FunctionType::ExtInfo EI;
2458   auto &CGFI = CGM.getTypes().arrangeFreeFunctionDeclaration(
2459       C.VoidTy, Args, EI, /*isVariadic=*/false);
2460   auto *Fn = llvm::Function::Create(
2461       CGM.getTypes().GetFunctionType(CGFI), llvm::GlobalValue::InternalLinkage,
2462       ".omp.reduction.reduction_func", &CGM.getModule());
2463   CGM.SetLLVMFunctionAttributes(/*D=*/nullptr, CGFI, Fn);
2464   CodeGenFunction CGF(CGM);
2465   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args);
2466 
2467   // Dst = (void*[n])(LHSArg);
2468   // Src = (void*[n])(RHSArg);
2469   auto *LHS = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2470       CGF.Builder.CreateAlignedLoad(CGF.GetAddrOfLocalVar(&LHSArg),
2471                                     CGF.PointerAlignInBytes),
2472       ArgsType);
2473   auto *RHS = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2474       CGF.Builder.CreateAlignedLoad(CGF.GetAddrOfLocalVar(&RHSArg),
2475                                     CGF.PointerAlignInBytes),
2476       ArgsType);
2477 
2478   //  ...
2479   //  *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]);
2480   //  ...
2481   CodeGenFunction::OMPPrivateScope Scope(CGF);
2482   for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I) {
2483     Scope.addPrivate(
2484         cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl()),
2485         [&]() -> llvm::Value *{
2486           return CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2487               CGF.Builder.CreateAlignedLoad(
2488                   CGF.Builder.CreateStructGEP(/*Ty=*/nullptr, RHS, I),
2489                   CGM.PointerAlignInBytes),
2490               CGF.ConvertTypeForMem(C.getPointerType(RHSExprs[I]->getType())));
2491         });
2492     Scope.addPrivate(
2493         cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl()),
2494         [&]() -> llvm::Value *{
2495           return CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2496               CGF.Builder.CreateAlignedLoad(
2497                   CGF.Builder.CreateStructGEP(/*Ty=*/nullptr, LHS, I),
2498                   CGM.PointerAlignInBytes),
2499               CGF.ConvertTypeForMem(C.getPointerType(LHSExprs[I]->getType())));
2500         });
2501   }
2502   Scope.Privatize();
2503   for (auto *E : ReductionOps) {
2504     CGF.EmitIgnoredExpr(E);
2505   }
2506   Scope.ForceCleanup();
2507   CGF.FinishFunction();
2508   return Fn;
2509 }
2510 
2511 void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
2512                                     ArrayRef<const Expr *> LHSExprs,
2513                                     ArrayRef<const Expr *> RHSExprs,
2514                                     ArrayRef<const Expr *> ReductionOps,
2515                                     bool WithNowait, bool SimpleReduction) {
2516   // Next code should be emitted for reduction:
2517   //
2518   // static kmp_critical_name lock = { 0 };
2519   //
2520   // void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
2521   //  *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]);
2522   //  ...
2523   //  *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1],
2524   //  *(Type<n>-1*)rhs[<n>-1]);
2525   // }
2526   //
2527   // ...
2528   // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]};
2529   // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
2530   // RedList, reduce_func, &<lock>)) {
2531   // case 1:
2532   //  ...
2533   //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
2534   //  ...
2535   // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
2536   // break;
2537   // case 2:
2538   //  ...
2539   //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
2540   //  ...
2541   // [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);]
2542   // break;
2543   // default:;
2544   // }
2545   //
2546   // if SimpleReduction is true, only the next code is generated:
2547   //  ...
2548   //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
2549   //  ...
2550 
2551   auto &C = CGM.getContext();
2552 
2553   if (SimpleReduction) {
2554     CodeGenFunction::RunCleanupsScope Scope(CGF);
2555     for (auto *E : ReductionOps) {
2556       CGF.EmitIgnoredExpr(E);
2557     }
2558     return;
2559   }
2560 
2561   // 1. Build a list of reduction variables.
2562   // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]};
2563   llvm::APInt ArraySize(/*unsigned int numBits=*/32, RHSExprs.size());
2564   QualType ReductionArrayTy =
2565       C.getConstantArrayType(C.VoidPtrTy, ArraySize, ArrayType::Normal,
2566                              /*IndexTypeQuals=*/0);
2567   auto *ReductionList =
2568       CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list");
2569   for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I) {
2570     auto *Elem = CGF.Builder.CreateStructGEP(/*Ty=*/nullptr, ReductionList, I);
2571     CGF.Builder.CreateAlignedStore(
2572         CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2573             CGF.EmitLValue(RHSExprs[I]).getAddress(), CGF.VoidPtrTy),
2574         Elem, CGM.PointerAlignInBytes);
2575   }
2576 
2577   // 2. Emit reduce_func().
2578   auto *ReductionFn = emitReductionFunction(
2579       CGM, CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo(), LHSExprs,
2580       RHSExprs, ReductionOps);
2581 
2582   // 3. Create static kmp_critical_name lock = { 0 };
2583   auto *Lock = getCriticalRegionLock(".reduction");
2584 
2585   // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
2586   // RedList, reduce_func, &<lock>);
2587   auto *IdentTLoc = emitUpdateLocation(
2588       CGF, Loc,
2589       static_cast<OpenMPLocationFlags>(OMP_IDENT_KMPC | OMP_ATOMIC_REDUCE));
2590   auto *ThreadId = getThreadID(CGF, Loc);
2591   auto *ReductionArrayTySize = llvm::ConstantInt::get(
2592       CGM.SizeTy, C.getTypeSizeInChars(ReductionArrayTy).getQuantity());
2593   auto *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(ReductionList,
2594                                                              CGF.VoidPtrTy);
2595   llvm::Value *Args[] = {
2596       IdentTLoc,                             // ident_t *<loc>
2597       ThreadId,                              // i32 <gtid>
2598       CGF.Builder.getInt32(RHSExprs.size()), // i32 <n>
2599       ReductionArrayTySize,                  // size_type sizeof(RedList)
2600       RL,                                    // void *RedList
2601       ReductionFn, // void (*) (void *, void *) <reduce_func>
2602       Lock         // kmp_critical_name *&<lock>
2603   };
2604   auto Res = CGF.EmitRuntimeCall(
2605       createRuntimeFunction(WithNowait ? OMPRTL__kmpc_reduce_nowait
2606                                        : OMPRTL__kmpc_reduce),
2607       Args);
2608 
2609   // 5. Build switch(res)
2610   auto *DefaultBB = CGF.createBasicBlock(".omp.reduction.default");
2611   auto *SwInst = CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2);
2612 
2613   // 6. Build case 1:
2614   //  ...
2615   //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
2616   //  ...
2617   // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
2618   // break;
2619   auto *Case1BB = CGF.createBasicBlock(".omp.reduction.case1");
2620   SwInst->addCase(CGF.Builder.getInt32(1), Case1BB);
2621   CGF.EmitBlock(Case1BB);
2622 
2623   {
2624     CodeGenFunction::RunCleanupsScope Scope(CGF);
2625     // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
2626     llvm::Value *EndArgs[] = {
2627         IdentTLoc, // ident_t *<loc>
2628         ThreadId,  // i32 <gtid>
2629         Lock       // kmp_critical_name *&<lock>
2630     };
2631     CGF.EHStack
2632         .pushCleanup<CallEndCleanup<std::extent<decltype(EndArgs)>::value>>(
2633             NormalAndEHCleanup,
2634             createRuntimeFunction(WithNowait ? OMPRTL__kmpc_end_reduce_nowait
2635                                              : OMPRTL__kmpc_end_reduce),
2636             llvm::makeArrayRef(EndArgs));
2637     for (auto *E : ReductionOps) {
2638       CGF.EmitIgnoredExpr(E);
2639     }
2640   }
2641 
2642   CGF.EmitBranch(DefaultBB);
2643 
2644   // 7. Build case 2:
2645   //  ...
2646   //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
2647   //  ...
2648   // break;
2649   auto *Case2BB = CGF.createBasicBlock(".omp.reduction.case2");
2650   SwInst->addCase(CGF.Builder.getInt32(2), Case2BB);
2651   CGF.EmitBlock(Case2BB);
2652 
2653   {
2654     CodeGenFunction::RunCleanupsScope Scope(CGF);
2655     if (!WithNowait) {
2656       // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>);
2657       llvm::Value *EndArgs[] = {
2658           IdentTLoc, // ident_t *<loc>
2659           ThreadId,  // i32 <gtid>
2660           Lock       // kmp_critical_name *&<lock>
2661       };
2662       CGF.EHStack
2663           .pushCleanup<CallEndCleanup<std::extent<decltype(EndArgs)>::value>>(
2664               NormalAndEHCleanup,
2665               createRuntimeFunction(OMPRTL__kmpc_end_reduce),
2666               llvm::makeArrayRef(EndArgs));
2667     }
2668     auto I = LHSExprs.begin();
2669     for (auto *E : ReductionOps) {
2670       const Expr *XExpr = nullptr;
2671       const Expr *EExpr = nullptr;
2672       const Expr *UpExpr = nullptr;
2673       BinaryOperatorKind BO = BO_Comma;
2674       if (auto *BO = dyn_cast<BinaryOperator>(E)) {
2675         if (BO->getOpcode() == BO_Assign) {
2676           XExpr = BO->getLHS();
2677           UpExpr = BO->getRHS();
2678         }
2679       }
2680       // Try to emit update expression as a simple atomic.
2681       auto *RHSExpr = UpExpr;
2682       if (RHSExpr) {
2683         // Analyze RHS part of the whole expression.
2684         if (auto *ACO = dyn_cast<AbstractConditionalOperator>(
2685                 RHSExpr->IgnoreParenImpCasts())) {
2686           // If this is a conditional operator, analyze its condition for
2687           // min/max reduction operator.
2688           RHSExpr = ACO->getCond();
2689         }
2690         if (auto *BORHS =
2691                 dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) {
2692           EExpr = BORHS->getRHS();
2693           BO = BORHS->getOpcode();
2694         }
2695       }
2696       if (XExpr) {
2697         auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl());
2698         LValue X = CGF.EmitLValue(XExpr);
2699         RValue E;
2700         if (EExpr)
2701           E = CGF.EmitAnyExpr(EExpr);
2702         CGF.EmitOMPAtomicSimpleUpdateExpr(
2703             X, E, BO, /*IsXLHSInRHSPart=*/true, llvm::Monotonic, Loc,
2704             [&CGF, UpExpr, VD](RValue XRValue) {
2705               CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
2706               PrivateScope.addPrivate(
2707                   VD, [&CGF, VD, XRValue]() -> llvm::Value *{
2708                     auto *LHSTemp = CGF.CreateMemTemp(VD->getType());
2709                     CGF.EmitStoreThroughLValue(
2710                         XRValue,
2711                         CGF.MakeNaturalAlignAddrLValue(LHSTemp, VD->getType()));
2712                     return LHSTemp;
2713                   });
2714               (void)PrivateScope.Privatize();
2715               return CGF.EmitAnyExpr(UpExpr);
2716             });
2717       } else {
2718         // Emit as a critical region.
2719         emitCriticalRegion(CGF, ".atomic_reduction", [E](CodeGenFunction &CGF) {
2720           CGF.EmitIgnoredExpr(E);
2721         }, Loc);
2722       }
2723       ++I;
2724     }
2725   }
2726 
2727   CGF.EmitBranch(DefaultBB);
2728   CGF.EmitBlock(DefaultBB, /*IsFinished=*/true);
2729 }
2730 
2731 void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
2732                                        SourceLocation Loc) {
2733   // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
2734   // global_tid);
2735   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2736   // Ignore return result until untied tasks are supported.
2737   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_taskwait), Args);
2738 }
2739 
2740 void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF,
2741                                            OpenMPDirectiveKind InnerKind,
2742                                            const RegionCodeGenTy &CodeGen) {
2743   InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind);
2744   CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr);
2745 }
2746 
namespace {
/// \brief Cancellation kinds passed as the 'cncl_kind' argument of the
/// __kmpc_cancel and __kmpc_cancellationpoint calls emitted in this file.
///
/// The numeric values are spelled out explicitly because they are handed to
/// the runtime as plain i32 constants.
/// NOTE(review): presumably they must match the runtime's own cancel-kind
/// enumeration -- confirm against the OpenMP runtime headers before changing.
enum RTCancelKind {
  /// Not produced for any directive by getCancellationKind().
  /// NOTE(review): presumably means "no cancellation requested" -- confirm.
  CancelNoreq = 0,
  /// Selected for OMPD_parallel.
  CancelParallel = 1,
  /// Selected for OMPD_for.
  CancelLoop = 2,
  /// Selected for OMPD_sections.
  CancelSections = 3,
  /// Selected for OMPD_taskgroup.
  CancelTaskgroup = 4
};
} // end anonymous namespace
2756 
2757 static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) {
2758   RTCancelKind CancelKind = CancelNoreq;
2759   if (CancelRegion == OMPD_parallel)
2760     CancelKind = CancelParallel;
2761   else if (CancelRegion == OMPD_for)
2762     CancelKind = CancelLoop;
2763   else if (CancelRegion == OMPD_sections)
2764     CancelKind = CancelSections;
2765   else {
2766     assert(CancelRegion == OMPD_taskgroup);
2767     CancelKind = CancelTaskgroup;
2768   }
2769   return CancelKind;
2770 }
2771 
2772 void CGOpenMPRuntime::emitCancellationPointCall(
2773     CodeGenFunction &CGF, SourceLocation Loc,
2774     OpenMPDirectiveKind CancelRegion) {
2775   // Build call kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
2776   // global_tid, kmp_int32 cncl_kind);
2777   if (auto *OMPRegionInfo =
2778           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
2779     auto CancelDest =
2780         CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
2781     if (CancelDest.isValid()) {
2782       llvm::Value *Args[] = {
2783           emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2784           CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
2785       // Ignore return result until untied tasks are supported.
2786       auto *Result = CGF.EmitRuntimeCall(
2787           createRuntimeFunction(OMPRTL__kmpc_cancellationpoint), Args);
2788       // if (__kmpc_cancellationpoint()) {
2789       //  __kmpc_cancel_barrier();
2790       //   exit from construct;
2791       // }
2792       auto *ExitBB = CGF.createBasicBlock(".cancel.exit");
2793       auto *ContBB = CGF.createBasicBlock(".cancel.continue");
2794       auto *Cmp = CGF.Builder.CreateIsNotNull(Result);
2795       CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
2796       CGF.EmitBlock(ExitBB);
2797       // __kmpc_cancel_barrier();
2798       emitBarrierCall(CGF, Loc, OMPD_unknown, /*CheckForCancel=*/false);
2799       // exit from construct;
2800       CGF.EmitBranchThroughCleanup(CancelDest);
2801       CGF.EmitBlock(ContBB, /*IsFinished=*/true);
2802     }
2803   }
2804 }
2805 
2806 void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc,
2807                                      OpenMPDirectiveKind CancelRegion) {
2808   // Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
2809   // kmp_int32 cncl_kind);
2810   if (auto *OMPRegionInfo =
2811           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
2812     auto CancelDest =
2813         CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
2814     if (CancelDest.isValid()) {
2815       llvm::Value *Args[] = {
2816           emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2817           CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
2818       // Ignore return result until untied tasks are supported.
2819       auto *Result =
2820           CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_cancel), Args);
2821       // if (__kmpc_cancel()) {
2822       //  __kmpc_cancel_barrier();
2823       //   exit from construct;
2824       // }
2825       auto *ExitBB = CGF.createBasicBlock(".cancel.exit");
2826       auto *ContBB = CGF.createBasicBlock(".cancel.continue");
2827       auto *Cmp = CGF.Builder.CreateIsNotNull(Result);
2828       CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
2829       CGF.EmitBlock(ExitBB);
2830       // __kmpc_cancel_barrier();
2831       emitBarrierCall(CGF, Loc, OMPD_unknown, /*CheckForCancel=*/false);
2832       // exit from construct;
2833       CGF.EmitBranchThroughCleanup(CancelDest);
2834       CGF.EmitBlock(ContBB, /*IsFinished=*/true);
2835     }
2836   }
2837 }
2838 
2839