1 //===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This provides a class for OpenMP runtime code generation.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "CGOpenMPRuntime.h"
15 #include "CodeGenFunction.h"
16 #include "clang/AST/Decl.h"
17 #include "clang/AST/StmtOpenMP.h"
18 #include "llvm/ADT/ArrayRef.h"
19 #include "llvm/IR/CallSite.h"
20 #include "llvm/IR/DerivedTypes.h"
21 #include "llvm/IR/GlobalValue.h"
22 #include "llvm/IR/Value.h"
23 #include "llvm/Support/raw_ostream.h"
24 #include <cassert>
25 
26 using namespace clang;
27 using namespace CodeGen;
28 
29 namespace {
30 /// \brief Base class for handling code generation inside OpenMP regions.
31 class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
32 public:
33   CGOpenMPRegionInfo(const OMPExecutableDirective &D, const CapturedStmt &CS)
34       : CGCapturedStmtInfo(CS, CR_OpenMP), Directive(D) {}
35 
36   CGOpenMPRegionInfo(const OMPExecutableDirective &D)
37       : CGCapturedStmtInfo(CR_OpenMP), Directive(D) {}
38 
39   /// \brief Get a variable or parameter for storing global thread id
40   /// inside OpenMP construct.
41   virtual const VarDecl *getThreadIDVariable() const = 0;
42 
43   /// \brief Get an LValue for the current ThreadID variable.
44   LValue getThreadIDVariableLValue(CodeGenFunction &CGF);
45 
46     /// \brief Emit the captured statement body.
47   virtual void EmitBody(CodeGenFunction &CGF, const Stmt *S) override;
48 
49   static bool classof(const CGCapturedStmtInfo *Info) {
50     return Info->getKind() == CR_OpenMP;
51   }
52 protected:
53   /// \brief OpenMP executable directive associated with the region.
54   const OMPExecutableDirective &Directive;
55 };
56 
57 /// \brief API for captured statement code generation in OpenMP constructs.
58 class CGOpenMPOutlinedRegionInfo : public CGOpenMPRegionInfo {
59 public:
60   CGOpenMPOutlinedRegionInfo(const OMPExecutableDirective &D,
61                              const CapturedStmt &CS, const VarDecl *ThreadIDVar)
62       : CGOpenMPRegionInfo(D, CS), ThreadIDVar(ThreadIDVar) {
63     assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
64   }
65   /// \brief Get a variable or parameter for storing global thread id
66   /// inside OpenMP construct.
67   virtual const VarDecl *getThreadIDVariable() const override {
68     return ThreadIDVar;
69   }
70   /// \brief Get the name of the capture helper.
71   StringRef getHelperName() const override { return ".omp_outlined."; }
72 
73 private:
74   /// \brief A variable or parameter storing global thread id for OpenMP
75   /// constructs.
76   const VarDecl *ThreadIDVar;
77 };
78 
79 /// \brief API for inlined captured statement code generation in OpenMP
80 /// constructs.
81 class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo {
82 public:
83   CGOpenMPInlinedRegionInfo(const OMPExecutableDirective &D,
84                             CodeGenFunction::CGCapturedStmtInfo *OldCSI)
85       : CGOpenMPRegionInfo(D), OldCSI(OldCSI),
86         OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {}
87   // \brief Retrieve the value of the context parameter.
88   virtual llvm::Value *getContextValue() const override {
89     if (OuterRegionInfo)
90       return OuterRegionInfo->getContextValue();
91     llvm_unreachable("No context value for inlined OpenMP region");
92   }
93   /// \brief Lookup the captured field decl for a variable.
94   virtual const FieldDecl *lookup(const VarDecl *VD) const override {
95     if (OuterRegionInfo)
96       return OuterRegionInfo->lookup(VD);
97     llvm_unreachable("Trying to reference VarDecl that is neither local nor "
98                      "captured in outer OpenMP region");
99   }
100   virtual FieldDecl *getThisFieldDecl() const override {
101     if (OuterRegionInfo)
102       return OuterRegionInfo->getThisFieldDecl();
103     return nullptr;
104   }
105   /// \brief Get a variable or parameter for storing global thread id
106   /// inside OpenMP construct.
107   virtual const VarDecl *getThreadIDVariable() const override {
108     if (OuterRegionInfo)
109       return OuterRegionInfo->getThreadIDVariable();
110     return nullptr;
111   }
112   /// \brief Get the name of the capture helper.
113   virtual StringRef getHelperName() const override {
114     llvm_unreachable("No helper name for inlined OpenMP construct");
115   }
116 
117   CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; }
118 
119 private:
120   /// \brief CodeGen info about outer OpenMP region.
121   CodeGenFunction::CGCapturedStmtInfo *OldCSI;
122   CGOpenMPRegionInfo *OuterRegionInfo;
123 };
124 } // namespace
125 
126 LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) {
127   return CGF.MakeNaturalAlignAddrLValue(
128       CGF.GetAddrOfLocalVar(getThreadIDVariable()),
129       CGF.getContext().getPointerType(getThreadIDVariable()->getType()));
130 }
131 
132 void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt *S) {
133   CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
134   CGF.EmitOMPPrivateClause(Directive, PrivateScope);
135   CGF.EmitOMPFirstprivateClause(Directive, PrivateScope);
136   if (PrivateScope.Privatize())
137     // Emit implicit barrier to synchronize threads and avoid data races.
138     CGF.CGM.getOpenMPRuntime().emitBarrierCall(CGF, Directive.getLocStart(),
139                                                /*IsExplicit=*/false);
140   CGCapturedStmtInfo::EmitBody(CGF, S);
141 }
142 
143 CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM)
144     : CGM(CGM), DefaultOpenMPPSource(nullptr) {
145   IdentTy = llvm::StructType::create(
146       "ident_t", CGM.Int32Ty /* reserved_1 */, CGM.Int32Ty /* flags */,
147       CGM.Int32Ty /* reserved_2 */, CGM.Int32Ty /* reserved_3 */,
148       CGM.Int8PtrTy /* psource */, nullptr);
149   // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...)
150   llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty),
151                                llvm::PointerType::getUnqual(CGM.Int32Ty)};
152   Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true);
153   KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8);
154 }
155 
156 llvm::Value *
157 CGOpenMPRuntime::emitOutlinedFunction(const OMPExecutableDirective &D,
158                                       const VarDecl *ThreadIDVar) {
159   const CapturedStmt *CS = cast<CapturedStmt>(D.getAssociatedStmt());
160   CodeGenFunction CGF(CGM, true);
161   CGOpenMPOutlinedRegionInfo CGInfo(D, *CS, ThreadIDVar);
162   CGF.CapturedStmtInfo = &CGInfo;
163   return CGF.GenerateCapturedStmtFunction(*CS);
164 }
165 
166 llvm::Value *
167 CGOpenMPRuntime::getOrCreateDefaultLocation(OpenMPLocationFlags Flags) {
168   llvm::Value *Entry = OpenMPDefaultLocMap.lookup(Flags);
169   if (!Entry) {
170     if (!DefaultOpenMPPSource) {
171       // Initialize default location for psource field of ident_t structure of
172       // all ident_t objects. Format is ";file;function;line;column;;".
173       // Taken from
174       // http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp_str.c
175       DefaultOpenMPPSource =
176           CGM.GetAddrOfConstantCString(";unknown;unknown;0;0;;");
177       DefaultOpenMPPSource =
178           llvm::ConstantExpr::getBitCast(DefaultOpenMPPSource, CGM.Int8PtrTy);
179     }
180     auto DefaultOpenMPLocation = new llvm::GlobalVariable(
181         CGM.getModule(), IdentTy, /*isConstant*/ true,
182         llvm::GlobalValue::PrivateLinkage, /*Initializer*/ nullptr);
183     DefaultOpenMPLocation->setUnnamedAddr(true);
184 
185     llvm::Constant *Zero = llvm::ConstantInt::get(CGM.Int32Ty, 0, true);
186     llvm::Constant *Values[] = {Zero,
187                                 llvm::ConstantInt::get(CGM.Int32Ty, Flags),
188                                 Zero, Zero, DefaultOpenMPPSource};
189     llvm::Constant *Init = llvm::ConstantStruct::get(IdentTy, Values);
190     DefaultOpenMPLocation->setInitializer(Init);
191     OpenMPDefaultLocMap[Flags] = DefaultOpenMPLocation;
192     return DefaultOpenMPLocation;
193   }
194   return Entry;
195 }
196 
/// \brief Produces the ident_t location argument to pass to a runtime call
/// made at \a Loc.
///
/// Without debug info, or when \a Loc is invalid, the shared default location
/// for \a Flags is returned. Otherwise a per-function ident_t temporary is
/// used: it is created on first use, initialized by copying the default
/// location, and its psource field is overwritten with a string describing
/// \a Loc each time this function is called.
///
/// \param CGF Function currently being emitted; CGF.CurFn must be set when
/// debug info is enabled and \a Loc is valid.
/// \param Loc Source location to encode.
/// \param Flags Flags selecting the default location used for initialization.
/// \return Either the default location global or the per-function temporary.
llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF,
                                                 SourceLocation Loc,
                                                 OpenMPLocationFlags Flags) {
  // If no debug info is generated - return global default location.
  if (CGM.getCodeGenOpts().getDebugInfo() == CodeGenOptions::NoDebugInfo ||
      Loc.isInvalid())
    return getOrCreateDefaultLocation(Flags);

  assert(CGF.CurFn && "No function in current CodeGenFunction.");

  // Reuse the ident_t temporary already created for this function, if any.
  llvm::Value *LocValue = nullptr;
  auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
  if (I != OpenMPLocThreadIDMap.end())
    LocValue = I->second.DebugLoc;
  // OpenMPLocThreadIDMap may have null DebugLoc and non-null ThreadID, if
  // GetOpenMPThreadID was called before this routine.
  if (LocValue == nullptr) {
    // Generate "ident_t .kmpc_loc.addr;"
    llvm::AllocaInst *AI = CGF.CreateTempAlloca(IdentTy, ".kmpc_loc.addr");
    AI->setAlignment(CGM.getDataLayout().getPrefTypeAlignment(IdentTy));
    auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
    Elem.second.DebugLoc = AI;
    LocValue = AI;

    // Emit the initializing copy at the alloca insertion point; the guard
    // restores the previous insertion point when this scope ends.
    // NOTE(review): the copy uses the Flags of the call that creates the
    // temporary; later calls in the same function with different Flags reuse
    // it unchanged - confirm this is intended.
    CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
    CGF.Builder.SetInsertPoint(CGF.AllocaInsertPt);
    CGF.Builder.CreateMemCpy(LocValue, getOrCreateDefaultLocation(Flags),
                             llvm::ConstantExpr::getSizeOf(IdentTy),
                             CGM.PointerAlignInBytes);
  }

  // char **psource = &.kmpc_loc_<flags>.addr.psource;
  auto *PSource =
      CGF.Builder.CreateConstInBoundsGEP2_32(LocValue, 0, IdentField_PSource);

  // The location string is cached per raw source-location encoding.
  auto OMPDebugLoc = OpenMPDebugLocMap.lookup(Loc.getRawEncoding());
  if (OMPDebugLoc == nullptr) {
    SmallString<128> Buffer2;
    llvm::raw_svector_ostream OS2(Buffer2);
    // Build debug location
    PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
    OS2 << ";" << PLoc.getFilename() << ";";
    // The function field stays empty when the current declaration is not a
    // FunctionDecl.
    if (const FunctionDecl *FD =
            dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl)) {
      OS2 << FD->getQualifiedNameAsString();
    }
    OS2 << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;";
    OMPDebugLoc = CGF.Builder.CreateGlobalStringPtr(OS2.str());
    OpenMPDebugLocMap[Loc.getRawEncoding()] = OMPDebugLoc;
  }
  // *psource = ";<File>;<Function>;<Line>;<Column>;;";
  CGF.Builder.CreateStore(OMPDebugLoc, PSource);

  return LocValue;
}
252 
253 llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF,
254                                           SourceLocation Loc) {
255   assert(CGF.CurFn && "No function in current CodeGenFunction.");
256 
257   llvm::Value *ThreadID = nullptr;
258   // Check whether we've already cached a load of the thread id in this
259   // function.
260   auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
261   if (I != OpenMPLocThreadIDMap.end()) {
262     ThreadID = I->second.ThreadID;
263     if (ThreadID != nullptr)
264       return ThreadID;
265   }
266   if (auto OMPRegionInfo =
267           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
268     if (auto ThreadIDVar = OMPRegionInfo->getThreadIDVariable()) {
269       // Check if this an outlined function with thread id passed as argument.
270       auto LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF);
271       auto RVal = CGF.EmitLoadOfLValue(LVal, Loc);
272       LVal = CGF.MakeNaturalAlignAddrLValue(RVal.getScalarVal(),
273                                             ThreadIDVar->getType());
274       ThreadID = CGF.EmitLoadOfLValue(LVal, Loc).getScalarVal();
275       // If value loaded in entry block, cache it and use it everywhere in
276       // function.
277       if (CGF.Builder.GetInsertBlock() == CGF.AllocaInsertPt->getParent()) {
278         auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
279         Elem.second.ThreadID = ThreadID;
280       }
281       return ThreadID;
282     }
283   }
284 
285   // This is not an outlined function region - need to call __kmpc_int32
286   // kmpc_global_thread_num(ident_t *loc).
287   // Generate thread id value and cache this value for use across the
288   // function.
289   CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
290   CGF.Builder.SetInsertPoint(CGF.AllocaInsertPt);
291   ThreadID =
292       CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_global_thread_num),
293                           emitUpdateLocation(CGF, Loc));
294   auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
295   Elem.second.ThreadID = ThreadID;
296   return ThreadID;
297 }
298 
299 void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) {
300   assert(CGF.CurFn && "No function in current CodeGenFunction.");
301   if (OpenMPLocThreadIDMap.count(CGF.CurFn))
302     OpenMPLocThreadIDMap.erase(CGF.CurFn);
303 }
304 
305 llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() {
306   return llvm::PointerType::getUnqual(IdentTy);
307 }
308 
309 llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() {
310   return llvm::PointerType::getUnqual(Kmpc_MicroTy);
311 }
312 
313 llvm::Constant *
314 CGOpenMPRuntime::createRuntimeFunction(OpenMPRTLFunction Function) {
315   llvm::Constant *RTLFn = nullptr;
316   switch (Function) {
317   case OMPRTL__kmpc_fork_call: {
318     // Build void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro
319     // microtask, ...);
320     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
321                                 getKmpc_MicroPointerTy()};
322     llvm::FunctionType *FnTy =
323         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ true);
324     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_fork_call");
325     break;
326   }
327   case OMPRTL__kmpc_global_thread_num: {
328     // Build kmp_int32 __kmpc_global_thread_num(ident_t *loc);
329     llvm::Type *TypeParams[] = {getIdentTyPointerTy()};
330     llvm::FunctionType *FnTy =
331         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
332     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_global_thread_num");
333     break;
334   }
335   case OMPRTL__kmpc_threadprivate_cached: {
336     // Build void *__kmpc_threadprivate_cached(ident_t *loc,
337     // kmp_int32 global_tid, void *data, size_t size, void ***cache);
338     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
339                                 CGM.VoidPtrTy, CGM.SizeTy,
340                                 CGM.VoidPtrTy->getPointerTo()->getPointerTo()};
341     llvm::FunctionType *FnTy =
342         llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg*/ false);
343     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_cached");
344     break;
345   }
346   case OMPRTL__kmpc_critical: {
347     // Build void __kmpc_critical(ident_t *loc, kmp_int32 global_tid,
348     // kmp_critical_name *crit);
349     llvm::Type *TypeParams[] = {
350         getIdentTyPointerTy(), CGM.Int32Ty,
351         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
352     llvm::FunctionType *FnTy =
353         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
354     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_critical");
355     break;
356   }
357   case OMPRTL__kmpc_threadprivate_register: {
358     // Build void __kmpc_threadprivate_register(ident_t *, void *data,
359     // kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor);
360     // typedef void *(*kmpc_ctor)(void *);
361     auto KmpcCtorTy =
362         llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
363                                 /*isVarArg*/ false)->getPointerTo();
364     // typedef void *(*kmpc_cctor)(void *, void *);
365     llvm::Type *KmpcCopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
366     auto KmpcCopyCtorTy =
367         llvm::FunctionType::get(CGM.VoidPtrTy, KmpcCopyCtorTyArgs,
368                                 /*isVarArg*/ false)->getPointerTo();
369     // typedef void (*kmpc_dtor)(void *);
370     auto KmpcDtorTy =
371         llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy, /*isVarArg*/ false)
372             ->getPointerTo();
373     llvm::Type *FnTyArgs[] = {getIdentTyPointerTy(), CGM.VoidPtrTy, KmpcCtorTy,
374                               KmpcCopyCtorTy, KmpcDtorTy};
375     auto FnTy = llvm::FunctionType::get(CGM.VoidTy, FnTyArgs,
376                                         /*isVarArg*/ false);
377     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_register");
378     break;
379   }
380   case OMPRTL__kmpc_end_critical: {
381     // Build void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid,
382     // kmp_critical_name *crit);
383     llvm::Type *TypeParams[] = {
384         getIdentTyPointerTy(), CGM.Int32Ty,
385         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
386     llvm::FunctionType *FnTy =
387         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
388     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_critical");
389     break;
390   }
391   case OMPRTL__kmpc_cancel_barrier: {
392     // Build kmp_int32 __kmpc_cancel_barrier(ident_t *loc, kmp_int32
393     // global_tid);
394     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
395     llvm::FunctionType *FnTy =
396         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
397     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_cancel_barrier");
398     break;
399   }
400   // Build __kmpc_for_static_init*(
401   //               ident_t *loc, kmp_int32 tid, kmp_int32 schedtype,
402   //               kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower,
403   //               kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
404   //               kmp_int[32|64] incr, kmp_int[32|64] chunk);
405   case OMPRTL__kmpc_for_static_init_4: {
406     auto ITy = CGM.Int32Ty;
407     auto PtrTy = llvm::PointerType::getUnqual(ITy);
408     llvm::Type *TypeParams[] = {
409         getIdentTyPointerTy(),                     // loc
410         CGM.Int32Ty,                               // tid
411         CGM.Int32Ty,                               // schedtype
412         llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
413         PtrTy,                                     // p_lower
414         PtrTy,                                     // p_upper
415         PtrTy,                                     // p_stride
416         ITy,                                       // incr
417         ITy                                        // chunk
418     };
419     llvm::FunctionType *FnTy =
420         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
421     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_for_static_init_4");
422     break;
423   }
424   case OMPRTL__kmpc_for_static_init_4u: {
425     auto ITy = CGM.Int32Ty;
426     auto PtrTy = llvm::PointerType::getUnqual(ITy);
427     llvm::Type *TypeParams[] = {
428         getIdentTyPointerTy(),                     // loc
429         CGM.Int32Ty,                               // tid
430         CGM.Int32Ty,                               // schedtype
431         llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
432         PtrTy,                                     // p_lower
433         PtrTy,                                     // p_upper
434         PtrTy,                                     // p_stride
435         ITy,                                       // incr
436         ITy                                        // chunk
437     };
438     llvm::FunctionType *FnTy =
439         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
440     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_for_static_init_4u");
441     break;
442   }
443   case OMPRTL__kmpc_for_static_init_8: {
444     auto ITy = CGM.Int64Ty;
445     auto PtrTy = llvm::PointerType::getUnqual(ITy);
446     llvm::Type *TypeParams[] = {
447         getIdentTyPointerTy(),                     // loc
448         CGM.Int32Ty,                               // tid
449         CGM.Int32Ty,                               // schedtype
450         llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
451         PtrTy,                                     // p_lower
452         PtrTy,                                     // p_upper
453         PtrTy,                                     // p_stride
454         ITy,                                       // incr
455         ITy                                        // chunk
456     };
457     llvm::FunctionType *FnTy =
458         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
459     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_for_static_init_8");
460     break;
461   }
462   case OMPRTL__kmpc_for_static_init_8u: {
463     auto ITy = CGM.Int64Ty;
464     auto PtrTy = llvm::PointerType::getUnqual(ITy);
465     llvm::Type *TypeParams[] = {
466         getIdentTyPointerTy(),                     // loc
467         CGM.Int32Ty,                               // tid
468         CGM.Int32Ty,                               // schedtype
469         llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
470         PtrTy,                                     // p_lower
471         PtrTy,                                     // p_upper
472         PtrTy,                                     // p_stride
473         ITy,                                       // incr
474         ITy                                        // chunk
475     };
476     llvm::FunctionType *FnTy =
477         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
478     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_for_static_init_8u");
479     break;
480   }
481   case OMPRTL__kmpc_for_static_fini: {
482     // Build void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid);
483     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
484     llvm::FunctionType *FnTy =
485         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
486     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_for_static_fini");
487     break;
488   }
489   case OMPRTL__kmpc_push_num_threads: {
490     // Build void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
491     // kmp_int32 num_threads)
492     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
493                                 CGM.Int32Ty};
494     llvm::FunctionType *FnTy =
495         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
496     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_num_threads");
497     break;
498   }
499   case OMPRTL__kmpc_serialized_parallel: {
500     // Build void __kmpc_serialized_parallel(ident_t *loc, kmp_int32
501     // global_tid);
502     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
503     llvm::FunctionType *FnTy =
504         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
505     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_serialized_parallel");
506     break;
507   }
508   case OMPRTL__kmpc_end_serialized_parallel: {
509     // Build void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32
510     // global_tid);
511     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
512     llvm::FunctionType *FnTy =
513         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
514     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_serialized_parallel");
515     break;
516   }
517   case OMPRTL__kmpc_flush: {
518     // Build void __kmpc_flush(ident_t *loc);
519     llvm::Type *TypeParams[] = {getIdentTyPointerTy()};
520     llvm::FunctionType *FnTy =
521         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
522     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_flush");
523     break;
524   }
525   case OMPRTL__kmpc_master: {
526     // Build kmp_int32 __kmpc_master(ident_t *loc, kmp_int32 global_tid);
527     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
528     llvm::FunctionType *FnTy =
529         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
530     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_master");
531     break;
532   }
533   case OMPRTL__kmpc_end_master: {
534     // Build void __kmpc_end_master(ident_t *loc, kmp_int32 global_tid);
535     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
536     llvm::FunctionType *FnTy =
537         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
538     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_master");
539     break;
540   }
541   case OMPRTL__kmpc_omp_taskyield: {
542     // Build kmp_int32 __kmpc_omp_taskyield(ident_t *, kmp_int32 global_tid,
543     // int end_part);
544     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
545     llvm::FunctionType *FnTy =
546         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
547     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_taskyield");
548     break;
549   }
550   case OMPRTL__kmpc_single: {
551     // Build kmp_int32 __kmpc_single(ident_t *loc, kmp_int32 global_tid);
552     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
553     llvm::FunctionType *FnTy =
554         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
555     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_single");
556     break;
557   }
558   case OMPRTL__kmpc_end_single: {
559     // Build void __kmpc_end_single(ident_t *loc, kmp_int32 global_tid);
560     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
561     llvm::FunctionType *FnTy =
562         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
563     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_single");
564     break;
565   }
566   }
567   return RTLFn;
568 }
569 
570 llvm::Constant *
571 CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) {
572   // Lookup the entry, lazily creating it if necessary.
573   return getOrCreateInternalVariable(CGM.Int8PtrPtrTy,
574                                      Twine(CGM.getMangledName(VD)) + ".cache.");
575 }
576 
577 llvm::Value *CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
578                                                      const VarDecl *VD,
579                                                      llvm::Value *VDAddr,
580                                                      SourceLocation Loc) {
581   auto VarTy = VDAddr->getType()->getPointerElementType();
582   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
583                          CGF.Builder.CreatePointerCast(VDAddr, CGM.Int8PtrTy),
584                          CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)),
585                          getOrCreateThreadPrivateCache(VD)};
586   return CGF.EmitRuntimeCall(
587       createRuntimeFunction(OMPRTL__kmpc_threadprivate_cached), Args);
588 }
589 
590 void CGOpenMPRuntime::emitThreadPrivateVarInit(
591     CodeGenFunction &CGF, llvm::Value *VDAddr, llvm::Value *Ctor,
592     llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) {
593   // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime
594   // library.
595   auto OMPLoc = emitUpdateLocation(CGF, Loc);
596   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_global_thread_num),
597                       OMPLoc);
598   // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor)
599   // to register constructor/destructor for variable.
600   llvm::Value *Args[] = {OMPLoc,
601                          CGF.Builder.CreatePointerCast(VDAddr, CGM.VoidPtrTy),
602                          Ctor, CopyCtor, Dtor};
603   CGF.EmitRuntimeCall(
604       createRuntimeFunction(OMPRTL__kmpc_threadprivate_register), Args);
605 }
606 
607 llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition(
608     const VarDecl *VD, llvm::Value *VDAddr, SourceLocation Loc,
609     bool PerformInit, CodeGenFunction *CGF) {
610   VD = VD->getDefinition(CGM.getContext());
611   if (VD && ThreadPrivateWithDefinition.count(VD) == 0) {
612     ThreadPrivateWithDefinition.insert(VD);
613     QualType ASTTy = VD->getType();
614 
615     llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr;
616     auto Init = VD->getAnyInitializer();
617     if (CGM.getLangOpts().CPlusPlus && PerformInit) {
618       // Generate function that re-emits the declaration's initializer into the
619       // threadprivate copy of the variable VD
620       CodeGenFunction CtorCGF(CGM);
621       FunctionArgList Args;
622       ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, SourceLocation(),
623                             /*Id=*/nullptr, CGM.getContext().VoidPtrTy);
624       Args.push_back(&Dst);
625 
626       auto &FI = CGM.getTypes().arrangeFreeFunctionDeclaration(
627           CGM.getContext().VoidPtrTy, Args, FunctionType::ExtInfo(),
628           /*isVariadic=*/false);
629       auto FTy = CGM.getTypes().GetFunctionType(FI);
630       auto Fn = CGM.CreateGlobalInitOrDestructFunction(
631           FTy, ".__kmpc_global_ctor_.", Loc);
632       CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI,
633                             Args, SourceLocation());
634       auto ArgVal = CtorCGF.EmitLoadOfScalar(
635           CtorCGF.GetAddrOfLocalVar(&Dst),
636           /*Volatile=*/false, CGM.PointerAlignInBytes,
637           CGM.getContext().VoidPtrTy, Dst.getLocation());
638       auto Arg = CtorCGF.Builder.CreatePointerCast(
639           ArgVal,
640           CtorCGF.ConvertTypeForMem(CGM.getContext().getPointerType(ASTTy)));
641       CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(),
642                                /*IsInitializer=*/true);
643       ArgVal = CtorCGF.EmitLoadOfScalar(
644           CtorCGF.GetAddrOfLocalVar(&Dst),
645           /*Volatile=*/false, CGM.PointerAlignInBytes,
646           CGM.getContext().VoidPtrTy, Dst.getLocation());
647       CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue);
648       CtorCGF.FinishFunction();
649       Ctor = Fn;
650     }
651     if (VD->getType().isDestructedType() != QualType::DK_none) {
652       // Generate function that emits destructor call for the threadprivate copy
653       // of the variable VD
654       CodeGenFunction DtorCGF(CGM);
655       FunctionArgList Args;
656       ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, SourceLocation(),
657                             /*Id=*/nullptr, CGM.getContext().VoidPtrTy);
658       Args.push_back(&Dst);
659 
660       auto &FI = CGM.getTypes().arrangeFreeFunctionDeclaration(
661           CGM.getContext().VoidTy, Args, FunctionType::ExtInfo(),
662           /*isVariadic=*/false);
663       auto FTy = CGM.getTypes().GetFunctionType(FI);
664       auto Fn = CGM.CreateGlobalInitOrDestructFunction(
665           FTy, ".__kmpc_global_dtor_.", Loc);
666       DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args,
667                             SourceLocation());
668       auto ArgVal = DtorCGF.EmitLoadOfScalar(
669           DtorCGF.GetAddrOfLocalVar(&Dst),
670           /*Volatile=*/false, CGM.PointerAlignInBytes,
671           CGM.getContext().VoidPtrTy, Dst.getLocation());
672       DtorCGF.emitDestroy(ArgVal, ASTTy,
673                           DtorCGF.getDestroyer(ASTTy.isDestructedType()),
674                           DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
675       DtorCGF.FinishFunction();
676       Dtor = Fn;
677     }
678     // Do not emit init function if it is not required.
679     if (!Ctor && !Dtor)
680       return nullptr;
681 
682     llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
683     auto CopyCtorTy =
684         llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs,
685                                 /*isVarArg=*/false)->getPointerTo();
686     // Copying constructor for the threadprivate variable.
687     // Must be NULL - reserved by runtime, but currently it requires that this
688     // parameter is always NULL. Otherwise it fires assertion.
689     CopyCtor = llvm::Constant::getNullValue(CopyCtorTy);
690     if (Ctor == nullptr) {
691       auto CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
692                                             /*isVarArg=*/false)->getPointerTo();
693       Ctor = llvm::Constant::getNullValue(CtorTy);
694     }
695     if (Dtor == nullptr) {
696       auto DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy,
697                                             /*isVarArg=*/false)->getPointerTo();
698       Dtor = llvm::Constant::getNullValue(DtorTy);
699     }
700     if (!CGF) {
701       auto InitFunctionTy =
702           llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false);
703       auto InitFunction = CGM.CreateGlobalInitOrDestructFunction(
704           InitFunctionTy, ".__omp_threadprivate_init_.");
705       CodeGenFunction InitCGF(CGM);
706       FunctionArgList ArgList;
707       InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction,
708                             CGM.getTypes().arrangeNullaryFunction(), ArgList,
709                             Loc);
710       emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
711       InitCGF.FinishFunction();
712       return InitFunction;
713     }
714     emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
715   }
716   return nullptr;
717 }
718 
719 void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
720                                        llvm::Value *OutlinedFn,
721                                        llvm::Value *CapturedStruct) {
722   // Build call __kmpc_fork_call(loc, 1, microtask, captured_struct/*context*/)
723   llvm::Value *Args[] = {
724       emitUpdateLocation(CGF, Loc),
725       CGF.Builder.getInt32(1), // Number of arguments after 'microtask' argument
726       // (there is only one additional argument - 'context')
727       CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy()),
728       CGF.EmitCastToVoidPtr(CapturedStruct)};
729   auto RTLFn = createRuntimeFunction(OMPRTL__kmpc_fork_call);
730   CGF.EmitRuntimeCall(RTLFn, Args);
731 }
732 
733 void CGOpenMPRuntime::emitSerialCall(CodeGenFunction &CGF, SourceLocation Loc,
734                                      llvm::Value *OutlinedFn,
735                                      llvm::Value *CapturedStruct) {
736   auto ThreadID = getThreadID(CGF, Loc);
737   // Build calls:
738   // __kmpc_serialized_parallel(&Loc, GTid);
739   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), ThreadID};
740   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_serialized_parallel),
741                       Args);
742 
743   // OutlinedFn(&GTid, &zero, CapturedStruct);
744   auto ThreadIDAddr = emitThreadIDAddress(CGF, Loc);
745   auto Int32Ty =
746       CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true);
747   auto ZeroAddr = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".zero.addr");
748   CGF.InitTempAlloca(ZeroAddr, CGF.Builder.getInt32(/*C*/ 0));
749   llvm::Value *OutlinedFnArgs[] = {ThreadIDAddr, ZeroAddr, CapturedStruct};
750   CGF.EmitCallOrInvoke(OutlinedFn, OutlinedFnArgs);
751 
752   // __kmpc_end_serialized_parallel(&Loc, GTid);
753   llvm::Value *EndArgs[] = {emitUpdateLocation(CGF, Loc), ThreadID};
754   CGF.EmitRuntimeCall(
755       createRuntimeFunction(OMPRTL__kmpc_end_serialized_parallel), EndArgs);
756 }
757 
758 // If we're inside an (outlined) parallel region, use the region info's
759 // thread-ID variable (it is passed in a first argument of the outlined function
760 // as "kmp_int32 *gtid"). Otherwise, if we're not inside parallel region, but in
761 // regular serial code region, get thread ID by calling kmp_int32
762 // kmpc_global_thread_num(ident_t *loc), stash this thread ID in a temporary and
763 // return the address of that temp.
764 llvm::Value *CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF,
765                                                   SourceLocation Loc) {
766   if (auto OMPRegionInfo =
767           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
768     if (OMPRegionInfo->getThreadIDVariable())
769       return CGF.EmitLoadOfLValue(OMPRegionInfo->getThreadIDVariableLValue(CGF),
770                                   Loc).getScalarVal();
771 
772   auto ThreadID = getThreadID(CGF, Loc);
773   auto Int32Ty =
774       CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true);
775   auto ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp.");
776   CGF.EmitStoreOfScalar(ThreadID,
777                         CGF.MakeNaturalAlignAddrLValue(ThreadIDTemp, Int32Ty));
778 
779   return ThreadIDTemp;
780 }
781 
782 llvm::Constant *
783 CGOpenMPRuntime::getOrCreateInternalVariable(llvm::Type *Ty,
784                                              const llvm::Twine &Name) {
785   SmallString<256> Buffer;
786   llvm::raw_svector_ostream Out(Buffer);
787   Out << Name;
788   auto RuntimeName = Out.str();
789   auto &Elem = *InternalVars.insert(std::make_pair(RuntimeName, nullptr)).first;
790   if (Elem.second) {
791     assert(Elem.second->getType()->getPointerElementType() == Ty &&
792            "OMP internal variable has different type than requested");
793     return &*Elem.second;
794   }
795 
796   return Elem.second = new llvm::GlobalVariable(
797              CGM.getModule(), Ty, /*IsConstant*/ false,
798              llvm::GlobalValue::CommonLinkage, llvm::Constant::getNullValue(Ty),
799              Elem.first());
800 }
801 
802 llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) {
803   llvm::Twine Name(".gomp_critical_user_", CriticalName);
804   return getOrCreateInternalVariable(KmpCriticalNameTy, Name.concat(".var"));
805 }
806 
807 void CGOpenMPRuntime::emitCriticalRegion(
808     CodeGenFunction &CGF, StringRef CriticalName,
809     const std::function<void()> &CriticalOpGen, SourceLocation Loc) {
810   auto RegionLock = getCriticalRegionLock(CriticalName);
811   // __kmpc_critical(ident_t *, gtid, Lock);
812   // CriticalOpGen();
813   // __kmpc_end_critical(ident_t *, gtid, Lock);
814   // Prepare arguments and build a call to __kmpc_critical
815   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
816                          RegionLock};
817   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_critical), Args);
818   CriticalOpGen();
819   // Build a call to __kmpc_end_critical
820   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_end_critical), Args);
821 }
822 
823 static void emitIfStmt(CodeGenFunction &CGF, llvm::Value *IfCond,
824                        const std::function<void()> &BodyOpGen) {
825   llvm::Value *CallBool = CGF.EmitScalarConversion(
826       IfCond,
827       CGF.getContext().getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true),
828       CGF.getContext().BoolTy);
829 
830   auto *ThenBlock = CGF.createBasicBlock("omp_if.then");
831   auto *ContBlock = CGF.createBasicBlock("omp_if.end");
832   // Generate the branch (If-stmt)
833   CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock);
834   CGF.EmitBlock(ThenBlock);
835   BodyOpGen();
836   // Emit the rest of bblocks/branches
837   CGF.EmitBranch(ContBlock);
838   CGF.EmitBlock(ContBlock, true);
839 }
840 
841 void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF,
842                                        const std::function<void()> &MasterOpGen,
843                                        SourceLocation Loc) {
844   // if(__kmpc_master(ident_t *, gtid)) {
845   //   MasterOpGen();
846   //   __kmpc_end_master(ident_t *, gtid);
847   // }
848   // Prepare arguments and build a call to __kmpc_master
849   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
850   auto *IsMaster =
851       CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_master), Args);
852   emitIfStmt(CGF, IsMaster, [&]() -> void {
853     MasterOpGen();
854     // Build a call to __kmpc_end_master.
855     // OpenMP [1.2.2 OpenMP Language Terminology]
856     // For C/C++, an executable statement, possibly compound, with a single
857     // entry at the top and a single exit at the bottom, or an OpenMP construct.
858     // * Access to the structured block must not be the result of a branch.
859     // * The point of exit cannot be a branch out of the structured block.
860     // * The point of entry must not be a call to setjmp().
861     // * longjmp() and throw() must not violate the entry/exit criteria.
862     // * An expression statement, iteration statement, selection statement, or
863     // try block is considered to be a structured block if the corresponding
864     // compound statement obtained by enclosing it in { and } would be a
865     // structured block.
866     // It is analyzed in Sema, so we can just call __kmpc_end_master() on
867     // fallthrough rather than pushing a normal cleanup for it.
868     CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_end_master), Args);
869   });
870 }
871 
872 void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
873                                         SourceLocation Loc) {
874   // Build call __kmpc_omp_taskyield(loc, thread_id, 0);
875   llvm::Value *Args[] = {
876       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
877       llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)};
878   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_taskyield), Args);
879 }
880 
881 void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF,
882                                        const std::function<void()> &SingleOpGen,
883                                        SourceLocation Loc) {
884   // if(__kmpc_single(ident_t *, gtid)) {
885   //   SingleOpGen();
886   //   __kmpc_end_single(ident_t *, gtid);
887   // }
888   // Prepare arguments and build a call to __kmpc_single
889   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
890   auto *IsSingle =
891       CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_single), Args);
892   emitIfStmt(CGF, IsSingle, [&]() -> void {
893     SingleOpGen();
894     // Build a call to __kmpc_end_single.
895     // OpenMP [1.2.2 OpenMP Language Terminology]
896     // For C/C++, an executable statement, possibly compound, with a single
897     // entry at the top and a single exit at the bottom, or an OpenMP construct.
898     // * Access to the structured block must not be the result of a branch.
899     // * The point of exit cannot be a branch out of the structured block.
900     // * The point of entry must not be a call to setjmp().
901     // * longjmp() and throw() must not violate the entry/exit criteria.
902     // * An expression statement, iteration statement, selection statement, or
903     // try block is considered to be a structured block if the corresponding
904     // compound statement obtained by enclosing it in { and } would be a
905     // structured block.
906     // It is analyzed in Sema, so we can just call __kmpc_end_single() on
907     // fallthrough rather than pushing a normal cleanup for it.
908     CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_end_single), Args);
909   });
910 }
911 
912 void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
913                                       bool IsExplicit) {
914   // Build call __kmpc_cancel_barrier(loc, thread_id);
915   auto Flags = static_cast<OpenMPLocationFlags>(
916       OMP_IDENT_KMPC |
917       (IsExplicit ? OMP_IDENT_BARRIER_EXPL : OMP_IDENT_BARRIER_IMPL));
918   // Build call __kmpc_cancel_barrier(loc, thread_id);
919   // Replace __kmpc_barrier() function by __kmpc_cancel_barrier() because this
920   // one provides the same functionality and adds initial support for
921   // cancellation constructs introduced in OpenMP 4.0. __kmpc_cancel_barrier()
922   // is provided default by the runtime library so it safe to make such
923   // replacement.
924   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags),
925                          getThreadID(CGF, Loc)};
926   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_cancel_barrier), Args);
927 }
928 
/// \brief Schedule kinds handed to the runtime for 'omp for' loops. The
/// enumerators and their values are taken from the enum sched_type in kmp.h.
enum OpenMPSchedType {
  /// \brief Lower bound for default (unordered) versions.
  OMP_sch_lower = 32,
  /// \brief Default (unordered) schedule versions.
  OMP_sch_static_chunked = 33,
  OMP_sch_static = 34,
  OMP_sch_dynamic_chunked = 35,
  OMP_sch_guided_chunked = 36,
  OMP_sch_runtime = 37,
  OMP_sch_auto = 38,
  /// \brief Lower bound for 'ordered' versions.
  OMP_ord_lower = 64,
  /// \brief Lower bound for 'nomerge' versions.
  OMP_nm_lower = 160
};
945 
946 /// \brief Map the OpenMP loop schedule to the runtime enumeration.
947 static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind,
948                                           bool Chunked) {
949   switch (ScheduleKind) {
950   case OMPC_SCHEDULE_static:
951     return Chunked ? OMP_sch_static_chunked : OMP_sch_static;
952   case OMPC_SCHEDULE_dynamic:
953     return OMP_sch_dynamic_chunked;
954   case OMPC_SCHEDULE_guided:
955     return OMP_sch_guided_chunked;
956   case OMPC_SCHEDULE_auto:
957     return OMP_sch_auto;
958   case OMPC_SCHEDULE_runtime:
959     return OMP_sch_runtime;
960   case OMPC_SCHEDULE_unknown:
961     assert(!Chunked && "chunk was specified but schedule kind not known");
962     return OMP_sch_static;
963   }
964   llvm_unreachable("Unexpected runtime schedule");
965 }
966 
967 bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind,
968                                          bool Chunked) const {
969   auto Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
970   return Schedule == OMP_sch_static;
971 }
972 
973 bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const {
974   auto Schedule = getRuntimeSchedule(ScheduleKind, /* Chunked */ false);
975   assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here");
976   return Schedule != OMP_sch_static;
977 }
978 
979 void CGOpenMPRuntime::emitForInit(CodeGenFunction &CGF, SourceLocation Loc,
980                                   OpenMPScheduleClauseKind ScheduleKind,
981                                   unsigned IVSize, bool IVSigned,
982                                   llvm::Value *IL, llvm::Value *LB,
983                                   llvm::Value *UB, llvm::Value *ST,
984                                   llvm::Value *Chunk) {
985   OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunk != nullptr);
986   // Call __kmpc_for_static_init(
987   //          ident_t *loc, kmp_int32 tid, kmp_int32 schedtype,
988   //          kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower,
989   //          kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
990   //          kmp_int[32|64] incr, kmp_int[32|64] chunk);
991   // TODO: Implement dynamic schedule.
992 
993   // If the Chunk was not specified in the clause - use default value 1.
994   if (Chunk == nullptr)
995     Chunk = CGF.Builder.getIntN(IVSize, /*C*/ 1);
996 
997   llvm::Value *Args[] = {
998       emitUpdateLocation(CGF, Loc, OMP_IDENT_KMPC), getThreadID(CGF, Loc),
999       CGF.Builder.getInt32(Schedule), // Schedule type
1000       IL,                             // &isLastIter
1001       LB,                             // &LB
1002       UB,                             // &UB
1003       ST,                             // &Stride
1004       CGF.Builder.getIntN(IVSize, 1), // Incr
1005       Chunk                           // Chunk
1006   };
1007   assert((IVSize == 32 || IVSize == 64) &&
1008          "Index size is not compatible with the omp runtime");
1009   auto F = IVSize == 32 ? (IVSigned ? OMPRTL__kmpc_for_static_init_4
1010                                     : OMPRTL__kmpc_for_static_init_4u)
1011                         : (IVSigned ? OMPRTL__kmpc_for_static_init_8
1012                                     : OMPRTL__kmpc_for_static_init_8u);
1013   CGF.EmitRuntimeCall(createRuntimeFunction(F), Args);
1014 }
1015 
1016 void CGOpenMPRuntime::emitForFinish(CodeGenFunction &CGF, SourceLocation Loc,
1017                                     OpenMPScheduleClauseKind ScheduleKind) {
1018   assert((ScheduleKind == OMPC_SCHEDULE_static ||
1019           ScheduleKind == OMPC_SCHEDULE_unknown) &&
1020          "Non-static schedule kinds are not yet implemented");
1021   // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid);
1022   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, OMP_IDENT_KMPC),
1023                          getThreadID(CGF, Loc)};
1024   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_for_static_fini),
1025                       Args);
1026 }
1027 
1028 void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
1029                                            llvm::Value *NumThreads,
1030                                            SourceLocation Loc) {
1031   // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads)
1032   llvm::Value *Args[] = {
1033       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
1034       CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)};
1035   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_num_threads),
1036                       Args);
1037 }
1038 
1039 void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>,
1040                                 SourceLocation Loc) {
1041   // Build call void __kmpc_flush(ident_t *loc)
1042   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_flush),
1043                       emitUpdateLocation(CGF, Loc));
1044 }
1045 
1046 InlinedOpenMPRegionRAII::InlinedOpenMPRegionRAII(
1047     CodeGenFunction &CGF, const OMPExecutableDirective &D)
1048     : CGF(CGF) {
1049   CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo(D, CGF.CapturedStmtInfo);
1050 }
1051 
1052 InlinedOpenMPRegionRAII::~InlinedOpenMPRegionRAII() {
1053   auto *OldCSI =
1054       cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI();
1055   delete CGF.CapturedStmtInfo;
1056   CGF.CapturedStmtInfo = OldCSI;
1057 }
1058 
1059