1 //===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This provides a class for OpenMP runtime code generation.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "CGOpenMPRuntime.h"
15 #include "CodeGenFunction.h"
16 #include "clang/AST/Decl.h"
17 #include "clang/AST/StmtOpenMP.h"
18 #include "llvm/ADT/ArrayRef.h"
19 #include "llvm/IR/CallSite.h"
20 #include "llvm/IR/DerivedTypes.h"
21 #include "llvm/IR/GlobalValue.h"
22 #include "llvm/IR/Value.h"
23 #include "llvm/Support/raw_ostream.h"
24 #include <cassert>
25 
26 using namespace clang;
27 using namespace CodeGen;
28 
29 namespace {
30 /// \brief API for captured statement code generation in OpenMP constructs.
31 class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
32 public:
33   CGOpenMPRegionInfo(const OMPExecutableDirective &D, const CapturedStmt &CS,
34                      const VarDecl *ThreadIDVar)
35       : CGCapturedStmtInfo(CS, CR_OpenMP), ThreadIDVar(ThreadIDVar),
36         Directive(D) {
37     assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
38   }
39 
40   /// \brief Gets a variable or parameter for storing global thread id
41   /// inside OpenMP construct.
42   const VarDecl *getThreadIDVariable() const { return ThreadIDVar; }
43 
44   /// \brief Gets an LValue for the current ThreadID variable.
45   LValue getThreadIDVariableLValue(CodeGenFunction &CGF);
46 
47   static bool classof(const CGCapturedStmtInfo *Info) {
48     return Info->getKind() == CR_OpenMP;
49   }
50 
51   /// \brief Emit the captured statement body.
52   void EmitBody(CodeGenFunction &CGF, Stmt *S) override;
53 
54   /// \brief Get the name of the capture helper.
55   StringRef getHelperName() const override { return ".omp_outlined."; }
56 
57 private:
58   /// \brief A variable or parameter storing global thread id for OpenMP
59   /// constructs.
60   const VarDecl *ThreadIDVar;
61   /// \brief OpenMP executable directive associated with the region.
62   const OMPExecutableDirective &Directive;
63 };
64 } // namespace
65 
66 LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) {
67   return CGF.MakeNaturalAlignAddrLValue(
68       CGF.GetAddrOfLocalVar(ThreadIDVar),
69       CGF.getContext().getPointerType(ThreadIDVar->getType()));
70 }
71 
72 void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, Stmt *S) {
73   CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
74   CGF.EmitOMPPrivateClause(Directive, PrivateScope);
75   CGF.EmitOMPFirstprivateClause(Directive, PrivateScope);
76   if (PrivateScope.Privatize())
77     // Emit implicit barrier to synchronize threads and avoid data races.
78     CGF.CGM.getOpenMPRuntime().EmitOMPBarrierCall(CGF, Directive.getLocStart(),
79                                                   /*IsExplicit=*/false);
80   CGCapturedStmtInfo::EmitBody(CGF, S);
81 }
82 
83 CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM)
84     : CGM(CGM), DefaultOpenMPPSource(nullptr) {
85   IdentTy = llvm::StructType::create(
86       "ident_t", CGM.Int32Ty /* reserved_1 */, CGM.Int32Ty /* flags */,
87       CGM.Int32Ty /* reserved_2 */, CGM.Int32Ty /* reserved_3 */,
88       CGM.Int8PtrTy /* psource */, nullptr);
89   // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...)
90   llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty),
91                                llvm::PointerType::getUnqual(CGM.Int32Ty)};
92   Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true);
93   KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8);
94 }
95 
96 llvm::Value *
97 CGOpenMPRuntime::EmitOpenMPOutlinedFunction(const OMPExecutableDirective &D,
98                                             const VarDecl *ThreadIDVar) {
99   const CapturedStmt *CS = cast<CapturedStmt>(D.getAssociatedStmt());
100   CodeGenFunction CGF(CGM, true);
101   CGOpenMPRegionInfo CGInfo(D, *CS, ThreadIDVar);
102   CGF.CapturedStmtInfo = &CGInfo;
103   return CGF.GenerateCapturedStmtFunction(*CS);
104 }
105 
106 llvm::Value *
107 CGOpenMPRuntime::GetOrCreateDefaultOpenMPLocation(OpenMPLocationFlags Flags) {
108   llvm::Value *Entry = OpenMPDefaultLocMap.lookup(Flags);
109   if (!Entry) {
110     if (!DefaultOpenMPPSource) {
111       // Initialize default location for psource field of ident_t structure of
112       // all ident_t objects. Format is ";file;function;line;column;;".
113       // Taken from
114       // http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp_str.c
115       DefaultOpenMPPSource =
116           CGM.GetAddrOfConstantCString(";unknown;unknown;0;0;;");
117       DefaultOpenMPPSource =
118           llvm::ConstantExpr::getBitCast(DefaultOpenMPPSource, CGM.Int8PtrTy);
119     }
120     auto DefaultOpenMPLocation = new llvm::GlobalVariable(
121         CGM.getModule(), IdentTy, /*isConstant*/ true,
122         llvm::GlobalValue::PrivateLinkage, /*Initializer*/ nullptr);
123     DefaultOpenMPLocation->setUnnamedAddr(true);
124 
125     llvm::Constant *Zero = llvm::ConstantInt::get(CGM.Int32Ty, 0, true);
126     llvm::Constant *Values[] = {Zero,
127                                 llvm::ConstantInt::get(CGM.Int32Ty, Flags),
128                                 Zero, Zero, DefaultOpenMPPSource};
129     llvm::Constant *Init = llvm::ConstantStruct::get(IdentTy, Values);
130     DefaultOpenMPLocation->setInitializer(Init);
131     OpenMPDefaultLocMap[Flags] = DefaultOpenMPLocation;
132     return DefaultOpenMPLocation;
133   }
134   return Entry;
135 }
136 
137 llvm::Value *CGOpenMPRuntime::EmitOpenMPUpdateLocation(
138     CodeGenFunction &CGF, SourceLocation Loc, OpenMPLocationFlags Flags) {
139   // If no debug info is generated - return global default location.
140   if (CGM.getCodeGenOpts().getDebugInfo() == CodeGenOptions::NoDebugInfo ||
141       Loc.isInvalid())
142     return GetOrCreateDefaultOpenMPLocation(Flags);
143 
144   assert(CGF.CurFn && "No function in current CodeGenFunction.");
145 
146   llvm::Value *LocValue = nullptr;
147   auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
148   if (I != OpenMPLocThreadIDMap.end())
149     LocValue = I->second.DebugLoc;
150   // OpenMPLocThreadIDMap may have null DebugLoc and non-null ThreadID, if
151   // GetOpenMPThreadID was called before this routine.
152   if (LocValue == nullptr) {
153     // Generate "ident_t .kmpc_loc.addr;"
154     llvm::AllocaInst *AI = CGF.CreateTempAlloca(IdentTy, ".kmpc_loc.addr");
155     AI->setAlignment(CGM.getDataLayout().getPrefTypeAlignment(IdentTy));
156     auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
157     Elem.second.DebugLoc = AI;
158     LocValue = AI;
159 
160     CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
161     CGF.Builder.SetInsertPoint(CGF.AllocaInsertPt);
162     CGF.Builder.CreateMemCpy(LocValue, GetOrCreateDefaultOpenMPLocation(Flags),
163                              llvm::ConstantExpr::getSizeOf(IdentTy),
164                              CGM.PointerAlignInBytes);
165   }
166 
167   // char **psource = &.kmpc_loc_<flags>.addr.psource;
168   auto *PSource =
169       CGF.Builder.CreateConstInBoundsGEP2_32(LocValue, 0, IdentField_PSource);
170 
171   auto OMPDebugLoc = OpenMPDebugLocMap.lookup(Loc.getRawEncoding());
172   if (OMPDebugLoc == nullptr) {
173     SmallString<128> Buffer2;
174     llvm::raw_svector_ostream OS2(Buffer2);
175     // Build debug location
176     PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
177     OS2 << ";" << PLoc.getFilename() << ";";
178     if (const FunctionDecl *FD =
179             dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl)) {
180       OS2 << FD->getQualifiedNameAsString();
181     }
182     OS2 << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;";
183     OMPDebugLoc = CGF.Builder.CreateGlobalStringPtr(OS2.str());
184     OpenMPDebugLocMap[Loc.getRawEncoding()] = OMPDebugLoc;
185   }
186   // *psource = ";<File>;<Function>;<Line>;<Column>;;";
187   CGF.Builder.CreateStore(OMPDebugLoc, PSource);
188 
189   return LocValue;
190 }
191 
192 llvm::Value *CGOpenMPRuntime::GetOpenMPThreadID(CodeGenFunction &CGF,
193                                                 SourceLocation Loc) {
194   assert(CGF.CurFn && "No function in current CodeGenFunction.");
195 
196   llvm::Value *ThreadID = nullptr;
197   // Check whether we've already cached a load of the thread id in this
198   // function.
199   auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
200   if (I != OpenMPLocThreadIDMap.end()) {
201     ThreadID = I->second.ThreadID;
202     if (ThreadID != nullptr)
203       return ThreadID;
204   }
205   if (auto OMPRegionInfo =
206           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
207     // Check if this an outlined function with thread id passed as argument.
208     auto ThreadIDVar = OMPRegionInfo->getThreadIDVariable();
209     auto LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF);
210     auto RVal = CGF.EmitLoadOfLValue(LVal, Loc);
211     LVal = CGF.MakeNaturalAlignAddrLValue(RVal.getScalarVal(),
212                                           ThreadIDVar->getType());
213     ThreadID = CGF.EmitLoadOfLValue(LVal, Loc).getScalarVal();
214     // If value loaded in entry block, cache it and use it everywhere in
215     // function.
216     if (CGF.Builder.GetInsertBlock() == CGF.AllocaInsertPt->getParent()) {
217       auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
218       Elem.second.ThreadID = ThreadID;
219     }
220   } else {
221     // This is not an outlined function region - need to call __kmpc_int32
222     // kmpc_global_thread_num(ident_t *loc).
223     // Generate thread id value and cache this value for use across the
224     // function.
225     CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
226     CGF.Builder.SetInsertPoint(CGF.AllocaInsertPt);
227     llvm::Value *Args[] = {EmitOpenMPUpdateLocation(CGF, Loc)};
228     ThreadID = CGF.EmitRuntimeCall(
229         CreateRuntimeFunction(OMPRTL__kmpc_global_thread_num), Args);
230     auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
231     Elem.second.ThreadID = ThreadID;
232   }
233   return ThreadID;
234 }
235 
236 void CGOpenMPRuntime::FunctionFinished(CodeGenFunction &CGF) {
237   assert(CGF.CurFn && "No function in current CodeGenFunction.");
238   if (OpenMPLocThreadIDMap.count(CGF.CurFn))
239     OpenMPLocThreadIDMap.erase(CGF.CurFn);
240 }
241 
242 llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() {
243   return llvm::PointerType::getUnqual(IdentTy);
244 }
245 
246 llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() {
247   return llvm::PointerType::getUnqual(Kmpc_MicroTy);
248 }
249 
250 llvm::Constant *
251 CGOpenMPRuntime::CreateRuntimeFunction(OpenMPRTLFunction Function) {
252   llvm::Constant *RTLFn = nullptr;
253   switch (Function) {
254   case OMPRTL__kmpc_fork_call: {
255     // Build void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro
256     // microtask, ...);
257     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
258                                 getKmpc_MicroPointerTy()};
259     llvm::FunctionType *FnTy =
260         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ true);
261     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_fork_call");
262     break;
263   }
264   case OMPRTL__kmpc_global_thread_num: {
265     // Build kmp_int32 __kmpc_global_thread_num(ident_t *loc);
266     llvm::Type *TypeParams[] = {getIdentTyPointerTy()};
267     llvm::FunctionType *FnTy =
268         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
269     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_global_thread_num");
270     break;
271   }
272   case OMPRTL__kmpc_threadprivate_cached: {
273     // Build void *__kmpc_threadprivate_cached(ident_t *loc,
274     // kmp_int32 global_tid, void *data, size_t size, void ***cache);
275     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
276                                 CGM.VoidPtrTy, CGM.SizeTy,
277                                 CGM.VoidPtrTy->getPointerTo()->getPointerTo()};
278     llvm::FunctionType *FnTy =
279         llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg*/ false);
280     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_cached");
281     break;
282   }
283   case OMPRTL__kmpc_critical: {
284     // Build void __kmpc_critical(ident_t *loc, kmp_int32 global_tid,
285     // kmp_critical_name *crit);
286     llvm::Type *TypeParams[] = {
287         getIdentTyPointerTy(), CGM.Int32Ty,
288         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
289     llvm::FunctionType *FnTy =
290         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
291     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_critical");
292     break;
293   }
294   case OMPRTL__kmpc_threadprivate_register: {
295     // Build void __kmpc_threadprivate_register(ident_t *, void *data,
296     // kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor);
297     // typedef void *(*kmpc_ctor)(void *);
298     auto KmpcCtorTy =
299         llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
300                                 /*isVarArg*/ false)->getPointerTo();
301     // typedef void *(*kmpc_cctor)(void *, void *);
302     llvm::Type *KmpcCopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
303     auto KmpcCopyCtorTy =
304         llvm::FunctionType::get(CGM.VoidPtrTy, KmpcCopyCtorTyArgs,
305                                 /*isVarArg*/ false)->getPointerTo();
306     // typedef void (*kmpc_dtor)(void *);
307     auto KmpcDtorTy =
308         llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy, /*isVarArg*/ false)
309             ->getPointerTo();
310     llvm::Type *FnTyArgs[] = {getIdentTyPointerTy(), CGM.VoidPtrTy, KmpcCtorTy,
311                               KmpcCopyCtorTy, KmpcDtorTy};
312     auto FnTy = llvm::FunctionType::get(CGM.VoidTy, FnTyArgs,
313                                         /*isVarArg*/ false);
314     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_register");
315     break;
316   }
317   case OMPRTL__kmpc_end_critical: {
318     // Build void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid,
319     // kmp_critical_name *crit);
320     llvm::Type *TypeParams[] = {
321         getIdentTyPointerTy(), CGM.Int32Ty,
322         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
323     llvm::FunctionType *FnTy =
324         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
325     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_critical");
326     break;
327   }
328   case OMPRTL__kmpc_cancel_barrier: {
329     // Build kmp_int32 __kmpc_cancel_barrier(ident_t *loc, kmp_int32
330     // global_tid);
331     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
332     llvm::FunctionType *FnTy =
333         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
334     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_cancel_barrier");
335     break;
336   }
337   // Build __kmpc_for_static_init*(
338   //               ident_t *loc, kmp_int32 tid, kmp_int32 schedtype,
339   //               kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower,
340   //               kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
341   //               kmp_int[32|64] incr, kmp_int[32|64] chunk);
342   case OMPRTL__kmpc_for_static_init_4: {
343     auto ITy = CGM.Int32Ty;
344     auto PtrTy = llvm::PointerType::getUnqual(ITy);
345     llvm::Type *TypeParams[] = {
346         getIdentTyPointerTy(),                     // loc
347         CGM.Int32Ty,                               // tid
348         CGM.Int32Ty,                               // schedtype
349         llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
350         PtrTy,                                     // p_lower
351         PtrTy,                                     // p_upper
352         PtrTy,                                     // p_stride
353         ITy,                                       // incr
354         ITy                                        // chunk
355     };
356     llvm::FunctionType *FnTy =
357         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
358     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_for_static_init_4");
359     break;
360   }
361   case OMPRTL__kmpc_for_static_init_4u: {
362     auto ITy = CGM.Int32Ty;
363     auto PtrTy = llvm::PointerType::getUnqual(ITy);
364     llvm::Type *TypeParams[] = {
365         getIdentTyPointerTy(),                     // loc
366         CGM.Int32Ty,                               // tid
367         CGM.Int32Ty,                               // schedtype
368         llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
369         PtrTy,                                     // p_lower
370         PtrTy,                                     // p_upper
371         PtrTy,                                     // p_stride
372         ITy,                                       // incr
373         ITy                                        // chunk
374     };
375     llvm::FunctionType *FnTy =
376         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
377     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_for_static_init_4u");
378     break;
379   }
380   case OMPRTL__kmpc_for_static_init_8: {
381     auto ITy = CGM.Int64Ty;
382     auto PtrTy = llvm::PointerType::getUnqual(ITy);
383     llvm::Type *TypeParams[] = {
384         getIdentTyPointerTy(),                     // loc
385         CGM.Int32Ty,                               // tid
386         CGM.Int32Ty,                               // schedtype
387         llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
388         PtrTy,                                     // p_lower
389         PtrTy,                                     // p_upper
390         PtrTy,                                     // p_stride
391         ITy,                                       // incr
392         ITy                                        // chunk
393     };
394     llvm::FunctionType *FnTy =
395         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
396     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_for_static_init_8");
397     break;
398   }
399   case OMPRTL__kmpc_for_static_init_8u: {
400     auto ITy = CGM.Int64Ty;
401     auto PtrTy = llvm::PointerType::getUnqual(ITy);
402     llvm::Type *TypeParams[] = {
403         getIdentTyPointerTy(),                     // loc
404         CGM.Int32Ty,                               // tid
405         CGM.Int32Ty,                               // schedtype
406         llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
407         PtrTy,                                     // p_lower
408         PtrTy,                                     // p_upper
409         PtrTy,                                     // p_stride
410         ITy,                                       // incr
411         ITy                                        // chunk
412     };
413     llvm::FunctionType *FnTy =
414         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
415     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_for_static_init_8u");
416     break;
417   }
418   case OMPRTL__kmpc_for_static_fini: {
419     // Build void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid);
420     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
421     llvm::FunctionType *FnTy =
422         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
423     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_for_static_fini");
424     break;
425   }
426   case OMPRTL__kmpc_push_num_threads: {
427     // Build void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
428     // kmp_int32 num_threads)
429     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
430                                 CGM.Int32Ty};
431     llvm::FunctionType *FnTy =
432         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
433     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_num_threads");
434     break;
435   }
436   case OMPRTL__kmpc_serialized_parallel: {
437     // Build void __kmpc_serialized_parallel(ident_t *loc, kmp_int32
438     // global_tid);
439     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
440     llvm::FunctionType *FnTy =
441         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
442     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_serialized_parallel");
443     break;
444   }
445   case OMPRTL__kmpc_end_serialized_parallel: {
446     // Build void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32
447     // global_tid);
448     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
449     llvm::FunctionType *FnTy =
450         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
451     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_serialized_parallel");
452     break;
453   }
454   case OMPRTL__kmpc_flush: {
455     // Build void __kmpc_flush(ident_t *loc);
456     llvm::Type *TypeParams[] = {getIdentTyPointerTy()};
457     llvm::FunctionType *FnTy =
458         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
459     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_flush");
460     break;
461   }
462   case OMPRTL__kmpc_master: {
463     // Build kmp_int32 __kmpc_master(ident_t *loc, kmp_int32 global_tid);
464     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
465     llvm::FunctionType *FnTy =
466         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
467     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_master");
468     break;
469   }
470   case OMPRTL__kmpc_end_master: {
471     // Build void __kmpc_end_master(ident_t *loc, kmp_int32 global_tid);
472     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
473     llvm::FunctionType *FnTy =
474         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
475     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_master");
476     break;
477   }
478   case OMPRTL__kmpc_omp_taskyield: {
479     // Build kmp_int32 __kmpc_omp_taskyield(ident_t *, kmp_int32 global_tid,
480     // int end_part);
481     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
482     llvm::FunctionType *FnTy =
483         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
484     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_taskyield");
485     break;
486   }
487   case OMPRTL__kmpc_single: {
488     // Build kmp_int32 __kmpc_single(ident_t *loc, kmp_int32 global_tid);
489     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
490     llvm::FunctionType *FnTy =
491         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
492     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_single");
493     break;
494   }
495   case OMPRTL__kmpc_end_single: {
496     // Build void __kmpc_end_single(ident_t *loc, kmp_int32 global_tid);
497     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
498     llvm::FunctionType *FnTy =
499         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
500     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_single");
501     break;
502   }
503   }
504   return RTLFn;
505 }
506 
507 llvm::Constant *
508 CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) {
509   // Lookup the entry, lazily creating it if necessary.
510   return GetOrCreateInternalVariable(CGM.Int8PtrPtrTy,
511                                      Twine(CGM.getMangledName(VD)) + ".cache.");
512 }
513 
514 llvm::Value *CGOpenMPRuntime::getOMPAddrOfThreadPrivate(CodeGenFunction &CGF,
515                                                         const VarDecl *VD,
516                                                         llvm::Value *VDAddr,
517                                                         SourceLocation Loc) {
518   auto VarTy = VDAddr->getType()->getPointerElementType();
519   llvm::Value *Args[] = {EmitOpenMPUpdateLocation(CGF, Loc),
520                          GetOpenMPThreadID(CGF, Loc),
521                          CGF.Builder.CreatePointerCast(VDAddr, CGM.Int8PtrTy),
522                          CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)),
523                          getOrCreateThreadPrivateCache(VD)};
524   return CGF.EmitRuntimeCall(
525       CreateRuntimeFunction(OMPRTL__kmpc_threadprivate_cached), Args);
526 }
527 
528 void CGOpenMPRuntime::EmitOMPThreadPrivateVarInit(
529     CodeGenFunction &CGF, llvm::Value *VDAddr, llvm::Value *Ctor,
530     llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) {
531   // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime
532   // library.
533   auto OMPLoc = EmitOpenMPUpdateLocation(CGF, Loc);
534   CGF.EmitRuntimeCall(CreateRuntimeFunction(OMPRTL__kmpc_global_thread_num),
535                       OMPLoc);
536   // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor)
537   // to register constructor/destructor for variable.
538   llvm::Value *Args[] = {OMPLoc,
539                          CGF.Builder.CreatePointerCast(VDAddr, CGM.VoidPtrTy),
540                          Ctor, CopyCtor, Dtor};
541   CGF.EmitRuntimeCall(
542       CreateRuntimeFunction(OMPRTL__kmpc_threadprivate_register), Args);
543 }
544 
545 llvm::Function *CGOpenMPRuntime::EmitOMPThreadPrivateVarDefinition(
546     const VarDecl *VD, llvm::Value *VDAddr, SourceLocation Loc,
547     bool PerformInit, CodeGenFunction *CGF) {
548   VD = VD->getDefinition(CGM.getContext());
549   if (VD && ThreadPrivateWithDefinition.count(VD) == 0) {
550     ThreadPrivateWithDefinition.insert(VD);
551     QualType ASTTy = VD->getType();
552 
553     llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr;
554     auto Init = VD->getAnyInitializer();
555     if (CGM.getLangOpts().CPlusPlus && PerformInit) {
556       // Generate function that re-emits the declaration's initializer into the
557       // threadprivate copy of the variable VD
558       CodeGenFunction CtorCGF(CGM);
559       FunctionArgList Args;
560       ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, SourceLocation(),
561                             /*Id=*/nullptr, CGM.getContext().VoidPtrTy);
562       Args.push_back(&Dst);
563 
564       auto &FI = CGM.getTypes().arrangeFreeFunctionDeclaration(
565           CGM.getContext().VoidPtrTy, Args, FunctionType::ExtInfo(),
566           /*isVariadic=*/false);
567       auto FTy = CGM.getTypes().GetFunctionType(FI);
568       auto Fn = CGM.CreateGlobalInitOrDestructFunction(
569           FTy, ".__kmpc_global_ctor_.", Loc);
570       CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI,
571                             Args, SourceLocation());
572       auto ArgVal = CtorCGF.EmitLoadOfScalar(
573           CtorCGF.GetAddrOfLocalVar(&Dst),
574           /*Volatile=*/false, CGM.PointerAlignInBytes,
575           CGM.getContext().VoidPtrTy, Dst.getLocation());
576       auto Arg = CtorCGF.Builder.CreatePointerCast(
577           ArgVal,
578           CtorCGF.ConvertTypeForMem(CGM.getContext().getPointerType(ASTTy)));
579       CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(),
580                                /*IsInitializer=*/true);
581       ArgVal = CtorCGF.EmitLoadOfScalar(
582           CtorCGF.GetAddrOfLocalVar(&Dst),
583           /*Volatile=*/false, CGM.PointerAlignInBytes,
584           CGM.getContext().VoidPtrTy, Dst.getLocation());
585       CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue);
586       CtorCGF.FinishFunction();
587       Ctor = Fn;
588     }
589     if (VD->getType().isDestructedType() != QualType::DK_none) {
590       // Generate function that emits destructor call for the threadprivate copy
591       // of the variable VD
592       CodeGenFunction DtorCGF(CGM);
593       FunctionArgList Args;
594       ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, SourceLocation(),
595                             /*Id=*/nullptr, CGM.getContext().VoidPtrTy);
596       Args.push_back(&Dst);
597 
598       auto &FI = CGM.getTypes().arrangeFreeFunctionDeclaration(
599           CGM.getContext().VoidTy, Args, FunctionType::ExtInfo(),
600           /*isVariadic=*/false);
601       auto FTy = CGM.getTypes().GetFunctionType(FI);
602       auto Fn = CGM.CreateGlobalInitOrDestructFunction(
603           FTy, ".__kmpc_global_dtor_.", Loc);
604       DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args,
605                             SourceLocation());
606       auto ArgVal = DtorCGF.EmitLoadOfScalar(
607           DtorCGF.GetAddrOfLocalVar(&Dst),
608           /*Volatile=*/false, CGM.PointerAlignInBytes,
609           CGM.getContext().VoidPtrTy, Dst.getLocation());
610       DtorCGF.emitDestroy(ArgVal, ASTTy,
611                           DtorCGF.getDestroyer(ASTTy.isDestructedType()),
612                           DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
613       DtorCGF.FinishFunction();
614       Dtor = Fn;
615     }
616     // Do not emit init function if it is not required.
617     if (!Ctor && !Dtor)
618       return nullptr;
619 
620     llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
621     auto CopyCtorTy =
622         llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs,
623                                 /*isVarArg=*/false)->getPointerTo();
624     // Copying constructor for the threadprivate variable.
625     // Must be NULL - reserved by runtime, but currently it requires that this
626     // parameter is always NULL. Otherwise it fires assertion.
627     CopyCtor = llvm::Constant::getNullValue(CopyCtorTy);
628     if (Ctor == nullptr) {
629       auto CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
630                                             /*isVarArg=*/false)->getPointerTo();
631       Ctor = llvm::Constant::getNullValue(CtorTy);
632     }
633     if (Dtor == nullptr) {
634       auto DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy,
635                                             /*isVarArg=*/false)->getPointerTo();
636       Dtor = llvm::Constant::getNullValue(DtorTy);
637     }
638     if (!CGF) {
639       auto InitFunctionTy =
640           llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false);
641       auto InitFunction = CGM.CreateGlobalInitOrDestructFunction(
642           InitFunctionTy, ".__omp_threadprivate_init_.");
643       CodeGenFunction InitCGF(CGM);
644       FunctionArgList ArgList;
645       InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction,
646                             CGM.getTypes().arrangeNullaryFunction(), ArgList,
647                             Loc);
648       EmitOMPThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
649       InitCGF.FinishFunction();
650       return InitFunction;
651     }
652     EmitOMPThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
653   }
654   return nullptr;
655 }
656 
657 void CGOpenMPRuntime::EmitOMPParallelCall(CodeGenFunction &CGF,
658                                           SourceLocation Loc,
659                                           llvm::Value *OutlinedFn,
660                                           llvm::Value *CapturedStruct) {
661   // Build call __kmpc_fork_call(loc, 1, microtask, captured_struct/*context*/)
662   llvm::Value *Args[] = {
663       EmitOpenMPUpdateLocation(CGF, Loc),
664       CGF.Builder.getInt32(1), // Number of arguments after 'microtask' argument
665       // (there is only one additional argument - 'context')
666       CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy()),
667       CGF.EmitCastToVoidPtr(CapturedStruct)};
668   auto RTLFn = CreateRuntimeFunction(OMPRTL__kmpc_fork_call);
669   CGF.EmitRuntimeCall(RTLFn, Args);
670 }
671 
672 void CGOpenMPRuntime::EmitOMPSerialCall(CodeGenFunction &CGF,
673                                         SourceLocation Loc,
674                                         llvm::Value *OutlinedFn,
675                                         llvm::Value *CapturedStruct) {
676   auto ThreadID = GetOpenMPThreadID(CGF, Loc);
677   // Build calls:
678   // __kmpc_serialized_parallel(&Loc, GTid);
679   llvm::Value *SerArgs[] = {EmitOpenMPUpdateLocation(CGF, Loc), ThreadID};
680   auto RTLFn = CreateRuntimeFunction(OMPRTL__kmpc_serialized_parallel);
681   CGF.EmitRuntimeCall(RTLFn, SerArgs);
682 
683   // OutlinedFn(&GTid, &zero, CapturedStruct);
684   auto ThreadIDAddr = EmitThreadIDAddress(CGF, Loc);
685   auto Int32Ty =
686       CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true);
687   auto ZeroAddr = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".zero.addr");
688   CGF.InitTempAlloca(ZeroAddr, CGF.Builder.getInt32(/*C*/ 0));
689   llvm::Value *OutlinedFnArgs[] = {ThreadIDAddr, ZeroAddr, CapturedStruct};
690   CGF.EmitCallOrInvoke(OutlinedFn, OutlinedFnArgs);
691 
692   // __kmpc_end_serialized_parallel(&Loc, GTid);
693   llvm::Value *EndSerArgs[] = {EmitOpenMPUpdateLocation(CGF, Loc), ThreadID};
694   RTLFn = CreateRuntimeFunction(OMPRTL__kmpc_end_serialized_parallel);
695   CGF.EmitRuntimeCall(RTLFn, EndSerArgs);
696 }
697 
698 // If we're inside an (outlined) parallel region, use the region info's
699 // thread-ID variable (it is passed in a first argument of the outlined function
700 // as "kmp_int32 *gtid"). Otherwise, if we're not inside parallel region, but in
701 // regular serial code region, get thread ID by calling kmp_int32
702 // kmpc_global_thread_num(ident_t *loc), stash this thread ID in a temporary and
703 // return the address of that temp.
704 llvm::Value *CGOpenMPRuntime::EmitThreadIDAddress(CodeGenFunction &CGF,
705                                                   SourceLocation Loc) {
706   if (auto OMPRegionInfo =
707           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
708     return CGF.EmitLoadOfLValue(OMPRegionInfo->getThreadIDVariableLValue(CGF),
709                                 SourceLocation()).getScalarVal();
710   auto ThreadID = GetOpenMPThreadID(CGF, Loc);
711   auto Int32Ty =
712       CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true);
713   auto ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp.");
714   CGF.EmitStoreOfScalar(ThreadID,
715                         CGF.MakeNaturalAlignAddrLValue(ThreadIDTemp, Int32Ty));
716 
717   return ThreadIDTemp;
718 }
719 
720 llvm::Constant *
721 CGOpenMPRuntime::GetOrCreateInternalVariable(llvm::Type *Ty,
722                                              const llvm::Twine &Name) {
723   SmallString<256> Buffer;
724   llvm::raw_svector_ostream Out(Buffer);
725   Out << Name;
726   auto RuntimeName = Out.str();
727   auto &Elem = *InternalVars.insert(std::make_pair(RuntimeName, nullptr)).first;
728   if (Elem.second) {
729     assert(Elem.second->getType()->getPointerElementType() == Ty &&
730            "OMP internal variable has different type than requested");
731     return &*Elem.second;
732   }
733 
734   return Elem.second = new llvm::GlobalVariable(
735              CGM.getModule(), Ty, /*IsConstant*/ false,
736              llvm::GlobalValue::CommonLinkage, llvm::Constant::getNullValue(Ty),
737              Elem.first());
738 }
739 
740 llvm::Value *CGOpenMPRuntime::GetCriticalRegionLock(StringRef CriticalName) {
741   llvm::Twine Name(".gomp_critical_user_", CriticalName);
742   return GetOrCreateInternalVariable(KmpCriticalNameTy, Name.concat(".var"));
743 }
744 
745 void CGOpenMPRuntime::EmitOMPCriticalRegion(
746     CodeGenFunction &CGF, StringRef CriticalName,
747     const std::function<void()> &CriticalOpGen, SourceLocation Loc) {
748   auto RegionLock = GetCriticalRegionLock(CriticalName);
749   // __kmpc_critical(ident_t *, gtid, Lock);
750   // CriticalOpGen();
751   // __kmpc_end_critical(ident_t *, gtid, Lock);
752   // Prepare arguments and build a call to __kmpc_critical
753   llvm::Value *Args[] = {EmitOpenMPUpdateLocation(CGF, Loc),
754                          GetOpenMPThreadID(CGF, Loc), RegionLock};
755   auto RTLFn = CreateRuntimeFunction(OMPRTL__kmpc_critical);
756   CGF.EmitRuntimeCall(RTLFn, Args);
757   CriticalOpGen();
758   // Build a call to __kmpc_end_critical
759   RTLFn = CreateRuntimeFunction(OMPRTL__kmpc_end_critical);
760   CGF.EmitRuntimeCall(RTLFn, Args);
761 }
762 
763 static void EmitOMPIfStmt(CodeGenFunction &CGF, llvm::Value *IfCond,
764                           const std::function<void()> &BodyOpGen) {
765   llvm::Value *CallBool = CGF.EmitScalarConversion(
766       IfCond,
767       CGF.getContext().getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true),
768       CGF.getContext().BoolTy);
769 
770   auto *ThenBlock = CGF.createBasicBlock("omp_if.then");
771   auto *ContBlock = CGF.createBasicBlock("omp_if.end");
772   // Generate the branch (If-stmt)
773   CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock);
774   CGF.EmitBlock(ThenBlock);
775   BodyOpGen();
776   // Emit the rest of bblocks/branches
777   CGF.EmitBranch(ContBlock);
778   CGF.EmitBlock(ContBlock, true);
779 }
780 
781 void CGOpenMPRuntime::EmitOMPMasterRegion(
782     CodeGenFunction &CGF, const std::function<void()> &MasterOpGen,
783     SourceLocation Loc) {
784   // if(__kmpc_master(ident_t *, gtid)) {
785   //   MasterOpGen();
786   //   __kmpc_end_master(ident_t *, gtid);
787   // }
788   // Prepare arguments and build a call to __kmpc_master
789   llvm::Value *Args[] = {EmitOpenMPUpdateLocation(CGF, Loc),
790                          GetOpenMPThreadID(CGF, Loc)};
791   auto RTLFn = CreateRuntimeFunction(OMPRTL__kmpc_master);
792   auto *IsMaster = CGF.EmitRuntimeCall(RTLFn, Args);
793   EmitOMPIfStmt(CGF, IsMaster, [&]() -> void {
794     MasterOpGen();
795     // Build a call to __kmpc_end_master.
796     // OpenMP [1.2.2 OpenMP Language Terminology]
797     // For C/C++, an executable statement, possibly compound, with a single
798     // entry at the top and a single exit at the bottom, or an OpenMP construct.
799     // * Access to the structured block must not be the result of a branch.
800     // * The point of exit cannot be a branch out of the structured block.
801     // * The point of entry must not be a call to setjmp().
802     // * longjmp() and throw() must not violate the entry/exit criteria.
803     // * An expression statement, iteration statement, selection statement, or
804     // try block is considered to be a structured block if the corresponding
805     // compound statement obtained by enclosing it in { and } would be a
806     // structured block.
807     // It is analyzed in Sema, so we can just call __kmpc_end_master() on
808     // fallthrough rather than pushing a normal cleanup for it.
809     RTLFn = CreateRuntimeFunction(OMPRTL__kmpc_end_master);
810     CGF.EmitRuntimeCall(RTLFn, Args);
811   });
812 }
813 
814 void CGOpenMPRuntime::EmitOMPTaskyieldCall(CodeGenFunction &CGF,
815                                            SourceLocation Loc) {
816   // Build call __kmpc_omp_taskyield(loc, thread_id, 0);
817   llvm::Value *Args[] = {
818       EmitOpenMPUpdateLocation(CGF, Loc), GetOpenMPThreadID(CGF, Loc),
819       llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)};
820   auto RTLFn = CreateRuntimeFunction(OMPRTL__kmpc_omp_taskyield);
821   CGF.EmitRuntimeCall(RTLFn, Args);
822 }
823 
824 void CGOpenMPRuntime::EmitOMPSingleRegion(
825     CodeGenFunction &CGF, const std::function<void()> &SingleOpGen,
826     SourceLocation Loc) {
827   // if(__kmpc_single(ident_t *, gtid)) {
828   //   SingleOpGen();
829   //   __kmpc_end_single(ident_t *, gtid);
830   // }
831   // Prepare arguments and build a call to __kmpc_single
832   llvm::Value *Args[] = {EmitOpenMPUpdateLocation(CGF, Loc),
833                          GetOpenMPThreadID(CGF, Loc)};
834   auto RTLFn = CreateRuntimeFunction(OMPRTL__kmpc_single);
835   auto *IsSingle = CGF.EmitRuntimeCall(RTLFn, Args);
836   EmitOMPIfStmt(CGF, IsSingle, [&]() -> void {
837     SingleOpGen();
838     // Build a call to __kmpc_end_single.
839     // OpenMP [1.2.2 OpenMP Language Terminology]
840     // For C/C++, an executable statement, possibly compound, with a single
841     // entry at the top and a single exit at the bottom, or an OpenMP construct.
842     // * Access to the structured block must not be the result of a branch.
843     // * The point of exit cannot be a branch out of the structured block.
844     // * The point of entry must not be a call to setjmp().
845     // * longjmp() and throw() must not violate the entry/exit criteria.
846     // * An expression statement, iteration statement, selection statement, or
847     // try block is considered to be a structured block if the corresponding
848     // compound statement obtained by enclosing it in { and } would be a
849     // structured block.
850     // It is analyzed in Sema, so we can just call __kmpc_end_single() on
851     // fallthrough rather than pushing a normal cleanup for it.
852     RTLFn = CreateRuntimeFunction(OMPRTL__kmpc_end_single);
853     CGF.EmitRuntimeCall(RTLFn, Args);
854   });
855 }
856 
857 void CGOpenMPRuntime::EmitOMPBarrierCall(CodeGenFunction &CGF,
858                                          SourceLocation Loc, bool IsExplicit) {
859   // Build call __kmpc_cancel_barrier(loc, thread_id);
860   auto Flags = static_cast<OpenMPLocationFlags>(
861       OMP_IDENT_KMPC |
862       (IsExplicit ? OMP_IDENT_BARRIER_EXPL : OMP_IDENT_BARRIER_IMPL));
863   // Build call __kmpc_cancel_barrier(loc, thread_id);
864   // Replace __kmpc_barrier() function by __kmpc_cancel_barrier() because this
865   // one provides the same functionality and adds initial support for
866   // cancellation constructs introduced in OpenMP 4.0. __kmpc_cancel_barrier()
867   // is provided default by the runtime library so it safe to make such
868   // replacement.
869   llvm::Value *Args[] = {EmitOpenMPUpdateLocation(CGF, Loc, Flags),
870                          GetOpenMPThreadID(CGF, Loc)};
871   auto RTLFn = CreateRuntimeFunction(OMPRTL__kmpc_cancel_barrier);
872   CGF.EmitRuntimeCall(RTLFn, Args);
873 }
874 
/// \brief Schedule types for 'omp for' loops (these enumerators are taken from
/// the enum sched_type in kmp.h).
///
/// A value of this type is passed to the runtime as the 'schedtype' argument
/// of the static-init call, so the numeric values presumably have to stay in
/// sync with kmp.h.
enum OpenMPSchedType {
  /// \brief Lower bound for default (unordered) versions.
  OMP_sch_lower = 32,
  /// \brief Selected for schedule(static) when a chunk is given.
  OMP_sch_static_chunked = 33,
  /// \brief Selected for schedule(static) without a chunk, and when the
  /// schedule kind is not known.
  OMP_sch_static = 34,
  /// \brief Selected for schedule(dynamic), with or without a chunk.
  OMP_sch_dynamic_chunked = 35,
  /// \brief Selected for schedule(guided), with or without a chunk.
  OMP_sch_guided_chunked = 36,
  /// \brief Selected for schedule(runtime).
  OMP_sch_runtime = 37,
  /// \brief Selected for schedule(auto).
  OMP_sch_auto = 38,
  /// \brief Lower bound for 'ordered' versions.
  OMP_ord_lower = 64,
  /// \brief Lower bound for 'nomerge' versions.
  OMP_nm_lower = 160,
};
891 
892 /// \brief Map the OpenMP loop schedule to the runtime enumeration.
893 static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind,
894                                           bool Chunked) {
895   switch (ScheduleKind) {
896   case OMPC_SCHEDULE_static:
897     return Chunked ? OMP_sch_static_chunked : OMP_sch_static;
898   case OMPC_SCHEDULE_dynamic:
899     return OMP_sch_dynamic_chunked;
900   case OMPC_SCHEDULE_guided:
901     return OMP_sch_guided_chunked;
902   case OMPC_SCHEDULE_auto:
903     return OMP_sch_auto;
904   case OMPC_SCHEDULE_runtime:
905     return OMP_sch_runtime;
906   case OMPC_SCHEDULE_unknown:
907     assert(!Chunked && "chunk was specified but schedule kind not known");
908     return OMP_sch_static;
909   }
910   llvm_unreachable("Unexpected runtime schedule");
911 }
912 
913 bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind,
914                                          bool Chunked) const {
915   auto Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
916   return Schedule == OMP_sch_static;
917 }
918 
919 bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const {
920   auto Schedule = getRuntimeSchedule(ScheduleKind, /* Chunked */ false);
921   assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here");
922   return Schedule != OMP_sch_static;
923 }
924 
925 void CGOpenMPRuntime::EmitOMPForInit(CodeGenFunction &CGF, SourceLocation Loc,
926                                      OpenMPScheduleClauseKind ScheduleKind,
927                                      unsigned IVSize, bool IVSigned,
928                                      llvm::Value *IL, llvm::Value *LB,
929                                      llvm::Value *UB, llvm::Value *ST,
930                                      llvm::Value *Chunk) {
931   OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunk != nullptr);
932   // Call __kmpc_for_static_init(
933   //          ident_t *loc, kmp_int32 tid, kmp_int32 schedtype,
934   //          kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower,
935   //          kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
936   //          kmp_int[32|64] incr, kmp_int[32|64] chunk);
937   // TODO: Implement dynamic schedule.
938 
939   // If the Chunk was not specified in the clause - use default value 1.
940   if (Chunk == nullptr)
941     Chunk = CGF.Builder.getIntN(IVSize, /*C*/ 1);
942 
943   llvm::Value *Args[] = {
944       EmitOpenMPUpdateLocation(CGF, Loc, OMP_IDENT_KMPC),
945       GetOpenMPThreadID(CGF, Loc),
946       CGF.Builder.getInt32(Schedule), // Schedule type
947       IL,                             // &isLastIter
948       LB,                             // &LB
949       UB,                             // &UB
950       ST,                             // &Stride
951       CGF.Builder.getIntN(IVSize, 1), // Incr
952       Chunk                           // Chunk
953   };
954   assert((IVSize == 32 || IVSize == 64) &&
955          "Index size is not compatible with the omp runtime");
956   auto F = IVSize == 32 ? (IVSigned ? OMPRTL__kmpc_for_static_init_4
957                                     : OMPRTL__kmpc_for_static_init_4u)
958                         : (IVSigned ? OMPRTL__kmpc_for_static_init_8
959                                     : OMPRTL__kmpc_for_static_init_8u);
960   auto RTLFn = CreateRuntimeFunction(F);
961   CGF.EmitRuntimeCall(RTLFn, Args);
962 }
963 
964 void CGOpenMPRuntime::EmitOMPForFinish(CodeGenFunction &CGF, SourceLocation Loc,
965                                        OpenMPScheduleClauseKind ScheduleKind) {
966   assert((ScheduleKind == OMPC_SCHEDULE_static ||
967           ScheduleKind == OMPC_SCHEDULE_unknown) &&
968          "Non-static schedule kinds are not yet implemented");
969   // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid);
970   llvm::Value *Args[] = {EmitOpenMPUpdateLocation(CGF, Loc, OMP_IDENT_KMPC),
971                          GetOpenMPThreadID(CGF, Loc)};
972   auto RTLFn = CreateRuntimeFunction(OMPRTL__kmpc_for_static_fini);
973   CGF.EmitRuntimeCall(RTLFn, Args);
974 }
975 
976 void CGOpenMPRuntime::EmitOMPNumThreadsClause(CodeGenFunction &CGF,
977                                               llvm::Value *NumThreads,
978                                               SourceLocation Loc) {
979   // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads)
980   llvm::Value *Args[] = {
981       EmitOpenMPUpdateLocation(CGF, Loc), GetOpenMPThreadID(CGF, Loc),
982       CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)};
983   llvm::Constant *RTLFn = CreateRuntimeFunction(OMPRTL__kmpc_push_num_threads);
984   CGF.EmitRuntimeCall(RTLFn, Args);
985 }
986 
987 void CGOpenMPRuntime::EmitOMPFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>,
988                                    SourceLocation Loc) {
989   // Build call void __kmpc_flush(ident_t *loc)
990   auto *RTLFn = CreateRuntimeFunction(OMPRTL__kmpc_flush);
991   CGF.EmitRuntimeCall(RTLFn, EmitOpenMPUpdateLocation(CGF, Loc));
992 }
993