1 //===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This provides a class for OpenMP runtime code generation.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "CGOpenMPRuntime.h"
15 #include "CodeGenFunction.h"
16 #include "clang/AST/Decl.h"
17 #include "clang/AST/StmtOpenMP.h"
18 #include "llvm/ADT/ArrayRef.h"
19 #include "llvm/IR/CallSite.h"
20 #include "llvm/IR/DerivedTypes.h"
21 #include "llvm/IR/GlobalValue.h"
22 #include "llvm/IR/Value.h"
23 #include "llvm/Support/raw_ostream.h"
24 #include <cassert>
25 
26 using namespace clang;
27 using namespace CodeGen;
28 
29 namespace {
30 /// \brief API for captured statement code generation in OpenMP constructs.
31 class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
32 public:
33   CGOpenMPRegionInfo(const OMPExecutableDirective &D, const CapturedStmt &CS,
34                      const VarDecl *ThreadIDVar)
35       : CGCapturedStmtInfo(CS, CR_OpenMP), ThreadIDVar(ThreadIDVar),
36         Directive(D) {
37     assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
38   }
39 
40   /// \brief Gets a variable or parameter for storing global thread id
41   /// inside OpenMP construct.
42   const VarDecl *getThreadIDVariable() const { return ThreadIDVar; }
43 
44   /// \brief Gets an LValue for the current ThreadID variable.
45   LValue getThreadIDVariableLValue(CodeGenFunction &CGF);
46 
47   static bool classof(const CGCapturedStmtInfo *Info) {
48     return Info->getKind() == CR_OpenMP;
49   }
50 
51   /// \brief Emit the captured statement body.
52   void EmitBody(CodeGenFunction &CGF, Stmt *S) override;
53 
54   /// \brief Get the name of the capture helper.
55   StringRef getHelperName() const override { return ".omp_outlined."; }
56 
57 private:
58   /// \brief A variable or parameter storing global thread id for OpenMP
59   /// constructs.
60   const VarDecl *ThreadIDVar;
61   /// \brief OpenMP executable directive associated with the region.
62   const OMPExecutableDirective &Directive;
63 };
64 } // namespace
65 
66 LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) {
67   return CGF.MakeNaturalAlignAddrLValue(
68       CGF.GetAddrOfLocalVar(ThreadIDVar),
69       CGF.getContext().getPointerType(ThreadIDVar->getType()));
70 }
71 
72 void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, Stmt *S) {
73   CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
74   CGF.EmitOMPPrivateClause(Directive, PrivateScope);
75   CGF.EmitOMPFirstprivateClause(Directive, PrivateScope);
76   if (PrivateScope.Privatize())
77     // Emit implicit barrier to synchronize threads and avoid data races.
78     CGF.CGM.getOpenMPRuntime().emitBarrierCall(CGF, Directive.getLocStart(),
79                                                /*IsExplicit=*/false);
80   CGCapturedStmtInfo::EmitBody(CGF, S);
81 }
82 
83 CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM)
84     : CGM(CGM), DefaultOpenMPPSource(nullptr) {
85   IdentTy = llvm::StructType::create(
86       "ident_t", CGM.Int32Ty /* reserved_1 */, CGM.Int32Ty /* flags */,
87       CGM.Int32Ty /* reserved_2 */, CGM.Int32Ty /* reserved_3 */,
88       CGM.Int8PtrTy /* psource */, nullptr);
89   // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...)
90   llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty),
91                                llvm::PointerType::getUnqual(CGM.Int32Ty)};
92   Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true);
93   KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8);
94 }
95 
96 llvm::Value *
97 CGOpenMPRuntime::emitOutlinedFunction(const OMPExecutableDirective &D,
98                                       const VarDecl *ThreadIDVar) {
99   const CapturedStmt *CS = cast<CapturedStmt>(D.getAssociatedStmt());
100   CodeGenFunction CGF(CGM, true);
101   CGOpenMPRegionInfo CGInfo(D, *CS, ThreadIDVar);
102   CGF.CapturedStmtInfo = &CGInfo;
103   return CGF.GenerateCapturedStmtFunction(*CS);
104 }
105 
106 llvm::Value *
107 CGOpenMPRuntime::getOrCreateDefaultLocation(OpenMPLocationFlags Flags) {
108   llvm::Value *Entry = OpenMPDefaultLocMap.lookup(Flags);
109   if (!Entry) {
110     if (!DefaultOpenMPPSource) {
111       // Initialize default location for psource field of ident_t structure of
112       // all ident_t objects. Format is ";file;function;line;column;;".
113       // Taken from
114       // http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp_str.c
115       DefaultOpenMPPSource =
116           CGM.GetAddrOfConstantCString(";unknown;unknown;0;0;;");
117       DefaultOpenMPPSource =
118           llvm::ConstantExpr::getBitCast(DefaultOpenMPPSource, CGM.Int8PtrTy);
119     }
120     auto DefaultOpenMPLocation = new llvm::GlobalVariable(
121         CGM.getModule(), IdentTy, /*isConstant*/ true,
122         llvm::GlobalValue::PrivateLinkage, /*Initializer*/ nullptr);
123     DefaultOpenMPLocation->setUnnamedAddr(true);
124 
125     llvm::Constant *Zero = llvm::ConstantInt::get(CGM.Int32Ty, 0, true);
126     llvm::Constant *Values[] = {Zero,
127                                 llvm::ConstantInt::get(CGM.Int32Ty, Flags),
128                                 Zero, Zero, DefaultOpenMPPSource};
129     llvm::Constant *Init = llvm::ConstantStruct::get(IdentTy, Values);
130     DefaultOpenMPLocation->setInitializer(Init);
131     OpenMPDefaultLocMap[Flags] = DefaultOpenMPLocation;
132     return DefaultOpenMPLocation;
133   }
134   return Entry;
135 }
136 
137 llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF,
138                                                  SourceLocation Loc,
139                                                  OpenMPLocationFlags Flags) {
140   // If no debug info is generated - return global default location.
141   if (CGM.getCodeGenOpts().getDebugInfo() == CodeGenOptions::NoDebugInfo ||
142       Loc.isInvalid())
143     return getOrCreateDefaultLocation(Flags);
144 
145   assert(CGF.CurFn && "No function in current CodeGenFunction.");
146 
147   llvm::Value *LocValue = nullptr;
148   auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
149   if (I != OpenMPLocThreadIDMap.end())
150     LocValue = I->second.DebugLoc;
151   // OpenMPLocThreadIDMap may have null DebugLoc and non-null ThreadID, if
152   // GetOpenMPThreadID was called before this routine.
153   if (LocValue == nullptr) {
154     // Generate "ident_t .kmpc_loc.addr;"
155     llvm::AllocaInst *AI = CGF.CreateTempAlloca(IdentTy, ".kmpc_loc.addr");
156     AI->setAlignment(CGM.getDataLayout().getPrefTypeAlignment(IdentTy));
157     auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
158     Elem.second.DebugLoc = AI;
159     LocValue = AI;
160 
161     CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
162     CGF.Builder.SetInsertPoint(CGF.AllocaInsertPt);
163     CGF.Builder.CreateMemCpy(LocValue, getOrCreateDefaultLocation(Flags),
164                              llvm::ConstantExpr::getSizeOf(IdentTy),
165                              CGM.PointerAlignInBytes);
166   }
167 
168   // char **psource = &.kmpc_loc_<flags>.addr.psource;
169   auto *PSource =
170       CGF.Builder.CreateConstInBoundsGEP2_32(LocValue, 0, IdentField_PSource);
171 
172   auto OMPDebugLoc = OpenMPDebugLocMap.lookup(Loc.getRawEncoding());
173   if (OMPDebugLoc == nullptr) {
174     SmallString<128> Buffer2;
175     llvm::raw_svector_ostream OS2(Buffer2);
176     // Build debug location
177     PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
178     OS2 << ";" << PLoc.getFilename() << ";";
179     if (const FunctionDecl *FD =
180             dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl)) {
181       OS2 << FD->getQualifiedNameAsString();
182     }
183     OS2 << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;";
184     OMPDebugLoc = CGF.Builder.CreateGlobalStringPtr(OS2.str());
185     OpenMPDebugLocMap[Loc.getRawEncoding()] = OMPDebugLoc;
186   }
187   // *psource = ";<File>;<Function>;<Line>;<Column>;;";
188   CGF.Builder.CreateStore(OMPDebugLoc, PSource);
189 
190   return LocValue;
191 }
192 
193 llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF,
194                                           SourceLocation Loc) {
195   assert(CGF.CurFn && "No function in current CodeGenFunction.");
196 
197   llvm::Value *ThreadID = nullptr;
198   // Check whether we've already cached a load of the thread id in this
199   // function.
200   auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
201   if (I != OpenMPLocThreadIDMap.end()) {
202     ThreadID = I->second.ThreadID;
203     if (ThreadID != nullptr)
204       return ThreadID;
205   }
206   if (auto OMPRegionInfo =
207           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
208     // Check if this an outlined function with thread id passed as argument.
209     auto ThreadIDVar = OMPRegionInfo->getThreadIDVariable();
210     auto LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF);
211     auto RVal = CGF.EmitLoadOfLValue(LVal, Loc);
212     LVal = CGF.MakeNaturalAlignAddrLValue(RVal.getScalarVal(),
213                                           ThreadIDVar->getType());
214     ThreadID = CGF.EmitLoadOfLValue(LVal, Loc).getScalarVal();
215     // If value loaded in entry block, cache it and use it everywhere in
216     // function.
217     if (CGF.Builder.GetInsertBlock() == CGF.AllocaInsertPt->getParent()) {
218       auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
219       Elem.second.ThreadID = ThreadID;
220     }
221   } else {
222     // This is not an outlined function region - need to call __kmpc_int32
223     // kmpc_global_thread_num(ident_t *loc).
224     // Generate thread id value and cache this value for use across the
225     // function.
226     CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
227     CGF.Builder.SetInsertPoint(CGF.AllocaInsertPt);
228     ThreadID = CGF.EmitRuntimeCall(
229         createRuntimeFunction(OMPRTL__kmpc_global_thread_num),
230         emitUpdateLocation(CGF, Loc));
231     auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
232     Elem.second.ThreadID = ThreadID;
233   }
234   return ThreadID;
235 }
236 
237 void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) {
238   assert(CGF.CurFn && "No function in current CodeGenFunction.");
239   if (OpenMPLocThreadIDMap.count(CGF.CurFn))
240     OpenMPLocThreadIDMap.erase(CGF.CurFn);
241 }
242 
243 llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() {
244   return llvm::PointerType::getUnqual(IdentTy);
245 }
246 
247 llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() {
248   return llvm::PointerType::getUnqual(Kmpc_MicroTy);
249 }
250 
251 llvm::Constant *
252 CGOpenMPRuntime::createRuntimeFunction(OpenMPRTLFunction Function) {
253   llvm::Constant *RTLFn = nullptr;
254   switch (Function) {
255   case OMPRTL__kmpc_fork_call: {
256     // Build void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro
257     // microtask, ...);
258     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
259                                 getKmpc_MicroPointerTy()};
260     llvm::FunctionType *FnTy =
261         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ true);
262     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_fork_call");
263     break;
264   }
265   case OMPRTL__kmpc_global_thread_num: {
266     // Build kmp_int32 __kmpc_global_thread_num(ident_t *loc);
267     llvm::Type *TypeParams[] = {getIdentTyPointerTy()};
268     llvm::FunctionType *FnTy =
269         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
270     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_global_thread_num");
271     break;
272   }
273   case OMPRTL__kmpc_threadprivate_cached: {
274     // Build void *__kmpc_threadprivate_cached(ident_t *loc,
275     // kmp_int32 global_tid, void *data, size_t size, void ***cache);
276     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
277                                 CGM.VoidPtrTy, CGM.SizeTy,
278                                 CGM.VoidPtrTy->getPointerTo()->getPointerTo()};
279     llvm::FunctionType *FnTy =
280         llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg*/ false);
281     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_cached");
282     break;
283   }
284   case OMPRTL__kmpc_critical: {
285     // Build void __kmpc_critical(ident_t *loc, kmp_int32 global_tid,
286     // kmp_critical_name *crit);
287     llvm::Type *TypeParams[] = {
288         getIdentTyPointerTy(), CGM.Int32Ty,
289         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
290     llvm::FunctionType *FnTy =
291         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
292     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_critical");
293     break;
294   }
295   case OMPRTL__kmpc_threadprivate_register: {
296     // Build void __kmpc_threadprivate_register(ident_t *, void *data,
297     // kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor);
298     // typedef void *(*kmpc_ctor)(void *);
299     auto KmpcCtorTy =
300         llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
301                                 /*isVarArg*/ false)->getPointerTo();
302     // typedef void *(*kmpc_cctor)(void *, void *);
303     llvm::Type *KmpcCopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
304     auto KmpcCopyCtorTy =
305         llvm::FunctionType::get(CGM.VoidPtrTy, KmpcCopyCtorTyArgs,
306                                 /*isVarArg*/ false)->getPointerTo();
307     // typedef void (*kmpc_dtor)(void *);
308     auto KmpcDtorTy =
309         llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy, /*isVarArg*/ false)
310             ->getPointerTo();
311     llvm::Type *FnTyArgs[] = {getIdentTyPointerTy(), CGM.VoidPtrTy, KmpcCtorTy,
312                               KmpcCopyCtorTy, KmpcDtorTy};
313     auto FnTy = llvm::FunctionType::get(CGM.VoidTy, FnTyArgs,
314                                         /*isVarArg*/ false);
315     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_register");
316     break;
317   }
318   case OMPRTL__kmpc_end_critical: {
319     // Build void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid,
320     // kmp_critical_name *crit);
321     llvm::Type *TypeParams[] = {
322         getIdentTyPointerTy(), CGM.Int32Ty,
323         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
324     llvm::FunctionType *FnTy =
325         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
326     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_critical");
327     break;
328   }
329   case OMPRTL__kmpc_cancel_barrier: {
330     // Build kmp_int32 __kmpc_cancel_barrier(ident_t *loc, kmp_int32
331     // global_tid);
332     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
333     llvm::FunctionType *FnTy =
334         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
335     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_cancel_barrier");
336     break;
337   }
338   // Build __kmpc_for_static_init*(
339   //               ident_t *loc, kmp_int32 tid, kmp_int32 schedtype,
340   //               kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower,
341   //               kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
342   //               kmp_int[32|64] incr, kmp_int[32|64] chunk);
343   case OMPRTL__kmpc_for_static_init_4: {
344     auto ITy = CGM.Int32Ty;
345     auto PtrTy = llvm::PointerType::getUnqual(ITy);
346     llvm::Type *TypeParams[] = {
347         getIdentTyPointerTy(),                     // loc
348         CGM.Int32Ty,                               // tid
349         CGM.Int32Ty,                               // schedtype
350         llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
351         PtrTy,                                     // p_lower
352         PtrTy,                                     // p_upper
353         PtrTy,                                     // p_stride
354         ITy,                                       // incr
355         ITy                                        // chunk
356     };
357     llvm::FunctionType *FnTy =
358         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
359     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_for_static_init_4");
360     break;
361   }
362   case OMPRTL__kmpc_for_static_init_4u: {
363     auto ITy = CGM.Int32Ty;
364     auto PtrTy = llvm::PointerType::getUnqual(ITy);
365     llvm::Type *TypeParams[] = {
366         getIdentTyPointerTy(),                     // loc
367         CGM.Int32Ty,                               // tid
368         CGM.Int32Ty,                               // schedtype
369         llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
370         PtrTy,                                     // p_lower
371         PtrTy,                                     // p_upper
372         PtrTy,                                     // p_stride
373         ITy,                                       // incr
374         ITy                                        // chunk
375     };
376     llvm::FunctionType *FnTy =
377         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
378     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_for_static_init_4u");
379     break;
380   }
381   case OMPRTL__kmpc_for_static_init_8: {
382     auto ITy = CGM.Int64Ty;
383     auto PtrTy = llvm::PointerType::getUnqual(ITy);
384     llvm::Type *TypeParams[] = {
385         getIdentTyPointerTy(),                     // loc
386         CGM.Int32Ty,                               // tid
387         CGM.Int32Ty,                               // schedtype
388         llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
389         PtrTy,                                     // p_lower
390         PtrTy,                                     // p_upper
391         PtrTy,                                     // p_stride
392         ITy,                                       // incr
393         ITy                                        // chunk
394     };
395     llvm::FunctionType *FnTy =
396         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
397     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_for_static_init_8");
398     break;
399   }
400   case OMPRTL__kmpc_for_static_init_8u: {
401     auto ITy = CGM.Int64Ty;
402     auto PtrTy = llvm::PointerType::getUnqual(ITy);
403     llvm::Type *TypeParams[] = {
404         getIdentTyPointerTy(),                     // loc
405         CGM.Int32Ty,                               // tid
406         CGM.Int32Ty,                               // schedtype
407         llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
408         PtrTy,                                     // p_lower
409         PtrTy,                                     // p_upper
410         PtrTy,                                     // p_stride
411         ITy,                                       // incr
412         ITy                                        // chunk
413     };
414     llvm::FunctionType *FnTy =
415         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
416     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_for_static_init_8u");
417     break;
418   }
419   case OMPRTL__kmpc_for_static_fini: {
420     // Build void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid);
421     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
422     llvm::FunctionType *FnTy =
423         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
424     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_for_static_fini");
425     break;
426   }
427   case OMPRTL__kmpc_push_num_threads: {
428     // Build void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
429     // kmp_int32 num_threads)
430     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
431                                 CGM.Int32Ty};
432     llvm::FunctionType *FnTy =
433         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
434     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_num_threads");
435     break;
436   }
437   case OMPRTL__kmpc_serialized_parallel: {
438     // Build void __kmpc_serialized_parallel(ident_t *loc, kmp_int32
439     // global_tid);
440     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
441     llvm::FunctionType *FnTy =
442         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
443     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_serialized_parallel");
444     break;
445   }
446   case OMPRTL__kmpc_end_serialized_parallel: {
447     // Build void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32
448     // global_tid);
449     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
450     llvm::FunctionType *FnTy =
451         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
452     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_serialized_parallel");
453     break;
454   }
455   case OMPRTL__kmpc_flush: {
456     // Build void __kmpc_flush(ident_t *loc);
457     llvm::Type *TypeParams[] = {getIdentTyPointerTy()};
458     llvm::FunctionType *FnTy =
459         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
460     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_flush");
461     break;
462   }
463   case OMPRTL__kmpc_master: {
464     // Build kmp_int32 __kmpc_master(ident_t *loc, kmp_int32 global_tid);
465     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
466     llvm::FunctionType *FnTy =
467         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
468     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_master");
469     break;
470   }
471   case OMPRTL__kmpc_end_master: {
472     // Build void __kmpc_end_master(ident_t *loc, kmp_int32 global_tid);
473     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
474     llvm::FunctionType *FnTy =
475         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
476     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_master");
477     break;
478   }
479   case OMPRTL__kmpc_omp_taskyield: {
480     // Build kmp_int32 __kmpc_omp_taskyield(ident_t *, kmp_int32 global_tid,
481     // int end_part);
482     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
483     llvm::FunctionType *FnTy =
484         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
485     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_taskyield");
486     break;
487   }
488   case OMPRTL__kmpc_single: {
489     // Build kmp_int32 __kmpc_single(ident_t *loc, kmp_int32 global_tid);
490     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
491     llvm::FunctionType *FnTy =
492         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
493     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_single");
494     break;
495   }
496   case OMPRTL__kmpc_end_single: {
497     // Build void __kmpc_end_single(ident_t *loc, kmp_int32 global_tid);
498     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
499     llvm::FunctionType *FnTy =
500         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
501     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_single");
502     break;
503   }
504   }
505   return RTLFn;
506 }
507 
508 llvm::Constant *
509 CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) {
510   // Lookup the entry, lazily creating it if necessary.
511   return getOrCreateInternalVariable(CGM.Int8PtrPtrTy,
512                                      Twine(CGM.getMangledName(VD)) + ".cache.");
513 }
514 
515 llvm::Value *CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
516                                                      const VarDecl *VD,
517                                                      llvm::Value *VDAddr,
518                                                      SourceLocation Loc) {
519   auto VarTy = VDAddr->getType()->getPointerElementType();
520   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
521                          CGF.Builder.CreatePointerCast(VDAddr, CGM.Int8PtrTy),
522                          CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)),
523                          getOrCreateThreadPrivateCache(VD)};
524   return CGF.EmitRuntimeCall(
525       createRuntimeFunction(OMPRTL__kmpc_threadprivate_cached), Args);
526 }
527 
528 void CGOpenMPRuntime::emitThreadPrivateVarInit(
529     CodeGenFunction &CGF, llvm::Value *VDAddr, llvm::Value *Ctor,
530     llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) {
531   // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime
532   // library.
533   auto OMPLoc = emitUpdateLocation(CGF, Loc);
534   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_global_thread_num),
535                       OMPLoc);
536   // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor)
537   // to register constructor/destructor for variable.
538   llvm::Value *Args[] = {OMPLoc,
539                          CGF.Builder.CreatePointerCast(VDAddr, CGM.VoidPtrTy),
540                          Ctor, CopyCtor, Dtor};
541   CGF.EmitRuntimeCall(
542       createRuntimeFunction(OMPRTL__kmpc_threadprivate_register), Args);
543 }
544 
545 llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition(
546     const VarDecl *VD, llvm::Value *VDAddr, SourceLocation Loc,
547     bool PerformInit, CodeGenFunction *CGF) {
548   VD = VD->getDefinition(CGM.getContext());
549   if (VD && ThreadPrivateWithDefinition.count(VD) == 0) {
550     ThreadPrivateWithDefinition.insert(VD);
551     QualType ASTTy = VD->getType();
552 
553     llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr;
554     auto Init = VD->getAnyInitializer();
555     if (CGM.getLangOpts().CPlusPlus && PerformInit) {
556       // Generate function that re-emits the declaration's initializer into the
557       // threadprivate copy of the variable VD
558       CodeGenFunction CtorCGF(CGM);
559       FunctionArgList Args;
560       ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, SourceLocation(),
561                             /*Id=*/nullptr, CGM.getContext().VoidPtrTy);
562       Args.push_back(&Dst);
563 
564       auto &FI = CGM.getTypes().arrangeFreeFunctionDeclaration(
565           CGM.getContext().VoidPtrTy, Args, FunctionType::ExtInfo(),
566           /*isVariadic=*/false);
567       auto FTy = CGM.getTypes().GetFunctionType(FI);
568       auto Fn = CGM.CreateGlobalInitOrDestructFunction(
569           FTy, ".__kmpc_global_ctor_.", Loc);
570       CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI,
571                             Args, SourceLocation());
572       auto ArgVal = CtorCGF.EmitLoadOfScalar(
573           CtorCGF.GetAddrOfLocalVar(&Dst),
574           /*Volatile=*/false, CGM.PointerAlignInBytes,
575           CGM.getContext().VoidPtrTy, Dst.getLocation());
576       auto Arg = CtorCGF.Builder.CreatePointerCast(
577           ArgVal,
578           CtorCGF.ConvertTypeForMem(CGM.getContext().getPointerType(ASTTy)));
579       CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(),
580                                /*IsInitializer=*/true);
581       ArgVal = CtorCGF.EmitLoadOfScalar(
582           CtorCGF.GetAddrOfLocalVar(&Dst),
583           /*Volatile=*/false, CGM.PointerAlignInBytes,
584           CGM.getContext().VoidPtrTy, Dst.getLocation());
585       CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue);
586       CtorCGF.FinishFunction();
587       Ctor = Fn;
588     }
589     if (VD->getType().isDestructedType() != QualType::DK_none) {
590       // Generate function that emits destructor call for the threadprivate copy
591       // of the variable VD
592       CodeGenFunction DtorCGF(CGM);
593       FunctionArgList Args;
594       ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, SourceLocation(),
595                             /*Id=*/nullptr, CGM.getContext().VoidPtrTy);
596       Args.push_back(&Dst);
597 
598       auto &FI = CGM.getTypes().arrangeFreeFunctionDeclaration(
599           CGM.getContext().VoidTy, Args, FunctionType::ExtInfo(),
600           /*isVariadic=*/false);
601       auto FTy = CGM.getTypes().GetFunctionType(FI);
602       auto Fn = CGM.CreateGlobalInitOrDestructFunction(
603           FTy, ".__kmpc_global_dtor_.", Loc);
604       DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args,
605                             SourceLocation());
606       auto ArgVal = DtorCGF.EmitLoadOfScalar(
607           DtorCGF.GetAddrOfLocalVar(&Dst),
608           /*Volatile=*/false, CGM.PointerAlignInBytes,
609           CGM.getContext().VoidPtrTy, Dst.getLocation());
610       DtorCGF.emitDestroy(ArgVal, ASTTy,
611                           DtorCGF.getDestroyer(ASTTy.isDestructedType()),
612                           DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
613       DtorCGF.FinishFunction();
614       Dtor = Fn;
615     }
616     // Do not emit init function if it is not required.
617     if (!Ctor && !Dtor)
618       return nullptr;
619 
620     llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
621     auto CopyCtorTy =
622         llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs,
623                                 /*isVarArg=*/false)->getPointerTo();
624     // Copying constructor for the threadprivate variable.
625     // Must be NULL - reserved by runtime, but currently it requires that this
626     // parameter is always NULL. Otherwise it fires assertion.
627     CopyCtor = llvm::Constant::getNullValue(CopyCtorTy);
628     if (Ctor == nullptr) {
629       auto CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
630                                             /*isVarArg=*/false)->getPointerTo();
631       Ctor = llvm::Constant::getNullValue(CtorTy);
632     }
633     if (Dtor == nullptr) {
634       auto DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy,
635                                             /*isVarArg=*/false)->getPointerTo();
636       Dtor = llvm::Constant::getNullValue(DtorTy);
637     }
638     if (!CGF) {
639       auto InitFunctionTy =
640           llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false);
641       auto InitFunction = CGM.CreateGlobalInitOrDestructFunction(
642           InitFunctionTy, ".__omp_threadprivate_init_.");
643       CodeGenFunction InitCGF(CGM);
644       FunctionArgList ArgList;
645       InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction,
646                             CGM.getTypes().arrangeNullaryFunction(), ArgList,
647                             Loc);
648       emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
649       InitCGF.FinishFunction();
650       return InitFunction;
651     }
652     emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
653   }
654   return nullptr;
655 }
656 
657 void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
658                                        llvm::Value *OutlinedFn,
659                                        llvm::Value *CapturedStruct) {
660   // Build call __kmpc_fork_call(loc, 1, microtask, captured_struct/*context*/)
661   llvm::Value *Args[] = {
662       emitUpdateLocation(CGF, Loc),
663       CGF.Builder.getInt32(1), // Number of arguments after 'microtask' argument
664       // (there is only one additional argument - 'context')
665       CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy()),
666       CGF.EmitCastToVoidPtr(CapturedStruct)};
667   auto RTLFn = createRuntimeFunction(OMPRTL__kmpc_fork_call);
668   CGF.EmitRuntimeCall(RTLFn, Args);
669 }
670 
671 void CGOpenMPRuntime::emitSerialCall(CodeGenFunction &CGF, SourceLocation Loc,
672                                      llvm::Value *OutlinedFn,
673                                      llvm::Value *CapturedStruct) {
674   auto ThreadID = getThreadID(CGF, Loc);
675   // Build calls:
676   // __kmpc_serialized_parallel(&Loc, GTid);
677   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), ThreadID};
678   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_serialized_parallel),
679                       Args);
680 
681   // OutlinedFn(&GTid, &zero, CapturedStruct);
682   auto ThreadIDAddr = emitThreadIDAddress(CGF, Loc);
683   auto Int32Ty =
684       CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true);
685   auto ZeroAddr = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".zero.addr");
686   CGF.InitTempAlloca(ZeroAddr, CGF.Builder.getInt32(/*C*/ 0));
687   llvm::Value *OutlinedFnArgs[] = {ThreadIDAddr, ZeroAddr, CapturedStruct};
688   CGF.EmitCallOrInvoke(OutlinedFn, OutlinedFnArgs);
689 
690   // __kmpc_end_serialized_parallel(&Loc, GTid);
691   llvm::Value *EndArgs[] = {emitUpdateLocation(CGF, Loc), ThreadID};
692   CGF.EmitRuntimeCall(
693       createRuntimeFunction(OMPRTL__kmpc_end_serialized_parallel), EndArgs);
694 }
695 
696 // If we're inside an (outlined) parallel region, use the region info's
697 // thread-ID variable (it is passed in a first argument of the outlined function
698 // as "kmp_int32 *gtid"). Otherwise, if we're not inside parallel region, but in
699 // regular serial code region, get thread ID by calling kmp_int32
700 // kmpc_global_thread_num(ident_t *loc), stash this thread ID in a temporary and
701 // return the address of that temp.
702 llvm::Value *CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF,
703                                                   SourceLocation Loc) {
704   if (auto OMPRegionInfo =
705           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
706     return CGF.EmitLoadOfLValue(OMPRegionInfo->getThreadIDVariableLValue(CGF),
707                                 SourceLocation()).getScalarVal();
708   auto ThreadID = getThreadID(CGF, Loc);
709   auto Int32Ty =
710       CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true);
711   auto ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp.");
712   CGF.EmitStoreOfScalar(ThreadID,
713                         CGF.MakeNaturalAlignAddrLValue(ThreadIDTemp, Int32Ty));
714 
715   return ThreadIDTemp;
716 }
717 
718 llvm::Constant *
719 CGOpenMPRuntime::getOrCreateInternalVariable(llvm::Type *Ty,
720                                              const llvm::Twine &Name) {
721   SmallString<256> Buffer;
722   llvm::raw_svector_ostream Out(Buffer);
723   Out << Name;
724   auto RuntimeName = Out.str();
725   auto &Elem = *InternalVars.insert(std::make_pair(RuntimeName, nullptr)).first;
726   if (Elem.second) {
727     assert(Elem.second->getType()->getPointerElementType() == Ty &&
728            "OMP internal variable has different type than requested");
729     return &*Elem.second;
730   }
731 
732   return Elem.second = new llvm::GlobalVariable(
733              CGM.getModule(), Ty, /*IsConstant*/ false,
734              llvm::GlobalValue::CommonLinkage, llvm::Constant::getNullValue(Ty),
735              Elem.first());
736 }
737 
738 llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) {
739   llvm::Twine Name(".gomp_critical_user_", CriticalName);
740   return getOrCreateInternalVariable(KmpCriticalNameTy, Name.concat(".var"));
741 }
742 
743 void CGOpenMPRuntime::emitCriticalRegion(
744     CodeGenFunction &CGF, StringRef CriticalName,
745     const std::function<void()> &CriticalOpGen, SourceLocation Loc) {
746   auto RegionLock = getCriticalRegionLock(CriticalName);
747   // __kmpc_critical(ident_t *, gtid, Lock);
748   // CriticalOpGen();
749   // __kmpc_end_critical(ident_t *, gtid, Lock);
750   // Prepare arguments and build a call to __kmpc_critical
751   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
752                          RegionLock};
753   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_critical), Args);
754   CriticalOpGen();
755   // Build a call to __kmpc_end_critical
756   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_end_critical), Args);
757 }
758 
759 static void emitIfStmt(CodeGenFunction &CGF, llvm::Value *IfCond,
760                        const std::function<void()> &BodyOpGen) {
761   llvm::Value *CallBool = CGF.EmitScalarConversion(
762       IfCond,
763       CGF.getContext().getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true),
764       CGF.getContext().BoolTy);
765 
766   auto *ThenBlock = CGF.createBasicBlock("omp_if.then");
767   auto *ContBlock = CGF.createBasicBlock("omp_if.end");
768   // Generate the branch (If-stmt)
769   CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock);
770   CGF.EmitBlock(ThenBlock);
771   BodyOpGen();
772   // Emit the rest of bblocks/branches
773   CGF.EmitBranch(ContBlock);
774   CGF.EmitBlock(ContBlock, true);
775 }
776 
777 void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF,
778                                        const std::function<void()> &MasterOpGen,
779                                        SourceLocation Loc) {
780   // if(__kmpc_master(ident_t *, gtid)) {
781   //   MasterOpGen();
782   //   __kmpc_end_master(ident_t *, gtid);
783   // }
784   // Prepare arguments and build a call to __kmpc_master
785   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
786   auto *IsMaster =
787       CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_master), Args);
788   emitIfStmt(CGF, IsMaster, [&]() -> void {
789     MasterOpGen();
790     // Build a call to __kmpc_end_master.
791     // OpenMP [1.2.2 OpenMP Language Terminology]
792     // For C/C++, an executable statement, possibly compound, with a single
793     // entry at the top and a single exit at the bottom, or an OpenMP construct.
794     // * Access to the structured block must not be the result of a branch.
795     // * The point of exit cannot be a branch out of the structured block.
796     // * The point of entry must not be a call to setjmp().
797     // * longjmp() and throw() must not violate the entry/exit criteria.
798     // * An expression statement, iteration statement, selection statement, or
799     // try block is considered to be a structured block if the corresponding
800     // compound statement obtained by enclosing it in { and } would be a
801     // structured block.
802     // It is analyzed in Sema, so we can just call __kmpc_end_master() on
803     // fallthrough rather than pushing a normal cleanup for it.
804     CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_end_master), Args);
805   });
806 }
807 
808 void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
809                                         SourceLocation Loc) {
810   // Build call __kmpc_omp_taskyield(loc, thread_id, 0);
811   llvm::Value *Args[] = {
812       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
813       llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)};
814   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_taskyield), Args);
815 }
816 
817 void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF,
818                                        const std::function<void()> &SingleOpGen,
819                                        SourceLocation Loc) {
820   // if(__kmpc_single(ident_t *, gtid)) {
821   //   SingleOpGen();
822   //   __kmpc_end_single(ident_t *, gtid);
823   // }
824   // Prepare arguments and build a call to __kmpc_single
825   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
826   auto *IsSingle =
827       CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_single), Args);
828   emitIfStmt(CGF, IsSingle, [&]() -> void {
829     SingleOpGen();
830     // Build a call to __kmpc_end_single.
831     // OpenMP [1.2.2 OpenMP Language Terminology]
832     // For C/C++, an executable statement, possibly compound, with a single
833     // entry at the top and a single exit at the bottom, or an OpenMP construct.
834     // * Access to the structured block must not be the result of a branch.
835     // * The point of exit cannot be a branch out of the structured block.
836     // * The point of entry must not be a call to setjmp().
837     // * longjmp() and throw() must not violate the entry/exit criteria.
838     // * An expression statement, iteration statement, selection statement, or
839     // try block is considered to be a structured block if the corresponding
840     // compound statement obtained by enclosing it in { and } would be a
841     // structured block.
842     // It is analyzed in Sema, so we can just call __kmpc_end_single() on
843     // fallthrough rather than pushing a normal cleanup for it.
844     CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_end_single), Args);
845   });
846 }
847 
848 void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
849                                       bool IsExplicit) {
850   // Build call __kmpc_cancel_barrier(loc, thread_id);
851   auto Flags = static_cast<OpenMPLocationFlags>(
852       OMP_IDENT_KMPC |
853       (IsExplicit ? OMP_IDENT_BARRIER_EXPL : OMP_IDENT_BARRIER_IMPL));
854   // Build call __kmpc_cancel_barrier(loc, thread_id);
855   // Replace __kmpc_barrier() function by __kmpc_cancel_barrier() because this
856   // one provides the same functionality and adds initial support for
857   // cancellation constructs introduced in OpenMP 4.0. __kmpc_cancel_barrier()
858   // is provided default by the runtime library so it safe to make such
859   // replacement.
860   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags),
861                          getThreadID(CGF, Loc)};
862   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_cancel_barrier), Args);
863 }
864 
/// \brief Schedule types for 'omp for' loops (these enumerators are taken from
/// the enum sched_type in kmp.h).
///
/// The numeric values are passed to the runtime as the 'schedtype' argument,
/// so they must stay in sync with kmp.h.
enum OpenMPSchedType {
  /// \brief Lower bound for default (unordered) versions.
  OMP_sch_lower = 32,
  /// \brief 'static' schedule with a chunk size.
  OMP_sch_static_chunked = 33,
  /// \brief 'static' schedule without a chunk size; also used when no schedule
  /// kind is known.
  OMP_sch_static = 34,
  /// \brief 'dynamic' schedule.
  OMP_sch_dynamic_chunked = 35,
  /// \brief 'guided' schedule.
  OMP_sch_guided_chunked = 36,
  /// \brief 'runtime' schedule.
  OMP_sch_runtime = 37,
  /// \brief 'auto' schedule.
  OMP_sch_auto = 38,
  /// \brief Lower bound for 'ordered' versions.
  OMP_ord_lower = 64,
  /// \brief Lower bound for 'nomerge' versions.
  OMP_nm_lower = 160,
};
881 
882 /// \brief Map the OpenMP loop schedule to the runtime enumeration.
883 static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind,
884                                           bool Chunked) {
885   switch (ScheduleKind) {
886   case OMPC_SCHEDULE_static:
887     return Chunked ? OMP_sch_static_chunked : OMP_sch_static;
888   case OMPC_SCHEDULE_dynamic:
889     return OMP_sch_dynamic_chunked;
890   case OMPC_SCHEDULE_guided:
891     return OMP_sch_guided_chunked;
892   case OMPC_SCHEDULE_auto:
893     return OMP_sch_auto;
894   case OMPC_SCHEDULE_runtime:
895     return OMP_sch_runtime;
896   case OMPC_SCHEDULE_unknown:
897     assert(!Chunked && "chunk was specified but schedule kind not known");
898     return OMP_sch_static;
899   }
900   llvm_unreachable("Unexpected runtime schedule");
901 }
902 
903 bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind,
904                                          bool Chunked) const {
905   auto Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
906   return Schedule == OMP_sch_static;
907 }
908 
909 bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const {
910   auto Schedule = getRuntimeSchedule(ScheduleKind, /* Chunked */ false);
911   assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here");
912   return Schedule != OMP_sch_static;
913 }
914 
915 void CGOpenMPRuntime::emitForInit(CodeGenFunction &CGF, SourceLocation Loc,
916                                   OpenMPScheduleClauseKind ScheduleKind,
917                                   unsigned IVSize, bool IVSigned,
918                                   llvm::Value *IL, llvm::Value *LB,
919                                   llvm::Value *UB, llvm::Value *ST,
920                                   llvm::Value *Chunk) {
921   OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunk != nullptr);
922   // Call __kmpc_for_static_init(
923   //          ident_t *loc, kmp_int32 tid, kmp_int32 schedtype,
924   //          kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower,
925   //          kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
926   //          kmp_int[32|64] incr, kmp_int[32|64] chunk);
927   // TODO: Implement dynamic schedule.
928 
929   // If the Chunk was not specified in the clause - use default value 1.
930   if (Chunk == nullptr)
931     Chunk = CGF.Builder.getIntN(IVSize, /*C*/ 1);
932 
933   llvm::Value *Args[] = {
934       emitUpdateLocation(CGF, Loc, OMP_IDENT_KMPC), getThreadID(CGF, Loc),
935       CGF.Builder.getInt32(Schedule), // Schedule type
936       IL,                             // &isLastIter
937       LB,                             // &LB
938       UB,                             // &UB
939       ST,                             // &Stride
940       CGF.Builder.getIntN(IVSize, 1), // Incr
941       Chunk                           // Chunk
942   };
943   assert((IVSize == 32 || IVSize == 64) &&
944          "Index size is not compatible with the omp runtime");
945   auto F = IVSize == 32 ? (IVSigned ? OMPRTL__kmpc_for_static_init_4
946                                     : OMPRTL__kmpc_for_static_init_4u)
947                         : (IVSigned ? OMPRTL__kmpc_for_static_init_8
948                                     : OMPRTL__kmpc_for_static_init_8u);
949   CGF.EmitRuntimeCall(createRuntimeFunction(F), Args);
950 }
951 
952 void CGOpenMPRuntime::emitForFinish(CodeGenFunction &CGF, SourceLocation Loc,
953                                     OpenMPScheduleClauseKind ScheduleKind) {
954   assert((ScheduleKind == OMPC_SCHEDULE_static ||
955           ScheduleKind == OMPC_SCHEDULE_unknown) &&
956          "Non-static schedule kinds are not yet implemented");
957   // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid);
958   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, OMP_IDENT_KMPC),
959                          getThreadID(CGF, Loc)};
960   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_for_static_fini),
961                       Args);
962 }
963 
964 void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
965                                            llvm::Value *NumThreads,
966                                            SourceLocation Loc) {
967   // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads)
968   llvm::Value *Args[] = {
969       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
970       CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)};
971   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_num_threads),
972                       Args);
973 }
974 
975 void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>,
976                                 SourceLocation Loc) {
977   // Build call void __kmpc_flush(ident_t *loc)
978   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_flush),
979                       emitUpdateLocation(CGF, Loc));
980 }
981 
982