1 //===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This provides a class for OpenMP runtime code generation.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "CGOpenMPRuntime.h"
15 #include "CodeGenFunction.h"
16 #include "CGCleanup.h"
17 #include "clang/AST/Decl.h"
18 #include "clang/AST/StmtOpenMP.h"
19 #include "llvm/ADT/ArrayRef.h"
20 #include "llvm/IR/CallSite.h"
21 #include "llvm/IR/DerivedTypes.h"
22 #include "llvm/IR/GlobalValue.h"
23 #include "llvm/IR/Value.h"
24 #include "llvm/Support/raw_ostream.h"
25 #include <cassert>
26 
27 using namespace clang;
28 using namespace CodeGen;
29 
30 namespace {
31 /// \brief Base class for handling code generation inside OpenMP regions.
32 class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
33 public:
34   CGOpenMPRegionInfo(const OMPExecutableDirective &D, const CapturedStmt &CS)
35       : CGCapturedStmtInfo(CS, CR_OpenMP), Directive(D) {}
36 
37   CGOpenMPRegionInfo(const OMPExecutableDirective &D)
38       : CGCapturedStmtInfo(CR_OpenMP), Directive(D) {}
39 
40   /// \brief Get a variable or parameter for storing global thread id
41   /// inside OpenMP construct.
42   virtual const VarDecl *getThreadIDVariable() const = 0;
43 
44   /// \brief Get an LValue for the current ThreadID variable.
45   /// \return LValue for thread id variable. This LValue always has type int32*.
46   virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF);
47 
48     /// \brief Emit the captured statement body.
49   virtual void EmitBody(CodeGenFunction &CGF, const Stmt *S) override;
50 
51   static bool classof(const CGCapturedStmtInfo *Info) {
52     return Info->getKind() == CR_OpenMP;
53   }
54 protected:
55   /// \brief OpenMP executable directive associated with the region.
56   const OMPExecutableDirective &Directive;
57 };
58 
59 /// \brief API for captured statement code generation in OpenMP constructs.
60 class CGOpenMPOutlinedRegionInfo : public CGOpenMPRegionInfo {
61 public:
62   CGOpenMPOutlinedRegionInfo(const OMPExecutableDirective &D,
63                              const CapturedStmt &CS, const VarDecl *ThreadIDVar)
64       : CGOpenMPRegionInfo(D, CS), ThreadIDVar(ThreadIDVar) {
65     assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
66   }
67   /// \brief Get a variable or parameter for storing global thread id
68   /// inside OpenMP construct.
69   virtual const VarDecl *getThreadIDVariable() const override {
70     return ThreadIDVar;
71   }
72   /// \brief Get the name of the capture helper.
73   StringRef getHelperName() const override { return ".omp_outlined."; }
74 
75 private:
76   /// \brief A variable or parameter storing global thread id for OpenMP
77   /// constructs.
78   const VarDecl *ThreadIDVar;
79 };
80 
81 /// \brief API for captured statement code generation in OpenMP constructs.
82 class CGOpenMPTaskOutlinedRegionInfo : public CGOpenMPRegionInfo {
83 public:
84   CGOpenMPTaskOutlinedRegionInfo(const OMPExecutableDirective &D,
85                                  const CapturedStmt &CS,
86                                  const VarDecl *ThreadIDVar,
87                                  const VarDecl *PartIDVar)
88       : CGOpenMPRegionInfo(D, CS), ThreadIDVar(ThreadIDVar),
89         PartIDVar(PartIDVar) {
90     assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
91   }
92   /// \brief Get a variable or parameter for storing global thread id
93   /// inside OpenMP construct.
94   virtual const VarDecl *getThreadIDVariable() const override {
95     return ThreadIDVar;
96   }
97 
98   /// \brief Get an LValue for the current ThreadID variable.
99   virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override;
100 
101   /// \brief Emit the captured statement body.
102   virtual void EmitBody(CodeGenFunction &CGF, const Stmt *S) override;
103 
104   /// \brief Get the name of the capture helper.
105   StringRef getHelperName() const override { return ".omp_outlined."; }
106 
107 private:
108   /// \brief A variable or parameter storing global thread id for OpenMP
109   /// constructs.
110   const VarDecl *ThreadIDVar;
111   /// \brief A variable or parameter storing part id for OpenMP tasking
112   /// constructs.
113   const VarDecl *PartIDVar;
114 };
115 
116 /// \brief API for inlined captured statement code generation in OpenMP
117 /// constructs.
118 class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo {
119 public:
120   CGOpenMPInlinedRegionInfo(const OMPExecutableDirective &D,
121                             CodeGenFunction::CGCapturedStmtInfo *OldCSI)
122       : CGOpenMPRegionInfo(D), OldCSI(OldCSI),
123         OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {}
124   // \brief Retrieve the value of the context parameter.
125   virtual llvm::Value *getContextValue() const override {
126     if (OuterRegionInfo)
127       return OuterRegionInfo->getContextValue();
128     llvm_unreachable("No context value for inlined OpenMP region");
129   }
130   /// \brief Lookup the captured field decl for a variable.
131   virtual const FieldDecl *lookup(const VarDecl *VD) const override {
132     if (OuterRegionInfo)
133       return OuterRegionInfo->lookup(VD);
134     llvm_unreachable("Trying to reference VarDecl that is neither local nor "
135                      "captured in outer OpenMP region");
136   }
137   virtual FieldDecl *getThisFieldDecl() const override {
138     if (OuterRegionInfo)
139       return OuterRegionInfo->getThisFieldDecl();
140     return nullptr;
141   }
142   /// \brief Get a variable or parameter for storing global thread id
143   /// inside OpenMP construct.
144   virtual const VarDecl *getThreadIDVariable() const override {
145     if (OuterRegionInfo)
146       return OuterRegionInfo->getThreadIDVariable();
147     return nullptr;
148   }
149 
150   /// \brief Get the name of the capture helper.
151   virtual StringRef getHelperName() const override {
152     llvm_unreachable("No helper name for inlined OpenMP construct");
153   }
154 
155   CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; }
156 
157 private:
158   /// \brief CodeGen info about outer OpenMP region.
159   CodeGenFunction::CGCapturedStmtInfo *OldCSI;
160   CGOpenMPRegionInfo *OuterRegionInfo;
161 };
162 } // namespace
163 
164 LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) {
165   return CGF.MakeNaturalAlignAddrLValue(
166       CGF.Builder.CreateAlignedLoad(
167           CGF.GetAddrOfLocalVar(getThreadIDVariable()),
168           CGF.PointerAlignInBytes),
169       getThreadIDVariable()
170           ->getType()
171           ->castAs<PointerType>()
172           ->getPointeeType());
173 }
174 
175 void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt *S) {
176   CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
177   CGF.EmitOMPPrivateClause(Directive, PrivateScope);
178   CGF.EmitOMPFirstprivateClause(Directive, PrivateScope);
179   if (PrivateScope.Privatize())
180     // Emit implicit barrier to synchronize threads and avoid data races.
181     CGF.CGM.getOpenMPRuntime().emitBarrierCall(CGF, Directive.getLocStart(),
182                                                /*IsExplicit=*/false);
183   CGCapturedStmtInfo::EmitBody(CGF, S);
184 }
185 
186 LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue(
187     CodeGenFunction &CGF) {
188   return CGF.MakeNaturalAlignAddrLValue(
189       CGF.GetAddrOfLocalVar(getThreadIDVariable()),
190       getThreadIDVariable()->getType());
191 }
192 
193 void CGOpenMPTaskOutlinedRegionInfo::EmitBody(CodeGenFunction &CGF,
194                                               const Stmt *S) {
195   if (PartIDVar) {
196     // TODO: emit code for untied tasks.
197   }
198   CGCapturedStmtInfo::EmitBody(CGF, S);
199 }
200 
201 CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM)
202     : CGM(CGM), DefaultOpenMPPSource(nullptr), KmpRoutineEntryPtrTy(nullptr) {
203   IdentTy = llvm::StructType::create(
204       "ident_t", CGM.Int32Ty /* reserved_1 */, CGM.Int32Ty /* flags */,
205       CGM.Int32Ty /* reserved_2 */, CGM.Int32Ty /* reserved_3 */,
206       CGM.Int8PtrTy /* psource */, nullptr);
207   // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...)
208   llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty),
209                                llvm::PointerType::getUnqual(CGM.Int32Ty)};
210   Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true);
211   KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8);
212 }
213 
214 void CGOpenMPRuntime::clear() {
215   InternalVars.clear();
216 }
217 
218 llvm::Value *
219 CGOpenMPRuntime::emitOutlinedFunction(const OMPExecutableDirective &D,
220                                       const VarDecl *ThreadIDVar) {
221   assert(ThreadIDVar->getType()->isPointerType() &&
222          "thread id variable must be of type kmp_int32 *");
223   const CapturedStmt *CS = cast<CapturedStmt>(D.getAssociatedStmt());
224   CodeGenFunction CGF(CGM, true);
225   CGOpenMPOutlinedRegionInfo CGInfo(D, *CS, ThreadIDVar);
226   CGF.CapturedStmtInfo = &CGInfo;
227   return CGF.GenerateCapturedStmtFunction(*CS);
228 }
229 
230 llvm::Value *
231 CGOpenMPRuntime::emitTaskOutlinedFunction(const OMPExecutableDirective &D,
232                                           const VarDecl *ThreadIDVar,
233                                           const VarDecl *PartIDVar) {
234   assert(!ThreadIDVar->getType()->isPointerType() &&
235          "thread id variable must be of type kmp_int32 for tasks");
236   auto *CS = cast<CapturedStmt>(D.getAssociatedStmt());
237   CodeGenFunction CGF(CGM, true);
238   CGOpenMPTaskOutlinedRegionInfo CGInfo(D, *CS, ThreadIDVar, PartIDVar);
239   CGF.CapturedStmtInfo = &CGInfo;
240   return CGF.GenerateCapturedStmtFunction(*CS);
241 }
242 
243 llvm::Value *
244 CGOpenMPRuntime::getOrCreateDefaultLocation(OpenMPLocationFlags Flags) {
245   llvm::Value *Entry = OpenMPDefaultLocMap.lookup(Flags);
246   if (!Entry) {
247     if (!DefaultOpenMPPSource) {
248       // Initialize default location for psource field of ident_t structure of
249       // all ident_t objects. Format is ";file;function;line;column;;".
250       // Taken from
251       // http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp_str.c
252       DefaultOpenMPPSource =
253           CGM.GetAddrOfConstantCString(";unknown;unknown;0;0;;");
254       DefaultOpenMPPSource =
255           llvm::ConstantExpr::getBitCast(DefaultOpenMPPSource, CGM.Int8PtrTy);
256     }
257     auto DefaultOpenMPLocation = new llvm::GlobalVariable(
258         CGM.getModule(), IdentTy, /*isConstant*/ true,
259         llvm::GlobalValue::PrivateLinkage, /*Initializer*/ nullptr);
260     DefaultOpenMPLocation->setUnnamedAddr(true);
261 
262     llvm::Constant *Zero = llvm::ConstantInt::get(CGM.Int32Ty, 0, true);
263     llvm::Constant *Values[] = {Zero,
264                                 llvm::ConstantInt::get(CGM.Int32Ty, Flags),
265                                 Zero, Zero, DefaultOpenMPPSource};
266     llvm::Constant *Init = llvm::ConstantStruct::get(IdentTy, Values);
267     DefaultOpenMPLocation->setInitializer(Init);
268     OpenMPDefaultLocMap[Flags] = DefaultOpenMPLocation;
269     return DefaultOpenMPLocation;
270   }
271   return Entry;
272 }
273 
274 llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF,
275                                                  SourceLocation Loc,
276                                                  OpenMPLocationFlags Flags) {
277   // If no debug info is generated - return global default location.
278   if (CGM.getCodeGenOpts().getDebugInfo() == CodeGenOptions::NoDebugInfo ||
279       Loc.isInvalid())
280     return getOrCreateDefaultLocation(Flags);
281 
282   assert(CGF.CurFn && "No function in current CodeGenFunction.");
283 
284   llvm::Value *LocValue = nullptr;
285   auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
286   if (I != OpenMPLocThreadIDMap.end())
287     LocValue = I->second.DebugLoc;
288   // OpenMPLocThreadIDMap may have null DebugLoc and non-null ThreadID, if
289   // GetOpenMPThreadID was called before this routine.
290   if (LocValue == nullptr) {
291     // Generate "ident_t .kmpc_loc.addr;"
292     llvm::AllocaInst *AI = CGF.CreateTempAlloca(IdentTy, ".kmpc_loc.addr");
293     AI->setAlignment(CGM.getDataLayout().getPrefTypeAlignment(IdentTy));
294     auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
295     Elem.second.DebugLoc = AI;
296     LocValue = AI;
297 
298     CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
299     CGF.Builder.SetInsertPoint(CGF.AllocaInsertPt);
300     CGF.Builder.CreateMemCpy(LocValue, getOrCreateDefaultLocation(Flags),
301                              llvm::ConstantExpr::getSizeOf(IdentTy),
302                              CGM.PointerAlignInBytes);
303   }
304 
305   // char **psource = &.kmpc_loc_<flags>.addr.psource;
306   auto *PSource =
307       CGF.Builder.CreateConstInBoundsGEP2_32(LocValue, 0, IdentField_PSource);
308 
309   auto OMPDebugLoc = OpenMPDebugLocMap.lookup(Loc.getRawEncoding());
310   if (OMPDebugLoc == nullptr) {
311     SmallString<128> Buffer2;
312     llvm::raw_svector_ostream OS2(Buffer2);
313     // Build debug location
314     PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
315     OS2 << ";" << PLoc.getFilename() << ";";
316     if (const FunctionDecl *FD =
317             dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl)) {
318       OS2 << FD->getQualifiedNameAsString();
319     }
320     OS2 << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;";
321     OMPDebugLoc = CGF.Builder.CreateGlobalStringPtr(OS2.str());
322     OpenMPDebugLocMap[Loc.getRawEncoding()] = OMPDebugLoc;
323   }
324   // *psource = ";<File>;<Function>;<Line>;<Column>;;";
325   CGF.Builder.CreateStore(OMPDebugLoc, PSource);
326 
327   return LocValue;
328 }
329 
330 llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF,
331                                           SourceLocation Loc) {
332   assert(CGF.CurFn && "No function in current CodeGenFunction.");
333 
334   llvm::Value *ThreadID = nullptr;
335   // Check whether we've already cached a load of the thread id in this
336   // function.
337   auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
338   if (I != OpenMPLocThreadIDMap.end()) {
339     ThreadID = I->second.ThreadID;
340     if (ThreadID != nullptr)
341       return ThreadID;
342   }
343   if (auto OMPRegionInfo =
344           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
345     if (OMPRegionInfo->getThreadIDVariable()) {
346       // Check if this an outlined function with thread id passed as argument.
347       auto LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF);
348       ThreadID = CGF.EmitLoadOfLValue(LVal, Loc).getScalarVal();
349       // If value loaded in entry block, cache it and use it everywhere in
350       // function.
351       if (CGF.Builder.GetInsertBlock() == CGF.AllocaInsertPt->getParent()) {
352         auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
353         Elem.second.ThreadID = ThreadID;
354       }
355       return ThreadID;
356     }
357   }
358 
359   // This is not an outlined function region - need to call __kmpc_int32
360   // kmpc_global_thread_num(ident_t *loc).
361   // Generate thread id value and cache this value for use across the
362   // function.
363   CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
364   CGF.Builder.SetInsertPoint(CGF.AllocaInsertPt);
365   ThreadID =
366       CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_global_thread_num),
367                           emitUpdateLocation(CGF, Loc));
368   auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
369   Elem.second.ThreadID = ThreadID;
370   return ThreadID;
371 }
372 
373 void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) {
374   assert(CGF.CurFn && "No function in current CodeGenFunction.");
375   if (OpenMPLocThreadIDMap.count(CGF.CurFn))
376     OpenMPLocThreadIDMap.erase(CGF.CurFn);
377 }
378 
379 llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() {
380   return llvm::PointerType::getUnqual(IdentTy);
381 }
382 
383 llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() {
384   return llvm::PointerType::getUnqual(Kmpc_MicroTy);
385 }
386 
387 llvm::Constant *
388 CGOpenMPRuntime::createRuntimeFunction(OpenMPRTLFunction Function) {
389   llvm::Constant *RTLFn = nullptr;
390   switch (Function) {
391   case OMPRTL__kmpc_fork_call: {
392     // Build void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro
393     // microtask, ...);
394     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
395                                 getKmpc_MicroPointerTy()};
396     llvm::FunctionType *FnTy =
397         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ true);
398     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_fork_call");
399     break;
400   }
401   case OMPRTL__kmpc_global_thread_num: {
402     // Build kmp_int32 __kmpc_global_thread_num(ident_t *loc);
403     llvm::Type *TypeParams[] = {getIdentTyPointerTy()};
404     llvm::FunctionType *FnTy =
405         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
406     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_global_thread_num");
407     break;
408   }
409   case OMPRTL__kmpc_threadprivate_cached: {
410     // Build void *__kmpc_threadprivate_cached(ident_t *loc,
411     // kmp_int32 global_tid, void *data, size_t size, void ***cache);
412     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
413                                 CGM.VoidPtrTy, CGM.SizeTy,
414                                 CGM.VoidPtrTy->getPointerTo()->getPointerTo()};
415     llvm::FunctionType *FnTy =
416         llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg*/ false);
417     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_cached");
418     break;
419   }
420   case OMPRTL__kmpc_critical: {
421     // Build void __kmpc_critical(ident_t *loc, kmp_int32 global_tid,
422     // kmp_critical_name *crit);
423     llvm::Type *TypeParams[] = {
424         getIdentTyPointerTy(), CGM.Int32Ty,
425         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
426     llvm::FunctionType *FnTy =
427         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
428     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_critical");
429     break;
430   }
431   case OMPRTL__kmpc_threadprivate_register: {
432     // Build void __kmpc_threadprivate_register(ident_t *, void *data,
433     // kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor);
434     // typedef void *(*kmpc_ctor)(void *);
435     auto KmpcCtorTy =
436         llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
437                                 /*isVarArg*/ false)->getPointerTo();
438     // typedef void *(*kmpc_cctor)(void *, void *);
439     llvm::Type *KmpcCopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
440     auto KmpcCopyCtorTy =
441         llvm::FunctionType::get(CGM.VoidPtrTy, KmpcCopyCtorTyArgs,
442                                 /*isVarArg*/ false)->getPointerTo();
443     // typedef void (*kmpc_dtor)(void *);
444     auto KmpcDtorTy =
445         llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy, /*isVarArg*/ false)
446             ->getPointerTo();
447     llvm::Type *FnTyArgs[] = {getIdentTyPointerTy(), CGM.VoidPtrTy, KmpcCtorTy,
448                               KmpcCopyCtorTy, KmpcDtorTy};
449     auto FnTy = llvm::FunctionType::get(CGM.VoidTy, FnTyArgs,
450                                         /*isVarArg*/ false);
451     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_register");
452     break;
453   }
454   case OMPRTL__kmpc_end_critical: {
455     // Build void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid,
456     // kmp_critical_name *crit);
457     llvm::Type *TypeParams[] = {
458         getIdentTyPointerTy(), CGM.Int32Ty,
459         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
460     llvm::FunctionType *FnTy =
461         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
462     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_critical");
463     break;
464   }
465   case OMPRTL__kmpc_cancel_barrier: {
466     // Build kmp_int32 __kmpc_cancel_barrier(ident_t *loc, kmp_int32
467     // global_tid);
468     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
469     llvm::FunctionType *FnTy =
470         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
471     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_cancel_barrier");
472     break;
473   }
474   case OMPRTL__kmpc_for_static_fini: {
475     // Build void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid);
476     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
477     llvm::FunctionType *FnTy =
478         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
479     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_for_static_fini");
480     break;
481   }
482   case OMPRTL__kmpc_push_num_threads: {
483     // Build void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
484     // kmp_int32 num_threads)
485     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
486                                 CGM.Int32Ty};
487     llvm::FunctionType *FnTy =
488         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
489     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_num_threads");
490     break;
491   }
492   case OMPRTL__kmpc_serialized_parallel: {
493     // Build void __kmpc_serialized_parallel(ident_t *loc, kmp_int32
494     // global_tid);
495     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
496     llvm::FunctionType *FnTy =
497         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
498     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_serialized_parallel");
499     break;
500   }
501   case OMPRTL__kmpc_end_serialized_parallel: {
502     // Build void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32
503     // global_tid);
504     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
505     llvm::FunctionType *FnTy =
506         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
507     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_serialized_parallel");
508     break;
509   }
510   case OMPRTL__kmpc_flush: {
511     // Build void __kmpc_flush(ident_t *loc);
512     llvm::Type *TypeParams[] = {getIdentTyPointerTy()};
513     llvm::FunctionType *FnTy =
514         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
515     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_flush");
516     break;
517   }
518   case OMPRTL__kmpc_master: {
519     // Build kmp_int32 __kmpc_master(ident_t *loc, kmp_int32 global_tid);
520     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
521     llvm::FunctionType *FnTy =
522         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
523     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_master");
524     break;
525   }
526   case OMPRTL__kmpc_end_master: {
527     // Build void __kmpc_end_master(ident_t *loc, kmp_int32 global_tid);
528     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
529     llvm::FunctionType *FnTy =
530         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
531     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_master");
532     break;
533   }
534   case OMPRTL__kmpc_omp_taskyield: {
535     // Build kmp_int32 __kmpc_omp_taskyield(ident_t *, kmp_int32 global_tid,
536     // int end_part);
537     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
538     llvm::FunctionType *FnTy =
539         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
540     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_taskyield");
541     break;
542   }
543   case OMPRTL__kmpc_single: {
544     // Build kmp_int32 __kmpc_single(ident_t *loc, kmp_int32 global_tid);
545     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
546     llvm::FunctionType *FnTy =
547         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
548     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_single");
549     break;
550   }
551   case OMPRTL__kmpc_end_single: {
552     // Build void __kmpc_end_single(ident_t *loc, kmp_int32 global_tid);
553     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
554     llvm::FunctionType *FnTy =
555         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
556     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_single");
557     break;
558   }
559   case OMPRTL__kmpc_omp_task_alloc: {
560     // Build kmp_task_t *__kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
561     // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
562     // kmp_routine_entry_t *task_entry);
563     assert(KmpRoutineEntryPtrTy != nullptr &&
564            "Type kmp_routine_entry_t must be created.");
565     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty,
566                                 CGM.SizeTy, CGM.SizeTy, KmpRoutineEntryPtrTy};
567     // Return void * and then cast to particular kmp_task_t type.
568     llvm::FunctionType *FnTy =
569         llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
570     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_alloc");
571     break;
572   }
573   case OMPRTL__kmpc_omp_task: {
574     // Build kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t
575     // *new_task);
576     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
577                                 CGM.VoidPtrTy};
578     llvm::FunctionType *FnTy =
579         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
580     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task");
581     break;
582   }
583   case OMPRTL__kmpc_copyprivate: {
584     // Build void __kmpc_copyprivate(ident_t *loc, kmp_int32 global_tid,
585     // kmp_int32 cpy_size, void *cpy_data, void(*cpy_func)(void *, void *),
586     // kmp_int32 didit);
587     llvm::Type *CpyTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
588     auto *CpyFnTy =
589         llvm::FunctionType::get(CGM.VoidTy, CpyTypeParams, /*isVarArg=*/false);
590     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty,
591                                 CGM.VoidPtrTy, CpyFnTy->getPointerTo(),
592                                 CGM.Int32Ty};
593     llvm::FunctionType *FnTy =
594         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
595     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_copyprivate");
596     break;
597   }
598   }
599   return RTLFn;
600 }
601 
602 llvm::Constant *CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize,
603                                                              bool IVSigned) {
604   assert((IVSize == 32 || IVSize == 64) &&
605          "IV size is not compatible with the omp runtime");
606   auto Name = IVSize == 32 ? (IVSigned ? "__kmpc_for_static_init_4"
607                                        : "__kmpc_for_static_init_4u")
608                            : (IVSigned ? "__kmpc_for_static_init_8"
609                                        : "__kmpc_for_static_init_8u");
610   auto ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
611   auto PtrTy = llvm::PointerType::getUnqual(ITy);
612   llvm::Type *TypeParams[] = {
613     getIdentTyPointerTy(),                     // loc
614     CGM.Int32Ty,                               // tid
615     CGM.Int32Ty,                               // schedtype
616     llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
617     PtrTy,                                     // p_lower
618     PtrTy,                                     // p_upper
619     PtrTy,                                     // p_stride
620     ITy,                                       // incr
621     ITy                                        // chunk
622   };
623   llvm::FunctionType *FnTy =
624       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
625   return CGM.CreateRuntimeFunction(FnTy, Name);
626 }
627 
628 llvm::Constant *CGOpenMPRuntime::createDispatchInitFunction(unsigned IVSize,
629                                                             bool IVSigned) {
630   assert((IVSize == 32 || IVSize == 64) &&
631          "IV size is not compatible with the omp runtime");
632   auto Name =
633       IVSize == 32
634           ? (IVSigned ? "__kmpc_dispatch_init_4" : "__kmpc_dispatch_init_4u")
635           : (IVSigned ? "__kmpc_dispatch_init_8" : "__kmpc_dispatch_init_8u");
636   auto ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
637   llvm::Type *TypeParams[] = { getIdentTyPointerTy(), // loc
638                                CGM.Int32Ty,           // tid
639                                CGM.Int32Ty,           // schedtype
640                                ITy,                   // lower
641                                ITy,                   // upper
642                                ITy,                   // stride
643                                ITy                    // chunk
644   };
645   llvm::FunctionType *FnTy =
646       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
647   return CGM.CreateRuntimeFunction(FnTy, Name);
648 }
649 
650 llvm::Constant *CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize,
651                                                             bool IVSigned) {
652   assert((IVSize == 32 || IVSize == 64) &&
653          "IV size is not compatible with the omp runtime");
654   auto Name =
655       IVSize == 32
656           ? (IVSigned ? "__kmpc_dispatch_next_4" : "__kmpc_dispatch_next_4u")
657           : (IVSigned ? "__kmpc_dispatch_next_8" : "__kmpc_dispatch_next_8u");
658   auto ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
659   auto PtrTy = llvm::PointerType::getUnqual(ITy);
660   llvm::Type *TypeParams[] = {
661     getIdentTyPointerTy(),                     // loc
662     CGM.Int32Ty,                               // tid
663     llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
664     PtrTy,                                     // p_lower
665     PtrTy,                                     // p_upper
666     PtrTy                                      // p_stride
667   };
668   llvm::FunctionType *FnTy =
669       llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
670   return CGM.CreateRuntimeFunction(FnTy, Name);
671 }
672 
673 llvm::Constant *
674 CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) {
675   // Lookup the entry, lazily creating it if necessary.
676   return getOrCreateInternalVariable(CGM.Int8PtrPtrTy,
677                                      Twine(CGM.getMangledName(VD)) + ".cache.");
678 }
679 
680 llvm::Value *CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
681                                                      const VarDecl *VD,
682                                                      llvm::Value *VDAddr,
683                                                      SourceLocation Loc) {
684   auto VarTy = VDAddr->getType()->getPointerElementType();
685   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
686                          CGF.Builder.CreatePointerCast(VDAddr, CGM.Int8PtrTy),
687                          CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)),
688                          getOrCreateThreadPrivateCache(VD)};
689   return CGF.EmitRuntimeCall(
690       createRuntimeFunction(OMPRTL__kmpc_threadprivate_cached), Args);
691 }
692 
693 void CGOpenMPRuntime::emitThreadPrivateVarInit(
694     CodeGenFunction &CGF, llvm::Value *VDAddr, llvm::Value *Ctor,
695     llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) {
696   // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime
697   // library.
698   auto OMPLoc = emitUpdateLocation(CGF, Loc);
699   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_global_thread_num),
700                       OMPLoc);
701   // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor)
702   // to register constructor/destructor for variable.
703   llvm::Value *Args[] = {OMPLoc,
704                          CGF.Builder.CreatePointerCast(VDAddr, CGM.VoidPtrTy),
705                          Ctor, CopyCtor, Dtor};
706   CGF.EmitRuntimeCall(
707       createRuntimeFunction(OMPRTL__kmpc_threadprivate_register), Args);
708 }
709 
710 llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition(
711     const VarDecl *VD, llvm::Value *VDAddr, SourceLocation Loc,
712     bool PerformInit, CodeGenFunction *CGF) {
713   VD = VD->getDefinition(CGM.getContext());
714   if (VD && ThreadPrivateWithDefinition.count(VD) == 0) {
715     ThreadPrivateWithDefinition.insert(VD);
716     QualType ASTTy = VD->getType();
717 
718     llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr;
719     auto Init = VD->getAnyInitializer();
720     if (CGM.getLangOpts().CPlusPlus && PerformInit) {
721       // Generate function that re-emits the declaration's initializer into the
722       // threadprivate copy of the variable VD
723       CodeGenFunction CtorCGF(CGM);
724       FunctionArgList Args;
725       ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, SourceLocation(),
726                             /*Id=*/nullptr, CGM.getContext().VoidPtrTy);
727       Args.push_back(&Dst);
728 
729       auto &FI = CGM.getTypes().arrangeFreeFunctionDeclaration(
730           CGM.getContext().VoidPtrTy, Args, FunctionType::ExtInfo(),
731           /*isVariadic=*/false);
732       auto FTy = CGM.getTypes().GetFunctionType(FI);
733       auto Fn = CGM.CreateGlobalInitOrDestructFunction(
734           FTy, ".__kmpc_global_ctor_.", Loc);
735       CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI,
736                             Args, SourceLocation());
737       auto ArgVal = CtorCGF.EmitLoadOfScalar(
738           CtorCGF.GetAddrOfLocalVar(&Dst),
739           /*Volatile=*/false, CGM.PointerAlignInBytes,
740           CGM.getContext().VoidPtrTy, Dst.getLocation());
741       auto Arg = CtorCGF.Builder.CreatePointerCast(
742           ArgVal,
743           CtorCGF.ConvertTypeForMem(CGM.getContext().getPointerType(ASTTy)));
744       CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(),
745                                /*IsInitializer=*/true);
746       ArgVal = CtorCGF.EmitLoadOfScalar(
747           CtorCGF.GetAddrOfLocalVar(&Dst),
748           /*Volatile=*/false, CGM.PointerAlignInBytes,
749           CGM.getContext().VoidPtrTy, Dst.getLocation());
750       CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue);
751       CtorCGF.FinishFunction();
752       Ctor = Fn;
753     }
754     if (VD->getType().isDestructedType() != QualType::DK_none) {
755       // Generate function that emits destructor call for the threadprivate copy
756       // of the variable VD
757       CodeGenFunction DtorCGF(CGM);
758       FunctionArgList Args;
759       ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, SourceLocation(),
760                             /*Id=*/nullptr, CGM.getContext().VoidPtrTy);
761       Args.push_back(&Dst);
762 
763       auto &FI = CGM.getTypes().arrangeFreeFunctionDeclaration(
764           CGM.getContext().VoidTy, Args, FunctionType::ExtInfo(),
765           /*isVariadic=*/false);
766       auto FTy = CGM.getTypes().GetFunctionType(FI);
767       auto Fn = CGM.CreateGlobalInitOrDestructFunction(
768           FTy, ".__kmpc_global_dtor_.", Loc);
769       DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args,
770                             SourceLocation());
771       auto ArgVal = DtorCGF.EmitLoadOfScalar(
772           DtorCGF.GetAddrOfLocalVar(&Dst),
773           /*Volatile=*/false, CGM.PointerAlignInBytes,
774           CGM.getContext().VoidPtrTy, Dst.getLocation());
775       DtorCGF.emitDestroy(ArgVal, ASTTy,
776                           DtorCGF.getDestroyer(ASTTy.isDestructedType()),
777                           DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
778       DtorCGF.FinishFunction();
779       Dtor = Fn;
780     }
781     // Do not emit init function if it is not required.
782     if (!Ctor && !Dtor)
783       return nullptr;
784 
785     llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
786     auto CopyCtorTy =
787         llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs,
788                                 /*isVarArg=*/false)->getPointerTo();
789     // Copying constructor for the threadprivate variable.
790     // Must be NULL - reserved by runtime, but currently it requires that this
791     // parameter is always NULL. Otherwise it fires assertion.
792     CopyCtor = llvm::Constant::getNullValue(CopyCtorTy);
793     if (Ctor == nullptr) {
794       auto CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
795                                             /*isVarArg=*/false)->getPointerTo();
796       Ctor = llvm::Constant::getNullValue(CtorTy);
797     }
798     if (Dtor == nullptr) {
799       auto DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy,
800                                             /*isVarArg=*/false)->getPointerTo();
801       Dtor = llvm::Constant::getNullValue(DtorTy);
802     }
803     if (!CGF) {
804       auto InitFunctionTy =
805           llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false);
806       auto InitFunction = CGM.CreateGlobalInitOrDestructFunction(
807           InitFunctionTy, ".__omp_threadprivate_init_.");
808       CodeGenFunction InitCGF(CGM);
809       FunctionArgList ArgList;
810       InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction,
811                             CGM.getTypes().arrangeNullaryFunction(), ArgList,
812                             Loc);
813       emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
814       InitCGF.FinishFunction();
815       return InitFunction;
816     }
817     emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
818   }
819   return nullptr;
820 }
821 
822 void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
823                                        llvm::Value *OutlinedFn,
824                                        llvm::Value *CapturedStruct) {
825   // Build call __kmpc_fork_call(loc, 1, microtask, captured_struct/*context*/)
826   llvm::Value *Args[] = {
827       emitUpdateLocation(CGF, Loc),
828       CGF.Builder.getInt32(1), // Number of arguments after 'microtask' argument
829       // (there is only one additional argument - 'context')
830       CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy()),
831       CGF.EmitCastToVoidPtr(CapturedStruct)};
832   auto RTLFn = createRuntimeFunction(OMPRTL__kmpc_fork_call);
833   CGF.EmitRuntimeCall(RTLFn, Args);
834 }
835 
836 void CGOpenMPRuntime::emitSerialCall(CodeGenFunction &CGF, SourceLocation Loc,
837                                      llvm::Value *OutlinedFn,
838                                      llvm::Value *CapturedStruct) {
839   auto ThreadID = getThreadID(CGF, Loc);
840   // Build calls:
841   // __kmpc_serialized_parallel(&Loc, GTid);
842   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), ThreadID};
843   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_serialized_parallel),
844                       Args);
845 
846   // OutlinedFn(&GTid, &zero, CapturedStruct);
847   auto ThreadIDAddr = emitThreadIDAddress(CGF, Loc);
848   auto Int32Ty =
849       CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true);
850   auto ZeroAddr = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".zero.addr");
851   CGF.InitTempAlloca(ZeroAddr, CGF.Builder.getInt32(/*C*/ 0));
852   llvm::Value *OutlinedFnArgs[] = {ThreadIDAddr, ZeroAddr, CapturedStruct};
853   CGF.EmitCallOrInvoke(OutlinedFn, OutlinedFnArgs);
854 
855   // __kmpc_end_serialized_parallel(&Loc, GTid);
856   llvm::Value *EndArgs[] = {emitUpdateLocation(CGF, Loc), ThreadID};
857   CGF.EmitRuntimeCall(
858       createRuntimeFunction(OMPRTL__kmpc_end_serialized_parallel), EndArgs);
859 }
860 
861 // If we're inside an (outlined) parallel region, use the region info's
862 // thread-ID variable (it is passed in a first argument of the outlined function
863 // as "kmp_int32 *gtid"). Otherwise, if we're not inside parallel region, but in
864 // regular serial code region, get thread ID by calling kmp_int32
865 // kmpc_global_thread_num(ident_t *loc), stash this thread ID in a temporary and
866 // return the address of that temp.
867 llvm::Value *CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF,
868                                                   SourceLocation Loc) {
869   if (auto OMPRegionInfo =
870           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
871     if (OMPRegionInfo->getThreadIDVariable())
872       return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress();
873 
874   auto ThreadID = getThreadID(CGF, Loc);
875   auto Int32Ty =
876       CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true);
877   auto ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp.");
878   CGF.EmitStoreOfScalar(ThreadID,
879                         CGF.MakeNaturalAlignAddrLValue(ThreadIDTemp, Int32Ty));
880 
881   return ThreadIDTemp;
882 }
883 
884 llvm::Constant *
885 CGOpenMPRuntime::getOrCreateInternalVariable(llvm::Type *Ty,
886                                              const llvm::Twine &Name) {
887   SmallString<256> Buffer;
888   llvm::raw_svector_ostream Out(Buffer);
889   Out << Name;
890   auto RuntimeName = Out.str();
891   auto &Elem = *InternalVars.insert(std::make_pair(RuntimeName, nullptr)).first;
892   if (Elem.second) {
893     assert(Elem.second->getType()->getPointerElementType() == Ty &&
894            "OMP internal variable has different type than requested");
895     return &*Elem.second;
896   }
897 
898   return Elem.second = new llvm::GlobalVariable(
899              CGM.getModule(), Ty, /*IsConstant*/ false,
900              llvm::GlobalValue::CommonLinkage, llvm::Constant::getNullValue(Ty),
901              Elem.first());
902 }
903 
904 llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) {
905   llvm::Twine Name(".gomp_critical_user_", CriticalName);
906   return getOrCreateInternalVariable(KmpCriticalNameTy, Name.concat(".var"));
907 }
908 
909 void CGOpenMPRuntime::emitCriticalRegion(
910     CodeGenFunction &CGF, StringRef CriticalName,
911     const std::function<void()> &CriticalOpGen, SourceLocation Loc) {
912   auto RegionLock = getCriticalRegionLock(CriticalName);
913   // __kmpc_critical(ident_t *, gtid, Lock);
914   // CriticalOpGen();
915   // __kmpc_end_critical(ident_t *, gtid, Lock);
916   // Prepare arguments and build a call to __kmpc_critical
917   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
918                          RegionLock};
919   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_critical), Args);
920   CriticalOpGen();
921   // Build a call to __kmpc_end_critical
922   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_end_critical), Args);
923 }
924 
925 static void emitIfStmt(CodeGenFunction &CGF, llvm::Value *IfCond,
926                        const std::function<void()> &BodyOpGen) {
927   llvm::Value *CallBool = CGF.EmitScalarConversion(
928       IfCond,
929       CGF.getContext().getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true),
930       CGF.getContext().BoolTy);
931 
932   auto *ThenBlock = CGF.createBasicBlock("omp_if.then");
933   auto *ContBlock = CGF.createBasicBlock("omp_if.end");
934   // Generate the branch (If-stmt)
935   CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock);
936   CGF.EmitBlock(ThenBlock);
937   BodyOpGen();
938   // Emit the rest of bblocks/branches
939   CGF.EmitBranch(ContBlock);
940   CGF.EmitBlock(ContBlock, true);
941 }
942 
943 void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF,
944                                        const std::function<void()> &MasterOpGen,
945                                        SourceLocation Loc) {
946   // if(__kmpc_master(ident_t *, gtid)) {
947   //   MasterOpGen();
948   //   __kmpc_end_master(ident_t *, gtid);
949   // }
950   // Prepare arguments and build a call to __kmpc_master
951   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
952   auto *IsMaster =
953       CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_master), Args);
954   emitIfStmt(CGF, IsMaster, [&]() -> void {
955     MasterOpGen();
956     // Build a call to __kmpc_end_master.
957     // OpenMP [1.2.2 OpenMP Language Terminology]
958     // For C/C++, an executable statement, possibly compound, with a single
959     // entry at the top and a single exit at the bottom, or an OpenMP construct.
960     // * Access to the structured block must not be the result of a branch.
961     // * The point of exit cannot be a branch out of the structured block.
962     // * The point of entry must not be a call to setjmp().
963     // * longjmp() and throw() must not violate the entry/exit criteria.
964     // * An expression statement, iteration statement, selection statement, or
965     // try block is considered to be a structured block if the corresponding
966     // compound statement obtained by enclosing it in { and } would be a
967     // structured block.
968     // It is analyzed in Sema, so we can just call __kmpc_end_master() on
969     // fallthrough rather than pushing a normal cleanup for it.
970     CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_end_master), Args);
971   });
972 }
973 
974 void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
975                                         SourceLocation Loc) {
976   // Build call __kmpc_omp_taskyield(loc, thread_id, 0);
977   llvm::Value *Args[] = {
978       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
979       llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)};
980   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_taskyield), Args);
981 }
982 
983 static llvm::Value *emitCopyprivateCopyFunction(
984     CodeGenModule &CGM, llvm::Type *ArgsType, ArrayRef<const Expr *> SrcExprs,
985     ArrayRef<const Expr *> DstExprs, ArrayRef<const Expr *> AssignmentOps) {
986   auto &C = CGM.getContext();
987   // void copy_func(void *LHSArg, void *RHSArg);
988   FunctionArgList Args;
989   ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, SourceLocation(), /*Id=*/nullptr,
990                            C.VoidPtrTy);
991   ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, SourceLocation(), /*Id=*/nullptr,
992                            C.VoidPtrTy);
993   Args.push_back(&LHSArg);
994   Args.push_back(&RHSArg);
995   FunctionType::ExtInfo EI;
996   auto &CGFI = CGM.getTypes().arrangeFreeFunctionDeclaration(
997       C.VoidTy, Args, EI, /*isVariadic=*/false);
998   auto *Fn = llvm::Function::Create(
999       CGM.getTypes().GetFunctionType(CGFI), llvm::GlobalValue::InternalLinkage,
1000       ".omp.copyprivate.copy_func", &CGM.getModule());
1001   CGM.SetLLVMFunctionAttributes(/*D=*/nullptr, CGFI, Fn);
1002   CodeGenFunction CGF(CGM);
1003   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args);
1004   // Dst = (void*[n])(LHSArg);
1005   // Src = (void*[n])(RHSArg);
1006   auto *LHS = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
1007       CGF.Builder.CreateAlignedLoad(CGF.GetAddrOfLocalVar(&LHSArg),
1008                                     CGF.PointerAlignInBytes),
1009       ArgsType);
1010   auto *RHS = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
1011       CGF.Builder.CreateAlignedLoad(CGF.GetAddrOfLocalVar(&RHSArg),
1012                                     CGF.PointerAlignInBytes),
1013       ArgsType);
1014   // *(Type0*)Dst[0] = *(Type0*)Src[0];
1015   // *(Type1*)Dst[1] = *(Type1*)Src[1];
1016   // ...
1017   // *(Typen*)Dst[n] = *(Typen*)Src[n];
1018   CodeGenFunction::OMPPrivateScope Scope(CGF);
1019   for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) {
1020     Scope.addPrivate(
1021         cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl()),
1022         [&]() -> llvm::Value *{
1023           return CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
1024               CGF.Builder.CreateAlignedLoad(CGF.Builder.CreateStructGEP(RHS, I),
1025                                             CGM.PointerAlignInBytes),
1026               CGF.ConvertTypeForMem(C.getPointerType(SrcExprs[I]->getType())));
1027         });
1028     Scope.addPrivate(
1029         cast<VarDecl>(cast<DeclRefExpr>(DstExprs[I])->getDecl()),
1030         [&]() -> llvm::Value *{
1031           return CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
1032               CGF.Builder.CreateAlignedLoad(CGF.Builder.CreateStructGEP(LHS, I),
1033                                             CGM.PointerAlignInBytes),
1034               CGF.ConvertTypeForMem(C.getPointerType(SrcExprs[I]->getType())));
1035         });
1036   }
1037   Scope.Privatize();
1038   for (auto *E : AssignmentOps) {
1039     CGF.EmitIgnoredExpr(E);
1040   }
1041   Scope.ForceCleanup();
1042   CGF.FinishFunction();
1043   return Fn;
1044 }
1045 
1046 void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF,
1047                                        const std::function<void()> &SingleOpGen,
1048                                        SourceLocation Loc,
1049                                        ArrayRef<const Expr *> CopyprivateVars,
1050                                        ArrayRef<const Expr *> SrcExprs,
1051                                        ArrayRef<const Expr *> DstExprs,
1052                                        ArrayRef<const Expr *> AssignmentOps) {
1053   assert(CopyprivateVars.size() == SrcExprs.size() &&
1054          CopyprivateVars.size() == DstExprs.size() &&
1055          CopyprivateVars.size() == AssignmentOps.size());
1056   auto &C = CGM.getContext();
1057   // int32 did_it = 0;
1058   // if(__kmpc_single(ident_t *, gtid)) {
1059   //   SingleOpGen();
1060   //   __kmpc_end_single(ident_t *, gtid);
1061   //   did_it = 1;
1062   // }
1063   // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
1064   // <copy_func>, did_it);
1065 
1066   llvm::AllocaInst *DidIt = nullptr;
1067   if (!CopyprivateVars.empty()) {
1068     // int32 did_it = 0;
1069     auto KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
1070     DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it");
1071     CGF.InitTempAlloca(DidIt, CGF.Builder.getInt32(0));
1072   }
1073   // Prepare arguments and build a call to __kmpc_single
1074   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
1075   auto *IsSingle =
1076       CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_single), Args);
1077   emitIfStmt(CGF, IsSingle, [&]() -> void {
1078     SingleOpGen();
1079     if (DidIt) {
1080       // did_it = 1;
1081       CGF.Builder.CreateAlignedStore(CGF.Builder.getInt32(1), DidIt,
1082                                      DidIt->getAlignment());
1083     }
1084     // Build a call to __kmpc_end_single.
1085     // OpenMP [1.2.2 OpenMP Language Terminology]
1086     // For C/C++, an executable statement, possibly compound, with a single
1087     // entry at the top and a single exit at the bottom, or an OpenMP construct.
1088     // * Access to the structured block must not be the result of a branch.
1089     // * The point of exit cannot be a branch out of the structured block.
1090     // * The point of entry must not be a call to setjmp().
1091     // * longjmp() and throw() must not violate the entry/exit criteria.
1092     // * An expression statement, iteration statement, selection statement, or
1093     // try block is considered to be a structured block if the corresponding
1094     // compound statement obtained by enclosing it in { and } would be a
1095     // structured block.
1096     // It is analyzed in Sema, so we can just call __kmpc_end_single() on
1097     // fallthrough rather than pushing a normal cleanup for it.
1098     CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_end_single), Args);
1099   });
1100   // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
1101   // <copy_func>, did_it);
1102   if (DidIt) {
1103     llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size());
1104     auto CopyprivateArrayTy =
1105         C.getConstantArrayType(C.VoidPtrTy, ArraySize, ArrayType::Normal,
1106                                /*IndexTypeQuals=*/0);
1107     // Create a list of all private variables for copyprivate.
1108     auto *CopyprivateList =
1109         CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list");
1110     for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) {
1111       auto *Elem = CGF.Builder.CreateStructGEP(CopyprivateList, I);
1112       CGF.Builder.CreateAlignedStore(
1113           CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
1114               CGF.EmitLValue(CopyprivateVars[I]).getAddress(), CGF.VoidPtrTy),
1115           Elem, CGM.PointerAlignInBytes);
1116     }
1117     // Build function that copies private values from single region to all other
1118     // threads in the corresponding parallel region.
1119     auto *CpyFn = emitCopyprivateCopyFunction(
1120         CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy)->getPointerTo(),
1121         SrcExprs, DstExprs, AssignmentOps);
1122     auto *BufSize = CGF.Builder.getInt32(
1123         C.getTypeSizeInChars(CopyprivateArrayTy).getQuantity());
1124     auto *CL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(CopyprivateList,
1125                                                                CGF.VoidPtrTy);
1126     auto *DidItVal =
1127         CGF.Builder.CreateAlignedLoad(DidIt, CGF.PointerAlignInBytes);
1128     llvm::Value *Args[] = {
1129         emitUpdateLocation(CGF, Loc), // ident_t *<loc>
1130         getThreadID(CGF, Loc),        // i32 <gtid>
1131         BufSize,                      // i32 <buf_size>
1132         CL,                           // void *<copyprivate list>
1133         CpyFn,                        // void (*) (void *, void *) <copy_func>
1134         DidItVal                      // i32 did_it
1135     };
1136     CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_copyprivate), Args);
1137   }
1138 }
1139 
1140 void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
1141                                       bool IsExplicit) {
1142   // Build call __kmpc_cancel_barrier(loc, thread_id);
1143   auto Flags = static_cast<OpenMPLocationFlags>(
1144       OMP_IDENT_KMPC |
1145       (IsExplicit ? OMP_IDENT_BARRIER_EXPL : OMP_IDENT_BARRIER_IMPL));
1146   // Build call __kmpc_cancel_barrier(loc, thread_id);
1147   // Replace __kmpc_barrier() function by __kmpc_cancel_barrier() because this
1148   // one provides the same functionality and adds initial support for
1149   // cancellation constructs introduced in OpenMP 4.0. __kmpc_cancel_barrier()
1150   // is provided default by the runtime library so it safe to make such
1151   // replacement.
1152   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags),
1153                          getThreadID(CGF, Loc)};
1154   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_cancel_barrier), Args);
1155 }
1156 
/// \brief Runtime schedule kinds for 'omp for' loops. The enumerators and
/// their values are taken from the enum sched_type in kmp.h.
enum OpenMPSchedType {
  /// \brief Lower bound for default (unordered) versions.
  OMP_sch_lower = 32,
  /// \brief Static schedule with a chunk.
  OMP_sch_static_chunked = 33,
  /// \brief Static schedule without a chunk.
  OMP_sch_static = 34,
  /// \brief Dynamic schedule.
  OMP_sch_dynamic_chunked = 35,
  /// \brief Guided schedule.
  OMP_sch_guided_chunked = 36,
  /// \brief Schedule selected at run time.
  OMP_sch_runtime = 37,
  /// \brief Auto schedule.
  OMP_sch_auto = 38,
  /// \brief Lower bound for 'ordered' versions.
  OMP_ord_lower = 64,
  /// \brief Lower bound for 'nomerge' versions.
  OMP_nm_lower = 160
};
1173 
1174 /// \brief Map the OpenMP loop schedule to the runtime enumeration.
1175 static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind,
1176                                           bool Chunked) {
1177   switch (ScheduleKind) {
1178   case OMPC_SCHEDULE_static:
1179     return Chunked ? OMP_sch_static_chunked : OMP_sch_static;
1180   case OMPC_SCHEDULE_dynamic:
1181     return OMP_sch_dynamic_chunked;
1182   case OMPC_SCHEDULE_guided:
1183     return OMP_sch_guided_chunked;
1184   case OMPC_SCHEDULE_auto:
1185     return OMP_sch_auto;
1186   case OMPC_SCHEDULE_runtime:
1187     return OMP_sch_runtime;
1188   case OMPC_SCHEDULE_unknown:
1189     assert(!Chunked && "chunk was specified but schedule kind not known");
1190     return OMP_sch_static;
1191   }
1192   llvm_unreachable("Unexpected runtime schedule");
1193 }
1194 
1195 bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind,
1196                                          bool Chunked) const {
1197   auto Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
1198   return Schedule == OMP_sch_static;
1199 }
1200 
1201 bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const {
1202   auto Schedule = getRuntimeSchedule(ScheduleKind, /* Chunked */ false);
1203   assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here");
1204   return Schedule != OMP_sch_static;
1205 }
1206 
1207 void CGOpenMPRuntime::emitForInit(CodeGenFunction &CGF, SourceLocation Loc,
1208                                   OpenMPScheduleClauseKind ScheduleKind,
1209                                   unsigned IVSize, bool IVSigned,
1210                                   llvm::Value *IL, llvm::Value *LB,
1211                                   llvm::Value *UB, llvm::Value *ST,
1212                                   llvm::Value *Chunk) {
1213   OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunk != nullptr);
1214   if (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked) {
1215     // Call __kmpc_dispatch_init(
1216     //          ident_t *loc, kmp_int32 tid, kmp_int32 schedule,
1217     //          kmp_int[32|64] lower, kmp_int[32|64] upper,
1218     //          kmp_int[32|64] stride, kmp_int[32|64] chunk);
1219 
1220     // If the Chunk was not specified in the clause - use default value 1.
1221     if (Chunk == nullptr)
1222       Chunk = CGF.Builder.getIntN(IVSize, 1);
1223     llvm::Value *Args[] = { emitUpdateLocation(CGF, Loc, OMP_IDENT_KMPC),
1224                             getThreadID(CGF, Loc),
1225                             CGF.Builder.getInt32(Schedule), // Schedule type
1226                             CGF.Builder.getIntN(IVSize, 0), // Lower
1227                             UB,                             // Upper
1228                             CGF.Builder.getIntN(IVSize, 1), // Stride
1229                             Chunk                           // Chunk
1230     };
1231     CGF.EmitRuntimeCall(createDispatchInitFunction(IVSize, IVSigned), Args);
1232   } else {
1233     // Call __kmpc_for_static_init(
1234     //          ident_t *loc, kmp_int32 tid, kmp_int32 schedtype,
1235     //          kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower,
1236     //          kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
1237     //          kmp_int[32|64] incr, kmp_int[32|64] chunk);
1238     if (Chunk == nullptr) {
1239       assert(Schedule == OMP_sch_static &&
1240              "expected static non-chunked schedule");
1241       // If the Chunk was not specified in the clause - use default value 1.
1242       Chunk = CGF.Builder.getIntN(IVSize, 1);
1243     } else
1244       assert(Schedule == OMP_sch_static_chunked &&
1245              "expected static chunked schedule");
1246     llvm::Value *Args[] = { emitUpdateLocation(CGF, Loc, OMP_IDENT_KMPC),
1247                             getThreadID(CGF, Loc),
1248                             CGF.Builder.getInt32(Schedule), // Schedule type
1249                             IL,                             // &isLastIter
1250                             LB,                             // &LB
1251                             UB,                             // &UB
1252                             ST,                             // &Stride
1253                             CGF.Builder.getIntN(IVSize, 1), // Incr
1254                             Chunk                           // Chunk
1255     };
1256     CGF.EmitRuntimeCall(createForStaticInitFunction(IVSize, IVSigned), Args);
1257   }
1258 }
1259 
1260 void CGOpenMPRuntime::emitForFinish(CodeGenFunction &CGF, SourceLocation Loc,
1261                                     OpenMPScheduleClauseKind ScheduleKind) {
1262   assert((ScheduleKind == OMPC_SCHEDULE_static ||
1263           ScheduleKind == OMPC_SCHEDULE_unknown) &&
1264          "Non-static schedule kinds are not yet implemented");
1265   // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid);
1266   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, OMP_IDENT_KMPC),
1267                          getThreadID(CGF, Loc)};
1268   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_for_static_fini),
1269                       Args);
1270 }
1271 
1272 llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF,
1273                                           SourceLocation Loc, unsigned IVSize,
1274                                           bool IVSigned, llvm::Value *IL,
1275                                           llvm::Value *LB, llvm::Value *UB,
1276                                           llvm::Value *ST) {
1277   // Call __kmpc_dispatch_next(
1278   //          ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter,
1279   //          kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper,
1280   //          kmp_int[32|64] *p_stride);
1281   llvm::Value *Args[] = {
1282       emitUpdateLocation(CGF, Loc, OMP_IDENT_KMPC), getThreadID(CGF, Loc),
1283       IL, // &isLastIter
1284       LB, // &Lower
1285       UB, // &Upper
1286       ST  // &Stride
1287   };
1288   llvm::Value *Call =
1289       CGF.EmitRuntimeCall(createDispatchNextFunction(IVSize, IVSigned), Args);
1290   return CGF.EmitScalarConversion(
1291       Call, CGF.getContext().getIntTypeForBitwidth(32, /* Signed */ true),
1292       CGF.getContext().BoolTy);
1293 }
1294 
1295 void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
1296                                            llvm::Value *NumThreads,
1297                                            SourceLocation Loc) {
1298   // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads)
1299   llvm::Value *Args[] = {
1300       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
1301       CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)};
1302   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_num_threads),
1303                       Args);
1304 }
1305 
1306 void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>,
1307                                 SourceLocation Loc) {
1308   // Build call void __kmpc_flush(ident_t *loc)
1309   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_flush),
1310                       emitUpdateLocation(CGF, Loc));
1311 }
1312 
namespace {
/// \brief Positions of the fields inside the kmp_task_t record.
///
/// The enumerators are used as struct GEP indices, so their values must match
/// the order in which the fields are added to the record.
enum KmpTaskTFields {
  /// \brief Pointer to the list of shared variables.
  KmpTaskTShareds = 0,
  /// \brief Task routine.
  KmpTaskTRoutine = 1,
  /// \brief Partition id for the untied tasks.
  KmpTaskTPartId = 2,
  /// \brief Function that calls the destructors for private variables.
  KmpTaskTDestructors = 3
};
} // namespace
1326 
1327 void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) {
1328   if (!KmpRoutineEntryPtrTy) {
1329     // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type.
1330     auto &C = CGM.getContext();
1331     QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy};
1332     FunctionProtoType::ExtProtoInfo EPI;
1333     KmpRoutineEntryPtrQTy = C.getPointerType(
1334         C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI));
1335     KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy);
1336   }
1337 }
1338 
1339 static void addFieldToRecordDecl(ASTContext &C, DeclContext *DC,
1340                                  QualType FieldTy) {
1341   auto *Field = FieldDecl::Create(
1342       C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy,
1343       C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()),
1344       /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit);
1345   Field->setAccess(AS_public);
1346   DC->addDecl(Field);
1347 }
1348 
1349 static QualType createKmpTaskTRecordDecl(CodeGenModule &CGM,
1350                                          QualType KmpInt32Ty,
1351                                          QualType KmpRoutineEntryPointerQTy) {
1352   auto &C = CGM.getContext();
1353   // Build struct kmp_task_t {
1354   //         void *              shareds;
1355   //         kmp_routine_entry_t routine;
1356   //         kmp_int32           part_id;
1357   //         kmp_routine_entry_t destructors;
1358   //         /*  private vars  */
1359   //       };
1360   auto *RD = C.buildImplicitRecord("kmp_task_t");
1361   RD->startDefinition();
1362   addFieldToRecordDecl(C, RD, C.VoidPtrTy);
1363   addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy);
1364   addFieldToRecordDecl(C, RD, KmpInt32Ty);
1365   addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy);
1366   // TODO: add private fields.
1367   RD->completeDefinition();
1368   return C.getRecordType(RD);
1369 }
1370 
1371 /// \brief Emit a proxy function which accepts kmp_task_t as the second
1372 /// argument.
1373 /// \code
1374 /// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
1375 ///   TaskFunction(gtid, tt->part_id, tt->shareds);
1376 ///   return 0;
1377 /// }
1378 /// \endcode
1379 static llvm::Value *
1380 emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc,
1381                       QualType KmpInt32Ty, QualType KmpTaskTPtrQTy,
1382                       QualType SharedsPtrTy, llvm::Value *TaskFunction) {
1383   auto &C = CGM.getContext();
1384   FunctionArgList Args;
1385   ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty);
1386   ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc,
1387                                 /*Id=*/nullptr, KmpTaskTPtrQTy);
1388   Args.push_back(&GtidArg);
1389   Args.push_back(&TaskTypeArg);
1390   FunctionType::ExtInfo Info;
1391   auto &TaskEntryFnInfo =
1392       CGM.getTypes().arrangeFreeFunctionDeclaration(KmpInt32Ty, Args, Info,
1393                                                     /*isVariadic=*/false);
1394   auto *TaskEntryTy = CGM.getTypes().GetFunctionType(TaskEntryFnInfo);
1395   auto *TaskEntry =
1396       llvm::Function::Create(TaskEntryTy, llvm::GlobalValue::InternalLinkage,
1397                              ".omp_task_entry.", &CGM.getModule());
1398   CGM.SetLLVMFunctionAttributes(/*D=*/nullptr, TaskEntryFnInfo, TaskEntry);
1399   CodeGenFunction CGF(CGM);
1400   CGF.disableDebugInfo();
1401   CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args);
1402 
1403   // TaskFunction(gtid, tt->part_id, tt->shareds);
1404   auto *GtidParam = CGF.EmitLoadOfScalar(
1405       CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false,
1406       C.getTypeAlignInChars(KmpInt32Ty).getQuantity(), KmpInt32Ty, Loc);
1407   auto TaskTypeArgAddr = CGF.EmitLoadOfScalar(
1408       CGF.GetAddrOfLocalVar(&TaskTypeArg), /*Volatile=*/false,
1409       CGM.PointerAlignInBytes, KmpTaskTPtrQTy, Loc);
1410   auto *PartidPtr = CGF.Builder.CreateStructGEP(TaskTypeArgAddr,
1411                                                 /*Idx=*/KmpTaskTPartId);
1412   auto *PartidParam = CGF.EmitLoadOfScalar(
1413       PartidPtr, /*Volatile=*/false,
1414       C.getTypeAlignInChars(KmpInt32Ty).getQuantity(), KmpInt32Ty, Loc);
1415   auto *SharedsPtr = CGF.Builder.CreateStructGEP(TaskTypeArgAddr,
1416                                                  /*Idx=*/KmpTaskTShareds);
1417   auto *SharedsParam =
1418       CGF.EmitLoadOfScalar(SharedsPtr, /*Volatile=*/false,
1419                            CGM.PointerAlignInBytes, C.VoidPtrTy, Loc);
1420   llvm::Value *CallArgs[] = {
1421       GtidParam, PartidParam,
1422       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
1423           SharedsParam, CGF.ConvertTypeForMem(SharedsPtrTy))};
1424   CGF.EmitCallOrInvoke(TaskFunction, CallArgs);
1425   CGF.EmitStoreThroughLValue(
1426       RValue::get(CGF.Builder.getInt32(/*C=*/0)),
1427       CGF.MakeNaturalAlignAddrLValue(CGF.ReturnValue, KmpInt32Ty));
1428   CGF.FinishFunction();
1429   return TaskEntry;
1430 }
1431 
1432 void CGOpenMPRuntime::emitTaskCall(
1433     CodeGenFunction &CGF, SourceLocation Loc, bool Tied,
1434     llvm::PointerIntPair<llvm::Value *, 1, bool> Final,
1435     llvm::Value *TaskFunction, QualType SharedsTy, llvm::Value *Shareds) {
1436   auto &C = CGM.getContext();
1437   auto KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
1438   // Build type kmp_routine_entry_t (if not built yet).
1439   emitKmpRoutineEntryT(KmpInt32Ty);
1440   // Build particular struct kmp_task_t for the given task.
1441   auto KmpTaskQTy =
1442       createKmpTaskTRecordDecl(CGM, KmpInt32Ty, KmpRoutineEntryPtrQTy);
1443   QualType KmpTaskTPtrQTy = C.getPointerType(KmpTaskQTy);
1444   auto KmpTaskTPtrTy = CGF.ConvertType(KmpTaskQTy)->getPointerTo();
1445   auto KmpTaskTySize = CGM.getSize(C.getTypeSizeInChars(KmpTaskQTy));
1446   QualType SharedsPtrTy = C.getPointerType(SharedsTy);
1447 
1448   // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid,
1449   // kmp_task_t *tt);
1450   auto *TaskEntry = emitProxyTaskFunction(CGM, Loc, KmpInt32Ty, KmpTaskTPtrQTy,
1451                                           SharedsPtrTy, TaskFunction);
1452 
1453   // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
1454   // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
1455   // kmp_routine_entry_t *task_entry);
1456   // Task flags. Format is taken from
1457   // http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp.h,
1458   // description of kmp_tasking_flags struct.
1459   const unsigned TiedFlag = 0x1;
1460   const unsigned FinalFlag = 0x2;
1461   unsigned Flags = Tied ? TiedFlag : 0;
1462   auto *TaskFlags =
1463       Final.getPointer()
1464           ? CGF.Builder.CreateSelect(Final.getPointer(),
1465                                      CGF.Builder.getInt32(FinalFlag),
1466                                      CGF.Builder.getInt32(/*C=*/0))
1467           : CGF.Builder.getInt32(Final.getInt() ? FinalFlag : 0);
1468   TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags));
1469   auto SharedsSize = C.getTypeSizeInChars(SharedsTy);
1470   llvm::Value *AllocArgs[] = {emitUpdateLocation(CGF, Loc),
1471                               getThreadID(CGF, Loc), TaskFlags, KmpTaskTySize,
1472                               CGM.getSize(SharedsSize),
1473                               CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
1474                                   TaskEntry, KmpRoutineEntryPtrTy)};
1475   auto *NewTask = CGF.EmitRuntimeCall(
1476       createRuntimeFunction(OMPRTL__kmpc_omp_task_alloc), AllocArgs);
1477   auto *NewTaskNewTaskTTy =
1478       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(NewTask, KmpTaskTPtrTy);
1479   // Fill the data in the resulting kmp_task_t record.
1480   // Copy shareds if there are any.
1481   if (!SharedsTy->getAsStructureType()->getDecl()->field_empty())
1482     CGF.EmitAggregateCopy(
1483         CGF.EmitLoadOfScalar(
1484             CGF.Builder.CreateStructGEP(NewTaskNewTaskTTy,
1485                                         /*Idx=*/KmpTaskTShareds),
1486             /*Volatile=*/false, CGM.PointerAlignInBytes, SharedsPtrTy, Loc),
1487         Shareds, SharedsTy);
1488   // TODO: generate function with destructors for privates.
1489   // Provide pointer to function with destructors for privates.
1490   CGF.Builder.CreateAlignedStore(
1491       llvm::ConstantPointerNull::get(
1492           cast<llvm::PointerType>(KmpRoutineEntryPtrTy)),
1493       CGF.Builder.CreateStructGEP(NewTaskNewTaskTTy,
1494                                   /*Idx=*/KmpTaskTDestructors),
1495       CGM.PointerAlignInBytes);
1496 
1497   // NOTE: routine and part_id fields are intialized by __kmpc_omp_task_alloc()
1498   // libcall.
1499   // Build kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t
1500   // *new_task);
1501   llvm::Value *TaskArgs[] = {emitUpdateLocation(CGF, Loc),
1502                              getThreadID(CGF, Loc), NewTask};
1503   // TODO: add check for untied tasks.
1504   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task), TaskArgs);
1505 }
1506 
1507 InlinedOpenMPRegionRAII::InlinedOpenMPRegionRAII(
1508     CodeGenFunction &CGF, const OMPExecutableDirective &D)
1509     : CGF(CGF) {
1510   CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo(D, CGF.CapturedStmtInfo);
1511   // 1.2.2 OpenMP Language Terminology
1512   // Structured block - An executable statement with a single entry at the
1513   // top and a single exit at the bottom.
1514   // The point of exit cannot be a branch out of the structured block.
1515   // longjmp() and throw() must not violate the entry/exit criteria.
1516   CGF.EHStack.pushTerminate();
1517 }
1518 
1519 InlinedOpenMPRegionRAII::~InlinedOpenMPRegionRAII() {
1520   CGF.EHStack.popTerminate();
1521   auto *OldCSI =
1522       cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI();
1523   delete CGF.CapturedStmtInfo;
1524   CGF.CapturedStmtInfo = OldCSI;
1525 }
1526 
1527