1 //===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This provides a class for OpenMP runtime code generation.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "CGOpenMPRuntime.h"
15 #include "CodeGenFunction.h"
16 #include "CGCleanup.h"
17 #include "clang/AST/Decl.h"
18 #include "clang/AST/StmtOpenMP.h"
19 #include "llvm/ADT/ArrayRef.h"
20 #include "llvm/IR/CallSite.h"
21 #include "llvm/IR/DerivedTypes.h"
22 #include "llvm/IR/GlobalValue.h"
23 #include "llvm/IR/Value.h"
24 #include "llvm/Support/raw_ostream.h"
25 #include <cassert>
26 
27 using namespace clang;
28 using namespace CodeGen;
29 
30 namespace {
31 /// \brief Base class for handling code generation inside OpenMP regions.
32 class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
33 public:
34   /// \brief Kinds of OpenMP regions used in codegen.
35   enum CGOpenMPRegionKind {
36     /// \brief Region with outlined function for standalone 'parallel'
37     /// directive.
38     ParallelOutlinedRegion,
39     /// \brief Region with outlined function for standalone 'task' directive.
40     TaskOutlinedRegion,
41     /// \brief Region for constructs that do not require function outlining,
42     /// like 'for', 'sections', 'atomic' etc. directives.
43     InlinedRegion,
44   };
45 
46   CGOpenMPRegionInfo(const CapturedStmt &CS,
47                      const CGOpenMPRegionKind RegionKind,
48                      const RegionCodeGenTy &CodeGen)
49       : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind),
50         CodeGen(CodeGen) {}
51 
52   CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind,
53                      const RegionCodeGenTy &CodeGen)
54       : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind),
55         CodeGen(CodeGen) {}
56 
57   /// \brief Get a variable or parameter for storing global thread id
58   /// inside OpenMP construct.
59   virtual const VarDecl *getThreadIDVariable() const = 0;
60 
61   /// \brief Emit the captured statement body.
62   virtual void EmitBody(CodeGenFunction &CGF, const Stmt *S) override;
63 
64   /// \brief Get an LValue for the current ThreadID variable.
65   /// \return LValue for thread id variable. This LValue always has type int32*.
66   virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF);
67 
68   CGOpenMPRegionKind getRegionKind() const { return RegionKind; }
69 
70   static bool classof(const CGCapturedStmtInfo *Info) {
71     return Info->getKind() == CR_OpenMP;
72   }
73 
74 protected:
75   CGOpenMPRegionKind RegionKind;
76   const RegionCodeGenTy &CodeGen;
77 };
78 
79 /// \brief API for captured statement code generation in OpenMP constructs.
80 class CGOpenMPOutlinedRegionInfo : public CGOpenMPRegionInfo {
81 public:
82   CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar,
83                              const RegionCodeGenTy &CodeGen)
84       : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen),
85         ThreadIDVar(ThreadIDVar) {
86     assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
87   }
88   /// \brief Get a variable or parameter for storing global thread id
89   /// inside OpenMP construct.
90   const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }
91 
92   /// \brief Get the name of the capture helper.
93   StringRef getHelperName() const override { return ".omp_outlined."; }
94 
95   static bool classof(const CGCapturedStmtInfo *Info) {
96     return CGOpenMPRegionInfo::classof(Info) &&
97            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
98                ParallelOutlinedRegion;
99   }
100 
101 private:
102   /// \brief A variable or parameter storing global thread id for OpenMP
103   /// constructs.
104   const VarDecl *ThreadIDVar;
105 };
106 
107 /// \brief API for captured statement code generation in OpenMP constructs.
108 class CGOpenMPTaskOutlinedRegionInfo : public CGOpenMPRegionInfo {
109 public:
110   CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS,
111                                  const VarDecl *ThreadIDVar,
112                                  const RegionCodeGenTy &CodeGen)
113       : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen),
114         ThreadIDVar(ThreadIDVar) {
115     assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
116   }
117   /// \brief Get a variable or parameter for storing global thread id
118   /// inside OpenMP construct.
119   const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }
120 
121   /// \brief Get an LValue for the current ThreadID variable.
122   LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override;
123 
124   /// \brief Get the name of the capture helper.
125   StringRef getHelperName() const override { return ".omp_outlined."; }
126 
127   static bool classof(const CGCapturedStmtInfo *Info) {
128     return CGOpenMPRegionInfo::classof(Info) &&
129            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
130                TaskOutlinedRegion;
131   }
132 
133 private:
134   /// \brief A variable or parameter storing global thread id for OpenMP
135   /// constructs.
136   const VarDecl *ThreadIDVar;
137 };
138 
139 /// \brief API for inlined captured statement code generation in OpenMP
140 /// constructs.
141 class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo {
142 public:
143   CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI,
144                             const RegionCodeGenTy &CodeGen)
145       : CGOpenMPRegionInfo(InlinedRegion, CodeGen), OldCSI(OldCSI),
146         OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {}
147   // \brief Retrieve the value of the context parameter.
148   llvm::Value *getContextValue() const override {
149     if (OuterRegionInfo)
150       return OuterRegionInfo->getContextValue();
151     llvm_unreachable("No context value for inlined OpenMP region");
152   }
153   virtual void setContextValue(llvm::Value *V) override {
154     if (OuterRegionInfo) {
155       OuterRegionInfo->setContextValue(V);
156       return;
157     }
158     llvm_unreachable("No context value for inlined OpenMP region");
159   }
160   /// \brief Lookup the captured field decl for a variable.
161   const FieldDecl *lookup(const VarDecl *VD) const override {
162     if (OuterRegionInfo)
163       return OuterRegionInfo->lookup(VD);
164     // If there is no outer outlined region,no need to lookup in a list of
165     // captured variables, we can use the original one.
166     return nullptr;
167   }
168   FieldDecl *getThisFieldDecl() const override {
169     if (OuterRegionInfo)
170       return OuterRegionInfo->getThisFieldDecl();
171     return nullptr;
172   }
173   /// \brief Get a variable or parameter for storing global thread id
174   /// inside OpenMP construct.
175   const VarDecl *getThreadIDVariable() const override {
176     if (OuterRegionInfo)
177       return OuterRegionInfo->getThreadIDVariable();
178     return nullptr;
179   }
180 
181   /// \brief Get the name of the capture helper.
182   StringRef getHelperName() const override {
183     if (auto *OuterRegionInfo = getOldCSI())
184       return OuterRegionInfo->getHelperName();
185     llvm_unreachable("No helper name for inlined OpenMP construct");
186   }
187 
188   CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; }
189 
190   static bool classof(const CGCapturedStmtInfo *Info) {
191     return CGOpenMPRegionInfo::classof(Info) &&
192            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion;
193   }
194 
195 private:
196   /// \brief CodeGen info about outer OpenMP region.
197   CodeGenFunction::CGCapturedStmtInfo *OldCSI;
198   CGOpenMPRegionInfo *OuterRegionInfo;
199 };
200 
201 /// \brief RAII for emitting code of OpenMP constructs.
202 class InlinedOpenMPRegionRAII {
203   CodeGenFunction &CGF;
204 
205 public:
206   /// \brief Constructs region for combined constructs.
207   /// \param CodeGen Code generation sequence for combined directives. Includes
208   /// a list of functions used for code generation of implicitly inlined
209   /// regions.
210   InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen)
211       : CGF(CGF) {
212     // Start emission for the construct.
213     CGF.CapturedStmtInfo =
214         new CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, CodeGen);
215   }
216   ~InlinedOpenMPRegionRAII() {
217     // Restore original CapturedStmtInfo only if we're done with code emission.
218     auto *OldCSI =
219         cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI();
220     delete CGF.CapturedStmtInfo;
221     CGF.CapturedStmtInfo = OldCSI;
222   }
223 };
224 
225 } // namespace
226 
227 LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) {
228   return CGF.MakeNaturalAlignAddrLValue(
229       CGF.Builder.CreateAlignedLoad(
230           CGF.GetAddrOfLocalVar(getThreadIDVariable()),
231           CGF.PointerAlignInBytes),
232       getThreadIDVariable()
233           ->getType()
234           ->castAs<PointerType>()
235           ->getPointeeType());
236 }
237 
238 void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt * /*S*/) {
239   // 1.2.2 OpenMP Language Terminology
240   // Structured block - An executable statement with a single entry at the
241   // top and a single exit at the bottom.
242   // The point of exit cannot be a branch out of the structured block.
243   // longjmp() and throw() must not violate the entry/exit criteria.
244   CGF.EHStack.pushTerminate();
245   {
246     CodeGenFunction::RunCleanupsScope Scope(CGF);
247     CodeGen(CGF);
248   }
249   CGF.EHStack.popTerminate();
250 }
251 
252 LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue(
253     CodeGenFunction &CGF) {
254   return CGF.MakeNaturalAlignAddrLValue(
255       CGF.GetAddrOfLocalVar(getThreadIDVariable()),
256       getThreadIDVariable()->getType());
257 }
258 
259 CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM)
260     : CGM(CGM), DefaultOpenMPPSource(nullptr), KmpRoutineEntryPtrTy(nullptr) {
261   IdentTy = llvm::StructType::create(
262       "ident_t", CGM.Int32Ty /* reserved_1 */, CGM.Int32Ty /* flags */,
263       CGM.Int32Ty /* reserved_2 */, CGM.Int32Ty /* reserved_3 */,
264       CGM.Int8PtrTy /* psource */, nullptr);
265   // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...)
266   llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty),
267                                llvm::PointerType::getUnqual(CGM.Int32Ty)};
268   Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true);
269   KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8);
270 }
271 
272 void CGOpenMPRuntime::clear() {
273   InternalVars.clear();
274 }
275 
276 llvm::Value *
277 CGOpenMPRuntime::emitParallelOutlinedFunction(const OMPExecutableDirective &D,
278                                               const VarDecl *ThreadIDVar,
279                                               const RegionCodeGenTy &CodeGen) {
280   assert(ThreadIDVar->getType()->isPointerType() &&
281          "thread id variable must be of type kmp_int32 *");
282   const CapturedStmt *CS = cast<CapturedStmt>(D.getAssociatedStmt());
283   CodeGenFunction CGF(CGM, true);
284   CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen);
285   CGF.CapturedStmtInfo = &CGInfo;
286   return CGF.GenerateCapturedStmtFunction(*CS);
287 }
288 
289 llvm::Value *
290 CGOpenMPRuntime::emitTaskOutlinedFunction(const OMPExecutableDirective &D,
291                                           const VarDecl *ThreadIDVar,
292                                           const RegionCodeGenTy &CodeGen) {
293   assert(!ThreadIDVar->getType()->isPointerType() &&
294          "thread id variable must be of type kmp_int32 for tasks");
295   auto *CS = cast<CapturedStmt>(D.getAssociatedStmt());
296   CodeGenFunction CGF(CGM, true);
297   CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen);
298   CGF.CapturedStmtInfo = &CGInfo;
299   return CGF.GenerateCapturedStmtFunction(*CS);
300 }
301 
302 llvm::Value *
303 CGOpenMPRuntime::getOrCreateDefaultLocation(OpenMPLocationFlags Flags) {
304   llvm::Value *Entry = OpenMPDefaultLocMap.lookup(Flags);
305   if (!Entry) {
306     if (!DefaultOpenMPPSource) {
307       // Initialize default location for psource field of ident_t structure of
308       // all ident_t objects. Format is ";file;function;line;column;;".
309       // Taken from
310       // http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp_str.c
311       DefaultOpenMPPSource =
312           CGM.GetAddrOfConstantCString(";unknown;unknown;0;0;;");
313       DefaultOpenMPPSource =
314           llvm::ConstantExpr::getBitCast(DefaultOpenMPPSource, CGM.Int8PtrTy);
315     }
316     auto DefaultOpenMPLocation = new llvm::GlobalVariable(
317         CGM.getModule(), IdentTy, /*isConstant*/ true,
318         llvm::GlobalValue::PrivateLinkage, /*Initializer*/ nullptr);
319     DefaultOpenMPLocation->setUnnamedAddr(true);
320 
321     llvm::Constant *Zero = llvm::ConstantInt::get(CGM.Int32Ty, 0, true);
322     llvm::Constant *Values[] = {Zero,
323                                 llvm::ConstantInt::get(CGM.Int32Ty, Flags),
324                                 Zero, Zero, DefaultOpenMPPSource};
325     llvm::Constant *Init = llvm::ConstantStruct::get(IdentTy, Values);
326     DefaultOpenMPLocation->setInitializer(Init);
327     OpenMPDefaultLocMap[Flags] = DefaultOpenMPLocation;
328     return DefaultOpenMPLocation;
329   }
330   return Entry;
331 }
332 
/// \brief Produce the 'ident_t' location value to pass to a runtime call made
/// for source location \a Loc.
///
/// When debug info is disabled or \a Loc is invalid, the shared default
/// location for \a Flags is returned. Otherwise a per-function 'ident_t'
/// temporary is used: it is created (and initialized from the default
/// location) on first use in the current function, and on every call its
/// psource field is overwritten with a ";file;function;line;column;;" string
/// describing \a Loc.
/// \return The default location global or the per-function temporary.
llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF,
                                                 SourceLocation Loc,
                                                 OpenMPLocationFlags Flags) {
  // If no debug info is generated - return global default location.
  if (CGM.getCodeGenOpts().getDebugInfo() == CodeGenOptions::NoDebugInfo ||
      Loc.isInvalid())
    return getOrCreateDefaultLocation(Flags);

  assert(CGF.CurFn && "No function in current CodeGenFunction.");

  // Look for a location temporary already created for the current function.
  llvm::Value *LocValue = nullptr;
  auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
  if (I != OpenMPLocThreadIDMap.end())
    LocValue = I->second.DebugLoc;
  // OpenMPLocThreadIDMap may have null DebugLoc and non-null ThreadID, if
  // getThreadID was called before this routine.
  if (LocValue == nullptr) {
    // Generate "ident_t .kmpc_loc.addr;"
    llvm::AllocaInst *AI = CGF.CreateTempAlloca(IdentTy, ".kmpc_loc.addr");
    AI->setAlignment(CGM.getDataLayout().getPrefTypeAlignment(IdentTy));
    auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
    Elem.second.DebugLoc = AI;
    LocValue = AI;

    // Emit the initializing copy at the alloca insertion point; the guard
    // puts the builder back to its previous position when this scope ends.
    // NOTE(review): this copy happens only when the temporary is created, so
    // the temporary is initialized from the default location for the Flags of
    // the first call in this function; later calls with different Flags reuse
    // it unchanged - confirm this is intended.
    CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
    CGF.Builder.SetInsertPoint(CGF.AllocaInsertPt);
    CGF.Builder.CreateMemCpy(LocValue, getOrCreateDefaultLocation(Flags),
                             llvm::ConstantExpr::getSizeOf(IdentTy),
                             CGM.PointerAlignInBytes);
  }

  // char **psource = &.kmpc_loc.addr.psource;
  auto *PSource = CGF.Builder.CreateConstInBoundsGEP2_32(IdentTy, LocValue, 0,
                                                         IdentField_PSource);

  // Location strings are cached per raw source-location encoding.
  auto OMPDebugLoc = OpenMPDebugLocMap.lookup(Loc.getRawEncoding());
  if (OMPDebugLoc == nullptr) {
    SmallString<128> Buffer2;
    llvm::raw_svector_ostream OS2(Buffer2);
    // Build debug location
    PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
    OS2 << ";" << PLoc.getFilename() << ";";
    // The function field stays empty when the current declaration is not a
    // FunctionDecl.
    if (const FunctionDecl *FD =
            dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl)) {
      OS2 << FD->getQualifiedNameAsString();
    }
    OS2 << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;";
    OMPDebugLoc = CGF.Builder.CreateGlobalStringPtr(OS2.str());
    OpenMPDebugLocMap[Loc.getRawEncoding()] = OMPDebugLoc;
  }
  // *psource = ";<File>;<Function>;<Line>;<Column>;;";
  CGF.Builder.CreateStore(OMPDebugLoc, PSource);

  return LocValue;
}
388 
389 llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF,
390                                           SourceLocation Loc) {
391   assert(CGF.CurFn && "No function in current CodeGenFunction.");
392 
393   llvm::Value *ThreadID = nullptr;
394   // Check whether we've already cached a load of the thread id in this
395   // function.
396   auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
397   if (I != OpenMPLocThreadIDMap.end()) {
398     ThreadID = I->second.ThreadID;
399     if (ThreadID != nullptr)
400       return ThreadID;
401   }
402   if (auto OMPRegionInfo =
403           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
404     if (OMPRegionInfo->getThreadIDVariable()) {
405       // Check if this an outlined function with thread id passed as argument.
406       auto LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF);
407       ThreadID = CGF.EmitLoadOfLValue(LVal, Loc).getScalarVal();
408       // If value loaded in entry block, cache it and use it everywhere in
409       // function.
410       if (CGF.Builder.GetInsertBlock() == CGF.AllocaInsertPt->getParent()) {
411         auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
412         Elem.second.ThreadID = ThreadID;
413       }
414       return ThreadID;
415     }
416   }
417 
418   // This is not an outlined function region - need to call __kmpc_int32
419   // kmpc_global_thread_num(ident_t *loc).
420   // Generate thread id value and cache this value for use across the
421   // function.
422   CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
423   CGF.Builder.SetInsertPoint(CGF.AllocaInsertPt);
424   ThreadID =
425       CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_global_thread_num),
426                           emitUpdateLocation(CGF, Loc));
427   auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
428   Elem.second.ThreadID = ThreadID;
429   return ThreadID;
430 }
431 
432 void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) {
433   assert(CGF.CurFn && "No function in current CodeGenFunction.");
434   if (OpenMPLocThreadIDMap.count(CGF.CurFn))
435     OpenMPLocThreadIDMap.erase(CGF.CurFn);
436 }
437 
438 llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() {
439   return llvm::PointerType::getUnqual(IdentTy);
440 }
441 
442 llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() {
443   return llvm::PointerType::getUnqual(Kmpc_MicroTy);
444 }
445 
446 llvm::Constant *
447 CGOpenMPRuntime::createRuntimeFunction(OpenMPRTLFunction Function) {
448   llvm::Constant *RTLFn = nullptr;
449   switch (Function) {
450   case OMPRTL__kmpc_fork_call: {
451     // Build void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro
452     // microtask, ...);
453     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
454                                 getKmpc_MicroPointerTy()};
455     llvm::FunctionType *FnTy =
456         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ true);
457     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_fork_call");
458     break;
459   }
460   case OMPRTL__kmpc_global_thread_num: {
461     // Build kmp_int32 __kmpc_global_thread_num(ident_t *loc);
462     llvm::Type *TypeParams[] = {getIdentTyPointerTy()};
463     llvm::FunctionType *FnTy =
464         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
465     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_global_thread_num");
466     break;
467   }
468   case OMPRTL__kmpc_threadprivate_cached: {
469     // Build void *__kmpc_threadprivate_cached(ident_t *loc,
470     // kmp_int32 global_tid, void *data, size_t size, void ***cache);
471     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
472                                 CGM.VoidPtrTy, CGM.SizeTy,
473                                 CGM.VoidPtrTy->getPointerTo()->getPointerTo()};
474     llvm::FunctionType *FnTy =
475         llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg*/ false);
476     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_cached");
477     break;
478   }
479   case OMPRTL__kmpc_critical: {
480     // Build void __kmpc_critical(ident_t *loc, kmp_int32 global_tid,
481     // kmp_critical_name *crit);
482     llvm::Type *TypeParams[] = {
483         getIdentTyPointerTy(), CGM.Int32Ty,
484         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
485     llvm::FunctionType *FnTy =
486         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
487     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_critical");
488     break;
489   }
490   case OMPRTL__kmpc_threadprivate_register: {
491     // Build void __kmpc_threadprivate_register(ident_t *, void *data,
492     // kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor);
493     // typedef void *(*kmpc_ctor)(void *);
494     auto KmpcCtorTy =
495         llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
496                                 /*isVarArg*/ false)->getPointerTo();
497     // typedef void *(*kmpc_cctor)(void *, void *);
498     llvm::Type *KmpcCopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
499     auto KmpcCopyCtorTy =
500         llvm::FunctionType::get(CGM.VoidPtrTy, KmpcCopyCtorTyArgs,
501                                 /*isVarArg*/ false)->getPointerTo();
502     // typedef void (*kmpc_dtor)(void *);
503     auto KmpcDtorTy =
504         llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy, /*isVarArg*/ false)
505             ->getPointerTo();
506     llvm::Type *FnTyArgs[] = {getIdentTyPointerTy(), CGM.VoidPtrTy, KmpcCtorTy,
507                               KmpcCopyCtorTy, KmpcDtorTy};
508     auto FnTy = llvm::FunctionType::get(CGM.VoidTy, FnTyArgs,
509                                         /*isVarArg*/ false);
510     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_register");
511     break;
512   }
513   case OMPRTL__kmpc_end_critical: {
514     // Build void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid,
515     // kmp_critical_name *crit);
516     llvm::Type *TypeParams[] = {
517         getIdentTyPointerTy(), CGM.Int32Ty,
518         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
519     llvm::FunctionType *FnTy =
520         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
521     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_critical");
522     break;
523   }
524   case OMPRTL__kmpc_cancel_barrier: {
525     // Build kmp_int32 __kmpc_cancel_barrier(ident_t *loc, kmp_int32
526     // global_tid);
527     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
528     llvm::FunctionType *FnTy =
529         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
530     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_cancel_barrier");
531     break;
532   }
533   case OMPRTL__kmpc_for_static_fini: {
534     // Build void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid);
535     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
536     llvm::FunctionType *FnTy =
537         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
538     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_for_static_fini");
539     break;
540   }
541   case OMPRTL__kmpc_push_num_threads: {
542     // Build void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
543     // kmp_int32 num_threads)
544     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
545                                 CGM.Int32Ty};
546     llvm::FunctionType *FnTy =
547         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
548     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_num_threads");
549     break;
550   }
551   case OMPRTL__kmpc_serialized_parallel: {
552     // Build void __kmpc_serialized_parallel(ident_t *loc, kmp_int32
553     // global_tid);
554     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
555     llvm::FunctionType *FnTy =
556         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
557     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_serialized_parallel");
558     break;
559   }
560   case OMPRTL__kmpc_end_serialized_parallel: {
561     // Build void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32
562     // global_tid);
563     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
564     llvm::FunctionType *FnTy =
565         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
566     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_serialized_parallel");
567     break;
568   }
569   case OMPRTL__kmpc_flush: {
570     // Build void __kmpc_flush(ident_t *loc);
571     llvm::Type *TypeParams[] = {getIdentTyPointerTy()};
572     llvm::FunctionType *FnTy =
573         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
574     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_flush");
575     break;
576   }
577   case OMPRTL__kmpc_master: {
578     // Build kmp_int32 __kmpc_master(ident_t *loc, kmp_int32 global_tid);
579     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
580     llvm::FunctionType *FnTy =
581         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
582     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_master");
583     break;
584   }
585   case OMPRTL__kmpc_end_master: {
586     // Build void __kmpc_end_master(ident_t *loc, kmp_int32 global_tid);
587     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
588     llvm::FunctionType *FnTy =
589         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
590     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_master");
591     break;
592   }
593   case OMPRTL__kmpc_omp_taskyield: {
594     // Build kmp_int32 __kmpc_omp_taskyield(ident_t *, kmp_int32 global_tid,
595     // int end_part);
596     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
597     llvm::FunctionType *FnTy =
598         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
599     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_taskyield");
600     break;
601   }
602   case OMPRTL__kmpc_single: {
603     // Build kmp_int32 __kmpc_single(ident_t *loc, kmp_int32 global_tid);
604     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
605     llvm::FunctionType *FnTy =
606         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
607     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_single");
608     break;
609   }
610   case OMPRTL__kmpc_end_single: {
611     // Build void __kmpc_end_single(ident_t *loc, kmp_int32 global_tid);
612     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
613     llvm::FunctionType *FnTy =
614         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
615     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_single");
616     break;
617   }
618   case OMPRTL__kmpc_omp_task_alloc: {
619     // Build kmp_task_t *__kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
620     // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
621     // kmp_routine_entry_t *task_entry);
622     assert(KmpRoutineEntryPtrTy != nullptr &&
623            "Type kmp_routine_entry_t must be created.");
624     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty,
625                                 CGM.SizeTy, CGM.SizeTy, KmpRoutineEntryPtrTy};
626     // Return void * and then cast to particular kmp_task_t type.
627     llvm::FunctionType *FnTy =
628         llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
629     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_alloc");
630     break;
631   }
632   case OMPRTL__kmpc_omp_task: {
633     // Build kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t
634     // *new_task);
635     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
636                                 CGM.VoidPtrTy};
637     llvm::FunctionType *FnTy =
638         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
639     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task");
640     break;
641   }
642   case OMPRTL__kmpc_copyprivate: {
643     // Build void __kmpc_copyprivate(ident_t *loc, kmp_int32 global_tid,
644     // size_t cpy_size, void *cpy_data, void(*cpy_func)(void *, void *),
645     // kmp_int32 didit);
646     llvm::Type *CpyTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
647     auto *CpyFnTy =
648         llvm::FunctionType::get(CGM.VoidTy, CpyTypeParams, /*isVarArg=*/false);
649     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.SizeTy,
650                                 CGM.VoidPtrTy, CpyFnTy->getPointerTo(),
651                                 CGM.Int32Ty};
652     llvm::FunctionType *FnTy =
653         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
654     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_copyprivate");
655     break;
656   }
657   case OMPRTL__kmpc_reduce: {
658     // Build kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid,
659     // kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void
660     // (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name *lck);
661     llvm::Type *ReduceTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
662     auto *ReduceFnTy = llvm::FunctionType::get(CGM.VoidTy, ReduceTypeParams,
663                                                /*isVarArg=*/false);
664     llvm::Type *TypeParams[] = {
665         getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, CGM.SizeTy,
666         CGM.VoidPtrTy, ReduceFnTy->getPointerTo(),
667         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
668     llvm::FunctionType *FnTy =
669         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
670     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_reduce");
671     break;
672   }
673   case OMPRTL__kmpc_reduce_nowait: {
674     // Build kmp_int32 __kmpc_reduce_nowait(ident_t *loc, kmp_int32
675     // global_tid, kmp_int32 num_vars, size_t reduce_size, void *reduce_data,
676     // void (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name
677     // *lck);
678     llvm::Type *ReduceTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
679     auto *ReduceFnTy = llvm::FunctionType::get(CGM.VoidTy, ReduceTypeParams,
680                                                /*isVarArg=*/false);
681     llvm::Type *TypeParams[] = {
682         getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, CGM.SizeTy,
683         CGM.VoidPtrTy, ReduceFnTy->getPointerTo(),
684         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
685     llvm::FunctionType *FnTy =
686         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
687     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_reduce_nowait");
688     break;
689   }
690   case OMPRTL__kmpc_end_reduce: {
691     // Build void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,
692     // kmp_critical_name *lck);
693     llvm::Type *TypeParams[] = {
694         getIdentTyPointerTy(), CGM.Int32Ty,
695         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
696     llvm::FunctionType *FnTy =
697         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
698     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_reduce");
699     break;
700   }
701   case OMPRTL__kmpc_end_reduce_nowait: {
    // Build void __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
    // kmp_critical_name *lck);
704     llvm::Type *TypeParams[] = {
705         getIdentTyPointerTy(), CGM.Int32Ty,
706         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
707     llvm::FunctionType *FnTy =
708         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
709     RTLFn =
710         CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_reduce_nowait");
711     break;
712   }
713   case OMPRTL__kmpc_omp_task_begin_if0: {
    // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
716     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
717                                 CGM.VoidPtrTy};
718     llvm::FunctionType *FnTy =
719         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
720     RTLFn =
721         CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_begin_if0");
722     break;
723   }
724   case OMPRTL__kmpc_omp_task_complete_if0: {
    // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
727     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
728                                 CGM.VoidPtrTy};
729     llvm::FunctionType *FnTy =
730         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
731     RTLFn = CGM.CreateRuntimeFunction(FnTy,
732                                       /*Name=*/"__kmpc_omp_task_complete_if0");
733     break;
734   }
735   case OMPRTL__kmpc_ordered: {
736     // Build void __kmpc_ordered(ident_t *loc, kmp_int32 global_tid);
737     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
738     llvm::FunctionType *FnTy =
739         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
740     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_ordered");
741     break;
742   }
743   case OMPRTL__kmpc_end_ordered: {
    // Build void __kmpc_end_ordered(ident_t *loc, kmp_int32 global_tid);
745     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
746     llvm::FunctionType *FnTy =
747         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
748     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_ordered");
749     break;
750   }
751   case OMPRTL__kmpc_omp_taskwait: {
752     // Build kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32 global_tid);
753     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
754     llvm::FunctionType *FnTy =
755         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
756     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_omp_taskwait");
757     break;
758   }
759   }
760   return RTLFn;
761 }
762 
763 llvm::Constant *CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize,
764                                                              bool IVSigned) {
765   assert((IVSize == 32 || IVSize == 64) &&
766          "IV size is not compatible with the omp runtime");
767   auto Name = IVSize == 32 ? (IVSigned ? "__kmpc_for_static_init_4"
768                                        : "__kmpc_for_static_init_4u")
769                            : (IVSigned ? "__kmpc_for_static_init_8"
770                                        : "__kmpc_for_static_init_8u");
771   auto ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
772   auto PtrTy = llvm::PointerType::getUnqual(ITy);
773   llvm::Type *TypeParams[] = {
774     getIdentTyPointerTy(),                     // loc
775     CGM.Int32Ty,                               // tid
776     CGM.Int32Ty,                               // schedtype
777     llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
778     PtrTy,                                     // p_lower
779     PtrTy,                                     // p_upper
780     PtrTy,                                     // p_stride
781     ITy,                                       // incr
782     ITy                                        // chunk
783   };
784   llvm::FunctionType *FnTy =
785       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
786   return CGM.CreateRuntimeFunction(FnTy, Name);
787 }
788 
789 llvm::Constant *CGOpenMPRuntime::createDispatchInitFunction(unsigned IVSize,
790                                                             bool IVSigned) {
791   assert((IVSize == 32 || IVSize == 64) &&
792          "IV size is not compatible with the omp runtime");
793   auto Name =
794       IVSize == 32
795           ? (IVSigned ? "__kmpc_dispatch_init_4" : "__kmpc_dispatch_init_4u")
796           : (IVSigned ? "__kmpc_dispatch_init_8" : "__kmpc_dispatch_init_8u");
797   auto ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
798   llvm::Type *TypeParams[] = { getIdentTyPointerTy(), // loc
799                                CGM.Int32Ty,           // tid
800                                CGM.Int32Ty,           // schedtype
801                                ITy,                   // lower
802                                ITy,                   // upper
803                                ITy,                   // stride
804                                ITy                    // chunk
805   };
806   llvm::FunctionType *FnTy =
807       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
808   return CGM.CreateRuntimeFunction(FnTy, Name);
809 }
810 
811 llvm::Constant *CGOpenMPRuntime::createDispatchFiniFunction(unsigned IVSize,
812                                                             bool IVSigned) {
813   assert((IVSize == 32 || IVSize == 64) &&
814          "IV size is not compatible with the omp runtime");
815   auto Name =
816       IVSize == 32
817           ? (IVSigned ? "__kmpc_dispatch_fini_4" : "__kmpc_dispatch_fini_4u")
818           : (IVSigned ? "__kmpc_dispatch_fini_8" : "__kmpc_dispatch_fini_8u");
819   llvm::Type *TypeParams[] = {
820       getIdentTyPointerTy(), // loc
821       CGM.Int32Ty,           // tid
822   };
823   llvm::FunctionType *FnTy =
824       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
825   return CGM.CreateRuntimeFunction(FnTy, Name);
826 }
827 
828 llvm::Constant *CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize,
829                                                             bool IVSigned) {
830   assert((IVSize == 32 || IVSize == 64) &&
831          "IV size is not compatible with the omp runtime");
832   auto Name =
833       IVSize == 32
834           ? (IVSigned ? "__kmpc_dispatch_next_4" : "__kmpc_dispatch_next_4u")
835           : (IVSigned ? "__kmpc_dispatch_next_8" : "__kmpc_dispatch_next_8u");
836   auto ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
837   auto PtrTy = llvm::PointerType::getUnqual(ITy);
838   llvm::Type *TypeParams[] = {
839     getIdentTyPointerTy(),                     // loc
840     CGM.Int32Ty,                               // tid
841     llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
842     PtrTy,                                     // p_lower
843     PtrTy,                                     // p_upper
844     PtrTy                                      // p_stride
845   };
846   llvm::FunctionType *FnTy =
847       llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
848   return CGM.CreateRuntimeFunction(FnTy, Name);
849 }
850 
851 llvm::Constant *
852 CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) {
853   // Lookup the entry, lazily creating it if necessary.
854   return getOrCreateInternalVariable(CGM.Int8PtrPtrTy,
855                                      Twine(CGM.getMangledName(VD)) + ".cache.");
856 }
857 
858 llvm::Value *CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
859                                                      const VarDecl *VD,
860                                                      llvm::Value *VDAddr,
861                                                      SourceLocation Loc) {
862   auto VarTy = VDAddr->getType()->getPointerElementType();
863   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
864                          CGF.Builder.CreatePointerCast(VDAddr, CGM.Int8PtrTy),
865                          CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)),
866                          getOrCreateThreadPrivateCache(VD)};
867   return CGF.EmitRuntimeCall(
868       createRuntimeFunction(OMPRTL__kmpc_threadprivate_cached), Args);
869 }
870 
871 void CGOpenMPRuntime::emitThreadPrivateVarInit(
872     CodeGenFunction &CGF, llvm::Value *VDAddr, llvm::Value *Ctor,
873     llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) {
874   // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime
875   // library.
876   auto OMPLoc = emitUpdateLocation(CGF, Loc);
877   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_global_thread_num),
878                       OMPLoc);
879   // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor)
880   // to register constructor/destructor for variable.
881   llvm::Value *Args[] = {OMPLoc,
882                          CGF.Builder.CreatePointerCast(VDAddr, CGM.VoidPtrTy),
883                          Ctor, CopyCtor, Dtor};
884   CGF.EmitRuntimeCall(
885       createRuntimeFunction(OMPRTL__kmpc_threadprivate_register), Args);
886 }
887 
888 llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition(
889     const VarDecl *VD, llvm::Value *VDAddr, SourceLocation Loc,
890     bool PerformInit, CodeGenFunction *CGF) {
891   VD = VD->getDefinition(CGM.getContext());
892   if (VD && ThreadPrivateWithDefinition.count(VD) == 0) {
893     ThreadPrivateWithDefinition.insert(VD);
894     QualType ASTTy = VD->getType();
895 
896     llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr;
897     auto Init = VD->getAnyInitializer();
898     if (CGM.getLangOpts().CPlusPlus && PerformInit) {
899       // Generate function that re-emits the declaration's initializer into the
900       // threadprivate copy of the variable VD
901       CodeGenFunction CtorCGF(CGM);
902       FunctionArgList Args;
903       ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, SourceLocation(),
904                             /*Id=*/nullptr, CGM.getContext().VoidPtrTy);
905       Args.push_back(&Dst);
906 
907       auto &FI = CGM.getTypes().arrangeFreeFunctionDeclaration(
908           CGM.getContext().VoidPtrTy, Args, FunctionType::ExtInfo(),
909           /*isVariadic=*/false);
910       auto FTy = CGM.getTypes().GetFunctionType(FI);
911       auto Fn = CGM.CreateGlobalInitOrDestructFunction(
912           FTy, ".__kmpc_global_ctor_.", Loc);
913       CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI,
914                             Args, SourceLocation());
915       auto ArgVal = CtorCGF.EmitLoadOfScalar(
916           CtorCGF.GetAddrOfLocalVar(&Dst),
917           /*Volatile=*/false, CGM.PointerAlignInBytes,
918           CGM.getContext().VoidPtrTy, Dst.getLocation());
919       auto Arg = CtorCGF.Builder.CreatePointerCast(
920           ArgVal,
921           CtorCGF.ConvertTypeForMem(CGM.getContext().getPointerType(ASTTy)));
922       CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(),
923                                /*IsInitializer=*/true);
924       ArgVal = CtorCGF.EmitLoadOfScalar(
925           CtorCGF.GetAddrOfLocalVar(&Dst),
926           /*Volatile=*/false, CGM.PointerAlignInBytes,
927           CGM.getContext().VoidPtrTy, Dst.getLocation());
928       CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue);
929       CtorCGF.FinishFunction();
930       Ctor = Fn;
931     }
932     if (VD->getType().isDestructedType() != QualType::DK_none) {
933       // Generate function that emits destructor call for the threadprivate copy
934       // of the variable VD
935       CodeGenFunction DtorCGF(CGM);
936       FunctionArgList Args;
937       ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, SourceLocation(),
938                             /*Id=*/nullptr, CGM.getContext().VoidPtrTy);
939       Args.push_back(&Dst);
940 
941       auto &FI = CGM.getTypes().arrangeFreeFunctionDeclaration(
942           CGM.getContext().VoidTy, Args, FunctionType::ExtInfo(),
943           /*isVariadic=*/false);
944       auto FTy = CGM.getTypes().GetFunctionType(FI);
945       auto Fn = CGM.CreateGlobalInitOrDestructFunction(
946           FTy, ".__kmpc_global_dtor_.", Loc);
947       DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args,
948                             SourceLocation());
949       auto ArgVal = DtorCGF.EmitLoadOfScalar(
950           DtorCGF.GetAddrOfLocalVar(&Dst),
951           /*Volatile=*/false, CGM.PointerAlignInBytes,
952           CGM.getContext().VoidPtrTy, Dst.getLocation());
953       DtorCGF.emitDestroy(ArgVal, ASTTy,
954                           DtorCGF.getDestroyer(ASTTy.isDestructedType()),
955                           DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
956       DtorCGF.FinishFunction();
957       Dtor = Fn;
958     }
959     // Do not emit init function if it is not required.
960     if (!Ctor && !Dtor)
961       return nullptr;
962 
963     llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
964     auto CopyCtorTy =
965         llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs,
966                                 /*isVarArg=*/false)->getPointerTo();
967     // Copying constructor for the threadprivate variable.
968     // Must be NULL - reserved by runtime, but currently it requires that this
969     // parameter is always NULL. Otherwise it fires assertion.
970     CopyCtor = llvm::Constant::getNullValue(CopyCtorTy);
971     if (Ctor == nullptr) {
972       auto CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
973                                             /*isVarArg=*/false)->getPointerTo();
974       Ctor = llvm::Constant::getNullValue(CtorTy);
975     }
976     if (Dtor == nullptr) {
977       auto DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy,
978                                             /*isVarArg=*/false)->getPointerTo();
979       Dtor = llvm::Constant::getNullValue(DtorTy);
980     }
981     if (!CGF) {
982       auto InitFunctionTy =
983           llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false);
984       auto InitFunction = CGM.CreateGlobalInitOrDestructFunction(
985           InitFunctionTy, ".__omp_threadprivate_init_.");
986       CodeGenFunction InitCGF(CGM);
987       FunctionArgList ArgList;
988       InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction,
989                             CGM.getTypes().arrangeNullaryFunction(), ArgList,
990                             Loc);
991       emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
992       InitCGF.FinishFunction();
993       return InitFunction;
994     }
995     emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
996   }
997   return nullptr;
998 }
999 
1000 /// \brief Emits code for OpenMP 'if' clause using specified \a CodeGen
1001 /// function. Here is the logic:
1002 /// if (Cond) {
1003 ///   ThenGen();
1004 /// } else {
1005 ///   ElseGen();
1006 /// }
1007 static void emitOMPIfClause(CodeGenFunction &CGF, const Expr *Cond,
1008                             const RegionCodeGenTy &ThenGen,
1009                             const RegionCodeGenTy &ElseGen) {
1010   CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange());
1011 
1012   // If the condition constant folds and can be elided, try to avoid emitting
1013   // the condition and the dead arm of the if/else.
1014   bool CondConstant;
1015   if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) {
1016     CodeGenFunction::RunCleanupsScope Scope(CGF);
1017     if (CondConstant) {
1018       ThenGen(CGF);
1019     } else {
1020       ElseGen(CGF);
1021     }
1022     return;
1023   }
1024 
1025   // Otherwise, the condition did not fold, or we couldn't elide it.  Just
1026   // emit the conditional branch.
1027   auto ThenBlock = CGF.createBasicBlock("omp_if.then");
1028   auto ElseBlock = CGF.createBasicBlock("omp_if.else");
1029   auto ContBlock = CGF.createBasicBlock("omp_if.end");
1030   CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0);
1031 
1032   // Emit the 'then' code.
1033   CGF.EmitBlock(ThenBlock);
1034   {
1035     CodeGenFunction::RunCleanupsScope ThenScope(CGF);
1036     ThenGen(CGF);
1037   }
1038   CGF.EmitBranch(ContBlock);
1039   // Emit the 'else' code if present.
1040   {
1041     // There is no need to emit line number for unconditional branch.
1042     auto NL = ApplyDebugLocation::CreateEmpty(CGF);
1043     CGF.EmitBlock(ElseBlock);
1044   }
1045   {
1046     CodeGenFunction::RunCleanupsScope ThenScope(CGF);
1047     ElseGen(CGF);
1048   }
1049   {
1050     // There is no need to emit line number for unconditional branch.
1051     auto NL = ApplyDebugLocation::CreateEmpty(CGF);
1052     CGF.EmitBranch(ContBlock);
1053   }
1054   // Emit the continuation block for code after the if.
1055   CGF.EmitBlock(ContBlock, /*IsFinished=*/true);
1056 }
1057 
1058 void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
1059                                        llvm::Value *OutlinedFn,
1060                                        llvm::Value *CapturedStruct,
1061                                        const Expr *IfCond) {
1062   auto *RTLoc = emitUpdateLocation(CGF, Loc);
1063   auto &&ThenGen =
1064       [this, OutlinedFn, CapturedStruct, RTLoc](CodeGenFunction &CGF) {
1065         // Build call __kmpc_fork_call(loc, 1, microtask,
1066         // captured_struct/*context*/)
1067         llvm::Value *Args[] = {
1068             RTLoc,
1069             CGF.Builder.getInt32(
1070                 1), // Number of arguments after 'microtask' argument
1071             // (there is only one additional argument - 'context')
1072             CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy()),
1073             CGF.EmitCastToVoidPtr(CapturedStruct)};
1074         auto RTLFn = createRuntimeFunction(OMPRTL__kmpc_fork_call);
1075         CGF.EmitRuntimeCall(RTLFn, Args);
1076       };
1077   auto &&ElseGen = [this, OutlinedFn, CapturedStruct, RTLoc, Loc](
1078       CodeGenFunction &CGF) {
1079     auto ThreadID = getThreadID(CGF, Loc);
1080     // Build calls:
1081     // __kmpc_serialized_parallel(&Loc, GTid);
1082     llvm::Value *Args[] = {RTLoc, ThreadID};
1083     CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_serialized_parallel),
1084                         Args);
1085 
1086     // OutlinedFn(&GTid, &zero, CapturedStruct);
1087     auto ThreadIDAddr = emitThreadIDAddress(CGF, Loc);
1088     auto Int32Ty = CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32,
1089                                                           /*Signed*/ true);
1090     auto ZeroAddr = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".zero.addr");
1091     CGF.InitTempAlloca(ZeroAddr, CGF.Builder.getInt32(/*C*/ 0));
1092     llvm::Value *OutlinedFnArgs[] = {ThreadIDAddr, ZeroAddr, CapturedStruct};
1093     CGF.EmitCallOrInvoke(OutlinedFn, OutlinedFnArgs);
1094 
1095     // __kmpc_end_serialized_parallel(&Loc, GTid);
1096     llvm::Value *EndArgs[] = {emitUpdateLocation(CGF, Loc), ThreadID};
1097     CGF.EmitRuntimeCall(
1098         createRuntimeFunction(OMPRTL__kmpc_end_serialized_parallel), EndArgs);
1099   };
1100   if (IfCond) {
1101     emitOMPIfClause(CGF, IfCond, ThenGen, ElseGen);
1102   } else {
1103     CodeGenFunction::RunCleanupsScope Scope(CGF);
1104     ThenGen(CGF);
1105   }
1106 }
1107 
1108 // If we're inside an (outlined) parallel region, use the region info's
1109 // thread-ID variable (it is passed in a first argument of the outlined function
1110 // as "kmp_int32 *gtid"). Otherwise, if we're not inside parallel region, but in
1111 // regular serial code region, get thread ID by calling kmp_int32
1112 // kmpc_global_thread_num(ident_t *loc), stash this thread ID in a temporary and
1113 // return the address of that temp.
1114 llvm::Value *CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF,
1115                                                   SourceLocation Loc) {
1116   if (auto OMPRegionInfo =
1117           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
1118     if (OMPRegionInfo->getThreadIDVariable())
1119       return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress();
1120 
1121   auto ThreadID = getThreadID(CGF, Loc);
1122   auto Int32Ty =
1123       CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true);
1124   auto ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp.");
1125   CGF.EmitStoreOfScalar(ThreadID,
1126                         CGF.MakeNaturalAlignAddrLValue(ThreadIDTemp, Int32Ty));
1127 
1128   return ThreadIDTemp;
1129 }
1130 
1131 llvm::Constant *
1132 CGOpenMPRuntime::getOrCreateInternalVariable(llvm::Type *Ty,
1133                                              const llvm::Twine &Name) {
1134   SmallString<256> Buffer;
1135   llvm::raw_svector_ostream Out(Buffer);
1136   Out << Name;
1137   auto RuntimeName = Out.str();
1138   auto &Elem = *InternalVars.insert(std::make_pair(RuntimeName, nullptr)).first;
1139   if (Elem.second) {
1140     assert(Elem.second->getType()->getPointerElementType() == Ty &&
1141            "OMP internal variable has different type than requested");
1142     return &*Elem.second;
1143   }
1144 
1145   return Elem.second = new llvm::GlobalVariable(
1146              CGM.getModule(), Ty, /*IsConstant*/ false,
1147              llvm::GlobalValue::CommonLinkage, llvm::Constant::getNullValue(Ty),
1148              Elem.first());
1149 }
1150 
1151 llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) {
1152   llvm::Twine Name(".gomp_critical_user_", CriticalName);
1153   return getOrCreateInternalVariable(KmpCriticalNameTy, Name.concat(".var"));
1154 }
1155 
1156 namespace {
1157 class CallEndCleanup : public EHScopeStack::Cleanup {
1158 public:
1159   typedef ArrayRef<llvm::Value *> CleanupValuesTy;
1160 private:
1161   llvm::Value *Callee;
1162   llvm::SmallVector<llvm::Value *, 8> Args;
1163 
1164 public:
1165   CallEndCleanup(llvm::Value *Callee, CleanupValuesTy Args)
1166       : Callee(Callee), Args(Args.begin(), Args.end()) {}
1167   void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
1168     CGF.EmitRuntimeCall(Callee, Args);
1169   }
1170 };
1171 } // namespace
1172 
1173 void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF,
1174                                          StringRef CriticalName,
1175                                          const RegionCodeGenTy &CriticalOpGen,
1176                                          SourceLocation Loc) {
1177   // __kmpc_critical(ident_t *, gtid, Lock);
1178   // CriticalOpGen();
1179   // __kmpc_end_critical(ident_t *, gtid, Lock);
1180   // Prepare arguments and build a call to __kmpc_critical
1181   {
1182     CodeGenFunction::RunCleanupsScope Scope(CGF);
1183     llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
1184                            getCriticalRegionLock(CriticalName)};
1185     CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_critical), Args);
1186     // Build a call to __kmpc_end_critical
1187     CGF.EHStack.pushCleanup<CallEndCleanup>(
1188         NormalAndEHCleanup, createRuntimeFunction(OMPRTL__kmpc_end_critical),
1189         llvm::makeArrayRef(Args));
1190     emitInlinedDirective(CGF, CriticalOpGen);
1191   }
1192 }
1193 
1194 static void emitIfStmt(CodeGenFunction &CGF, llvm::Value *IfCond,
1195                        const RegionCodeGenTy &BodyOpGen) {
1196   llvm::Value *CallBool = CGF.EmitScalarConversion(
1197       IfCond,
1198       CGF.getContext().getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true),
1199       CGF.getContext().BoolTy);
1200 
1201   auto *ThenBlock = CGF.createBasicBlock("omp_if.then");
1202   auto *ContBlock = CGF.createBasicBlock("omp_if.end");
1203   // Generate the branch (If-stmt)
1204   CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock);
1205   CGF.EmitBlock(ThenBlock);
1206   CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, BodyOpGen);
1207   // Emit the rest of bblocks/branches
1208   CGF.EmitBranch(ContBlock);
1209   CGF.EmitBlock(ContBlock, true);
1210 }
1211 
1212 void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF,
1213                                        const RegionCodeGenTy &MasterOpGen,
1214                                        SourceLocation Loc) {
1215   // if(__kmpc_master(ident_t *, gtid)) {
1216   //   MasterOpGen();
1217   //   __kmpc_end_master(ident_t *, gtid);
1218   // }
1219   // Prepare arguments and build a call to __kmpc_master
1220   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
1221   auto *IsMaster =
1222       CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_master), Args);
1223   emitIfStmt(CGF, IsMaster, [&](CodeGenFunction &CGF) -> void {
1224     CodeGenFunction::RunCleanupsScope Scope(CGF);
1225     CGF.EHStack.pushCleanup<CallEndCleanup>(
1226         NormalAndEHCleanup, createRuntimeFunction(OMPRTL__kmpc_end_master),
1227         llvm::makeArrayRef(Args));
1228     MasterOpGen(CGF);
1229   });
1230 }
1231 
1232 void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
1233                                         SourceLocation Loc) {
1234   // Build call __kmpc_omp_taskyield(loc, thread_id, 0);
1235   llvm::Value *Args[] = {
1236       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
1237       llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)};
1238   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_taskyield), Args);
1239 }
1240 
1241 static llvm::Value *emitCopyprivateCopyFunction(
1242     CodeGenModule &CGM, llvm::Type *ArgsType,
1243     ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs,
1244     ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps) {
1245   auto &C = CGM.getContext();
1246   // void copy_func(void *LHSArg, void *RHSArg);
1247   FunctionArgList Args;
1248   ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, SourceLocation(), /*Id=*/nullptr,
1249                            C.VoidPtrTy);
1250   ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, SourceLocation(), /*Id=*/nullptr,
1251                            C.VoidPtrTy);
1252   Args.push_back(&LHSArg);
1253   Args.push_back(&RHSArg);
1254   FunctionType::ExtInfo EI;
1255   auto &CGFI = CGM.getTypes().arrangeFreeFunctionDeclaration(
1256       C.VoidTy, Args, EI, /*isVariadic=*/false);
1257   auto *Fn = llvm::Function::Create(
1258       CGM.getTypes().GetFunctionType(CGFI), llvm::GlobalValue::InternalLinkage,
1259       ".omp.copyprivate.copy_func", &CGM.getModule());
1260   CGM.SetLLVMFunctionAttributes(/*D=*/nullptr, CGFI, Fn);
1261   CodeGenFunction CGF(CGM);
1262   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args);
1263   // Dest = (void*[n])(LHSArg);
1264   // Src = (void*[n])(RHSArg);
1265   auto *LHS = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
1266       CGF.Builder.CreateAlignedLoad(CGF.GetAddrOfLocalVar(&LHSArg),
1267                                     CGF.PointerAlignInBytes),
1268       ArgsType);
1269   auto *RHS = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
1270       CGF.Builder.CreateAlignedLoad(CGF.GetAddrOfLocalVar(&RHSArg),
1271                                     CGF.PointerAlignInBytes),
1272       ArgsType);
1273   // *(Type0*)Dst[0] = *(Type0*)Src[0];
1274   // *(Type1*)Dst[1] = *(Type1*)Src[1];
1275   // ...
1276   // *(Typen*)Dst[n] = *(Typen*)Src[n];
1277   for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) {
1278     auto *DestAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
1279         CGF.Builder.CreateAlignedLoad(
1280             CGF.Builder.CreateStructGEP(nullptr, LHS, I),
1281             CGM.PointerAlignInBytes),
1282         CGF.ConvertTypeForMem(C.getPointerType(SrcExprs[I]->getType())));
1283     auto *SrcAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
1284         CGF.Builder.CreateAlignedLoad(
1285             CGF.Builder.CreateStructGEP(nullptr, RHS, I),
1286             CGM.PointerAlignInBytes),
1287         CGF.ConvertTypeForMem(C.getPointerType(SrcExprs[I]->getType())));
1288     CGF.EmitOMPCopy(CGF, CopyprivateVars[I]->getType(), DestAddr, SrcAddr,
1289                     cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl()),
1290                     cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl()),
1291                     AssignmentOps[I]);
1292   }
1293   CGF.FinishFunction();
1294   return Fn;
1295 }
1296 
1297 void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF,
1298                                        const RegionCodeGenTy &SingleOpGen,
1299                                        SourceLocation Loc,
1300                                        ArrayRef<const Expr *> CopyprivateVars,
1301                                        ArrayRef<const Expr *> SrcExprs,
1302                                        ArrayRef<const Expr *> DstExprs,
1303                                        ArrayRef<const Expr *> AssignmentOps) {
1304   assert(CopyprivateVars.size() == SrcExprs.size() &&
1305          CopyprivateVars.size() == DstExprs.size() &&
1306          CopyprivateVars.size() == AssignmentOps.size());
1307   auto &C = CGM.getContext();
1308   // int32 did_it = 0;
1309   // if(__kmpc_single(ident_t *, gtid)) {
1310   //   SingleOpGen();
1311   //   __kmpc_end_single(ident_t *, gtid);
1312   //   did_it = 1;
1313   // }
1314   // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
1315   // <copy_func>, did_it);
1316 
1317   llvm::AllocaInst *DidIt = nullptr;
1318   if (!CopyprivateVars.empty()) {
1319     // int32 did_it = 0;
1320     auto KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
1321     DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it");
1322     CGF.Builder.CreateAlignedStore(CGF.Builder.getInt32(0), DidIt,
1323                                    DidIt->getAlignment());
1324   }
1325   // Prepare arguments and build a call to __kmpc_single
1326   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
1327   auto *IsSingle =
1328       CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_single), Args);
1329   emitIfStmt(CGF, IsSingle, [&](CodeGenFunction &CGF) -> void {
1330     CodeGenFunction::RunCleanupsScope Scope(CGF);
1331     CGF.EHStack.pushCleanup<CallEndCleanup>(
1332         NormalAndEHCleanup, createRuntimeFunction(OMPRTL__kmpc_end_single),
1333         llvm::makeArrayRef(Args));
1334     SingleOpGen(CGF);
1335     if (DidIt) {
1336       // did_it = 1;
1337       CGF.Builder.CreateAlignedStore(CGF.Builder.getInt32(1), DidIt,
1338                                      DidIt->getAlignment());
1339     }
1340   });
1341   // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
1342   // <copy_func>, did_it);
1343   if (DidIt) {
1344     llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size());
1345     auto CopyprivateArrayTy =
1346         C.getConstantArrayType(C.VoidPtrTy, ArraySize, ArrayType::Normal,
1347                                /*IndexTypeQuals=*/0);
1348     // Create a list of all private variables for copyprivate.
1349     auto *CopyprivateList =
1350         CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list");
1351     for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) {
1352       auto *Elem = CGF.Builder.CreateStructGEP(
1353           CopyprivateList->getAllocatedType(), CopyprivateList, I);
1354       CGF.Builder.CreateAlignedStore(
1355           CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
1356               CGF.EmitLValue(CopyprivateVars[I]).getAddress(), CGF.VoidPtrTy),
1357           Elem, CGM.PointerAlignInBytes);
1358     }
1359     // Build function that copies private values from single region to all other
1360     // threads in the corresponding parallel region.
1361     auto *CpyFn = emitCopyprivateCopyFunction(
1362         CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy)->getPointerTo(),
1363         CopyprivateVars, SrcExprs, DstExprs, AssignmentOps);
1364     auto *BufSize = llvm::ConstantInt::get(
1365         CGM.SizeTy, C.getTypeSizeInChars(CopyprivateArrayTy).getQuantity());
1366     auto *CL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(CopyprivateList,
1367                                                                CGF.VoidPtrTy);
1368     auto *DidItVal =
1369         CGF.Builder.CreateAlignedLoad(DidIt, CGF.PointerAlignInBytes);
1370     llvm::Value *Args[] = {
1371         emitUpdateLocation(CGF, Loc), // ident_t *<loc>
1372         getThreadID(CGF, Loc),        // i32 <gtid>
1373         BufSize,                      // size_t <buf_size>
1374         CL,                           // void *<copyprivate list>
1375         CpyFn,                        // void (*) (void *, void *) <copy_func>
1376         DidItVal                      // i32 did_it
1377     };
1378     CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_copyprivate), Args);
1379   }
1380 }
1381 
1382 void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF,
1383                                         const RegionCodeGenTy &OrderedOpGen,
1384                                         SourceLocation Loc) {
1385   // __kmpc_ordered(ident_t *, gtid);
1386   // OrderedOpGen();
1387   // __kmpc_end_ordered(ident_t *, gtid);
1388   // Prepare arguments and build a call to __kmpc_ordered
1389   {
1390     CodeGenFunction::RunCleanupsScope Scope(CGF);
1391     llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
1392     CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_ordered), Args);
1393     // Build a call to __kmpc_end_ordered
1394     CGF.EHStack.pushCleanup<CallEndCleanup>(
1395         NormalAndEHCleanup, createRuntimeFunction(OMPRTL__kmpc_end_ordered),
1396         llvm::makeArrayRef(Args));
1397     emitInlinedDirective(CGF, OrderedOpGen);
1398   }
1399 }
1400 
1401 void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
1402                                       OpenMPDirectiveKind Kind) {
1403   // Build call __kmpc_cancel_barrier(loc, thread_id);
1404   OpenMPLocationFlags Flags = OMP_IDENT_KMPC;
1405   if (Kind == OMPD_for) {
1406     Flags =
1407         static_cast<OpenMPLocationFlags>(Flags | OMP_IDENT_BARRIER_IMPL_FOR);
1408   } else if (Kind == OMPD_sections) {
1409     Flags = static_cast<OpenMPLocationFlags>(Flags |
1410                                              OMP_IDENT_BARRIER_IMPL_SECTIONS);
1411   } else if (Kind == OMPD_single) {
1412     Flags =
1413         static_cast<OpenMPLocationFlags>(Flags | OMP_IDENT_BARRIER_IMPL_SINGLE);
1414   } else if (Kind == OMPD_barrier) {
1415     Flags = static_cast<OpenMPLocationFlags>(Flags | OMP_IDENT_BARRIER_EXPL);
1416   } else {
1417     Flags = static_cast<OpenMPLocationFlags>(Flags | OMP_IDENT_BARRIER_IMPL);
1418   }
1419   // Build call __kmpc_cancel_barrier(loc, thread_id);
1420   // Replace __kmpc_barrier() function by __kmpc_cancel_barrier() because this
1421   // one provides the same functionality and adds initial support for
1422   // cancellation constructs introduced in OpenMP 4.0. __kmpc_cancel_barrier()
1423   // is provided default by the runtime library so it safe to make such
1424   // replacement.
1425   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags),
1426                          getThreadID(CGF, Loc)};
1427   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_cancel_barrier), Args);
1428 }
1429 
/// \brief Schedule types for 'omp for' loops (these enumerators are taken from
/// the enum sched_type in kmp.h).
///
/// The numeric value is what emitForInit() passes to the runtime as the
/// schedule-type argument, so the values must not be changed independently of
/// the runtime's definition.
enum OpenMPSchedType {
  /// \brief Lower bound for default (unordered) versions.
  OMP_sch_lower = 32,
  /// \brief Static schedule with a chunk specified.
  OMP_sch_static_chunked = 33,
  /// \brief Static schedule without a chunk; also used when no schedule kind
  /// is known.
  OMP_sch_static = 34,
  /// \brief Selected for the 'dynamic' schedule kind.
  OMP_sch_dynamic_chunked = 35,
  /// \brief Selected for the 'guided' schedule kind.
  OMP_sch_guided_chunked = 36,
  /// \brief Selected for the 'runtime' schedule kind.
  OMP_sch_runtime = 37,
  /// \brief Selected for the 'auto' schedule kind.
  OMP_sch_auto = 38,
  /// \brief Lower bound for 'ordered' versions.
  OMP_ord_lower = 64,
  /// \brief Lower bound for 'nomerge' versions.
  OMP_nm_lower = 160,
};
1446 
1447 /// \brief Map the OpenMP loop schedule to the runtime enumeration.
1448 static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind,
1449                                           bool Chunked) {
1450   switch (ScheduleKind) {
1451   case OMPC_SCHEDULE_static:
1452     return Chunked ? OMP_sch_static_chunked : OMP_sch_static;
1453   case OMPC_SCHEDULE_dynamic:
1454     return OMP_sch_dynamic_chunked;
1455   case OMPC_SCHEDULE_guided:
1456     return OMP_sch_guided_chunked;
1457   case OMPC_SCHEDULE_auto:
1458     return OMP_sch_auto;
1459   case OMPC_SCHEDULE_runtime:
1460     return OMP_sch_runtime;
1461   case OMPC_SCHEDULE_unknown:
1462     assert(!Chunked && "chunk was specified but schedule kind not known");
1463     return OMP_sch_static;
1464   }
1465   llvm_unreachable("Unexpected runtime schedule");
1466 }
1467 
1468 bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind,
1469                                          bool Chunked) const {
1470   auto Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
1471   return Schedule == OMP_sch_static;
1472 }
1473 
1474 bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const {
1475   auto Schedule = getRuntimeSchedule(ScheduleKind, /* Chunked */ false);
1476   assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here");
1477   return Schedule != OMP_sch_static;
1478 }
1479 
1480 void CGOpenMPRuntime::emitForInit(CodeGenFunction &CGF, SourceLocation Loc,
1481                                   OpenMPScheduleClauseKind ScheduleKind,
1482                                   unsigned IVSize, bool IVSigned,
1483                                   llvm::Value *IL, llvm::Value *LB,
1484                                   llvm::Value *UB, llvm::Value *ST,
1485                                   llvm::Value *Chunk) {
1486   OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunk != nullptr);
1487   if (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked) {
1488     // Call __kmpc_dispatch_init(
1489     //          ident_t *loc, kmp_int32 tid, kmp_int32 schedule,
1490     //          kmp_int[32|64] lower, kmp_int[32|64] upper,
1491     //          kmp_int[32|64] stride, kmp_int[32|64] chunk);
1492 
1493     // If the Chunk was not specified in the clause - use default value 1.
1494     if (Chunk == nullptr)
1495       Chunk = CGF.Builder.getIntN(IVSize, 1);
1496     llvm::Value *Args[] = { emitUpdateLocation(CGF, Loc, OMP_IDENT_KMPC),
1497                             getThreadID(CGF, Loc),
1498                             CGF.Builder.getInt32(Schedule), // Schedule type
1499                             CGF.Builder.getIntN(IVSize, 0), // Lower
1500                             UB,                             // Upper
1501                             CGF.Builder.getIntN(IVSize, 1), // Stride
1502                             Chunk                           // Chunk
1503     };
1504     CGF.EmitRuntimeCall(createDispatchInitFunction(IVSize, IVSigned), Args);
1505   } else {
1506     // Call __kmpc_for_static_init(
1507     //          ident_t *loc, kmp_int32 tid, kmp_int32 schedtype,
1508     //          kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower,
1509     //          kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
1510     //          kmp_int[32|64] incr, kmp_int[32|64] chunk);
1511     if (Chunk == nullptr) {
1512       assert(Schedule == OMP_sch_static &&
1513              "expected static non-chunked schedule");
1514       // If the Chunk was not specified in the clause - use default value 1.
1515       Chunk = CGF.Builder.getIntN(IVSize, 1);
1516     } else
1517       assert(Schedule == OMP_sch_static_chunked &&
1518              "expected static chunked schedule");
1519     llvm::Value *Args[] = { emitUpdateLocation(CGF, Loc, OMP_IDENT_KMPC),
1520                             getThreadID(CGF, Loc),
1521                             CGF.Builder.getInt32(Schedule), // Schedule type
1522                             IL,                             // &isLastIter
1523                             LB,                             // &LB
1524                             UB,                             // &UB
1525                             ST,                             // &Stride
1526                             CGF.Builder.getIntN(IVSize, 1), // Incr
1527                             Chunk                           // Chunk
1528     };
1529     CGF.EmitRuntimeCall(createForStaticInitFunction(IVSize, IVSigned), Args);
1530   }
1531 }
1532 
1533 void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF,
1534                                           SourceLocation Loc) {
1535   // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid);
1536   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, OMP_IDENT_KMPC),
1537                          getThreadID(CGF, Loc)};
1538   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_for_static_fini),
1539                       Args);
1540 }
1541 
1542 void CGOpenMPRuntime::emitForOrderedDynamicIterationEnd(CodeGenFunction &CGF,
1543                                                         SourceLocation Loc,
1544                                                         unsigned IVSize,
1545                                                         bool IVSigned) {
1546   // Call __kmpc_for_dynamic_fini_(4|8)[u](ident_t *loc, kmp_int32 tid);
1547   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, OMP_IDENT_KMPC),
1548                          getThreadID(CGF, Loc)};
1549   CGF.EmitRuntimeCall(createDispatchFiniFunction(IVSize, IVSigned), Args);
1550 }
1551 
1552 llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF,
1553                                           SourceLocation Loc, unsigned IVSize,
1554                                           bool IVSigned, llvm::Value *IL,
1555                                           llvm::Value *LB, llvm::Value *UB,
1556                                           llvm::Value *ST) {
1557   // Call __kmpc_dispatch_next(
1558   //          ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter,
1559   //          kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper,
1560   //          kmp_int[32|64] *p_stride);
1561   llvm::Value *Args[] = {
1562       emitUpdateLocation(CGF, Loc, OMP_IDENT_KMPC), getThreadID(CGF, Loc),
1563       IL, // &isLastIter
1564       LB, // &Lower
1565       UB, // &Upper
1566       ST  // &Stride
1567   };
1568   llvm::Value *Call =
1569       CGF.EmitRuntimeCall(createDispatchNextFunction(IVSize, IVSigned), Args);
1570   return CGF.EmitScalarConversion(
1571       Call, CGF.getContext().getIntTypeForBitwidth(32, /* Signed */ true),
1572       CGF.getContext().BoolTy);
1573 }
1574 
1575 void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
1576                                            llvm::Value *NumThreads,
1577                                            SourceLocation Loc) {
1578   // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads)
1579   llvm::Value *Args[] = {
1580       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
1581       CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)};
1582   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_num_threads),
1583                       Args);
1584 }
1585 
1586 void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>,
1587                                 SourceLocation Loc) {
1588   // Build call void __kmpc_flush(ident_t *loc)
1589   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_flush),
1590                       emitUpdateLocation(CGF, Loc));
1591 }
1592 
namespace {
/// \brief Indexes of fields for type kmp_task_t.
///
/// The enumerators are used as field indexes into the record built by
/// createKmpTaskTRecordDecl(), so their order must match the order in which
/// that function adds the fields.
enum KmpTaskTFields {
  /// \brief List of shared variables.
  KmpTaskTShareds,
  /// \brief Task routine.
  KmpTaskTRoutine,
  /// \brief Partition id for the untied tasks.
  KmpTaskTPartId,
  /// \brief Function with call of destructors for private variables.
  KmpTaskTDestructors,
  /// \brief Record with list of all private/firstprivate copies for the task
  /// directive.
  KmpTaskTPrivates,
};
} // namespace
1609 
1610 void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) {
1611   if (!KmpRoutineEntryPtrTy) {
1612     // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type.
1613     auto &C = CGM.getContext();
1614     QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy};
1615     FunctionProtoType::ExtProtoInfo EPI;
1616     KmpRoutineEntryPtrQTy = C.getPointerType(
1617         C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI));
1618     KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy);
1619   }
1620 }
1621 
1622 static void addFieldToRecordDecl(ASTContext &C, DeclContext *DC,
1623                                  QualType FieldTy) {
1624   auto *Field = FieldDecl::Create(
1625       C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy,
1626       C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()),
1627       /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit);
1628   Field->setAccess(AS_public);
1629   DC->addDecl(Field);
1630 }
1631 
1632 namespace {
1633 typedef std::pair<CharUnits /*Align*/,
1634                   std::pair<const VarDecl *, const VarDecl *>> VDPair;
1635 } // namespace
1636 
1637 static RecordDecl *createPrivatesRecordDecl(CodeGenModule &CGM,
1638                                             const ArrayRef<VDPair> Privates) {
1639   if (!Privates.empty()) {
1640     auto &C = CGM.getContext();
1641     // Build struct .kmp_privates_t. {
1642     //         /*  private vars  */
1643     //       };
1644     auto *RD = C.buildImplicitRecord(".kmp_privates.t");
1645     RD->startDefinition();
1646     for (auto &&Pair : Privates) {
1647       addFieldToRecordDecl(C, RD,
1648                            Pair.second.first->getType().getNonReferenceType());
1649     }
1650     // TODO: add firstprivate fields.
1651     RD->completeDefinition();
1652     return RD;
1653   }
1654   return nullptr;
1655 }
1656 
1657 static RecordDecl *createKmpTaskTRecordDecl(CodeGenModule &CGM,
1658                                             QualType KmpInt32Ty,
1659                                             QualType KmpRoutineEntryPointerQTy,
1660                                             const ArrayRef<VDPair> Privates) {
1661   auto &C = CGM.getContext();
1662   // Build struct kmp_task_t {
1663   //         void *              shareds;
1664   //         kmp_routine_entry_t routine;
1665   //         kmp_int32           part_id;
1666   //         kmp_routine_entry_t destructors;
1667   //         /*  private vars  */
1668   //       };
1669   auto *RD = C.buildImplicitRecord("kmp_task_t");
1670   RD->startDefinition();
1671   addFieldToRecordDecl(C, RD, C.VoidPtrTy);
1672   addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy);
1673   addFieldToRecordDecl(C, RD, KmpInt32Ty);
1674   addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy);
1675   if (auto *PrivateRD = createPrivatesRecordDecl(CGM, Privates)) {
1676     addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD));
1677   }
1678   RD->completeDefinition();
1679   return RD;
1680 }
1681 
1682 /// \brief Emit a proxy function which accepts kmp_task_t as the second
1683 /// argument.
1684 /// \code
1685 /// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
1686 ///   TaskFunction(gtid, tt->part_id, tt->shareds);
1687 ///   return 0;
1688 /// }
1689 /// \endcode
1690 static llvm::Value *
1691 emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc,
1692                       QualType KmpInt32Ty, QualType KmpTaskTPtrQTy,
1693                       QualType SharedsPtrTy, llvm::Value *TaskFunction,
1694                       llvm::Type *KmpTaskTTy) {
1695   auto &C = CGM.getContext();
1696   FunctionArgList Args;
1697   ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty);
1698   ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc,
1699                                 /*Id=*/nullptr, KmpTaskTPtrQTy);
1700   Args.push_back(&GtidArg);
1701   Args.push_back(&TaskTypeArg);
1702   FunctionType::ExtInfo Info;
1703   auto &TaskEntryFnInfo =
1704       CGM.getTypes().arrangeFreeFunctionDeclaration(KmpInt32Ty, Args, Info,
1705                                                     /*isVariadic=*/false);
1706   auto *TaskEntryTy = CGM.getTypes().GetFunctionType(TaskEntryFnInfo);
1707   auto *TaskEntry =
1708       llvm::Function::Create(TaskEntryTy, llvm::GlobalValue::InternalLinkage,
1709                              ".omp_task_entry.", &CGM.getModule());
1710   CGM.SetLLVMFunctionAttributes(/*D=*/nullptr, TaskEntryFnInfo, TaskEntry);
1711   CodeGenFunction CGF(CGM);
1712   CGF.disableDebugInfo();
1713   CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args);
1714 
1715   // TaskFunction(gtid, tt->part_id, tt->shareds);
1716   auto *GtidParam = CGF.EmitLoadOfScalar(
1717       CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false,
1718       C.getTypeAlignInChars(KmpInt32Ty).getQuantity(), KmpInt32Ty, Loc);
1719   auto TaskTypeArgAddr = CGF.EmitLoadOfScalar(
1720       CGF.GetAddrOfLocalVar(&TaskTypeArg), /*Volatile=*/false,
1721       CGM.PointerAlignInBytes, KmpTaskTPtrQTy, Loc);
1722   auto *PartidPtr = CGF.Builder.CreateStructGEP(KmpTaskTTy, TaskTypeArgAddr,
1723                                                 /*Idx=*/KmpTaskTPartId);
1724   auto *PartidParam = CGF.EmitLoadOfScalar(
1725       PartidPtr, /*Volatile=*/false,
1726       C.getTypeAlignInChars(KmpInt32Ty).getQuantity(), KmpInt32Ty, Loc);
1727   auto *SharedsPtr = CGF.Builder.CreateStructGEP(KmpTaskTTy, TaskTypeArgAddr,
1728                                                  /*Idx=*/KmpTaskTShareds);
1729   auto *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
1730       CGF.EmitLoadOfScalar(SharedsPtr, /*Volatile=*/false,
1731                            CGM.PointerAlignInBytes, C.VoidPtrTy, Loc),
1732       CGF.ConvertTypeForMem(SharedsPtrTy));
1733 
1734   llvm::Value *CallArgs[] = {GtidParam, PartidParam, SharedsParam};
1735   CGF.EmitCallOrInvoke(TaskFunction, CallArgs);
1736   CGF.EmitStoreThroughLValue(
1737       RValue::get(CGF.Builder.getInt32(/*C=*/0)),
1738       CGF.MakeNaturalAlignAddrLValue(CGF.ReturnValue, KmpInt32Ty));
1739   CGF.FinishFunction();
1740   return TaskEntry;
1741 }
1742 
1743 static llvm::Value *
1744 emitDestructorsFunction(CodeGenModule &CGM, SourceLocation Loc,
1745                         QualType KmpInt32Ty, QualType KmpTaskTPtrQTy,
1746                         QualType KmpTaskQTy, RecordDecl *KmpTaskQTyRD) {
1747   auto &C = CGM.getContext();
1748   FunctionArgList Args;
1749   ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty);
1750   ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc,
1751                                 /*Id=*/nullptr, KmpTaskTPtrQTy);
1752   Args.push_back(&GtidArg);
1753   Args.push_back(&TaskTypeArg);
1754   FunctionType::ExtInfo Info;
1755   auto &DestructorFnInfo =
1756       CGM.getTypes().arrangeFreeFunctionDeclaration(KmpInt32Ty, Args, Info,
1757                                                     /*isVariadic=*/false);
1758   auto *DestructorFnTy = CGM.getTypes().GetFunctionType(DestructorFnInfo);
1759   auto *DestructorFn =
1760       llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage,
1761                              ".omp_task_destructor.", &CGM.getModule());
1762   CGM.SetLLVMFunctionAttributes(/*D=*/nullptr, DestructorFnInfo, DestructorFn);
1763   CodeGenFunction CGF(CGM);
1764   CGF.disableDebugInfo();
1765   CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo,
1766                     Args);
1767 
1768   auto *TaskTypeArgAddr = CGF.EmitLoadOfScalar(
1769       CGF.GetAddrOfLocalVar(&TaskTypeArg), /*Volatile=*/false,
1770       CGM.PointerAlignInBytes, KmpTaskTPtrQTy, Loc);
1771   LValue Base = CGF.MakeNaturalAlignAddrLValue(TaskTypeArgAddr, KmpTaskQTy);
1772   auto FI = std::next(KmpTaskQTyRD->field_begin(), KmpTaskTPrivates);
1773   Base = CGF.EmitLValueForField(Base, *FI);
1774   for (auto *Field :
1775        cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) {
1776     if (auto DtorKind = Field->getType().isDestructedType()) {
1777       auto FieldLValue = CGF.EmitLValueForField(Base, Field);
1778       CGF.pushDestroy(DtorKind, FieldLValue.getAddress(), Field->getType());
1779     }
1780   }
1781   CGF.FinishFunction();
1782   return DestructorFn;
1783 }
1784 
1785 static int array_pod_sort_comparator(const VDPair *P1, const VDPair *P2) {
1786   return P1->first < P2->first ? 1 : (P2->first < P1->first ? -1 : 0);
1787 }
1788 
1789 void CGOpenMPRuntime::emitTaskCall(
1790     CodeGenFunction &CGF, SourceLocation Loc, const OMPExecutableDirective &D,
1791     bool Tied, llvm::PointerIntPair<llvm::Value *, 1, bool> Final,
1792     llvm::Value *TaskFunction, QualType SharedsTy, llvm::Value *Shareds,
1793     const Expr *IfCond, const ArrayRef<const Expr *> PrivateVars,
1794     const ArrayRef<const Expr *> PrivateCopies) {
1795   auto &C = CGM.getContext();
1796   llvm::SmallVector<VDPair, 8> Privates;
1797   auto I = PrivateCopies.begin();
1798   // Aggeregate privates and sort them by the alignment.
1799   for (auto *E : PrivateVars) {
1800     auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
1801     Privates.push_back(std::make_pair(
1802         C.getTypeAlignInChars(VD->getType()),
1803         std::make_pair(VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()))));
1804     ++I;
1805   }
1806   llvm::array_pod_sort(Privates.begin(), Privates.end(),
1807                        array_pod_sort_comparator);
1808   auto KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
1809   // Build type kmp_routine_entry_t (if not built yet).
1810   emitKmpRoutineEntryT(KmpInt32Ty);
1811   // Build particular struct kmp_task_t for the given task.
1812   auto *KmpTaskQTyRD = createKmpTaskTRecordDecl(
1813       CGM, KmpInt32Ty, KmpRoutineEntryPtrQTy, Privates);
1814   auto KmpTaskQTy = C.getRecordType(KmpTaskQTyRD);
1815   QualType KmpTaskTPtrQTy = C.getPointerType(KmpTaskQTy);
1816   auto *KmpTaskTTy = CGF.ConvertType(KmpTaskQTy);
1817   auto *KmpTaskTPtrTy = KmpTaskTTy->getPointerTo();
1818   auto KmpTaskTySize = CGM.getSize(C.getTypeSizeInChars(KmpTaskQTy));
1819   QualType SharedsPtrTy = C.getPointerType(SharedsTy);
1820 
1821   // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid,
1822   // kmp_task_t *tt);
1823   auto *TaskEntry =
1824       emitProxyTaskFunction(CGM, Loc, KmpInt32Ty, KmpTaskTPtrQTy, SharedsPtrTy,
1825                             TaskFunction, KmpTaskTTy);
1826 
1827   // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
1828   // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
1829   // kmp_routine_entry_t *task_entry);
1830   // Task flags. Format is taken from
1831   // http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp.h,
1832   // description of kmp_tasking_flags struct.
1833   const unsigned TiedFlag = 0x1;
1834   const unsigned FinalFlag = 0x2;
1835   unsigned Flags = Tied ? TiedFlag : 0;
1836   auto *TaskFlags =
1837       Final.getPointer()
1838           ? CGF.Builder.CreateSelect(Final.getPointer(),
1839                                      CGF.Builder.getInt32(FinalFlag),
1840                                      CGF.Builder.getInt32(/*C=*/0))
1841           : CGF.Builder.getInt32(Final.getInt() ? FinalFlag : 0);
1842   TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags));
1843   auto SharedsSize = C.getTypeSizeInChars(SharedsTy);
1844   llvm::Value *AllocArgs[] = {emitUpdateLocation(CGF, Loc),
1845                               getThreadID(CGF, Loc), TaskFlags, KmpTaskTySize,
1846                               CGM.getSize(SharedsSize),
1847                               CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
1848                                   TaskEntry, KmpRoutineEntryPtrTy)};
1849   auto *NewTask = CGF.EmitRuntimeCall(
1850       createRuntimeFunction(OMPRTL__kmpc_omp_task_alloc), AllocArgs);
1851   auto *NewTaskNewTaskTTy =
1852       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(NewTask, KmpTaskTPtrTy);
1853   // Fill the data in the resulting kmp_task_t record.
1854   // Copy shareds if there are any.
1855   auto *KmpTaskSharedsPtr = CGF.EmitLoadOfScalar(
1856       CGF.Builder.CreateStructGEP(KmpTaskTTy, NewTaskNewTaskTTy,
1857                                   /*Idx=*/KmpTaskTShareds),
1858       /*Volatile=*/false, CGM.PointerAlignInBytes, SharedsPtrTy, Loc);
1859   if (!SharedsTy->getAsStructureType()->getDecl()->field_empty())
1860     CGF.EmitAggregateCopy(KmpTaskSharedsPtr, Shareds, SharedsTy);
1861   // Emit initial values for private copies (if any).
1862   bool NeedsCleanup = false;
1863   if (!Privates.empty()) {
1864     LValue Base = CGF.MakeNaturalAlignAddrLValue(NewTaskNewTaskTTy, KmpTaskQTy);
1865     auto FI = std::next(KmpTaskQTyRD->field_begin(), KmpTaskTPrivates);
1866     Base = CGF.EmitLValueForField(Base, *FI);
1867     FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin();
1868     LValue SharedsBase = CGF.MakeNaturalAlignAddrLValue(
1869         CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
1870             KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy)),
1871         SharedsTy);
1872     CodeGenFunction::CGCapturedStmtInfo CapturesInfo(
1873         cast<CapturedStmt>(*D.getAssociatedStmt()));
1874     for (auto &&Pair : Privates) {
1875       auto *VD = Pair.second.second;
1876       auto *Init = VD->getAnyInitializer();
1877       LValue PrivateLValue = CGF.EmitLValueForField(Base, *FI);
1878       if (Init) {
1879         CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false);
1880       }
1881       NeedsCleanup = NeedsCleanup || FI->getType().isDestructedType();
1882       // Copy addresses of privates to corresponding references in the list of
1883       // captured variables.
1884       //   ...
1885       //   tt->shareds.var_addr = &tt->privates.private_var;
1886       //   ...
1887       auto *OriginalVD = Pair.second.first;
1888       auto *SharedField = CapturesInfo.lookup(OriginalVD);
1889       auto SharedRefLValue =
1890           CGF.EmitLValueForFieldInitialization(SharedsBase, SharedField);
1891       CGF.EmitStoreThroughLValue(RValue::get(PrivateLValue.getAddress()),
1892                                  SharedRefLValue);
1893       ++FI, ++I;
1894     }
1895   }
1896   // Provide pointer to function with destructors for privates.
1897   llvm::Value *DestructorFn =
1898       NeedsCleanup
1899           ? emitDestructorsFunction(CGM, Loc, KmpInt32Ty, KmpTaskTPtrQTy,
1900                                     KmpTaskQTy, KmpTaskQTyRD)
1901           : llvm::ConstantPointerNull::get(
1902                 cast<llvm::PointerType>(KmpRoutineEntryPtrTy));
1903   CGF.Builder.CreateAlignedStore(
1904       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(DestructorFn,
1905                                                       KmpRoutineEntryPtrTy),
1906       CGF.Builder.CreateStructGEP(KmpTaskTTy, NewTaskNewTaskTTy,
1907                                   /*Idx=*/KmpTaskTDestructors),
1908       CGM.PointerAlignInBytes);
1909   // NOTE: routine and part_id fields are intialized by __kmpc_omp_task_alloc()
1910   // libcall.
1911   // Build kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t
1912   // *new_task);
1913   auto *ThreadID = getThreadID(CGF, Loc);
1914   llvm::Value *TaskArgs[] = {emitUpdateLocation(CGF, Loc), ThreadID, NewTask};
1915   auto &&ThenCodeGen = [this, &TaskArgs](CodeGenFunction &CGF) {
1916     // TODO: add check for untied tasks.
1917     CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task), TaskArgs);
1918   };
1919   auto &&ElseCodeGen =
1920       [this, &TaskArgs, ThreadID, NewTaskNewTaskTTy, TaskEntry](
1921           CodeGenFunction &CGF) {
1922         CodeGenFunction::RunCleanupsScope LocalScope(CGF);
1923         CGF.EmitRuntimeCall(
1924             createRuntimeFunction(OMPRTL__kmpc_omp_task_begin_if0), TaskArgs);
1925         // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
1926         // kmp_task_t *new_task);
1927         CGF.EHStack.pushCleanup<CallEndCleanup>(
1928             NormalAndEHCleanup,
1929             createRuntimeFunction(OMPRTL__kmpc_omp_task_complete_if0),
1930             llvm::makeArrayRef(TaskArgs));
1931 
1932         // Call proxy_task_entry(gtid, new_task);
1933         llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy};
1934         CGF.EmitCallOrInvoke(TaskEntry, OutlinedFnArgs);
1935       };
1936   if (IfCond) {
1937     emitOMPIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen);
1938   } else {
1939     CodeGenFunction::RunCleanupsScope Scope(CGF);
1940     ThenCodeGen(CGF);
1941   }
1942 }
1943 
/// \brief Emit the internal helper ".omp.reduction.reduction_func" with the
/// signature void (void *LHSArg, void *RHSArg).
///
/// Both arguments are cast to \a ArgsType and treated as lists of pointers.
/// For every reduction, the variables referenced by the corresponding
/// entries of \a LHSExprs and \a RHSExprs are remapped to element I of the
/// respective list, and then each expression in \a ReductionOps is emitted.
/// \param ArgsType LLVM type both arguments are cast to.
/// \param LHSExprs, RHSExprs DeclRefExprs naming the helper variables used by
/// \a ReductionOps; indexed in parallel with \a ReductionOps.
/// \return The emitted function.
static llvm::Value *emitReductionFunction(CodeGenModule &CGM,
                                          llvm::Type *ArgsType,
                                          ArrayRef<const Expr *> LHSExprs,
                                          ArrayRef<const Expr *> RHSExprs,
                                          ArrayRef<const Expr *> ReductionOps) {
  auto &C = CGM.getContext();

  // void reduction_func(void *LHSArg, void *RHSArg);
  FunctionArgList Args;
  ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, SourceLocation(), /*Id=*/nullptr,
                           C.VoidPtrTy);
  ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, SourceLocation(), /*Id=*/nullptr,
                           C.VoidPtrTy);
  Args.push_back(&LHSArg);
  Args.push_back(&RHSArg);
  FunctionType::ExtInfo EI;
  auto &CGFI = CGM.getTypes().arrangeFreeFunctionDeclaration(
      C.VoidTy, Args, EI, /*isVariadic=*/false);
  auto *Fn = llvm::Function::Create(
      CGM.getTypes().GetFunctionType(CGFI), llvm::GlobalValue::InternalLinkage,
      ".omp.reduction.reduction_func", &CGM.getModule());
  CGM.SetLLVMFunctionAttributes(/*D=*/nullptr, CGFI, Fn);
  // The helper body is emitted with its own CodeGenFunction.
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args);

  // Dst = (void*[n])(LHSArg);
  // Src = (void*[n])(RHSArg);
  auto *LHS = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateAlignedLoad(CGF.GetAddrOfLocalVar(&LHSArg),
                                    CGF.PointerAlignInBytes),
      ArgsType);
  auto *RHS = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateAlignedLoad(CGF.GetAddrOfLocalVar(&RHSArg),
                                    CGF.PointerAlignInBytes),
      ArgsType);

  //  ...
  //  *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]);
  //  ...
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I) {
    // Map the RHS helper variable to the pointer loaded from rhs[I], cast to
    // a pointer to the expression's type.
    Scope.addPrivate(
        cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl()),
        [&]() -> llvm::Value *{
          return CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
              CGF.Builder.CreateAlignedLoad(
                  CGF.Builder.CreateStructGEP(/*Ty=*/nullptr, RHS, I),
                  CGM.PointerAlignInBytes),
              CGF.ConvertTypeForMem(C.getPointerType(RHSExprs[I]->getType())));
        });
    // Same for the LHS helper variable and lhs[I].
    Scope.addPrivate(
        cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl()),
        [&]() -> llvm::Value *{
          return CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
              CGF.Builder.CreateAlignedLoad(
                  CGF.Builder.CreateStructGEP(/*Ty=*/nullptr, LHS, I),
                  CGM.PointerAlignInBytes),
              CGF.ConvertTypeForMem(C.getPointerType(LHSExprs[I]->getType())));
        });
  }
  Scope.Privatize();
  // With the helper variables remapped, emit every reduction operation; the
  // expression results are discarded.
  for (auto *E : ReductionOps) {
    CGF.EmitIgnoredExpr(E);
  }
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}
2012 
2013 void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
2014                                     ArrayRef<const Expr *> LHSExprs,
2015                                     ArrayRef<const Expr *> RHSExprs,
2016                                     ArrayRef<const Expr *> ReductionOps,
2017                                     bool WithNowait) {
2018   // Next code should be emitted for reduction:
2019   //
2020   // static kmp_critical_name lock = { 0 };
2021   //
2022   // void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
2023   //  *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]);
2024   //  ...
2025   //  *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1],
2026   //  *(Type<n>-1*)rhs[<n>-1]);
2027   // }
2028   //
2029   // ...
2030   // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]};
2031   // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
2032   // RedList, reduce_func, &<lock>)) {
2033   // case 1:
2034   //  ...
2035   //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
2036   //  ...
2037   // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
2038   // break;
2039   // case 2:
2040   //  ...
2041   //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
2042   //  ...
2043   // break;
2044   // default:;
2045   // }
2046 
2047   auto &C = CGM.getContext();
2048 
2049   // 1. Build a list of reduction variables.
2050   // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]};
2051   llvm::APInt ArraySize(/*unsigned int numBits=*/32, RHSExprs.size());
2052   QualType ReductionArrayTy =
2053       C.getConstantArrayType(C.VoidPtrTy, ArraySize, ArrayType::Normal,
2054                              /*IndexTypeQuals=*/0);
2055   auto *ReductionList =
2056       CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list");
2057   for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I) {
2058     auto *Elem = CGF.Builder.CreateStructGEP(/*Ty=*/nullptr, ReductionList, I);
2059     CGF.Builder.CreateAlignedStore(
2060         CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2061             CGF.EmitLValue(RHSExprs[I]).getAddress(), CGF.VoidPtrTy),
2062         Elem, CGM.PointerAlignInBytes);
2063   }
2064 
2065   // 2. Emit reduce_func().
2066   auto *ReductionFn = emitReductionFunction(
2067       CGM, CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo(), LHSExprs,
2068       RHSExprs, ReductionOps);
2069 
2070   // 3. Create static kmp_critical_name lock = { 0 };
2071   auto *Lock = getCriticalRegionLock(".reduction");
2072 
2073   // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
2074   // RedList, reduce_func, &<lock>);
2075   auto *IdentTLoc = emitUpdateLocation(
2076       CGF, Loc,
2077       static_cast<OpenMPLocationFlags>(OMP_IDENT_KMPC | OMP_ATOMIC_REDUCE));
2078   auto *ThreadId = getThreadID(CGF, Loc);
2079   auto *ReductionArrayTySize = llvm::ConstantInt::get(
2080       CGM.SizeTy, C.getTypeSizeInChars(ReductionArrayTy).getQuantity());
2081   auto *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(ReductionList,
2082                                                              CGF.VoidPtrTy);
2083   llvm::Value *Args[] = {
2084       IdentTLoc,                             // ident_t *<loc>
2085       ThreadId,                              // i32 <gtid>
2086       CGF.Builder.getInt32(RHSExprs.size()), // i32 <n>
2087       ReductionArrayTySize,                  // size_type sizeof(RedList)
2088       RL,                                    // void *RedList
2089       ReductionFn, // void (*) (void *, void *) <reduce_func>
2090       Lock         // kmp_critical_name *&<lock>
2091   };
2092   auto Res = CGF.EmitRuntimeCall(
2093       createRuntimeFunction(WithNowait ? OMPRTL__kmpc_reduce_nowait
2094                                        : OMPRTL__kmpc_reduce),
2095       Args);
2096 
2097   // 5. Build switch(res)
2098   auto *DefaultBB = CGF.createBasicBlock(".omp.reduction.default");
2099   auto *SwInst = CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2);
2100 
2101   // 6. Build case 1:
2102   //  ...
2103   //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
2104   //  ...
2105   // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
2106   // break;
2107   auto *Case1BB = CGF.createBasicBlock(".omp.reduction.case1");
2108   SwInst->addCase(CGF.Builder.getInt32(1), Case1BB);
2109   CGF.EmitBlock(Case1BB);
2110 
2111   {
2112     CodeGenFunction::RunCleanupsScope Scope(CGF);
2113     // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
2114     llvm::Value *EndArgs[] = {
2115         IdentTLoc, // ident_t *<loc>
2116         ThreadId,  // i32 <gtid>
2117         Lock       // kmp_critical_name *&<lock>
2118     };
2119     CGF.EHStack.pushCleanup<CallEndCleanup>(
2120         NormalAndEHCleanup,
2121         createRuntimeFunction(WithNowait ? OMPRTL__kmpc_end_reduce_nowait
2122                                          : OMPRTL__kmpc_end_reduce),
2123         llvm::makeArrayRef(EndArgs));
2124     for (auto *E : ReductionOps) {
2125       CGF.EmitIgnoredExpr(E);
2126     }
2127   }
2128 
2129   CGF.EmitBranch(DefaultBB);
2130 
2131   // 7. Build case 2:
2132   //  ...
2133   //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
2134   //  ...
2135   // break;
2136   auto *Case2BB = CGF.createBasicBlock(".omp.reduction.case2");
2137   SwInst->addCase(CGF.Builder.getInt32(2), Case2BB);
2138   CGF.EmitBlock(Case2BB);
2139 
2140   {
2141     CodeGenFunction::RunCleanupsScope Scope(CGF);
2142     auto I = LHSExprs.begin();
2143     for (auto *E : ReductionOps) {
2144       const Expr *XExpr = nullptr;
2145       const Expr *EExpr = nullptr;
2146       const Expr *UpExpr = nullptr;
2147       BinaryOperatorKind BO = BO_Comma;
2148       // Try to emit update expression as a simple atomic.
2149       if (auto *ACO = dyn_cast<AbstractConditionalOperator>(E)) {
2150         // If this is a conditional operator, analyze it's condition for
2151         // min/max reduction operator.
2152         E = ACO->getCond();
2153       }
2154       if (auto *BO = dyn_cast<BinaryOperator>(E)) {
2155         if (BO->getOpcode() == BO_Assign) {
2156           XExpr = BO->getLHS();
2157           UpExpr = BO->getRHS();
2158         }
2159       }
2160       // Analyze RHS part of the whole expression.
2161       if (UpExpr) {
2162         if (auto *BORHS =
2163                 dyn_cast<BinaryOperator>(UpExpr->IgnoreParenImpCasts())) {
2164           EExpr = BORHS->getRHS();
2165           BO = BORHS->getOpcode();
2166         }
2167       }
2168       if (XExpr) {
2169         auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl());
2170         LValue X = CGF.EmitLValue(XExpr);
2171         RValue E;
2172         if (EExpr)
2173           E = CGF.EmitAnyExpr(EExpr);
2174         CGF.EmitOMPAtomicSimpleUpdateExpr(
2175             X, E, BO, /*IsXLHSInRHSPart=*/true, llvm::Monotonic, Loc,
2176             [&CGF, UpExpr, VD](RValue XRValue) {
2177               CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
2178               PrivateScope.addPrivate(
2179                   VD, [&CGF, VD, XRValue]() -> llvm::Value *{
2180                     auto *LHSTemp = CGF.CreateMemTemp(VD->getType());
2181                     CGF.EmitStoreThroughLValue(
2182                         XRValue,
2183                         CGF.MakeNaturalAlignAddrLValue(LHSTemp, VD->getType()));
2184                     return LHSTemp;
2185                   });
2186               (void)PrivateScope.Privatize();
2187               return CGF.EmitAnyExpr(UpExpr);
2188             });
2189       } else {
2190         // Emit as a critical region.
2191         emitCriticalRegion(CGF, ".atomic_reduction", [E](CodeGenFunction &CGF) {
2192           CGF.EmitIgnoredExpr(E);
2193         }, Loc);
2194       }
2195       ++I;
2196     }
2197   }
2198 
2199   CGF.EmitBranch(DefaultBB);
2200   CGF.EmitBlock(DefaultBB, /*IsFinished=*/true);
2201 }
2202 
2203 void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
2204                                        SourceLocation Loc) {
2205   // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
2206   // global_tid);
2207   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2208   // Ignore return result until untied tasks are supported.
2209   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_taskwait), Args);
2210 }
2211 
2212 void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF,
2213                                            const RegionCodeGenTy &CodeGen) {
2214   InlinedOpenMPRegionRAII Region(CGF, CodeGen);
2215   CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr);
2216 }
2217 
2218