1 //===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This provides a class for OpenMP runtime code generation.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "CGOpenMPRuntime.h"
15 #include "CodeGenFunction.h"
16 #include "CGCleanup.h"
17 #include "clang/AST/Decl.h"
18 #include "clang/AST/StmtOpenMP.h"
19 #include "llvm/ADT/ArrayRef.h"
20 #include "llvm/IR/CallSite.h"
21 #include "llvm/IR/DerivedTypes.h"
22 #include "llvm/IR/GlobalValue.h"
23 #include "llvm/IR/Value.h"
24 #include "llvm/Support/raw_ostream.h"
25 #include <cassert>
26 
27 using namespace clang;
28 using namespace CodeGen;
29 
30 namespace {
31 /// \brief Base class for handling code generation inside OpenMP regions.
32 class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
33 public:
34   /// \brief Kinds of OpenMP regions used in codegen.
35   enum CGOpenMPRegionKind {
36     /// \brief Region with outlined function for standalone 'parallel'
37     /// directive.
38     ParallelOutlinedRegion,
39     /// \brief Region with outlined function for standalone 'task' directive.
40     TaskOutlinedRegion,
41     /// \brief Region for constructs that do not require function outlining,
42     /// like 'for', 'sections', 'atomic' etc. directives.
43     InlinedRegion,
44     /// \brief Region with outlined function for standalone 'target' directive.
45     TargetRegion,
46   };
47 
48   CGOpenMPRegionInfo(const CapturedStmt &CS,
49                      const CGOpenMPRegionKind RegionKind,
50                      const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
51                      bool HasCancel)
52       : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind),
53         CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {}
54 
55   CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind,
56                      const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
57                      bool HasCancel)
58       : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen),
59         Kind(Kind), HasCancel(HasCancel) {}
60 
61   /// \brief Get a variable or parameter for storing global thread id
62   /// inside OpenMP construct.
63   virtual const VarDecl *getThreadIDVariable() const = 0;
64 
65   /// \brief Emit the captured statement body.
66   void EmitBody(CodeGenFunction &CGF, const Stmt *S) override;
67 
68   /// \brief Get an LValue for the current ThreadID variable.
69   /// \return LValue for thread id variable. This LValue always has type int32*.
70   virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF);
71 
72   CGOpenMPRegionKind getRegionKind() const { return RegionKind; }
73 
74   OpenMPDirectiveKind getDirectiveKind() const { return Kind; }
75 
76   bool hasCancel() const { return HasCancel; }
77 
78   static bool classof(const CGCapturedStmtInfo *Info) {
79     return Info->getKind() == CR_OpenMP;
80   }
81 
82 protected:
83   CGOpenMPRegionKind RegionKind;
84   const RegionCodeGenTy &CodeGen;
85   OpenMPDirectiveKind Kind;
86   bool HasCancel;
87 };
88 
89 /// \brief API for captured statement code generation in OpenMP constructs.
90 class CGOpenMPOutlinedRegionInfo : public CGOpenMPRegionInfo {
91 public:
92   CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar,
93                              const RegionCodeGenTy &CodeGen,
94                              OpenMPDirectiveKind Kind, bool HasCancel)
95       : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind,
96                            HasCancel),
97         ThreadIDVar(ThreadIDVar) {
98     assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
99   }
100   /// \brief Get a variable or parameter for storing global thread id
101   /// inside OpenMP construct.
102   const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }
103 
104   /// \brief Get the name of the capture helper.
105   StringRef getHelperName() const override { return ".omp_outlined."; }
106 
107   static bool classof(const CGCapturedStmtInfo *Info) {
108     return CGOpenMPRegionInfo::classof(Info) &&
109            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
110                ParallelOutlinedRegion;
111   }
112 
113 private:
114   /// \brief A variable or parameter storing global thread id for OpenMP
115   /// constructs.
116   const VarDecl *ThreadIDVar;
117 };
118 
119 /// \brief API for captured statement code generation in OpenMP constructs.
120 class CGOpenMPTaskOutlinedRegionInfo : public CGOpenMPRegionInfo {
121 public:
122   CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS,
123                                  const VarDecl *ThreadIDVar,
124                                  const RegionCodeGenTy &CodeGen,
125                                  OpenMPDirectiveKind Kind, bool HasCancel)
126       : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel),
127         ThreadIDVar(ThreadIDVar) {
128     assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
129   }
130   /// \brief Get a variable or parameter for storing global thread id
131   /// inside OpenMP construct.
132   const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }
133 
134   /// \brief Get an LValue for the current ThreadID variable.
135   LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override;
136 
137   /// \brief Get the name of the capture helper.
138   StringRef getHelperName() const override { return ".omp_outlined."; }
139 
140   static bool classof(const CGCapturedStmtInfo *Info) {
141     return CGOpenMPRegionInfo::classof(Info) &&
142            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
143                TaskOutlinedRegion;
144   }
145 
146 private:
147   /// \brief A variable or parameter storing global thread id for OpenMP
148   /// constructs.
149   const VarDecl *ThreadIDVar;
150 };
151 
152 /// \brief API for inlined captured statement code generation in OpenMP
153 /// constructs.
154 class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo {
155 public:
156   CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI,
157                             const RegionCodeGenTy &CodeGen,
158                             OpenMPDirectiveKind Kind, bool HasCancel)
159       : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel),
160         OldCSI(OldCSI),
161         OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {}
162   // \brief Retrieve the value of the context parameter.
163   llvm::Value *getContextValue() const override {
164     if (OuterRegionInfo)
165       return OuterRegionInfo->getContextValue();
166     llvm_unreachable("No context value for inlined OpenMP region");
167   }
168   void setContextValue(llvm::Value *V) override {
169     if (OuterRegionInfo) {
170       OuterRegionInfo->setContextValue(V);
171       return;
172     }
173     llvm_unreachable("No context value for inlined OpenMP region");
174   }
175   /// \brief Lookup the captured field decl for a variable.
176   const FieldDecl *lookup(const VarDecl *VD) const override {
177     if (OuterRegionInfo)
178       return OuterRegionInfo->lookup(VD);
179     // If there is no outer outlined region,no need to lookup in a list of
180     // captured variables, we can use the original one.
181     return nullptr;
182   }
183   FieldDecl *getThisFieldDecl() const override {
184     if (OuterRegionInfo)
185       return OuterRegionInfo->getThisFieldDecl();
186     return nullptr;
187   }
188   /// \brief Get a variable or parameter for storing global thread id
189   /// inside OpenMP construct.
190   const VarDecl *getThreadIDVariable() const override {
191     if (OuterRegionInfo)
192       return OuterRegionInfo->getThreadIDVariable();
193     return nullptr;
194   }
195 
196   /// \brief Get the name of the capture helper.
197   StringRef getHelperName() const override {
198     if (auto *OuterRegionInfo = getOldCSI())
199       return OuterRegionInfo->getHelperName();
200     llvm_unreachable("No helper name for inlined OpenMP construct");
201   }
202 
203   CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; }
204 
205   static bool classof(const CGCapturedStmtInfo *Info) {
206     return CGOpenMPRegionInfo::classof(Info) &&
207            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion;
208   }
209 
210 private:
211   /// \brief CodeGen info about outer OpenMP region.
212   CodeGenFunction::CGCapturedStmtInfo *OldCSI;
213   CGOpenMPRegionInfo *OuterRegionInfo;
214 };
215 
216 /// \brief API for captured statement code generation in OpenMP target
217 /// constructs. For this captures, implicit parameters are used instead of the
218 /// captured fields.
219 class CGOpenMPTargetRegionInfo : public CGOpenMPRegionInfo {
220 public:
221   CGOpenMPTargetRegionInfo(const CapturedStmt &CS,
222                            const RegionCodeGenTy &CodeGen)
223       : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target,
224                            /*HasCancel = */ false) {}
225 
226   /// \brief This is unused for target regions because each starts executing
227   /// with a single thread.
228   const VarDecl *getThreadIDVariable() const override { return nullptr; }
229 
230   /// \brief Get the name of the capture helper.
231   StringRef getHelperName() const override { return ".omp_offloading."; }
232 
233   static bool classof(const CGCapturedStmtInfo *Info) {
234     return CGOpenMPRegionInfo::classof(Info) &&
235            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion;
236   }
237 };
238 
239 /// \brief RAII for emitting code of OpenMP constructs.
240 class InlinedOpenMPRegionRAII {
241   CodeGenFunction &CGF;
242 
243 public:
244   /// \brief Constructs region for combined constructs.
245   /// \param CodeGen Code generation sequence for combined directives. Includes
246   /// a list of functions used for code generation of implicitly inlined
247   /// regions.
248   InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen,
249                           OpenMPDirectiveKind Kind, bool HasCancel)
250       : CGF(CGF) {
251     // Start emission for the construct.
252     CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo(
253         CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel);
254   }
255   ~InlinedOpenMPRegionRAII() {
256     // Restore original CapturedStmtInfo only if we're done with code emission.
257     auto *OldCSI =
258         cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI();
259     delete CGF.CapturedStmtInfo;
260     CGF.CapturedStmtInfo = OldCSI;
261   }
262 };
263 
264 } // anonymous namespace
265 
266 static LValue emitLoadOfPointerLValue(CodeGenFunction &CGF, Address PtrAddr,
267                                       QualType Ty) {
268   AlignmentSource Source;
269   CharUnits Align = CGF.getNaturalPointeeTypeAlignment(Ty, &Source);
270   return CGF.MakeAddrLValue(Address(CGF.Builder.CreateLoad(PtrAddr), Align),
271                             Ty->getPointeeType(), Source);
272 }
273 
274 LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) {
275   return emitLoadOfPointerLValue(CGF,
276                                  CGF.GetAddrOfLocalVar(getThreadIDVariable()),
277                                  getThreadIDVariable()->getType());
278 }
279 
/// Emits the region body by invoking the stored CodeGen callback. The statement
/// argument is ignored. The callback runs between pushTerminate() and
/// popTerminate() on the EH stack and inside its own cleanups scope.
void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt * /*S*/) {
  // 1.2.2 OpenMP Language Terminology
  // Structured block - An executable statement with a single entry at the
  // top and a single exit at the bottom.
  // The point of exit cannot be a branch out of the structured block.
  // longjmp() and throw() must not violate the entry/exit criteria.
  CGF.EHStack.pushTerminate();
  {
    // The nested block makes the cleanups scope end before popTerminate().
    CodeGenFunction::RunCleanupsScope Scope(CGF);
    CodeGen(CGF);
  }
  CGF.EHStack.popTerminate();
}
293 
294 LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue(
295     CodeGenFunction &CGF) {
296   return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()),
297                             getThreadIDVariable()->getType(),
298                             AlignmentSource::Decl);
299 }
300 
301 CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM)
302     : CGM(CGM), DefaultOpenMPPSource(nullptr), KmpRoutineEntryPtrTy(nullptr) {
303   IdentTy = llvm::StructType::create(
304       "ident_t", CGM.Int32Ty /* reserved_1 */, CGM.Int32Ty /* flags */,
305       CGM.Int32Ty /* reserved_2 */, CGM.Int32Ty /* reserved_3 */,
306       CGM.Int8PtrTy /* psource */, nullptr);
307   // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...)
308   llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty),
309                                llvm::PointerType::getUnqual(CGM.Int32Ty)};
310   Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true);
311   KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8);
312 }
313 
/// Removes all entries from InternalVars.
void CGOpenMPRuntime::clear() {
  InternalVars.clear();
}
317 
// Layout information for ident_t.

/// Returns the alignment used for ident_t objects, which is the pointer
/// alignment of the module.
static CharUnits getIdentAlign(CodeGenModule &CGM) {
  return CGM.getPointerAlign();
}
322 static CharUnits getIdentSize(CodeGenModule &CGM) {
323   assert((4 * CGM.getPointerSize()).isMultipleOf(CGM.getPointerAlign()));
324   return CharUnits::fromQuantity(16) + CGM.getPointerSize();
325 }
326 static CharUnits getOffsetOfIdentField(CGOpenMPRuntime::IdentFieldIndex Field) {
327   // All the fields except the last are i32, so this works beautifully.
328   return unsigned(Field) * CharUnits::fromQuantity(4);
329 }
330 static Address createIdentFieldGEP(CodeGenFunction &CGF, Address Addr,
331                                    CGOpenMPRuntime::IdentFieldIndex Field,
332                                    const llvm::Twine &Name = "") {
333   auto Offset = getOffsetOfIdentField(Field);
334   return CGF.Builder.CreateStructGEP(Addr, Field, Offset, Name);
335 }
336 
337 llvm::Value *CGOpenMPRuntime::emitParallelOutlinedFunction(
338     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
339     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
340   assert(ThreadIDVar->getType()->isPointerType() &&
341          "thread id variable must be of type kmp_int32 *");
342   const CapturedStmt *CS = cast<CapturedStmt>(D.getAssociatedStmt());
343   CodeGenFunction CGF(CGM, true);
344   bool HasCancel = false;
345   if (auto *OPD = dyn_cast<OMPParallelDirective>(&D))
346     HasCancel = OPD->hasCancel();
347   else if (auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D))
348     HasCancel = OPSD->hasCancel();
349   else if (auto *OPFD = dyn_cast<OMPParallelForDirective>(&D))
350     HasCancel = OPFD->hasCancel();
351   CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind,
352                                     HasCancel);
353   CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
354   return CGF.GenerateOpenMPCapturedStmtFunction(*CS);
355 }
356 
357 llvm::Value *CGOpenMPRuntime::emitTaskOutlinedFunction(
358     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
359     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
360   assert(!ThreadIDVar->getType()->isPointerType() &&
361          "thread id variable must be of type kmp_int32 for tasks");
362   auto *CS = cast<CapturedStmt>(D.getAssociatedStmt());
363   CodeGenFunction CGF(CGM, true);
364   CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen,
365                                         InnermostKind,
366                                         cast<OMPTaskDirective>(D).hasCancel());
367   CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
368   return CGF.GenerateCapturedStmtFunction(*CS);
369 }
370 
371 Address CGOpenMPRuntime::getOrCreateDefaultLocation(OpenMPLocationFlags Flags) {
372   CharUnits Align = getIdentAlign(CGM);
373   llvm::Value *Entry = OpenMPDefaultLocMap.lookup(Flags);
374   if (!Entry) {
375     if (!DefaultOpenMPPSource) {
376       // Initialize default location for psource field of ident_t structure of
377       // all ident_t objects. Format is ";file;function;line;column;;".
378       // Taken from
379       // http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp_str.c
380       DefaultOpenMPPSource =
381           CGM.GetAddrOfConstantCString(";unknown;unknown;0;0;;").getPointer();
382       DefaultOpenMPPSource =
383           llvm::ConstantExpr::getBitCast(DefaultOpenMPPSource, CGM.Int8PtrTy);
384     }
385     auto DefaultOpenMPLocation = new llvm::GlobalVariable(
386         CGM.getModule(), IdentTy, /*isConstant*/ true,
387         llvm::GlobalValue::PrivateLinkage, /*Initializer*/ nullptr);
388     DefaultOpenMPLocation->setUnnamedAddr(true);
389     DefaultOpenMPLocation->setAlignment(Align.getQuantity());
390 
391     llvm::Constant *Zero = llvm::ConstantInt::get(CGM.Int32Ty, 0, true);
392     llvm::Constant *Values[] = {Zero,
393                                 llvm::ConstantInt::get(CGM.Int32Ty, Flags),
394                                 Zero, Zero, DefaultOpenMPPSource};
395     llvm::Constant *Init = llvm::ConstantStruct::get(IdentTy, Values);
396     DefaultOpenMPLocation->setInitializer(Init);
397     OpenMPDefaultLocMap[Flags] = Entry = DefaultOpenMPLocation;
398   }
399   return Address(Entry, Align);
400 }
401 
402 llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF,
403                                                  SourceLocation Loc,
404                                                  OpenMPLocationFlags Flags) {
405   // If no debug info is generated - return global default location.
406   if (CGM.getCodeGenOpts().getDebugInfo() == CodeGenOptions::NoDebugInfo ||
407       Loc.isInvalid())
408     return getOrCreateDefaultLocation(Flags).getPointer();
409 
410   assert(CGF.CurFn && "No function in current CodeGenFunction.");
411 
412   Address LocValue = Address::invalid();
413   auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
414   if (I != OpenMPLocThreadIDMap.end())
415     LocValue = Address(I->second.DebugLoc, getIdentAlign(CGF.CGM));
416 
417   // OpenMPLocThreadIDMap may have null DebugLoc and non-null ThreadID, if
418   // GetOpenMPThreadID was called before this routine.
419   if (!LocValue.isValid()) {
420     // Generate "ident_t .kmpc_loc.addr;"
421     Address AI = CGF.CreateTempAlloca(IdentTy, getIdentAlign(CGF.CGM),
422                                       ".kmpc_loc.addr");
423     auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
424     Elem.second.DebugLoc = AI.getPointer();
425     LocValue = AI;
426 
427     CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
428     CGF.Builder.SetInsertPoint(CGF.AllocaInsertPt);
429     CGF.Builder.CreateMemCpy(LocValue, getOrCreateDefaultLocation(Flags),
430                              CGM.getSize(getIdentSize(CGF.CGM)));
431   }
432 
433   // char **psource = &.kmpc_loc_<flags>.addr.psource;
434   Address PSource = createIdentFieldGEP(CGF, LocValue, IdentField_PSource);
435 
436   auto OMPDebugLoc = OpenMPDebugLocMap.lookup(Loc.getRawEncoding());
437   if (OMPDebugLoc == nullptr) {
438     SmallString<128> Buffer2;
439     llvm::raw_svector_ostream OS2(Buffer2);
440     // Build debug location
441     PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
442     OS2 << ";" << PLoc.getFilename() << ";";
443     if (const FunctionDecl *FD =
444             dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl)) {
445       OS2 << FD->getQualifiedNameAsString();
446     }
447     OS2 << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;";
448     OMPDebugLoc = CGF.Builder.CreateGlobalStringPtr(OS2.str());
449     OpenMPDebugLocMap[Loc.getRawEncoding()] = OMPDebugLoc;
450   }
451   // *psource = ";<File>;<Function>;<Line>;<Column>;;";
452   CGF.Builder.CreateStore(OMPDebugLoc, PSource);
453 
454   // Our callers always pass this to a runtime function, so for
455   // convenience, go ahead and return a naked pointer.
456   return LocValue.getPointer();
457 }
458 
459 llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF,
460                                           SourceLocation Loc) {
461   assert(CGF.CurFn && "No function in current CodeGenFunction.");
462 
463   llvm::Value *ThreadID = nullptr;
464   // Check whether we've already cached a load of the thread id in this
465   // function.
466   auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
467   if (I != OpenMPLocThreadIDMap.end()) {
468     ThreadID = I->second.ThreadID;
469     if (ThreadID != nullptr)
470       return ThreadID;
471   }
472   if (auto OMPRegionInfo =
473           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
474     if (OMPRegionInfo->getThreadIDVariable()) {
475       // Check if this an outlined function with thread id passed as argument.
476       auto LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF);
477       ThreadID = CGF.EmitLoadOfLValue(LVal, Loc).getScalarVal();
478       // If value loaded in entry block, cache it and use it everywhere in
479       // function.
480       if (CGF.Builder.GetInsertBlock() == CGF.AllocaInsertPt->getParent()) {
481         auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
482         Elem.second.ThreadID = ThreadID;
483       }
484       return ThreadID;
485     }
486   }
487 
488   // This is not an outlined function region - need to call __kmpc_int32
489   // kmpc_global_thread_num(ident_t *loc).
490   // Generate thread id value and cache this value for use across the
491   // function.
492   CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
493   CGF.Builder.SetInsertPoint(CGF.AllocaInsertPt);
494   ThreadID =
495       CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_global_thread_num),
496                           emitUpdateLocation(CGF, Loc));
497   auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
498   Elem.second.ThreadID = ThreadID;
499   return ThreadID;
500 }
501 
502 void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) {
503   assert(CGF.CurFn && "No function in current CodeGenFunction.");
504   if (OpenMPLocThreadIDMap.count(CGF.CurFn))
505     OpenMPLocThreadIDMap.erase(CGF.CurFn);
506 }
507 
508 llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() {
509   return llvm::PointerType::getUnqual(IdentTy);
510 }
511 
512 llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() {
513   return llvm::PointerType::getUnqual(Kmpc_MicroTy);
514 }
515 
516 llvm::Constant *
517 CGOpenMPRuntime::createRuntimeFunction(OpenMPRTLFunction Function) {
518   llvm::Constant *RTLFn = nullptr;
519   switch (Function) {
520   case OMPRTL__kmpc_fork_call: {
521     // Build void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro
522     // microtask, ...);
523     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
524                                 getKmpc_MicroPointerTy()};
525     llvm::FunctionType *FnTy =
526         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ true);
527     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_fork_call");
528     break;
529   }
530   case OMPRTL__kmpc_global_thread_num: {
531     // Build kmp_int32 __kmpc_global_thread_num(ident_t *loc);
532     llvm::Type *TypeParams[] = {getIdentTyPointerTy()};
533     llvm::FunctionType *FnTy =
534         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
535     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_global_thread_num");
536     break;
537   }
538   case OMPRTL__kmpc_threadprivate_cached: {
539     // Build void *__kmpc_threadprivate_cached(ident_t *loc,
540     // kmp_int32 global_tid, void *data, size_t size, void ***cache);
541     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
542                                 CGM.VoidPtrTy, CGM.SizeTy,
543                                 CGM.VoidPtrTy->getPointerTo()->getPointerTo()};
544     llvm::FunctionType *FnTy =
545         llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg*/ false);
546     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_cached");
547     break;
548   }
549   case OMPRTL__kmpc_critical: {
550     // Build void __kmpc_critical(ident_t *loc, kmp_int32 global_tid,
551     // kmp_critical_name *crit);
552     llvm::Type *TypeParams[] = {
553         getIdentTyPointerTy(), CGM.Int32Ty,
554         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
555     llvm::FunctionType *FnTy =
556         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
557     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_critical");
558     break;
559   }
560   case OMPRTL__kmpc_threadprivate_register: {
561     // Build void __kmpc_threadprivate_register(ident_t *, void *data,
562     // kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor);
563     // typedef void *(*kmpc_ctor)(void *);
564     auto KmpcCtorTy =
565         llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
566                                 /*isVarArg*/ false)->getPointerTo();
567     // typedef void *(*kmpc_cctor)(void *, void *);
568     llvm::Type *KmpcCopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
569     auto KmpcCopyCtorTy =
570         llvm::FunctionType::get(CGM.VoidPtrTy, KmpcCopyCtorTyArgs,
571                                 /*isVarArg*/ false)->getPointerTo();
572     // typedef void (*kmpc_dtor)(void *);
573     auto KmpcDtorTy =
574         llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy, /*isVarArg*/ false)
575             ->getPointerTo();
576     llvm::Type *FnTyArgs[] = {getIdentTyPointerTy(), CGM.VoidPtrTy, KmpcCtorTy,
577                               KmpcCopyCtorTy, KmpcDtorTy};
578     auto FnTy = llvm::FunctionType::get(CGM.VoidTy, FnTyArgs,
579                                         /*isVarArg*/ false);
580     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_register");
581     break;
582   }
583   case OMPRTL__kmpc_end_critical: {
584     // Build void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid,
585     // kmp_critical_name *crit);
586     llvm::Type *TypeParams[] = {
587         getIdentTyPointerTy(), CGM.Int32Ty,
588         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
589     llvm::FunctionType *FnTy =
590         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
591     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_critical");
592     break;
593   }
594   case OMPRTL__kmpc_cancel_barrier: {
595     // Build kmp_int32 __kmpc_cancel_barrier(ident_t *loc, kmp_int32
596     // global_tid);
597     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
598     llvm::FunctionType *FnTy =
599         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
600     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_cancel_barrier");
601     break;
602   }
603   case OMPRTL__kmpc_barrier: {
604     // Build void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid);
605     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
606     llvm::FunctionType *FnTy =
607         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
608     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_barrier");
609     break;
610   }
611   case OMPRTL__kmpc_for_static_fini: {
612     // Build void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid);
613     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
614     llvm::FunctionType *FnTy =
615         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
616     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_for_static_fini");
617     break;
618   }
619   case OMPRTL__kmpc_push_num_threads: {
620     // Build void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
621     // kmp_int32 num_threads)
622     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
623                                 CGM.Int32Ty};
624     llvm::FunctionType *FnTy =
625         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
626     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_num_threads");
627     break;
628   }
629   case OMPRTL__kmpc_serialized_parallel: {
630     // Build void __kmpc_serialized_parallel(ident_t *loc, kmp_int32
631     // global_tid);
632     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
633     llvm::FunctionType *FnTy =
634         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
635     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_serialized_parallel");
636     break;
637   }
638   case OMPRTL__kmpc_end_serialized_parallel: {
639     // Build void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32
640     // global_tid);
641     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
642     llvm::FunctionType *FnTy =
643         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
644     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_serialized_parallel");
645     break;
646   }
647   case OMPRTL__kmpc_flush: {
648     // Build void __kmpc_flush(ident_t *loc);
649     llvm::Type *TypeParams[] = {getIdentTyPointerTy()};
650     llvm::FunctionType *FnTy =
651         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
652     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_flush");
653     break;
654   }
655   case OMPRTL__kmpc_master: {
656     // Build kmp_int32 __kmpc_master(ident_t *loc, kmp_int32 global_tid);
657     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
658     llvm::FunctionType *FnTy =
659         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
660     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_master");
661     break;
662   }
663   case OMPRTL__kmpc_end_master: {
664     // Build void __kmpc_end_master(ident_t *loc, kmp_int32 global_tid);
665     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
666     llvm::FunctionType *FnTy =
667         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
668     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_master");
669     break;
670   }
671   case OMPRTL__kmpc_omp_taskyield: {
672     // Build kmp_int32 __kmpc_omp_taskyield(ident_t *, kmp_int32 global_tid,
673     // int end_part);
674     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
675     llvm::FunctionType *FnTy =
676         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
677     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_taskyield");
678     break;
679   }
680   case OMPRTL__kmpc_single: {
681     // Build kmp_int32 __kmpc_single(ident_t *loc, kmp_int32 global_tid);
682     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
683     llvm::FunctionType *FnTy =
684         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
685     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_single");
686     break;
687   }
688   case OMPRTL__kmpc_end_single: {
689     // Build void __kmpc_end_single(ident_t *loc, kmp_int32 global_tid);
690     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
691     llvm::FunctionType *FnTy =
692         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
693     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_single");
694     break;
695   }
696   case OMPRTL__kmpc_omp_task_alloc: {
697     // Build kmp_task_t *__kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
698     // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
699     // kmp_routine_entry_t *task_entry);
700     assert(KmpRoutineEntryPtrTy != nullptr &&
701            "Type kmp_routine_entry_t must be created.");
702     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty,
703                                 CGM.SizeTy, CGM.SizeTy, KmpRoutineEntryPtrTy};
704     // Return void * and then cast to particular kmp_task_t type.
705     llvm::FunctionType *FnTy =
706         llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
707     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_alloc");
708     break;
709   }
710   case OMPRTL__kmpc_omp_task: {
711     // Build kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t
712     // *new_task);
713     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
714                                 CGM.VoidPtrTy};
715     llvm::FunctionType *FnTy =
716         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
717     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task");
718     break;
719   }
720   case OMPRTL__kmpc_copyprivate: {
721     // Build void __kmpc_copyprivate(ident_t *loc, kmp_int32 global_tid,
722     // size_t cpy_size, void *cpy_data, void(*cpy_func)(void *, void *),
723     // kmp_int32 didit);
724     llvm::Type *CpyTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
725     auto *CpyFnTy =
726         llvm::FunctionType::get(CGM.VoidTy, CpyTypeParams, /*isVarArg=*/false);
727     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.SizeTy,
728                                 CGM.VoidPtrTy, CpyFnTy->getPointerTo(),
729                                 CGM.Int32Ty};
730     llvm::FunctionType *FnTy =
731         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
732     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_copyprivate");
733     break;
734   }
735   case OMPRTL__kmpc_reduce: {
736     // Build kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid,
737     // kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void
738     // (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name *lck);
739     llvm::Type *ReduceTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
740     auto *ReduceFnTy = llvm::FunctionType::get(CGM.VoidTy, ReduceTypeParams,
741                                                /*isVarArg=*/false);
742     llvm::Type *TypeParams[] = {
743         getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, CGM.SizeTy,
744         CGM.VoidPtrTy, ReduceFnTy->getPointerTo(),
745         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
746     llvm::FunctionType *FnTy =
747         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
748     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_reduce");
749     break;
750   }
751   case OMPRTL__kmpc_reduce_nowait: {
752     // Build kmp_int32 __kmpc_reduce_nowait(ident_t *loc, kmp_int32
753     // global_tid, kmp_int32 num_vars, size_t reduce_size, void *reduce_data,
754     // void (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name
755     // *lck);
756     llvm::Type *ReduceTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
757     auto *ReduceFnTy = llvm::FunctionType::get(CGM.VoidTy, ReduceTypeParams,
758                                                /*isVarArg=*/false);
759     llvm::Type *TypeParams[] = {
760         getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, CGM.SizeTy,
761         CGM.VoidPtrTy, ReduceFnTy->getPointerTo(),
762         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
763     llvm::FunctionType *FnTy =
764         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
765     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_reduce_nowait");
766     break;
767   }
768   case OMPRTL__kmpc_end_reduce: {
769     // Build void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,
770     // kmp_critical_name *lck);
771     llvm::Type *TypeParams[] = {
772         getIdentTyPointerTy(), CGM.Int32Ty,
773         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
774     llvm::FunctionType *FnTy =
775         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
776     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_reduce");
777     break;
778   }
779   case OMPRTL__kmpc_end_reduce_nowait: {
    // Build void __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
    // kmp_critical_name *lck);
782     llvm::Type *TypeParams[] = {
783         getIdentTyPointerTy(), CGM.Int32Ty,
784         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
785     llvm::FunctionType *FnTy =
786         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
787     RTLFn =
788         CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_reduce_nowait");
789     break;
790   }
791   case OMPRTL__kmpc_omp_task_begin_if0: {
    // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
794     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
795                                 CGM.VoidPtrTy};
796     llvm::FunctionType *FnTy =
797         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
798     RTLFn =
799         CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_begin_if0");
800     break;
801   }
802   case OMPRTL__kmpc_omp_task_complete_if0: {
    // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
805     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
806                                 CGM.VoidPtrTy};
807     llvm::FunctionType *FnTy =
808         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
809     RTLFn = CGM.CreateRuntimeFunction(FnTy,
810                                       /*Name=*/"__kmpc_omp_task_complete_if0");
811     break;
812   }
813   case OMPRTL__kmpc_ordered: {
814     // Build void __kmpc_ordered(ident_t *loc, kmp_int32 global_tid);
815     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
816     llvm::FunctionType *FnTy =
817         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
818     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_ordered");
819     break;
820   }
821   case OMPRTL__kmpc_end_ordered: {
822     // Build void __kmpc_end_ordered(ident_t *loc, kmp_int32 global_tid);
823     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
824     llvm::FunctionType *FnTy =
825         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
826     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_ordered");
827     break;
828   }
829   case OMPRTL__kmpc_omp_taskwait: {
830     // Build kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32 global_tid);
831     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
832     llvm::FunctionType *FnTy =
833         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
834     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_omp_taskwait");
835     break;
836   }
837   case OMPRTL__kmpc_taskgroup: {
838     // Build void __kmpc_taskgroup(ident_t *loc, kmp_int32 global_tid);
839     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
840     llvm::FunctionType *FnTy =
841         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
842     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_taskgroup");
843     break;
844   }
845   case OMPRTL__kmpc_end_taskgroup: {
846     // Build void __kmpc_end_taskgroup(ident_t *loc, kmp_int32 global_tid);
847     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
848     llvm::FunctionType *FnTy =
849         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
850     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_taskgroup");
851     break;
852   }
853   case OMPRTL__kmpc_push_proc_bind: {
854     // Build void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
855     // int proc_bind)
856     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
857     llvm::FunctionType *FnTy =
858         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
859     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_proc_bind");
860     break;
861   }
862   case OMPRTL__kmpc_omp_task_with_deps: {
863     // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
864     // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
865     // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list);
866     llvm::Type *TypeParams[] = {
867         getIdentTyPointerTy(), CGM.Int32Ty, CGM.VoidPtrTy, CGM.Int32Ty,
868         CGM.VoidPtrTy,         CGM.Int32Ty, CGM.VoidPtrTy};
869     llvm::FunctionType *FnTy =
870         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
871     RTLFn =
872         CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_with_deps");
873     break;
874   }
875   case OMPRTL__kmpc_omp_wait_deps: {
876     // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
877     // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 ndeps_noalias,
878     // kmp_depend_info_t *noalias_dep_list);
879     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
880                                 CGM.Int32Ty,           CGM.VoidPtrTy,
881                                 CGM.Int32Ty,           CGM.VoidPtrTy};
882     llvm::FunctionType *FnTy =
883         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
884     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_wait_deps");
885     break;
886   }
887   case OMPRTL__kmpc_cancellationpoint: {
888     // Build kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
889     // global_tid, kmp_int32 cncl_kind)
890     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
891     llvm::FunctionType *FnTy =
892         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
893     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_cancellationpoint");
894     break;
895   }
896   case OMPRTL__kmpc_cancel: {
897     // Build kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
898     // kmp_int32 cncl_kind)
899     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
900     llvm::FunctionType *FnTy =
901         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
902     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_cancel");
903     break;
904   }
905   case OMPRTL__tgt_target: {
906     // Build int32_t __tgt_target(int32_t device_id, void *host_ptr, int32_t
907     // arg_num, void** args_base, void **args, size_t *arg_sizes, int32_t
908     // *arg_types);
909     llvm::Type *TypeParams[] = {CGM.Int32Ty,
910                                 CGM.VoidPtrTy,
911                                 CGM.Int32Ty,
912                                 CGM.VoidPtrPtrTy,
913                                 CGM.VoidPtrPtrTy,
914                                 CGM.SizeTy->getPointerTo(),
915                                 CGM.Int32Ty->getPointerTo()};
916     llvm::FunctionType *FnTy =
917         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
918     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target");
919     break;
920   }
921   }
922   return RTLFn;
923 }
924 
925 static llvm::Value *getTypeSize(CodeGenFunction &CGF, QualType Ty) {
926   auto &C = CGF.getContext();
927   llvm::Value *Size = nullptr;
928   auto SizeInChars = C.getTypeSizeInChars(Ty);
929   if (SizeInChars.isZero()) {
930     // getTypeSizeInChars() returns 0 for a VLA.
931     while (auto *VAT = C.getAsVariableArrayType(Ty)) {
932       llvm::Value *ArraySize;
933       std::tie(ArraySize, Ty) = CGF.getVLASize(VAT);
934       Size = Size ? CGF.Builder.CreateNUWMul(Size, ArraySize) : ArraySize;
935     }
936     SizeInChars = C.getTypeSizeInChars(Ty);
937     assert(!SizeInChars.isZero());
938     Size = CGF.Builder.CreateNUWMul(
939         Size, llvm::ConstantInt::get(CGF.SizeTy, SizeInChars.getQuantity()));
940   } else
941     Size = llvm::ConstantInt::get(CGF.SizeTy, SizeInChars.getQuantity());
942   return Size;
943 }
944 
945 llvm::Constant *CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize,
946                                                              bool IVSigned) {
947   assert((IVSize == 32 || IVSize == 64) &&
948          "IV size is not compatible with the omp runtime");
949   auto Name = IVSize == 32 ? (IVSigned ? "__kmpc_for_static_init_4"
950                                        : "__kmpc_for_static_init_4u")
951                            : (IVSigned ? "__kmpc_for_static_init_8"
952                                        : "__kmpc_for_static_init_8u");
953   auto ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
954   auto PtrTy = llvm::PointerType::getUnqual(ITy);
955   llvm::Type *TypeParams[] = {
956     getIdentTyPointerTy(),                     // loc
957     CGM.Int32Ty,                               // tid
958     CGM.Int32Ty,                               // schedtype
959     llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
960     PtrTy,                                     // p_lower
961     PtrTy,                                     // p_upper
962     PtrTy,                                     // p_stride
963     ITy,                                       // incr
964     ITy                                        // chunk
965   };
966   llvm::FunctionType *FnTy =
967       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
968   return CGM.CreateRuntimeFunction(FnTy, Name);
969 }
970 
971 llvm::Constant *CGOpenMPRuntime::createDispatchInitFunction(unsigned IVSize,
972                                                             bool IVSigned) {
973   assert((IVSize == 32 || IVSize == 64) &&
974          "IV size is not compatible with the omp runtime");
975   auto Name =
976       IVSize == 32
977           ? (IVSigned ? "__kmpc_dispatch_init_4" : "__kmpc_dispatch_init_4u")
978           : (IVSigned ? "__kmpc_dispatch_init_8" : "__kmpc_dispatch_init_8u");
979   auto ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
980   llvm::Type *TypeParams[] = { getIdentTyPointerTy(), // loc
981                                CGM.Int32Ty,           // tid
982                                CGM.Int32Ty,           // schedtype
983                                ITy,                   // lower
984                                ITy,                   // upper
985                                ITy,                   // stride
986                                ITy                    // chunk
987   };
988   llvm::FunctionType *FnTy =
989       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
990   return CGM.CreateRuntimeFunction(FnTy, Name);
991 }
992 
993 llvm::Constant *CGOpenMPRuntime::createDispatchFiniFunction(unsigned IVSize,
994                                                             bool IVSigned) {
995   assert((IVSize == 32 || IVSize == 64) &&
996          "IV size is not compatible with the omp runtime");
997   auto Name =
998       IVSize == 32
999           ? (IVSigned ? "__kmpc_dispatch_fini_4" : "__kmpc_dispatch_fini_4u")
1000           : (IVSigned ? "__kmpc_dispatch_fini_8" : "__kmpc_dispatch_fini_8u");
1001   llvm::Type *TypeParams[] = {
1002       getIdentTyPointerTy(), // loc
1003       CGM.Int32Ty,           // tid
1004   };
1005   llvm::FunctionType *FnTy =
1006       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1007   return CGM.CreateRuntimeFunction(FnTy, Name);
1008 }
1009 
1010 llvm::Constant *CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize,
1011                                                             bool IVSigned) {
1012   assert((IVSize == 32 || IVSize == 64) &&
1013          "IV size is not compatible with the omp runtime");
1014   auto Name =
1015       IVSize == 32
1016           ? (IVSigned ? "__kmpc_dispatch_next_4" : "__kmpc_dispatch_next_4u")
1017           : (IVSigned ? "__kmpc_dispatch_next_8" : "__kmpc_dispatch_next_8u");
1018   auto ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
1019   auto PtrTy = llvm::PointerType::getUnqual(ITy);
1020   llvm::Type *TypeParams[] = {
1021     getIdentTyPointerTy(),                     // loc
1022     CGM.Int32Ty,                               // tid
1023     llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
1024     PtrTy,                                     // p_lower
1025     PtrTy,                                     // p_upper
1026     PtrTy                                      // p_stride
1027   };
1028   llvm::FunctionType *FnTy =
1029       llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
1030   return CGM.CreateRuntimeFunction(FnTy, Name);
1031 }
1032 
1033 llvm::Constant *
1034 CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) {
1035   assert(!CGM.getLangOpts().OpenMPUseTLS ||
1036          !CGM.getContext().getTargetInfo().isTLSSupported());
1037   // Lookup the entry, lazily creating it if necessary.
1038   return getOrCreateInternalVariable(CGM.Int8PtrPtrTy,
1039                                      Twine(CGM.getMangledName(VD)) + ".cache.");
1040 }
1041 
1042 Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
1043                                                 const VarDecl *VD,
1044                                                 Address VDAddr,
1045                                                 SourceLocation Loc) {
1046   if (CGM.getLangOpts().OpenMPUseTLS &&
1047       CGM.getContext().getTargetInfo().isTLSSupported())
1048     return VDAddr;
1049 
1050   auto VarTy = VDAddr.getElementType();
1051   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
1052                          CGF.Builder.CreatePointerCast(VDAddr.getPointer(),
1053                                                        CGM.Int8PtrTy),
1054                          CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)),
1055                          getOrCreateThreadPrivateCache(VD)};
1056   return Address(CGF.EmitRuntimeCall(
1057       createRuntimeFunction(OMPRTL__kmpc_threadprivate_cached), Args),
1058                  VDAddr.getAlignment());
1059 }
1060 
1061 void CGOpenMPRuntime::emitThreadPrivateVarInit(
1062     CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor,
1063     llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) {
1064   // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime
1065   // library.
1066   auto OMPLoc = emitUpdateLocation(CGF, Loc);
1067   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_global_thread_num),
1068                       OMPLoc);
1069   // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor)
1070   // to register constructor/destructor for variable.
1071   llvm::Value *Args[] = {OMPLoc,
1072                          CGF.Builder.CreatePointerCast(VDAddr.getPointer(),
1073                                                        CGM.VoidPtrTy),
1074                          Ctor, CopyCtor, Dtor};
1075   CGF.EmitRuntimeCall(
1076       createRuntimeFunction(OMPRTL__kmpc_threadprivate_register), Args);
1077 }
1078 
/// \brief Emits the runtime registration code for the definition of the
/// threadprivate variable \a VD.
///
/// Nothing is emitted (and nullptr is returned) when TLS is requested and
/// supported by the target, when \a VD has no definition, when the definition
/// was already processed by an earlier call, or when neither a constructor
/// nor a destructor function is needed.
///
/// \param VD The threadprivate variable; its definition is looked up here.
/// \param VDAddr Address of the original variable.
/// \param Loc Location used for the generated functions and runtime calls.
/// \param PerformInit If true (and compiling C++), a function that re-emits
/// the variable's initializer is generated and registered as constructor.
/// \param CGF If non-null, the registration calls are emitted into this
/// function. If null, a standalone '.__omp_threadprivate_init_.' function is
/// created to hold them.
/// \return The standalone init function when \a CGF is null and registration
/// code was emitted; nullptr in every other case.
llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition(
    const VarDecl *VD, Address VDAddr, SourceLocation Loc,
    bool PerformInit, CodeGenFunction *CGF) {
  // With native TLS no runtime registration is performed.
  if (CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getContext().getTargetInfo().isTLSSupported())
    return nullptr;

  VD = VD->getDefinition(CGM.getContext());
  // Process each definition only once: remember it before emitting anything.
  if (VD && ThreadPrivateWithDefinition.count(VD) == 0) {
    ThreadPrivateWithDefinition.insert(VD);
    QualType ASTTy = VD->getType();

    llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr;
    // NOTE(review): Init is dereferenced below without a null check;
    // presumably PerformInit implies that an initializer exists - confirm
    // against callers.
    auto Init = VD->getAnyInitializer();
    if (CGM.getLangOpts().CPlusPlus && PerformInit) {
      // Generate function that re-emits the declaration's initializer into the
      // threadprivate copy of the variable VD
      CodeGenFunction CtorCGF(CGM);
      FunctionArgList Args;
      // Single 'void *' parameter: the destination the initializer is emitted
      // into.
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, SourceLocation(),
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy);
      Args.push_back(&Dst);

      // The constructor function has the signature 'void *(void *)'.
      auto &FI = CGM.getTypes().arrangeFreeFunctionDeclaration(
          CGM.getContext().VoidPtrTy, Args, FunctionType::ExtInfo(),
          /*isVariadic=*/false);
      auto FTy = CGM.getTypes().GetFunctionType(FI);
      auto Fn = CGM.CreateGlobalInitOrDestructFunction(
          FTy, ".__kmpc_global_ctor_.", Loc);
      CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI,
                            Args, SourceLocation());
      auto ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      // Treat the incoming pointer as storage of the variable's type, with
      // the alignment of the original variable.
      Address Arg = Address(ArgVal, VDAddr.getAlignment());
      Arg = CtorCGF.Builder.CreateElementBitCast(Arg,
                                             CtorCGF.ConvertTypeForMem(ASTTy));
      CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(),
                               /*IsInitializer=*/true);
      // Reload the argument and store it as the function's return value.
      ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue);
      CtorCGF.FinishFunction();
      Ctor = Fn;
    }
    if (VD->getType().isDestructedType() != QualType::DK_none) {
      // Generate function that emits destructor call for the threadprivate copy
      // of the variable VD
      CodeGenFunction DtorCGF(CGM);
      FunctionArgList Args;
      // Single 'void *' parameter: the object to destroy.
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, SourceLocation(),
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy);
      Args.push_back(&Dst);

      // The destructor function has the signature 'void (void *)'.
      auto &FI = CGM.getTypes().arrangeFreeFunctionDeclaration(
          CGM.getContext().VoidTy, Args, FunctionType::ExtInfo(),
          /*isVariadic=*/false);
      auto FTy = CGM.getTypes().GetFunctionType(FI);
      auto Fn = CGM.CreateGlobalInitOrDestructFunction(
          FTy, ".__kmpc_global_dtor_.", Loc);
      DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args,
                            SourceLocation());
      auto ArgVal = DtorCGF.EmitLoadOfScalar(
          DtorCGF.GetAddrOfLocalVar(&Dst),
          /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation());
      DtorCGF.emitDestroy(Address(ArgVal, VDAddr.getAlignment()), ASTTy,
                          DtorCGF.getDestroyer(ASTTy.isDestructedType()),
                          DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
      DtorCGF.FinishFunction();
      Dtor = Fn;
    }
    // Do not emit init function if it is not required.
    if (!Ctor && !Dtor)
      return nullptr;

    llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
    auto CopyCtorTy =
        llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs,
                                /*isVarArg=*/false)->getPointerTo();
    // Copying constructor for the threadprivate variable.
    // Must be NULL - reserved by runtime, but currently it requires that this
    // parameter is always NULL. Otherwise it fires assertion.
    CopyCtor = llvm::Constant::getNullValue(CopyCtorTy);
    // Whichever of Ctor/Dtor was not generated is passed as a null function
    // pointer of the matching type.
    if (Ctor == nullptr) {
      auto CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
                                            /*isVarArg=*/false)->getPointerTo();
      Ctor = llvm::Constant::getNullValue(CtorTy);
    }
    if (Dtor == nullptr) {
      auto DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy,
                                            /*isVarArg=*/false)->getPointerTo();
      Dtor = llvm::Constant::getNullValue(DtorTy);
    }
    if (!CGF) {
      // No enclosing function was supplied: wrap the registration calls in a
      // new 'void ()' function and hand it back to the caller.
      auto InitFunctionTy =
          llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false);
      auto InitFunction = CGM.CreateGlobalInitOrDestructFunction(
          InitFunctionTy, ".__omp_threadprivate_init_.");
      CodeGenFunction InitCGF(CGM);
      FunctionArgList ArgList;
      InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction,
                            CGM.getTypes().arrangeNullaryFunction(), ArgList,
                            Loc);
      emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
      InitCGF.FinishFunction();
      return InitFunction;
    }
    // Otherwise emit the registration calls directly into the caller's
    // function.
    emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
  }
  return nullptr;
}
1191 
1192 /// \brief Emits code for OpenMP 'if' clause using specified \a CodeGen
1193 /// function. Here is the logic:
1194 /// if (Cond) {
1195 ///   ThenGen();
1196 /// } else {
1197 ///   ElseGen();
1198 /// }
1199 static void emitOMPIfClause(CodeGenFunction &CGF, const Expr *Cond,
1200                             const RegionCodeGenTy &ThenGen,
1201                             const RegionCodeGenTy &ElseGen) {
1202   CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange());
1203 
1204   // If the condition constant folds and can be elided, try to avoid emitting
1205   // the condition and the dead arm of the if/else.
1206   bool CondConstant;
1207   if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) {
1208     CodeGenFunction::RunCleanupsScope Scope(CGF);
1209     if (CondConstant) {
1210       ThenGen(CGF);
1211     } else {
1212       ElseGen(CGF);
1213     }
1214     return;
1215   }
1216 
1217   // Otherwise, the condition did not fold, or we couldn't elide it.  Just
1218   // emit the conditional branch.
1219   auto ThenBlock = CGF.createBasicBlock("omp_if.then");
1220   auto ElseBlock = CGF.createBasicBlock("omp_if.else");
1221   auto ContBlock = CGF.createBasicBlock("omp_if.end");
1222   CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0);
1223 
1224   // Emit the 'then' code.
1225   CGF.EmitBlock(ThenBlock);
1226   {
1227     CodeGenFunction::RunCleanupsScope ThenScope(CGF);
1228     ThenGen(CGF);
1229   }
1230   CGF.EmitBranch(ContBlock);
1231   // Emit the 'else' code if present.
1232   {
1233     // There is no need to emit line number for unconditional branch.
1234     auto NL = ApplyDebugLocation::CreateEmpty(CGF);
1235     CGF.EmitBlock(ElseBlock);
1236   }
1237   {
1238     CodeGenFunction::RunCleanupsScope ThenScope(CGF);
1239     ElseGen(CGF);
1240   }
1241   {
1242     // There is no need to emit line number for unconditional branch.
1243     auto NL = ApplyDebugLocation::CreateEmpty(CGF);
1244     CGF.EmitBranch(ContBlock);
1245   }
1246   // Emit the continuation block for code after the if.
1247   CGF.EmitBlock(ContBlock, /*IsFinished=*/true);
1248 }
1249 
1250 void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
1251                                        llvm::Value *OutlinedFn,
1252                                        ArrayRef<llvm::Value *> CapturedVars,
1253                                        const Expr *IfCond) {
1254   auto *RTLoc = emitUpdateLocation(CGF, Loc);
1255   auto &&ThenGen = [this, OutlinedFn, CapturedVars,
1256                     RTLoc](CodeGenFunction &CGF) {
1257     // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn);
1258     llvm::Value *Args[] = {
1259         RTLoc,
1260         CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
1261         CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy())};
1262     llvm::SmallVector<llvm::Value *, 16> RealArgs;
1263     RealArgs.append(std::begin(Args), std::end(Args));
1264     RealArgs.append(CapturedVars.begin(), CapturedVars.end());
1265 
1266     auto RTLFn = createRuntimeFunction(OMPRTL__kmpc_fork_call);
1267     CGF.EmitRuntimeCall(RTLFn, RealArgs);
1268   };
1269   auto &&ElseGen = [this, OutlinedFn, CapturedVars, RTLoc,
1270                     Loc](CodeGenFunction &CGF) {
1271     auto ThreadID = getThreadID(CGF, Loc);
1272     // Build calls:
1273     // __kmpc_serialized_parallel(&Loc, GTid);
1274     llvm::Value *Args[] = {RTLoc, ThreadID};
1275     CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_serialized_parallel),
1276                         Args);
1277 
1278     // OutlinedFn(&GTid, &zero, CapturedStruct);
1279     auto ThreadIDAddr = emitThreadIDAddress(CGF, Loc);
1280     Address ZeroAddr =
1281       CGF.CreateTempAlloca(CGF.Int32Ty, CharUnits::fromQuantity(4),
1282                            /*Name*/ ".zero.addr");
1283     CGF.InitTempAlloca(ZeroAddr, CGF.Builder.getInt32(/*C*/ 0));
1284     llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
1285     OutlinedFnArgs.push_back(ThreadIDAddr.getPointer());
1286     OutlinedFnArgs.push_back(ZeroAddr.getPointer());
1287     OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());
1288     CGF.EmitCallOrInvoke(OutlinedFn, OutlinedFnArgs);
1289 
1290     // __kmpc_end_serialized_parallel(&Loc, GTid);
1291     llvm::Value *EndArgs[] = {emitUpdateLocation(CGF, Loc), ThreadID};
1292     CGF.EmitRuntimeCall(
1293         createRuntimeFunction(OMPRTL__kmpc_end_serialized_parallel), EndArgs);
1294   };
1295   if (IfCond) {
1296     emitOMPIfClause(CGF, IfCond, ThenGen, ElseGen);
1297   } else {
1298     CodeGenFunction::RunCleanupsScope Scope(CGF);
1299     ThenGen(CGF);
1300   }
1301 }
1302 
1303 // If we're inside an (outlined) parallel region, use the region info's
1304 // thread-ID variable (it is passed in a first argument of the outlined function
1305 // as "kmp_int32 *gtid"). Otherwise, if we're not inside parallel region, but in
1306 // regular serial code region, get thread ID by calling kmp_int32
1307 // kmpc_global_thread_num(ident_t *loc), stash this thread ID in a temporary and
1308 // return the address of that temp.
1309 Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF,
1310                                              SourceLocation Loc) {
1311   if (auto OMPRegionInfo =
1312           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
1313     if (OMPRegionInfo->getThreadIDVariable())
1314       return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress();
1315 
1316   auto ThreadID = getThreadID(CGF, Loc);
1317   auto Int32Ty =
1318       CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true);
1319   auto ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp.");
1320   CGF.EmitStoreOfScalar(ThreadID,
1321                         CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty));
1322 
1323   return ThreadIDTemp;
1324 }
1325 
1326 llvm::Constant *
1327 CGOpenMPRuntime::getOrCreateInternalVariable(llvm::Type *Ty,
1328                                              const llvm::Twine &Name) {
1329   SmallString<256> Buffer;
1330   llvm::raw_svector_ostream Out(Buffer);
1331   Out << Name;
1332   auto RuntimeName = Out.str();
1333   auto &Elem = *InternalVars.insert(std::make_pair(RuntimeName, nullptr)).first;
1334   if (Elem.second) {
1335     assert(Elem.second->getType()->getPointerElementType() == Ty &&
1336            "OMP internal variable has different type than requested");
1337     return &*Elem.second;
1338   }
1339 
1340   return Elem.second = new llvm::GlobalVariable(
1341              CGM.getModule(), Ty, /*IsConstant*/ false,
1342              llvm::GlobalValue::CommonLinkage, llvm::Constant::getNullValue(Ty),
1343              Elem.first());
1344 }
1345 
1346 llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) {
1347   llvm::Twine Name(".gomp_critical_user_", CriticalName);
1348   return getOrCreateInternalVariable(KmpCriticalNameTy, Name.concat(".var"));
1349 }
1350 
1351 namespace {
1352 template <size_t N> class CallEndCleanup final : public EHScopeStack::Cleanup {
1353   llvm::Value *Callee;
1354   llvm::Value *Args[N];
1355 
1356 public:
1357   CallEndCleanup(llvm::Value *Callee, ArrayRef<llvm::Value *> CleanupArgs)
1358       : Callee(Callee) {
1359     assert(CleanupArgs.size() == N);
1360     std::copy(CleanupArgs.begin(), CleanupArgs.end(), std::begin(Args));
1361   }
1362   void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
1363     CGF.EmitRuntimeCall(Callee, Args);
1364   }
1365 };
1366 } // anonymous namespace
1367 
1368 void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF,
1369                                          StringRef CriticalName,
1370                                          const RegionCodeGenTy &CriticalOpGen,
1371                                          SourceLocation Loc) {
1372   // __kmpc_critical(ident_t *, gtid, Lock);
1373   // CriticalOpGen();
1374   // __kmpc_end_critical(ident_t *, gtid, Lock);
1375   // Prepare arguments and build a call to __kmpc_critical
1376   {
1377     CodeGenFunction::RunCleanupsScope Scope(CGF);
1378     llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
1379                            getCriticalRegionLock(CriticalName)};
1380     CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_critical), Args);
1381     // Build a call to __kmpc_end_critical
1382     CGF.EHStack.pushCleanup<CallEndCleanup<std::extent<decltype(Args)>::value>>(
1383         NormalAndEHCleanup, createRuntimeFunction(OMPRTL__kmpc_end_critical),
1384         llvm::makeArrayRef(Args));
1385     emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen);
1386   }
1387 }
1388 
1389 static void emitIfStmt(CodeGenFunction &CGF, llvm::Value *IfCond,
1390                        OpenMPDirectiveKind Kind, SourceLocation Loc,
1391                        const RegionCodeGenTy &BodyOpGen) {
1392   llvm::Value *CallBool = CGF.EmitScalarConversion(
1393       IfCond,
1394       CGF.getContext().getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true),
1395       CGF.getContext().BoolTy, Loc);
1396 
1397   auto *ThenBlock = CGF.createBasicBlock("omp_if.then");
1398   auto *ContBlock = CGF.createBasicBlock("omp_if.end");
1399   // Generate the branch (If-stmt)
1400   CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock);
1401   CGF.EmitBlock(ThenBlock);
1402   CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, Kind, BodyOpGen);
1403   // Emit the rest of bblocks/branches
1404   CGF.EmitBranch(ContBlock);
1405   CGF.EmitBlock(ContBlock, true);
1406 }
1407 
1408 void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF,
1409                                        const RegionCodeGenTy &MasterOpGen,
1410                                        SourceLocation Loc) {
1411   // if(__kmpc_master(ident_t *, gtid)) {
1412   //   MasterOpGen();
1413   //   __kmpc_end_master(ident_t *, gtid);
1414   // }
1415   // Prepare arguments and build a call to __kmpc_master
1416   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
1417   auto *IsMaster =
1418       CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_master), Args);
1419   typedef CallEndCleanup<std::extent<decltype(Args)>::value>
1420       MasterCallEndCleanup;
1421   emitIfStmt(
1422       CGF, IsMaster, OMPD_master, Loc, [&](CodeGenFunction &CGF) -> void {
1423         CodeGenFunction::RunCleanupsScope Scope(CGF);
1424         CGF.EHStack.pushCleanup<MasterCallEndCleanup>(
1425             NormalAndEHCleanup, createRuntimeFunction(OMPRTL__kmpc_end_master),
1426             llvm::makeArrayRef(Args));
1427         MasterOpGen(CGF);
1428       });
1429 }
1430 
1431 void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
1432                                         SourceLocation Loc) {
1433   // Build call __kmpc_omp_taskyield(loc, thread_id, 0);
1434   llvm::Value *Args[] = {
1435       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
1436       llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)};
1437   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_taskyield), Args);
1438 }
1439 
1440 void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF,
1441                                           const RegionCodeGenTy &TaskgroupOpGen,
1442                                           SourceLocation Loc) {
1443   // __kmpc_taskgroup(ident_t *, gtid);
1444   // TaskgroupOpGen();
1445   // __kmpc_end_taskgroup(ident_t *, gtid);
1446   // Prepare arguments and build a call to __kmpc_taskgroup
1447   {
1448     CodeGenFunction::RunCleanupsScope Scope(CGF);
1449     llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
1450     CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_taskgroup), Args);
1451     // Build a call to __kmpc_end_taskgroup
1452     CGF.EHStack.pushCleanup<CallEndCleanup<std::extent<decltype(Args)>::value>>(
1453         NormalAndEHCleanup, createRuntimeFunction(OMPRTL__kmpc_end_taskgroup),
1454         llvm::makeArrayRef(Args));
1455     emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen);
1456   }
1457 }
1458 
1459 /// Given an array of pointers to variables, project the address of a
1460 /// given variable.
1461 static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array,
1462                                       unsigned Index, const VarDecl *Var) {
1463   // Pull out the pointer to the variable.
1464   Address PtrAddr =
1465       CGF.Builder.CreateConstArrayGEP(Array, Index, CGF.getPointerSize());
1466   llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr);
1467 
1468   Address Addr = Address(Ptr, CGF.getContext().getDeclAlign(Var));
1469   Addr = CGF.Builder.CreateElementBitCast(
1470       Addr, CGF.ConvertTypeForMem(Var->getType()));
1471   return Addr;
1472 }
1473 
1474 static llvm::Value *emitCopyprivateCopyFunction(
1475     CodeGenModule &CGM, llvm::Type *ArgsType,
1476     ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs,
1477     ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps) {
1478   auto &C = CGM.getContext();
1479   // void copy_func(void *LHSArg, void *RHSArg);
1480   FunctionArgList Args;
1481   ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, SourceLocation(), /*Id=*/nullptr,
1482                            C.VoidPtrTy);
1483   ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, SourceLocation(), /*Id=*/nullptr,
1484                            C.VoidPtrTy);
1485   Args.push_back(&LHSArg);
1486   Args.push_back(&RHSArg);
1487   FunctionType::ExtInfo EI;
1488   auto &CGFI = CGM.getTypes().arrangeFreeFunctionDeclaration(
1489       C.VoidTy, Args, EI, /*isVariadic=*/false);
1490   auto *Fn = llvm::Function::Create(
1491       CGM.getTypes().GetFunctionType(CGFI), llvm::GlobalValue::InternalLinkage,
1492       ".omp.copyprivate.copy_func", &CGM.getModule());
1493   CGM.SetLLVMFunctionAttributes(/*D=*/nullptr, CGFI, Fn);
1494   CodeGenFunction CGF(CGM);
1495   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args);
1496   // Dest = (void*[n])(LHSArg);
1497   // Src = (void*[n])(RHSArg);
1498   Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
1499       CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
1500       ArgsType), CGF.getPointerAlign());
1501   Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
1502       CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
1503       ArgsType), CGF.getPointerAlign());
1504   // *(Type0*)Dst[0] = *(Type0*)Src[0];
1505   // *(Type1*)Dst[1] = *(Type1*)Src[1];
1506   // ...
1507   // *(Typen*)Dst[n] = *(Typen*)Src[n];
1508   for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) {
1509     auto DestVar = cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl());
1510     Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar);
1511 
1512     auto SrcVar = cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl());
1513     Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar);
1514 
1515     auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl();
1516     QualType Type = VD->getType();
1517     CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]);
1518   }
1519   CGF.FinishFunction();
1520   return Fn;
1521 }
1522 
1523 void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF,
1524                                        const RegionCodeGenTy &SingleOpGen,
1525                                        SourceLocation Loc,
1526                                        ArrayRef<const Expr *> CopyprivateVars,
1527                                        ArrayRef<const Expr *> SrcExprs,
1528                                        ArrayRef<const Expr *> DstExprs,
1529                                        ArrayRef<const Expr *> AssignmentOps) {
1530   assert(CopyprivateVars.size() == SrcExprs.size() &&
1531          CopyprivateVars.size() == DstExprs.size() &&
1532          CopyprivateVars.size() == AssignmentOps.size());
1533   auto &C = CGM.getContext();
1534   // int32 did_it = 0;
1535   // if(__kmpc_single(ident_t *, gtid)) {
1536   //   SingleOpGen();
1537   //   __kmpc_end_single(ident_t *, gtid);
1538   //   did_it = 1;
1539   // }
1540   // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
1541   // <copy_func>, did_it);
1542 
1543   Address DidIt = Address::invalid();
1544   if (!CopyprivateVars.empty()) {
1545     // int32 did_it = 0;
1546     auto KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
1547     DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it");
1548     CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt);
1549   }
1550   // Prepare arguments and build a call to __kmpc_single
1551   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
1552   auto *IsSingle =
1553       CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_single), Args);
1554   typedef CallEndCleanup<std::extent<decltype(Args)>::value>
1555       SingleCallEndCleanup;
1556   emitIfStmt(
1557       CGF, IsSingle, OMPD_single, Loc, [&](CodeGenFunction &CGF) -> void {
1558         CodeGenFunction::RunCleanupsScope Scope(CGF);
1559         CGF.EHStack.pushCleanup<SingleCallEndCleanup>(
1560             NormalAndEHCleanup, createRuntimeFunction(OMPRTL__kmpc_end_single),
1561             llvm::makeArrayRef(Args));
1562         SingleOpGen(CGF);
1563         if (DidIt.isValid()) {
1564           // did_it = 1;
1565           CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt);
1566         }
1567       });
1568   // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
1569   // <copy_func>, did_it);
1570   if (DidIt.isValid()) {
1571     llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size());
1572     auto CopyprivateArrayTy =
1573         C.getConstantArrayType(C.VoidPtrTy, ArraySize, ArrayType::Normal,
1574                                /*IndexTypeQuals=*/0);
1575     // Create a list of all private variables for copyprivate.
1576     Address CopyprivateList =
1577         CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list");
1578     for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) {
1579       Address Elem = CGF.Builder.CreateConstArrayGEP(
1580           CopyprivateList, I, CGF.getPointerSize());
1581       CGF.Builder.CreateStore(
1582           CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
1583               CGF.EmitLValue(CopyprivateVars[I]).getPointer(), CGF.VoidPtrTy),
1584           Elem);
1585     }
1586     // Build function that copies private values from single region to all other
1587     // threads in the corresponding parallel region.
1588     auto *CpyFn = emitCopyprivateCopyFunction(
1589         CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy)->getPointerTo(),
1590         CopyprivateVars, SrcExprs, DstExprs, AssignmentOps);
1591     auto *BufSize = getTypeSize(CGF, CopyprivateArrayTy);
1592     Address CL =
1593       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(CopyprivateList,
1594                                                       CGF.VoidPtrTy);
1595     auto *DidItVal = CGF.Builder.CreateLoad(DidIt);
1596     llvm::Value *Args[] = {
1597         emitUpdateLocation(CGF, Loc), // ident_t *<loc>
1598         getThreadID(CGF, Loc),        // i32 <gtid>
1599         BufSize,                      // size_t <buf_size>
1600         CL.getPointer(),              // void *<copyprivate list>
1601         CpyFn,                        // void (*) (void *, void *) <copy_func>
1602         DidItVal                      // i32 did_it
1603     };
1604     CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_copyprivate), Args);
1605   }
1606 }
1607 
1608 void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF,
1609                                         const RegionCodeGenTy &OrderedOpGen,
1610                                         SourceLocation Loc, bool IsThreads) {
1611   // __kmpc_ordered(ident_t *, gtid);
1612   // OrderedOpGen();
1613   // __kmpc_end_ordered(ident_t *, gtid);
1614   // Prepare arguments and build a call to __kmpc_ordered
1615   CodeGenFunction::RunCleanupsScope Scope(CGF);
1616   if (IsThreads) {
1617     llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
1618     CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_ordered), Args);
1619     // Build a call to __kmpc_end_ordered
1620     CGF.EHStack.pushCleanup<CallEndCleanup<std::extent<decltype(Args)>::value>>(
1621         NormalAndEHCleanup, createRuntimeFunction(OMPRTL__kmpc_end_ordered),
1622         llvm::makeArrayRef(Args));
1623   }
1624   emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
1625 }
1626 
1627 void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
1628                                       OpenMPDirectiveKind Kind, bool EmitChecks,
1629                                       bool ForceSimpleCall) {
1630   // Build call __kmpc_cancel_barrier(loc, thread_id);
1631   // Build call __kmpc_barrier(loc, thread_id);
1632   OpenMPLocationFlags Flags = OMP_IDENT_KMPC;
1633   if (Kind == OMPD_for) {
1634     Flags =
1635         static_cast<OpenMPLocationFlags>(Flags | OMP_IDENT_BARRIER_IMPL_FOR);
1636   } else if (Kind == OMPD_sections) {
1637     Flags = static_cast<OpenMPLocationFlags>(Flags |
1638                                              OMP_IDENT_BARRIER_IMPL_SECTIONS);
1639   } else if (Kind == OMPD_single) {
1640     Flags =
1641         static_cast<OpenMPLocationFlags>(Flags | OMP_IDENT_BARRIER_IMPL_SINGLE);
1642   } else if (Kind == OMPD_barrier) {
1643     Flags = static_cast<OpenMPLocationFlags>(Flags | OMP_IDENT_BARRIER_EXPL);
1644   } else {
1645     Flags = static_cast<OpenMPLocationFlags>(Flags | OMP_IDENT_BARRIER_IMPL);
1646   }
1647   // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc,
1648   // thread_id);
1649   auto *OMPRegionInfo =
1650       dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo);
1651   // Do not emit barrier call in the single directive emitted in some rare cases
1652   // for sections directives.
1653   if (OMPRegionInfo && OMPRegionInfo->getDirectiveKind() == OMPD_single)
1654     return;
1655   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags),
1656                          getThreadID(CGF, Loc)};
1657   if (OMPRegionInfo) {
1658     if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) {
1659       auto *Result = CGF.EmitRuntimeCall(
1660           createRuntimeFunction(OMPRTL__kmpc_cancel_barrier), Args);
1661       if (EmitChecks) {
1662         // if (__kmpc_cancel_barrier()) {
1663         //   exit from construct;
1664         // }
1665         auto *ExitBB = CGF.createBasicBlock(".cancel.exit");
1666         auto *ContBB = CGF.createBasicBlock(".cancel.continue");
1667         auto *Cmp = CGF.Builder.CreateIsNotNull(Result);
1668         CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
1669         CGF.EmitBlock(ExitBB);
1670         //   exit from construct;
1671         auto CancelDestination =
1672             CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
1673         CGF.EmitBranchThroughCleanup(CancelDestination);
1674         CGF.EmitBlock(ContBB, /*IsFinished=*/true);
1675       }
1676       return;
1677     }
1678   }
1679   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_barrier), Args);
1680 }
1681 
/// \brief Numeric schedule kinds handed to the runtime for 'omp for' loops.
/// The values mirror the enum sched_type in kmp.h and must not be renumbered.
enum OpenMPSchedType {
  // Default (unordered) schedules.
  /// \brief Lower bound for default (unordered) versions.
  OMP_sch_lower = 32,
  OMP_sch_static_chunked = 33,
  OMP_sch_static = 34,
  /// \brief Alias for the schedule used by default.
  OMP_sch_default = OMP_sch_static,
  OMP_sch_dynamic_chunked = 35,
  OMP_sch_guided_chunked = 36,
  OMP_sch_runtime = 37,
  OMP_sch_auto = 38,

  // 'ordered' counterparts of the schedules above.
  /// \brief Lower bound for 'ordered' versions.
  OMP_ord_lower = 64,
  OMP_ord_static_chunked = 65,
  OMP_ord_static = 66,
  OMP_ord_dynamic_chunked = 67,
  OMP_ord_guided_chunked = 68,
  OMP_ord_runtime = 69,
  OMP_ord_auto = 70,
};
1703 
1704 /// \brief Map the OpenMP loop schedule to the runtime enumeration.
1705 static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind,
1706                                           bool Chunked, bool Ordered) {
1707   switch (ScheduleKind) {
1708   case OMPC_SCHEDULE_static:
1709     return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked)
1710                    : (Ordered ? OMP_ord_static : OMP_sch_static);
1711   case OMPC_SCHEDULE_dynamic:
1712     return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked;
1713   case OMPC_SCHEDULE_guided:
1714     return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked;
1715   case OMPC_SCHEDULE_runtime:
1716     return Ordered ? OMP_ord_runtime : OMP_sch_runtime;
1717   case OMPC_SCHEDULE_auto:
1718     return Ordered ? OMP_ord_auto : OMP_sch_auto;
1719   case OMPC_SCHEDULE_unknown:
1720     assert(!Chunked && "chunk was specified but schedule kind not known");
1721     return Ordered ? OMP_ord_static : OMP_sch_static;
1722   }
1723   llvm_unreachable("Unexpected runtime schedule");
1724 }
1725 
1726 bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind,
1727                                          bool Chunked) const {
1728   auto Schedule = getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
1729   return Schedule == OMP_sch_static;
1730 }
1731 
1732 bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const {
1733   auto Schedule =
1734       getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false);
1735   assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here");
1736   return Schedule != OMP_sch_static;
1737 }
1738 
1739 void CGOpenMPRuntime::emitForDispatchInit(CodeGenFunction &CGF,
1740                                           SourceLocation Loc,
1741                                           OpenMPScheduleClauseKind ScheduleKind,
1742                                           unsigned IVSize, bool IVSigned,
1743                                           bool Ordered, llvm::Value *UB,
1744                                           llvm::Value *Chunk) {
1745   OpenMPSchedType Schedule =
1746       getRuntimeSchedule(ScheduleKind, Chunk != nullptr, Ordered);
1747   assert(Ordered ||
1748          (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked &&
1749           Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked));
1750   // Call __kmpc_dispatch_init(
1751   //          ident_t *loc, kmp_int32 tid, kmp_int32 schedule,
1752   //          kmp_int[32|64] lower, kmp_int[32|64] upper,
1753   //          kmp_int[32|64] stride, kmp_int[32|64] chunk);
1754 
1755   // If the Chunk was not specified in the clause - use default value 1.
1756   if (Chunk == nullptr)
1757     Chunk = CGF.Builder.getIntN(IVSize, 1);
1758   llvm::Value *Args[] = {
1759     emitUpdateLocation(CGF, Loc, OMP_IDENT_KMPC),
1760     getThreadID(CGF, Loc),
1761     CGF.Builder.getInt32(Schedule), // Schedule type
1762     CGF.Builder.getIntN(IVSize, 0), // Lower
1763     UB,                             // Upper
1764     CGF.Builder.getIntN(IVSize, 1), // Stride
1765     Chunk                           // Chunk
1766   };
1767   CGF.EmitRuntimeCall(createDispatchInitFunction(IVSize, IVSigned), Args);
1768 }
1769 
1770 void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF,
1771                                         SourceLocation Loc,
1772                                         OpenMPScheduleClauseKind ScheduleKind,
1773                                         unsigned IVSize, bool IVSigned,
1774                                         bool Ordered, Address IL, Address LB,
1775                                         Address UB, Address ST,
1776                                         llvm::Value *Chunk) {
1777   OpenMPSchedType Schedule =
1778     getRuntimeSchedule(ScheduleKind, Chunk != nullptr, Ordered);
1779   assert(!Ordered);
1780   assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked ||
1781          Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked);
1782 
1783   // Call __kmpc_for_static_init(
1784   //          ident_t *loc, kmp_int32 tid, kmp_int32 schedtype,
1785   //          kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower,
1786   //          kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
1787   //          kmp_int[32|64] incr, kmp_int[32|64] chunk);
1788   if (Chunk == nullptr) {
1789     assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static) &&
1790            "expected static non-chunked schedule");
1791     // If the Chunk was not specified in the clause - use default value 1.
1792       Chunk = CGF.Builder.getIntN(IVSize, 1);
1793   } else {
1794     assert((Schedule == OMP_sch_static_chunked ||
1795             Schedule == OMP_ord_static_chunked) &&
1796            "expected static chunked schedule");
1797   }
1798   llvm::Value *Args[] = {
1799     emitUpdateLocation(CGF, Loc, OMP_IDENT_KMPC),
1800     getThreadID(CGF, Loc),
1801     CGF.Builder.getInt32(Schedule), // Schedule type
1802     IL.getPointer(),                // &isLastIter
1803     LB.getPointer(),                // &LB
1804     UB.getPointer(),                // &UB
1805     ST.getPointer(),                // &Stride
1806     CGF.Builder.getIntN(IVSize, 1), // Incr
1807     Chunk                           // Chunk
1808   };
1809   CGF.EmitRuntimeCall(createForStaticInitFunction(IVSize, IVSigned), Args);
1810 }
1811 
1812 void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF,
1813                                           SourceLocation Loc) {
1814   // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid);
1815   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, OMP_IDENT_KMPC),
1816                          getThreadID(CGF, Loc)};
1817   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_for_static_fini),
1818                       Args);
1819 }
1820 
1821 void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
1822                                                  SourceLocation Loc,
1823                                                  unsigned IVSize,
1824                                                  bool IVSigned) {
1825   // Call __kmpc_for_dynamic_fini_(4|8)[u](ident_t *loc, kmp_int32 tid);
1826   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, OMP_IDENT_KMPC),
1827                          getThreadID(CGF, Loc)};
1828   CGF.EmitRuntimeCall(createDispatchFiniFunction(IVSize, IVSigned), Args);
1829 }
1830 
1831 llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF,
1832                                           SourceLocation Loc, unsigned IVSize,
1833                                           bool IVSigned, Address IL,
1834                                           Address LB, Address UB,
1835                                           Address ST) {
1836   // Call __kmpc_dispatch_next(
1837   //          ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter,
1838   //          kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper,
1839   //          kmp_int[32|64] *p_stride);
1840   llvm::Value *Args[] = {
1841       emitUpdateLocation(CGF, Loc, OMP_IDENT_KMPC), getThreadID(CGF, Loc),
1842       IL.getPointer(), // &isLastIter
1843       LB.getPointer(), // &Lower
1844       UB.getPointer(), // &Upper
1845       ST.getPointer()  // &Stride
1846   };
1847   llvm::Value *Call =
1848       CGF.EmitRuntimeCall(createDispatchNextFunction(IVSize, IVSigned), Args);
1849   return CGF.EmitScalarConversion(
1850       Call, CGF.getContext().getIntTypeForBitwidth(32, /* Signed */ true),
1851       CGF.getContext().BoolTy, Loc);
1852 }
1853 
1854 void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
1855                                            llvm::Value *NumThreads,
1856                                            SourceLocation Loc) {
1857   // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads)
1858   llvm::Value *Args[] = {
1859       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
1860       CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)};
1861   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_num_threads),
1862                       Args);
1863 }
1864 
1865 void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF,
1866                                          OpenMPProcBindClauseKind ProcBind,
1867                                          SourceLocation Loc) {
1868   // Constants for proc bind value accepted by the runtime.
1869   enum ProcBindTy {
1870     ProcBindFalse = 0,
1871     ProcBindTrue,
1872     ProcBindMaster,
1873     ProcBindClose,
1874     ProcBindSpread,
1875     ProcBindIntel,
1876     ProcBindDefault
1877   } RuntimeProcBind;
1878   switch (ProcBind) {
1879   case OMPC_PROC_BIND_master:
1880     RuntimeProcBind = ProcBindMaster;
1881     break;
1882   case OMPC_PROC_BIND_close:
1883     RuntimeProcBind = ProcBindClose;
1884     break;
1885   case OMPC_PROC_BIND_spread:
1886     RuntimeProcBind = ProcBindSpread;
1887     break;
1888   case OMPC_PROC_BIND_unknown:
1889     llvm_unreachable("Unsupported proc_bind value.");
1890   }
1891   // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind)
1892   llvm::Value *Args[] = {
1893       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
1894       llvm::ConstantInt::get(CGM.IntTy, RuntimeProcBind, /*isSigned=*/true)};
1895   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_proc_bind), Args);
1896 }
1897 
1898 void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>,
1899                                 SourceLocation Loc) {
1900   // Build call void __kmpc_flush(ident_t *loc)
1901   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_flush),
1902                       emitUpdateLocation(CGF, Loc));
1903 }
1904 
namespace {
/// \brief Field positions inside the type kmp_task_t, in declaration order.
enum KmpTaskTFields {
  /// \brief List of shared variables.
  KmpTaskTShareds = 0,
  /// \brief Task routine.
  KmpTaskTRoutine = 1,
  /// \brief Partition id for the untied tasks.
  KmpTaskTPartId = 2,
  /// \brief Function with call of destructors for private variables.
  KmpTaskTDestructors = 3,
};
} // anonymous namespace
1918 
1919 void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) {
1920   if (!KmpRoutineEntryPtrTy) {
1921     // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type.
1922     auto &C = CGM.getContext();
1923     QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy};
1924     FunctionProtoType::ExtProtoInfo EPI;
1925     KmpRoutineEntryPtrQTy = C.getPointerType(
1926         C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI));
1927     KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy);
1928   }
1929 }
1930 
1931 static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC,
1932                                        QualType FieldTy) {
1933   auto *Field = FieldDecl::Create(
1934       C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy,
1935       C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()),
1936       /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit);
1937   Field->setAccess(AS_public);
1938   DC->addDecl(Field);
1939   return Field;
1940 }
1941 
1942 namespace {
1943 struct PrivateHelpersTy {
1944   PrivateHelpersTy(const VarDecl *Original, const VarDecl *PrivateCopy,
1945                    const VarDecl *PrivateElemInit)
1946       : Original(Original), PrivateCopy(PrivateCopy),
1947         PrivateElemInit(PrivateElemInit) {}
1948   const VarDecl *Original;
1949   const VarDecl *PrivateCopy;
1950   const VarDecl *PrivateElemInit;
1951 };
1952 typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy;
1953 } // anonymous namespace
1954 
1955 static RecordDecl *
1956 createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) {
1957   if (!Privates.empty()) {
1958     auto &C = CGM.getContext();
1959     // Build struct .kmp_privates_t. {
1960     //         /*  private vars  */
1961     //       };
1962     auto *RD = C.buildImplicitRecord(".kmp_privates.t");
1963     RD->startDefinition();
1964     for (auto &&Pair : Privates) {
1965       auto *VD = Pair.second.Original;
1966       auto Type = VD->getType();
1967       Type = Type.getNonReferenceType();
1968       auto *FD = addFieldToRecordDecl(C, RD, Type);
1969       if (VD->hasAttrs()) {
1970         for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()),
1971              E(VD->getAttrs().end());
1972              I != E; ++I)
1973           FD->addAttr(*I);
1974       }
1975     }
1976     RD->completeDefinition();
1977     return RD;
1978   }
1979   return nullptr;
1980 }
1981 
1982 static RecordDecl *
1983 createKmpTaskTRecordDecl(CodeGenModule &CGM, QualType KmpInt32Ty,
1984                          QualType KmpRoutineEntryPointerQTy) {
1985   auto &C = CGM.getContext();
1986   // Build struct kmp_task_t {
1987   //         void *              shareds;
1988   //         kmp_routine_entry_t routine;
1989   //         kmp_int32           part_id;
1990   //         kmp_routine_entry_t destructors;
1991   //       };
1992   auto *RD = C.buildImplicitRecord("kmp_task_t");
1993   RD->startDefinition();
1994   addFieldToRecordDecl(C, RD, C.VoidPtrTy);
1995   addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy);
1996   addFieldToRecordDecl(C, RD, KmpInt32Ty);
1997   addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy);
1998   RD->completeDefinition();
1999   return RD;
2000 }
2001 
2002 static RecordDecl *
2003 createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy,
2004                                      ArrayRef<PrivateDataTy> Privates) {
2005   auto &C = CGM.getContext();
2006   // Build struct kmp_task_t_with_privates {
2007   //         kmp_task_t task_data;
2008   //         .kmp_privates_t. privates;
2009   //       };
2010   auto *RD = C.buildImplicitRecord("kmp_task_t_with_privates");
2011   RD->startDefinition();
2012   addFieldToRecordDecl(C, RD, KmpTaskTQTy);
2013   if (auto *PrivateRD = createPrivatesRecordDecl(CGM, Privates)) {
2014     addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD));
2015   }
2016   RD->completeDefinition();
2017   return RD;
2018 }
2019 
2020 /// \brief Emit a proxy function which accepts kmp_task_t as the second
2021 /// argument.
2022 /// \code
2023 /// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
2024 ///   TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map,
2025 ///   tt->shareds);
2026 ///   return 0;
2027 /// }
2028 /// \endcode
2029 static llvm::Value *
2030 emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc,
2031                       QualType KmpInt32Ty, QualType KmpTaskTWithPrivatesPtrQTy,
2032                       QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy,
2033                       QualType SharedsPtrTy, llvm::Value *TaskFunction,
2034                       llvm::Value *TaskPrivatesMap) {
2035   auto &C = CGM.getContext();
2036   FunctionArgList Args;
2037   ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty);
2038   ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc,
2039                                 /*Id=*/nullptr,
2040                                 KmpTaskTWithPrivatesPtrQTy.withRestrict());
2041   Args.push_back(&GtidArg);
2042   Args.push_back(&TaskTypeArg);
2043   FunctionType::ExtInfo Info;
2044   auto &TaskEntryFnInfo =
2045       CGM.getTypes().arrangeFreeFunctionDeclaration(KmpInt32Ty, Args, Info,
2046                                                     /*isVariadic=*/false);
2047   auto *TaskEntryTy = CGM.getTypes().GetFunctionType(TaskEntryFnInfo);
2048   auto *TaskEntry =
2049       llvm::Function::Create(TaskEntryTy, llvm::GlobalValue::InternalLinkage,
2050                              ".omp_task_entry.", &CGM.getModule());
2051   CGM.SetLLVMFunctionAttributes(/*D=*/nullptr, TaskEntryFnInfo, TaskEntry);
2052   CodeGenFunction CGF(CGM);
2053   CGF.disableDebugInfo();
2054   CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args);
2055 
2056   // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map,
2057   // tt->task_data.shareds);
2058   auto *GtidParam = CGF.EmitLoadOfScalar(
2059       CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc);
2060   LValue TDBase = emitLoadOfPointerLValue(
2061       CGF, CGF.GetAddrOfLocalVar(&TaskTypeArg), KmpTaskTWithPrivatesPtrQTy);
2062   auto *KmpTaskTWithPrivatesQTyRD =
2063       cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
2064   LValue Base =
2065       CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
2066   auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
2067   auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
2068   auto PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI);
2069   auto *PartidParam = CGF.EmitLoadOfLValue(PartIdLVal, Loc).getScalarVal();
2070 
2071   auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds);
2072   auto SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI);
2073   auto *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2074       CGF.EmitLoadOfLValue(SharedsLVal, Loc).getScalarVal(),
2075       CGF.ConvertTypeForMem(SharedsPtrTy));
2076 
2077   auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
2078   llvm::Value *PrivatesParam;
2079   if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) {
2080     auto PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI);
2081     PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2082         PrivatesLVal.getPointer(), CGF.VoidPtrTy);
2083   } else {
2084     PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
2085   }
2086 
2087   llvm::Value *CallArgs[] = {GtidParam, PartidParam, PrivatesParam,
2088                              TaskPrivatesMap, SharedsParam};
2089   CGF.EmitCallOrInvoke(TaskFunction, CallArgs);
2090   CGF.EmitStoreThroughLValue(
2091       RValue::get(CGF.Builder.getInt32(/*C=*/0)),
2092       CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty));
2093   CGF.FinishFunction();
2094   return TaskEntry;
2095 }
2096 
2097 static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM,
2098                                             SourceLocation Loc,
2099                                             QualType KmpInt32Ty,
2100                                             QualType KmpTaskTWithPrivatesPtrQTy,
2101                                             QualType KmpTaskTWithPrivatesQTy) {
2102   auto &C = CGM.getContext();
2103   FunctionArgList Args;
2104   ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty);
2105   ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc,
2106                                 /*Id=*/nullptr,
2107                                 KmpTaskTWithPrivatesPtrQTy.withRestrict());
2108   Args.push_back(&GtidArg);
2109   Args.push_back(&TaskTypeArg);
2110   FunctionType::ExtInfo Info;
2111   auto &DestructorFnInfo =
2112       CGM.getTypes().arrangeFreeFunctionDeclaration(KmpInt32Ty, Args, Info,
2113                                                     /*isVariadic=*/false);
2114   auto *DestructorFnTy = CGM.getTypes().GetFunctionType(DestructorFnInfo);
2115   auto *DestructorFn =
2116       llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage,
2117                              ".omp_task_destructor.", &CGM.getModule());
2118   CGM.SetLLVMFunctionAttributes(/*D=*/nullptr, DestructorFnInfo, DestructorFn);
2119   CodeGenFunction CGF(CGM);
2120   CGF.disableDebugInfo();
2121   CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo,
2122                     Args);
2123 
2124   LValue Base = emitLoadOfPointerLValue(
2125       CGF, CGF.GetAddrOfLocalVar(&TaskTypeArg), KmpTaskTWithPrivatesPtrQTy);
2126   auto *KmpTaskTWithPrivatesQTyRD =
2127       cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
2128   auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
2129   Base = CGF.EmitLValueForField(Base, *FI);
2130   for (auto *Field :
2131        cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) {
2132     if (auto DtorKind = Field->getType().isDestructedType()) {
2133       auto FieldLValue = CGF.EmitLValueForField(Base, Field);
2134       CGF.pushDestroy(DtorKind, FieldLValue.getAddress(), Field->getType());
2135     }
2136   }
2137   CGF.FinishFunction();
2138   return DestructorFn;
2139 }
2140 
2141 /// \brief Emit a privates mapping function for correct handling of private and
2142 /// firstprivate variables.
2143 /// \code
2144 /// void .omp_task_privates_map.(const .privates. *noalias privs, <ty1>
2145 /// **noalias priv1,...,  <tyn> **noalias privn) {
2146 ///   *priv1 = &.privates.priv1;
2147 ///   ...;
2148 ///   *privn = &.privates.privn;
2149 /// }
2150 /// \endcode
2151 static llvm::Value *
2152 emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc,
2153                                ArrayRef<const Expr *> PrivateVars,
2154                                ArrayRef<const Expr *> FirstprivateVars,
2155                                QualType PrivatesQTy,
2156                                ArrayRef<PrivateDataTy> Privates) {
2157   auto &C = CGM.getContext();
2158   FunctionArgList Args;
2159   ImplicitParamDecl TaskPrivatesArg(
2160       C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
2161       C.getPointerType(PrivatesQTy).withConst().withRestrict());
2162   Args.push_back(&TaskPrivatesArg);
2163   llvm::DenseMap<const VarDecl *, unsigned> PrivateVarsPos;
2164   unsigned Counter = 1;
2165   for (auto *E: PrivateVars) {
2166     Args.push_back(ImplicitParamDecl::Create(
2167         C, /*DC=*/nullptr, Loc,
2168         /*Id=*/nullptr, C.getPointerType(C.getPointerType(E->getType()))
2169                             .withConst()
2170                             .withRestrict()));
2171     auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
2172     PrivateVarsPos[VD] = Counter;
2173     ++Counter;
2174   }
2175   for (auto *E : FirstprivateVars) {
2176     Args.push_back(ImplicitParamDecl::Create(
2177         C, /*DC=*/nullptr, Loc,
2178         /*Id=*/nullptr, C.getPointerType(C.getPointerType(E->getType()))
2179                             .withConst()
2180                             .withRestrict()));
2181     auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
2182     PrivateVarsPos[VD] = Counter;
2183     ++Counter;
2184   }
2185   FunctionType::ExtInfo Info;
2186   auto &TaskPrivatesMapFnInfo =
2187       CGM.getTypes().arrangeFreeFunctionDeclaration(C.VoidTy, Args, Info,
2188                                                     /*isVariadic=*/false);
2189   auto *TaskPrivatesMapTy =
2190       CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo);
2191   auto *TaskPrivatesMap = llvm::Function::Create(
2192       TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage,
2193       ".omp_task_privates_map.", &CGM.getModule());
2194   CGM.SetLLVMFunctionAttributes(/*D=*/nullptr, TaskPrivatesMapFnInfo,
2195                                 TaskPrivatesMap);
2196   TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline);
2197   CodeGenFunction CGF(CGM);
2198   CGF.disableDebugInfo();
2199   CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap,
2200                     TaskPrivatesMapFnInfo, Args);
2201 
2202   // *privi = &.privates.privi;
2203   LValue Base = emitLoadOfPointerLValue(
2204       CGF, CGF.GetAddrOfLocalVar(&TaskPrivatesArg), TaskPrivatesArg.getType());
2205   auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl());
2206   Counter = 0;
2207   for (auto *Field : PrivatesQTyRD->fields()) {
2208     auto FieldLVal = CGF.EmitLValueForField(Base, Field);
2209     auto *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]];
2210     auto RefLVal = CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType());
2211     auto RefLoadLVal =
2212         emitLoadOfPointerLValue(CGF, RefLVal.getAddress(), RefLVal.getType());
2213     CGF.EmitStoreOfScalar(FieldLVal.getPointer(), RefLoadLVal);
2214     ++Counter;
2215   }
2216   CGF.FinishFunction();
2217   return TaskPrivatesMap;
2218 }
2219 
2220 static int array_pod_sort_comparator(const PrivateDataTy *P1,
2221                                      const PrivateDataTy *P2) {
2222   return P1->first < P2->first ? 1 : (P2->first < P1->first ? -1 : 0);
2223 }
2224 
/// \brief Emit the code that allocates, fills in and launches an OpenMP task.
///
/// The steps performed, in order:
///  1. collect private and firstprivate descriptors and sort them by
///     alignment;
///  2. build the per-task 'kmp_task_t_with_privates' record type;
///  3. emit the privates mapping function (if there are privates) and the
///     '.omp_task_entry.' proxy;
///  4. call __kmpc_omp_task_alloc and copy the shareds into the new task;
///  5. initialize the private copies and store the destructors function
///     pointer (or null) into the task;
///  6. build the kmp_depend_info array for the dependences (if any);
///  7. emit either the task-enqueue call or, under an 'if' clause, the
///     enqueue and the immediate-execution sequence as the two branches.
///
/// \param CGF Function into which the code is emitted.
/// \param Loc Source location used for the runtime location/thread-id
/// arguments and for emitted loads.
/// \param D Directive whose associated captured statement is used to look up
/// the captured fields of firstprivate variables.
/// \param Tied If true, the 'tied' bit (0x1) is set in the task flags.
/// \param Final If the pointer part is non-null it is a runtime condition
/// selecting the 'final' bit (0x2); otherwise the integer part selects it at
/// compile time.
/// \param TaskFunction Outlined task body; must be an llvm::Function, its
/// fourth parameter provides the type of the privates map argument.
/// \param SharedsTy Record type of the captured shared variables.
/// \param Shareds Address of the captured shared variables to copy from.
/// \param IfCond Condition of the 'if' clause, or null if there is none.
/// \param PrivateVars References to private variables.
/// \param PrivateCopies Private copies, parallel to \a PrivateVars.
/// \param FirstprivateVars References to firstprivate variables.
/// \param FirstprivateCopies Private copies, parallel to
/// \a FirstprivateVars.
/// \param FirstprivateInits Helper variables used by the initializers,
/// parallel to \a FirstprivateVars.
/// \param Dependences List of (dependence kind, expression) pairs.
void CGOpenMPRuntime::emitTaskCall(
    CodeGenFunction &CGF, SourceLocation Loc, const OMPExecutableDirective &D,
    bool Tied, llvm::PointerIntPair<llvm::Value *, 1, bool> Final,
    llvm::Value *TaskFunction, QualType SharedsTy, Address Shareds,
    const Expr *IfCond, ArrayRef<const Expr *> PrivateVars,
    ArrayRef<const Expr *> PrivateCopies,
    ArrayRef<const Expr *> FirstprivateVars,
    ArrayRef<const Expr *> FirstprivateCopies,
    ArrayRef<const Expr *> FirstprivateInits,
    ArrayRef<std::pair<OpenMPDependClauseKind, const Expr *>> Dependences) {
  auto &C = CGM.getContext();
  llvm::SmallVector<PrivateDataTy, 8> Privates;
  // Aggregate privates and sort them by the alignment.
  // Each entry is (declared alignment of the original variable, helper data).
  // Private variables have no element-init helper.
  auto I = PrivateCopies.begin();
  for (auto *E : PrivateVars) {
    auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.push_back(std::make_pair(
        C.getDeclAlign(VD),
        PrivateHelpersTy(VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
                         /*PrivateElemInit=*/nullptr)));
    ++I;
  }
  // Firstprivate variables additionally carry the helper variable that the
  // initializer expression refers to.
  I = FirstprivateCopies.begin();
  auto IElemInitRef = FirstprivateInits.begin();
  for (auto *E : FirstprivateVars) {
    auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.push_back(std::make_pair(
        C.getDeclAlign(VD),
        PrivateHelpersTy(
            VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
            cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl()))));
    ++I, ++IElemInitRef;
  }
  // array_pod_sort_comparator puts entries with larger alignment first.
  llvm::array_pod_sort(Privates.begin(), Privates.end(),
                       array_pod_sort_comparator);
  auto KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
  // Build type kmp_routine_entry_t (if not built yet).
  emitKmpRoutineEntryT(KmpInt32Ty);
  // Build type kmp_task_t (if not built yet).
  if (KmpTaskTQTy.isNull()) {
    KmpTaskTQTy = C.getRecordType(
        createKmpTaskTRecordDecl(CGM, KmpInt32Ty, KmpRoutineEntryPtrQTy));
  }
  auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
  // Build particular struct kmp_task_t for the given task.
  // Unlike kmp_task_t, this wrapper record is created anew on every call
  // because its privates field depends on this task's private variables.
  auto *KmpTaskTWithPrivatesQTyRD =
      createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates);
  auto KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD);
  QualType KmpTaskTWithPrivatesPtrQTy =
      C.getPointerType(KmpTaskTWithPrivatesQTy);
  auto *KmpTaskTWithPrivatesTy = CGF.ConvertType(KmpTaskTWithPrivatesQTy);
  auto *KmpTaskTWithPrivatesPtrTy = KmpTaskTWithPrivatesTy->getPointerTo();
  auto *KmpTaskTWithPrivatesTySize = getTypeSize(CGF, KmpTaskTWithPrivatesQTy);
  QualType SharedsPtrTy = C.getPointerType(SharedsTy);

  // Emit the privates mapping function (if there are any privates).
  // The map argument must have the type of the fourth parameter (index 3) of
  // the outlined task function, so the map (or a null pointer) is cast to it.
  llvm::Value *TaskPrivatesMap = nullptr;
  auto *TaskPrivatesMapTy =
      std::next(cast<llvm::Function>(TaskFunction)->getArgumentList().begin(),
                3)
          ->getType();
  if (!Privates.empty()) {
    // The second field of the wrapper record is the privates record.
    auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
    TaskPrivatesMap = emitTaskPrivateMappingFunction(
        CGM, Loc, PrivateVars, FirstprivateVars, FI->getType(), Privates);
    TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        TaskPrivatesMap, TaskPrivatesMapTy);
  } else {
    TaskPrivatesMap = llvm::ConstantPointerNull::get(
        cast<llvm::PointerType>(TaskPrivatesMapTy));
  }
  // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid,
  // kmp_task_t *tt);
  auto *TaskEntry = emitProxyTaskFunction(
      CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTy,
      KmpTaskTQTy, SharedsPtrTy, TaskFunction, TaskPrivatesMap);

  // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
  // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
  // kmp_routine_entry_t *task_entry);
  // Task flags. Format is taken from
  // http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp.h,
  // description of kmp_tasking_flags struct.
  const unsigned TiedFlag = 0x1;
  const unsigned FinalFlag = 0x2;
  unsigned Flags = Tied ? TiedFlag : 0;
  // The 'final' bit is either selected at run time (pointer part of Final is
  // set) or folded to a constant from the integer part of Final.
  auto *TaskFlags =
      Final.getPointer()
          ? CGF.Builder.CreateSelect(Final.getPointer(),
                                     CGF.Builder.getInt32(FinalFlag),
                                     CGF.Builder.getInt32(/*C=*/0))
          : CGF.Builder.getInt32(Final.getInt() ? FinalFlag : 0);
  TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags));
  auto *SharedsSize = getTypeSize(CGF, SharedsTy);
  llvm::Value *AllocArgs[] = {emitUpdateLocation(CGF, Loc),
                              getThreadID(CGF, Loc), TaskFlags,
                              KmpTaskTWithPrivatesTySize, SharedsSize,
                              CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                                  TaskEntry, KmpRoutineEntryPtrTy)};
  auto *NewTask = CGF.EmitRuntimeCall(
      createRuntimeFunction(OMPRTL__kmpc_omp_task_alloc), AllocArgs);
  // View the returned task as kmp_task_t_with_privates: Base is the whole
  // wrapper record, TDBase is its leading kmp_task_t field.
  auto *NewTaskNewTaskTTy = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      NewTask, KmpTaskTWithPrivatesPtrTy);
  LValue Base = CGF.MakeNaturalAlignAddrLValue(NewTaskNewTaskTTy,
                                               KmpTaskTWithPrivatesQTy);
  LValue TDBase =
      CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin());
  // Fill the data in the resulting kmp_task_t record.
  // Copy shareds if there are any.
  // The destination is the pointer loaded from the task's 'shareds' field.
  // KmpTaskSharedsPtr stays invalid when the shareds record has no fields.
  Address KmpTaskSharedsPtr = Address::invalid();
  if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) {
    KmpTaskSharedsPtr =
        Address(CGF.EmitLoadOfScalar(
                    CGF.EmitLValueForField(
                        TDBase, *std::next(KmpTaskTQTyRD->field_begin(),
                                           KmpTaskTShareds)),
                    Loc),
                CGF.getNaturalTypeAlignment(SharedsTy));
    CGF.EmitAggregateCopy(KmpTaskSharedsPtr, Shareds, SharedsTy);
  }
  // Emit initial values for private copies (if any).
  // NeedsCleanup becomes true if any field of the privates record has a
  // destructed type; it decides whether a destructors function is emitted.
  bool NeedsCleanup = false;
  if (!Privates.empty()) {
    auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
    auto PrivatesBase = CGF.EmitLValueForField(Base, *FI);
    // From here on FI walks the fields of the privates record in step with
    // the (sorted) Privates list.
    FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin();
    LValue SharedsBase;
    if (!FirstprivateVars.empty()) {
      // Firstprivate copies are initialized from the shareds that were copied
      // into the task above.
      // NOTE(review): this uses KmpTaskSharedsPtr, which is only valid when
      // the shareds record is non-empty - presumably guaranteed whenever
      // firstprivate variables exist; confirm.
      SharedsBase = CGF.MakeAddrLValue(
          CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
              KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy)),
          SharedsTy);
    }
    CodeGenFunction::CGCapturedStmtInfo CapturesInfo(
        cast<CapturedStmt>(*D.getAssociatedStmt()));
    for (auto &&Pair : Privates) {
      auto *VD = Pair.second.PrivateCopy;
      auto *Init = VD->getAnyInitializer();
      LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI);
      // Private copies without an initializer are left uninitialized here.
      if (Init) {
        if (auto *Elem = Pair.second.PrivateElemInit) {
          // Firstprivate: locate the captured original inside the shareds
          // and re-create its lvalue with the variable's declared alignment.
          auto *OriginalVD = Pair.second.Original;
          auto *SharedField = CapturesInfo.lookup(OriginalVD);
          auto SharedRefLValue =
              CGF.EmitLValueForField(SharedsBase, SharedField);
          SharedRefLValue = CGF.MakeAddrLValue(
              Address(SharedRefLValue.getPointer(), C.getDeclAlign(OriginalVD)),
              SharedRefLValue.getType(), AlignmentSource::Decl);
          QualType Type = OriginalVD->getType();
          if (Type->isArrayType()) {
            // Initialize firstprivate array.
            if (!isa<CXXConstructExpr>(Init) ||
                CGF.isTrivialInitializer(Init)) {
              // Perform simple memcpy.
              CGF.EmitAggregateAssign(PrivateLValue.getAddress(),
                                      SharedRefLValue.getAddress(), Type);
            } else {
              // Initialize firstprivate array using element-by-element
              // initialization.
              CGF.EmitOMPAggregateAssign(
                  PrivateLValue.getAddress(), SharedRefLValue.getAddress(),
                  Type, [&CGF, Elem, Init, &CapturesInfo](
                            Address DestElement, Address SrcElement) {
                    // Clean up any temporaries needed by the initialization.
                    // Elem is remapped to the current source element so that
                    // Init reads from it.
                    CodeGenFunction::OMPPrivateScope InitScope(CGF);
                    InitScope.addPrivate(Elem, [SrcElement]() -> Address {
                      return SrcElement;
                    });
                    (void)InitScope.Privatize();
                    // Emit initialization for single element.
                    CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(
                        CGF, &CapturesInfo);
                    CGF.EmitAnyExprToMem(Init, DestElement,
                                         Init->getType().getQualifiers(),
                                         /*IsInitializer=*/false);
                  });
            }
          } else {
            // Non-array firstprivate: remap Elem to the shared original and
            // emit the initializer into the private field.
            CodeGenFunction::OMPPrivateScope InitScope(CGF);
            InitScope.addPrivate(Elem, [SharedRefLValue]() -> Address {
              return SharedRefLValue.getAddress();
            });
            (void)InitScope.Privatize();
            CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo);
            CGF.EmitExprAsInit(Init, VD, PrivateLValue,
                               /*capturedByInit=*/false);
          }
        } else {
          // Plain private: emit its own initializer.
          CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false);
        }
      }
      NeedsCleanup = NeedsCleanup || FI->getType().isDestructedType();
      ++FI;
    }
  }
  // Provide pointer to function with destructors for privates.
  // A null pointer is stored when no private needs destruction.
  llvm::Value *DestructorFn =
      NeedsCleanup ? emitDestructorsFunction(CGM, Loc, KmpInt32Ty,
                                             KmpTaskTWithPrivatesPtrQTy,
                                             KmpTaskTWithPrivatesQTy)
                   : llvm::ConstantPointerNull::get(
                         cast<llvm::PointerType>(KmpRoutineEntryPtrTy));
  LValue Destructor = CGF.EmitLValueForField(
      TDBase, *std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTDestructors));
  CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                            DestructorFn, KmpRoutineEntryPtrTy),
                        Destructor);

  // Process list of dependences.
  Address DependenciesArray = Address::invalid();
  unsigned NumDependencies = Dependences.size();
  if (NumDependencies) {
    // Dependence kind for RTL.
    enum RTLDependenceKindTy { DepIn = 1, DepOut = 2, DepInOut = 3 };
    // Field positions inside kmp_depend_info.
    enum RTLDependInfoFieldsTy { BaseAddr, Len, Flags };
    RecordDecl *KmpDependInfoRD;
    // The flags field is an unsigned integer with the same width as 'bool'.
    QualType FlagsTy =
        C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false);
    llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
    // Build type kmp_depend_info { intptr base_addr; size_t len; flags; }
    // (if not built yet).
    if (KmpDependInfoTy.isNull()) {
      KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info");
      KmpDependInfoRD->startDefinition();
      addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType());
      addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType());
      addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy);
      KmpDependInfoRD->completeDefinition();
      KmpDependInfoTy = C.getRecordType(KmpDependInfoRD);
    } else {
      KmpDependInfoRD = cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
    }
    CharUnits DependencySize = C.getTypeSizeInChars(KmpDependInfoTy);
    // Define type kmp_depend_info[<Dependences.size()>];
    QualType KmpDependInfoArrayTy = C.getConstantArrayType(
        KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies),
        ArrayType::Normal, /*IndexTypeQuals=*/0);
    // kmp_depend_info[<Dependences.size()>] deps;
    DependenciesArray = CGF.CreateMemTemp(KmpDependInfoArrayTy);
    for (unsigned i = 0; i < NumDependencies; ++i) {
      const Expr *E = Dependences[i].second;
      auto Addr = CGF.EmitLValue(E);
      llvm::Value *Size;
      QualType Ty = E->getType();
      if (auto *ASE = dyn_cast<OMPArraySectionExpr>(E->IgnoreParenImpCasts())) {
        // Array section: the length in bytes is the distance from the start
        // of the section (Addr) to one element past the lvalue emitted with
        // LowerBound=false.
        LValue UpAddrLVal =
            CGF.EmitOMPArraySectionExpr(ASE, /*LowerBound=*/false);
        llvm::Value *UpAddr =
            CGF.Builder.CreateConstGEP1_32(UpAddrLVal.getPointer(), /*Idx0=*/1);
        llvm::Value *LowIntPtr =
            CGF.Builder.CreatePtrToInt(Addr.getPointer(), CGM.SizeTy);
        llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGM.SizeTy);
        Size = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr);
      } else
        // Any other expression: the size of its type.
        Size = getTypeSize(CGF, Ty);
      // deps[i]
      auto Base = CGF.MakeAddrLValue(
          CGF.Builder.CreateConstArrayGEP(DependenciesArray, i, DependencySize),
          KmpDependInfoTy);
      // deps[i].base_addr = &<Dependences[i].second>;
      auto BaseAddrLVal = CGF.EmitLValueForField(
          Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
      CGF.EmitStoreOfScalar(
          CGF.Builder.CreatePtrToInt(Addr.getPointer(), CGF.IntPtrTy),
          BaseAddrLVal);
      // deps[i].len = sizeof(<Dependences[i].second>);
      auto LenLVal = CGF.EmitLValueForField(
          Base, *std::next(KmpDependInfoRD->field_begin(), Len));
      CGF.EmitStoreOfScalar(Size, LenLVal);
      // deps[i].flags = <Dependences[i].first>;
      // Translate the clause kind into the runtime's encoding.
      RTLDependenceKindTy DepKind;
      switch (Dependences[i].first) {
      case OMPC_DEPEND_in:
        DepKind = DepIn;
        break;
      case OMPC_DEPEND_out:
        DepKind = DepOut;
        break;
      case OMPC_DEPEND_inout:
        DepKind = DepInOut;
        break;
      case OMPC_DEPEND_unknown:
        llvm_unreachable("Unknown task dependence type");
      }
      auto FlagsLVal = CGF.EmitLValueForField(
          Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
      CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
                            FlagsLVal);
    }
    // From here on DependenciesArray is the address of the first element,
    // cast to void*, which is the form passed to the runtime calls below.
    DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        CGF.Builder.CreateStructGEP(DependenciesArray, 0, CharUnits::Zero()),
        CGF.VoidPtrTy);
  }

  // NOTE: routine and part_id fields are initialized by
  // __kmpc_omp_task_alloc() libcall.
  // Build kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t
  // *new_task);
  // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
  // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
  // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if dependence
  // list is not empty
  auto *ThreadID = getThreadID(CGF, Loc);
  auto *UpLoc = emitUpdateLocation(CGF, Loc);
  llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask };
  // DepTaskArgs is only filled in (and only used) when there are dependences.
  // The no-alias dependence list is always passed as empty (0, null).
  llvm::Value *DepTaskArgs[7];
  if (NumDependencies) {
    DepTaskArgs[0] = UpLoc;
    DepTaskArgs[1] = ThreadID;
    DepTaskArgs[2] = NewTask;
    DepTaskArgs[3] = CGF.Builder.getInt32(NumDependencies);
    DepTaskArgs[4] = DependenciesArray.getPointer();
    DepTaskArgs[5] = CGF.Builder.getInt32(0);
    DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }
  // 'Then' branch: hand the task over to the runtime.
  // The argument arrays are captured by reference; both lambdas are invoked
  // only inside this function, while the arrays are still alive.
  auto &&ThenCodeGen = [this, NumDependencies,
                        &TaskArgs, &DepTaskArgs](CodeGenFunction &CGF) {
    // TODO: add check for untied tasks.
    if (NumDependencies) {
      CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task_with_deps),
                          DepTaskArgs);
    } else {
      CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task),
                          TaskArgs);
    }
  };
  // Cleanup type that emits a runtime call taking as many arguments as
  // TaskArgs has elements.
  typedef CallEndCleanup<std::extent<decltype(TaskArgs)>::value>
      IfCallEndCleanup;

  // Arguments for __kmpc_omp_wait_deps; like DepTaskArgs, only filled in and
  // used when there are dependences.
  llvm::Value *DepWaitTaskArgs[6];
  if (NumDependencies) {
    DepWaitTaskArgs[0] = UpLoc;
    DepWaitTaskArgs[1] = ThreadID;
    DepWaitTaskArgs[2] = CGF.Builder.getInt32(NumDependencies);
    DepWaitTaskArgs[3] = DependenciesArray.getPointer();
    DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
    DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }
  // 'Else' branch: wait for the dependences (if any), then run the task
  // entry directly between the begin_if0 and complete_if0 runtime calls.
  auto &&ElseCodeGen = [this, &TaskArgs, ThreadID, NewTaskNewTaskTTy, TaskEntry,
                        NumDependencies, &DepWaitTaskArgs](CodeGenFunction &CGF) {
    CodeGenFunction::RunCleanupsScope LocalScope(CGF);
    // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
    // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
    // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info
    // is specified.
    if (NumDependencies)
      CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_wait_deps),
                          DepWaitTaskArgs);
    // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
    CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task_begin_if0),
                        TaskArgs);
    // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
    // Registered as a normal-and-EH cleanup rather than emitted inline.
    CGF.EHStack.pushCleanup<IfCallEndCleanup>(
        NormalAndEHCleanup,
        createRuntimeFunction(OMPRTL__kmpc_omp_task_complete_if0),
        llvm::makeArrayRef(TaskArgs));

    // Call proxy_task_entry(gtid, new_task);
    llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy};
    CGF.EmitCallOrInvoke(TaskEntry, OutlinedFnArgs);
  };

  // With an 'if' clause both variants are handed to emitOMPIfClause
  // (presumably 'then' for a true condition and 'else' otherwise - see that
  // helper); without one only the 'then' variant is emitted.
  if (IfCond) {
    emitOMPIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen);
  } else {
    CodeGenFunction::RunCleanupsScope Scope(CGF);
    ThenCodeGen(CGF);
  }
}
2593 
2594 /// \brief Emit reduction operation for each element of array (required for
2595 /// array sections) LHS op = RHS.
2596 /// \param Type Type of array.
2597 /// \param LHSVar Variable on the left side of the reduction operation
2598 /// (references element of array in original variable).
2599 /// \param RHSVar Variable on the right side of the reduction operation
2600 /// (references element of array in original variable).
2601 /// \param RedOpGen Generator of reduction operation with use of LHSVar and
2602 /// RHSVar.
2603 void EmitOMPAggregateReduction(
2604     CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar,
2605     const VarDecl *RHSVar,
2606     const llvm::function_ref<void(CodeGenFunction &CGF, const Expr *,
2607                                   const Expr *, const Expr *)> &RedOpGen,
2608     const Expr *XExpr = nullptr, const Expr *EExpr = nullptr,
2609     const Expr *UpExpr = nullptr) {
2610   // Perform element-by-element initialization.
2611   QualType ElementTy;
2612   Address LHSAddr = CGF.GetAddrOfLocalVar(LHSVar);
2613   Address RHSAddr = CGF.GetAddrOfLocalVar(RHSVar);
2614 
2615   // Drill down to the base element type on both arrays.
2616   auto ArrayTy = Type->getAsArrayTypeUnsafe();
2617   auto NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, LHSAddr);
2618 
2619   auto RHSBegin = RHSAddr.getPointer();
2620   auto LHSBegin = LHSAddr.getPointer();
2621   // Cast from pointer to array type to pointer to single element.
2622   auto LHSEnd = CGF.Builder.CreateGEP(LHSBegin, NumElements);
2623   // The basic structure here is a while-do loop.
2624   auto BodyBB = CGF.createBasicBlock("omp.arraycpy.body");
2625   auto DoneBB = CGF.createBasicBlock("omp.arraycpy.done");
2626   auto IsEmpty =
2627       CGF.Builder.CreateICmpEQ(LHSBegin, LHSEnd, "omp.arraycpy.isempty");
2628   CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
2629 
2630   // Enter the loop body, making that address the current address.
2631   auto EntryBB = CGF.Builder.GetInsertBlock();
2632   CGF.EmitBlock(BodyBB);
2633 
2634   CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);
2635 
2636   llvm::PHINode *RHSElementPHI = CGF.Builder.CreatePHI(
2637       RHSBegin->getType(), 2, "omp.arraycpy.srcElementPast");
2638   RHSElementPHI->addIncoming(RHSBegin, EntryBB);
2639   Address RHSElementCurrent =
2640       Address(RHSElementPHI,
2641               RHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));
2642 
2643   llvm::PHINode *LHSElementPHI = CGF.Builder.CreatePHI(
2644       LHSBegin->getType(), 2, "omp.arraycpy.destElementPast");
2645   LHSElementPHI->addIncoming(LHSBegin, EntryBB);
2646   Address LHSElementCurrent =
2647       Address(LHSElementPHI,
2648               LHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));
2649 
2650   // Emit copy.
2651   CodeGenFunction::OMPPrivateScope Scope(CGF);
2652   Scope.addPrivate(LHSVar, [=]() -> Address { return LHSElementCurrent; });
2653   Scope.addPrivate(RHSVar, [=]() -> Address { return RHSElementCurrent; });
2654   Scope.Privatize();
2655   RedOpGen(CGF, XExpr, EExpr, UpExpr);
2656   Scope.ForceCleanup();
2657 
2658   // Shift the address forward by one element.
2659   auto LHSElementNext = CGF.Builder.CreateConstGEP1_32(
2660       LHSElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
2661   auto RHSElementNext = CGF.Builder.CreateConstGEP1_32(
2662       RHSElementPHI, /*Idx0=*/1, "omp.arraycpy.src.element");
2663   // Check whether we've reached the end.
2664   auto Done =
2665       CGF.Builder.CreateICmpEQ(LHSElementNext, LHSEnd, "omp.arraycpy.done");
2666   CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
2667   LHSElementPHI->addIncoming(LHSElementNext, CGF.Builder.GetInsertBlock());
2668   RHSElementPHI->addIncoming(RHSElementNext, CGF.Builder.GetInsertBlock());
2669 
2670   // Done.
2671   CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
2672 }
2673 
2674 static llvm::Value *emitReductionFunction(CodeGenModule &CGM,
2675                                           llvm::Type *ArgsType,
2676                                           ArrayRef<const Expr *> Privates,
2677                                           ArrayRef<const Expr *> LHSExprs,
2678                                           ArrayRef<const Expr *> RHSExprs,
2679                                           ArrayRef<const Expr *> ReductionOps) {
2680   auto &C = CGM.getContext();
2681 
2682   // void reduction_func(void *LHSArg, void *RHSArg);
2683   FunctionArgList Args;
2684   ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, SourceLocation(), /*Id=*/nullptr,
2685                            C.VoidPtrTy);
2686   ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, SourceLocation(), /*Id=*/nullptr,
2687                            C.VoidPtrTy);
2688   Args.push_back(&LHSArg);
2689   Args.push_back(&RHSArg);
2690   FunctionType::ExtInfo EI;
2691   auto &CGFI = CGM.getTypes().arrangeFreeFunctionDeclaration(
2692       C.VoidTy, Args, EI, /*isVariadic=*/false);
2693   auto *Fn = llvm::Function::Create(
2694       CGM.getTypes().GetFunctionType(CGFI), llvm::GlobalValue::InternalLinkage,
2695       ".omp.reduction.reduction_func", &CGM.getModule());
2696   CGM.SetLLVMFunctionAttributes(/*D=*/nullptr, CGFI, Fn);
2697   CodeGenFunction CGF(CGM);
2698   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args);
2699 
2700   // Dst = (void*[n])(LHSArg);
2701   // Src = (void*[n])(RHSArg);
2702   Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2703       CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
2704       ArgsType), CGF.getPointerAlign());
2705   Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2706       CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
2707       ArgsType), CGF.getPointerAlign());
2708 
2709   //  ...
2710   //  *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]);
2711   //  ...
2712   CodeGenFunction::OMPPrivateScope Scope(CGF);
2713   auto IPriv = Privates.begin();
2714   unsigned Idx = 0;
2715   for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) {
2716     auto RHSVar = cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl());
2717     Scope.addPrivate(RHSVar, [&]() -> Address {
2718       return emitAddrOfVarFromArray(CGF, RHS, Idx, RHSVar);
2719     });
2720     auto LHSVar = cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl());
2721     Scope.addPrivate(LHSVar, [&]() -> Address {
2722       return emitAddrOfVarFromArray(CGF, LHS, Idx, LHSVar);
2723     });
2724     QualType PrivTy = (*IPriv)->getType();
2725     if (PrivTy->isArrayType()) {
2726       // Get array size and emit VLA type.
2727       ++Idx;
2728       Address Elem =
2729           CGF.Builder.CreateConstArrayGEP(LHS, Idx, CGF.getPointerSize());
2730       llvm::Value *Ptr = CGF.Builder.CreateLoad(Elem);
2731       CodeGenFunction::OpaqueValueMapping OpaqueMap(
2732           CGF,
2733           cast<OpaqueValueExpr>(
2734               CGF.getContext().getAsVariableArrayType(PrivTy)->getSizeExpr()),
2735           RValue::get(CGF.Builder.CreatePtrToInt(Ptr, CGF.SizeTy)));
2736       CGF.EmitVariablyModifiedType(PrivTy);
2737     }
2738   }
2739   Scope.Privatize();
2740   IPriv = Privates.begin();
2741   auto ILHS = LHSExprs.begin();
2742   auto IRHS = RHSExprs.begin();
2743   for (auto *E : ReductionOps) {
2744     if ((*IPriv)->getType()->isArrayType()) {
2745       // Emit reduction for array section.
2746       auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
2747       auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
2748       EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar,
2749                                 [=](CodeGenFunction &CGF, const Expr *,
2750                                     const Expr *,
2751                                     const Expr *) { CGF.EmitIgnoredExpr(E); });
2752     } else
2753       // Emit reduction for array subscript or single variable.
2754       CGF.EmitIgnoredExpr(E);
2755     ++IPriv, ++ILHS, ++IRHS;
2756   }
2757   Scope.ForceCleanup();
2758   CGF.FinishFunction();
2759   return Fn;
2760 }
2761 
2762 void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
2763                                     ArrayRef<const Expr *> Privates,
2764                                     ArrayRef<const Expr *> LHSExprs,
2765                                     ArrayRef<const Expr *> RHSExprs,
2766                                     ArrayRef<const Expr *> ReductionOps,
2767                                     bool WithNowait, bool SimpleReduction) {
2768   // Next code should be emitted for reduction:
2769   //
2770   // static kmp_critical_name lock = { 0 };
2771   //
2772   // void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
2773   //  *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]);
2774   //  ...
2775   //  *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1],
2776   //  *(Type<n>-1*)rhs[<n>-1]);
2777   // }
2778   //
2779   // ...
2780   // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]};
2781   // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
2782   // RedList, reduce_func, &<lock>)) {
2783   // case 1:
2784   //  ...
2785   //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
2786   //  ...
2787   // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
2788   // break;
2789   // case 2:
2790   //  ...
2791   //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
2792   //  ...
2793   // [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);]
2794   // break;
2795   // default:;
2796   // }
2797   //
2798   // if SimpleReduction is true, only the next code is generated:
2799   //  ...
2800   //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
2801   //  ...
2802 
2803   auto &C = CGM.getContext();
2804 
2805   if (SimpleReduction) {
2806     CodeGenFunction::RunCleanupsScope Scope(CGF);
2807     auto IPriv = Privates.begin();
2808     auto ILHS = LHSExprs.begin();
2809     auto IRHS = RHSExprs.begin();
2810     for (auto *E : ReductionOps) {
2811       if ((*IPriv)->getType()->isArrayType()) {
2812         auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
2813         auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
2814         EmitOMPAggregateReduction(
2815             CGF, (*IPriv)->getType(), LHSVar, RHSVar,
2816             [=](CodeGenFunction &CGF, const Expr *, const Expr *,
2817                 const Expr *) { CGF.EmitIgnoredExpr(E); });
2818       } else
2819         CGF.EmitIgnoredExpr(E);
2820       ++IPriv, ++ILHS, ++IRHS;
2821     }
2822     return;
2823   }
2824 
2825   // 1. Build a list of reduction variables.
2826   // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]};
2827   auto Size = RHSExprs.size();
2828   for (auto *E : Privates) {
2829     if (E->getType()->isArrayType())
2830       // Reserve place for array size.
2831       ++Size;
2832   }
2833   llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size);
2834   QualType ReductionArrayTy =
2835       C.getConstantArrayType(C.VoidPtrTy, ArraySize, ArrayType::Normal,
2836                              /*IndexTypeQuals=*/0);
2837   Address ReductionList =
2838       CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list");
2839   auto IPriv = Privates.begin();
2840   unsigned Idx = 0;
2841   for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) {
2842     Address Elem =
2843       CGF.Builder.CreateConstArrayGEP(ReductionList, Idx, CGF.getPointerSize());
2844     CGF.Builder.CreateStore(
2845         CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2846             CGF.EmitLValue(RHSExprs[I]).getPointer(), CGF.VoidPtrTy),
2847         Elem);
2848     if ((*IPriv)->getType()->isArrayType()) {
2849       // Store array size.
2850       ++Idx;
2851       Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx,
2852                                              CGF.getPointerSize());
2853       CGF.Builder.CreateStore(
2854           CGF.Builder.CreateIntToPtr(
2855               CGF.Builder.CreateIntCast(
2856                   CGF.getVLASize(CGF.getContext().getAsVariableArrayType(
2857                                      (*IPriv)->getType()))
2858                       .first,
2859                   CGF.SizeTy, /*isSigned=*/false),
2860               CGF.VoidPtrTy),
2861           Elem);
2862     }
2863   }
2864 
2865   // 2. Emit reduce_func().
2866   auto *ReductionFn = emitReductionFunction(
2867       CGM, CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo(), Privates,
2868       LHSExprs, RHSExprs, ReductionOps);
2869 
2870   // 3. Create static kmp_critical_name lock = { 0 };
2871   auto *Lock = getCriticalRegionLock(".reduction");
2872 
2873   // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
2874   // RedList, reduce_func, &<lock>);
2875   auto *IdentTLoc = emitUpdateLocation(
2876       CGF, Loc,
2877       static_cast<OpenMPLocationFlags>(OMP_IDENT_KMPC | OMP_ATOMIC_REDUCE));
2878   auto *ThreadId = getThreadID(CGF, Loc);
2879   auto *ReductionArrayTySize = getTypeSize(CGF, ReductionArrayTy);
2880   auto *RL =
2881     CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(ReductionList.getPointer(),
2882                                                     CGF.VoidPtrTy);
2883   llvm::Value *Args[] = {
2884       IdentTLoc,                             // ident_t *<loc>
2885       ThreadId,                              // i32 <gtid>
2886       CGF.Builder.getInt32(RHSExprs.size()), // i32 <n>
2887       ReductionArrayTySize,                  // size_type sizeof(RedList)
2888       RL,                                    // void *RedList
2889       ReductionFn, // void (*) (void *, void *) <reduce_func>
2890       Lock         // kmp_critical_name *&<lock>
2891   };
2892   auto Res = CGF.EmitRuntimeCall(
2893       createRuntimeFunction(WithNowait ? OMPRTL__kmpc_reduce_nowait
2894                                        : OMPRTL__kmpc_reduce),
2895       Args);
2896 
2897   // 5. Build switch(res)
2898   auto *DefaultBB = CGF.createBasicBlock(".omp.reduction.default");
2899   auto *SwInst = CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2);
2900 
2901   // 6. Build case 1:
2902   //  ...
2903   //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
2904   //  ...
2905   // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
2906   // break;
2907   auto *Case1BB = CGF.createBasicBlock(".omp.reduction.case1");
2908   SwInst->addCase(CGF.Builder.getInt32(1), Case1BB);
2909   CGF.EmitBlock(Case1BB);
2910 
2911   {
2912     CodeGenFunction::RunCleanupsScope Scope(CGF);
2913     // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
2914     llvm::Value *EndArgs[] = {
2915         IdentTLoc, // ident_t *<loc>
2916         ThreadId,  // i32 <gtid>
2917         Lock       // kmp_critical_name *&<lock>
2918     };
2919     CGF.EHStack
2920         .pushCleanup<CallEndCleanup<std::extent<decltype(EndArgs)>::value>>(
2921             NormalAndEHCleanup,
2922             createRuntimeFunction(WithNowait ? OMPRTL__kmpc_end_reduce_nowait
2923                                              : OMPRTL__kmpc_end_reduce),
2924             llvm::makeArrayRef(EndArgs));
2925     auto IPriv = Privates.begin();
2926     auto ILHS = LHSExprs.begin();
2927     auto IRHS = RHSExprs.begin();
2928     for (auto *E : ReductionOps) {
2929       if ((*IPriv)->getType()->isArrayType()) {
2930         // Emit reduction for array section.
2931         auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
2932         auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
2933         EmitOMPAggregateReduction(
2934             CGF, (*IPriv)->getType(), LHSVar, RHSVar,
2935             [=](CodeGenFunction &CGF, const Expr *, const Expr *,
2936                 const Expr *) { CGF.EmitIgnoredExpr(E); });
2937       } else
2938         // Emit reduction for array subscript or single variable.
2939         CGF.EmitIgnoredExpr(E);
2940       ++IPriv, ++ILHS, ++IRHS;
2941     }
2942   }
2943 
2944   CGF.EmitBranch(DefaultBB);
2945 
2946   // 7. Build case 2:
2947   //  ...
2948   //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
2949   //  ...
2950   // break;
2951   auto *Case2BB = CGF.createBasicBlock(".omp.reduction.case2");
2952   SwInst->addCase(CGF.Builder.getInt32(2), Case2BB);
2953   CGF.EmitBlock(Case2BB);
2954 
2955   {
2956     CodeGenFunction::RunCleanupsScope Scope(CGF);
2957     if (!WithNowait) {
2958       // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>);
2959       llvm::Value *EndArgs[] = {
2960           IdentTLoc, // ident_t *<loc>
2961           ThreadId,  // i32 <gtid>
2962           Lock       // kmp_critical_name *&<lock>
2963       };
2964       CGF.EHStack
2965           .pushCleanup<CallEndCleanup<std::extent<decltype(EndArgs)>::value>>(
2966               NormalAndEHCleanup,
2967               createRuntimeFunction(OMPRTL__kmpc_end_reduce),
2968               llvm::makeArrayRef(EndArgs));
2969     }
2970     auto ILHS = LHSExprs.begin();
2971     auto IRHS = RHSExprs.begin();
2972     auto IPriv = Privates.begin();
2973     for (auto *E : ReductionOps) {
2974         const Expr *XExpr = nullptr;
2975         const Expr *EExpr = nullptr;
2976         const Expr *UpExpr = nullptr;
2977         BinaryOperatorKind BO = BO_Comma;
2978         if (auto *BO = dyn_cast<BinaryOperator>(E)) {
2979           if (BO->getOpcode() == BO_Assign) {
2980             XExpr = BO->getLHS();
2981             UpExpr = BO->getRHS();
2982           }
2983         }
2984         // Try to emit update expression as a simple atomic.
2985         auto *RHSExpr = UpExpr;
2986         if (RHSExpr) {
2987           // Analyze RHS part of the whole expression.
2988           if (auto *ACO = dyn_cast<AbstractConditionalOperator>(
2989                   RHSExpr->IgnoreParenImpCasts())) {
2990             // If this is a conditional operator, analyze its condition for
2991             // min/max reduction operator.
2992             RHSExpr = ACO->getCond();
2993           }
2994           if (auto *BORHS =
2995                   dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) {
2996             EExpr = BORHS->getRHS();
2997             BO = BORHS->getOpcode();
2998           }
2999         }
3000         if (XExpr) {
3001           auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
3002           auto &&AtomicRedGen = [this, BO, VD, IPriv,
3003                                  Loc](CodeGenFunction &CGF, const Expr *XExpr,
3004                                       const Expr *EExpr, const Expr *UpExpr) {
3005             LValue X = CGF.EmitLValue(XExpr);
3006             RValue E;
3007             if (EExpr)
3008               E = CGF.EmitAnyExpr(EExpr);
3009             CGF.EmitOMPAtomicSimpleUpdateExpr(
3010                 X, E, BO, /*IsXLHSInRHSPart=*/true, llvm::Monotonic, Loc,
3011                 [&CGF, UpExpr, VD, IPriv](RValue XRValue) {
3012                   CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
3013                   PrivateScope.addPrivate(VD, [&CGF, VD, XRValue]() -> Address {
3014                     Address LHSTemp = CGF.CreateMemTemp(VD->getType());
3015                     CGF.EmitStoreThroughLValue(
3016                         XRValue, CGF.MakeAddrLValue(LHSTemp, VD->getType()));
3017                     return LHSTemp;
3018                   });
3019                   (void)PrivateScope.Privatize();
3020                   return CGF.EmitAnyExpr(UpExpr);
3021                 });
3022           };
3023           if ((*IPriv)->getType()->isArrayType()) {
3024             // Emit atomic reduction for array section.
3025             auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
3026             EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar,
3027                                       AtomicRedGen, XExpr, EExpr, UpExpr);
3028           } else
3029             // Emit atomic reduction for array subscript or single variable.
3030             AtomicRedGen(CGF, XExpr, EExpr, UpExpr);
3031         } else {
3032           // Emit as a critical region.
3033           auto &&CritRedGen = [this, E, Loc](CodeGenFunction &CGF, const Expr *,
3034                                              const Expr *, const Expr *) {
3035             emitCriticalRegion(
3036                 CGF, ".atomic_reduction",
3037                 [E](CodeGenFunction &CGF) { CGF.EmitIgnoredExpr(E); }, Loc);
3038           };
3039           if ((*IPriv)->getType()->isArrayType()) {
3040             auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
3041             auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
3042             EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar,
3043                                       CritRedGen);
3044           } else
3045             CritRedGen(CGF, nullptr, nullptr, nullptr);
3046         }
3047       ++ILHS, ++IRHS, ++IPriv;
3048     }
3049   }
3050 
3051   CGF.EmitBranch(DefaultBB);
3052   CGF.EmitBlock(DefaultBB, /*IsFinished=*/true);
3053 }
3054 
3055 void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
3056                                        SourceLocation Loc) {
3057   // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
3058   // global_tid);
3059   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
3060   // Ignore return result until untied tasks are supported.
3061   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_taskwait), Args);
3062 }
3063 
3064 void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF,
3065                                            OpenMPDirectiveKind InnerKind,
3066                                            const RegionCodeGenTy &CodeGen,
3067                                            bool HasCancel) {
3068   InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel);
3069   CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr);
3070 }
3071 
namespace {
/// Numeric codes passed as the 'cncl_kind' argument of the
/// __kmpc_cancel / __kmpc_cancellationpoint runtime calls.
enum RTCancelKind {
  /// No specific region requested.
  CancelNoreq = 0,
  /// 'parallel' region (1).
  CancelParallel,
  /// 'for' region (2).
  CancelLoop,
  /// 'sections' region (3).
  CancelSections,
  /// 'taskgroup' region (4).
  CancelTaskgroup
};
} // end anonymous namespace
3081 
3082 static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) {
3083   RTCancelKind CancelKind = CancelNoreq;
3084   if (CancelRegion == OMPD_parallel)
3085     CancelKind = CancelParallel;
3086   else if (CancelRegion == OMPD_for)
3087     CancelKind = CancelLoop;
3088   else if (CancelRegion == OMPD_sections)
3089     CancelKind = CancelSections;
3090   else {
3091     assert(CancelRegion == OMPD_taskgroup);
3092     CancelKind = CancelTaskgroup;
3093   }
3094   return CancelKind;
3095 }
3096 
3097 void CGOpenMPRuntime::emitCancellationPointCall(
3098     CodeGenFunction &CGF, SourceLocation Loc,
3099     OpenMPDirectiveKind CancelRegion) {
3100   // Build call kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
3101   // global_tid, kmp_int32 cncl_kind);
3102   if (auto *OMPRegionInfo =
3103           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
3104     if (OMPRegionInfo->getDirectiveKind() == OMPD_single)
3105       return;
3106     if (OMPRegionInfo->hasCancel()) {
3107       llvm::Value *Args[] = {
3108           emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
3109           CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
3110       // Ignore return result until untied tasks are supported.
3111       auto *Result = CGF.EmitRuntimeCall(
3112           createRuntimeFunction(OMPRTL__kmpc_cancellationpoint), Args);
3113       // if (__kmpc_cancellationpoint()) {
3114       //  __kmpc_cancel_barrier();
3115       //   exit from construct;
3116       // }
3117       auto *ExitBB = CGF.createBasicBlock(".cancel.exit");
3118       auto *ContBB = CGF.createBasicBlock(".cancel.continue");
3119       auto *Cmp = CGF.Builder.CreateIsNotNull(Result);
3120       CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
3121       CGF.EmitBlock(ExitBB);
3122       // __kmpc_cancel_barrier();
3123       emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false);
3124       // exit from construct;
3125       auto CancelDest =
3126           CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
3127       CGF.EmitBranchThroughCleanup(CancelDest);
3128       CGF.EmitBlock(ContBB, /*IsFinished=*/true);
3129     }
3130   }
3131 }
3132 
3133 void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc,
3134                                      const Expr *IfCond,
3135                                      OpenMPDirectiveKind CancelRegion) {
3136   // Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
3137   // kmp_int32 cncl_kind);
3138   if (auto *OMPRegionInfo =
3139           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
3140     if (OMPRegionInfo->getDirectiveKind() == OMPD_single)
3141       return;
3142     auto &&ThenGen = [this, Loc, CancelRegion,
3143                       OMPRegionInfo](CodeGenFunction &CGF) {
3144       llvm::Value *Args[] = {
3145           emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
3146           CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
3147       // Ignore return result until untied tasks are supported.
3148       auto *Result =
3149           CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_cancel), Args);
3150       // if (__kmpc_cancel()) {
3151       //  __kmpc_cancel_barrier();
3152       //   exit from construct;
3153       // }
3154       auto *ExitBB = CGF.createBasicBlock(".cancel.exit");
3155       auto *ContBB = CGF.createBasicBlock(".cancel.continue");
3156       auto *Cmp = CGF.Builder.CreateIsNotNull(Result);
3157       CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
3158       CGF.EmitBlock(ExitBB);
3159       // __kmpc_cancel_barrier();
3160       emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false);
3161       // exit from construct;
3162       auto CancelDest =
3163           CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
3164       CGF.EmitBranchThroughCleanup(CancelDest);
3165       CGF.EmitBlock(ContBB, /*IsFinished=*/true);
3166     };
3167     if (IfCond)
3168       emitOMPIfClause(CGF, IfCond, ThenGen, [](CodeGenFunction &) {});
3169     else
3170       ThenGen(CGF);
3171   }
3172 }
3173 
3174 llvm::Value *
3175 CGOpenMPRuntime::emitTargetOutlinedFunction(const OMPExecutableDirective &D,
3176                                             const RegionCodeGenTy &CodeGen) {
3177   const CapturedStmt &CS = *cast<CapturedStmt>(D.getAssociatedStmt());
3178 
3179   CodeGenFunction CGF(CGM, true);
3180   CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen);
3181   CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
3182   return CGF.GenerateOpenMPCapturedStmtFunction(CS, /*UseOnlyReferences=*/true);
3183 }
3184 
3185 void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF,
3186                                      const OMPExecutableDirective &D,
3187                                      llvm::Value *OutlinedFn,
3188                                      const Expr *IfCond, const Expr *Device,
3189                                      ArrayRef<llvm::Value *> CapturedVars) {
3190   /// \brief Values for bit flags used to specify the mapping type for
3191   /// offloading.
3192   enum OpenMPOffloadMappingFlags {
3193     /// \brief Allocate memory on the device and move data from host to device.
3194     OMP_MAP_TO = 0x01,
3195     /// \brief Allocate memory on the device and move data from device to host.
3196     OMP_MAP_FROM = 0x02,
3197   };
3198 
3199   enum OpenMPOffloadingReservedDeviceIDs {
3200     /// \brief Device ID if the device was not defined, runtime should get it
3201     /// from environment variables in the spec.
3202     OMP_DEVICEID_UNDEF = -1,
3203   };
3204 
3205   // Fill up the arrays with the all the captured variables.
3206   SmallVector<llvm::Value *, 16> BasePointers;
3207   SmallVector<llvm::Value *, 16> Pointers;
3208   SmallVector<llvm::Value *, 16> Sizes;
3209   SmallVector<unsigned, 16> MapTypes;
3210 
3211   bool hasVLACaptures = false;
3212 
3213   const CapturedStmt &CS = *cast<CapturedStmt>(D.getAssociatedStmt());
3214   auto RI = CS.getCapturedRecordDecl()->field_begin();
3215   // auto II = CS.capture_init_begin();
3216   auto CV = CapturedVars.begin();
3217   for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(),
3218                                             CE = CS.capture_end();
3219        CI != CE; ++CI, ++RI, ++CV) {
3220     StringRef Name;
3221     QualType Ty;
3222     llvm::Value *BasePointer;
3223     llvm::Value *Pointer;
3224     llvm::Value *Size;
3225     unsigned MapType;
3226 
3227     if (CI->capturesVariableArrayType()) {
3228       BasePointer = Pointer = *CV;
3229       Size = getTypeSize(CGF, RI->getType());
3230       hasVLACaptures = true;
3231       // VLA sizes don't need to be copied back from the device.
3232       MapType = OMP_MAP_TO;
3233     } else if (CI->capturesThis()) {
3234       BasePointer = Pointer = *CV;
3235       const PointerType *PtrTy = cast<PointerType>(RI->getType().getTypePtr());
3236       Size = getTypeSize(CGF, PtrTy->getPointeeType());
3237       // Default map type.
3238       MapType = OMP_MAP_TO | OMP_MAP_FROM;
3239     } else {
3240       BasePointer = Pointer = *CV;
3241 
3242       const ReferenceType *PtrTy =
3243           cast<ReferenceType>(RI->getType().getTypePtr());
3244       QualType ElementType = PtrTy->getPointeeType();
3245       Size = getTypeSize(CGF, ElementType);
3246       // Default map type.
3247       MapType = OMP_MAP_TO | OMP_MAP_FROM;
3248     }
3249 
3250     BasePointers.push_back(BasePointer);
3251     Pointers.push_back(Pointer);
3252     Sizes.push_back(Size);
3253     MapTypes.push_back(MapType);
3254   }
3255 
3256   // Keep track on whether the host function has to be executed.
3257   auto OffloadErrorQType =
3258       CGF.getContext().getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true);
3259   auto OffloadError = CGF.MakeAddrLValue(
3260       CGF.CreateMemTemp(OffloadErrorQType, ".run_host_version"),
3261       OffloadErrorQType);
3262   CGF.EmitStoreOfScalar(llvm::Constant::getNullValue(CGM.Int32Ty),
3263                         OffloadError);
3264 
3265   // Fill up the pointer arrays and transfer execution to the device.
3266   auto &&ThenGen = [this, &BasePointers, &Pointers, &Sizes, &MapTypes,
3267                     hasVLACaptures, Device, OffloadError,
3268                     OffloadErrorQType](CodeGenFunction &CGF) {
3269     unsigned PointerNumVal = BasePointers.size();
3270     llvm::Value *PointerNum = CGF.Builder.getInt32(PointerNumVal);
3271     llvm::Value *BasePointersArray;
3272     llvm::Value *PointersArray;
3273     llvm::Value *SizesArray;
3274     llvm::Value *MapTypesArray;
3275 
3276     if (PointerNumVal) {
3277       llvm::APInt PointerNumAP(32, PointerNumVal, /*isSigned=*/true);
3278       QualType PointerArrayType = CGF.getContext().getConstantArrayType(
3279           CGF.getContext().VoidPtrTy, PointerNumAP, ArrayType::Normal,
3280           /*IndexTypeQuals=*/0);
3281 
3282       BasePointersArray =
3283           CGF.CreateMemTemp(PointerArrayType, ".offload_baseptrs").getPointer();
3284       PointersArray =
3285           CGF.CreateMemTemp(PointerArrayType, ".offload_ptrs").getPointer();
3286 
3287       // If we don't have any VLA types, we can use a constant array for the map
3288       // sizes, otherwise we need to fill up the arrays as we do for the
3289       // pointers.
3290       if (hasVLACaptures) {
3291         QualType SizeArrayType = CGF.getContext().getConstantArrayType(
3292             CGF.getContext().getSizeType(), PointerNumAP, ArrayType::Normal,
3293             /*IndexTypeQuals=*/0);
3294         SizesArray =
3295             CGF.CreateMemTemp(SizeArrayType, ".offload_sizes").getPointer();
3296       } else {
3297         // We expect all the sizes to be constant, so we collect them to create
3298         // a constant array.
3299         SmallVector<llvm::Constant *, 16> ConstSizes;
3300         for (auto S : Sizes)
3301           ConstSizes.push_back(cast<llvm::Constant>(S));
3302 
3303         auto *SizesArrayInit = llvm::ConstantArray::get(
3304             llvm::ArrayType::get(CGM.SizeTy, ConstSizes.size()), ConstSizes);
3305         auto *SizesArrayGbl = new llvm::GlobalVariable(
3306             CGM.getModule(), SizesArrayInit->getType(),
3307             /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
3308             SizesArrayInit, ".offload_sizes");
3309         SizesArrayGbl->setUnnamedAddr(true);
3310         SizesArray = SizesArrayGbl;
3311       }
3312 
3313       // The map types are always constant so we don't need to generate code to
3314       // fill arrays. Instead, we create an array constant.
3315       llvm::Constant *MapTypesArrayInit =
3316           llvm::ConstantDataArray::get(CGF.Builder.getContext(), MapTypes);
3317       auto *MapTypesArrayGbl = new llvm::GlobalVariable(
3318           CGM.getModule(), MapTypesArrayInit->getType(),
3319           /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
3320           MapTypesArrayInit, ".offload_maptypes");
3321       MapTypesArrayGbl->setUnnamedAddr(true);
3322       MapTypesArray = MapTypesArrayGbl;
3323 
3324       for (unsigned i = 0; i < PointerNumVal; ++i) {
3325         llvm::Value *BP = CGF.Builder.CreateConstInBoundsGEP2_32(
3326             llvm::ArrayType::get(CGM.VoidPtrTy, PointerNumVal),
3327             BasePointersArray, 0, i);
3328         Address BPAddr(BP, CGM.getContext().getTypeAlignInChars(
3329                                CGM.getContext().VoidPtrTy));
3330         CGF.Builder.CreateStore(
3331             CGF.Builder.CreateBitCast(BasePointers[i], CGM.VoidPtrTy), BPAddr);
3332 
3333         llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32(
3334             llvm::ArrayType::get(CGM.VoidPtrTy, PointerNumVal), PointersArray,
3335             0, i);
3336         Address PAddr(P, CGM.getContext().getTypeAlignInChars(
3337                              CGM.getContext().VoidPtrTy));
3338         CGF.Builder.CreateStore(
3339             CGF.Builder.CreateBitCast(Pointers[i], CGM.VoidPtrTy), PAddr);
3340 
3341         if (hasVLACaptures) {
3342           llvm::Value *S = CGF.Builder.CreateConstInBoundsGEP2_32(
3343               llvm::ArrayType::get(CGM.SizeTy, PointerNumVal), SizesArray,
3344               /*Idx0=*/0,
3345               /*Idx1=*/i);
3346           Address SAddr(S, CGM.getContext().getTypeAlignInChars(
3347                                CGM.getContext().getSizeType()));
3348           CGF.Builder.CreateStore(CGF.Builder.CreateIntCast(
3349                                       Sizes[i], CGM.SizeTy, /*isSigned=*/true),
3350                                   SAddr);
3351         }
3352       }
3353 
3354       BasePointersArray = CGF.Builder.CreateConstInBoundsGEP2_32(
3355           llvm::ArrayType::get(CGM.VoidPtrTy, PointerNumVal), BasePointersArray,
3356           /*Idx0=*/0, /*Idx1=*/0);
3357       PointersArray = CGF.Builder.CreateConstInBoundsGEP2_32(
3358           llvm::ArrayType::get(CGM.VoidPtrTy, PointerNumVal), PointersArray,
3359           /*Idx0=*/0,
3360           /*Idx1=*/0);
3361       SizesArray = CGF.Builder.CreateConstInBoundsGEP2_32(
3362           llvm::ArrayType::get(CGM.SizeTy, PointerNumVal), SizesArray,
3363           /*Idx0=*/0, /*Idx1=*/0);
3364       MapTypesArray = CGF.Builder.CreateConstInBoundsGEP2_32(
3365           llvm::ArrayType::get(CGM.Int32Ty, PointerNumVal), MapTypesArray,
3366           /*Idx0=*/0,
3367           /*Idx1=*/0);
3368 
3369     } else {
3370       BasePointersArray = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
3371       PointersArray = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
3372       SizesArray = llvm::ConstantPointerNull::get(CGM.SizeTy->getPointerTo());
3373       MapTypesArray =
3374           llvm::ConstantPointerNull::get(CGM.Int32Ty->getPointerTo());
3375     }
3376 
3377     // On top of the arrays that were filled up, the target offloading call
3378     // takes as arguments the device id as well as the host pointer. The host
3379     // pointer is used by the runtime library to identify the current target
3380     // region, so it only has to be unique and not necessarily point to
3381     // anything. It could be the pointer to the outlined function that
3382     // implements the target region, but we aren't using that so that the
3383     // compiler doesn't need to keep that, and could therefore inline the host
3384     // function if proven worthwhile during optimization.
3385 
3386     llvm::Value *HostPtr = new llvm::GlobalVariable(
3387         CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
3388         llvm::GlobalValue::PrivateLinkage,
3389         llvm::Constant::getNullValue(CGM.Int8Ty), ".offload_hstptr");
3390 
3391     // Emit device ID if any.
3392     llvm::Value *DeviceID;
3393     if (Device)
3394       DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
3395                                            CGM.Int32Ty, /*isSigned=*/true);
3396     else
3397       DeviceID = CGF.Builder.getInt32(OMP_DEVICEID_UNDEF);
3398 
3399     llvm::Value *OffloadingArgs[] = {
3400         DeviceID,      HostPtr,    PointerNum,   BasePointersArray,
3401         PointersArray, SizesArray, MapTypesArray};
3402     auto Return = CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_target),
3403                                       OffloadingArgs);
3404 
3405     CGF.EmitStoreOfScalar(Return, OffloadError);
3406   };
3407 
3408   if (IfCond) {
3409     // Notify that the host version must be executed.
3410     auto &&ElseGen = [this, OffloadError,
3411                       OffloadErrorQType](CodeGenFunction &CGF) {
3412       CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/-1u),
3413                             OffloadError);
3414     };
3415     emitOMPIfClause(CGF, IfCond, ThenGen, ElseGen);
3416   } else {
3417     CodeGenFunction::RunCleanupsScope Scope(CGF);
3418     ThenGen(CGF);
3419   }
3420 
3421   // Check the error code and execute the host version if required.
3422   auto OffloadFailedBlock = CGF.createBasicBlock("omp_offload.failed");
3423   auto OffloadContBlock = CGF.createBasicBlock("omp_offload.cont");
3424   auto OffloadErrorVal = CGF.EmitLoadOfScalar(OffloadError, SourceLocation());
3425   auto Failed = CGF.Builder.CreateIsNotNull(OffloadErrorVal);
3426   CGF.Builder.CreateCondBr(Failed, OffloadFailedBlock, OffloadContBlock);
3427 
3428   CGF.EmitBlock(OffloadFailedBlock);
3429   CGF.Builder.CreateCall(OutlinedFn, BasePointers);
3430   CGF.EmitBranch(OffloadContBlock);
3431 
3432   CGF.EmitBlock(OffloadContBlock, /*IsFinished=*/true);
3433   return;
3434 }
3435