1 //===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This provides a class for OpenMP runtime code generation.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "CGOpenMPRuntime.h"
15 #include "CodeGenFunction.h"
16 #include "CGCleanup.h"
17 #include "clang/AST/Decl.h"
18 #include "clang/AST/StmtOpenMP.h"
19 #include "llvm/ADT/ArrayRef.h"
20 #include "llvm/IR/CallSite.h"
21 #include "llvm/IR/DerivedTypes.h"
22 #include "llvm/IR/GlobalValue.h"
23 #include "llvm/IR/Value.h"
24 #include "llvm/Support/raw_ostream.h"
25 #include <cassert>
26 
27 using namespace clang;
28 using namespace CodeGen;
29 
30 namespace {
31 /// \brief Base class for handling code generation inside OpenMP regions.
32 class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
33 public:
34   /// \brief Kinds of OpenMP regions used in codegen.
35   enum CGOpenMPRegionKind {
36     /// \brief Region with outlined function for standalone 'parallel'
37     /// directive.
38     ParallelOutlinedRegion,
39     /// \brief Region with outlined function for standalone 'task' directive.
40     TaskOutlinedRegion,
41     /// \brief Region for constructs that do not require function outlining,
42     /// like 'for', 'sections', 'atomic' etc. directives.
43     InlinedRegion,
44     /// \brief Region with outlined function for standalone 'target' directive.
45     TargetRegion,
46   };
47 
48   CGOpenMPRegionInfo(const CapturedStmt &CS,
49                      const CGOpenMPRegionKind RegionKind,
50                      const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
51                      bool HasCancel)
52       : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind),
53         CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {}
54 
55   CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind,
56                      const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
57                      bool HasCancel)
58       : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen),
59         Kind(Kind), HasCancel(HasCancel) {}
60 
61   /// \brief Get a variable or parameter for storing global thread id
62   /// inside OpenMP construct.
63   virtual const VarDecl *getThreadIDVariable() const = 0;
64 
65   /// \brief Emit the captured statement body.
66   void EmitBody(CodeGenFunction &CGF, const Stmt *S) override;
67 
68   /// \brief Get an LValue for the current ThreadID variable.
69   /// \return LValue for thread id variable. This LValue always has type int32*.
70   virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF);
71 
72   CGOpenMPRegionKind getRegionKind() const { return RegionKind; }
73 
74   OpenMPDirectiveKind getDirectiveKind() const { return Kind; }
75 
76   bool hasCancel() const { return HasCancel; }
77 
78   static bool classof(const CGCapturedStmtInfo *Info) {
79     return Info->getKind() == CR_OpenMP;
80   }
81 
82 protected:
83   CGOpenMPRegionKind RegionKind;
84   const RegionCodeGenTy &CodeGen;
85   OpenMPDirectiveKind Kind;
86   bool HasCancel;
87 };
88 
89 /// \brief API for captured statement code generation in OpenMP constructs.
90 class CGOpenMPOutlinedRegionInfo : public CGOpenMPRegionInfo {
91 public:
92   CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar,
93                              const RegionCodeGenTy &CodeGen,
94                              OpenMPDirectiveKind Kind, bool HasCancel)
95       : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind,
96                            HasCancel),
97         ThreadIDVar(ThreadIDVar) {
98     assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
99   }
100   /// \brief Get a variable or parameter for storing global thread id
101   /// inside OpenMP construct.
102   const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }
103 
104   /// \brief Get the name of the capture helper.
105   StringRef getHelperName() const override { return ".omp_outlined."; }
106 
107   static bool classof(const CGCapturedStmtInfo *Info) {
108     return CGOpenMPRegionInfo::classof(Info) &&
109            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
110                ParallelOutlinedRegion;
111   }
112 
113 private:
114   /// \brief A variable or parameter storing global thread id for OpenMP
115   /// constructs.
116   const VarDecl *ThreadIDVar;
117 };
118 
119 /// \brief API for captured statement code generation in OpenMP constructs.
120 class CGOpenMPTaskOutlinedRegionInfo : public CGOpenMPRegionInfo {
121 public:
122   CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS,
123                                  const VarDecl *ThreadIDVar,
124                                  const RegionCodeGenTy &CodeGen,
125                                  OpenMPDirectiveKind Kind, bool HasCancel)
126       : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel),
127         ThreadIDVar(ThreadIDVar) {
128     assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
129   }
130   /// \brief Get a variable or parameter for storing global thread id
131   /// inside OpenMP construct.
132   const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }
133 
134   /// \brief Get an LValue for the current ThreadID variable.
135   LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override;
136 
137   /// \brief Get the name of the capture helper.
138   StringRef getHelperName() const override { return ".omp_outlined."; }
139 
140   static bool classof(const CGCapturedStmtInfo *Info) {
141     return CGOpenMPRegionInfo::classof(Info) &&
142            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
143                TaskOutlinedRegion;
144   }
145 
146 private:
147   /// \brief A variable or parameter storing global thread id for OpenMP
148   /// constructs.
149   const VarDecl *ThreadIDVar;
150 };
151 
152 /// \brief API for inlined captured statement code generation in OpenMP
153 /// constructs.
154 class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo {
155 public:
156   CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI,
157                             const RegionCodeGenTy &CodeGen,
158                             OpenMPDirectiveKind Kind, bool HasCancel)
159       : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel),
160         OldCSI(OldCSI),
161         OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {}
162   // \brief Retrieve the value of the context parameter.
163   llvm::Value *getContextValue() const override {
164     if (OuterRegionInfo)
165       return OuterRegionInfo->getContextValue();
166     llvm_unreachable("No context value for inlined OpenMP region");
167   }
168   void setContextValue(llvm::Value *V) override {
169     if (OuterRegionInfo) {
170       OuterRegionInfo->setContextValue(V);
171       return;
172     }
173     llvm_unreachable("No context value for inlined OpenMP region");
174   }
175   /// \brief Lookup the captured field decl for a variable.
176   const FieldDecl *lookup(const VarDecl *VD) const override {
177     if (OuterRegionInfo)
178       return OuterRegionInfo->lookup(VD);
179     // If there is no outer outlined region,no need to lookup in a list of
180     // captured variables, we can use the original one.
181     return nullptr;
182   }
183   FieldDecl *getThisFieldDecl() const override {
184     if (OuterRegionInfo)
185       return OuterRegionInfo->getThisFieldDecl();
186     return nullptr;
187   }
188   /// \brief Get a variable or parameter for storing global thread id
189   /// inside OpenMP construct.
190   const VarDecl *getThreadIDVariable() const override {
191     if (OuterRegionInfo)
192       return OuterRegionInfo->getThreadIDVariable();
193     return nullptr;
194   }
195 
196   /// \brief Get the name of the capture helper.
197   StringRef getHelperName() const override {
198     if (auto *OuterRegionInfo = getOldCSI())
199       return OuterRegionInfo->getHelperName();
200     llvm_unreachable("No helper name for inlined OpenMP construct");
201   }
202 
203   CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; }
204 
205   static bool classof(const CGCapturedStmtInfo *Info) {
206     return CGOpenMPRegionInfo::classof(Info) &&
207            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion;
208   }
209 
210 private:
211   /// \brief CodeGen info about outer OpenMP region.
212   CodeGenFunction::CGCapturedStmtInfo *OldCSI;
213   CGOpenMPRegionInfo *OuterRegionInfo;
214 };
215 
216 /// \brief API for captured statement code generation in OpenMP target
217 /// constructs. For this captures, implicit parameters are used instead of the
218 /// captured fields.
219 class CGOpenMPTargetRegionInfo : public CGOpenMPRegionInfo {
220 public:
221   CGOpenMPTargetRegionInfo(const CapturedStmt &CS,
222                            const RegionCodeGenTy &CodeGen)
223       : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target,
224                            /*HasCancel = */ false) {}
225 
226   /// \brief This is unused for target regions because each starts executing
227   /// with a single thread.
228   const VarDecl *getThreadIDVariable() const override { return nullptr; }
229 
230   /// \brief Get the name of the capture helper.
231   StringRef getHelperName() const override { return ".omp_offloading."; }
232 
233   static bool classof(const CGCapturedStmtInfo *Info) {
234     return CGOpenMPRegionInfo::classof(Info) &&
235            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion;
236   }
237 };
238 
239 /// \brief RAII for emitting code of OpenMP constructs.
240 class InlinedOpenMPRegionRAII {
241   CodeGenFunction &CGF;
242 
243 public:
244   /// \brief Constructs region for combined constructs.
245   /// \param CodeGen Code generation sequence for combined directives. Includes
246   /// a list of functions used for code generation of implicitly inlined
247   /// regions.
248   InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen,
249                           OpenMPDirectiveKind Kind, bool HasCancel)
250       : CGF(CGF) {
251     // Start emission for the construct.
252     CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo(
253         CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel);
254   }
255   ~InlinedOpenMPRegionRAII() {
256     // Restore original CapturedStmtInfo only if we're done with code emission.
257     auto *OldCSI =
258         cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI();
259     delete CGF.CapturedStmtInfo;
260     CGF.CapturedStmtInfo = OldCSI;
261   }
262 };
263 
264 } // anonymous namespace
265 
266 static LValue emitLoadOfPointerLValue(CodeGenFunction &CGF, Address PtrAddr,
267                                       QualType Ty) {
268   AlignmentSource Source;
269   CharUnits Align = CGF.getNaturalPointeeTypeAlignment(Ty, &Source);
270   return CGF.MakeAddrLValue(Address(CGF.Builder.CreateLoad(PtrAddr), Align),
271                             Ty->getPointeeType(), Source);
272 }
273 
274 LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) {
275   return emitLoadOfPointerLValue(CGF,
276                                  CGF.GetAddrOfLocalVar(getThreadIDVariable()),
277                                  getThreadIDVariable()->getType());
278 }
279 
280 void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt * /*S*/) {
281   // 1.2.2 OpenMP Language Terminology
282   // Structured block - An executable statement with a single entry at the
283   // top and a single exit at the bottom.
284   // The point of exit cannot be a branch out of the structured block.
285   // longjmp() and throw() must not violate the entry/exit criteria.
286   CGF.EHStack.pushTerminate();
287   {
288     CodeGenFunction::RunCleanupsScope Scope(CGF);
289     CodeGen(CGF);
290   }
291   CGF.EHStack.popTerminate();
292 }
293 
294 LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue(
295     CodeGenFunction &CGF) {
296   return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()),
297                             getThreadIDVariable()->getType(),
298                             AlignmentSource::Decl);
299 }
300 
301 CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM)
302     : CGM(CGM), DefaultOpenMPPSource(nullptr), KmpRoutineEntryPtrTy(nullptr) {
303   IdentTy = llvm::StructType::create(
304       "ident_t", CGM.Int32Ty /* reserved_1 */, CGM.Int32Ty /* flags */,
305       CGM.Int32Ty /* reserved_2 */, CGM.Int32Ty /* reserved_3 */,
306       CGM.Int8PtrTy /* psource */, nullptr);
307   // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...)
308   llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty),
309                                llvm::PointerType::getUnqual(CGM.Int32Ty)};
310   Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true);
311   KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8);
312 }
313 
314 void CGOpenMPRuntime::clear() {
315   InternalVars.clear();
316 }
317 
318 // Layout information for ident_t.
319 static CharUnits getIdentAlign(CodeGenModule &CGM) {
320   return CGM.getPointerAlign();
321 }
322 static CharUnits getIdentSize(CodeGenModule &CGM) {
323   assert((4 * CGM.getPointerSize()).isMultipleOf(CGM.getPointerAlign()));
324   return CharUnits::fromQuantity(16) + CGM.getPointerSize();
325 }
326 static CharUnits getOffsetOfIdentField(CGOpenMPRuntime::IdentFieldIndex Field) {
327   // All the fields except the last are i32, so this works beautifully.
328   return unsigned(Field) * CharUnits::fromQuantity(4);
329 }
330 static Address createIdentFieldGEP(CodeGenFunction &CGF, Address Addr,
331                                    CGOpenMPRuntime::IdentFieldIndex Field,
332                                    const llvm::Twine &Name = "") {
333   auto Offset = getOffsetOfIdentField(Field);
334   return CGF.Builder.CreateStructGEP(Addr, Field, Offset, Name);
335 }
336 
337 llvm::Value *CGOpenMPRuntime::emitParallelOutlinedFunction(
338     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
339     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
340   assert(ThreadIDVar->getType()->isPointerType() &&
341          "thread id variable must be of type kmp_int32 *");
342   const CapturedStmt *CS = cast<CapturedStmt>(D.getAssociatedStmt());
343   CodeGenFunction CGF(CGM, true);
344   bool HasCancel = false;
345   if (auto *OPD = dyn_cast<OMPParallelDirective>(&D))
346     HasCancel = OPD->hasCancel();
347   else if (auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D))
348     HasCancel = OPSD->hasCancel();
349   else if (auto *OPFD = dyn_cast<OMPParallelForDirective>(&D))
350     HasCancel = OPFD->hasCancel();
351   CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind,
352                                     HasCancel);
353   CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
354   return CGF.GenerateOpenMPCapturedStmtFunction(*CS);
355 }
356 
357 llvm::Value *CGOpenMPRuntime::emitTaskOutlinedFunction(
358     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
359     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
360   assert(!ThreadIDVar->getType()->isPointerType() &&
361          "thread id variable must be of type kmp_int32 for tasks");
362   auto *CS = cast<CapturedStmt>(D.getAssociatedStmt());
363   CodeGenFunction CGF(CGM, true);
364   CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen,
365                                         InnermostKind,
366                                         cast<OMPTaskDirective>(D).hasCancel());
367   CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
368   return CGF.GenerateCapturedStmtFunction(*CS);
369 }
370 
371 Address CGOpenMPRuntime::getOrCreateDefaultLocation(OpenMPLocationFlags Flags) {
372   CharUnits Align = getIdentAlign(CGM);
373   llvm::Value *Entry = OpenMPDefaultLocMap.lookup(Flags);
374   if (!Entry) {
375     if (!DefaultOpenMPPSource) {
376       // Initialize default location for psource field of ident_t structure of
377       // all ident_t objects. Format is ";file;function;line;column;;".
378       // Taken from
379       // http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp_str.c
380       DefaultOpenMPPSource =
381           CGM.GetAddrOfConstantCString(";unknown;unknown;0;0;;").getPointer();
382       DefaultOpenMPPSource =
383           llvm::ConstantExpr::getBitCast(DefaultOpenMPPSource, CGM.Int8PtrTy);
384     }
385     auto DefaultOpenMPLocation = new llvm::GlobalVariable(
386         CGM.getModule(), IdentTy, /*isConstant*/ true,
387         llvm::GlobalValue::PrivateLinkage, /*Initializer*/ nullptr);
388     DefaultOpenMPLocation->setUnnamedAddr(true);
389     DefaultOpenMPLocation->setAlignment(Align.getQuantity());
390 
391     llvm::Constant *Zero = llvm::ConstantInt::get(CGM.Int32Ty, 0, true);
392     llvm::Constant *Values[] = {Zero,
393                                 llvm::ConstantInt::get(CGM.Int32Ty, Flags),
394                                 Zero, Zero, DefaultOpenMPPSource};
395     llvm::Constant *Init = llvm::ConstantStruct::get(IdentTy, Values);
396     DefaultOpenMPLocation->setInitializer(Init);
397     OpenMPDefaultLocMap[Flags] = Entry = DefaultOpenMPLocation;
398   }
399   return Address(Entry, Align);
400 }
401 
402 llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF,
403                                                  SourceLocation Loc,
404                                                  OpenMPLocationFlags Flags) {
405   // If no debug info is generated - return global default location.
406   if (CGM.getCodeGenOpts().getDebugInfo() == CodeGenOptions::NoDebugInfo ||
407       Loc.isInvalid())
408     return getOrCreateDefaultLocation(Flags).getPointer();
409 
410   assert(CGF.CurFn && "No function in current CodeGenFunction.");
411 
412   Address LocValue = Address::invalid();
413   auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
414   if (I != OpenMPLocThreadIDMap.end())
415     LocValue = Address(I->second.DebugLoc, getIdentAlign(CGF.CGM));
416 
417   // OpenMPLocThreadIDMap may have null DebugLoc and non-null ThreadID, if
418   // GetOpenMPThreadID was called before this routine.
419   if (!LocValue.isValid()) {
420     // Generate "ident_t .kmpc_loc.addr;"
421     Address AI = CGF.CreateTempAlloca(IdentTy, getIdentAlign(CGF.CGM),
422                                       ".kmpc_loc.addr");
423     auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
424     Elem.second.DebugLoc = AI.getPointer();
425     LocValue = AI;
426 
427     CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
428     CGF.Builder.SetInsertPoint(CGF.AllocaInsertPt);
429     CGF.Builder.CreateMemCpy(LocValue, getOrCreateDefaultLocation(Flags),
430                              CGM.getSize(getIdentSize(CGF.CGM)));
431   }
432 
433   // char **psource = &.kmpc_loc_<flags>.addr.psource;
434   Address PSource = createIdentFieldGEP(CGF, LocValue, IdentField_PSource);
435 
436   auto OMPDebugLoc = OpenMPDebugLocMap.lookup(Loc.getRawEncoding());
437   if (OMPDebugLoc == nullptr) {
438     SmallString<128> Buffer2;
439     llvm::raw_svector_ostream OS2(Buffer2);
440     // Build debug location
441     PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
442     OS2 << ";" << PLoc.getFilename() << ";";
443     if (const FunctionDecl *FD =
444             dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl)) {
445       OS2 << FD->getQualifiedNameAsString();
446     }
447     OS2 << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;";
448     OMPDebugLoc = CGF.Builder.CreateGlobalStringPtr(OS2.str());
449     OpenMPDebugLocMap[Loc.getRawEncoding()] = OMPDebugLoc;
450   }
451   // *psource = ";<File>;<Function>;<Line>;<Column>;;";
452   CGF.Builder.CreateStore(OMPDebugLoc, PSource);
453 
454   // Our callers always pass this to a runtime function, so for
455   // convenience, go ahead and return a naked pointer.
456   return LocValue.getPointer();
457 }
458 
459 llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF,
460                                           SourceLocation Loc) {
461   assert(CGF.CurFn && "No function in current CodeGenFunction.");
462 
463   llvm::Value *ThreadID = nullptr;
464   // Check whether we've already cached a load of the thread id in this
465   // function.
466   auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
467   if (I != OpenMPLocThreadIDMap.end()) {
468     ThreadID = I->second.ThreadID;
469     if (ThreadID != nullptr)
470       return ThreadID;
471   }
472   if (auto OMPRegionInfo =
473           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
474     if (OMPRegionInfo->getThreadIDVariable()) {
475       // Check if this an outlined function with thread id passed as argument.
476       auto LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF);
477       ThreadID = CGF.EmitLoadOfLValue(LVal, Loc).getScalarVal();
478       // If value loaded in entry block, cache it and use it everywhere in
479       // function.
480       if (CGF.Builder.GetInsertBlock() == CGF.AllocaInsertPt->getParent()) {
481         auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
482         Elem.second.ThreadID = ThreadID;
483       }
484       return ThreadID;
485     }
486   }
487 
488   // This is not an outlined function region - need to call __kmpc_int32
489   // kmpc_global_thread_num(ident_t *loc).
490   // Generate thread id value and cache this value for use across the
491   // function.
492   CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
493   CGF.Builder.SetInsertPoint(CGF.AllocaInsertPt);
494   ThreadID =
495       CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_global_thread_num),
496                           emitUpdateLocation(CGF, Loc));
497   auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
498   Elem.second.ThreadID = ThreadID;
499   return ThreadID;
500 }
501 
502 void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) {
503   assert(CGF.CurFn && "No function in current CodeGenFunction.");
504   if (OpenMPLocThreadIDMap.count(CGF.CurFn))
505     OpenMPLocThreadIDMap.erase(CGF.CurFn);
506 }
507 
508 llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() {
509   return llvm::PointerType::getUnqual(IdentTy);
510 }
511 
512 llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() {
513   return llvm::PointerType::getUnqual(Kmpc_MicroTy);
514 }
515 
516 llvm::Constant *
517 CGOpenMPRuntime::createRuntimeFunction(OpenMPRTLFunction Function) {
518   llvm::Constant *RTLFn = nullptr;
519   switch (Function) {
520   case OMPRTL__kmpc_fork_call: {
521     // Build void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro
522     // microtask, ...);
523     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
524                                 getKmpc_MicroPointerTy()};
525     llvm::FunctionType *FnTy =
526         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ true);
527     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_fork_call");
528     break;
529   }
530   case OMPRTL__kmpc_global_thread_num: {
531     // Build kmp_int32 __kmpc_global_thread_num(ident_t *loc);
532     llvm::Type *TypeParams[] = {getIdentTyPointerTy()};
533     llvm::FunctionType *FnTy =
534         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
535     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_global_thread_num");
536     break;
537   }
538   case OMPRTL__kmpc_threadprivate_cached: {
539     // Build void *__kmpc_threadprivate_cached(ident_t *loc,
540     // kmp_int32 global_tid, void *data, size_t size, void ***cache);
541     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
542                                 CGM.VoidPtrTy, CGM.SizeTy,
543                                 CGM.VoidPtrTy->getPointerTo()->getPointerTo()};
544     llvm::FunctionType *FnTy =
545         llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg*/ false);
546     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_cached");
547     break;
548   }
549   case OMPRTL__kmpc_critical: {
550     // Build void __kmpc_critical(ident_t *loc, kmp_int32 global_tid,
551     // kmp_critical_name *crit);
552     llvm::Type *TypeParams[] = {
553         getIdentTyPointerTy(), CGM.Int32Ty,
554         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
555     llvm::FunctionType *FnTy =
556         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
557     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_critical");
558     break;
559   }
560   case OMPRTL__kmpc_threadprivate_register: {
561     // Build void __kmpc_threadprivate_register(ident_t *, void *data,
562     // kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor);
563     // typedef void *(*kmpc_ctor)(void *);
564     auto KmpcCtorTy =
565         llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
566                                 /*isVarArg*/ false)->getPointerTo();
567     // typedef void *(*kmpc_cctor)(void *, void *);
568     llvm::Type *KmpcCopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
569     auto KmpcCopyCtorTy =
570         llvm::FunctionType::get(CGM.VoidPtrTy, KmpcCopyCtorTyArgs,
571                                 /*isVarArg*/ false)->getPointerTo();
572     // typedef void (*kmpc_dtor)(void *);
573     auto KmpcDtorTy =
574         llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy, /*isVarArg*/ false)
575             ->getPointerTo();
576     llvm::Type *FnTyArgs[] = {getIdentTyPointerTy(), CGM.VoidPtrTy, KmpcCtorTy,
577                               KmpcCopyCtorTy, KmpcDtorTy};
578     auto FnTy = llvm::FunctionType::get(CGM.VoidTy, FnTyArgs,
579                                         /*isVarArg*/ false);
580     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_register");
581     break;
582   }
583   case OMPRTL__kmpc_end_critical: {
584     // Build void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid,
585     // kmp_critical_name *crit);
586     llvm::Type *TypeParams[] = {
587         getIdentTyPointerTy(), CGM.Int32Ty,
588         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
589     llvm::FunctionType *FnTy =
590         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
591     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_critical");
592     break;
593   }
594   case OMPRTL__kmpc_cancel_barrier: {
595     // Build kmp_int32 __kmpc_cancel_barrier(ident_t *loc, kmp_int32
596     // global_tid);
597     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
598     llvm::FunctionType *FnTy =
599         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
600     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_cancel_barrier");
601     break;
602   }
603   case OMPRTL__kmpc_barrier: {
604     // Build void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid);
605     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
606     llvm::FunctionType *FnTy =
607         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
608     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_barrier");
609     break;
610   }
611   case OMPRTL__kmpc_for_static_fini: {
612     // Build void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid);
613     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
614     llvm::FunctionType *FnTy =
615         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
616     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_for_static_fini");
617     break;
618   }
619   case OMPRTL__kmpc_push_num_threads: {
620     // Build void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
621     // kmp_int32 num_threads)
622     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
623                                 CGM.Int32Ty};
624     llvm::FunctionType *FnTy =
625         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
626     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_num_threads");
627     break;
628   }
629   case OMPRTL__kmpc_serialized_parallel: {
630     // Build void __kmpc_serialized_parallel(ident_t *loc, kmp_int32
631     // global_tid);
632     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
633     llvm::FunctionType *FnTy =
634         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
635     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_serialized_parallel");
636     break;
637   }
638   case OMPRTL__kmpc_end_serialized_parallel: {
639     // Build void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32
640     // global_tid);
641     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
642     llvm::FunctionType *FnTy =
643         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
644     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_serialized_parallel");
645     break;
646   }
647   case OMPRTL__kmpc_flush: {
648     // Build void __kmpc_flush(ident_t *loc);
649     llvm::Type *TypeParams[] = {getIdentTyPointerTy()};
650     llvm::FunctionType *FnTy =
651         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
652     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_flush");
653     break;
654   }
655   case OMPRTL__kmpc_master: {
656     // Build kmp_int32 __kmpc_master(ident_t *loc, kmp_int32 global_tid);
657     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
658     llvm::FunctionType *FnTy =
659         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
660     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_master");
661     break;
662   }
663   case OMPRTL__kmpc_end_master: {
664     // Build void __kmpc_end_master(ident_t *loc, kmp_int32 global_tid);
665     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
666     llvm::FunctionType *FnTy =
667         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
668     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_master");
669     break;
670   }
671   case OMPRTL__kmpc_omp_taskyield: {
672     // Build kmp_int32 __kmpc_omp_taskyield(ident_t *, kmp_int32 global_tid,
673     // int end_part);
674     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
675     llvm::FunctionType *FnTy =
676         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
677     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_taskyield");
678     break;
679   }
680   case OMPRTL__kmpc_single: {
681     // Build kmp_int32 __kmpc_single(ident_t *loc, kmp_int32 global_tid);
682     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
683     llvm::FunctionType *FnTy =
684         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
685     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_single");
686     break;
687   }
688   case OMPRTL__kmpc_end_single: {
689     // Build void __kmpc_end_single(ident_t *loc, kmp_int32 global_tid);
690     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
691     llvm::FunctionType *FnTy =
692         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
693     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_single");
694     break;
695   }
696   case OMPRTL__kmpc_omp_task_alloc: {
697     // Build kmp_task_t *__kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
698     // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
699     // kmp_routine_entry_t *task_entry);
700     assert(KmpRoutineEntryPtrTy != nullptr &&
701            "Type kmp_routine_entry_t must be created.");
702     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty,
703                                 CGM.SizeTy, CGM.SizeTy, KmpRoutineEntryPtrTy};
704     // Return void * and then cast to particular kmp_task_t type.
705     llvm::FunctionType *FnTy =
706         llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
707     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_alloc");
708     break;
709   }
710   case OMPRTL__kmpc_omp_task: {
711     // Build kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t
712     // *new_task);
713     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
714                                 CGM.VoidPtrTy};
715     llvm::FunctionType *FnTy =
716         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
717     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task");
718     break;
719   }
720   case OMPRTL__kmpc_copyprivate: {
721     // Build void __kmpc_copyprivate(ident_t *loc, kmp_int32 global_tid,
722     // size_t cpy_size, void *cpy_data, void(*cpy_func)(void *, void *),
723     // kmp_int32 didit);
724     llvm::Type *CpyTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
725     auto *CpyFnTy =
726         llvm::FunctionType::get(CGM.VoidTy, CpyTypeParams, /*isVarArg=*/false);
727     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.SizeTy,
728                                 CGM.VoidPtrTy, CpyFnTy->getPointerTo(),
729                                 CGM.Int32Ty};
730     llvm::FunctionType *FnTy =
731         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
732     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_copyprivate");
733     break;
734   }
735   case OMPRTL__kmpc_reduce: {
736     // Build kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid,
737     // kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void
738     // (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name *lck);
739     llvm::Type *ReduceTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
740     auto *ReduceFnTy = llvm::FunctionType::get(CGM.VoidTy, ReduceTypeParams,
741                                                /*isVarArg=*/false);
742     llvm::Type *TypeParams[] = {
743         getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, CGM.SizeTy,
744         CGM.VoidPtrTy, ReduceFnTy->getPointerTo(),
745         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
746     llvm::FunctionType *FnTy =
747         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
748     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_reduce");
749     break;
750   }
751   case OMPRTL__kmpc_reduce_nowait: {
752     // Build kmp_int32 __kmpc_reduce_nowait(ident_t *loc, kmp_int32
753     // global_tid, kmp_int32 num_vars, size_t reduce_size, void *reduce_data,
754     // void (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name
755     // *lck);
756     llvm::Type *ReduceTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
757     auto *ReduceFnTy = llvm::FunctionType::get(CGM.VoidTy, ReduceTypeParams,
758                                                /*isVarArg=*/false);
759     llvm::Type *TypeParams[] = {
760         getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, CGM.SizeTy,
761         CGM.VoidPtrTy, ReduceFnTy->getPointerTo(),
762         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
763     llvm::FunctionType *FnTy =
764         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
765     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_reduce_nowait");
766     break;
767   }
768   case OMPRTL__kmpc_end_reduce: {
769     // Build void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,
770     // kmp_critical_name *lck);
771     llvm::Type *TypeParams[] = {
772         getIdentTyPointerTy(), CGM.Int32Ty,
773         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
774     llvm::FunctionType *FnTy =
775         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
776     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_reduce");
777     break;
778   }
779   case OMPRTL__kmpc_end_reduce_nowait: {
    // Build void __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
    // kmp_critical_name *lck);
782     llvm::Type *TypeParams[] = {
783         getIdentTyPointerTy(), CGM.Int32Ty,
784         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
785     llvm::FunctionType *FnTy =
786         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
787     RTLFn =
788         CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_reduce_nowait");
789     break;
790   }
791   case OMPRTL__kmpc_omp_task_begin_if0: {
    // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
794     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
795                                 CGM.VoidPtrTy};
796     llvm::FunctionType *FnTy =
797         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
798     RTLFn =
799         CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_begin_if0");
800     break;
801   }
802   case OMPRTL__kmpc_omp_task_complete_if0: {
    // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
805     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
806                                 CGM.VoidPtrTy};
807     llvm::FunctionType *FnTy =
808         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
809     RTLFn = CGM.CreateRuntimeFunction(FnTy,
810                                       /*Name=*/"__kmpc_omp_task_complete_if0");
811     break;
812   }
813   case OMPRTL__kmpc_ordered: {
814     // Build void __kmpc_ordered(ident_t *loc, kmp_int32 global_tid);
815     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
816     llvm::FunctionType *FnTy =
817         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
818     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_ordered");
819     break;
820   }
821   case OMPRTL__kmpc_end_ordered: {
822     // Build void __kmpc_end_ordered(ident_t *loc, kmp_int32 global_tid);
823     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
824     llvm::FunctionType *FnTy =
825         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
826     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_ordered");
827     break;
828   }
829   case OMPRTL__kmpc_omp_taskwait: {
830     // Build kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32 global_tid);
831     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
832     llvm::FunctionType *FnTy =
833         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
834     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_omp_taskwait");
835     break;
836   }
837   case OMPRTL__kmpc_taskgroup: {
838     // Build void __kmpc_taskgroup(ident_t *loc, kmp_int32 global_tid);
839     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
840     llvm::FunctionType *FnTy =
841         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
842     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_taskgroup");
843     break;
844   }
845   case OMPRTL__kmpc_end_taskgroup: {
846     // Build void __kmpc_end_taskgroup(ident_t *loc, kmp_int32 global_tid);
847     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
848     llvm::FunctionType *FnTy =
849         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
850     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_taskgroup");
851     break;
852   }
853   case OMPRTL__kmpc_push_proc_bind: {
854     // Build void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
855     // int proc_bind)
856     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
857     llvm::FunctionType *FnTy =
858         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
859     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_proc_bind");
860     break;
861   }
862   case OMPRTL__kmpc_omp_task_with_deps: {
863     // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
864     // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
865     // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list);
866     llvm::Type *TypeParams[] = {
867         getIdentTyPointerTy(), CGM.Int32Ty, CGM.VoidPtrTy, CGM.Int32Ty,
868         CGM.VoidPtrTy,         CGM.Int32Ty, CGM.VoidPtrTy};
869     llvm::FunctionType *FnTy =
870         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
871     RTLFn =
872         CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_with_deps");
873     break;
874   }
875   case OMPRTL__kmpc_omp_wait_deps: {
876     // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
877     // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 ndeps_noalias,
878     // kmp_depend_info_t *noalias_dep_list);
879     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
880                                 CGM.Int32Ty,           CGM.VoidPtrTy,
881                                 CGM.Int32Ty,           CGM.VoidPtrTy};
882     llvm::FunctionType *FnTy =
883         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
884     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_wait_deps");
885     break;
886   }
887   case OMPRTL__kmpc_cancellationpoint: {
888     // Build kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
889     // global_tid, kmp_int32 cncl_kind)
890     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
891     llvm::FunctionType *FnTy =
892         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
893     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_cancellationpoint");
894     break;
895   }
896   case OMPRTL__kmpc_cancel: {
897     // Build kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
898     // kmp_int32 cncl_kind)
899     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
900     llvm::FunctionType *FnTy =
901         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
902     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_cancel");
903     break;
904   }
905   case OMPRTL__tgt_target: {
906     // Build int32_t __tgt_target(int32_t device_id, void *host_ptr, int32_t
907     // arg_num, void** args_base, void **args, size_t *arg_sizes, int32_t
908     // *arg_types);
909     llvm::Type *TypeParams[] = {CGM.Int32Ty,
910                                 CGM.VoidPtrTy,
911                                 CGM.Int32Ty,
912                                 CGM.VoidPtrPtrTy,
913                                 CGM.VoidPtrPtrTy,
914                                 CGM.SizeTy->getPointerTo(),
915                                 CGM.Int32Ty->getPointerTo()};
916     llvm::FunctionType *FnTy =
917         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
918     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target");
919     break;
920   }
921   }
922   return RTLFn;
923 }
924 
925 static llvm::Value *getTypeSize(CodeGenFunction &CGF, QualType Ty) {
926   auto &C = CGF.getContext();
927   llvm::Value *Size = nullptr;
928   auto SizeInChars = C.getTypeSizeInChars(Ty);
929   if (SizeInChars.isZero()) {
930     // getTypeSizeInChars() returns 0 for a VLA.
931     while (auto *VAT = C.getAsVariableArrayType(Ty)) {
932       llvm::Value *ArraySize;
933       std::tie(ArraySize, Ty) = CGF.getVLASize(VAT);
934       Size = Size ? CGF.Builder.CreateNUWMul(Size, ArraySize) : ArraySize;
935     }
936     SizeInChars = C.getTypeSizeInChars(Ty);
937     assert(!SizeInChars.isZero());
938     Size = CGF.Builder.CreateNUWMul(
939         Size, llvm::ConstantInt::get(CGF.SizeTy, SizeInChars.getQuantity()));
940   } else
941     Size = llvm::ConstantInt::get(CGF.SizeTy, SizeInChars.getQuantity());
942   return Size;
943 }
944 
945 llvm::Constant *CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize,
946                                                              bool IVSigned) {
947   assert((IVSize == 32 || IVSize == 64) &&
948          "IV size is not compatible with the omp runtime");
949   auto Name = IVSize == 32 ? (IVSigned ? "__kmpc_for_static_init_4"
950                                        : "__kmpc_for_static_init_4u")
951                            : (IVSigned ? "__kmpc_for_static_init_8"
952                                        : "__kmpc_for_static_init_8u");
953   auto ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
954   auto PtrTy = llvm::PointerType::getUnqual(ITy);
955   llvm::Type *TypeParams[] = {
956     getIdentTyPointerTy(),                     // loc
957     CGM.Int32Ty,                               // tid
958     CGM.Int32Ty,                               // schedtype
959     llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
960     PtrTy,                                     // p_lower
961     PtrTy,                                     // p_upper
962     PtrTy,                                     // p_stride
963     ITy,                                       // incr
964     ITy                                        // chunk
965   };
966   llvm::FunctionType *FnTy =
967       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
968   return CGM.CreateRuntimeFunction(FnTy, Name);
969 }
970 
971 llvm::Constant *CGOpenMPRuntime::createDispatchInitFunction(unsigned IVSize,
972                                                             bool IVSigned) {
973   assert((IVSize == 32 || IVSize == 64) &&
974          "IV size is not compatible with the omp runtime");
975   auto Name =
976       IVSize == 32
977           ? (IVSigned ? "__kmpc_dispatch_init_4" : "__kmpc_dispatch_init_4u")
978           : (IVSigned ? "__kmpc_dispatch_init_8" : "__kmpc_dispatch_init_8u");
979   auto ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
980   llvm::Type *TypeParams[] = { getIdentTyPointerTy(), // loc
981                                CGM.Int32Ty,           // tid
982                                CGM.Int32Ty,           // schedtype
983                                ITy,                   // lower
984                                ITy,                   // upper
985                                ITy,                   // stride
986                                ITy                    // chunk
987   };
988   llvm::FunctionType *FnTy =
989       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
990   return CGM.CreateRuntimeFunction(FnTy, Name);
991 }
992 
993 llvm::Constant *CGOpenMPRuntime::createDispatchFiniFunction(unsigned IVSize,
994                                                             bool IVSigned) {
995   assert((IVSize == 32 || IVSize == 64) &&
996          "IV size is not compatible with the omp runtime");
997   auto Name =
998       IVSize == 32
999           ? (IVSigned ? "__kmpc_dispatch_fini_4" : "__kmpc_dispatch_fini_4u")
1000           : (IVSigned ? "__kmpc_dispatch_fini_8" : "__kmpc_dispatch_fini_8u");
1001   llvm::Type *TypeParams[] = {
1002       getIdentTyPointerTy(), // loc
1003       CGM.Int32Ty,           // tid
1004   };
1005   llvm::FunctionType *FnTy =
1006       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1007   return CGM.CreateRuntimeFunction(FnTy, Name);
1008 }
1009 
1010 llvm::Constant *CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize,
1011                                                             bool IVSigned) {
1012   assert((IVSize == 32 || IVSize == 64) &&
1013          "IV size is not compatible with the omp runtime");
1014   auto Name =
1015       IVSize == 32
1016           ? (IVSigned ? "__kmpc_dispatch_next_4" : "__kmpc_dispatch_next_4u")
1017           : (IVSigned ? "__kmpc_dispatch_next_8" : "__kmpc_dispatch_next_8u");
1018   auto ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
1019   auto PtrTy = llvm::PointerType::getUnqual(ITy);
1020   llvm::Type *TypeParams[] = {
1021     getIdentTyPointerTy(),                     // loc
1022     CGM.Int32Ty,                               // tid
1023     llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
1024     PtrTy,                                     // p_lower
1025     PtrTy,                                     // p_upper
1026     PtrTy                                      // p_stride
1027   };
1028   llvm::FunctionType *FnTy =
1029       llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
1030   return CGM.CreateRuntimeFunction(FnTy, Name);
1031 }
1032 
1033 llvm::Constant *
1034 CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) {
1035   assert(!CGM.getLangOpts().OpenMPUseTLS ||
1036          !CGM.getContext().getTargetInfo().isTLSSupported());
1037   // Lookup the entry, lazily creating it if necessary.
1038   return getOrCreateInternalVariable(CGM.Int8PtrPtrTy,
1039                                      Twine(CGM.getMangledName(VD)) + ".cache.");
1040 }
1041 
1042 Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
1043                                                 const VarDecl *VD,
1044                                                 Address VDAddr,
1045                                                 SourceLocation Loc) {
1046   if (CGM.getLangOpts().OpenMPUseTLS &&
1047       CGM.getContext().getTargetInfo().isTLSSupported())
1048     return VDAddr;
1049 
1050   auto VarTy = VDAddr.getElementType();
1051   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
1052                          CGF.Builder.CreatePointerCast(VDAddr.getPointer(),
1053                                                        CGM.Int8PtrTy),
1054                          CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)),
1055                          getOrCreateThreadPrivateCache(VD)};
1056   return Address(CGF.EmitRuntimeCall(
1057       createRuntimeFunction(OMPRTL__kmpc_threadprivate_cached), Args),
1058                  VDAddr.getAlignment());
1059 }
1060 
1061 void CGOpenMPRuntime::emitThreadPrivateVarInit(
1062     CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor,
1063     llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) {
1064   // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime
1065   // library.
1066   auto OMPLoc = emitUpdateLocation(CGF, Loc);
1067   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_global_thread_num),
1068                       OMPLoc);
1069   // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor)
1070   // to register constructor/destructor for variable.
1071   llvm::Value *Args[] = {OMPLoc,
1072                          CGF.Builder.CreatePointerCast(VDAddr.getPointer(),
1073                                                        CGM.VoidPtrTy),
1074                          Ctor, CopyCtor, Dtor};
1075   CGF.EmitRuntimeCall(
1076       createRuntimeFunction(OMPRTL__kmpc_threadprivate_register), Args);
1077 }
1078 
1079 llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition(
1080     const VarDecl *VD, Address VDAddr, SourceLocation Loc,
1081     bool PerformInit, CodeGenFunction *CGF) {
1082   if (CGM.getLangOpts().OpenMPUseTLS &&
1083       CGM.getContext().getTargetInfo().isTLSSupported())
1084     return nullptr;
1085 
1086   VD = VD->getDefinition(CGM.getContext());
1087   if (VD && ThreadPrivateWithDefinition.count(VD) == 0) {
1088     ThreadPrivateWithDefinition.insert(VD);
1089     QualType ASTTy = VD->getType();
1090 
1091     llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr;
1092     auto Init = VD->getAnyInitializer();
1093     if (CGM.getLangOpts().CPlusPlus && PerformInit) {
1094       // Generate function that re-emits the declaration's initializer into the
1095       // threadprivate copy of the variable VD
1096       CodeGenFunction CtorCGF(CGM);
1097       FunctionArgList Args;
1098       ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, SourceLocation(),
1099                             /*Id=*/nullptr, CGM.getContext().VoidPtrTy);
1100       Args.push_back(&Dst);
1101 
1102       auto &FI = CGM.getTypes().arrangeFreeFunctionDeclaration(
1103           CGM.getContext().VoidPtrTy, Args, FunctionType::ExtInfo(),
1104           /*isVariadic=*/false);
1105       auto FTy = CGM.getTypes().GetFunctionType(FI);
1106       auto Fn = CGM.CreateGlobalInitOrDestructFunction(
1107           FTy, ".__kmpc_global_ctor_.", FI, Loc);
1108       CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI,
1109                             Args, SourceLocation());
1110       auto ArgVal = CtorCGF.EmitLoadOfScalar(
1111           CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
1112           CGM.getContext().VoidPtrTy, Dst.getLocation());
1113       Address Arg = Address(ArgVal, VDAddr.getAlignment());
1114       Arg = CtorCGF.Builder.CreateElementBitCast(Arg,
1115                                              CtorCGF.ConvertTypeForMem(ASTTy));
1116       CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(),
1117                                /*IsInitializer=*/true);
1118       ArgVal = CtorCGF.EmitLoadOfScalar(
1119           CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
1120           CGM.getContext().VoidPtrTy, Dst.getLocation());
1121       CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue);
1122       CtorCGF.FinishFunction();
1123       Ctor = Fn;
1124     }
1125     if (VD->getType().isDestructedType() != QualType::DK_none) {
1126       // Generate function that emits destructor call for the threadprivate copy
1127       // of the variable VD
1128       CodeGenFunction DtorCGF(CGM);
1129       FunctionArgList Args;
1130       ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, SourceLocation(),
1131                             /*Id=*/nullptr, CGM.getContext().VoidPtrTy);
1132       Args.push_back(&Dst);
1133 
1134       auto &FI = CGM.getTypes().arrangeFreeFunctionDeclaration(
1135           CGM.getContext().VoidTy, Args, FunctionType::ExtInfo(),
1136           /*isVariadic=*/false);
1137       auto FTy = CGM.getTypes().GetFunctionType(FI);
1138       auto Fn = CGM.CreateGlobalInitOrDestructFunction(
1139           FTy, ".__kmpc_global_dtor_.", FI, Loc);
1140       DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args,
1141                             SourceLocation());
1142       auto ArgVal = DtorCGF.EmitLoadOfScalar(
1143           DtorCGF.GetAddrOfLocalVar(&Dst),
1144           /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation());
1145       DtorCGF.emitDestroy(Address(ArgVal, VDAddr.getAlignment()), ASTTy,
1146                           DtorCGF.getDestroyer(ASTTy.isDestructedType()),
1147                           DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
1148       DtorCGF.FinishFunction();
1149       Dtor = Fn;
1150     }
1151     // Do not emit init function if it is not required.
1152     if (!Ctor && !Dtor)
1153       return nullptr;
1154 
1155     llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
1156     auto CopyCtorTy =
1157         llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs,
1158                                 /*isVarArg=*/false)->getPointerTo();
1159     // Copying constructor for the threadprivate variable.
1160     // Must be NULL - reserved by runtime, but currently it requires that this
1161     // parameter is always NULL. Otherwise it fires assertion.
1162     CopyCtor = llvm::Constant::getNullValue(CopyCtorTy);
1163     if (Ctor == nullptr) {
1164       auto CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
1165                                             /*isVarArg=*/false)->getPointerTo();
1166       Ctor = llvm::Constant::getNullValue(CtorTy);
1167     }
1168     if (Dtor == nullptr) {
1169       auto DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy,
1170                                             /*isVarArg=*/false)->getPointerTo();
1171       Dtor = llvm::Constant::getNullValue(DtorTy);
1172     }
1173     if (!CGF) {
1174       auto InitFunctionTy =
1175           llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false);
1176       auto InitFunction = CGM.CreateGlobalInitOrDestructFunction(
1177           InitFunctionTy, ".__omp_threadprivate_init_.",
1178           CGM.getTypes().arrangeNullaryFunction());
1179       CodeGenFunction InitCGF(CGM);
1180       FunctionArgList ArgList;
1181       InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction,
1182                             CGM.getTypes().arrangeNullaryFunction(), ArgList,
1183                             Loc);
1184       emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
1185       InitCGF.FinishFunction();
1186       return InitFunction;
1187     }
1188     emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
1189   }
1190   return nullptr;
1191 }
1192 
1193 /// \brief Emits code for OpenMP 'if' clause using specified \a CodeGen
1194 /// function. Here is the logic:
1195 /// if (Cond) {
1196 ///   ThenGen();
1197 /// } else {
1198 ///   ElseGen();
1199 /// }
1200 static void emitOMPIfClause(CodeGenFunction &CGF, const Expr *Cond,
1201                             const RegionCodeGenTy &ThenGen,
1202                             const RegionCodeGenTy &ElseGen) {
1203   CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange());
1204 
1205   // If the condition constant folds and can be elided, try to avoid emitting
1206   // the condition and the dead arm of the if/else.
1207   bool CondConstant;
1208   if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) {
1209     CodeGenFunction::RunCleanupsScope Scope(CGF);
1210     if (CondConstant) {
1211       ThenGen(CGF);
1212     } else {
1213       ElseGen(CGF);
1214     }
1215     return;
1216   }
1217 
1218   // Otherwise, the condition did not fold, or we couldn't elide it.  Just
1219   // emit the conditional branch.
1220   auto ThenBlock = CGF.createBasicBlock("omp_if.then");
1221   auto ElseBlock = CGF.createBasicBlock("omp_if.else");
1222   auto ContBlock = CGF.createBasicBlock("omp_if.end");
1223   CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0);
1224 
1225   // Emit the 'then' code.
1226   CGF.EmitBlock(ThenBlock);
1227   {
1228     CodeGenFunction::RunCleanupsScope ThenScope(CGF);
1229     ThenGen(CGF);
1230   }
1231   CGF.EmitBranch(ContBlock);
1232   // Emit the 'else' code if present.
1233   {
1234     // There is no need to emit line number for unconditional branch.
1235     auto NL = ApplyDebugLocation::CreateEmpty(CGF);
1236     CGF.EmitBlock(ElseBlock);
1237   }
1238   {
1239     CodeGenFunction::RunCleanupsScope ThenScope(CGF);
1240     ElseGen(CGF);
1241   }
1242   {
1243     // There is no need to emit line number for unconditional branch.
1244     auto NL = ApplyDebugLocation::CreateEmpty(CGF);
1245     CGF.EmitBranch(ContBlock);
1246   }
1247   // Emit the continuation block for code after the if.
1248   CGF.EmitBlock(ContBlock, /*IsFinished=*/true);
1249 }
1250 
1251 void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
1252                                        llvm::Value *OutlinedFn,
1253                                        ArrayRef<llvm::Value *> CapturedVars,
1254                                        const Expr *IfCond) {
1255   auto *RTLoc = emitUpdateLocation(CGF, Loc);
1256   auto &&ThenGen = [this, OutlinedFn, CapturedVars,
1257                     RTLoc](CodeGenFunction &CGF) {
1258     // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn);
1259     llvm::Value *Args[] = {
1260         RTLoc,
1261         CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
1262         CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy())};
1263     llvm::SmallVector<llvm::Value *, 16> RealArgs;
1264     RealArgs.append(std::begin(Args), std::end(Args));
1265     RealArgs.append(CapturedVars.begin(), CapturedVars.end());
1266 
1267     auto RTLFn = createRuntimeFunction(OMPRTL__kmpc_fork_call);
1268     CGF.EmitRuntimeCall(RTLFn, RealArgs);
1269   };
1270   auto &&ElseGen = [this, OutlinedFn, CapturedVars, RTLoc,
1271                     Loc](CodeGenFunction &CGF) {
1272     auto ThreadID = getThreadID(CGF, Loc);
1273     // Build calls:
1274     // __kmpc_serialized_parallel(&Loc, GTid);
1275     llvm::Value *Args[] = {RTLoc, ThreadID};
1276     CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_serialized_parallel),
1277                         Args);
1278 
1279     // OutlinedFn(&GTid, &zero, CapturedStruct);
1280     auto ThreadIDAddr = emitThreadIDAddress(CGF, Loc);
1281     Address ZeroAddr =
1282       CGF.CreateTempAlloca(CGF.Int32Ty, CharUnits::fromQuantity(4),
1283                            /*Name*/ ".zero.addr");
1284     CGF.InitTempAlloca(ZeroAddr, CGF.Builder.getInt32(/*C*/ 0));
1285     llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
1286     OutlinedFnArgs.push_back(ThreadIDAddr.getPointer());
1287     OutlinedFnArgs.push_back(ZeroAddr.getPointer());
1288     OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());
1289     CGF.EmitCallOrInvoke(OutlinedFn, OutlinedFnArgs);
1290 
1291     // __kmpc_end_serialized_parallel(&Loc, GTid);
1292     llvm::Value *EndArgs[] = {emitUpdateLocation(CGF, Loc), ThreadID};
1293     CGF.EmitRuntimeCall(
1294         createRuntimeFunction(OMPRTL__kmpc_end_serialized_parallel), EndArgs);
1295   };
1296   if (IfCond) {
1297     emitOMPIfClause(CGF, IfCond, ThenGen, ElseGen);
1298   } else {
1299     CodeGenFunction::RunCleanupsScope Scope(CGF);
1300     ThenGen(CGF);
1301   }
1302 }
1303 
1304 // If we're inside an (outlined) parallel region, use the region info's
1305 // thread-ID variable (it is passed in a first argument of the outlined function
1306 // as "kmp_int32 *gtid"). Otherwise, if we're not inside parallel region, but in
1307 // regular serial code region, get thread ID by calling kmp_int32
1308 // kmpc_global_thread_num(ident_t *loc), stash this thread ID in a temporary and
1309 // return the address of that temp.
1310 Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF,
1311                                              SourceLocation Loc) {
1312   if (auto OMPRegionInfo =
1313           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
1314     if (OMPRegionInfo->getThreadIDVariable())
1315       return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress();
1316 
1317   auto ThreadID = getThreadID(CGF, Loc);
1318   auto Int32Ty =
1319       CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true);
1320   auto ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp.");
1321   CGF.EmitStoreOfScalar(ThreadID,
1322                         CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty));
1323 
1324   return ThreadIDTemp;
1325 }
1326 
1327 llvm::Constant *
1328 CGOpenMPRuntime::getOrCreateInternalVariable(llvm::Type *Ty,
1329                                              const llvm::Twine &Name) {
1330   SmallString<256> Buffer;
1331   llvm::raw_svector_ostream Out(Buffer);
1332   Out << Name;
1333   auto RuntimeName = Out.str();
1334   auto &Elem = *InternalVars.insert(std::make_pair(RuntimeName, nullptr)).first;
1335   if (Elem.second) {
1336     assert(Elem.second->getType()->getPointerElementType() == Ty &&
1337            "OMP internal variable has different type than requested");
1338     return &*Elem.second;
1339   }
1340 
1341   return Elem.second = new llvm::GlobalVariable(
1342              CGM.getModule(), Ty, /*IsConstant*/ false,
1343              llvm::GlobalValue::CommonLinkage, llvm::Constant::getNullValue(Ty),
1344              Elem.first());
1345 }
1346 
1347 llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) {
1348   llvm::Twine Name(".gomp_critical_user_", CriticalName);
1349   return getOrCreateInternalVariable(KmpCriticalNameTy, Name.concat(".var"));
1350 }
1351 
1352 namespace {
1353 template <size_t N> class CallEndCleanup final : public EHScopeStack::Cleanup {
1354   llvm::Value *Callee;
1355   llvm::Value *Args[N];
1356 
1357 public:
1358   CallEndCleanup(llvm::Value *Callee, ArrayRef<llvm::Value *> CleanupArgs)
1359       : Callee(Callee) {
1360     assert(CleanupArgs.size() == N);
1361     std::copy(CleanupArgs.begin(), CleanupArgs.end(), std::begin(Args));
1362   }
1363   void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
1364     CGF.EmitRuntimeCall(Callee, Args);
1365   }
1366 };
1367 } // anonymous namespace
1368 
1369 void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF,
1370                                          StringRef CriticalName,
1371                                          const RegionCodeGenTy &CriticalOpGen,
1372                                          SourceLocation Loc) {
1373   // __kmpc_critical(ident_t *, gtid, Lock);
1374   // CriticalOpGen();
1375   // __kmpc_end_critical(ident_t *, gtid, Lock);
1376   // Prepare arguments and build a call to __kmpc_critical
1377   {
1378     CodeGenFunction::RunCleanupsScope Scope(CGF);
1379     llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
1380                            getCriticalRegionLock(CriticalName)};
1381     CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_critical), Args);
1382     // Build a call to __kmpc_end_critical
1383     CGF.EHStack.pushCleanup<CallEndCleanup<std::extent<decltype(Args)>::value>>(
1384         NormalAndEHCleanup, createRuntimeFunction(OMPRTL__kmpc_end_critical),
1385         llvm::makeArrayRef(Args));
1386     emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen);
1387   }
1388 }
1389 
1390 static void emitIfStmt(CodeGenFunction &CGF, llvm::Value *IfCond,
1391                        OpenMPDirectiveKind Kind, SourceLocation Loc,
1392                        const RegionCodeGenTy &BodyOpGen) {
1393   llvm::Value *CallBool = CGF.EmitScalarConversion(
1394       IfCond,
1395       CGF.getContext().getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true),
1396       CGF.getContext().BoolTy, Loc);
1397 
1398   auto *ThenBlock = CGF.createBasicBlock("omp_if.then");
1399   auto *ContBlock = CGF.createBasicBlock("omp_if.end");
1400   // Generate the branch (If-stmt)
1401   CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock);
1402   CGF.EmitBlock(ThenBlock);
1403   CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, Kind, BodyOpGen);
1404   // Emit the rest of bblocks/branches
1405   CGF.EmitBranch(ContBlock);
1406   CGF.EmitBlock(ContBlock, true);
1407 }
1408 
1409 void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF,
1410                                        const RegionCodeGenTy &MasterOpGen,
1411                                        SourceLocation Loc) {
1412   // if(__kmpc_master(ident_t *, gtid)) {
1413   //   MasterOpGen();
1414   //   __kmpc_end_master(ident_t *, gtid);
1415   // }
1416   // Prepare arguments and build a call to __kmpc_master
1417   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
1418   auto *IsMaster =
1419       CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_master), Args);
1420   typedef CallEndCleanup<std::extent<decltype(Args)>::value>
1421       MasterCallEndCleanup;
1422   emitIfStmt(
1423       CGF, IsMaster, OMPD_master, Loc, [&](CodeGenFunction &CGF) -> void {
1424         CodeGenFunction::RunCleanupsScope Scope(CGF);
1425         CGF.EHStack.pushCleanup<MasterCallEndCleanup>(
1426             NormalAndEHCleanup, createRuntimeFunction(OMPRTL__kmpc_end_master),
1427             llvm::makeArrayRef(Args));
1428         MasterOpGen(CGF);
1429       });
1430 }
1431 
1432 void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
1433                                         SourceLocation Loc) {
1434   // Build call __kmpc_omp_taskyield(loc, thread_id, 0);
1435   llvm::Value *Args[] = {
1436       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
1437       llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)};
1438   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_taskyield), Args);
1439 }
1440 
1441 void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF,
1442                                           const RegionCodeGenTy &TaskgroupOpGen,
1443                                           SourceLocation Loc) {
1444   // __kmpc_taskgroup(ident_t *, gtid);
1445   // TaskgroupOpGen();
1446   // __kmpc_end_taskgroup(ident_t *, gtid);
1447   // Prepare arguments and build a call to __kmpc_taskgroup
1448   {
1449     CodeGenFunction::RunCleanupsScope Scope(CGF);
1450     llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
1451     CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_taskgroup), Args);
1452     // Build a call to __kmpc_end_taskgroup
1453     CGF.EHStack.pushCleanup<CallEndCleanup<std::extent<decltype(Args)>::value>>(
1454         NormalAndEHCleanup, createRuntimeFunction(OMPRTL__kmpc_end_taskgroup),
1455         llvm::makeArrayRef(Args));
1456     emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen);
1457   }
1458 }
1459 
1460 /// Given an array of pointers to variables, project the address of a
1461 /// given variable.
1462 static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array,
1463                                       unsigned Index, const VarDecl *Var) {
1464   // Pull out the pointer to the variable.
1465   Address PtrAddr =
1466       CGF.Builder.CreateConstArrayGEP(Array, Index, CGF.getPointerSize());
1467   llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr);
1468 
1469   Address Addr = Address(Ptr, CGF.getContext().getDeclAlign(Var));
1470   Addr = CGF.Builder.CreateElementBitCast(
1471       Addr, CGF.ConvertTypeForMem(Var->getType()));
1472   return Addr;
1473 }
1474 
1475 static llvm::Value *emitCopyprivateCopyFunction(
1476     CodeGenModule &CGM, llvm::Type *ArgsType,
1477     ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs,
1478     ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps) {
1479   auto &C = CGM.getContext();
1480   // void copy_func(void *LHSArg, void *RHSArg);
1481   FunctionArgList Args;
1482   ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, SourceLocation(), /*Id=*/nullptr,
1483                            C.VoidPtrTy);
1484   ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, SourceLocation(), /*Id=*/nullptr,
1485                            C.VoidPtrTy);
1486   Args.push_back(&LHSArg);
1487   Args.push_back(&RHSArg);
1488   FunctionType::ExtInfo EI;
1489   auto &CGFI = CGM.getTypes().arrangeFreeFunctionDeclaration(
1490       C.VoidTy, Args, EI, /*isVariadic=*/false);
1491   auto *Fn = llvm::Function::Create(
1492       CGM.getTypes().GetFunctionType(CGFI), llvm::GlobalValue::InternalLinkage,
1493       ".omp.copyprivate.copy_func", &CGM.getModule());
1494   CGM.SetInternalFunctionAttributes(/*D=*/nullptr, Fn, CGFI);
1495   CodeGenFunction CGF(CGM);
1496   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args);
1497   // Dest = (void*[n])(LHSArg);
1498   // Src = (void*[n])(RHSArg);
1499   Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
1500       CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
1501       ArgsType), CGF.getPointerAlign());
1502   Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
1503       CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
1504       ArgsType), CGF.getPointerAlign());
1505   // *(Type0*)Dst[0] = *(Type0*)Src[0];
1506   // *(Type1*)Dst[1] = *(Type1*)Src[1];
1507   // ...
1508   // *(Typen*)Dst[n] = *(Typen*)Src[n];
1509   for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) {
1510     auto DestVar = cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl());
1511     Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar);
1512 
1513     auto SrcVar = cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl());
1514     Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar);
1515 
1516     auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl();
1517     QualType Type = VD->getType();
1518     CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]);
1519   }
1520   CGF.FinishFunction();
1521   return Fn;
1522 }
1523 
1524 void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF,
1525                                        const RegionCodeGenTy &SingleOpGen,
1526                                        SourceLocation Loc,
1527                                        ArrayRef<const Expr *> CopyprivateVars,
1528                                        ArrayRef<const Expr *> SrcExprs,
1529                                        ArrayRef<const Expr *> DstExprs,
1530                                        ArrayRef<const Expr *> AssignmentOps) {
1531   assert(CopyprivateVars.size() == SrcExprs.size() &&
1532          CopyprivateVars.size() == DstExprs.size() &&
1533          CopyprivateVars.size() == AssignmentOps.size());
1534   auto &C = CGM.getContext();
1535   // int32 did_it = 0;
1536   // if(__kmpc_single(ident_t *, gtid)) {
1537   //   SingleOpGen();
1538   //   __kmpc_end_single(ident_t *, gtid);
1539   //   did_it = 1;
1540   // }
1541   // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
1542   // <copy_func>, did_it);
1543 
1544   Address DidIt = Address::invalid();
1545   if (!CopyprivateVars.empty()) {
1546     // int32 did_it = 0;
1547     auto KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
1548     DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it");
1549     CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt);
1550   }
1551   // Prepare arguments and build a call to __kmpc_single
1552   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
1553   auto *IsSingle =
1554       CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_single), Args);
1555   typedef CallEndCleanup<std::extent<decltype(Args)>::value>
1556       SingleCallEndCleanup;
1557   emitIfStmt(
1558       CGF, IsSingle, OMPD_single, Loc, [&](CodeGenFunction &CGF) -> void {
1559         CodeGenFunction::RunCleanupsScope Scope(CGF);
1560         CGF.EHStack.pushCleanup<SingleCallEndCleanup>(
1561             NormalAndEHCleanup, createRuntimeFunction(OMPRTL__kmpc_end_single),
1562             llvm::makeArrayRef(Args));
1563         SingleOpGen(CGF);
1564         if (DidIt.isValid()) {
1565           // did_it = 1;
1566           CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt);
1567         }
1568       });
1569   // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
1570   // <copy_func>, did_it);
1571   if (DidIt.isValid()) {
1572     llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size());
1573     auto CopyprivateArrayTy =
1574         C.getConstantArrayType(C.VoidPtrTy, ArraySize, ArrayType::Normal,
1575                                /*IndexTypeQuals=*/0);
1576     // Create a list of all private variables for copyprivate.
1577     Address CopyprivateList =
1578         CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list");
1579     for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) {
1580       Address Elem = CGF.Builder.CreateConstArrayGEP(
1581           CopyprivateList, I, CGF.getPointerSize());
1582       CGF.Builder.CreateStore(
1583           CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
1584               CGF.EmitLValue(CopyprivateVars[I]).getPointer(), CGF.VoidPtrTy),
1585           Elem);
1586     }
1587     // Build function that copies private values from single region to all other
1588     // threads in the corresponding parallel region.
1589     auto *CpyFn = emitCopyprivateCopyFunction(
1590         CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy)->getPointerTo(),
1591         CopyprivateVars, SrcExprs, DstExprs, AssignmentOps);
1592     auto *BufSize = getTypeSize(CGF, CopyprivateArrayTy);
1593     Address CL =
1594       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(CopyprivateList,
1595                                                       CGF.VoidPtrTy);
1596     auto *DidItVal = CGF.Builder.CreateLoad(DidIt);
1597     llvm::Value *Args[] = {
1598         emitUpdateLocation(CGF, Loc), // ident_t *<loc>
1599         getThreadID(CGF, Loc),        // i32 <gtid>
1600         BufSize,                      // size_t <buf_size>
1601         CL.getPointer(),              // void *<copyprivate list>
1602         CpyFn,                        // void (*) (void *, void *) <copy_func>
1603         DidItVal                      // i32 did_it
1604     };
1605     CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_copyprivate), Args);
1606   }
1607 }
1608 
1609 void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF,
1610                                         const RegionCodeGenTy &OrderedOpGen,
1611                                         SourceLocation Loc, bool IsThreads) {
1612   // __kmpc_ordered(ident_t *, gtid);
1613   // OrderedOpGen();
1614   // __kmpc_end_ordered(ident_t *, gtid);
1615   // Prepare arguments and build a call to __kmpc_ordered
1616   CodeGenFunction::RunCleanupsScope Scope(CGF);
1617   if (IsThreads) {
1618     llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
1619     CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_ordered), Args);
1620     // Build a call to __kmpc_end_ordered
1621     CGF.EHStack.pushCleanup<CallEndCleanup<std::extent<decltype(Args)>::value>>(
1622         NormalAndEHCleanup, createRuntimeFunction(OMPRTL__kmpc_end_ordered),
1623         llvm::makeArrayRef(Args));
1624   }
1625   emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
1626 }
1627 
1628 void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
1629                                       OpenMPDirectiveKind Kind, bool EmitChecks,
1630                                       bool ForceSimpleCall) {
1631   // Build call __kmpc_cancel_barrier(loc, thread_id);
1632   // Build call __kmpc_barrier(loc, thread_id);
1633   OpenMPLocationFlags Flags = OMP_IDENT_KMPC;
1634   if (Kind == OMPD_for) {
1635     Flags =
1636         static_cast<OpenMPLocationFlags>(Flags | OMP_IDENT_BARRIER_IMPL_FOR);
1637   } else if (Kind == OMPD_sections) {
1638     Flags = static_cast<OpenMPLocationFlags>(Flags |
1639                                              OMP_IDENT_BARRIER_IMPL_SECTIONS);
1640   } else if (Kind == OMPD_single) {
1641     Flags =
1642         static_cast<OpenMPLocationFlags>(Flags | OMP_IDENT_BARRIER_IMPL_SINGLE);
1643   } else if (Kind == OMPD_barrier) {
1644     Flags = static_cast<OpenMPLocationFlags>(Flags | OMP_IDENT_BARRIER_EXPL);
1645   } else {
1646     Flags = static_cast<OpenMPLocationFlags>(Flags | OMP_IDENT_BARRIER_IMPL);
1647   }
1648   // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc,
1649   // thread_id);
1650   auto *OMPRegionInfo =
1651       dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo);
1652   // Do not emit barrier call in the single directive emitted in some rare cases
1653   // for sections directives.
1654   if (OMPRegionInfo && OMPRegionInfo->getDirectiveKind() == OMPD_single)
1655     return;
1656   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags),
1657                          getThreadID(CGF, Loc)};
1658   if (OMPRegionInfo) {
1659     if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) {
1660       auto *Result = CGF.EmitRuntimeCall(
1661           createRuntimeFunction(OMPRTL__kmpc_cancel_barrier), Args);
1662       if (EmitChecks) {
1663         // if (__kmpc_cancel_barrier()) {
1664         //   exit from construct;
1665         // }
1666         auto *ExitBB = CGF.createBasicBlock(".cancel.exit");
1667         auto *ContBB = CGF.createBasicBlock(".cancel.continue");
1668         auto *Cmp = CGF.Builder.CreateIsNotNull(Result);
1669         CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
1670         CGF.EmitBlock(ExitBB);
1671         //   exit from construct;
1672         auto CancelDestination =
1673             CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
1674         CGF.EmitBranchThroughCleanup(CancelDestination);
1675         CGF.EmitBlock(ContBB, /*IsFinished=*/true);
1676       }
1677       return;
1678     }
1679   }
1680   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_barrier), Args);
1681 }
1682 
/// \brief Schedule types for 'omp for' loops (these enumerators are taken from
/// the enum sched_type in kmp.h).
///
/// Each group starts at an explicit lower bound and counts upwards from it;
/// the resulting numeric value is noted next to every enumerator.
enum OpenMPSchedType {
  /// \brief Lower bound for default (unordered) versions.
  OMP_sch_lower = 32,
  OMP_sch_static_chunked,  // 33
  OMP_sch_static,          // 34
  OMP_sch_dynamic_chunked, // 35
  OMP_sch_guided_chunked,  // 36
  OMP_sch_runtime,         // 37
  OMP_sch_auto,            // 38
  /// \brief Lower bound for 'ordered' versions.
  OMP_ord_lower = 64,
  OMP_ord_static_chunked,  // 65
  OMP_ord_static,          // 66
  OMP_ord_dynamic_chunked, // 67
  OMP_ord_guided_chunked,  // 68
  OMP_ord_runtime,         // 69
  OMP_ord_auto,            // 70
  /// \brief Schedule used by default: unordered, non-chunked static.
  OMP_sch_default = OMP_sch_static,
};
1704 
1705 /// \brief Map the OpenMP loop schedule to the runtime enumeration.
1706 static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind,
1707                                           bool Chunked, bool Ordered) {
1708   switch (ScheduleKind) {
1709   case OMPC_SCHEDULE_static:
1710     return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked)
1711                    : (Ordered ? OMP_ord_static : OMP_sch_static);
1712   case OMPC_SCHEDULE_dynamic:
1713     return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked;
1714   case OMPC_SCHEDULE_guided:
1715     return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked;
1716   case OMPC_SCHEDULE_runtime:
1717     return Ordered ? OMP_ord_runtime : OMP_sch_runtime;
1718   case OMPC_SCHEDULE_auto:
1719     return Ordered ? OMP_ord_auto : OMP_sch_auto;
1720   case OMPC_SCHEDULE_unknown:
1721     assert(!Chunked && "chunk was specified but schedule kind not known");
1722     return Ordered ? OMP_ord_static : OMP_sch_static;
1723   }
1724   llvm_unreachable("Unexpected runtime schedule");
1725 }
1726 
1727 bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind,
1728                                          bool Chunked) const {
1729   auto Schedule = getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
1730   return Schedule == OMP_sch_static;
1731 }
1732 
1733 bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const {
1734   auto Schedule =
1735       getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false);
1736   assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here");
1737   return Schedule != OMP_sch_static;
1738 }
1739 
1740 void CGOpenMPRuntime::emitForDispatchInit(CodeGenFunction &CGF,
1741                                           SourceLocation Loc,
1742                                           OpenMPScheduleClauseKind ScheduleKind,
1743                                           unsigned IVSize, bool IVSigned,
1744                                           bool Ordered, llvm::Value *UB,
1745                                           llvm::Value *Chunk) {
1746   OpenMPSchedType Schedule =
1747       getRuntimeSchedule(ScheduleKind, Chunk != nullptr, Ordered);
1748   assert(Ordered ||
1749          (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked &&
1750           Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked));
1751   // Call __kmpc_dispatch_init(
1752   //          ident_t *loc, kmp_int32 tid, kmp_int32 schedule,
1753   //          kmp_int[32|64] lower, kmp_int[32|64] upper,
1754   //          kmp_int[32|64] stride, kmp_int[32|64] chunk);
1755 
1756   // If the Chunk was not specified in the clause - use default value 1.
1757   if (Chunk == nullptr)
1758     Chunk = CGF.Builder.getIntN(IVSize, 1);
1759   llvm::Value *Args[] = {
1760     emitUpdateLocation(CGF, Loc, OMP_IDENT_KMPC),
1761     getThreadID(CGF, Loc),
1762     CGF.Builder.getInt32(Schedule), // Schedule type
1763     CGF.Builder.getIntN(IVSize, 0), // Lower
1764     UB,                             // Upper
1765     CGF.Builder.getIntN(IVSize, 1), // Stride
1766     Chunk                           // Chunk
1767   };
1768   CGF.EmitRuntimeCall(createDispatchInitFunction(IVSize, IVSigned), Args);
1769 }
1770 
1771 void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF,
1772                                         SourceLocation Loc,
1773                                         OpenMPScheduleClauseKind ScheduleKind,
1774                                         unsigned IVSize, bool IVSigned,
1775                                         bool Ordered, Address IL, Address LB,
1776                                         Address UB, Address ST,
1777                                         llvm::Value *Chunk) {
1778   OpenMPSchedType Schedule =
1779     getRuntimeSchedule(ScheduleKind, Chunk != nullptr, Ordered);
1780   assert(!Ordered);
1781   assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked ||
1782          Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked);
1783 
1784   // Call __kmpc_for_static_init(
1785   //          ident_t *loc, kmp_int32 tid, kmp_int32 schedtype,
1786   //          kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower,
1787   //          kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
1788   //          kmp_int[32|64] incr, kmp_int[32|64] chunk);
1789   if (Chunk == nullptr) {
1790     assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static) &&
1791            "expected static non-chunked schedule");
1792     // If the Chunk was not specified in the clause - use default value 1.
1793       Chunk = CGF.Builder.getIntN(IVSize, 1);
1794   } else {
1795     assert((Schedule == OMP_sch_static_chunked ||
1796             Schedule == OMP_ord_static_chunked) &&
1797            "expected static chunked schedule");
1798   }
1799   llvm::Value *Args[] = {
1800     emitUpdateLocation(CGF, Loc, OMP_IDENT_KMPC),
1801     getThreadID(CGF, Loc),
1802     CGF.Builder.getInt32(Schedule), // Schedule type
1803     IL.getPointer(),                // &isLastIter
1804     LB.getPointer(),                // &LB
1805     UB.getPointer(),                // &UB
1806     ST.getPointer(),                // &Stride
1807     CGF.Builder.getIntN(IVSize, 1), // Incr
1808     Chunk                           // Chunk
1809   };
1810   CGF.EmitRuntimeCall(createForStaticInitFunction(IVSize, IVSigned), Args);
1811 }
1812 
1813 void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF,
1814                                           SourceLocation Loc) {
1815   // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid);
1816   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, OMP_IDENT_KMPC),
1817                          getThreadID(CGF, Loc)};
1818   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_for_static_fini),
1819                       Args);
1820 }
1821 
1822 void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
1823                                                  SourceLocation Loc,
1824                                                  unsigned IVSize,
1825                                                  bool IVSigned) {
1826   // Call __kmpc_for_dynamic_fini_(4|8)[u](ident_t *loc, kmp_int32 tid);
1827   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, OMP_IDENT_KMPC),
1828                          getThreadID(CGF, Loc)};
1829   CGF.EmitRuntimeCall(createDispatchFiniFunction(IVSize, IVSigned), Args);
1830 }
1831 
1832 llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF,
1833                                           SourceLocation Loc, unsigned IVSize,
1834                                           bool IVSigned, Address IL,
1835                                           Address LB, Address UB,
1836                                           Address ST) {
1837   // Call __kmpc_dispatch_next(
1838   //          ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter,
1839   //          kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper,
1840   //          kmp_int[32|64] *p_stride);
1841   llvm::Value *Args[] = {
1842       emitUpdateLocation(CGF, Loc, OMP_IDENT_KMPC), getThreadID(CGF, Loc),
1843       IL.getPointer(), // &isLastIter
1844       LB.getPointer(), // &Lower
1845       UB.getPointer(), // &Upper
1846       ST.getPointer()  // &Stride
1847   };
1848   llvm::Value *Call =
1849       CGF.EmitRuntimeCall(createDispatchNextFunction(IVSize, IVSigned), Args);
1850   return CGF.EmitScalarConversion(
1851       Call, CGF.getContext().getIntTypeForBitwidth(32, /* Signed */ true),
1852       CGF.getContext().BoolTy, Loc);
1853 }
1854 
1855 void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
1856                                            llvm::Value *NumThreads,
1857                                            SourceLocation Loc) {
1858   // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads)
1859   llvm::Value *Args[] = {
1860       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
1861       CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)};
1862   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_num_threads),
1863                       Args);
1864 }
1865 
1866 void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF,
1867                                          OpenMPProcBindClauseKind ProcBind,
1868                                          SourceLocation Loc) {
1869   // Constants for proc bind value accepted by the runtime.
1870   enum ProcBindTy {
1871     ProcBindFalse = 0,
1872     ProcBindTrue,
1873     ProcBindMaster,
1874     ProcBindClose,
1875     ProcBindSpread,
1876     ProcBindIntel,
1877     ProcBindDefault
1878   } RuntimeProcBind;
1879   switch (ProcBind) {
1880   case OMPC_PROC_BIND_master:
1881     RuntimeProcBind = ProcBindMaster;
1882     break;
1883   case OMPC_PROC_BIND_close:
1884     RuntimeProcBind = ProcBindClose;
1885     break;
1886   case OMPC_PROC_BIND_spread:
1887     RuntimeProcBind = ProcBindSpread;
1888     break;
1889   case OMPC_PROC_BIND_unknown:
1890     llvm_unreachable("Unsupported proc_bind value.");
1891   }
1892   // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind)
1893   llvm::Value *Args[] = {
1894       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
1895       llvm::ConstantInt::get(CGM.IntTy, RuntimeProcBind, /*isSigned=*/true)};
1896   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_proc_bind), Args);
1897 }
1898 
1899 void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>,
1900                                 SourceLocation Loc) {
1901   // Build call void __kmpc_flush(ident_t *loc)
1902   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_flush),
1903                       emitUpdateLocation(CGF, Loc));
1904 }
1905 
namespace {
/// \brief Indexes of fields for type kmp_task_t.
enum KmpTaskTFields {
  /// \brief List of shared variables.
  KmpTaskTShareds = 0,
  /// \brief Task routine.
  KmpTaskTRoutine = 1,
  /// \brief Partition id for the untied tasks.
  KmpTaskTPartId = 2,
  /// \brief Function with call of destructors for private variables.
  KmpTaskTDestructors = 3,
};
} // anonymous namespace
1919 
1920 void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) {
1921   if (!KmpRoutineEntryPtrTy) {
1922     // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type.
1923     auto &C = CGM.getContext();
1924     QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy};
1925     FunctionProtoType::ExtProtoInfo EPI;
1926     KmpRoutineEntryPtrQTy = C.getPointerType(
1927         C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI));
1928     KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy);
1929   }
1930 }
1931 
1932 static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC,
1933                                        QualType FieldTy) {
1934   auto *Field = FieldDecl::Create(
1935       C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy,
1936       C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()),
1937       /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit);
1938   Field->setAccess(AS_public);
1939   DC->addDecl(Field);
1940   return Field;
1941 }
1942 
1943 namespace {
1944 struct PrivateHelpersTy {
1945   PrivateHelpersTy(const VarDecl *Original, const VarDecl *PrivateCopy,
1946                    const VarDecl *PrivateElemInit)
1947       : Original(Original), PrivateCopy(PrivateCopy),
1948         PrivateElemInit(PrivateElemInit) {}
1949   const VarDecl *Original;
1950   const VarDecl *PrivateCopy;
1951   const VarDecl *PrivateElemInit;
1952 };
1953 typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy;
1954 } // anonymous namespace
1955 
1956 static RecordDecl *
1957 createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) {
1958   if (!Privates.empty()) {
1959     auto &C = CGM.getContext();
1960     // Build struct .kmp_privates_t. {
1961     //         /*  private vars  */
1962     //       };
1963     auto *RD = C.buildImplicitRecord(".kmp_privates.t");
1964     RD->startDefinition();
1965     for (auto &&Pair : Privates) {
1966       auto *VD = Pair.second.Original;
1967       auto Type = VD->getType();
1968       Type = Type.getNonReferenceType();
1969       auto *FD = addFieldToRecordDecl(C, RD, Type);
1970       if (VD->hasAttrs()) {
1971         for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()),
1972              E(VD->getAttrs().end());
1973              I != E; ++I)
1974           FD->addAttr(*I);
1975       }
1976     }
1977     RD->completeDefinition();
1978     return RD;
1979   }
1980   return nullptr;
1981 }
1982 
1983 static RecordDecl *
1984 createKmpTaskTRecordDecl(CodeGenModule &CGM, QualType KmpInt32Ty,
1985                          QualType KmpRoutineEntryPointerQTy) {
1986   auto &C = CGM.getContext();
1987   // Build struct kmp_task_t {
1988   //         void *              shareds;
1989   //         kmp_routine_entry_t routine;
1990   //         kmp_int32           part_id;
1991   //         kmp_routine_entry_t destructors;
1992   //       };
1993   auto *RD = C.buildImplicitRecord("kmp_task_t");
1994   RD->startDefinition();
1995   addFieldToRecordDecl(C, RD, C.VoidPtrTy);
1996   addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy);
1997   addFieldToRecordDecl(C, RD, KmpInt32Ty);
1998   addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy);
1999   RD->completeDefinition();
2000   return RD;
2001 }
2002 
2003 /// \brief Build the per-task record wrapping kmp_task_t and its privates.
2004 static RecordDecl *
2005 createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy,
2006                                      ArrayRef<PrivateDataTy> Privates) {
2007   // Resulting layout (the second field exists only if a privates record
2008   // was built by createPrivatesRecordDecl):
2009   //   struct kmp_task_t_with_privates { kmp_task_t task_data; .kmp_privates_t. privates; };
2010   auto &Ctx = CGM.getContext();
2011   auto *WrapperRD = Ctx.buildImplicitRecord("kmp_task_t_with_privates");
2012   WrapperRD->startDefinition();
2013   addFieldToRecordDecl(Ctx, WrapperRD, KmpTaskTQTy);
2014   auto *PrivatesRD = createPrivatesRecordDecl(CGM, Privates);
2015   if (PrivatesRD != nullptr)
2016     addFieldToRecordDecl(Ctx, WrapperRD, Ctx.getRecordType(PrivatesRD));
2017   WrapperRD->completeDefinition();
2018   return WrapperRD;
2019 }
2020 
2021 /// \brief Emit the proxy entry point for a task. Its second parameter is a
2022 /// pointer to the task record (kmp_task_t followed by the task's privates).
2023 /// \code
2024 /// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
2025 ///   TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map,
2026 ///   tt->shareds);
2027 ///   return 0;
2028 /// }
2029 /// \endcode
/// \param KmpInt32Ty Type of the 'gtid' parameter and of the return value.
/// \param KmpTaskTWithPrivatesPtrQTy Pointer type of the second parameter (the
/// parameter is additionally restrict-qualified).
/// \param KmpTaskTWithPrivatesQTy Record whose first field is the kmp_task_t
/// data and whose optional second field holds the privates.
/// \param KmpTaskTQTy The kmp_task_t record; used to locate 'part_id' and
/// 'shareds'.
/// \param SharedsPtrTy Pointer type the loaded 'shareds' value is cast to.
/// \param TaskFunction Function called by the proxy.
/// \param TaskPrivatesMap Value forwarded as the fourth call argument.
/// \returns The created internal-linkage function '.omp_task_entry.'.
2030 static llvm::Value *
2031 emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc,
2032                       QualType KmpInt32Ty, QualType KmpTaskTWithPrivatesPtrQTy,
2033                       QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy,
2034                       QualType SharedsPtrTy, llvm::Value *TaskFunction,
2035                       llvm::Value *TaskPrivatesMap) {
2036   auto &C = CGM.getContext();
       // Declare the two implicit parameters: (kmp_int32 gtid, <task record> *tt).
2037   FunctionArgList Args;
2038   ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty);
2039   ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc,
2040                                 /*Id=*/nullptr,
2041                                 KmpTaskTWithPrivatesPtrQTy.withRestrict());
2042   Args.push_back(&GtidArg);
2043   Args.push_back(&TaskTypeArg);
2044   FunctionType::ExtInfo Info;
2045   auto &TaskEntryFnInfo =
2046       CGM.getTypes().arrangeFreeFunctionDeclaration(KmpInt32Ty, Args, Info,
2047                                                     /*isVariadic=*/false);
2048   auto *TaskEntryTy = CGM.getTypes().GetFunctionType(TaskEntryFnInfo);
2049   auto *TaskEntry =
2050       llvm::Function::Create(TaskEntryTy, llvm::GlobalValue::InternalLinkage,
2051                              ".omp_task_entry.", &CGM.getModule());
2052   CGM.SetInternalFunctionAttributes(/*D=*/nullptr, TaskEntry, TaskEntryFnInfo);
       // The body is emitted with a fresh CodeGenFunction and without debug info.
2053   CodeGenFunction CGF(CGM);
2054   CGF.disableDebugInfo();
2055   CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args);
2056 
2057   // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map,
2058   // tt->task_data.shareds);
2059   auto *GtidParam = CGF.EmitLoadOfScalar(
2060       CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc);
       // TDBase designates the whole task record (*tt); Base designates its first
       // field, i.e. the embedded kmp_task_t.
2061   LValue TDBase = emitLoadOfPointerLValue(
2062       CGF, CGF.GetAddrOfLocalVar(&TaskTypeArg), KmpTaskTWithPrivatesPtrQTy);
2063   auto *KmpTaskTWithPrivatesQTyRD =
2064       cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
2065   LValue Base =
2066       CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
2067   auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
       // part_id is passed by value.
2068   auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
2069   auto PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI);
2070   auto *PartidParam = CGF.EmitLoadOfLValue(PartIdLVal, Loc).getScalarVal();
2071 
       // Load the 'shareds' pointer and cast it to the pointer type expected by
       // the callee.
2072   auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds);
2073   auto SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI);
2074   auto *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2075       CGF.EmitLoadOfLValue(SharedsLVal, Loc).getScalarVal(),
2076       CGF.ConvertTypeForMem(SharedsPtrTy));
2077 
       // The privates field is optional: pass its address as void* when the record
       // has a second field, otherwise pass a null pointer.
2078   auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
2079   llvm::Value *PrivatesParam;
2080   if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) {
2081     auto PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI);
2082     PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2083         PrivatesLVal.getPointer(), CGF.VoidPtrTy);
2084   } else {
2085     PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
2086   }
2087 
2088   llvm::Value *CallArgs[] = {GtidParam, PartidParam, PrivatesParam,
2089                              TaskPrivatesMap, SharedsParam};
2090   CGF.EmitCallOrInvoke(TaskFunction, CallArgs);
       // The proxy always returns 0.
2091   CGF.EmitStoreThroughLValue(
2092       RValue::get(CGF.Builder.getInt32(/*C=*/0)),
2093       CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty));
2094   CGF.FinishFunction();
2095   return TaskEntry;
2096 }
2097 
/// \brief Emit an internal function '.omp_task_destructor.' that destroys the
/// private copies stored in a task record.
///
/// The function has the same parameter list as the task entry proxy:
/// (kmp_int32 gtid, <task record> *restrict tt). For every field of the
/// privates record (the second field of \a KmpTaskTWithPrivatesQTy) whose type
/// reports a destruction kind, a destroy cleanup is pushed.
/// \param KmpTaskTWithPrivatesQTy Task record type; its second field is
/// dereferenced without an end check, so it must exist. In this file the
/// function is only requested by emitTaskCall when some private needs cleanup.
/// \returns The created function.
2098 static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM,
2099                                             SourceLocation Loc,
2100                                             QualType KmpInt32Ty,
2101                                             QualType KmpTaskTWithPrivatesPtrQTy,
2102                                             QualType KmpTaskTWithPrivatesQTy) {
2103   auto &C = CGM.getContext();
2104   FunctionArgList Args;
2105   ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty);
2106   ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc,
2107                                 /*Id=*/nullptr,
2108                                 KmpTaskTWithPrivatesPtrQTy.withRestrict());
2109   Args.push_back(&GtidArg);
2110   Args.push_back(&TaskTypeArg);
2111   FunctionType::ExtInfo Info;
2112   auto &DestructorFnInfo =
2113       CGM.getTypes().arrangeFreeFunctionDeclaration(KmpInt32Ty, Args, Info,
2114                                                     /*isVariadic=*/false);
2115   auto *DestructorFnTy = CGM.getTypes().GetFunctionType(DestructorFnInfo);
2116   auto *DestructorFn =
2117       llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage,
2118                              ".omp_task_destructor.", &CGM.getModule());
2119   CGM.SetInternalFunctionAttributes(/*D=*/nullptr, DestructorFn,
2120                                     DestructorFnInfo);
2121   CodeGenFunction CGF(CGM);
2122   CGF.disableDebugInfo();
2123   CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo,
2124                     Args);
2125 
       // Base first designates *tt and is then narrowed to tt->privates.
2126   LValue Base = emitLoadOfPointerLValue(
2127       CGF, CGF.GetAddrOfLocalVar(&TaskTypeArg), KmpTaskTWithPrivatesPtrQTy);
2128   auto *KmpTaskTWithPrivatesQTyRD =
2129       cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
2130   auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
2131   Base = CGF.EmitLValueForField(Base, *FI);
       // Register a destroy cleanup for each private that needs destruction.
2132   for (auto *Field :
2133        cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) {
2134     if (auto DtorKind = Field->getType().isDestructedType()) {
2135       auto FieldLValue = CGF.EmitLValueForField(Base, Field);
2136       CGF.pushDestroy(DtorKind, FieldLValue.getAddress(), Field->getType());
2137     }
2138   }
       // NOTE(review): presumably the pushed cleanups are emitted when the function
       // is finished; no value is explicitly stored to the return slot here,
       // unlike emitProxyTaskFunction - confirm this is intended.
2139   CGF.FinishFunction();
2140   return DestructorFn;
2141 }
2142 
2143 /// \brief Emit a privates mapping function for correct handling of private and
2144 /// firstprivate variables.
2145 /// \code
2146 /// void .omp_task_privates_map.(const .privates. *noalias privs, <ty1>
2147 /// **noalias priv1,...,  <tyn> **noalias privn) {
2148 ///   *priv1 = &.privates.priv1;
2149 ///   ...;
2150 ///   *privn = &.privates.privn;
2151 /// }
2152 /// \endcode
/// \param PrivateVars References to 'private' variables; each contributes one
/// pointer-to-pointer parameter, in order, starting at parameter index 1.
/// \param FirstprivateVars References to 'firstprivate' variables; their
/// parameters follow those of \a PrivateVars.
/// \param PrivatesQTy Record type holding the private copies.
/// \param Privates Per-field descriptors; entry i is matched with field i of
/// \a PrivatesQTy.
/// \returns The created internal-linkage, always-inline function.
2153 static llvm::Value *
2154 emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc,
2155                                ArrayRef<const Expr *> PrivateVars,
2156                                ArrayRef<const Expr *> FirstprivateVars,
2157                                QualType PrivatesQTy,
2158                                ArrayRef<PrivateDataTy> Privates) {
2159   auto &C = CGM.getContext();
2160   FunctionArgList Args;
       // Args[0]: const, restrict-qualified pointer to the privates record.
2161   ImplicitParamDecl TaskPrivatesArg(
2162       C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
2163       C.getPointerType(PrivatesQTy).withConst().withRestrict());
2164   Args.push_back(&TaskPrivatesArg);
       // Maps each original variable to the index of its output parameter in Args.
       // Counting starts at 1 because index 0 is the privates record pointer.
2165   llvm::DenseMap<const VarDecl *, unsigned> PrivateVarsPos;
2166   unsigned Counter = 1;
2167   for (auto *E: PrivateVars) {
2168     Args.push_back(ImplicitParamDecl::Create(
2169         C, /*DC=*/nullptr, Loc,
2170         /*Id=*/nullptr, C.getPointerType(C.getPointerType(E->getType()))
2171                             .withConst()
2172                             .withRestrict()));
2173     auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
2174     PrivateVarsPos[VD] = Counter;
2175     ++Counter;
2176   }
2177   for (auto *E : FirstprivateVars) {
2178     Args.push_back(ImplicitParamDecl::Create(
2179         C, /*DC=*/nullptr, Loc,
2180         /*Id=*/nullptr, C.getPointerType(C.getPointerType(E->getType()))
2181                             .withConst()
2182                             .withRestrict()));
2183     auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
2184     PrivateVarsPos[VD] = Counter;
2185     ++Counter;
2186   }
2187   FunctionType::ExtInfo Info;
2188   auto &TaskPrivatesMapFnInfo =
2189       CGM.getTypes().arrangeFreeFunctionDeclaration(C.VoidTy, Args, Info,
2190                                                     /*isVariadic=*/false);
2191   auto *TaskPrivatesMapTy =
2192       CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo);
2193   auto *TaskPrivatesMap = llvm::Function::Create(
2194       TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage,
2195       ".omp_task_privates_map.", &CGM.getModule());
2196   CGM.SetInternalFunctionAttributes(/*D=*/nullptr, TaskPrivatesMap,
2197                                     TaskPrivatesMapFnInfo);
2198   TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline);
2199   CodeGenFunction CGF(CGM);
2200   CGF.disableDebugInfo();
2201   CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap,
2202                     TaskPrivatesMapFnInfo, Args);
2203 
2204   // *privi = &.privates.privi;
2205   LValue Base = emitLoadOfPointerLValue(
2206       CGF, CGF.GetAddrOfLocalVar(&TaskPrivatesArg), TaskPrivatesArg.getType());
2207   auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl());
       // Counter is reused as the index into Privates; the fields of the record
       // are walked in parallel with the Privates entries.
2208   Counter = 0;
2209   for (auto *Field : PrivatesQTyRD->fields()) {
2210     auto FieldLVal = CGF.EmitLValueForField(Base, Field);
         // Look up the output parameter that belongs to this field's original
         // variable and store the field's address through it.
2211     auto *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]];
2212     auto RefLVal = CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType());
2213     auto RefLoadLVal =
2214         emitLoadOfPointerLValue(CGF, RefLVal.getAddress(), RefLVal.getType());
2215     CGF.EmitStoreOfScalar(FieldLVal.getPointer(), RefLoadLVal);
2216     ++Counter;
2217   }
2218   CGF.FinishFunction();
2219   return TaskPrivatesMap;
2220 }
2221 
/// \brief Three-way comparator passed to llvm::array_pod_sort for the list of
/// privates. Compares the pairs' first members in reverse: yields 1 when
/// P1->first is smaller, -1 when it is larger and 0 when neither is smaller.
2222 static int array_pod_sort_comparator(const PrivateDataTy *P1,
2223                                      const PrivateDataTy *P2) {
2224   return static_cast<int>(P1->first < P2->first) - static_cast<int>(P2->first < P1->first);
2225 }
2226 
/// \brief Emit the code that allocates, initializes and submits an OpenMP task.
///
/// In emission order this function:
///  - gathers the private and firstprivate variables and sorts them with
///    array_pod_sort_comparator (keyed by declaration alignment);
///  - builds kmp_task_t (cached in KmpTaskTQTy) and the per-task record
///    kmp_task_t_with_privates;
///  - emits the privates mapping function (only if there are privates) and the
///    '.omp_task_entry.' proxy;
///  - calls __kmpc_omp_task_alloc, copies the shareds, initializes the private
///    copies and stores the destructor function pointer;
///  - fills an array of kmp_depend_info records for the 'depend' clauses;
///  - emits __kmpc_omp_task or __kmpc_omp_task_with_deps; when an 'if' clause
///    is present, the alternative branch waits for dependences and calls the
///    proxy directly between __kmpc_omp_task_begin_if0 and
///    __kmpc_omp_task_complete_if0.
///
/// \param D Directive whose associated captured statement is used to look up
/// the shared fields of firstprivate variables.
/// \param Tied Sets the tied flag (0x1) in the task flags.
/// \param Final If the pointer part is non-null it is a value selecting the
/// final flag (0x2) at run time; otherwise the integer part decides at compile
/// time.
/// \param TaskFunction Function called by the proxy; the type of its fourth
/// parameter is used as the type of the privates mapping argument.
/// \param SharedsTy Record type of the shareds.
/// \param Shareds Address of the shareds to copy into the task.
/// \param IfCond Condition of the 'if' clause, or null.
/// \param PrivateVars, PrivateCopies Parallel lists for 'private' variables.
/// \param FirstprivateVars, FirstprivateCopies, FirstprivateInits Parallel
/// lists for 'firstprivate' variables.
/// \param Dependences (kind, expression) pairs of the 'depend' clauses.
2227 void CGOpenMPRuntime::emitTaskCall(
2228     CodeGenFunction &CGF, SourceLocation Loc, const OMPExecutableDirective &D,
2229     bool Tied, llvm::PointerIntPair<llvm::Value *, 1, bool> Final,
2230     llvm::Value *TaskFunction, QualType SharedsTy, Address Shareds,
2231     const Expr *IfCond, ArrayRef<const Expr *> PrivateVars,
2232     ArrayRef<const Expr *> PrivateCopies,
2233     ArrayRef<const Expr *> FirstprivateVars,
2234     ArrayRef<const Expr *> FirstprivateCopies,
2235     ArrayRef<const Expr *> FirstprivateInits,
2236     ArrayRef<std::pair<OpenMPDependClauseKind, const Expr *>> Dependences) {
2237   auto &C = CGM.getContext();
2238   llvm::SmallVector<PrivateDataTy, 8> Privates;
2239   // Aggregate privates and sort them by the alignment.
       // 'private' variables carry no element initializer variable.
2240   auto I = PrivateCopies.begin();
2241   for (auto *E : PrivateVars) {
2242     auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
2243     Privates.push_back(std::make_pair(
2244         C.getDeclAlign(VD),
2245         PrivateHelpersTy(VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
2246                          /*PrivateElemInit=*/nullptr)));
2247     ++I;
2248   }
       // 'firstprivate' variables additionally record the variable referenced by
       // the corresponding entry of FirstprivateInits.
2249   I = FirstprivateCopies.begin();
2250   auto IElemInitRef = FirstprivateInits.begin();
2251   for (auto *E : FirstprivateVars) {
2252     auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
2253     Privates.push_back(std::make_pair(
2254         C.getDeclAlign(VD),
2255         PrivateHelpersTy(
2256             VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
2257             cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl()))));
2258     ++I, ++IElemInitRef;
2259   }
2260   llvm::array_pod_sort(Privates.begin(), Privates.end(),
2261                        array_pod_sort_comparator);
2262   auto KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
2263   // Build type kmp_routine_entry_t (if not built yet).
2264   emitKmpRoutineEntryT(KmpInt32Ty);
2265   // Build type kmp_task_t (if not built yet).
2266   if (KmpTaskTQTy.isNull()) {
2267     KmpTaskTQTy = C.getRecordType(
2268         createKmpTaskTRecordDecl(CGM, KmpInt32Ty, KmpRoutineEntryPtrQTy));
2269   }
2270   auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
2271   // Build particular struct kmp_task_t for the given task.
2272   auto *KmpTaskTWithPrivatesQTyRD =
2273       createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates);
2274   auto KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD);
2275   QualType KmpTaskTWithPrivatesPtrQTy =
2276       C.getPointerType(KmpTaskTWithPrivatesQTy);
2277   auto *KmpTaskTWithPrivatesTy = CGF.ConvertType(KmpTaskTWithPrivatesQTy);
2278   auto *KmpTaskTWithPrivatesPtrTy = KmpTaskTWithPrivatesTy->getPointerTo();
2279   auto *KmpTaskTWithPrivatesTySize = getTypeSize(CGF, KmpTaskTWithPrivatesQTy);
2280   QualType SharedsPtrTy = C.getPointerType(SharedsTy);
2281 
2282   // Emit initial values for private copies (if any).
2283   llvm::Value *TaskPrivatesMap = nullptr;
       // The privates mapping argument must have the type of the fourth parameter
       // (index 3) of TaskFunction.
2284   auto *TaskPrivatesMapTy =
2285       std::next(cast<llvm::Function>(TaskFunction)->getArgumentList().begin(),
2286                 3)
2287           ->getType();
2288   if (!Privates.empty()) {
2289     auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
2290     TaskPrivatesMap = emitTaskPrivateMappingFunction(
2291         CGM, Loc, PrivateVars, FirstprivateVars, FI->getType(), Privates);
2292     TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2293         TaskPrivatesMap, TaskPrivatesMapTy);
2294   } else {
         // No privates: pass a null mapping function.
2295     TaskPrivatesMap = llvm::ConstantPointerNull::get(
2296         cast<llvm::PointerType>(TaskPrivatesMapTy));
2297   }
2298   // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid,
2299   // kmp_task_t *tt);
2300   auto *TaskEntry = emitProxyTaskFunction(
2301       CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTy,
2302       KmpTaskTQTy, SharedsPtrTy, TaskFunction, TaskPrivatesMap);
2303 
2304   // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
2305   // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
2306   // kmp_routine_entry_t *task_entry);
2307   // Task flags. Format is taken from
2308   // http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp.h,
2309   // description of kmp_tasking_flags struct.
2310   const unsigned TiedFlag = 0x1;
2311   const unsigned FinalFlag = 0x2;
2312   unsigned Flags = Tied ? TiedFlag : 0;
       // The final flag is either selected at run time (pointer part of Final is
       // set) or folded to a constant (integer part of Final); the compile-time
       // tied flag is then or-ed in.
2313   auto *TaskFlags =
2314       Final.getPointer()
2315           ? CGF.Builder.CreateSelect(Final.getPointer(),
2316                                      CGF.Builder.getInt32(FinalFlag),
2317                                      CGF.Builder.getInt32(/*C=*/0))
2318           : CGF.Builder.getInt32(Final.getInt() ? FinalFlag : 0);
2319   TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags));
2320   auto *SharedsSize = getTypeSize(CGF, SharedsTy);
2321   llvm::Value *AllocArgs[] = {emitUpdateLocation(CGF, Loc),
2322                               getThreadID(CGF, Loc), TaskFlags,
2323                               KmpTaskTWithPrivatesTySize, SharedsSize,
2324                               CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2325                                   TaskEntry, KmpRoutineEntryPtrTy)};
2326   auto *NewTask = CGF.EmitRuntimeCall(
2327       createRuntimeFunction(OMPRTL__kmpc_omp_task_alloc), AllocArgs);
       // View the returned task as kmp_task_t_with_privates: Base designates the
       // whole record, TDBase its first field (the kmp_task_t data).
2328   auto *NewTaskNewTaskTTy = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2329       NewTask, KmpTaskTWithPrivatesPtrTy);
2330   LValue Base = CGF.MakeNaturalAlignAddrLValue(NewTaskNewTaskTTy,
2331                                                KmpTaskTWithPrivatesQTy);
2332   LValue TDBase =
2333       CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin());
2334   // Fill the data in the resulting kmp_task_t record.
2335   // Copy shareds if there are any.
       // KmpTaskSharedsPtr stays invalid when the shareds record has no fields.
2336   Address KmpTaskSharedsPtr = Address::invalid();
2337   if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) {
2338     KmpTaskSharedsPtr =
2339         Address(CGF.EmitLoadOfScalar(
2340                     CGF.EmitLValueForField(
2341                         TDBase, *std::next(KmpTaskTQTyRD->field_begin(),
2342                                            KmpTaskTShareds)),
2343                     Loc),
2344                 CGF.getNaturalTypeAlignment(SharedsTy));
2345     CGF.EmitAggregateCopy(KmpTaskSharedsPtr, Shareds, SharedsTy);
2346   }
2347   // Emit initial values for private copies (if any).
2348   bool NeedsCleanup = false;
2349   if (!Privates.empty()) {
         // FI first selects the privates field of the task record and is then
         // re-pointed at the first field of the privates record itself; it is
         // advanced in lock-step with the Privates list below.
2350     auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
2351     auto PrivatesBase = CGF.EmitLValueForField(Base, *FI);
2352     FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin();
         // The copy of the shareds stored in the task is only needed as the
         // source for firstprivate initialization.
2353     LValue SharedsBase;
2354     if (!FirstprivateVars.empty()) {
2355       SharedsBase = CGF.MakeAddrLValue(
2356           CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2357               KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy)),
2358           SharedsTy);
2359     }
2360     CodeGenFunction::CGCapturedStmtInfo CapturesInfo(
2361         cast<CapturedStmt>(*D.getAssociatedStmt()));
2362     for (auto &&Pair : Privates) {
2363       auto *VD = Pair.second.PrivateCopy;
2364       auto *Init = VD->getAnyInitializer();
2365       LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI);
           // Copies without an initializer are left uninitialized here.
2366       if (Init) {
             // A non-null PrivateElemInit marks a firstprivate entry: initialize
             // from the original variable's field in the task's shareds.
2367         if (auto *Elem = Pair.second.PrivateElemInit) {
2368           auto *OriginalVD = Pair.second.Original;
2369           auto *SharedField = CapturesInfo.lookup(OriginalVD);
2370           auto SharedRefLValue =
2371               CGF.EmitLValueForField(SharedsBase, SharedField);
               // Rebuild the lvalue with the declared alignment of the original
               // variable.
2372           SharedRefLValue = CGF.MakeAddrLValue(
2373               Address(SharedRefLValue.getPointer(), C.getDeclAlign(OriginalVD)),
2374               SharedRefLValue.getType(), AlignmentSource::Decl);
2375           QualType Type = OriginalVD->getType();
2376           if (Type->isArrayType()) {
2377             // Initialize firstprivate array.
2378             if (!isa<CXXConstructExpr>(Init) ||
2379                 CGF.isTrivialInitializer(Init)) {
2380               // Perform simple memcpy.
2381               CGF.EmitAggregateAssign(PrivateLValue.getAddress(),
2382                                       SharedRefLValue.getAddress(), Type);
2383             } else {
2384               // Initialize firstprivate array using element-by-element
2385               // initialization.
2386               CGF.EmitOMPAggregateAssign(
2387                   PrivateLValue.getAddress(), SharedRefLValue.getAddress(),
2388                   Type, [&CGF, Elem, Init, &CapturesInfo](
2389                             Address DestElement, Address SrcElement) {
2390                     // Clean up any temporaries needed by the initialization.
2391                     CodeGenFunction::OMPPrivateScope InitScope(CGF);
2392                     InitScope.addPrivate(Elem, [SrcElement]() -> Address {
2393                       return SrcElement;
2394                     });
2395                     (void)InitScope.Privatize();
2396                     // Emit initialization for single element.
2397                     CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(
2398                         CGF, &CapturesInfo);
2399                     CGF.EmitAnyExprToMem(Init, DestElement,
2400                                          Init->getType().getQualifiers(),
2401                                          /*IsInitializer=*/false);
2402                   });
2403             }
2404           } else {
                 // Non-array firstprivate: map the element-init variable to the
                 // shared original and evaluate the initializer into the copy.
2405             CodeGenFunction::OMPPrivateScope InitScope(CGF);
2406             InitScope.addPrivate(Elem, [SharedRefLValue]() -> Address {
2407               return SharedRefLValue.getAddress();
2408             });
2409             (void)InitScope.Privatize();
2410             CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo);
2411             CGF.EmitExprAsInit(Init, VD, PrivateLValue,
2412                                /*capturedByInit=*/false);
2413           }
2414         } else {
               // Plain private copy with an initializer.
2415           CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false);
2416         }
2417       }
           // A destructor function is needed as soon as one private field has a
           // destructed type.
2418       NeedsCleanup = NeedsCleanup || FI->getType().isDestructedType();
2419       ++FI;
2420     }
2421   }
2422   // Provide pointer to function with destructors for privates.
2423   llvm::Value *DestructorFn =
2424       NeedsCleanup ? emitDestructorsFunction(CGM, Loc, KmpInt32Ty,
2425                                              KmpTaskTWithPrivatesPtrQTy,
2426                                              KmpTaskTWithPrivatesQTy)
2427                    : llvm::ConstantPointerNull::get(
2428                          cast<llvm::PointerType>(KmpRoutineEntryPtrTy));
2429   LValue Destructor = CGF.EmitLValueForField(
2430       TDBase, *std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTDestructors));
2431   CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2432                             DestructorFn, KmpRoutineEntryPtrTy),
2433                         Destructor);
2434 
2435   // Process list of dependences.
2436   Address DependenciesArray = Address::invalid();
2437   unsigned NumDependencies = Dependences.size();
2438   if (NumDependencies) {
2439     // Dependence kind for RTL.
2440     enum RTLDependenceKindTy { DepIn = 1, DepOut = 2, DepInOut = 3 };
         // Field indices of the kmp_depend_info record built below.
2441     enum RTLDependInfoFieldsTy { BaseAddr, Len, Flags };
2442     RecordDecl *KmpDependInfoRD;
         // The flags field is an unsigned integer with the bit width of bool.
2443     QualType FlagsTy =
2444         C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false);
2445     llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
         // Build kmp_depend_info { intptr base_addr; size_t len; flags; } once
         // and cache it in KmpDependInfoTy.
2446     if (KmpDependInfoTy.isNull()) {
2447       KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info");
2448       KmpDependInfoRD->startDefinition();
2449       addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType());
2450       addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType());
2451       addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy);
2452       KmpDependInfoRD->completeDefinition();
2453       KmpDependInfoTy = C.getRecordType(KmpDependInfoRD);
2454     } else {
2455       KmpDependInfoRD = cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
2456     }
2457     CharUnits DependencySize = C.getTypeSizeInChars(KmpDependInfoTy);
2458     // Define type kmp_depend_info[<Dependences.size()>];
2459     QualType KmpDependInfoArrayTy = C.getConstantArrayType(
2460         KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies),
2461         ArrayType::Normal, /*IndexTypeQuals=*/0);
2462     // kmp_depend_info[<Dependences.size()>] deps;
2463     DependenciesArray = CGF.CreateMemTemp(KmpDependInfoArrayTy);
2464     for (unsigned i = 0; i < NumDependencies; ++i) {
2465       const Expr *E = Dependences[i].second;
2466       auto Addr = CGF.EmitLValue(E);
2467       llvm::Value *Size;
2468       QualType Ty = E->getType();
           // For an array section the length is the byte distance between the
           // section's start address and one element past the address emitted
           // with LowerBound=false; otherwise it is the size of the expression's
           // type.
2469       if (auto *ASE = dyn_cast<OMPArraySectionExpr>(E->IgnoreParenImpCasts())) {
2470         LValue UpAddrLVal =
2471             CGF.EmitOMPArraySectionExpr(ASE, /*LowerBound=*/false);
2472         llvm::Value *UpAddr =
2473             CGF.Builder.CreateConstGEP1_32(UpAddrLVal.getPointer(), /*Idx0=*/1);
2474         llvm::Value *LowIntPtr =
2475             CGF.Builder.CreatePtrToInt(Addr.getPointer(), CGM.SizeTy);
2476         llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGM.SizeTy);
2477         Size = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr);
2478       } else
2479         Size = getTypeSize(CGF, Ty);
           // Note: this Base (deps[i]) shadows the task-record Base declared above.
2480       auto Base = CGF.MakeAddrLValue(
2481           CGF.Builder.CreateConstArrayGEP(DependenciesArray, i, DependencySize),
2482           KmpDependInfoTy);
2483       // deps[i].base_addr = &<Dependences[i].second>;
2484       auto BaseAddrLVal = CGF.EmitLValueForField(
2485           Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
2486       CGF.EmitStoreOfScalar(
2487           CGF.Builder.CreatePtrToInt(Addr.getPointer(), CGF.IntPtrTy),
2488           BaseAddrLVal);
2489       // deps[i].len = sizeof(<Dependences[i].second>);
2490       auto LenLVal = CGF.EmitLValueForField(
2491           Base, *std::next(KmpDependInfoRD->field_begin(), Len));
2492       CGF.EmitStoreOfScalar(Size, LenLVal);
2493       // deps[i].flags = <Dependences[i].first>;
2494       RTLDependenceKindTy DepKind;
2495       switch (Dependences[i].first) {
2496       case OMPC_DEPEND_in:
2497         DepKind = DepIn;
2498         break;
2499       case OMPC_DEPEND_out:
2500         DepKind = DepOut;
2501         break;
2502       case OMPC_DEPEND_inout:
2503         DepKind = DepInOut;
2504         break;
2505       case OMPC_DEPEND_unknown:
2506         llvm_unreachable("Unknown task dependence type");
2507       }
2508       auto FlagsLVal = CGF.EmitLValueForField(
2509           Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
2510       CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
2511                             FlagsLVal);
2512     }
         // Decay the array to a void* pointing at its first element for the
         // runtime calls.
2513     DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2514         CGF.Builder.CreateStructGEP(DependenciesArray, 0, CharUnits::Zero()),
2515         CGF.VoidPtrTy);
2516   }
2517 
2518   // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
2519   // libcall.
2520   // Build kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t
2521   // *new_task);
2522   // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
2523   // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
2524   // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if dependence
2525   // list is not empty
2526   auto *ThreadID = getThreadID(CGF, Loc);
2527   auto *UpLoc = emitUpdateLocation(CGF, Loc);
2528   llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask };
       // DepTaskArgs is only filled in (and only read) when there are
       // dependences; the noalias dependence list is always passed as empty.
2529   llvm::Value *DepTaskArgs[7];
2530   if (NumDependencies) {
2531     DepTaskArgs[0] = UpLoc;
2532     DepTaskArgs[1] = ThreadID;
2533     DepTaskArgs[2] = NewTask;
2534     DepTaskArgs[3] = CGF.Builder.getInt32(NumDependencies);
2535     DepTaskArgs[4] = DependenciesArray.getPointer();
2536     DepTaskArgs[5] = CGF.Builder.getInt32(0);
2537     DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
2538   }
       // Generator for the regular path: hand the task over to the runtime.
2539   auto &&ThenCodeGen = [this, NumDependencies,
2540                         &TaskArgs, &DepTaskArgs](CodeGenFunction &CGF) {
2541     // TODO: add check for untied tasks.
2542     if (NumDependencies) {
2543       CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task_with_deps),
2544                           DepTaskArgs);
2545     } else {
2546       CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task),
2547                           TaskArgs);
2548     }
2549   };
       // Cleanup type sized to carry the three TaskArgs values.
2550   typedef CallEndCleanup<std::extent<decltype(TaskArgs)>::value>
2551       IfCallEndCleanup;
2552 
       // Arguments for __kmpc_omp_wait_deps; same layout as DepTaskArgs without
       // the task pointer.
2553   llvm::Value *DepWaitTaskArgs[6];
2554   if (NumDependencies) {
2555     DepWaitTaskArgs[0] = UpLoc;
2556     DepWaitTaskArgs[1] = ThreadID;
2557     DepWaitTaskArgs[2] = CGF.Builder.getInt32(NumDependencies);
2558     DepWaitTaskArgs[3] = DependenciesArray.getPointer();
2559     DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
2560     DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
2561   }
       // Generator for the alternative branch of the 'if' clause: wait for the
       // dependences, then call the proxy directly, bracketed by the begin_if0 /
       // complete_if0 runtime calls.
2562   auto &&ElseCodeGen = [this, &TaskArgs, ThreadID, NewTaskNewTaskTTy, TaskEntry,
2563                         NumDependencies, &DepWaitTaskArgs](CodeGenFunction &CGF) {
2564     CodeGenFunction::RunCleanupsScope LocalScope(CGF);
2565     // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
2566     // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
2567     // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info
2568     // is specified.
2569     if (NumDependencies)
2570       CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_wait_deps),
2571                           DepWaitTaskArgs);
2572     // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
2573     // kmp_task_t *new_task);
2574     CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task_begin_if0),
2575                         TaskArgs);
2576     // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
2577     // kmp_task_t *new_task);
         // Registered as a normal-and-EH cleanup in LocalScope rather than
         // emitted inline after the call.
2578     CGF.EHStack.pushCleanup<IfCallEndCleanup>(
2579         NormalAndEHCleanup,
2580         createRuntimeFunction(OMPRTL__kmpc_omp_task_complete_if0),
2581         llvm::makeArrayRef(TaskArgs));
2582 
2583     // Call proxy_task_entry(gtid, new_task);
2584     llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy};
2585     CGF.EmitCallOrInvoke(TaskEntry, OutlinedFnArgs);
2586   };
2587 
       // Without an 'if' clause only the regular path is emitted.
2588   if (IfCond) {
2589     emitOMPIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen);
2590   } else {
2591     CodeGenFunction::RunCleanupsScope Scope(CGF);
2592     ThenCodeGen(CGF);
2593   }
2594 }
2595 
2596 /// \brief Emit reduction operation for each element of array (required for
2597 /// array sections) LHS op = RHS.
/// The emitted code is a loop guarded by an emptiness check; in every iteration
/// \a LHSVar and \a RHSVar are remapped to the current pair of elements and
/// \a RedOpGen is invoked once.
2598 /// \param Type Type of array.
2599 /// \param LHSVar Variable on the left side of the reduction operation
2600 /// (references element of array in original variable).
2601 /// \param RHSVar Variable on the right side of the reduction operation
2602 /// (remapped to the current element of the right-hand side array).
2603 /// \param RedOpGen Generator of reduction operation with use of LHSVar and
2604 /// RHSVar.
/// \param XExpr, EExpr, UpExpr Forwarded unchanged to \a RedOpGen.
2605 static void EmitOMPAggregateReduction(
2606     CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar,
2607     const VarDecl *RHSVar,
2608     const llvm::function_ref<void(CodeGenFunction &CGF, const Expr *,
2609                                   const Expr *, const Expr *)> &RedOpGen,
2610     const Expr *XExpr = nullptr, const Expr *EExpr = nullptr,
2611     const Expr *UpExpr = nullptr) {
2612   // Perform element-by-element reduction.
2613   QualType ElementTy;
2614   Address LHSAddr = CGF.GetAddrOfLocalVar(LHSVar);
2615   Address RHSAddr = CGF.GetAddrOfLocalVar(RHSVar);
2616 
2617   // Drill down to the base element type on both arrays.
       // The element count is taken from the LHS array only.
2618   auto ArrayTy = Type->getAsArrayTypeUnsafe();
2619   auto NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, LHSAddr);
2620 
2621   auto RHSBegin = RHSAddr.getPointer();
2622   auto LHSBegin = LHSAddr.getPointer();
2623   // One-past-the-end pointer of the LHS range; it terminates the loop.
2624   auto LHSEnd = CGF.Builder.CreateGEP(LHSBegin, NumElements);
2625   // The basic structure here is a while-do loop.
       // NOTE: the block and value names still say "arraycpy"; they are kept
       // as-is because they appear in the emitted IR.
2626   auto BodyBB = CGF.createBasicBlock("omp.arraycpy.body");
2627   auto DoneBB = CGF.createBasicBlock("omp.arraycpy.done");
2628   auto IsEmpty =
2629       CGF.Builder.CreateICmpEQ(LHSBegin, LHSEnd, "omp.arraycpy.isempty");
2630   CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
2631 
2632   // Enter the loop body, making that address the current address.
2633   auto EntryBB = CGF.Builder.GetInsertBlock();
2634   CGF.EmitBlock(BodyBB);
2635 
2636   CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);
2637 
       // PHI nodes hold the current RHS/LHS element pointers; the incoming values
       // from the loop back-edge are added after the body has been emitted.
2638   llvm::PHINode *RHSElementPHI = CGF.Builder.CreatePHI(
2639       RHSBegin->getType(), 2, "omp.arraycpy.srcElementPast");
2640   RHSElementPHI->addIncoming(RHSBegin, EntryBB);
2641   Address RHSElementCurrent =
2642       Address(RHSElementPHI,
2643               RHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));
2644 
2645   llvm::PHINode *LHSElementPHI = CGF.Builder.CreatePHI(
2646       LHSBegin->getType(), 2, "omp.arraycpy.destElementPast");
2647   LHSElementPHI->addIncoming(LHSBegin, EntryBB);
2648   Address LHSElementCurrent =
2649       Address(LHSElementPHI,
2650               LHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));
2651 
2652   // Emit the reduction for the current pair of elements.
2653   CodeGenFunction::OMPPrivateScope Scope(CGF);
2654   Scope.addPrivate(LHSVar, [=]() -> Address { return LHSElementCurrent; });
2655   Scope.addPrivate(RHSVar, [=]() -> Address { return RHSElementCurrent; });
2656   Scope.Privatize();
2657   RedOpGen(CGF, XExpr, EExpr, UpExpr);
2658   Scope.ForceCleanup();
2659 
2660   // Shift the address forward by one element.
2661   auto LHSElementNext = CGF.Builder.CreateConstGEP1_32(
2662       LHSElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
2663   auto RHSElementNext = CGF.Builder.CreateConstGEP1_32(
2664       RHSElementPHI, /*Idx0=*/1, "omp.arraycpy.src.element");
2665   // Check whether we've reached the end.
2666   auto Done =
2667       CGF.Builder.CreateICmpEQ(LHSElementNext, LHSEnd, "omp.arraycpy.done");
2668   CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
       // The back-edge comes from whatever block the generated body ended in,
       // which need not be BodyBB itself.
2669   LHSElementPHI->addIncoming(LHSElementNext, CGF.Builder.GetInsertBlock());
2670   RHSElementPHI->addIncoming(RHSElementNext, CGF.Builder.GetInsertBlock());
2671 
2672   // Done.
2673   CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
2674 }
2675 
2676 static llvm::Value *emitReductionFunction(CodeGenModule &CGM,
2677                                           llvm::Type *ArgsType,
2678                                           ArrayRef<const Expr *> Privates,
2679                                           ArrayRef<const Expr *> LHSExprs,
2680                                           ArrayRef<const Expr *> RHSExprs,
2681                                           ArrayRef<const Expr *> ReductionOps) {
2682   auto &C = CGM.getContext();
2683 
2684   // void reduction_func(void *LHSArg, void *RHSArg);
2685   FunctionArgList Args;
2686   ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, SourceLocation(), /*Id=*/nullptr,
2687                            C.VoidPtrTy);
2688   ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, SourceLocation(), /*Id=*/nullptr,
2689                            C.VoidPtrTy);
2690   Args.push_back(&LHSArg);
2691   Args.push_back(&RHSArg);
2692   FunctionType::ExtInfo EI;
2693   auto &CGFI = CGM.getTypes().arrangeFreeFunctionDeclaration(
2694       C.VoidTy, Args, EI, /*isVariadic=*/false);
2695   auto *Fn = llvm::Function::Create(
2696       CGM.getTypes().GetFunctionType(CGFI), llvm::GlobalValue::InternalLinkage,
2697       ".omp.reduction.reduction_func", &CGM.getModule());
2698   CGM.SetInternalFunctionAttributes(/*D=*/nullptr, Fn, CGFI);
2699   CodeGenFunction CGF(CGM);
2700   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args);
2701 
2702   // Dst = (void*[n])(LHSArg);
2703   // Src = (void*[n])(RHSArg);
2704   Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2705       CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
2706       ArgsType), CGF.getPointerAlign());
2707   Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2708       CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
2709       ArgsType), CGF.getPointerAlign());
2710 
2711   //  ...
2712   //  *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]);
2713   //  ...
2714   CodeGenFunction::OMPPrivateScope Scope(CGF);
2715   auto IPriv = Privates.begin();
2716   unsigned Idx = 0;
2717   for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) {
2718     auto RHSVar = cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl());
2719     Scope.addPrivate(RHSVar, [&]() -> Address {
2720       return emitAddrOfVarFromArray(CGF, RHS, Idx, RHSVar);
2721     });
2722     auto LHSVar = cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl());
2723     Scope.addPrivate(LHSVar, [&]() -> Address {
2724       return emitAddrOfVarFromArray(CGF, LHS, Idx, LHSVar);
2725     });
2726     QualType PrivTy = (*IPriv)->getType();
2727     if (PrivTy->isArrayType()) {
2728       // Get array size and emit VLA type.
2729       ++Idx;
2730       Address Elem =
2731           CGF.Builder.CreateConstArrayGEP(LHS, Idx, CGF.getPointerSize());
2732       llvm::Value *Ptr = CGF.Builder.CreateLoad(Elem);
2733       CodeGenFunction::OpaqueValueMapping OpaqueMap(
2734           CGF,
2735           cast<OpaqueValueExpr>(
2736               CGF.getContext().getAsVariableArrayType(PrivTy)->getSizeExpr()),
2737           RValue::get(CGF.Builder.CreatePtrToInt(Ptr, CGF.SizeTy)));
2738       CGF.EmitVariablyModifiedType(PrivTy);
2739     }
2740   }
2741   Scope.Privatize();
2742   IPriv = Privates.begin();
2743   auto ILHS = LHSExprs.begin();
2744   auto IRHS = RHSExprs.begin();
2745   for (auto *E : ReductionOps) {
2746     if ((*IPriv)->getType()->isArrayType()) {
2747       // Emit reduction for array section.
2748       auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
2749       auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
2750       EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar,
2751                                 [=](CodeGenFunction &CGF, const Expr *,
2752                                     const Expr *,
2753                                     const Expr *) { CGF.EmitIgnoredExpr(E); });
2754     } else
2755       // Emit reduction for array subscript or single variable.
2756       CGF.EmitIgnoredExpr(E);
2757     ++IPriv, ++ILHS, ++IRHS;
2758   }
2759   Scope.ForceCleanup();
2760   CGF.FinishFunction();
2761   return Fn;
2762 }
2763 
2764 void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
2765                                     ArrayRef<const Expr *> Privates,
2766                                     ArrayRef<const Expr *> LHSExprs,
2767                                     ArrayRef<const Expr *> RHSExprs,
2768                                     ArrayRef<const Expr *> ReductionOps,
2769                                     bool WithNowait, bool SimpleReduction) {
2770   // Next code should be emitted for reduction:
2771   //
2772   // static kmp_critical_name lock = { 0 };
2773   //
2774   // void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
2775   //  *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]);
2776   //  ...
2777   //  *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1],
2778   //  *(Type<n>-1*)rhs[<n>-1]);
2779   // }
2780   //
2781   // ...
2782   // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]};
2783   // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
2784   // RedList, reduce_func, &<lock>)) {
2785   // case 1:
2786   //  ...
2787   //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
2788   //  ...
2789   // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
2790   // break;
2791   // case 2:
2792   //  ...
2793   //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
2794   //  ...
2795   // [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);]
2796   // break;
2797   // default:;
2798   // }
2799   //
2800   // if SimpleReduction is true, only the next code is generated:
2801   //  ...
2802   //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
2803   //  ...
2804 
2805   auto &C = CGM.getContext();
2806 
2807   if (SimpleReduction) {
2808     CodeGenFunction::RunCleanupsScope Scope(CGF);
2809     auto IPriv = Privates.begin();
2810     auto ILHS = LHSExprs.begin();
2811     auto IRHS = RHSExprs.begin();
2812     for (auto *E : ReductionOps) {
2813       if ((*IPriv)->getType()->isArrayType()) {
2814         auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
2815         auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
2816         EmitOMPAggregateReduction(
2817             CGF, (*IPriv)->getType(), LHSVar, RHSVar,
2818             [=](CodeGenFunction &CGF, const Expr *, const Expr *,
2819                 const Expr *) { CGF.EmitIgnoredExpr(E); });
2820       } else
2821         CGF.EmitIgnoredExpr(E);
2822       ++IPriv, ++ILHS, ++IRHS;
2823     }
2824     return;
2825   }
2826 
2827   // 1. Build a list of reduction variables.
2828   // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]};
2829   auto Size = RHSExprs.size();
2830   for (auto *E : Privates) {
2831     if (E->getType()->isArrayType())
2832       // Reserve place for array size.
2833       ++Size;
2834   }
2835   llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size);
2836   QualType ReductionArrayTy =
2837       C.getConstantArrayType(C.VoidPtrTy, ArraySize, ArrayType::Normal,
2838                              /*IndexTypeQuals=*/0);
2839   Address ReductionList =
2840       CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list");
2841   auto IPriv = Privates.begin();
2842   unsigned Idx = 0;
2843   for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) {
2844     Address Elem =
2845       CGF.Builder.CreateConstArrayGEP(ReductionList, Idx, CGF.getPointerSize());
2846     CGF.Builder.CreateStore(
2847         CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2848             CGF.EmitLValue(RHSExprs[I]).getPointer(), CGF.VoidPtrTy),
2849         Elem);
2850     if ((*IPriv)->getType()->isArrayType()) {
2851       // Store array size.
2852       ++Idx;
2853       Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx,
2854                                              CGF.getPointerSize());
2855       CGF.Builder.CreateStore(
2856           CGF.Builder.CreateIntToPtr(
2857               CGF.Builder.CreateIntCast(
2858                   CGF.getVLASize(CGF.getContext().getAsVariableArrayType(
2859                                      (*IPriv)->getType()))
2860                       .first,
2861                   CGF.SizeTy, /*isSigned=*/false),
2862               CGF.VoidPtrTy),
2863           Elem);
2864     }
2865   }
2866 
2867   // 2. Emit reduce_func().
2868   auto *ReductionFn = emitReductionFunction(
2869       CGM, CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo(), Privates,
2870       LHSExprs, RHSExprs, ReductionOps);
2871 
2872   // 3. Create static kmp_critical_name lock = { 0 };
2873   auto *Lock = getCriticalRegionLock(".reduction");
2874 
2875   // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
2876   // RedList, reduce_func, &<lock>);
2877   auto *IdentTLoc = emitUpdateLocation(
2878       CGF, Loc,
2879       static_cast<OpenMPLocationFlags>(OMP_IDENT_KMPC | OMP_ATOMIC_REDUCE));
2880   auto *ThreadId = getThreadID(CGF, Loc);
2881   auto *ReductionArrayTySize = getTypeSize(CGF, ReductionArrayTy);
2882   auto *RL =
2883     CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(ReductionList.getPointer(),
2884                                                     CGF.VoidPtrTy);
2885   llvm::Value *Args[] = {
2886       IdentTLoc,                             // ident_t *<loc>
2887       ThreadId,                              // i32 <gtid>
2888       CGF.Builder.getInt32(RHSExprs.size()), // i32 <n>
2889       ReductionArrayTySize,                  // size_type sizeof(RedList)
2890       RL,                                    // void *RedList
2891       ReductionFn, // void (*) (void *, void *) <reduce_func>
2892       Lock         // kmp_critical_name *&<lock>
2893   };
2894   auto Res = CGF.EmitRuntimeCall(
2895       createRuntimeFunction(WithNowait ? OMPRTL__kmpc_reduce_nowait
2896                                        : OMPRTL__kmpc_reduce),
2897       Args);
2898 
2899   // 5. Build switch(res)
2900   auto *DefaultBB = CGF.createBasicBlock(".omp.reduction.default");
2901   auto *SwInst = CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2);
2902 
2903   // 6. Build case 1:
2904   //  ...
2905   //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
2906   //  ...
2907   // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
2908   // break;
2909   auto *Case1BB = CGF.createBasicBlock(".omp.reduction.case1");
2910   SwInst->addCase(CGF.Builder.getInt32(1), Case1BB);
2911   CGF.EmitBlock(Case1BB);
2912 
2913   {
2914     CodeGenFunction::RunCleanupsScope Scope(CGF);
2915     // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
2916     llvm::Value *EndArgs[] = {
2917         IdentTLoc, // ident_t *<loc>
2918         ThreadId,  // i32 <gtid>
2919         Lock       // kmp_critical_name *&<lock>
2920     };
2921     CGF.EHStack
2922         .pushCleanup<CallEndCleanup<std::extent<decltype(EndArgs)>::value>>(
2923             NormalAndEHCleanup,
2924             createRuntimeFunction(WithNowait ? OMPRTL__kmpc_end_reduce_nowait
2925                                              : OMPRTL__kmpc_end_reduce),
2926             llvm::makeArrayRef(EndArgs));
2927     auto IPriv = Privates.begin();
2928     auto ILHS = LHSExprs.begin();
2929     auto IRHS = RHSExprs.begin();
2930     for (auto *E : ReductionOps) {
2931       if ((*IPriv)->getType()->isArrayType()) {
2932         // Emit reduction for array section.
2933         auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
2934         auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
2935         EmitOMPAggregateReduction(
2936             CGF, (*IPriv)->getType(), LHSVar, RHSVar,
2937             [=](CodeGenFunction &CGF, const Expr *, const Expr *,
2938                 const Expr *) { CGF.EmitIgnoredExpr(E); });
2939       } else
2940         // Emit reduction for array subscript or single variable.
2941         CGF.EmitIgnoredExpr(E);
2942       ++IPriv, ++ILHS, ++IRHS;
2943     }
2944   }
2945 
2946   CGF.EmitBranch(DefaultBB);
2947 
2948   // 7. Build case 2:
2949   //  ...
2950   //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
2951   //  ...
2952   // break;
2953   auto *Case2BB = CGF.createBasicBlock(".omp.reduction.case2");
2954   SwInst->addCase(CGF.Builder.getInt32(2), Case2BB);
2955   CGF.EmitBlock(Case2BB);
2956 
2957   {
2958     CodeGenFunction::RunCleanupsScope Scope(CGF);
2959     if (!WithNowait) {
2960       // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>);
2961       llvm::Value *EndArgs[] = {
2962           IdentTLoc, // ident_t *<loc>
2963           ThreadId,  // i32 <gtid>
2964           Lock       // kmp_critical_name *&<lock>
2965       };
2966       CGF.EHStack
2967           .pushCleanup<CallEndCleanup<std::extent<decltype(EndArgs)>::value>>(
2968               NormalAndEHCleanup,
2969               createRuntimeFunction(OMPRTL__kmpc_end_reduce),
2970               llvm::makeArrayRef(EndArgs));
2971     }
2972     auto ILHS = LHSExprs.begin();
2973     auto IRHS = RHSExprs.begin();
2974     auto IPriv = Privates.begin();
2975     for (auto *E : ReductionOps) {
2976         const Expr *XExpr = nullptr;
2977         const Expr *EExpr = nullptr;
2978         const Expr *UpExpr = nullptr;
2979         BinaryOperatorKind BO = BO_Comma;
2980         if (auto *BO = dyn_cast<BinaryOperator>(E)) {
2981           if (BO->getOpcode() == BO_Assign) {
2982             XExpr = BO->getLHS();
2983             UpExpr = BO->getRHS();
2984           }
2985         }
2986         // Try to emit update expression as a simple atomic.
2987         auto *RHSExpr = UpExpr;
2988         if (RHSExpr) {
2989           // Analyze RHS part of the whole expression.
2990           if (auto *ACO = dyn_cast<AbstractConditionalOperator>(
2991                   RHSExpr->IgnoreParenImpCasts())) {
2992             // If this is a conditional operator, analyze its condition for
2993             // min/max reduction operator.
2994             RHSExpr = ACO->getCond();
2995           }
2996           if (auto *BORHS =
2997                   dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) {
2998             EExpr = BORHS->getRHS();
2999             BO = BORHS->getOpcode();
3000           }
3001         }
3002         if (XExpr) {
3003           auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
3004           auto &&AtomicRedGen = [this, BO, VD, IPriv,
3005                                  Loc](CodeGenFunction &CGF, const Expr *XExpr,
3006                                       const Expr *EExpr, const Expr *UpExpr) {
3007             LValue X = CGF.EmitLValue(XExpr);
3008             RValue E;
3009             if (EExpr)
3010               E = CGF.EmitAnyExpr(EExpr);
3011             CGF.EmitOMPAtomicSimpleUpdateExpr(
3012                 X, E, BO, /*IsXLHSInRHSPart=*/true, llvm::Monotonic, Loc,
3013                 [&CGF, UpExpr, VD, IPriv](RValue XRValue) {
3014                   CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
3015                   PrivateScope.addPrivate(VD, [&CGF, VD, XRValue]() -> Address {
3016                     Address LHSTemp = CGF.CreateMemTemp(VD->getType());
3017                     CGF.EmitStoreThroughLValue(
3018                         XRValue, CGF.MakeAddrLValue(LHSTemp, VD->getType()));
3019                     return LHSTemp;
3020                   });
3021                   (void)PrivateScope.Privatize();
3022                   return CGF.EmitAnyExpr(UpExpr);
3023                 });
3024           };
3025           if ((*IPriv)->getType()->isArrayType()) {
3026             // Emit atomic reduction for array section.
3027             auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
3028             EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar,
3029                                       AtomicRedGen, XExpr, EExpr, UpExpr);
3030           } else
3031             // Emit atomic reduction for array subscript or single variable.
3032             AtomicRedGen(CGF, XExpr, EExpr, UpExpr);
3033         } else {
3034           // Emit as a critical region.
3035           auto &&CritRedGen = [this, E, Loc](CodeGenFunction &CGF, const Expr *,
3036                                              const Expr *, const Expr *) {
3037             emitCriticalRegion(
3038                 CGF, ".atomic_reduction",
3039                 [E](CodeGenFunction &CGF) { CGF.EmitIgnoredExpr(E); }, Loc);
3040           };
3041           if ((*IPriv)->getType()->isArrayType()) {
3042             auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
3043             auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
3044             EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar,
3045                                       CritRedGen);
3046           } else
3047             CritRedGen(CGF, nullptr, nullptr, nullptr);
3048         }
3049       ++ILHS, ++IRHS, ++IPriv;
3050     }
3051   }
3052 
3053   CGF.EmitBranch(DefaultBB);
3054   CGF.EmitBlock(DefaultBB, /*IsFinished=*/true);
3055 }
3056 
3057 void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
3058                                        SourceLocation Loc) {
3059   // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
3060   // global_tid);
3061   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
3062   // Ignore return result until untied tasks are supported.
3063   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_taskwait), Args);
3064 }
3065 
3066 void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF,
3067                                            OpenMPDirectiveKind InnerKind,
3068                                            const RegionCodeGenTy &CodeGen,
3069                                            bool HasCancel) {
3070   InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel);
3071   CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr);
3072 }
3073 
namespace {
/// \brief Cancellation kinds as they are passed to the runtime: the value
/// returned by getCancellationKind() is emitted as the 'kmp_int32 cncl_kind'
/// argument of __kmpc_cancel() and __kmpc_cancellationpoint().
enum RTCancelKind {
  /// \brief getCancellationKind() never returns this value.
  CancelNoreq = 0,
  /// \brief Selected for OMPD_parallel.
  CancelParallel = 1,
  /// \brief Selected for OMPD_for.
  CancelLoop = 2,
  /// \brief Selected for OMPD_sections.
  CancelSections = 3,
  /// \brief Selected for OMPD_taskgroup.
  CancelTaskgroup = 4
};
}
3083 
3084 static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) {
3085   RTCancelKind CancelKind = CancelNoreq;
3086   if (CancelRegion == OMPD_parallel)
3087     CancelKind = CancelParallel;
3088   else if (CancelRegion == OMPD_for)
3089     CancelKind = CancelLoop;
3090   else if (CancelRegion == OMPD_sections)
3091     CancelKind = CancelSections;
3092   else {
3093     assert(CancelRegion == OMPD_taskgroup);
3094     CancelKind = CancelTaskgroup;
3095   }
3096   return CancelKind;
3097 }
3098 
3099 void CGOpenMPRuntime::emitCancellationPointCall(
3100     CodeGenFunction &CGF, SourceLocation Loc,
3101     OpenMPDirectiveKind CancelRegion) {
3102   // Build call kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
3103   // global_tid, kmp_int32 cncl_kind);
3104   if (auto *OMPRegionInfo =
3105           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
3106     if (OMPRegionInfo->getDirectiveKind() == OMPD_single)
3107       return;
3108     if (OMPRegionInfo->hasCancel()) {
3109       llvm::Value *Args[] = {
3110           emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
3111           CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
3112       // Ignore return result until untied tasks are supported.
3113       auto *Result = CGF.EmitRuntimeCall(
3114           createRuntimeFunction(OMPRTL__kmpc_cancellationpoint), Args);
3115       // if (__kmpc_cancellationpoint()) {
3116       //  __kmpc_cancel_barrier();
3117       //   exit from construct;
3118       // }
3119       auto *ExitBB = CGF.createBasicBlock(".cancel.exit");
3120       auto *ContBB = CGF.createBasicBlock(".cancel.continue");
3121       auto *Cmp = CGF.Builder.CreateIsNotNull(Result);
3122       CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
3123       CGF.EmitBlock(ExitBB);
3124       // __kmpc_cancel_barrier();
3125       emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false);
3126       // exit from construct;
3127       auto CancelDest =
3128           CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
3129       CGF.EmitBranchThroughCleanup(CancelDest);
3130       CGF.EmitBlock(ContBB, /*IsFinished=*/true);
3131     }
3132   }
3133 }
3134 
3135 void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc,
3136                                      const Expr *IfCond,
3137                                      OpenMPDirectiveKind CancelRegion) {
3138   // Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
3139   // kmp_int32 cncl_kind);
3140   if (auto *OMPRegionInfo =
3141           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
3142     if (OMPRegionInfo->getDirectiveKind() == OMPD_single)
3143       return;
3144     auto &&ThenGen = [this, Loc, CancelRegion,
3145                       OMPRegionInfo](CodeGenFunction &CGF) {
3146       llvm::Value *Args[] = {
3147           emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
3148           CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
3149       // Ignore return result until untied tasks are supported.
3150       auto *Result =
3151           CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_cancel), Args);
3152       // if (__kmpc_cancel()) {
3153       //  __kmpc_cancel_barrier();
3154       //   exit from construct;
3155       // }
3156       auto *ExitBB = CGF.createBasicBlock(".cancel.exit");
3157       auto *ContBB = CGF.createBasicBlock(".cancel.continue");
3158       auto *Cmp = CGF.Builder.CreateIsNotNull(Result);
3159       CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
3160       CGF.EmitBlock(ExitBB);
3161       // __kmpc_cancel_barrier();
3162       emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false);
3163       // exit from construct;
3164       auto CancelDest =
3165           CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
3166       CGF.EmitBranchThroughCleanup(CancelDest);
3167       CGF.EmitBlock(ContBB, /*IsFinished=*/true);
3168     };
3169     if (IfCond)
3170       emitOMPIfClause(CGF, IfCond, ThenGen, [](CodeGenFunction &) {});
3171     else
3172       ThenGen(CGF);
3173   }
3174 }
3175 
3176 llvm::Value *
3177 CGOpenMPRuntime::emitTargetOutlinedFunction(const OMPExecutableDirective &D,
3178                                             const RegionCodeGenTy &CodeGen) {
3179   const CapturedStmt &CS = *cast<CapturedStmt>(D.getAssociatedStmt());
3180 
3181   CodeGenFunction CGF(CGM, true);
3182   CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen);
3183   CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
3184   return CGF.GenerateOpenMPCapturedStmtFunction(CS, /*UseOnlyReferences=*/true);
3185 }
3186 
3187 void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF,
3188                                      const OMPExecutableDirective &D,
3189                                      llvm::Value *OutlinedFn,
3190                                      const Expr *IfCond, const Expr *Device,
3191                                      ArrayRef<llvm::Value *> CapturedVars) {
3192   /// \brief Values for bit flags used to specify the mapping type for
3193   /// offloading.
3194   enum OpenMPOffloadMappingFlags {
3195     /// \brief Allocate memory on the device and move data from host to device.
3196     OMP_MAP_TO = 0x01,
3197     /// \brief Allocate memory on the device and move data from device to host.
3198     OMP_MAP_FROM = 0x02,
3199   };
3200 
3201   enum OpenMPOffloadingReservedDeviceIDs {
3202     /// \brief Device ID if the device was not defined, runtime should get it
3203     /// from environment variables in the spec.
3204     OMP_DEVICEID_UNDEF = -1,
3205   };
3206 
3207   // Fill up the arrays with the all the captured variables.
3208   SmallVector<llvm::Value *, 16> BasePointers;
3209   SmallVector<llvm::Value *, 16> Pointers;
3210   SmallVector<llvm::Value *, 16> Sizes;
3211   SmallVector<unsigned, 16> MapTypes;
3212 
3213   bool hasVLACaptures = false;
3214 
3215   const CapturedStmt &CS = *cast<CapturedStmt>(D.getAssociatedStmt());
3216   auto RI = CS.getCapturedRecordDecl()->field_begin();
3217   // auto II = CS.capture_init_begin();
3218   auto CV = CapturedVars.begin();
3219   for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(),
3220                                             CE = CS.capture_end();
3221        CI != CE; ++CI, ++RI, ++CV) {
3222     StringRef Name;
3223     QualType Ty;
3224     llvm::Value *BasePointer;
3225     llvm::Value *Pointer;
3226     llvm::Value *Size;
3227     unsigned MapType;
3228 
3229     if (CI->capturesVariableArrayType()) {
3230       BasePointer = Pointer = *CV;
3231       Size = getTypeSize(CGF, RI->getType());
3232       hasVLACaptures = true;
3233       // VLA sizes don't need to be copied back from the device.
3234       MapType = OMP_MAP_TO;
3235     } else if (CI->capturesThis()) {
3236       BasePointer = Pointer = *CV;
3237       const PointerType *PtrTy = cast<PointerType>(RI->getType().getTypePtr());
3238       Size = getTypeSize(CGF, PtrTy->getPointeeType());
3239       // Default map type.
3240       MapType = OMP_MAP_TO | OMP_MAP_FROM;
3241     } else {
3242       BasePointer = Pointer = *CV;
3243 
3244       const ReferenceType *PtrTy =
3245           cast<ReferenceType>(RI->getType().getTypePtr());
3246       QualType ElementType = PtrTy->getPointeeType();
3247       Size = getTypeSize(CGF, ElementType);
3248       // Default map type.
3249       MapType = OMP_MAP_TO | OMP_MAP_FROM;
3250     }
3251 
3252     BasePointers.push_back(BasePointer);
3253     Pointers.push_back(Pointer);
3254     Sizes.push_back(Size);
3255     MapTypes.push_back(MapType);
3256   }
3257 
3258   // Keep track on whether the host function has to be executed.
3259   auto OffloadErrorQType =
3260       CGF.getContext().getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true);
3261   auto OffloadError = CGF.MakeAddrLValue(
3262       CGF.CreateMemTemp(OffloadErrorQType, ".run_host_version"),
3263       OffloadErrorQType);
3264   CGF.EmitStoreOfScalar(llvm::Constant::getNullValue(CGM.Int32Ty),
3265                         OffloadError);
3266 
3267   // Fill up the pointer arrays and transfer execution to the device.
3268   auto &&ThenGen = [this, &BasePointers, &Pointers, &Sizes, &MapTypes,
3269                     hasVLACaptures, Device, OffloadError,
3270                     OffloadErrorQType](CodeGenFunction &CGF) {
3271     unsigned PointerNumVal = BasePointers.size();
3272     llvm::Value *PointerNum = CGF.Builder.getInt32(PointerNumVal);
3273     llvm::Value *BasePointersArray;
3274     llvm::Value *PointersArray;
3275     llvm::Value *SizesArray;
3276     llvm::Value *MapTypesArray;
3277 
3278     if (PointerNumVal) {
3279       llvm::APInt PointerNumAP(32, PointerNumVal, /*isSigned=*/true);
3280       QualType PointerArrayType = CGF.getContext().getConstantArrayType(
3281           CGF.getContext().VoidPtrTy, PointerNumAP, ArrayType::Normal,
3282           /*IndexTypeQuals=*/0);
3283 
3284       BasePointersArray =
3285           CGF.CreateMemTemp(PointerArrayType, ".offload_baseptrs").getPointer();
3286       PointersArray =
3287           CGF.CreateMemTemp(PointerArrayType, ".offload_ptrs").getPointer();
3288 
3289       // If we don't have any VLA types, we can use a constant array for the map
3290       // sizes, otherwise we need to fill up the arrays as we do for the
3291       // pointers.
3292       if (hasVLACaptures) {
3293         QualType SizeArrayType = CGF.getContext().getConstantArrayType(
3294             CGF.getContext().getSizeType(), PointerNumAP, ArrayType::Normal,
3295             /*IndexTypeQuals=*/0);
3296         SizesArray =
3297             CGF.CreateMemTemp(SizeArrayType, ".offload_sizes").getPointer();
3298       } else {
3299         // We expect all the sizes to be constant, so we collect them to create
3300         // a constant array.
3301         SmallVector<llvm::Constant *, 16> ConstSizes;
3302         for (auto S : Sizes)
3303           ConstSizes.push_back(cast<llvm::Constant>(S));
3304 
3305         auto *SizesArrayInit = llvm::ConstantArray::get(
3306             llvm::ArrayType::get(CGM.SizeTy, ConstSizes.size()), ConstSizes);
3307         auto *SizesArrayGbl = new llvm::GlobalVariable(
3308             CGM.getModule(), SizesArrayInit->getType(),
3309             /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
3310             SizesArrayInit, ".offload_sizes");
3311         SizesArrayGbl->setUnnamedAddr(true);
3312         SizesArray = SizesArrayGbl;
3313       }
3314 
3315       // The map types are always constant so we don't need to generate code to
3316       // fill arrays. Instead, we create an array constant.
3317       llvm::Constant *MapTypesArrayInit =
3318           llvm::ConstantDataArray::get(CGF.Builder.getContext(), MapTypes);
3319       auto *MapTypesArrayGbl = new llvm::GlobalVariable(
3320           CGM.getModule(), MapTypesArrayInit->getType(),
3321           /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
3322           MapTypesArrayInit, ".offload_maptypes");
3323       MapTypesArrayGbl->setUnnamedAddr(true);
3324       MapTypesArray = MapTypesArrayGbl;
3325 
3326       for (unsigned i = 0; i < PointerNumVal; ++i) {
3327         llvm::Value *BP = CGF.Builder.CreateConstInBoundsGEP2_32(
3328             llvm::ArrayType::get(CGM.VoidPtrTy, PointerNumVal),
3329             BasePointersArray, 0, i);
3330         Address BPAddr(BP, CGM.getContext().getTypeAlignInChars(
3331                                CGM.getContext().VoidPtrTy));
3332         CGF.Builder.CreateStore(
3333             CGF.Builder.CreateBitCast(BasePointers[i], CGM.VoidPtrTy), BPAddr);
3334 
3335         llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32(
3336             llvm::ArrayType::get(CGM.VoidPtrTy, PointerNumVal), PointersArray,
3337             0, i);
3338         Address PAddr(P, CGM.getContext().getTypeAlignInChars(
3339                              CGM.getContext().VoidPtrTy));
3340         CGF.Builder.CreateStore(
3341             CGF.Builder.CreateBitCast(Pointers[i], CGM.VoidPtrTy), PAddr);
3342 
3343         if (hasVLACaptures) {
3344           llvm::Value *S = CGF.Builder.CreateConstInBoundsGEP2_32(
3345               llvm::ArrayType::get(CGM.SizeTy, PointerNumVal), SizesArray,
3346               /*Idx0=*/0,
3347               /*Idx1=*/i);
3348           Address SAddr(S, CGM.getContext().getTypeAlignInChars(
3349                                CGM.getContext().getSizeType()));
3350           CGF.Builder.CreateStore(CGF.Builder.CreateIntCast(
3351                                       Sizes[i], CGM.SizeTy, /*isSigned=*/true),
3352                                   SAddr);
3353         }
3354       }
3355 
3356       BasePointersArray = CGF.Builder.CreateConstInBoundsGEP2_32(
3357           llvm::ArrayType::get(CGM.VoidPtrTy, PointerNumVal), BasePointersArray,
3358           /*Idx0=*/0, /*Idx1=*/0);
3359       PointersArray = CGF.Builder.CreateConstInBoundsGEP2_32(
3360           llvm::ArrayType::get(CGM.VoidPtrTy, PointerNumVal), PointersArray,
3361           /*Idx0=*/0,
3362           /*Idx1=*/0);
3363       SizesArray = CGF.Builder.CreateConstInBoundsGEP2_32(
3364           llvm::ArrayType::get(CGM.SizeTy, PointerNumVal), SizesArray,
3365           /*Idx0=*/0, /*Idx1=*/0);
3366       MapTypesArray = CGF.Builder.CreateConstInBoundsGEP2_32(
3367           llvm::ArrayType::get(CGM.Int32Ty, PointerNumVal), MapTypesArray,
3368           /*Idx0=*/0,
3369           /*Idx1=*/0);
3370 
3371     } else {
3372       BasePointersArray = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
3373       PointersArray = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
3374       SizesArray = llvm::ConstantPointerNull::get(CGM.SizeTy->getPointerTo());
3375       MapTypesArray =
3376           llvm::ConstantPointerNull::get(CGM.Int32Ty->getPointerTo());
3377     }
3378 
3379     // On top of the arrays that were filled up, the target offloading call
3380     // takes as arguments the device id as well as the host pointer. The host
3381     // pointer is used by the runtime library to identify the current target
3382     // region, so it only has to be unique and not necessarily point to
3383     // anything. It could be the pointer to the outlined function that
3384     // implements the target region, but we aren't using that so that the
3385     // compiler doesn't need to keep that, and could therefore inline the host
3386     // function if proven worthwhile during optimization.
3387 
3388     llvm::Value *HostPtr = new llvm::GlobalVariable(
3389         CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
3390         llvm::GlobalValue::PrivateLinkage,
3391         llvm::Constant::getNullValue(CGM.Int8Ty), ".offload_hstptr");
3392 
3393     // Emit device ID if any.
3394     llvm::Value *DeviceID;
3395     if (Device)
3396       DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
3397                                            CGM.Int32Ty, /*isSigned=*/true);
3398     else
3399       DeviceID = CGF.Builder.getInt32(OMP_DEVICEID_UNDEF);
3400 
3401     llvm::Value *OffloadingArgs[] = {
3402         DeviceID,      HostPtr,    PointerNum,   BasePointersArray,
3403         PointersArray, SizesArray, MapTypesArray};
3404     auto Return = CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_target),
3405                                       OffloadingArgs);
3406 
3407     CGF.EmitStoreOfScalar(Return, OffloadError);
3408   };
3409 
3410   if (IfCond) {
3411     // Notify that the host version must be executed.
3412     auto &&ElseGen = [this, OffloadError,
3413                       OffloadErrorQType](CodeGenFunction &CGF) {
3414       CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/-1u),
3415                             OffloadError);
3416     };
3417     emitOMPIfClause(CGF, IfCond, ThenGen, ElseGen);
3418   } else {
3419     CodeGenFunction::RunCleanupsScope Scope(CGF);
3420     ThenGen(CGF);
3421   }
3422 
3423   // Check the error code and execute the host version if required.
3424   auto OffloadFailedBlock = CGF.createBasicBlock("omp_offload.failed");
3425   auto OffloadContBlock = CGF.createBasicBlock("omp_offload.cont");
3426   auto OffloadErrorVal = CGF.EmitLoadOfScalar(OffloadError, SourceLocation());
3427   auto Failed = CGF.Builder.CreateIsNotNull(OffloadErrorVal);
3428   CGF.Builder.CreateCondBr(Failed, OffloadFailedBlock, OffloadContBlock);
3429 
3430   CGF.EmitBlock(OffloadFailedBlock);
3431   CGF.Builder.CreateCall(OutlinedFn, BasePointers);
3432   CGF.EmitBranch(OffloadContBlock);
3433 
3434   CGF.EmitBlock(OffloadContBlock, /*IsFinished=*/true);
3435   return;
3436 }
3437