1 //===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This provides a class for OpenMP runtime code generation. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "CGCXXABI.h" 15 #include "CGCleanup.h" 16 #include "CGOpenMPRuntime.h" 17 #include "CodeGenFunction.h" 18 #include "clang/AST/Decl.h" 19 #include "clang/AST/StmtOpenMP.h" 20 #include "llvm/ADT/ArrayRef.h" 21 #include "llvm/Bitcode/ReaderWriter.h" 22 #include "llvm/IR/CallSite.h" 23 #include "llvm/IR/DerivedTypes.h" 24 #include "llvm/IR/GlobalValue.h" 25 #include "llvm/IR/Value.h" 26 #include "llvm/Support/Format.h" 27 #include "llvm/Support/raw_ostream.h" 28 #include <cassert> 29 30 using namespace clang; 31 using namespace CodeGen; 32 33 namespace { 34 /// \brief Base class for handling code generation inside OpenMP regions. 35 class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo { 36 public: 37 /// \brief Kinds of OpenMP regions used in codegen. 38 enum CGOpenMPRegionKind { 39 /// \brief Region with outlined function for standalone 'parallel' 40 /// directive. 41 ParallelOutlinedRegion, 42 /// \brief Region with outlined function for standalone 'task' directive. 43 TaskOutlinedRegion, 44 /// \brief Region for constructs that do not require function outlining, 45 /// like 'for', 'sections', 'atomic' etc. directives. 46 InlinedRegion, 47 /// \brief Region with outlined function for standalone 'target' directive. 
48 TargetRegion, 49 }; 50 51 CGOpenMPRegionInfo(const CapturedStmt &CS, 52 const CGOpenMPRegionKind RegionKind, 53 const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind, 54 bool HasCancel) 55 : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind), 56 CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {} 57 58 CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind, 59 const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind, 60 bool HasCancel) 61 : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen), 62 Kind(Kind), HasCancel(HasCancel) {} 63 64 /// \brief Get a variable or parameter for storing global thread id 65 /// inside OpenMP construct. 66 virtual const VarDecl *getThreadIDVariable() const = 0; 67 68 /// \brief Emit the captured statement body. 69 void EmitBody(CodeGenFunction &CGF, const Stmt *S) override; 70 71 /// \brief Get an LValue for the current ThreadID variable. 72 /// \return LValue for thread id variable. This LValue always has type int32*. 73 virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF); 74 75 virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {} 76 77 CGOpenMPRegionKind getRegionKind() const { return RegionKind; } 78 79 OpenMPDirectiveKind getDirectiveKind() const { return Kind; } 80 81 bool hasCancel() const { return HasCancel; } 82 83 static bool classof(const CGCapturedStmtInfo *Info) { 84 return Info->getKind() == CR_OpenMP; 85 } 86 87 ~CGOpenMPRegionInfo() override = default; 88 89 protected: 90 CGOpenMPRegionKind RegionKind; 91 RegionCodeGenTy CodeGen; 92 OpenMPDirectiveKind Kind; 93 bool HasCancel; 94 }; 95 96 /// \brief API for captured statement code generation in OpenMP constructs. 
97 class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo { 98 public: 99 CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar, 100 const RegionCodeGenTy &CodeGen, 101 OpenMPDirectiveKind Kind, bool HasCancel) 102 : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind, 103 HasCancel), 104 ThreadIDVar(ThreadIDVar) { 105 assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region."); 106 } 107 108 /// \brief Get a variable or parameter for storing global thread id 109 /// inside OpenMP construct. 110 const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; } 111 112 /// \brief Get the name of the capture helper. 113 StringRef getHelperName() const override { return ".omp_outlined."; } 114 115 static bool classof(const CGCapturedStmtInfo *Info) { 116 return CGOpenMPRegionInfo::classof(Info) && 117 cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == 118 ParallelOutlinedRegion; 119 } 120 121 private: 122 /// \brief A variable or parameter storing global thread id for OpenMP 123 /// constructs. 124 const VarDecl *ThreadIDVar; 125 }; 126 127 /// \brief API for captured statement code generation in OpenMP constructs. 128 class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo { 129 public: 130 class UntiedTaskActionTy final : public PrePostActionTy { 131 bool Untied; 132 const VarDecl *PartIDVar; 133 const RegionCodeGenTy UntiedCodeGen; 134 llvm::SwitchInst *UntiedSwitch = nullptr; 135 136 public: 137 UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar, 138 const RegionCodeGenTy &UntiedCodeGen) 139 : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {} 140 void Enter(CodeGenFunction &CGF) override { 141 if (Untied) { 142 // Emit task switching point. 
143 auto PartIdLVal = CGF.EmitLoadOfPointerLValue( 144 CGF.GetAddrOfLocalVar(PartIDVar), 145 PartIDVar->getType()->castAs<PointerType>()); 146 auto *Res = CGF.EmitLoadOfScalar(PartIdLVal, SourceLocation()); 147 auto *DoneBB = CGF.createBasicBlock(".untied.done."); 148 UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB); 149 CGF.EmitBlock(DoneBB); 150 CGF.EmitBranchThroughCleanup(CGF.ReturnBlock); 151 CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp.")); 152 UntiedSwitch->addCase(CGF.Builder.getInt32(0), 153 CGF.Builder.GetInsertBlock()); 154 emitUntiedSwitch(CGF); 155 } 156 } 157 void emitUntiedSwitch(CodeGenFunction &CGF) const { 158 if (Untied) { 159 auto PartIdLVal = CGF.EmitLoadOfPointerLValue( 160 CGF.GetAddrOfLocalVar(PartIDVar), 161 PartIDVar->getType()->castAs<PointerType>()); 162 CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()), 163 PartIdLVal); 164 UntiedCodeGen(CGF); 165 CodeGenFunction::JumpDest CurPoint = 166 CGF.getJumpDestInCurrentScope(".untied.next."); 167 CGF.EmitBranchThroughCleanup(CGF.ReturnBlock); 168 CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp.")); 169 UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()), 170 CGF.Builder.GetInsertBlock()); 171 CGF.EmitBranchThroughCleanup(CurPoint); 172 CGF.EmitBlock(CurPoint.getBlock()); 173 } 174 } 175 unsigned getNumberOfParts() const { return UntiedSwitch->getNumCases(); } 176 }; 177 CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS, 178 const VarDecl *ThreadIDVar, 179 const RegionCodeGenTy &CodeGen, 180 OpenMPDirectiveKind Kind, bool HasCancel, 181 const UntiedTaskActionTy &Action) 182 : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel), 183 ThreadIDVar(ThreadIDVar), Action(Action) { 184 assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region."); 185 } 186 187 /// \brief Get a variable or parameter for storing global thread id 188 /// inside OpenMP construct. 
189 const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; } 190 191 /// \brief Get an LValue for the current ThreadID variable. 192 LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override; 193 194 /// \brief Get the name of the capture helper. 195 StringRef getHelperName() const override { return ".omp_outlined."; } 196 197 void emitUntiedSwitch(CodeGenFunction &CGF) override { 198 Action.emitUntiedSwitch(CGF); 199 } 200 201 static bool classof(const CGCapturedStmtInfo *Info) { 202 return CGOpenMPRegionInfo::classof(Info) && 203 cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == 204 TaskOutlinedRegion; 205 } 206 207 private: 208 /// \brief A variable or parameter storing global thread id for OpenMP 209 /// constructs. 210 const VarDecl *ThreadIDVar; 211 /// Action for emitting code for untied tasks. 212 const UntiedTaskActionTy &Action; 213 }; 214 215 /// \brief API for inlined captured statement code generation in OpenMP 216 /// constructs. 217 class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo { 218 public: 219 CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI, 220 const RegionCodeGenTy &CodeGen, 221 OpenMPDirectiveKind Kind, bool HasCancel) 222 : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel), 223 OldCSI(OldCSI), 224 OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {} 225 226 // \brief Retrieve the value of the context parameter. 227 llvm::Value *getContextValue() const override { 228 if (OuterRegionInfo) 229 return OuterRegionInfo->getContextValue(); 230 llvm_unreachable("No context value for inlined OpenMP region"); 231 } 232 233 void setContextValue(llvm::Value *V) override { 234 if (OuterRegionInfo) { 235 OuterRegionInfo->setContextValue(V); 236 return; 237 } 238 llvm_unreachable("No context value for inlined OpenMP region"); 239 } 240 241 /// \brief Lookup the captured field decl for a variable. 
242 const FieldDecl *lookup(const VarDecl *VD) const override { 243 if (OuterRegionInfo) 244 return OuterRegionInfo->lookup(VD); 245 // If there is no outer outlined region,no need to lookup in a list of 246 // captured variables, we can use the original one. 247 return nullptr; 248 } 249 250 FieldDecl *getThisFieldDecl() const override { 251 if (OuterRegionInfo) 252 return OuterRegionInfo->getThisFieldDecl(); 253 return nullptr; 254 } 255 256 /// \brief Get a variable or parameter for storing global thread id 257 /// inside OpenMP construct. 258 const VarDecl *getThreadIDVariable() const override { 259 if (OuterRegionInfo) 260 return OuterRegionInfo->getThreadIDVariable(); 261 return nullptr; 262 } 263 264 /// \brief Get the name of the capture helper. 265 StringRef getHelperName() const override { 266 if (auto *OuterRegionInfo = getOldCSI()) 267 return OuterRegionInfo->getHelperName(); 268 llvm_unreachable("No helper name for inlined OpenMP construct"); 269 } 270 271 void emitUntiedSwitch(CodeGenFunction &CGF) override { 272 if (OuterRegionInfo) 273 OuterRegionInfo->emitUntiedSwitch(CGF); 274 } 275 276 CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; } 277 278 static bool classof(const CGCapturedStmtInfo *Info) { 279 return CGOpenMPRegionInfo::classof(Info) && 280 cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion; 281 } 282 283 ~CGOpenMPInlinedRegionInfo() override = default; 284 285 private: 286 /// \brief CodeGen info about outer OpenMP region. 287 CodeGenFunction::CGCapturedStmtInfo *OldCSI; 288 CGOpenMPRegionInfo *OuterRegionInfo; 289 }; 290 291 /// \brief API for captured statement code generation in OpenMP target 292 /// constructs. For this captures, implicit parameters are used instead of the 293 /// captured fields. The name of the target region has to be unique in a given 294 /// application so it is provided by the client, because only the client has 295 /// the information to generate that. 
296 class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo { 297 public: 298 CGOpenMPTargetRegionInfo(const CapturedStmt &CS, 299 const RegionCodeGenTy &CodeGen, StringRef HelperName) 300 : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target, 301 /*HasCancel=*/false), 302 HelperName(HelperName) {} 303 304 /// \brief This is unused for target regions because each starts executing 305 /// with a single thread. 306 const VarDecl *getThreadIDVariable() const override { return nullptr; } 307 308 /// \brief Get the name of the capture helper. 309 StringRef getHelperName() const override { return HelperName; } 310 311 static bool classof(const CGCapturedStmtInfo *Info) { 312 return CGOpenMPRegionInfo::classof(Info) && 313 cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion; 314 } 315 316 private: 317 StringRef HelperName; 318 }; 319 320 static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) { 321 llvm_unreachable("No codegen for expressions"); 322 } 323 /// \brief API for generation of expressions captured in a innermost OpenMP 324 /// region. 325 class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo { 326 public: 327 CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS) 328 : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen, 329 OMPD_unknown, 330 /*HasCancel=*/false), 331 PrivScope(CGF) { 332 // Make sure the globals captured in the provided statement are local by 333 // using the privatization logic. We assume the same variable is not 334 // captured more than once. 
335 for (auto &C : CS.captures()) { 336 if (!C.capturesVariable() && !C.capturesVariableByCopy()) 337 continue; 338 339 const VarDecl *VD = C.getCapturedVar(); 340 if (VD->isLocalVarDeclOrParm()) 341 continue; 342 343 DeclRefExpr DRE(const_cast<VarDecl *>(VD), 344 /*RefersToEnclosingVariableOrCapture=*/false, 345 VD->getType().getNonReferenceType(), VK_LValue, 346 SourceLocation()); 347 PrivScope.addPrivate(VD, [&CGF, &DRE]() -> Address { 348 return CGF.EmitLValue(&DRE).getAddress(); 349 }); 350 } 351 (void)PrivScope.Privatize(); 352 } 353 354 /// \brief Lookup the captured field decl for a variable. 355 const FieldDecl *lookup(const VarDecl *VD) const override { 356 if (auto *FD = CGOpenMPInlinedRegionInfo::lookup(VD)) 357 return FD; 358 return nullptr; 359 } 360 361 /// \brief Emit the captured statement body. 362 void EmitBody(CodeGenFunction &CGF, const Stmt *S) override { 363 llvm_unreachable("No body for expressions"); 364 } 365 366 /// \brief Get a variable or parameter for storing global thread id 367 /// inside OpenMP construct. 368 const VarDecl *getThreadIDVariable() const override { 369 llvm_unreachable("No thread id for expressions"); 370 } 371 372 /// \brief Get the name of the capture helper. 373 StringRef getHelperName() const override { 374 llvm_unreachable("No helper name for expressions"); 375 } 376 377 static bool classof(const CGCapturedStmtInfo *Info) { return false; } 378 379 private: 380 /// Private scope to capture global variables. 381 CodeGenFunction::OMPPrivateScope PrivScope; 382 }; 383 384 /// \brief RAII for emitting code of OpenMP constructs. 385 class InlinedOpenMPRegionRAII { 386 CodeGenFunction &CGF; 387 llvm::DenseMap<const VarDecl *, FieldDecl *> LambdaCaptureFields; 388 FieldDecl *LambdaThisCaptureField = nullptr; 389 390 public: 391 /// \brief Constructs region for combined constructs. 392 /// \param CodeGen Code generation sequence for combined directives. 
Includes 393 /// a list of functions used for code generation of implicitly inlined 394 /// regions. 395 InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen, 396 OpenMPDirectiveKind Kind, bool HasCancel) 397 : CGF(CGF) { 398 // Start emission for the construct. 399 CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo( 400 CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel); 401 std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields); 402 LambdaThisCaptureField = CGF.LambdaThisCaptureField; 403 CGF.LambdaThisCaptureField = nullptr; 404 } 405 406 ~InlinedOpenMPRegionRAII() { 407 // Restore original CapturedStmtInfo only if we're done with code emission. 408 auto *OldCSI = 409 cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI(); 410 delete CGF.CapturedStmtInfo; 411 CGF.CapturedStmtInfo = OldCSI; 412 std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields); 413 CGF.LambdaThisCaptureField = LambdaThisCaptureField; 414 } 415 }; 416 417 /// \brief Values for bit flags used in the ident_t to describe the fields. 418 /// All enumeric elements are named and described in accordance with the code 419 /// from http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp.h 420 enum OpenMPLocationFlags { 421 /// \brief Use trampoline for internal microtask. 422 OMP_IDENT_IMD = 0x01, 423 /// \brief Use c-style ident structure. 424 OMP_IDENT_KMPC = 0x02, 425 /// \brief Atomic reduction option for kmpc_reduce. 426 OMP_ATOMIC_REDUCE = 0x10, 427 /// \brief Explicit 'barrier' directive. 428 OMP_IDENT_BARRIER_EXPL = 0x20, 429 /// \brief Implicit barrier in code. 430 OMP_IDENT_BARRIER_IMPL = 0x40, 431 /// \brief Implicit barrier in 'for' directive. 432 OMP_IDENT_BARRIER_IMPL_FOR = 0x40, 433 /// \brief Implicit barrier in 'sections' directive. 434 OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0, 435 /// \brief Implicit barrier in 'single' directive. 
436 OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140 437 }; 438 439 /// \brief Describes ident structure that describes a source location. 440 /// All descriptions are taken from 441 /// http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp.h 442 /// Original structure: 443 /// typedef struct ident { 444 /// kmp_int32 reserved_1; /**< might be used in Fortran; 445 /// see above */ 446 /// kmp_int32 flags; /**< also f.flags; KMP_IDENT_xxx flags; 447 /// KMP_IDENT_KMPC identifies this union 448 /// member */ 449 /// kmp_int32 reserved_2; /**< not really used in Fortran any more; 450 /// see above */ 451 ///#if USE_ITT_BUILD 452 /// /* but currently used for storing 453 /// region-specific ITT */ 454 /// /* contextual information. */ 455 ///#endif /* USE_ITT_BUILD */ 456 /// kmp_int32 reserved_3; /**< source[4] in Fortran, do not use for 457 /// C++ */ 458 /// char const *psource; /**< String describing the source location. 459 /// The string is composed of semi-colon separated 460 // fields which describe the source file, 461 /// the function and a pair of line numbers that 462 /// delimit the construct. 463 /// */ 464 /// } ident_t; 465 enum IdentFieldIndex { 466 /// \brief might be used in Fortran 467 IdentField_Reserved_1, 468 /// \brief OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member. 469 IdentField_Flags, 470 /// \brief Not really used in Fortran any more 471 IdentField_Reserved_2, 472 /// \brief Source[4] in Fortran, do not use for C++ 473 IdentField_Reserved_3, 474 /// \brief String describing the source location. The string is composed of 475 /// semi-colon separated fields which describe the source file, the function 476 /// and a pair of line numbers that delimit the construct. 477 IdentField_PSource 478 }; 479 480 /// \brief Schedule types for 'omp for' loops (these enumerators are taken from 481 /// the enum sched_type in kmp.h). 482 enum OpenMPSchedType { 483 /// \brief Lower bound for default (unordered) versions. 
484 OMP_sch_lower = 32, 485 OMP_sch_static_chunked = 33, 486 OMP_sch_static = 34, 487 OMP_sch_dynamic_chunked = 35, 488 OMP_sch_guided_chunked = 36, 489 OMP_sch_runtime = 37, 490 OMP_sch_auto = 38, 491 /// \brief Lower bound for 'ordered' versions. 492 OMP_ord_lower = 64, 493 OMP_ord_static_chunked = 65, 494 OMP_ord_static = 66, 495 OMP_ord_dynamic_chunked = 67, 496 OMP_ord_guided_chunked = 68, 497 OMP_ord_runtime = 69, 498 OMP_ord_auto = 70, 499 OMP_sch_default = OMP_sch_static, 500 /// \brief dist_schedule types 501 OMP_dist_sch_static_chunked = 91, 502 OMP_dist_sch_static = 92, 503 /// Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers. 504 /// Set if the monotonic schedule modifier was present. 505 OMP_sch_modifier_monotonic = (1 << 29), 506 /// Set if the nonmonotonic schedule modifier was present. 507 OMP_sch_modifier_nonmonotonic = (1 << 30), 508 }; 509 510 enum OpenMPRTLFunction { 511 /// \brief Call to void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, 512 /// kmpc_micro microtask, ...); 513 OMPRTL__kmpc_fork_call, 514 /// \brief Call to void *__kmpc_threadprivate_cached(ident_t *loc, 515 /// kmp_int32 global_tid, void *data, size_t size, void ***cache); 516 OMPRTL__kmpc_threadprivate_cached, 517 /// \brief Call to void __kmpc_threadprivate_register( ident_t *, 518 /// void *data, kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor); 519 OMPRTL__kmpc_threadprivate_register, 520 // Call to __kmpc_int32 kmpc_global_thread_num(ident_t *loc); 521 OMPRTL__kmpc_global_thread_num, 522 // Call to void __kmpc_critical(ident_t *loc, kmp_int32 global_tid, 523 // kmp_critical_name *crit); 524 OMPRTL__kmpc_critical, 525 // Call to void __kmpc_critical_with_hint(ident_t *loc, kmp_int32 526 // global_tid, kmp_critical_name *crit, uintptr_t hint); 527 OMPRTL__kmpc_critical_with_hint, 528 // Call to void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid, 529 // kmp_critical_name *crit); 530 OMPRTL__kmpc_end_critical, 531 // Call to kmp_int32 
__kmpc_cancel_barrier(ident_t *loc, kmp_int32 532 // global_tid); 533 OMPRTL__kmpc_cancel_barrier, 534 // Call to void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid); 535 OMPRTL__kmpc_barrier, 536 // Call to void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid); 537 OMPRTL__kmpc_for_static_fini, 538 // Call to void __kmpc_serialized_parallel(ident_t *loc, kmp_int32 539 // global_tid); 540 OMPRTL__kmpc_serialized_parallel, 541 // Call to void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32 542 // global_tid); 543 OMPRTL__kmpc_end_serialized_parallel, 544 // Call to void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid, 545 // kmp_int32 num_threads); 546 OMPRTL__kmpc_push_num_threads, 547 // Call to void __kmpc_flush(ident_t *loc); 548 OMPRTL__kmpc_flush, 549 // Call to kmp_int32 __kmpc_master(ident_t *, kmp_int32 global_tid); 550 OMPRTL__kmpc_master, 551 // Call to void __kmpc_end_master(ident_t *, kmp_int32 global_tid); 552 OMPRTL__kmpc_end_master, 553 // Call to kmp_int32 __kmpc_omp_taskyield(ident_t *, kmp_int32 global_tid, 554 // int end_part); 555 OMPRTL__kmpc_omp_taskyield, 556 // Call to kmp_int32 __kmpc_single(ident_t *, kmp_int32 global_tid); 557 OMPRTL__kmpc_single, 558 // Call to void __kmpc_end_single(ident_t *, kmp_int32 global_tid); 559 OMPRTL__kmpc_end_single, 560 // Call to kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid, 561 // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, 562 // kmp_routine_entry_t *task_entry); 563 OMPRTL__kmpc_omp_task_alloc, 564 // Call to kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t * 565 // new_task); 566 OMPRTL__kmpc_omp_task, 567 // Call to void __kmpc_copyprivate(ident_t *loc, kmp_int32 global_tid, 568 // size_t cpy_size, void *cpy_data, void(*cpy_func)(void *, void *), 569 // kmp_int32 didit); 570 OMPRTL__kmpc_copyprivate, 571 // Call to kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid, 572 // kmp_int32 num_vars, size_t 
reduce_size, void *reduce_data, void 573 // (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name *lck); 574 OMPRTL__kmpc_reduce, 575 // Call to kmp_int32 __kmpc_reduce_nowait(ident_t *loc, kmp_int32 576 // global_tid, kmp_int32 num_vars, size_t reduce_size, void *reduce_data, 577 // void (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name 578 // *lck); 579 OMPRTL__kmpc_reduce_nowait, 580 // Call to void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid, 581 // kmp_critical_name *lck); 582 OMPRTL__kmpc_end_reduce, 583 // Call to void __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid, 584 // kmp_critical_name *lck); 585 OMPRTL__kmpc_end_reduce_nowait, 586 // Call to void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid, 587 // kmp_task_t * new_task); 588 OMPRTL__kmpc_omp_task_begin_if0, 589 // Call to void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid, 590 // kmp_task_t * new_task); 591 OMPRTL__kmpc_omp_task_complete_if0, 592 // Call to void __kmpc_ordered(ident_t *loc, kmp_int32 global_tid); 593 OMPRTL__kmpc_ordered, 594 // Call to void __kmpc_end_ordered(ident_t *loc, kmp_int32 global_tid); 595 OMPRTL__kmpc_end_ordered, 596 // Call to kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32 597 // global_tid); 598 OMPRTL__kmpc_omp_taskwait, 599 // Call to void __kmpc_taskgroup(ident_t *loc, kmp_int32 global_tid); 600 OMPRTL__kmpc_taskgroup, 601 // Call to void __kmpc_end_taskgroup(ident_t *loc, kmp_int32 global_tid); 602 OMPRTL__kmpc_end_taskgroup, 603 // Call to void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid, 604 // int proc_bind); 605 OMPRTL__kmpc_push_proc_bind, 606 // Call to kmp_int32 __kmpc_omp_task_with_deps(ident_t *loc_ref, kmp_int32 607 // gtid, kmp_task_t * new_task, kmp_int32 ndeps, kmp_depend_info_t 608 // *dep_list, kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list); 609 OMPRTL__kmpc_omp_task_with_deps, 610 // Call to void __kmpc_omp_wait_deps(ident_t *loc_ref, kmp_int32 611 // 
gtid, kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 612 // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); 613 OMPRTL__kmpc_omp_wait_deps, 614 // Call to kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32 615 // global_tid, kmp_int32 cncl_kind); 616 OMPRTL__kmpc_cancellationpoint, 617 // Call to kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid, 618 // kmp_int32 cncl_kind); 619 OMPRTL__kmpc_cancel, 620 // Call to void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid, 621 // kmp_int32 num_teams, kmp_int32 thread_limit); 622 OMPRTL__kmpc_push_num_teams, 623 // Call to void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro 624 // microtask, ...); 625 OMPRTL__kmpc_fork_teams, 626 // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int 627 // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int 628 // sched, kmp_uint64 grainsize, void *task_dup); 629 OMPRTL__kmpc_taskloop, 630 631 // 632 // Offloading related calls 633 // 634 // Call to int32_t __tgt_target(int32_t device_id, void *host_ptr, int32_t 635 // arg_num, void** args_base, void **args, size_t *arg_sizes, int32_t 636 // *arg_types); 637 OMPRTL__tgt_target, 638 // Call to int32_t __tgt_target_teams(int32_t device_id, void *host_ptr, 639 // int32_t arg_num, void** args_base, void **args, size_t *arg_sizes, 640 // int32_t *arg_types, int32_t num_teams, int32_t thread_limit); 641 OMPRTL__tgt_target_teams, 642 // Call to void __tgt_register_lib(__tgt_bin_desc *desc); 643 OMPRTL__tgt_register_lib, 644 // Call to void __tgt_unregister_lib(__tgt_bin_desc *desc); 645 OMPRTL__tgt_unregister_lib, 646 // Call to void __tgt_target_data_begin(int32_t device_id, int32_t arg_num, 647 // void** args_base, void **args, size_t *arg_sizes, int32_t *arg_types); 648 OMPRTL__tgt_target_data_begin, 649 // Call to void __tgt_target_data_end(int32_t device_id, int32_t arg_num, 650 // void** args_base, void **args, size_t *arg_sizes, int32_t 
*arg_types); 651 OMPRTL__tgt_target_data_end, 652 }; 653 654 /// A basic class for pre|post-action for advanced codegen sequence for OpenMP 655 /// region. 656 class CleanupTy final : public EHScopeStack::Cleanup { 657 PrePostActionTy *Action; 658 659 public: 660 explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {} 661 void Emit(CodeGenFunction &CGF, Flags /*flags*/) override { 662 if (!CGF.HaveInsertPoint()) 663 return; 664 Action->Exit(CGF); 665 } 666 }; 667 668 } // anonymous namespace 669 670 void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const { 671 CodeGenFunction::RunCleanupsScope Scope(CGF); 672 if (PrePostAction) { 673 CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction); 674 Callback(CodeGen, CGF, *PrePostAction); 675 } else { 676 PrePostActionTy Action; 677 Callback(CodeGen, CGF, Action); 678 } 679 } 680 681 LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) { 682 return CGF.EmitLoadOfPointerLValue( 683 CGF.GetAddrOfLocalVar(getThreadIDVariable()), 684 getThreadIDVariable()->getType()->castAs<PointerType>()); 685 } 686 687 void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt * /*S*/) { 688 if (!CGF.HaveInsertPoint()) 689 return; 690 // 1.2.2 OpenMP Language Terminology 691 // Structured block - An executable statement with a single entry at the 692 // top and a single exit at the bottom. 693 // The point of exit cannot be a branch out of the structured block. 694 // longjmp() and throw() must not violate the entry/exit criteria. 
695 CGF.EHStack.pushTerminate(); 696 CodeGen(CGF); 697 CGF.EHStack.popTerminate(); 698 } 699 700 LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue( 701 CodeGenFunction &CGF) { 702 return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()), 703 getThreadIDVariable()->getType(), 704 AlignmentSource::Decl); 705 } 706 707 CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM) 708 : CGM(CGM), OffloadEntriesInfoManager(CGM) { 709 IdentTy = llvm::StructType::create( 710 "ident_t", CGM.Int32Ty /* reserved_1 */, CGM.Int32Ty /* flags */, 711 CGM.Int32Ty /* reserved_2 */, CGM.Int32Ty /* reserved_3 */, 712 CGM.Int8PtrTy /* psource */, nullptr); 713 KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8); 714 715 loadOffloadInfoMetadata(); 716 } 717 718 void CGOpenMPRuntime::clear() { 719 InternalVars.clear(); 720 } 721 722 static llvm::Function * 723 emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty, 724 const Expr *CombinerInitializer, const VarDecl *In, 725 const VarDecl *Out, bool IsCombiner) { 726 // void .omp_combiner.(Ty *in, Ty *out); 727 auto &C = CGM.getContext(); 728 QualType PtrTy = C.getPointerType(Ty).withRestrict(); 729 FunctionArgList Args; 730 ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(), 731 /*Id=*/nullptr, PtrTy); 732 ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(), 733 /*Id=*/nullptr, PtrTy); 734 Args.push_back(&OmpOutParm); 735 Args.push_back(&OmpInParm); 736 auto &FnInfo = 737 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 738 auto *FnTy = CGM.getTypes().GetFunctionType(FnInfo); 739 auto *Fn = llvm::Function::Create( 740 FnTy, llvm::GlobalValue::InternalLinkage, 741 IsCombiner ? ".omp_combiner." 
: ".omp_initializer.", &CGM.getModule()); 742 CGM.SetInternalFunctionAttributes(/*D=*/nullptr, Fn, FnInfo); 743 Fn->addFnAttr(llvm::Attribute::AlwaysInline); 744 CodeGenFunction CGF(CGM); 745 // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions. 746 // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions. 747 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args); 748 CodeGenFunction::OMPPrivateScope Scope(CGF); 749 Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm); 750 Scope.addPrivate(In, [&CGF, AddrIn, PtrTy]() -> Address { 751 return CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>()) 752 .getAddress(); 753 }); 754 Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm); 755 Scope.addPrivate(Out, [&CGF, AddrOut, PtrTy]() -> Address { 756 return CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>()) 757 .getAddress(); 758 }); 759 (void)Scope.Privatize(); 760 CGF.EmitIgnoredExpr(CombinerInitializer); 761 Scope.ForceCleanup(); 762 CGF.FinishFunction(); 763 return Fn; 764 } 765 766 void CGOpenMPRuntime::emitUserDefinedReduction( 767 CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) { 768 if (UDRMap.count(D) > 0) 769 return; 770 auto &C = CGM.getContext(); 771 if (!In || !Out) { 772 In = &C.Idents.get("omp_in"); 773 Out = &C.Idents.get("omp_out"); 774 } 775 llvm::Function *Combiner = emitCombinerOrInitializer( 776 CGM, D->getType(), D->getCombiner(), cast<VarDecl>(D->lookup(In).front()), 777 cast<VarDecl>(D->lookup(Out).front()), 778 /*IsCombiner=*/true); 779 llvm::Function *Initializer = nullptr; 780 if (auto *Init = D->getInitializer()) { 781 if (!Priv || !Orig) { 782 Priv = &C.Idents.get("omp_priv"); 783 Orig = &C.Idents.get("omp_orig"); 784 } 785 Initializer = emitCombinerOrInitializer( 786 CGM, D->getType(), Init, cast<VarDecl>(D->lookup(Orig).front()), 787 cast<VarDecl>(D->lookup(Priv).front()), 788 /*IsCombiner=*/false); 789 } 790 UDRMap.insert(std::make_pair(D, std::make_pair(Combiner, 
Initializer))); 791 if (CGF) { 792 auto &Decls = FunctionUDRMap.FindAndConstruct(CGF->CurFn); 793 Decls.second.push_back(D); 794 } 795 } 796 797 std::pair<llvm::Function *, llvm::Function *> 798 CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) { 799 auto I = UDRMap.find(D); 800 if (I != UDRMap.end()) 801 return I->second; 802 emitUserDefinedReduction(/*CGF=*/nullptr, D); 803 return UDRMap.lookup(D); 804 } 805 806 // Layout information for ident_t. 807 static CharUnits getIdentAlign(CodeGenModule &CGM) { 808 return CGM.getPointerAlign(); 809 } 810 static CharUnits getIdentSize(CodeGenModule &CGM) { 811 assert((4 * CGM.getPointerSize()).isMultipleOf(CGM.getPointerAlign())); 812 return CharUnits::fromQuantity(16) + CGM.getPointerSize(); 813 } 814 static CharUnits getOffsetOfIdentField(IdentFieldIndex Field) { 815 // All the fields except the last are i32, so this works beautifully. 816 return unsigned(Field) * CharUnits::fromQuantity(4); 817 } 818 static Address createIdentFieldGEP(CodeGenFunction &CGF, Address Addr, 819 IdentFieldIndex Field, 820 const llvm::Twine &Name = "") { 821 auto Offset = getOffsetOfIdentField(Field); 822 return CGF.Builder.CreateStructGEP(Addr, Field, Offset, Name); 823 } 824 825 llvm::Value *CGOpenMPRuntime::emitParallelOrTeamsOutlinedFunction( 826 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, 827 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { 828 assert(ThreadIDVar->getType()->isPointerType() && 829 "thread id variable must be of type kmp_int32 *"); 830 const CapturedStmt *CS = cast<CapturedStmt>(D.getAssociatedStmt()); 831 CodeGenFunction CGF(CGM, true); 832 bool HasCancel = false; 833 if (auto *OPD = dyn_cast<OMPParallelDirective>(&D)) 834 HasCancel = OPD->hasCancel(); 835 else if (auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D)) 836 HasCancel = OPSD->hasCancel(); 837 else if (auto *OPFD = dyn_cast<OMPParallelForDirective>(&D)) 838 HasCancel = OPFD->hasCancel(); 839 
CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind, 840 HasCancel); 841 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 842 return CGF.GenerateOpenMPCapturedStmtFunction(*CS); 843 } 844 845 llvm::Value *CGOpenMPRuntime::emitTaskOutlinedFunction( 846 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, 847 const VarDecl *PartIDVar, const VarDecl *TaskTVar, 848 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen, 849 bool Tied, unsigned &NumberOfParts) { 850 auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF, 851 PrePostActionTy &) { 852 auto *ThreadID = getThreadID(CGF, D.getLocStart()); 853 auto *UpLoc = emitUpdateLocation(CGF, D.getLocStart()); 854 llvm::Value *TaskArgs[] = { 855 UpLoc, ThreadID, 856 CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar), 857 TaskTVar->getType()->castAs<PointerType>()) 858 .getPointer()}; 859 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task), TaskArgs); 860 }; 861 CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar, 862 UntiedCodeGen); 863 CodeGen.setAction(Action); 864 assert(!ThreadIDVar->getType()->isPointerType() && 865 "thread id variable must be of type kmp_int32 for tasks"); 866 auto *CS = cast<CapturedStmt>(D.getAssociatedStmt()); 867 auto *TD = dyn_cast<OMPTaskDirective>(&D); 868 CodeGenFunction CGF(CGM, true); 869 CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, 870 InnermostKind, 871 TD ? 
TD->hasCancel() : false, Action); 872 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 873 auto *Res = CGF.GenerateCapturedStmtFunction(*CS); 874 if (!Tied) 875 NumberOfParts = Action.getNumberOfParts(); 876 return Res; 877 } 878 879 Address CGOpenMPRuntime::getOrCreateDefaultLocation(unsigned Flags) { 880 CharUnits Align = getIdentAlign(CGM); 881 llvm::Value *Entry = OpenMPDefaultLocMap.lookup(Flags); 882 if (!Entry) { 883 if (!DefaultOpenMPPSource) { 884 // Initialize default location for psource field of ident_t structure of 885 // all ident_t objects. Format is ";file;function;line;column;;". 886 // Taken from 887 // http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp_str.c 888 DefaultOpenMPPSource = 889 CGM.GetAddrOfConstantCString(";unknown;unknown;0;0;;").getPointer(); 890 DefaultOpenMPPSource = 891 llvm::ConstantExpr::getBitCast(DefaultOpenMPPSource, CGM.Int8PtrTy); 892 } 893 auto DefaultOpenMPLocation = new llvm::GlobalVariable( 894 CGM.getModule(), IdentTy, /*isConstant*/ true, 895 llvm::GlobalValue::PrivateLinkage, /*Initializer*/ nullptr); 896 DefaultOpenMPLocation->setUnnamedAddr(true); 897 DefaultOpenMPLocation->setAlignment(Align.getQuantity()); 898 899 llvm::Constant *Zero = llvm::ConstantInt::get(CGM.Int32Ty, 0, true); 900 llvm::Constant *Values[] = {Zero, 901 llvm::ConstantInt::get(CGM.Int32Ty, Flags), 902 Zero, Zero, DefaultOpenMPPSource}; 903 llvm::Constant *Init = llvm::ConstantStruct::get(IdentTy, Values); 904 DefaultOpenMPLocation->setInitializer(Init); 905 OpenMPDefaultLocMap[Flags] = Entry = DefaultOpenMPLocation; 906 } 907 return Address(Entry, Align); 908 } 909 910 llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF, 911 SourceLocation Loc, 912 unsigned Flags) { 913 Flags |= OMP_IDENT_KMPC; 914 // If no debug info is generated - return global default location. 
915 if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo || 916 Loc.isInvalid()) 917 return getOrCreateDefaultLocation(Flags).getPointer(); 918 919 assert(CGF.CurFn && "No function in current CodeGenFunction."); 920 921 Address LocValue = Address::invalid(); 922 auto I = OpenMPLocThreadIDMap.find(CGF.CurFn); 923 if (I != OpenMPLocThreadIDMap.end()) 924 LocValue = Address(I->second.DebugLoc, getIdentAlign(CGF.CGM)); 925 926 // OpenMPLocThreadIDMap may have null DebugLoc and non-null ThreadID, if 927 // GetOpenMPThreadID was called before this routine. 928 if (!LocValue.isValid()) { 929 // Generate "ident_t .kmpc_loc.addr;" 930 Address AI = CGF.CreateTempAlloca(IdentTy, getIdentAlign(CGF.CGM), 931 ".kmpc_loc.addr"); 932 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); 933 Elem.second.DebugLoc = AI.getPointer(); 934 LocValue = AI; 935 936 CGBuilderTy::InsertPointGuard IPG(CGF.Builder); 937 CGF.Builder.SetInsertPoint(CGF.AllocaInsertPt); 938 CGF.Builder.CreateMemCpy(LocValue, getOrCreateDefaultLocation(Flags), 939 CGM.getSize(getIdentSize(CGF.CGM))); 940 } 941 942 // char **psource = &.kmpc_loc_<flags>.addr.psource; 943 Address PSource = createIdentFieldGEP(CGF, LocValue, IdentField_PSource); 944 945 auto OMPDebugLoc = OpenMPDebugLocMap.lookup(Loc.getRawEncoding()); 946 if (OMPDebugLoc == nullptr) { 947 SmallString<128> Buffer2; 948 llvm::raw_svector_ostream OS2(Buffer2); 949 // Build debug location 950 PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc); 951 OS2 << ";" << PLoc.getFilename() << ";"; 952 if (const FunctionDecl *FD = 953 dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl)) { 954 OS2 << FD->getQualifiedNameAsString(); 955 } 956 OS2 << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;"; 957 OMPDebugLoc = CGF.Builder.CreateGlobalStringPtr(OS2.str()); 958 OpenMPDebugLocMap[Loc.getRawEncoding()] = OMPDebugLoc; 959 } 960 // *psource = ";<File>;<Function>;<Line>;<Column>;;"; 961 
CGF.Builder.CreateStore(OMPDebugLoc, PSource); 962 963 // Our callers always pass this to a runtime function, so for 964 // convenience, go ahead and return a naked pointer. 965 return LocValue.getPointer(); 966 } 967 968 llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF, 969 SourceLocation Loc) { 970 assert(CGF.CurFn && "No function in current CodeGenFunction."); 971 972 llvm::Value *ThreadID = nullptr; 973 // Check whether we've already cached a load of the thread id in this 974 // function. 975 auto I = OpenMPLocThreadIDMap.find(CGF.CurFn); 976 if (I != OpenMPLocThreadIDMap.end()) { 977 ThreadID = I->second.ThreadID; 978 if (ThreadID != nullptr) 979 return ThreadID; 980 } 981 if (auto *OMPRegionInfo = 982 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) { 983 if (OMPRegionInfo->getThreadIDVariable()) { 984 // Check if this an outlined function with thread id passed as argument. 985 auto LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF); 986 ThreadID = CGF.EmitLoadOfLValue(LVal, Loc).getScalarVal(); 987 // If value loaded in entry block, cache it and use it everywhere in 988 // function. 989 if (CGF.Builder.GetInsertBlock() == CGF.AllocaInsertPt->getParent()) { 990 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); 991 Elem.second.ThreadID = ThreadID; 992 } 993 return ThreadID; 994 } 995 } 996 997 // This is not an outlined function region - need to call __kmpc_int32 998 // kmpc_global_thread_num(ident_t *loc). 999 // Generate thread id value and cache this value for use across the 1000 // function. 
1001 CGBuilderTy::InsertPointGuard IPG(CGF.Builder); 1002 CGF.Builder.SetInsertPoint(CGF.AllocaInsertPt); 1003 ThreadID = 1004 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_global_thread_num), 1005 emitUpdateLocation(CGF, Loc)); 1006 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); 1007 Elem.second.ThreadID = ThreadID; 1008 return ThreadID; 1009 } 1010 1011 void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) { 1012 assert(CGF.CurFn && "No function in current CodeGenFunction."); 1013 if (OpenMPLocThreadIDMap.count(CGF.CurFn)) 1014 OpenMPLocThreadIDMap.erase(CGF.CurFn); 1015 if (FunctionUDRMap.count(CGF.CurFn) > 0) { 1016 for(auto *D : FunctionUDRMap[CGF.CurFn]) { 1017 UDRMap.erase(D); 1018 } 1019 FunctionUDRMap.erase(CGF.CurFn); 1020 } 1021 } 1022 1023 llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() { 1024 if (!IdentTy) { 1025 } 1026 return llvm::PointerType::getUnqual(IdentTy); 1027 } 1028 1029 llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() { 1030 if (!Kmpc_MicroTy) { 1031 // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...) 
1032 llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty), 1033 llvm::PointerType::getUnqual(CGM.Int32Ty)}; 1034 Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true); 1035 } 1036 return llvm::PointerType::getUnqual(Kmpc_MicroTy); 1037 } 1038 1039 llvm::Constant * 1040 CGOpenMPRuntime::createRuntimeFunction(unsigned Function) { 1041 llvm::Constant *RTLFn = nullptr; 1042 switch (static_cast<OpenMPRTLFunction>(Function)) { 1043 case OMPRTL__kmpc_fork_call: { 1044 // Build void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro 1045 // microtask, ...); 1046 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 1047 getKmpc_MicroPointerTy()}; 1048 llvm::FunctionType *FnTy = 1049 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ true); 1050 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_fork_call"); 1051 break; 1052 } 1053 case OMPRTL__kmpc_global_thread_num: { 1054 // Build kmp_int32 __kmpc_global_thread_num(ident_t *loc); 1055 llvm::Type *TypeParams[] = {getIdentTyPointerTy()}; 1056 llvm::FunctionType *FnTy = 1057 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 1058 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_global_thread_num"); 1059 break; 1060 } 1061 case OMPRTL__kmpc_threadprivate_cached: { 1062 // Build void *__kmpc_threadprivate_cached(ident_t *loc, 1063 // kmp_int32 global_tid, void *data, size_t size, void ***cache); 1064 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 1065 CGM.VoidPtrTy, CGM.SizeTy, 1066 CGM.VoidPtrTy->getPointerTo()->getPointerTo()}; 1067 llvm::FunctionType *FnTy = 1068 llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg*/ false); 1069 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_cached"); 1070 break; 1071 } 1072 case OMPRTL__kmpc_critical: { 1073 // Build void __kmpc_critical(ident_t *loc, kmp_int32 global_tid, 1074 // kmp_critical_name *crit); 1075 llvm::Type *TypeParams[] = { 1076 getIdentTyPointerTy(), 
CGM.Int32Ty, 1077 llvm::PointerType::getUnqual(KmpCriticalNameTy)}; 1078 llvm::FunctionType *FnTy = 1079 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1080 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_critical"); 1081 break; 1082 } 1083 case OMPRTL__kmpc_critical_with_hint: { 1084 // Build void __kmpc_critical_with_hint(ident_t *loc, kmp_int32 global_tid, 1085 // kmp_critical_name *crit, uintptr_t hint); 1086 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 1087 llvm::PointerType::getUnqual(KmpCriticalNameTy), 1088 CGM.IntPtrTy}; 1089 llvm::FunctionType *FnTy = 1090 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1091 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_critical_with_hint"); 1092 break; 1093 } 1094 case OMPRTL__kmpc_threadprivate_register: { 1095 // Build void __kmpc_threadprivate_register(ident_t *, void *data, 1096 // kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor); 1097 // typedef void *(*kmpc_ctor)(void *); 1098 auto KmpcCtorTy = 1099 llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy, 1100 /*isVarArg*/ false)->getPointerTo(); 1101 // typedef void *(*kmpc_cctor)(void *, void *); 1102 llvm::Type *KmpcCopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy}; 1103 auto KmpcCopyCtorTy = 1104 llvm::FunctionType::get(CGM.VoidPtrTy, KmpcCopyCtorTyArgs, 1105 /*isVarArg*/ false)->getPointerTo(); 1106 // typedef void (*kmpc_dtor)(void *); 1107 auto KmpcDtorTy = 1108 llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy, /*isVarArg*/ false) 1109 ->getPointerTo(); 1110 llvm::Type *FnTyArgs[] = {getIdentTyPointerTy(), CGM.VoidPtrTy, KmpcCtorTy, 1111 KmpcCopyCtorTy, KmpcDtorTy}; 1112 auto FnTy = llvm::FunctionType::get(CGM.VoidTy, FnTyArgs, 1113 /*isVarArg*/ false); 1114 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_register"); 1115 break; 1116 } 1117 case OMPRTL__kmpc_end_critical: { 1118 // Build void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid, 1119 // kmp_critical_name *crit); 
1120 llvm::Type *TypeParams[] = { 1121 getIdentTyPointerTy(), CGM.Int32Ty, 1122 llvm::PointerType::getUnqual(KmpCriticalNameTy)}; 1123 llvm::FunctionType *FnTy = 1124 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1125 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_critical"); 1126 break; 1127 } 1128 case OMPRTL__kmpc_cancel_barrier: { 1129 // Build kmp_int32 __kmpc_cancel_barrier(ident_t *loc, kmp_int32 1130 // global_tid); 1131 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1132 llvm::FunctionType *FnTy = 1133 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 1134 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_cancel_barrier"); 1135 break; 1136 } 1137 case OMPRTL__kmpc_barrier: { 1138 // Build void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid); 1139 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1140 llvm::FunctionType *FnTy = 1141 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1142 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_barrier"); 1143 break; 1144 } 1145 case OMPRTL__kmpc_for_static_fini: { 1146 // Build void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid); 1147 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1148 llvm::FunctionType *FnTy = 1149 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1150 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_for_static_fini"); 1151 break; 1152 } 1153 case OMPRTL__kmpc_push_num_threads: { 1154 // Build void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid, 1155 // kmp_int32 num_threads) 1156 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 1157 CGM.Int32Ty}; 1158 llvm::FunctionType *FnTy = 1159 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1160 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_num_threads"); 1161 break; 1162 } 1163 case OMPRTL__kmpc_serialized_parallel: { 1164 // Build void 
__kmpc_serialized_parallel(ident_t *loc, kmp_int32 1165 // global_tid); 1166 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1167 llvm::FunctionType *FnTy = 1168 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1169 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_serialized_parallel"); 1170 break; 1171 } 1172 case OMPRTL__kmpc_end_serialized_parallel: { 1173 // Build void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32 1174 // global_tid); 1175 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1176 llvm::FunctionType *FnTy = 1177 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1178 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_serialized_parallel"); 1179 break; 1180 } 1181 case OMPRTL__kmpc_flush: { 1182 // Build void __kmpc_flush(ident_t *loc); 1183 llvm::Type *TypeParams[] = {getIdentTyPointerTy()}; 1184 llvm::FunctionType *FnTy = 1185 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1186 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_flush"); 1187 break; 1188 } 1189 case OMPRTL__kmpc_master: { 1190 // Build kmp_int32 __kmpc_master(ident_t *loc, kmp_int32 global_tid); 1191 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1192 llvm::FunctionType *FnTy = 1193 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 1194 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_master"); 1195 break; 1196 } 1197 case OMPRTL__kmpc_end_master: { 1198 // Build void __kmpc_end_master(ident_t *loc, kmp_int32 global_tid); 1199 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1200 llvm::FunctionType *FnTy = 1201 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 1202 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_master"); 1203 break; 1204 } 1205 case OMPRTL__kmpc_omp_taskyield: { 1206 // Build kmp_int32 __kmpc_omp_taskyield(ident_t *, kmp_int32 global_tid, 1207 // int end_part); 1208 
llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy}; 1209 llvm::FunctionType *FnTy = 1210 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 1211 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_taskyield"); 1212 break; 1213 } 1214 case OMPRTL__kmpc_single: { 1215 // Build kmp_int32 __kmpc_single(ident_t *loc, kmp_int32 global_tid); 1216 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1217 llvm::FunctionType *FnTy = 1218 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 1219 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_single"); 1220 break; 1221 } 1222 case OMPRTL__kmpc_end_single: { 1223 // Build void __kmpc_end_single(ident_t *loc, kmp_int32 global_tid); 1224 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1225 llvm::FunctionType *FnTy = 1226 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 1227 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_single"); 1228 break; 1229 } 1230 case OMPRTL__kmpc_omp_task_alloc: { 1231 // Build kmp_task_t *__kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid, 1232 // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, 1233 // kmp_routine_entry_t *task_entry); 1234 assert(KmpRoutineEntryPtrTy != nullptr && 1235 "Type kmp_routine_entry_t must be created."); 1236 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, 1237 CGM.SizeTy, CGM.SizeTy, KmpRoutineEntryPtrTy}; 1238 // Return void * and then cast to particular kmp_task_t type. 
1239 llvm::FunctionType *FnTy = 1240 llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false); 1241 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_alloc"); 1242 break; 1243 } 1244 case OMPRTL__kmpc_omp_task: { 1245 // Build kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t 1246 // *new_task); 1247 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 1248 CGM.VoidPtrTy}; 1249 llvm::FunctionType *FnTy = 1250 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 1251 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task"); 1252 break; 1253 } 1254 case OMPRTL__kmpc_copyprivate: { 1255 // Build void __kmpc_copyprivate(ident_t *loc, kmp_int32 global_tid, 1256 // size_t cpy_size, void *cpy_data, void(*cpy_func)(void *, void *), 1257 // kmp_int32 didit); 1258 llvm::Type *CpyTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy}; 1259 auto *CpyFnTy = 1260 llvm::FunctionType::get(CGM.VoidTy, CpyTypeParams, /*isVarArg=*/false); 1261 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.SizeTy, 1262 CGM.VoidPtrTy, CpyFnTy->getPointerTo(), 1263 CGM.Int32Ty}; 1264 llvm::FunctionType *FnTy = 1265 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 1266 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_copyprivate"); 1267 break; 1268 } 1269 case OMPRTL__kmpc_reduce: { 1270 // Build kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid, 1271 // kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void 1272 // (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name *lck); 1273 llvm::Type *ReduceTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy}; 1274 auto *ReduceFnTy = llvm::FunctionType::get(CGM.VoidTy, ReduceTypeParams, 1275 /*isVarArg=*/false); 1276 llvm::Type *TypeParams[] = { 1277 getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, CGM.SizeTy, 1278 CGM.VoidPtrTy, ReduceFnTy->getPointerTo(), 1279 llvm::PointerType::getUnqual(KmpCriticalNameTy)}; 1280 
llvm::FunctionType *FnTy = 1281 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 1282 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_reduce"); 1283 break; 1284 } 1285 case OMPRTL__kmpc_reduce_nowait: { 1286 // Build kmp_int32 __kmpc_reduce_nowait(ident_t *loc, kmp_int32 1287 // global_tid, kmp_int32 num_vars, size_t reduce_size, void *reduce_data, 1288 // void (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name 1289 // *lck); 1290 llvm::Type *ReduceTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy}; 1291 auto *ReduceFnTy = llvm::FunctionType::get(CGM.VoidTy, ReduceTypeParams, 1292 /*isVarArg=*/false); 1293 llvm::Type *TypeParams[] = { 1294 getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, CGM.SizeTy, 1295 CGM.VoidPtrTy, ReduceFnTy->getPointerTo(), 1296 llvm::PointerType::getUnqual(KmpCriticalNameTy)}; 1297 llvm::FunctionType *FnTy = 1298 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 1299 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_reduce_nowait"); 1300 break; 1301 } 1302 case OMPRTL__kmpc_end_reduce: { 1303 // Build void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid, 1304 // kmp_critical_name *lck); 1305 llvm::Type *TypeParams[] = { 1306 getIdentTyPointerTy(), CGM.Int32Ty, 1307 llvm::PointerType::getUnqual(KmpCriticalNameTy)}; 1308 llvm::FunctionType *FnTy = 1309 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 1310 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_reduce"); 1311 break; 1312 } 1313 case OMPRTL__kmpc_end_reduce_nowait: { 1314 // Build __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid, 1315 // kmp_critical_name *lck); 1316 llvm::Type *TypeParams[] = { 1317 getIdentTyPointerTy(), CGM.Int32Ty, 1318 llvm::PointerType::getUnqual(KmpCriticalNameTy)}; 1319 llvm::FunctionType *FnTy = 1320 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 1321 RTLFn = 1322 CGM.CreateRuntimeFunction(FnTy, 
/*Name=*/"__kmpc_end_reduce_nowait"); 1323 break; 1324 } 1325 case OMPRTL__kmpc_omp_task_begin_if0: { 1326 // Build void __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t 1327 // *new_task); 1328 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 1329 CGM.VoidPtrTy}; 1330 llvm::FunctionType *FnTy = 1331 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 1332 RTLFn = 1333 CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_begin_if0"); 1334 break; 1335 } 1336 case OMPRTL__kmpc_omp_task_complete_if0: { 1337 // Build void __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t 1338 // *new_task); 1339 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 1340 CGM.VoidPtrTy}; 1341 llvm::FunctionType *FnTy = 1342 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 1343 RTLFn = CGM.CreateRuntimeFunction(FnTy, 1344 /*Name=*/"__kmpc_omp_task_complete_if0"); 1345 break; 1346 } 1347 case OMPRTL__kmpc_ordered: { 1348 // Build void __kmpc_ordered(ident_t *loc, kmp_int32 global_tid); 1349 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1350 llvm::FunctionType *FnTy = 1351 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 1352 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_ordered"); 1353 break; 1354 } 1355 case OMPRTL__kmpc_end_ordered: { 1356 // Build void __kmpc_end_ordered(ident_t *loc, kmp_int32 global_tid); 1357 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1358 llvm::FunctionType *FnTy = 1359 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 1360 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_ordered"); 1361 break; 1362 } 1363 case OMPRTL__kmpc_omp_taskwait: { 1364 // Build kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32 global_tid); 1365 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1366 llvm::FunctionType *FnTy = 1367 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 1368 RTLFn = 
CGM.CreateRuntimeFunction(FnTy, "__kmpc_omp_taskwait"); 1369 break; 1370 } 1371 case OMPRTL__kmpc_taskgroup: { 1372 // Build void __kmpc_taskgroup(ident_t *loc, kmp_int32 global_tid); 1373 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1374 llvm::FunctionType *FnTy = 1375 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 1376 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_taskgroup"); 1377 break; 1378 } 1379 case OMPRTL__kmpc_end_taskgroup: { 1380 // Build void __kmpc_end_taskgroup(ident_t *loc, kmp_int32 global_tid); 1381 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1382 llvm::FunctionType *FnTy = 1383 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 1384 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_taskgroup"); 1385 break; 1386 } 1387 case OMPRTL__kmpc_push_proc_bind: { 1388 // Build void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid, 1389 // int proc_bind) 1390 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy}; 1391 llvm::FunctionType *FnTy = 1392 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1393 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_proc_bind"); 1394 break; 1395 } 1396 case OMPRTL__kmpc_omp_task_with_deps: { 1397 // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid, 1398 // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list, 1399 // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list); 1400 llvm::Type *TypeParams[] = { 1401 getIdentTyPointerTy(), CGM.Int32Ty, CGM.VoidPtrTy, CGM.Int32Ty, 1402 CGM.VoidPtrTy, CGM.Int32Ty, CGM.VoidPtrTy}; 1403 llvm::FunctionType *FnTy = 1404 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 1405 RTLFn = 1406 CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_with_deps"); 1407 break; 1408 } 1409 case OMPRTL__kmpc_omp_wait_deps: { 1410 // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid, 1411 // 
kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 ndeps_noalias, 1412 // kmp_depend_info_t *noalias_dep_list); 1413 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 1414 CGM.Int32Ty, CGM.VoidPtrTy, 1415 CGM.Int32Ty, CGM.VoidPtrTy}; 1416 llvm::FunctionType *FnTy = 1417 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 1418 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_wait_deps"); 1419 break; 1420 } 1421 case OMPRTL__kmpc_cancellationpoint: { 1422 // Build kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32 1423 // global_tid, kmp_int32 cncl_kind) 1424 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy}; 1425 llvm::FunctionType *FnTy = 1426 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 1427 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_cancellationpoint"); 1428 break; 1429 } 1430 case OMPRTL__kmpc_cancel: { 1431 // Build kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid, 1432 // kmp_int32 cncl_kind) 1433 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy}; 1434 llvm::FunctionType *FnTy = 1435 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 1436 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_cancel"); 1437 break; 1438 } 1439 case OMPRTL__kmpc_push_num_teams: { 1440 // Build void kmpc_push_num_teams (ident_t loc, kmp_int32 global_tid, 1441 // kmp_int32 num_teams, kmp_int32 num_threads) 1442 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, 1443 CGM.Int32Ty}; 1444 llvm::FunctionType *FnTy = 1445 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 1446 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_num_teams"); 1447 break; 1448 } 1449 case OMPRTL__kmpc_fork_teams: { 1450 // Build void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro 1451 // microtask, ...); 1452 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 1453 
getKmpc_MicroPointerTy()}; 1454 llvm::FunctionType *FnTy = 1455 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ true); 1456 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_fork_teams"); 1457 break; 1458 } 1459 case OMPRTL__kmpc_taskloop: { 1460 // Build void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int 1461 // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int 1462 // sched, kmp_uint64 grainsize, void *task_dup); 1463 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), 1464 CGM.IntTy, 1465 CGM.VoidPtrTy, 1466 CGM.IntTy, 1467 CGM.Int64Ty->getPointerTo(), 1468 CGM.Int64Ty->getPointerTo(), 1469 CGM.Int64Ty, 1470 CGM.IntTy, 1471 CGM.IntTy, 1472 CGM.Int64Ty, 1473 CGM.VoidPtrTy}; 1474 llvm::FunctionType *FnTy = 1475 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 1476 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_taskloop"); 1477 break; 1478 } 1479 case OMPRTL__tgt_target: { 1480 // Build int32_t __tgt_target(int32_t device_id, void *host_ptr, int32_t 1481 // arg_num, void** args_base, void **args, size_t *arg_sizes, int32_t 1482 // *arg_types); 1483 llvm::Type *TypeParams[] = {CGM.Int32Ty, 1484 CGM.VoidPtrTy, 1485 CGM.Int32Ty, 1486 CGM.VoidPtrPtrTy, 1487 CGM.VoidPtrPtrTy, 1488 CGM.SizeTy->getPointerTo(), 1489 CGM.Int32Ty->getPointerTo()}; 1490 llvm::FunctionType *FnTy = 1491 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 1492 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target"); 1493 break; 1494 } 1495 case OMPRTL__tgt_target_teams: { 1496 // Build int32_t __tgt_target_teams(int32_t device_id, void *host_ptr, 1497 // int32_t arg_num, void** args_base, void **args, size_t *arg_sizes, 1498 // int32_t *arg_types, int32_t num_teams, int32_t thread_limit); 1499 llvm::Type *TypeParams[] = {CGM.Int32Ty, 1500 CGM.VoidPtrTy, 1501 CGM.Int32Ty, 1502 CGM.VoidPtrPtrTy, 1503 CGM.VoidPtrPtrTy, 1504 CGM.SizeTy->getPointerTo(), 1505 CGM.Int32Ty->getPointerTo(), 1506 CGM.Int32Ty, 
1507 CGM.Int32Ty}; 1508 llvm::FunctionType *FnTy = 1509 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 1510 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_teams"); 1511 break; 1512 } 1513 case OMPRTL__tgt_register_lib: { 1514 // Build void __tgt_register_lib(__tgt_bin_desc *desc); 1515 QualType ParamTy = 1516 CGM.getContext().getPointerType(getTgtBinaryDescriptorQTy()); 1517 llvm::Type *TypeParams[] = {CGM.getTypes().ConvertTypeForMem(ParamTy)}; 1518 llvm::FunctionType *FnTy = 1519 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 1520 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_register_lib"); 1521 break; 1522 } 1523 case OMPRTL__tgt_unregister_lib: { 1524 // Build void __tgt_unregister_lib(__tgt_bin_desc *desc); 1525 QualType ParamTy = 1526 CGM.getContext().getPointerType(getTgtBinaryDescriptorQTy()); 1527 llvm::Type *TypeParams[] = {CGM.getTypes().ConvertTypeForMem(ParamTy)}; 1528 llvm::FunctionType *FnTy = 1529 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 1530 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_unregister_lib"); 1531 break; 1532 } 1533 case OMPRTL__tgt_target_data_begin: { 1534 // Build void __tgt_target_data_begin(int32_t device_id, int32_t arg_num, 1535 // void** args_base, void **args, size_t *arg_sizes, int32_t *arg_types); 1536 llvm::Type *TypeParams[] = {CGM.Int32Ty, 1537 CGM.Int32Ty, 1538 CGM.VoidPtrPtrTy, 1539 CGM.VoidPtrPtrTy, 1540 CGM.SizeTy->getPointerTo(), 1541 CGM.Int32Ty->getPointerTo()}; 1542 llvm::FunctionType *FnTy = 1543 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1544 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_begin"); 1545 break; 1546 } 1547 case OMPRTL__tgt_target_data_end: { 1548 // Build void __tgt_target_data_end(int32_t device_id, int32_t arg_num, 1549 // void** args_base, void **args, size_t *arg_sizes, int32_t *arg_types); 1550 llvm::Type *TypeParams[] = {CGM.Int32Ty, 1551 CGM.Int32Ty, 1552 
CGM.VoidPtrPtrTy, 1553 CGM.VoidPtrPtrTy, 1554 CGM.SizeTy->getPointerTo(), 1555 CGM.Int32Ty->getPointerTo()}; 1556 llvm::FunctionType *FnTy = 1557 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1558 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_end"); 1559 break; 1560 } 1561 } 1562 assert(RTLFn && "Unable to find OpenMP runtime function"); 1563 return RTLFn; 1564 } 1565 1566 llvm::Constant *CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize, 1567 bool IVSigned) { 1568 assert((IVSize == 32 || IVSize == 64) && 1569 "IV size is not compatible with the omp runtime"); 1570 auto Name = IVSize == 32 ? (IVSigned ? "__kmpc_for_static_init_4" 1571 : "__kmpc_for_static_init_4u") 1572 : (IVSigned ? "__kmpc_for_static_init_8" 1573 : "__kmpc_for_static_init_8u"); 1574 auto ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty; 1575 auto PtrTy = llvm::PointerType::getUnqual(ITy); 1576 llvm::Type *TypeParams[] = { 1577 getIdentTyPointerTy(), // loc 1578 CGM.Int32Ty, // tid 1579 CGM.Int32Ty, // schedtype 1580 llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter 1581 PtrTy, // p_lower 1582 PtrTy, // p_upper 1583 PtrTy, // p_stride 1584 ITy, // incr 1585 ITy // chunk 1586 }; 1587 llvm::FunctionType *FnTy = 1588 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1589 return CGM.CreateRuntimeFunction(FnTy, Name); 1590 } 1591 1592 llvm::Constant *CGOpenMPRuntime::createDispatchInitFunction(unsigned IVSize, 1593 bool IVSigned) { 1594 assert((IVSize == 32 || IVSize == 64) && 1595 "IV size is not compatible with the omp runtime"); 1596 auto Name = 1597 IVSize == 32 1598 ? (IVSigned ? "__kmpc_dispatch_init_4" : "__kmpc_dispatch_init_4u") 1599 : (IVSigned ? "__kmpc_dispatch_init_8" : "__kmpc_dispatch_init_8u"); 1600 auto ITy = IVSize == 32 ? 
CGM.Int32Ty : CGM.Int64Ty; 1601 llvm::Type *TypeParams[] = { getIdentTyPointerTy(), // loc 1602 CGM.Int32Ty, // tid 1603 CGM.Int32Ty, // schedtype 1604 ITy, // lower 1605 ITy, // upper 1606 ITy, // stride 1607 ITy // chunk 1608 }; 1609 llvm::FunctionType *FnTy = 1610 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1611 return CGM.CreateRuntimeFunction(FnTy, Name); 1612 } 1613 1614 llvm::Constant *CGOpenMPRuntime::createDispatchFiniFunction(unsigned IVSize, 1615 bool IVSigned) { 1616 assert((IVSize == 32 || IVSize == 64) && 1617 "IV size is not compatible with the omp runtime"); 1618 auto Name = 1619 IVSize == 32 1620 ? (IVSigned ? "__kmpc_dispatch_fini_4" : "__kmpc_dispatch_fini_4u") 1621 : (IVSigned ? "__kmpc_dispatch_fini_8" : "__kmpc_dispatch_fini_8u"); 1622 llvm::Type *TypeParams[] = { 1623 getIdentTyPointerTy(), // loc 1624 CGM.Int32Ty, // tid 1625 }; 1626 llvm::FunctionType *FnTy = 1627 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 1628 return CGM.CreateRuntimeFunction(FnTy, Name); 1629 } 1630 1631 llvm::Constant *CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize, 1632 bool IVSigned) { 1633 assert((IVSize == 32 || IVSize == 64) && 1634 "IV size is not compatible with the omp runtime"); 1635 auto Name = 1636 IVSize == 32 1637 ? (IVSigned ? "__kmpc_dispatch_next_4" : "__kmpc_dispatch_next_4u") 1638 : (IVSigned ? "__kmpc_dispatch_next_8" : "__kmpc_dispatch_next_8u"); 1639 auto ITy = IVSize == 32 ? 
CGM.Int32Ty : CGM.Int64Ty;
  auto PtrTy = llvm::PointerType::getUnqual(ITy);
  llvm::Type *TypeParams[] = {
    getIdentTyPointerTy(),                     // loc
    CGM.Int32Ty,                               // tid
    llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
    PtrTy,                                     // p_lower
    PtrTy,                                     // p_upper
    PtrTy                                      // p_stride
  };
  // Unlike the init/fini entries, the 'next' entry is declared to return i32.
  llvm::FunctionType *FnTy =
      llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
  return CGM.CreateRuntimeFunction(FnTy, Name);
}

/// \brief Return the internal cache variable used for the threadprivate
/// variable \a VD.
///
/// The variable is named "<mangled name of VD>.cache." and has type
/// CGM.Int8PtrPtrTy; it is obtained through getOrCreateInternalVariable.
/// Asserts that the TLS-based lowering (OpenMPUseTLS on a target with TLS
/// support) is not in effect.
llvm::Constant *
CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) {
  assert(!CGM.getLangOpts().OpenMPUseTLS ||
         !CGM.getContext().getTargetInfo().isTLSSupported());
  // Lookup the entry, lazily creating it if necessary.
  return getOrCreateInternalVariable(CGM.Int8PtrPtrTy,
                                     Twine(CGM.getMangledName(VD)) + ".cache.");
}

/// \brief Return the address to use for the threadprivate variable \a VD.
///
/// When OpenMPUseTLS is set and the target supports TLS, \a VDAddr is returned
/// unchanged. Otherwise a call to the OMPRTL__kmpc_threadprivate_cached
/// runtime function is emitted with (loc, thread id, variable address cast to
/// i8*, store size of the variable type, cache variable), and its result is
/// wrapped in an Address carrying the alignment of \a VDAddr.
Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
                                                const VarDecl *VD,
                                                Address VDAddr,
                                                SourceLocation Loc) {
  if (CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getContext().getTargetInfo().isTLSSupported())
    return VDAddr;

  auto VarTy = VDAddr.getElementType();
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
                         CGF.Builder.CreatePointerCast(VDAddr.getPointer(),
                                                       CGM.Int8PtrTy),
                         CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)),
                         getOrCreateThreadPrivateCache(VD)};
  return Address(CGF.EmitRuntimeCall(
      createRuntimeFunction(OMPRTL__kmpc_threadprivate_cached), Args),
                 VDAddr.getAlignment());
}

/// \brief Emit the runtime calls that register the constructor, copy
/// constructor and destructor of a threadprivate variable located at
/// \a VDAddr.
void CGOpenMPRuntime::emitThreadPrivateVarInit(
    CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor,
    llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) {
  // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime
  // library.
auto OMPLoc = emitUpdateLocation(CGF, Loc);
  // The result of the thread-num call is not used; the same location value is
  // reused for the registration call below.
  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_global_thread_num),
                      OMPLoc);
  // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor)
  // to register constructor/destructor for variable.
  llvm::Value *Args[] = {OMPLoc,
                         CGF.Builder.CreatePointerCast(VDAddr.getPointer(),
                                                       CGM.VoidPtrTy),
                         Ctor, CopyCtor, Dtor};
  CGF.EmitRuntimeCall(
      createRuntimeFunction(OMPRTL__kmpc_threadprivate_register), Args);
}

/// \brief Emit the helper functions and the registration code needed for the
/// definition of the threadprivate variable \a VD.
///
/// Nothing is emitted (and nullptr is returned) when the TLS-based lowering is
/// in effect, when \a VD has no definition, or when its definition is already
/// recorded in ThreadPrivateWithDefinition. Otherwise an optional constructor
/// helper (C++ with \a PerformInit) and an optional destructor helper (types
/// that need destruction) are generated. If neither exists nullptr is
/// returned. With a null \a CGF the registration is emitted into a new
/// ".__omp_threadprivate_init_." function, which is returned; with a non-null
/// \a CGF the registration is emitted into *CGF and nullptr is returned.
llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition(
    const VarDecl *VD, Address VDAddr, SourceLocation Loc,
    bool PerformInit, CodeGenFunction *CGF) {
  if (CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getContext().getTargetInfo().isTLSSupported())
    return nullptr;

  VD = VD->getDefinition(CGM.getContext());
  // Handle each definition only once.
  if (VD && ThreadPrivateWithDefinition.count(VD) == 0) {
    ThreadPrivateWithDefinition.insert(VD);
    QualType ASTTy = VD->getType();

    llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr;
    auto Init = VD->getAnyInitializer();
    if (CGM.getLangOpts().CPlusPlus && PerformInit) {
      // Generate function that re-emits the declaration's initializer into the
      // threadprivate copy of the variable VD
      CodeGenFunction CtorCGF(CGM);
      FunctionArgList Args;
      // Single 'void *' parameter: destination of the initialization.
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, SourceLocation(),
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy);
      Args.push_back(&Dst);

      auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          CGM.getContext().VoidPtrTy, Args);
      auto FTy = CGM.getTypes().GetFunctionType(FI);
      auto Fn = CGM.CreateGlobalInitOrDestructFunction(
          FTy, ".__kmpc_global_ctor_.", FI, Loc);
      CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI,
                            Args, SourceLocation());
      auto ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst),
/*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      // Treat the incoming pointer as the variable's type, using the alignment
      // of the original variable address.
      Address Arg = Address(ArgVal, VDAddr.getAlignment());
      Arg = CtorCGF.Builder.CreateElementBitCast(Arg,
                                             CtorCGF.ConvertTypeForMem(ASTTy));
      // NOTE(review): Init is dereferenced without a null check; presumably
      // PerformInit implies that an initializer exists - confirm with callers.
      CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(),
                               /*IsInitializer=*/true);
      // The helper returns the pointer it was given.
      ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue);
      CtorCGF.FinishFunction();
      Ctor = Fn;
    }
    if (VD->getType().isDestructedType() != QualType::DK_none) {
      // Generate function that emits destructor call for the threadprivate copy
      // of the variable VD
      CodeGenFunction DtorCGF(CGM);
      FunctionArgList Args;
      // Single 'void *' parameter: the object to destroy.
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, SourceLocation(),
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy);
      Args.push_back(&Dst);

      auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          CGM.getContext().VoidTy, Args);
      auto FTy = CGM.getTypes().GetFunctionType(FI);
      auto Fn = CGM.CreateGlobalInitOrDestructFunction(
          FTy, ".__kmpc_global_dtor_.", FI, Loc);
      // NL stays alive until the end of this block.
      auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
      DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args,
                            SourceLocation());
      // Create a scope with an artificial location for the body of this function.
auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
      auto ArgVal = DtorCGF.EmitLoadOfScalar(
          DtorCGF.GetAddrOfLocalVar(&Dst),
          /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation());
      // Destroy the object the argument points to, using the destroyer and EH
      // cleanup setting that correspond to the variable's destruction kind.
      DtorCGF.emitDestroy(Address(ArgVal, VDAddr.getAlignment()), ASTTy,
                          DtorCGF.getDestroyer(ASTTy.isDestructedType()),
                          DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
      DtorCGF.FinishFunction();
      Dtor = Fn;
    }
    // Do not emit init function if it is not required.
    if (!Ctor && !Dtor)
      return nullptr;

    llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
    auto CopyCtorTy =
        llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs,
                                /*isVarArg=*/false)->getPointerTo();
    // Copying constructor for the threadprivate variable.
    // Must be NULL - reserved by runtime, but currently it requires that this
    // parameter is always NULL. Otherwise it fires assertion.
    CopyCtor = llvm::Constant::getNullValue(CopyCtorTy);
    // A missing helper is passed as a typed null function pointer.
    if (Ctor == nullptr) {
      auto CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
                                            /*isVarArg=*/false)->getPointerTo();
      Ctor = llvm::Constant::getNullValue(CtorTy);
    }
    if (Dtor == nullptr) {
      auto DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy,
                                            /*isVarArg=*/false)->getPointerTo();
      Dtor = llvm::Constant::getNullValue(DtorTy);
    }
    if (!CGF) {
      // No function to emit into was supplied: wrap the registration in a
      // separate nullary void function.
      auto InitFunctionTy =
          llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false);
      auto InitFunction = CGM.CreateGlobalInitOrDestructFunction(
          InitFunctionTy, ".__omp_threadprivate_init_.",
          CGM.getTypes().arrangeNullaryFunction());
      CodeGenFunction InitCGF(CGM);
      FunctionArgList ArgList;
      InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction,
                            CGM.getTypes().arrangeNullaryFunction(), ArgList,
                            Loc);
      emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
      InitCGF.FinishFunction();
1808 return InitFunction; 1809 } 1810 emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc); 1811 } 1812 return nullptr; 1813 } 1814 1815 /// \brief Emits code for OpenMP 'if' clause using specified \a CodeGen 1816 /// function. Here is the logic: 1817 /// if (Cond) { 1818 /// ThenGen(); 1819 /// } else { 1820 /// ElseGen(); 1821 /// } 1822 static void emitOMPIfClause(CodeGenFunction &CGF, const Expr *Cond, 1823 const RegionCodeGenTy &ThenGen, 1824 const RegionCodeGenTy &ElseGen) { 1825 CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange()); 1826 1827 // If the condition constant folds and can be elided, try to avoid emitting 1828 // the condition and the dead arm of the if/else. 1829 bool CondConstant; 1830 if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) { 1831 if (CondConstant) 1832 ThenGen(CGF); 1833 else 1834 ElseGen(CGF); 1835 return; 1836 } 1837 1838 // Otherwise, the condition did not fold, or we couldn't elide it. Just 1839 // emit the conditional branch. 1840 auto ThenBlock = CGF.createBasicBlock("omp_if.then"); 1841 auto ElseBlock = CGF.createBasicBlock("omp_if.else"); 1842 auto ContBlock = CGF.createBasicBlock("omp_if.end"); 1843 CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0); 1844 1845 // Emit the 'then' code. 1846 CGF.EmitBlock(ThenBlock); 1847 ThenGen(CGF); 1848 CGF.EmitBranch(ContBlock); 1849 // Emit the 'else' code if present. 1850 // There is no need to emit line number for unconditional branch. 1851 (void)ApplyDebugLocation::CreateEmpty(CGF); 1852 CGF.EmitBlock(ElseBlock); 1853 ElseGen(CGF); 1854 // There is no need to emit line number for unconditional branch. 1855 (void)ApplyDebugLocation::CreateEmpty(CGF); 1856 CGF.EmitBranch(ContBlock); 1857 // Emit the continuation block for code after the if. 
CGF.EmitBlock(ContBlock, /*IsFinished=*/true);
}

/// \brief Emit the code that runs the outlined parallel region \a OutlinedFn.
///
/// Without an 'if' clause (\a IfCond is null) a __kmpc_fork_call is emitted.
/// With an 'if' clause, emitOMPIfClause selects between that fork call and a
/// serialized execution that calls \a OutlinedFn directly between
/// __kmpc_serialized_parallel and __kmpc_end_serialized_parallel.
/// Does nothing when \a CGF has no insertion point.
void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       llvm::Value *OutlinedFn,
                                       ArrayRef<llvm::Value *> CapturedVars,
                                       const Expr *IfCond) {
  if (!CGF.HaveInsertPoint())
    return;
  auto *RTLoc = emitUpdateLocation(CGF, Loc);
  // Parallel path: hand the outlined function to the runtime.
  auto &&ThenGen = [OutlinedFn, CapturedVars, RTLoc](CodeGenFunction &CGF,
                                                     PrePostActionTy &) {
    // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn);
    auto &RT = CGF.CGM.getOpenMPRuntime();
    llvm::Value *Args[] = {
        RTLoc,
        CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
        CGF.Builder.CreateBitCast(OutlinedFn, RT.getKmpc_MicroPointerTy())};
    // Fixed leading arguments followed by the captured variables.
    llvm::SmallVector<llvm::Value *, 16> RealArgs;
    RealArgs.append(std::begin(Args), std::end(Args));
    RealArgs.append(CapturedVars.begin(), CapturedVars.end());

    auto RTLFn = RT.createRuntimeFunction(OMPRTL__kmpc_fork_call);
    CGF.EmitRuntimeCall(RTLFn, RealArgs);
  };
  // Serialized path: call the outlined function from the current thread.
  auto &&ElseGen = [OutlinedFn, CapturedVars, RTLoc, Loc](CodeGenFunction &CGF,
                                                          PrePostActionTy &) {
    auto &RT = CGF.CGM.getOpenMPRuntime();
    auto ThreadID = RT.getThreadID(CGF, Loc);
    // Build calls:
    // __kmpc_serialized_parallel(&Loc, GTid);
    llvm::Value *Args[] = {RTLoc, ThreadID};
    CGF.EmitRuntimeCall(
        RT.createRuntimeFunction(OMPRTL__kmpc_serialized_parallel), Args);

    // OutlinedFn(&gtid, &zero, CapturedStruct);
    auto ThreadIDAddr = RT.emitThreadIDAddress(CGF, Loc);
    // Temporary i32 initialized to 0, passed by address as second argument.
    Address ZeroAddr =
        CGF.CreateTempAlloca(CGF.Int32Ty, CharUnits::fromQuantity(4),
                             /*Name*/ ".zero.addr");
    CGF.InitTempAlloca(ZeroAddr, CGF.Builder.getInt32(/*C*/ 0));
    llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
    OutlinedFnArgs.push_back(ThreadIDAddr.getPointer());
    OutlinedFnArgs.push_back(ZeroAddr.getPointer());
    OutlinedFnArgs.append(CapturedVars.begin(),
CapturedVars.end());
    CGF.EmitCallOrInvoke(OutlinedFn, OutlinedFnArgs);

    // __kmpc_end_serialized_parallel(&Loc, GTid);
    llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID};
    CGF.EmitRuntimeCall(
        RT.createRuntimeFunction(OMPRTL__kmpc_end_serialized_parallel),
        EndArgs);
  };
  if (IfCond)
    emitOMPIfClause(CGF, IfCond, ThenGen, ElseGen);
  else {
    // No 'if' clause: always take the parallel path.
    RegionCodeGenTy ThenRCG(ThenGen);
    ThenRCG(CGF);
  }
}

// If we're inside an (outlined) parallel region, use the region info's
// thread-ID variable (it is passed in a first argument of the outlined function
// as "kmp_int32 *gtid"). Otherwise, if we're not inside parallel region, but in
// regular serial code region, get thread ID by calling kmp_int32
// kmpc_global_thread_num(ident_t *loc), stash this thread ID in a temporary and
// return the address of that temp.
Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF,
                                             SourceLocation Loc) {
  // Prefer the thread-ID variable of the enclosing OpenMP region, if any.
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
    if (OMPRegionInfo->getThreadIDVariable())
      return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress();

  // Otherwise store the thread id in a fresh signed 32-bit temporary.
  auto ThreadID = getThreadID(CGF, Loc);
  auto Int32Ty =
      CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true);
  auto ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp.");
  CGF.EmitStoreOfScalar(ThreadID,
                        CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty));

  return ThreadIDTemp;
}

/// \brief Return the module-level variable registered under \a Name in
/// InternalVars, creating it on first request.
///
/// A newly created variable is a non-constant global of type \a Ty with
/// common linkage and a null initializer. For an existing entry, an assertion
/// checks that its pointee type equals \a Ty.
llvm::Constant *
CGOpenMPRuntime::getOrCreateInternalVariable(llvm::Type *Ty,
                                             const llvm::Twine &Name) {
  // Render the Twine into a string that serves as the map key.
  SmallString<256> Buffer;
  llvm::raw_svector_ostream Out(Buffer);
  Out << Name;
  auto RuntimeName = Out.str();
  // insert() leaves an existing entry untouched and returns it.
  auto &Elem = *InternalVars.insert(std::make_pair(RuntimeName, nullptr)).first;
  if (Elem.second) {
assert(Elem.second->getType()->getPointerElementType() == Ty &&
           "OMP internal variable has different type than requested");
    return &*Elem.second;
  }

  // First request for this name: create the global and cache it in the map.
  return Elem.second = new llvm::GlobalVariable(
             CGM.getModule(), Ty, /*IsConstant*/ false,
             llvm::GlobalValue::CommonLinkage, llvm::Constant::getNullValue(Ty),
             Elem.first());
}

/// \brief Return the lock variable for the critical region \a CriticalName.
///
/// The variable is named ".gomp_critical_user_<CriticalName>.var", has type
/// KmpCriticalNameTy and is obtained through getOrCreateInternalVariable.
llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) {
  llvm::Twine Name(".gomp_critical_user_", CriticalName);
  return getOrCreateInternalVariable(KmpCriticalNameTy, Name.concat(".var"));
}

namespace {
/// Common pre(post)-action for different OpenMP constructs.
///
/// Enter() emits a call to the enter callee and Exit() a call to the exit
/// callee. In conditional mode Enter() additionally branches on the result of
/// the enter call, and Done() must be called afterwards to close the guarded
/// region.
class CommonActionTy final : public PrePostActionTy {
  llvm::Value *EnterCallee;
  // The argument lists are non-owning views of the caller's storage.
  ArrayRef<llvm::Value *> EnterArgs;
  llvm::Value *ExitCallee;
  ArrayRef<llvm::Value *> ExitArgs;
  bool Conditional;
  // Continuation block; only assigned by Enter() in conditional mode.
  llvm::BasicBlock *ContBlock = nullptr;

public:
  CommonActionTy(llvm::Value *EnterCallee, ArrayRef<llvm::Value *> EnterArgs,
                 llvm::Value *ExitCallee, ArrayRef<llvm::Value *> ExitArgs,
                 bool Conditional = false)
      : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee),
        ExitArgs(ExitArgs), Conditional(Conditional) {}
  /// Emit the enter call; in conditional mode, continue in "omp_if.then" only
  /// when the call returned a non-null value, else jump to "omp_if.end".
  void Enter(CodeGenFunction &CGF) override {
    llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs);
    if (Conditional) {
      llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes);
      auto *ThenBlock = CGF.createBasicBlock("omp_if.then");
      ContBlock = CGF.createBasicBlock("omp_if.end");
      // Generate the branch (If-stmt)
      CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock);
      CGF.EmitBlock(ThenBlock);
    }
  }
  /// Branch to and emit the continuation block created by Enter().
  /// NOTE(review): ContBlock is still null if Enter() has not run in
  /// conditional mode; the callers in this file pair Done() with
  /// /*Conditional=*/true - confirm no other use exists.
  void Done(CodeGenFunction &CGF) {
    // Emit the rest of blocks/branches
    CGF.EmitBranch(ContBlock);
    CGF.EmitBlock(ContBlock, true);
  }
  /// Emit the exit call.
  void Exit(CodeGenFunction &CGF) override {
    CGF.EmitRuntimeCall(ExitCallee,
ExitArgs);
  }
};
} // anonymous namespace

/// \brief Emit a 'critical' region whose body is produced by
/// \a CriticalOpGen.
///
/// The body is bracketed by __kmpc_critical (or __kmpc_critical_with_hint when
/// \a Hint is non-null) and __kmpc_end_critical, all using the lock variable
/// for \a CriticalName. Does nothing when \a CGF has no insertion point.
void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF,
                                         StringRef CriticalName,
                                         const RegionCodeGenTy &CriticalOpGen,
                                         SourceLocation Loc, const Expr *Hint) {
  // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]);
  // CriticalOpGen();
  // __kmpc_end_critical(ident_t *, gtid, Lock);
  // Prepare arguments and build a call to __kmpc_critical
  if (!CGF.HaveInsertPoint())
    return;
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
                         getCriticalRegionLock(CriticalName)};
  // The enter call takes the same arguments plus, optionally, the hint.
  llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args),
                                                std::end(Args));
  if (Hint) {
    // The hint value is converted to CGM.IntPtrTy as an unsigned value.
    EnterArgs.push_back(CGF.Builder.CreateIntCast(
        CGF.EmitScalarExpr(Hint), CGM.IntPtrTy, /*isSigned=*/false));
  }
  CommonActionTy Action(
      createRuntimeFunction(Hint ? OMPRTL__kmpc_critical_with_hint
                                 : OMPRTL__kmpc_critical),
      EnterArgs, createRuntimeFunction(OMPRTL__kmpc_end_critical), Args);
  CriticalOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen);
}

/// \brief Emit a 'master' region whose body is produced by \a MasterOpGen.
///
/// The body and the __kmpc_end_master call are guarded by the result of
/// __kmpc_master. Does nothing when \a CGF has no insertion point.
void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF,
                                       const RegionCodeGenTy &MasterOpGen,
                                       SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // if(__kmpc_master(ident_t *, gtid)) {
  //   MasterOpGen();
  //   __kmpc_end_master(ident_t *, gtid);
  // }
  // Prepare arguments and build a call to __kmpc_master
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_master), Args,
                        createRuntimeFunction(OMPRTL__kmpc_end_master), Args,
                        /*Conditional=*/true);
  MasterOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_master, MasterOpGen);
  // Close the conditional region opened by the action.
  Action.Done(CGF);
}

/// \brief Emit a call to __kmpc_omp_taskyield for a 'taskyield' directive.
void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
                                        SourceLocation Loc) {
if (!CGF.HaveInsertPoint())
    return;
  // Build call __kmpc_omp_taskyield(loc, thread_id, 0);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
      llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)};
  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_taskyield), Args);
  // Inside an OpenMP region, let the region info emit its untied-task switch
  // (a no-op in the CGOpenMPRegionInfo base class).
  if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
    Region->emitUntiedSwitch(CGF);
}

/// \brief Emit a 'taskgroup' region whose body is produced by
/// \a TaskgroupOpGen, bracketed by __kmpc_taskgroup and __kmpc_end_taskgroup.
/// Does nothing when \a CGF has no insertion point.
void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF,
                                          const RegionCodeGenTy &TaskgroupOpGen,
                                          SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // __kmpc_taskgroup(ident_t *, gtid);
  // TaskgroupOpGen();
  // __kmpc_end_taskgroup(ident_t *, gtid);
  // Prepare arguments and build a call to __kmpc_taskgroup
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_taskgroup), Args,
                        createRuntimeFunction(OMPRTL__kmpc_end_taskgroup),
                        Args);
  TaskgroupOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen);
}

/// Given an array of pointers to variables, project the address of a
/// given variable.
///
/// \param Array Address of the array of pointers.
/// \param Index Position of the wanted pointer in the array.
/// \param Var Declaration that supplies the alignment and the element type of
/// the returned address.
static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array,
                                      unsigned Index, const VarDecl *Var) {
  // Pull out the pointer to the variable.
Address PtrAddr =
      CGF.Builder.CreateConstArrayGEP(Array, Index, CGF.getPointerSize());
  llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr);

  // Give the loaded pointer the declared alignment and memory type of Var.
  Address Addr = Address(Ptr, CGF.getContext().getDeclAlign(Var));
  Addr = CGF.Builder.CreateElementBitCast(
      Addr, CGF.ConvertTypeForMem(Var->getType()));
  return Addr;
}

/// \brief Build the internal function ".omp.copyprivate.copy_func" with the
/// signature void(void *LHSArg, void *RHSArg).
///
/// Both arguments are cast to \a ArgsType and treated as arrays of pointers.
/// For every index I the function copies from the variable addressed by the
/// I-th pointer of RHSArg to the one addressed by the I-th pointer of LHSArg,
/// using \a AssignmentOps[I].
/// \return The created function.
static llvm::Value *emitCopyprivateCopyFunction(
    CodeGenModule &CGM, llvm::Type *ArgsType,
    ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs,
    ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps) {
  auto &C = CGM.getContext();
  // void copy_func(void *LHSArg, void *RHSArg);
  FunctionArgList Args;
  ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, SourceLocation(), /*Id=*/nullptr,
                           C.VoidPtrTy);
  ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, SourceLocation(), /*Id=*/nullptr,
                           C.VoidPtrTy);
  Args.push_back(&LHSArg);
  Args.push_back(&RHSArg);
  auto &CGFI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  auto *Fn = llvm::Function::Create(
      CGM.getTypes().GetFunctionType(CGFI), llvm::GlobalValue::InternalLinkage,
      ".omp.copyprivate.copy_func", &CGM.getModule());
  CGM.SetInternalFunctionAttributes(/*D=*/nullptr, Fn, CGFI);
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args);
  // Dest = (void*[n])(LHSArg);
  // Src = (void*[n])(RHSArg);
  Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
      ArgsType), CGF.getPointerAlign());
  Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
      ArgsType), CGF.getPointerAlign());
  // *(Type0*)Dst[0] = *(Type0*)Src[0];
  // *(Type1*)Dst[1] = *(Type1*)Src[1];
  // ...
// *(Typen*)Dst[n] = *(Typen*)Src[n];
  for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) {
    // Destination variable: I-th pointer of the LHS array.
    auto DestVar = cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl());
    Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar);

    // Source variable: I-th pointer of the RHS array.
    auto SrcVar = cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl());
    Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar);

    // The copied type is taken from the original copyprivate variable.
    auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl();
    QualType Type = VD->getType();
    CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]);
  }
  CGF.FinishFunction();
  return Fn;
}

/// \brief Emit a 'single' region whose body is produced by \a SingleOpGen.
///
/// The body and the __kmpc_end_single call are guarded by the result of
/// __kmpc_single. When \a CopyprivateVars is not empty, a 'did_it' flag
/// records whether the body was executed, and a __kmpc_copyprivate call is
/// emitted after the region. The four expression lists must have the same
/// length. Does nothing when \a CGF has no insertion point.
void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF,
                                       const RegionCodeGenTy &SingleOpGen,
                                       SourceLocation Loc,
                                       ArrayRef<const Expr *> CopyprivateVars,
                                       ArrayRef<const Expr *> SrcExprs,
                                       ArrayRef<const Expr *> DstExprs,
                                       ArrayRef<const Expr *> AssignmentOps) {
  if (!CGF.HaveInsertPoint())
    return;
  assert(CopyprivateVars.size() == SrcExprs.size() &&
         CopyprivateVars.size() == DstExprs.size() &&
         CopyprivateVars.size() == AssignmentOps.size());
  auto &C = CGM.getContext();
  // int32 did_it = 0;
  // if(__kmpc_single(ident_t *, gtid)) {
  //   SingleOpGen();
  //   __kmpc_end_single(ident_t *, gtid);
  //   did_it = 1;
  // }
  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
  // <copy_func>, did_it);

  // DidIt stays invalid when there is nothing to broadcast.
  Address DidIt = Address::invalid();
  if (!CopyprivateVars.empty()) {
    // int32 did_it = 0;
    auto KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
    DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it");
    CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt);
  }
  // Prepare arguments and build a call to __kmpc_single
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CommonActionTy
Action(createRuntimeFunction(OMPRTL__kmpc_single), Args,
                        createRuntimeFunction(OMPRTL__kmpc_end_single), Args,
                        /*Conditional=*/true);
  SingleOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_single, SingleOpGen);
  // Emitted before Action.Done(), i.e. still inside the guarded region.
  if (DidIt.isValid()) {
    // did_it = 1;
    CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt);
  }
  Action.Done(CGF);
  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
  // <copy_func>, did_it);
  if (DidIt.isValid()) {
    // Array type void*[number of copyprivate variables].
    llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size());
    auto CopyprivateArrayTy =
        C.getConstantArrayType(C.VoidPtrTy, ArraySize, ArrayType::Normal,
                               /*IndexTypeQuals=*/0);
    // Create a list of all private variables for copyprivate.
    Address CopyprivateList =
        CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list");
    for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) {
      // Store the address of the I-th variable, as void*, into slot I.
      Address Elem = CGF.Builder.CreateConstArrayGEP(
          CopyprivateList, I, CGF.getPointerSize());
      CGF.Builder.CreateStore(
          CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
              CGF.EmitLValue(CopyprivateVars[I]).getPointer(), CGF.VoidPtrTy),
          Elem);
    }
    // Build function that copies private values from single region to all other
    // threads in the corresponding parallel region.
// NOTE(review): the lists are passed as (SrcExprs, DstExprs) while
    // emitCopyprivateCopyFunction names these parameters (DestExprs,
    // SrcExprs); confirm against the callers which list holds the
    // destinations.
    auto *CpyFn = emitCopyprivateCopyFunction(
        CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy)->getPointerTo(),
        CopyprivateVars, SrcExprs, DstExprs, AssignmentOps);
    auto *BufSize = CGF.getTypeSize(CopyprivateArrayTy);
    Address CL =
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(CopyprivateList,
                                                      CGF.VoidPtrTy);
    auto *DidItVal = CGF.Builder.CreateLoad(DidIt);
    llvm::Value *Args[] = {
        emitUpdateLocation(CGF, Loc), // ident_t *<loc>
        getThreadID(CGF, Loc),        // i32 <gtid>
        BufSize,                      // size_t <buf_size>
        CL.getPointer(),              // void *<copyprivate list>
        CpyFn,                        // void (*) (void *, void *) <copy_func>
        DidItVal                      // i32 did_it
    };
    CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_copyprivate), Args);
  }
}

/// \brief Emit an 'ordered' region whose body is produced by
/// \a OrderedOpGen.
///
/// When \a IsThreads is true the body is bracketed by __kmpc_ordered and
/// __kmpc_end_ordered; otherwise it is emitted without runtime calls.
/// Does nothing when \a CGF has no insertion point.
void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF,
                                        const RegionCodeGenTy &OrderedOpGen,
                                        SourceLocation Loc, bool IsThreads) {
  if (!CGF.HaveInsertPoint())
    return;
  // __kmpc_ordered(ident_t *, gtid);
  // OrderedOpGen();
  // __kmpc_end_ordered(ident_t *, gtid);
  // Prepare arguments and build a call to __kmpc_ordered
  if (IsThreads) {
    llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
    CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_ordered), Args,
                          createRuntimeFunction(OMPRTL__kmpc_end_ordered),
                          Args);
    OrderedOpGen.setAction(Action);
    emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
    return;
  }
  emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
}

/// \brief Emit a barrier call for the directive \a Kind.
///
/// Inside an OpenMP region that has a 'cancel' construct (and unless
/// \a ForceSimpleCall is set) __kmpc_cancel_barrier is called, and with
/// \a EmitChecks its result is tested to leave the construct. In all other
/// cases __kmpc_barrier is called. Does nothing when \a CGF has no insertion
/// point.
void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
                                      OpenMPDirectiveKind Kind, bool EmitChecks,
                                      bool ForceSimpleCall) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call __kmpc_cancel_barrier(loc, thread_id);
  // Build call __kmpc_barrier(loc, thread_id);
  // Location flag describing which construct the barrier belongs to.
  unsigned Flags;
  if (Kind == OMPD_for)
    Flags =
OMP_IDENT_BARRIER_IMPL_FOR;
  else if (Kind == OMPD_sections)
    Flags = OMP_IDENT_BARRIER_IMPL_SECTIONS;
  else if (Kind == OMPD_single)
    Flags = OMP_IDENT_BARRIER_IMPL_SINGLE;
  else if (Kind == OMPD_barrier)
    Flags = OMP_IDENT_BARRIER_EXPL;
  else
    Flags = OMP_IDENT_BARRIER_IMPL;
  // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc,
  // thread_id);
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags),
                         getThreadID(CGF, Loc)};
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) {
      auto *Result = CGF.EmitRuntimeCall(
          createRuntimeFunction(OMPRTL__kmpc_cancel_barrier), Args);
      if (EmitChecks) {
        // if (__kmpc_cancel_barrier()) {
        //   exit from construct;
        // }
        auto *ExitBB = CGF.createBasicBlock(".cancel.exit");
        auto *ContBB = CGF.createBasicBlock(".cancel.continue");
        auto *Cmp = CGF.Builder.CreateIsNotNull(Result);
        CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
        CGF.EmitBlock(ExitBB);
        //   exit from construct;
        auto CancelDestination =
            CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
        CGF.EmitBranchThroughCleanup(CancelDestination);
        CGF.EmitBlock(ContBB, /*IsFinished=*/true);
      }
      // The cancel barrier replaces the plain barrier call below.
      return;
    }
  }
  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_barrier), Args);
}

/// \brief Map the OpenMP loop schedule to the runtime enumeration.
///
/// \param Chunked Only consulted for the static and unknown kinds; the
/// dynamic and guided kinds always map to their *_chunked enumerators.
/// \param Ordered Selects the OMP_ord_* enumerator instead of OMP_sch_*.
static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind,
                                          bool Chunked, bool Ordered) {
  switch (ScheduleKind) {
  case OMPC_SCHEDULE_static:
    return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked)
                   : (Ordered ? OMP_ord_static : OMP_sch_static);
  case OMPC_SCHEDULE_dynamic:
    return Ordered ?
OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked; 2301 case OMPC_SCHEDULE_guided: 2302 return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked; 2303 case OMPC_SCHEDULE_runtime: 2304 return Ordered ? OMP_ord_runtime : OMP_sch_runtime; 2305 case OMPC_SCHEDULE_auto: 2306 return Ordered ? OMP_ord_auto : OMP_sch_auto; 2307 case OMPC_SCHEDULE_unknown: 2308 assert(!Chunked && "chunk was specified but schedule kind not known"); 2309 return Ordered ? OMP_ord_static : OMP_sch_static; 2310 } 2311 llvm_unreachable("Unexpected runtime schedule"); 2312 } 2313 2314 /// \brief Map the OpenMP distribute schedule to the runtime enumeration. 2315 static OpenMPSchedType 2316 getRuntimeSchedule(OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) { 2317 // only static is allowed for dist_schedule 2318 return Chunked ? OMP_dist_sch_static_chunked : OMP_dist_sch_static; 2319 } 2320 2321 bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind, 2322 bool Chunked) const { 2323 auto Schedule = getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false); 2324 return Schedule == OMP_sch_static; 2325 } 2326 2327 bool CGOpenMPRuntime::isStaticNonchunked( 2328 OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const { 2329 auto Schedule = getRuntimeSchedule(ScheduleKind, Chunked); 2330 return Schedule == OMP_dist_sch_static; 2331 } 2332 2333 2334 bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const { 2335 auto Schedule = 2336 getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false); 2337 assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here"); 2338 return Schedule != OMP_sch_static; 2339 } 2340 2341 static int addMonoNonMonoModifier(OpenMPSchedType Schedule, 2342 OpenMPScheduleClauseModifier M1, 2343 OpenMPScheduleClauseModifier M2) { 2344 switch (M1) { 2345 case OMPC_SCHEDULE_MODIFIER_monotonic: 2346 return Schedule | OMP_sch_modifier_monotonic; 2347 case OMPC_SCHEDULE_MODIFIER_nonmonotonic: 2348 
return Schedule | OMP_sch_modifier_nonmonotonic;
  case OMPC_SCHEDULE_MODIFIER_simd:
  case OMPC_SCHEDULE_MODIFIER_last:
  case OMPC_SCHEDULE_MODIFIER_unknown:
    break;
  }
  // The first modifier did not decide; apply the same mapping to the second.
  switch (M2) {
  case OMPC_SCHEDULE_MODIFIER_monotonic:
    return Schedule | OMP_sch_modifier_monotonic;
  case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
    return Schedule | OMP_sch_modifier_nonmonotonic;
  case OMPC_SCHEDULE_MODIFIER_simd:
  case OMPC_SCHEDULE_MODIFIER_last:
  case OMPC_SCHEDULE_MODIFIER_unknown:
    break;
  }
  return Schedule;
}

/// \brief Emit the __kmpc_dispatch_init_* call that starts a dynamically
/// scheduled (or ordered) loop.
///
/// The lower bound passed to the runtime is 0 and the stride is 1; \a UB is
/// the upper bound. A null \a Chunk is replaced by the constant 1.
/// Does nothing when \a CGF has no insertion point.
void CGOpenMPRuntime::emitForDispatchInit(CodeGenFunction &CGF,
                                          SourceLocation Loc,
                                          const OpenMPScheduleTy &ScheduleKind,
                                          unsigned IVSize, bool IVSigned,
                                          bool Ordered, llvm::Value *UB,
                                          llvm::Value *Chunk) {
  if (!CGF.HaveInsertPoint())
    return;
  OpenMPSchedType Schedule =
      getRuntimeSchedule(ScheduleKind.Schedule, Chunk != nullptr, Ordered);
  // Static schedules are only accepted here for ordered loops.
  assert(Ordered ||
         (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked &&
          Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked));
  // Call __kmpc_dispatch_init(
  //          ident_t *loc, kmp_int32 tid, kmp_int32 schedule,
  //          kmp_int[32|64] lower, kmp_int[32|64] upper,
  //          kmp_int[32|64] stride, kmp_int[32|64] chunk);

  // If the Chunk was not specified in the clause - use default value 1.
2386 if (Chunk == nullptr) 2387 Chunk = CGF.Builder.getIntN(IVSize, 1); 2388 llvm::Value *Args[] = { 2389 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 2390 CGF.Builder.getInt32(addMonoNonMonoModifier( 2391 Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type 2392 CGF.Builder.getIntN(IVSize, 0), // Lower 2393 UB, // Upper 2394 CGF.Builder.getIntN(IVSize, 1), // Stride 2395 Chunk // Chunk 2396 }; 2397 CGF.EmitRuntimeCall(createDispatchInitFunction(IVSize, IVSigned), Args); 2398 } 2399 2400 static void emitForStaticInitCall( 2401 CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId, 2402 llvm::Constant *ForStaticInitFunction, OpenMPSchedType Schedule, 2403 OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2, 2404 unsigned IVSize, bool Ordered, Address IL, Address LB, Address UB, 2405 Address ST, llvm::Value *Chunk) { 2406 if (!CGF.HaveInsertPoint()) 2407 return; 2408 2409 assert(!Ordered); 2410 assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked || 2411 Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked || 2412 Schedule == OMP_dist_sch_static || 2413 Schedule == OMP_dist_sch_static_chunked); 2414 2415 // Call __kmpc_for_static_init( 2416 // ident_t *loc, kmp_int32 tid, kmp_int32 schedtype, 2417 // kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower, 2418 // kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride, 2419 // kmp_int[32|64] incr, kmp_int[32|64] chunk); 2420 if (Chunk == nullptr) { 2421 assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static || 2422 Schedule == OMP_dist_sch_static) && 2423 "expected static non-chunked schedule"); 2424 // If the Chunk was not specified in the clause - use default value 1. 
2425 Chunk = CGF.Builder.getIntN(IVSize, 1); 2426 } else { 2427 assert((Schedule == OMP_sch_static_chunked || 2428 Schedule == OMP_ord_static_chunked || 2429 Schedule == OMP_dist_sch_static_chunked) && 2430 "expected static chunked schedule"); 2431 } 2432 llvm::Value *Args[] = { 2433 UpdateLocation, ThreadId, CGF.Builder.getInt32(addMonoNonMonoModifier( 2434 Schedule, M1, M2)), // Schedule type 2435 IL.getPointer(), // &isLastIter 2436 LB.getPointer(), // &LB 2437 UB.getPointer(), // &UB 2438 ST.getPointer(), // &Stride 2439 CGF.Builder.getIntN(IVSize, 1), // Incr 2440 Chunk // Chunk 2441 }; 2442 CGF.EmitRuntimeCall(ForStaticInitFunction, Args); 2443 } 2444 2445 void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF, 2446 SourceLocation Loc, 2447 const OpenMPScheduleTy &ScheduleKind, 2448 unsigned IVSize, bool IVSigned, 2449 bool Ordered, Address IL, Address LB, 2450 Address UB, Address ST, 2451 llvm::Value *Chunk) { 2452 OpenMPSchedType ScheduleNum = 2453 getRuntimeSchedule(ScheduleKind.Schedule, Chunk != nullptr, Ordered); 2454 auto *UpdatedLocation = emitUpdateLocation(CGF, Loc); 2455 auto *ThreadId = getThreadID(CGF, Loc); 2456 auto *StaticInitFunction = createForStaticInitFunction(IVSize, IVSigned); 2457 emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction, 2458 ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, IVSize, 2459 Ordered, IL, LB, UB, ST, Chunk); 2460 } 2461 2462 void CGOpenMPRuntime::emitDistributeStaticInit( 2463 CodeGenFunction &CGF, SourceLocation Loc, 2464 OpenMPDistScheduleClauseKind SchedKind, unsigned IVSize, bool IVSigned, 2465 bool Ordered, Address IL, Address LB, Address UB, Address ST, 2466 llvm::Value *Chunk) { 2467 OpenMPSchedType ScheduleNum = getRuntimeSchedule(SchedKind, Chunk != nullptr); 2468 auto *UpdatedLocation = emitUpdateLocation(CGF, Loc); 2469 auto *ThreadId = getThreadID(CGF, Loc); 2470 auto *StaticInitFunction = createForStaticInitFunction(IVSize, IVSigned); 2471 emitForStaticInitCall(CGF, 
UpdatedLocation, ThreadId, StaticInitFunction, 2472 ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown, 2473 OMPC_SCHEDULE_MODIFIER_unknown, IVSize, Ordered, IL, LB, 2474 UB, ST, Chunk); 2475 } 2476 2477 void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF, 2478 SourceLocation Loc) { 2479 if (!CGF.HaveInsertPoint()) 2480 return; 2481 // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid); 2482 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 2483 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_for_static_fini), 2484 Args); 2485 } 2486 2487 void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF, 2488 SourceLocation Loc, 2489 unsigned IVSize, 2490 bool IVSigned) { 2491 if (!CGF.HaveInsertPoint()) 2492 return; 2493 // Call __kmpc_for_dynamic_fini_(4|8)[u](ident_t *loc, kmp_int32 tid); 2494 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 2495 CGF.EmitRuntimeCall(createDispatchFiniFunction(IVSize, IVSigned), Args); 2496 } 2497 2498 llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF, 2499 SourceLocation Loc, unsigned IVSize, 2500 bool IVSigned, Address IL, 2501 Address LB, Address UB, 2502 Address ST) { 2503 // Call __kmpc_dispatch_next( 2504 // ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter, 2505 // kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper, 2506 // kmp_int[32|64] *p_stride); 2507 llvm::Value *Args[] = { 2508 emitUpdateLocation(CGF, Loc), 2509 getThreadID(CGF, Loc), 2510 IL.getPointer(), // &isLastIter 2511 LB.getPointer(), // &Lower 2512 UB.getPointer(), // &Upper 2513 ST.getPointer() // &Stride 2514 }; 2515 llvm::Value *Call = 2516 CGF.EmitRuntimeCall(createDispatchNextFunction(IVSize, IVSigned), Args); 2517 return CGF.EmitScalarConversion( 2518 Call, CGF.getContext().getIntTypeForBitwidth(32, /* Signed */ true), 2519 CGF.getContext().BoolTy, Loc); 2520 } 2521 2522 void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF, 2523 llvm::Value *NumThreads, 
2524 SourceLocation Loc) { 2525 if (!CGF.HaveInsertPoint()) 2526 return; 2527 // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads) 2528 llvm::Value *Args[] = { 2529 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 2530 CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)}; 2531 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_num_threads), 2532 Args); 2533 } 2534 2535 void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF, 2536 OpenMPProcBindClauseKind ProcBind, 2537 SourceLocation Loc) { 2538 if (!CGF.HaveInsertPoint()) 2539 return; 2540 // Constants for proc bind value accepted by the runtime. 2541 enum ProcBindTy { 2542 ProcBindFalse = 0, 2543 ProcBindTrue, 2544 ProcBindMaster, 2545 ProcBindClose, 2546 ProcBindSpread, 2547 ProcBindIntel, 2548 ProcBindDefault 2549 } RuntimeProcBind; 2550 switch (ProcBind) { 2551 case OMPC_PROC_BIND_master: 2552 RuntimeProcBind = ProcBindMaster; 2553 break; 2554 case OMPC_PROC_BIND_close: 2555 RuntimeProcBind = ProcBindClose; 2556 break; 2557 case OMPC_PROC_BIND_spread: 2558 RuntimeProcBind = ProcBindSpread; 2559 break; 2560 case OMPC_PROC_BIND_unknown: 2561 llvm_unreachable("Unsupported proc_bind value."); 2562 } 2563 // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind) 2564 llvm::Value *Args[] = { 2565 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 2566 llvm::ConstantInt::get(CGM.IntTy, RuntimeProcBind, /*isSigned=*/true)}; 2567 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_proc_bind), Args); 2568 } 2569 2570 void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>, 2571 SourceLocation Loc) { 2572 if (!CGF.HaveInsertPoint()) 2573 return; 2574 // Build call void __kmpc_flush(ident_t *loc) 2575 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_flush), 2576 emitUpdateLocation(CGF, Loc)); 2577 } 2578 2579 namespace { 2580 /// \brief Indexes of fields for type kmp_task_t. 
2581 enum KmpTaskTFields { 2582 /// \brief List of shared variables. 2583 KmpTaskTShareds, 2584 /// \brief Task routine. 2585 KmpTaskTRoutine, 2586 /// \brief Partition id for the untied tasks. 2587 KmpTaskTPartId, 2588 /// \brief Function with call of destructors for private variables. 2589 KmpTaskTDestructors, 2590 /// (Taskloops only) Lower bound. 2591 KmpTaskTLowerBound, 2592 /// (Taskloops only) Upper bound. 2593 KmpTaskTUpperBound, 2594 /// (Taskloops only) Stride. 2595 KmpTaskTStride, 2596 /// (Taskloops only) Is last iteration flag. 2597 KmpTaskTLastIter, 2598 }; 2599 } // anonymous namespace 2600 2601 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::empty() const { 2602 // FIXME: Add other entries type when they become supported. 2603 return OffloadEntriesTargetRegion.empty(); 2604 } 2605 2606 /// \brief Initialize target region entry. 2607 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: 2608 initializeTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID, 2609 StringRef ParentName, unsigned LineNum, 2610 unsigned Order) { 2611 assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is " 2612 "only required for the device " 2613 "code generation."); 2614 OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = 2615 OffloadEntryInfoTargetRegion(Order, /*Addr=*/nullptr, /*ID=*/nullptr); 2616 ++OffloadingEntriesNum; 2617 } 2618 2619 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: 2620 registerTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID, 2621 StringRef ParentName, unsigned LineNum, 2622 llvm::Constant *Addr, llvm::Constant *ID) { 2623 // If we are emitting code for a target, the entry is already initialized, 2624 // only has to be registered. 
2625 if (CGM.getLangOpts().OpenMPIsDevice) { 2626 assert(hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum) && 2627 "Entry must exist."); 2628 auto &Entry = 2629 OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum]; 2630 assert(Entry.isValid() && "Entry not initialized!"); 2631 Entry.setAddress(Addr); 2632 Entry.setID(ID); 2633 return; 2634 } else { 2635 OffloadEntryInfoTargetRegion Entry(OffloadingEntriesNum++, Addr, ID); 2636 OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = Entry; 2637 } 2638 } 2639 2640 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::hasTargetRegionEntryInfo( 2641 unsigned DeviceID, unsigned FileID, StringRef ParentName, 2642 unsigned LineNum) const { 2643 auto PerDevice = OffloadEntriesTargetRegion.find(DeviceID); 2644 if (PerDevice == OffloadEntriesTargetRegion.end()) 2645 return false; 2646 auto PerFile = PerDevice->second.find(FileID); 2647 if (PerFile == PerDevice->second.end()) 2648 return false; 2649 auto PerParentName = PerFile->second.find(ParentName); 2650 if (PerParentName == PerFile->second.end()) 2651 return false; 2652 auto PerLine = PerParentName->second.find(LineNum); 2653 if (PerLine == PerParentName->second.end()) 2654 return false; 2655 // Fail if this entry is already registered. 2656 if (PerLine->second.getAddress() || PerLine->second.getID()) 2657 return false; 2658 return true; 2659 } 2660 2661 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::actOnTargetRegionEntriesInfo( 2662 const OffloadTargetRegionEntryInfoActTy &Action) { 2663 // Scan all target region entries and perform the provided action. 2664 for (auto &D : OffloadEntriesTargetRegion) 2665 for (auto &F : D.second) 2666 for (auto &P : F.second) 2667 for (auto &L : P.second) 2668 Action(D.first, F.first, P.first(), L.first, L.second); 2669 } 2670 2671 /// \brief Create a Ctor/Dtor-like function whose body is emitted through 2672 /// \a Codegen. 
This is used to emit the two functions that register and 2673 /// unregister the descriptor of the current compilation unit. 2674 static llvm::Function * 2675 createOffloadingBinaryDescriptorFunction(CodeGenModule &CGM, StringRef Name, 2676 const RegionCodeGenTy &Codegen) { 2677 auto &C = CGM.getContext(); 2678 FunctionArgList Args; 2679 ImplicitParamDecl DummyPtr(C, /*DC=*/nullptr, SourceLocation(), 2680 /*Id=*/nullptr, C.VoidPtrTy); 2681 Args.push_back(&DummyPtr); 2682 2683 CodeGenFunction CGF(CGM); 2684 GlobalDecl(); 2685 auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 2686 auto FTy = CGM.getTypes().GetFunctionType(FI); 2687 auto *Fn = 2688 CGM.CreateGlobalInitOrDestructFunction(FTy, Name, FI, SourceLocation()); 2689 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FI, Args, SourceLocation()); 2690 Codegen(CGF); 2691 CGF.FinishFunction(); 2692 return Fn; 2693 } 2694 2695 llvm::Function * 2696 CGOpenMPRuntime::createOffloadingBinaryDescriptorRegistration() { 2697 2698 // If we don't have entries or if we are emitting code for the device, we 2699 // don't need to do anything. 2700 if (CGM.getLangOpts().OpenMPIsDevice || OffloadEntriesInfoManager.empty()) 2701 return nullptr; 2702 2703 auto &M = CGM.getModule(); 2704 auto &C = CGM.getContext(); 2705 2706 // Get list of devices we care about 2707 auto &Devices = CGM.getLangOpts().OMPTargetTriples; 2708 2709 // We should be creating an offloading descriptor only if there are devices 2710 // specified. 2711 assert(!Devices.empty() && "No OpenMP offloading devices??"); 2712 2713 // Create the external variables that will point to the begin and end of the 2714 // host entries section. These will be defined by the linker. 
2715 auto *OffloadEntryTy = 2716 CGM.getTypes().ConvertTypeForMem(getTgtOffloadEntryQTy()); 2717 llvm::GlobalVariable *HostEntriesBegin = new llvm::GlobalVariable( 2718 M, OffloadEntryTy, /*isConstant=*/true, 2719 llvm::GlobalValue::ExternalLinkage, /*Initializer=*/nullptr, 2720 ".omp_offloading.entries_begin"); 2721 llvm::GlobalVariable *HostEntriesEnd = new llvm::GlobalVariable( 2722 M, OffloadEntryTy, /*isConstant=*/true, 2723 llvm::GlobalValue::ExternalLinkage, /*Initializer=*/nullptr, 2724 ".omp_offloading.entries_end"); 2725 2726 // Create all device images 2727 llvm::SmallVector<llvm::Constant *, 4> DeviceImagesEntires; 2728 auto *DeviceImageTy = cast<llvm::StructType>( 2729 CGM.getTypes().ConvertTypeForMem(getTgtDeviceImageQTy())); 2730 2731 for (unsigned i = 0; i < Devices.size(); ++i) { 2732 StringRef T = Devices[i].getTriple(); 2733 auto *ImgBegin = new llvm::GlobalVariable( 2734 M, CGM.Int8Ty, /*isConstant=*/true, llvm::GlobalValue::ExternalLinkage, 2735 /*Initializer=*/nullptr, 2736 Twine(".omp_offloading.img_start.") + Twine(T)); 2737 auto *ImgEnd = new llvm::GlobalVariable( 2738 M, CGM.Int8Ty, /*isConstant=*/true, llvm::GlobalValue::ExternalLinkage, 2739 /*Initializer=*/nullptr, Twine(".omp_offloading.img_end.") + Twine(T)); 2740 2741 llvm::Constant *Dev = 2742 llvm::ConstantStruct::get(DeviceImageTy, ImgBegin, ImgEnd, 2743 HostEntriesBegin, HostEntriesEnd, nullptr); 2744 DeviceImagesEntires.push_back(Dev); 2745 } 2746 2747 // Create device images global array. 
2748 llvm::ArrayType *DeviceImagesInitTy = 2749 llvm::ArrayType::get(DeviceImageTy, DeviceImagesEntires.size()); 2750 llvm::Constant *DeviceImagesInit = 2751 llvm::ConstantArray::get(DeviceImagesInitTy, DeviceImagesEntires); 2752 2753 llvm::GlobalVariable *DeviceImages = new llvm::GlobalVariable( 2754 M, DeviceImagesInitTy, /*isConstant=*/true, 2755 llvm::GlobalValue::InternalLinkage, DeviceImagesInit, 2756 ".omp_offloading.device_images"); 2757 DeviceImages->setUnnamedAddr(true); 2758 2759 // This is a Zero array to be used in the creation of the constant expressions 2760 llvm::Constant *Index[] = {llvm::Constant::getNullValue(CGM.Int32Ty), 2761 llvm::Constant::getNullValue(CGM.Int32Ty)}; 2762 2763 // Create the target region descriptor. 2764 auto *BinaryDescriptorTy = cast<llvm::StructType>( 2765 CGM.getTypes().ConvertTypeForMem(getTgtBinaryDescriptorQTy())); 2766 llvm::Constant *TargetRegionsDescriptorInit = llvm::ConstantStruct::get( 2767 BinaryDescriptorTy, llvm::ConstantInt::get(CGM.Int32Ty, Devices.size()), 2768 llvm::ConstantExpr::getGetElementPtr(DeviceImagesInitTy, DeviceImages, 2769 Index), 2770 HostEntriesBegin, HostEntriesEnd, nullptr); 2771 2772 auto *Desc = new llvm::GlobalVariable( 2773 M, BinaryDescriptorTy, /*isConstant=*/true, 2774 llvm::GlobalValue::InternalLinkage, TargetRegionsDescriptorInit, 2775 ".omp_offloading.descriptor"); 2776 2777 // Emit code to register or unregister the descriptor at execution 2778 // startup or closing, respectively. 2779 2780 // Create a variable to drive the registration and unregistration of the 2781 // descriptor, so we can reuse the logic that emits Ctors and Dtors. 
2782 auto *IdentInfo = &C.Idents.get(".omp_offloading.reg_unreg_var"); 2783 ImplicitParamDecl RegUnregVar(C, C.getTranslationUnitDecl(), SourceLocation(), 2784 IdentInfo, C.CharTy); 2785 2786 auto *UnRegFn = createOffloadingBinaryDescriptorFunction( 2787 CGM, ".omp_offloading.descriptor_unreg", 2788 [&](CodeGenFunction &CGF, PrePostActionTy &) { 2789 CGF.EmitCallOrInvoke(createRuntimeFunction(OMPRTL__tgt_unregister_lib), 2790 Desc); 2791 }); 2792 auto *RegFn = createOffloadingBinaryDescriptorFunction( 2793 CGM, ".omp_offloading.descriptor_reg", 2794 [&](CodeGenFunction &CGF, PrePostActionTy &) { 2795 CGF.EmitCallOrInvoke(createRuntimeFunction(OMPRTL__tgt_register_lib), 2796 Desc); 2797 CGM.getCXXABI().registerGlobalDtor(CGF, RegUnregVar, UnRegFn, Desc); 2798 }); 2799 return RegFn; 2800 } 2801 2802 void CGOpenMPRuntime::createOffloadEntry(llvm::Constant *ID, 2803 llvm::Constant *Addr, uint64_t Size) { 2804 StringRef Name = Addr->getName(); 2805 auto *TgtOffloadEntryType = cast<llvm::StructType>( 2806 CGM.getTypes().ConvertTypeForMem(getTgtOffloadEntryQTy())); 2807 llvm::LLVMContext &C = CGM.getModule().getContext(); 2808 llvm::Module &M = CGM.getModule(); 2809 2810 // Make sure the address has the right type. 2811 llvm::Constant *AddrPtr = llvm::ConstantExpr::getBitCast(ID, CGM.VoidPtrTy); 2812 2813 // Create constant string with the name. 2814 llvm::Constant *StrPtrInit = llvm::ConstantDataArray::getString(C, Name); 2815 2816 llvm::GlobalVariable *Str = 2817 new llvm::GlobalVariable(M, StrPtrInit->getType(), /*isConstant=*/true, 2818 llvm::GlobalValue::InternalLinkage, StrPtrInit, 2819 ".omp_offloading.entry_name"); 2820 Str->setUnnamedAddr(true); 2821 llvm::Constant *StrPtr = llvm::ConstantExpr::getBitCast(Str, CGM.Int8PtrTy); 2822 2823 // Create the entry struct. 
2824 llvm::Constant *EntryInit = llvm::ConstantStruct::get( 2825 TgtOffloadEntryType, AddrPtr, StrPtr, 2826 llvm::ConstantInt::get(CGM.SizeTy, Size), nullptr); 2827 llvm::GlobalVariable *Entry = new llvm::GlobalVariable( 2828 M, TgtOffloadEntryType, true, llvm::GlobalValue::ExternalLinkage, 2829 EntryInit, ".omp_offloading.entry"); 2830 2831 // The entry has to be created in the section the linker expects it to be. 2832 Entry->setSection(".omp_offloading.entries"); 2833 // We can't have any padding between symbols, so we need to have 1-byte 2834 // alignment. 2835 Entry->setAlignment(1); 2836 } 2837 2838 void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() { 2839 // Emit the offloading entries and metadata so that the device codegen side 2840 // can 2841 // easily figure out what to emit. The produced metadata looks like this: 2842 // 2843 // !omp_offload.info = !{!1, ...} 2844 // 2845 // Right now we only generate metadata for function that contain target 2846 // regions. 2847 2848 // If we do not have entries, we dont need to do anything. 2849 if (OffloadEntriesInfoManager.empty()) 2850 return; 2851 2852 llvm::Module &M = CGM.getModule(); 2853 llvm::LLVMContext &C = M.getContext(); 2854 SmallVector<OffloadEntriesInfoManagerTy::OffloadEntryInfo *, 16> 2855 OrderedEntries(OffloadEntriesInfoManager.size()); 2856 2857 // Create the offloading info metadata node. 2858 llvm::NamedMDNode *MD = M.getOrInsertNamedMetadata("omp_offload.info"); 2859 2860 // Auxiliar methods to create metadata values and strings. 
2861 auto getMDInt = [&](unsigned v) { 2862 return llvm::ConstantAsMetadata::get( 2863 llvm::ConstantInt::get(llvm::Type::getInt32Ty(C), v)); 2864 }; 2865 2866 auto getMDString = [&](StringRef v) { return llvm::MDString::get(C, v); }; 2867 2868 // Create function that emits metadata for each target region entry; 2869 auto &&TargetRegionMetadataEmitter = [&]( 2870 unsigned DeviceID, unsigned FileID, StringRef ParentName, unsigned Line, 2871 OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion &E) { 2872 llvm::SmallVector<llvm::Metadata *, 32> Ops; 2873 // Generate metadata for target regions. Each entry of this metadata 2874 // contains: 2875 // - Entry 0 -> Kind of this type of metadata (0). 2876 // - Entry 1 -> Device ID of the file where the entry was identified. 2877 // - Entry 2 -> File ID of the file where the entry was identified. 2878 // - Entry 3 -> Mangled name of the function where the entry was identified. 2879 // - Entry 4 -> Line in the file where the entry was identified. 2880 // - Entry 5 -> Order the entry was created. 2881 // The first element of the metadata node is the kind. 2882 Ops.push_back(getMDInt(E.getKind())); 2883 Ops.push_back(getMDInt(DeviceID)); 2884 Ops.push_back(getMDInt(FileID)); 2885 Ops.push_back(getMDString(ParentName)); 2886 Ops.push_back(getMDInt(Line)); 2887 Ops.push_back(getMDInt(E.getOrder())); 2888 2889 // Save this entry in the right position of the ordered entries array. 2890 OrderedEntries[E.getOrder()] = &E; 2891 2892 // Add metadata to the named metadata node. 
2893 MD->addOperand(llvm::MDNode::get(C, Ops)); 2894 }; 2895 2896 OffloadEntriesInfoManager.actOnTargetRegionEntriesInfo( 2897 TargetRegionMetadataEmitter); 2898 2899 for (auto *E : OrderedEntries) { 2900 assert(E && "All ordered entries must exist!"); 2901 if (auto *CE = 2902 dyn_cast<OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion>( 2903 E)) { 2904 assert(CE->getID() && CE->getAddress() && 2905 "Entry ID and Addr are invalid!"); 2906 createOffloadEntry(CE->getID(), CE->getAddress(), /*Size=*/0); 2907 } else 2908 llvm_unreachable("Unsupported entry kind."); 2909 } 2910 } 2911 2912 /// \brief Loads all the offload entries information from the host IR 2913 /// metadata. 2914 void CGOpenMPRuntime::loadOffloadInfoMetadata() { 2915 // If we are in target mode, load the metadata from the host IR. This code has 2916 // to match the metadaata creation in createOffloadEntriesAndInfoMetadata(). 2917 2918 if (!CGM.getLangOpts().OpenMPIsDevice) 2919 return; 2920 2921 if (CGM.getLangOpts().OMPHostIRFile.empty()) 2922 return; 2923 2924 auto Buf = llvm::MemoryBuffer::getFile(CGM.getLangOpts().OMPHostIRFile); 2925 if (Buf.getError()) 2926 return; 2927 2928 llvm::LLVMContext C; 2929 auto ME = llvm::parseBitcodeFile(Buf.get()->getMemBufferRef(), C); 2930 2931 if (ME.getError()) 2932 return; 2933 2934 llvm::NamedMDNode *MD = ME.get()->getNamedMetadata("omp_offload.info"); 2935 if (!MD) 2936 return; 2937 2938 for (auto I : MD->operands()) { 2939 llvm::MDNode *MN = cast<llvm::MDNode>(I); 2940 2941 auto getMDInt = [&](unsigned Idx) { 2942 llvm::ConstantAsMetadata *V = 2943 cast<llvm::ConstantAsMetadata>(MN->getOperand(Idx)); 2944 return cast<llvm::ConstantInt>(V->getValue())->getZExtValue(); 2945 }; 2946 2947 auto getMDString = [&](unsigned Idx) { 2948 llvm::MDString *V = cast<llvm::MDString>(MN->getOperand(Idx)); 2949 return V->getString(); 2950 }; 2951 2952 switch (getMDInt(0)) { 2953 default: 2954 llvm_unreachable("Unexpected metadata!"); 2955 break; 2956 case 
OffloadEntriesInfoManagerTy::OffloadEntryInfo:: 2957 OFFLOAD_ENTRY_INFO_TARGET_REGION: 2958 OffloadEntriesInfoManager.initializeTargetRegionEntryInfo( 2959 /*DeviceID=*/getMDInt(1), /*FileID=*/getMDInt(2), 2960 /*ParentName=*/getMDString(3), /*Line=*/getMDInt(4), 2961 /*Order=*/getMDInt(5)); 2962 break; 2963 } 2964 } 2965 } 2966 2967 void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) { 2968 if (!KmpRoutineEntryPtrTy) { 2969 // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type. 2970 auto &C = CGM.getContext(); 2971 QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy}; 2972 FunctionProtoType::ExtProtoInfo EPI; 2973 KmpRoutineEntryPtrQTy = C.getPointerType( 2974 C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI)); 2975 KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy); 2976 } 2977 } 2978 2979 static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC, 2980 QualType FieldTy) { 2981 auto *Field = FieldDecl::Create( 2982 C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy, 2983 C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()), 2984 /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit); 2985 Field->setAccess(AS_public); 2986 DC->addDecl(Field); 2987 return Field; 2988 } 2989 2990 QualType CGOpenMPRuntime::getTgtOffloadEntryQTy() { 2991 2992 // Make sure the type of the entry is already created. This is the type we 2993 // have to create: 2994 // struct __tgt_offload_entry{ 2995 // void *addr; // Pointer to the offload entry info. 2996 // // (function or global) 2997 // char *name; // Name of the function or global. 2998 // size_t size; // Size of the entry info (0 if it a function). 
2999 // }; 3000 if (TgtOffloadEntryQTy.isNull()) { 3001 ASTContext &C = CGM.getContext(); 3002 auto *RD = C.buildImplicitRecord("__tgt_offload_entry"); 3003 RD->startDefinition(); 3004 addFieldToRecordDecl(C, RD, C.VoidPtrTy); 3005 addFieldToRecordDecl(C, RD, C.getPointerType(C.CharTy)); 3006 addFieldToRecordDecl(C, RD, C.getSizeType()); 3007 RD->completeDefinition(); 3008 TgtOffloadEntryQTy = C.getRecordType(RD); 3009 } 3010 return TgtOffloadEntryQTy; 3011 } 3012 3013 QualType CGOpenMPRuntime::getTgtDeviceImageQTy() { 3014 // These are the types we need to build: 3015 // struct __tgt_device_image{ 3016 // void *ImageStart; // Pointer to the target code start. 3017 // void *ImageEnd; // Pointer to the target code end. 3018 // // We also add the host entries to the device image, as it may be useful 3019 // // for the target runtime to have access to that information. 3020 // __tgt_offload_entry *EntriesBegin; // Begin of the table with all 3021 // // the entries. 3022 // __tgt_offload_entry *EntriesEnd; // End of the table with all the 3023 // // entries (non inclusive). 3024 // }; 3025 if (TgtDeviceImageQTy.isNull()) { 3026 ASTContext &C = CGM.getContext(); 3027 auto *RD = C.buildImplicitRecord("__tgt_device_image"); 3028 RD->startDefinition(); 3029 addFieldToRecordDecl(C, RD, C.VoidPtrTy); 3030 addFieldToRecordDecl(C, RD, C.VoidPtrTy); 3031 addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy())); 3032 addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy())); 3033 RD->completeDefinition(); 3034 TgtDeviceImageQTy = C.getRecordType(RD); 3035 } 3036 return TgtDeviceImageQTy; 3037 } 3038 3039 QualType CGOpenMPRuntime::getTgtBinaryDescriptorQTy() { 3040 // struct __tgt_bin_desc{ 3041 // int32_t NumDevices; // Number of devices supported. 3042 // __tgt_device_image *DeviceImages; // Arrays of device images 3043 // // (one per device). 3044 // __tgt_offload_entry *EntriesBegin; // Begin of the table with all the 3045 // // entries. 
3046 // __tgt_offload_entry *EntriesEnd; // End of the table with all the 3047 // // entries (non inclusive). 3048 // }; 3049 if (TgtBinaryDescriptorQTy.isNull()) { 3050 ASTContext &C = CGM.getContext(); 3051 auto *RD = C.buildImplicitRecord("__tgt_bin_desc"); 3052 RD->startDefinition(); 3053 addFieldToRecordDecl( 3054 C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true)); 3055 addFieldToRecordDecl(C, RD, C.getPointerType(getTgtDeviceImageQTy())); 3056 addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy())); 3057 addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy())); 3058 RD->completeDefinition(); 3059 TgtBinaryDescriptorQTy = C.getRecordType(RD); 3060 } 3061 return TgtBinaryDescriptorQTy; 3062 } 3063 3064 namespace { 3065 struct PrivateHelpersTy { 3066 PrivateHelpersTy(const VarDecl *Original, const VarDecl *PrivateCopy, 3067 const VarDecl *PrivateElemInit) 3068 : Original(Original), PrivateCopy(PrivateCopy), 3069 PrivateElemInit(PrivateElemInit) {} 3070 const VarDecl *Original; 3071 const VarDecl *PrivateCopy; 3072 const VarDecl *PrivateElemInit; 3073 }; 3074 typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy; 3075 } // anonymous namespace 3076 3077 static RecordDecl * 3078 createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) { 3079 if (!Privates.empty()) { 3080 auto &C = CGM.getContext(); 3081 // Build struct .kmp_privates_t. 
{ 3082 // /* private vars */ 3083 // }; 3084 auto *RD = C.buildImplicitRecord(".kmp_privates.t"); 3085 RD->startDefinition(); 3086 for (auto &&Pair : Privates) { 3087 auto *VD = Pair.second.Original; 3088 auto Type = VD->getType(); 3089 Type = Type.getNonReferenceType(); 3090 auto *FD = addFieldToRecordDecl(C, RD, Type); 3091 if (VD->hasAttrs()) { 3092 for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()), 3093 E(VD->getAttrs().end()); 3094 I != E; ++I) 3095 FD->addAttr(*I); 3096 } 3097 } 3098 RD->completeDefinition(); 3099 return RD; 3100 } 3101 return nullptr; 3102 } 3103 3104 static RecordDecl * 3105 createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind, 3106 QualType KmpInt32Ty, 3107 QualType KmpRoutineEntryPointerQTy) { 3108 auto &C = CGM.getContext(); 3109 // Build struct kmp_task_t { 3110 // void * shareds; 3111 // kmp_routine_entry_t routine; 3112 // kmp_int32 part_id; 3113 // kmp_routine_entry_t destructors; 3114 // For taskloops additional fields: 3115 // kmp_uint64 lb; 3116 // kmp_uint64 ub; 3117 // kmp_int64 st; 3118 // kmp_int32 liter; 3119 // }; 3120 auto *RD = C.buildImplicitRecord("kmp_task_t"); 3121 RD->startDefinition(); 3122 addFieldToRecordDecl(C, RD, C.VoidPtrTy); 3123 addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy); 3124 addFieldToRecordDecl(C, RD, KmpInt32Ty); 3125 addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy); 3126 if (isOpenMPTaskLoopDirective(Kind)) { 3127 QualType KmpUInt64Ty = 3128 CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0); 3129 QualType KmpInt64Ty = 3130 CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1); 3131 addFieldToRecordDecl(C, RD, KmpUInt64Ty); 3132 addFieldToRecordDecl(C, RD, KmpUInt64Ty); 3133 addFieldToRecordDecl(C, RD, KmpInt64Ty); 3134 addFieldToRecordDecl(C, RD, KmpInt32Ty); 3135 } 3136 RD->completeDefinition(); 3137 return RD; 3138 } 3139 3140 static RecordDecl * 3141 createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, 
QualType KmpTaskTQTy, 3142 ArrayRef<PrivateDataTy> Privates) { 3143 auto &C = CGM.getContext(); 3144 // Build struct kmp_task_t_with_privates { 3145 // kmp_task_t task_data; 3146 // .kmp_privates_t. privates; 3147 // }; 3148 auto *RD = C.buildImplicitRecord("kmp_task_t_with_privates"); 3149 RD->startDefinition(); 3150 addFieldToRecordDecl(C, RD, KmpTaskTQTy); 3151 if (auto *PrivateRD = createPrivatesRecordDecl(CGM, Privates)) { 3152 addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD)); 3153 } 3154 RD->completeDefinition(); 3155 return RD; 3156 } 3157 3158 /// \brief Emit a proxy function which accepts kmp_task_t as the second 3159 /// argument. 3160 /// \code 3161 /// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) { 3162 /// TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, tt, 3163 /// For taskloops: 3164 /// tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter, 3165 /// tt->shareds); 3166 /// return 0; 3167 /// } 3168 /// \endcode 3169 static llvm::Value * 3170 emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc, 3171 OpenMPDirectiveKind Kind, QualType KmpInt32Ty, 3172 QualType KmpTaskTWithPrivatesPtrQTy, 3173 QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy, 3174 QualType SharedsPtrTy, llvm::Value *TaskFunction, 3175 llvm::Value *TaskPrivatesMap) { 3176 auto &C = CGM.getContext(); 3177 FunctionArgList Args; 3178 ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty); 3179 ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, 3180 /*Id=*/nullptr, 3181 KmpTaskTWithPrivatesPtrQTy.withRestrict()); 3182 Args.push_back(&GtidArg); 3183 Args.push_back(&TaskTypeArg); 3184 auto &TaskEntryFnInfo = 3185 CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args); 3186 auto *TaskEntryTy = CGM.getTypes().GetFunctionType(TaskEntryFnInfo); 3187 auto *TaskEntry = 3188 llvm::Function::Create(TaskEntryTy, llvm::GlobalValue::InternalLinkage, 3189 ".omp_task_entry.", &CGM.getModule()); 
3190 CGM.SetInternalFunctionAttributes(/*D=*/nullptr, TaskEntry, TaskEntryFnInfo); 3191 CodeGenFunction CGF(CGM); 3192 CGF.disableDebugInfo(); 3193 CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args); 3194 3195 // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map, 3196 // tt, 3197 // For taskloops: 3198 // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter, 3199 // tt->task_data.shareds); 3200 auto *GtidParam = CGF.EmitLoadOfScalar( 3201 CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc); 3202 LValue TDBase = CGF.EmitLoadOfPointerLValue( 3203 CGF.GetAddrOfLocalVar(&TaskTypeArg), 3204 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>()); 3205 auto *KmpTaskTWithPrivatesQTyRD = 3206 cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl()); 3207 LValue Base = 3208 CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin()); 3209 auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl()); 3210 auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId); 3211 auto PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI); 3212 auto *PartidParam = PartIdLVal.getPointer(); 3213 3214 auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds); 3215 auto SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI); 3216 auto *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 3217 CGF.EmitLoadOfLValue(SharedsLVal, Loc).getScalarVal(), 3218 CGF.ConvertTypeForMem(SharedsPtrTy)); 3219 3220 auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1); 3221 llvm::Value *PrivatesParam; 3222 if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) { 3223 auto PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI); 3224 PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 3225 PrivatesLVal.getPointer(), CGF.VoidPtrTy); 3226 } else 3227 PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); 3228 3229 
llvm::Value *CommonArgs[] = {GtidParam, PartidParam, PrivatesParam, 3230 TaskPrivatesMap, 3231 CGF.Builder 3232 .CreatePointerBitCastOrAddrSpaceCast( 3233 TDBase.getAddress(), CGF.VoidPtrTy) 3234 .getPointer()}; 3235 SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs), 3236 std::end(CommonArgs)); 3237 if (isOpenMPTaskLoopDirective(Kind)) { 3238 auto LBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound); 3239 auto LBLVal = CGF.EmitLValueForField(Base, *LBFI); 3240 auto *LBParam = CGF.EmitLoadOfLValue(LBLVal, Loc).getScalarVal(); 3241 auto UBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound); 3242 auto UBLVal = CGF.EmitLValueForField(Base, *UBFI); 3243 auto *UBParam = CGF.EmitLoadOfLValue(UBLVal, Loc).getScalarVal(); 3244 auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride); 3245 auto StLVal = CGF.EmitLValueForField(Base, *StFI); 3246 auto *StParam = CGF.EmitLoadOfLValue(StLVal, Loc).getScalarVal(); 3247 auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter); 3248 auto LILVal = CGF.EmitLValueForField(Base, *LIFI); 3249 auto *LIParam = CGF.EmitLoadOfLValue(LILVal, Loc).getScalarVal(); 3250 CallArgs.push_back(LBParam); 3251 CallArgs.push_back(UBParam); 3252 CallArgs.push_back(StParam); 3253 CallArgs.push_back(LIParam); 3254 } 3255 CallArgs.push_back(SharedsParam); 3256 3257 CGF.EmitCallOrInvoke(TaskFunction, CallArgs); 3258 CGF.EmitStoreThroughLValue( 3259 RValue::get(CGF.Builder.getInt32(/*C=*/0)), 3260 CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty)); 3261 CGF.FinishFunction(); 3262 return TaskEntry; 3263 } 3264 3265 static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM, 3266 SourceLocation Loc, 3267 QualType KmpInt32Ty, 3268 QualType KmpTaskTWithPrivatesPtrQTy, 3269 QualType KmpTaskTWithPrivatesQTy) { 3270 auto &C = CGM.getContext(); 3271 FunctionArgList Args; 3272 ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty); 3273 ImplicitParamDecl TaskTypeArg(C, 
/*DC=*/nullptr, Loc, 3274 /*Id=*/nullptr, 3275 KmpTaskTWithPrivatesPtrQTy.withRestrict()); 3276 Args.push_back(&GtidArg); 3277 Args.push_back(&TaskTypeArg); 3278 FunctionType::ExtInfo Info; 3279 auto &DestructorFnInfo = 3280 CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args); 3281 auto *DestructorFnTy = CGM.getTypes().GetFunctionType(DestructorFnInfo); 3282 auto *DestructorFn = 3283 llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage, 3284 ".omp_task_destructor.", &CGM.getModule()); 3285 CGM.SetInternalFunctionAttributes(/*D=*/nullptr, DestructorFn, 3286 DestructorFnInfo); 3287 CodeGenFunction CGF(CGM); 3288 CGF.disableDebugInfo(); 3289 CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo, 3290 Args); 3291 3292 LValue Base = CGF.EmitLoadOfPointerLValue( 3293 CGF.GetAddrOfLocalVar(&TaskTypeArg), 3294 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>()); 3295 auto *KmpTaskTWithPrivatesQTyRD = 3296 cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl()); 3297 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin()); 3298 Base = CGF.EmitLValueForField(Base, *FI); 3299 for (auto *Field : 3300 cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) { 3301 if (auto DtorKind = Field->getType().isDestructedType()) { 3302 auto FieldLValue = CGF.EmitLValueForField(Base, Field); 3303 CGF.pushDestroy(DtorKind, FieldLValue.getAddress(), Field->getType()); 3304 } 3305 } 3306 CGF.FinishFunction(); 3307 return DestructorFn; 3308 } 3309 3310 /// \brief Emit a privates mapping function for correct handling of private and 3311 /// firstprivate variables. 3312 /// \code 3313 /// void .omp_task_privates_map.(const .privates. 
*noalias privs, <ty1> 3314 /// **noalias priv1,..., <tyn> **noalias privn) { 3315 /// *priv1 = &.privates.priv1; 3316 /// ...; 3317 /// *privn = &.privates.privn; 3318 /// } 3319 /// \endcode 3320 static llvm::Value * 3321 emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc, 3322 ArrayRef<const Expr *> PrivateVars, 3323 ArrayRef<const Expr *> FirstprivateVars, 3324 ArrayRef<const Expr *> LastprivateVars, 3325 QualType PrivatesQTy, 3326 ArrayRef<PrivateDataTy> Privates) { 3327 auto &C = CGM.getContext(); 3328 FunctionArgList Args; 3329 ImplicitParamDecl TaskPrivatesArg( 3330 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3331 C.getPointerType(PrivatesQTy).withConst().withRestrict()); 3332 Args.push_back(&TaskPrivatesArg); 3333 llvm::DenseMap<const VarDecl *, unsigned> PrivateVarsPos; 3334 unsigned Counter = 1; 3335 for (auto *E: PrivateVars) { 3336 Args.push_back(ImplicitParamDecl::Create( 3337 C, /*DC=*/nullptr, Loc, 3338 /*Id=*/nullptr, C.getPointerType(C.getPointerType(E->getType())) 3339 .withConst() 3340 .withRestrict())); 3341 auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 3342 PrivateVarsPos[VD] = Counter; 3343 ++Counter; 3344 } 3345 for (auto *E : FirstprivateVars) { 3346 Args.push_back(ImplicitParamDecl::Create( 3347 C, /*DC=*/nullptr, Loc, 3348 /*Id=*/nullptr, C.getPointerType(C.getPointerType(E->getType())) 3349 .withConst() 3350 .withRestrict())); 3351 auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 3352 PrivateVarsPos[VD] = Counter; 3353 ++Counter; 3354 } 3355 for (auto *E: LastprivateVars) { 3356 Args.push_back(ImplicitParamDecl::Create( 3357 C, /*DC=*/nullptr, Loc, 3358 /*Id=*/nullptr, C.getPointerType(C.getPointerType(E->getType())) 3359 .withConst() 3360 .withRestrict())); 3361 auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 3362 PrivateVarsPos[VD] = Counter; 3363 ++Counter; 3364 } 3365 auto &TaskPrivatesMapFnInfo = 3366 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 3367 auto 
*TaskPrivatesMapTy = 3368 CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo); 3369 auto *TaskPrivatesMap = llvm::Function::Create( 3370 TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage, 3371 ".omp_task_privates_map.", &CGM.getModule()); 3372 CGM.SetInternalFunctionAttributes(/*D=*/nullptr, TaskPrivatesMap, 3373 TaskPrivatesMapFnInfo); 3374 TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline); 3375 CodeGenFunction CGF(CGM); 3376 CGF.disableDebugInfo(); 3377 CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap, 3378 TaskPrivatesMapFnInfo, Args); 3379 3380 // *privi = &.privates.privi; 3381 LValue Base = CGF.EmitLoadOfPointerLValue( 3382 CGF.GetAddrOfLocalVar(&TaskPrivatesArg), 3383 TaskPrivatesArg.getType()->castAs<PointerType>()); 3384 auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl()); 3385 Counter = 0; 3386 for (auto *Field : PrivatesQTyRD->fields()) { 3387 auto FieldLVal = CGF.EmitLValueForField(Base, Field); 3388 auto *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]]; 3389 auto RefLVal = CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType()); 3390 auto RefLoadLVal = CGF.EmitLoadOfPointerLValue( 3391 RefLVal.getAddress(), RefLVal.getType()->castAs<PointerType>()); 3392 CGF.EmitStoreOfScalar(FieldLVal.getPointer(), RefLoadLVal); 3393 ++Counter; 3394 } 3395 CGF.FinishFunction(); 3396 return TaskPrivatesMap; 3397 } 3398 3399 static int array_pod_sort_comparator(const PrivateDataTy *P1, 3400 const PrivateDataTy *P2) { 3401 return P1->first < P2->first ? 1 : (P2->first < P1->first ? -1 : 0); 3402 } 3403 3404 /// Emit initialization for private variables in task-based directives. 
3405 static void emitPrivatesInit(CodeGenFunction &CGF, 3406 const OMPExecutableDirective &D, 3407 Address KmpTaskSharedsPtr, LValue TDBase, 3408 const RecordDecl *KmpTaskTWithPrivatesQTyRD, 3409 QualType SharedsTy, QualType SharedsPtrTy, 3410 const OMPTaskDataTy &Data, 3411 ArrayRef<PrivateDataTy> Privates, bool ForDup) { 3412 auto &C = CGF.getContext(); 3413 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin()); 3414 LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI); 3415 LValue SrcBase; 3416 if (!Data.FirstprivateVars.empty()) { 3417 SrcBase = CGF.MakeAddrLValue( 3418 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 3419 KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy)), 3420 SharedsTy); 3421 } 3422 CodeGenFunction::CGCapturedStmtInfo CapturesInfo( 3423 cast<CapturedStmt>(*D.getAssociatedStmt())); 3424 FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin(); 3425 for (auto &&Pair : Privates) { 3426 auto *VD = Pair.second.PrivateCopy; 3427 auto *Init = VD->getAnyInitializer(); 3428 if (Init && (!ForDup || (isa<CXXConstructExpr>(Init) && 3429 !CGF.isTrivialInitializer(Init)))) { 3430 LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI); 3431 if (auto *Elem = Pair.second.PrivateElemInit) { 3432 auto *OriginalVD = Pair.second.Original; 3433 auto *SharedField = CapturesInfo.lookup(OriginalVD); 3434 auto SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField); 3435 SharedRefLValue = CGF.MakeAddrLValue( 3436 Address(SharedRefLValue.getPointer(), C.getDeclAlign(OriginalVD)), 3437 SharedRefLValue.getType(), AlignmentSource::Decl); 3438 QualType Type = OriginalVD->getType(); 3439 if (Type->isArrayType()) { 3440 // Initialize firstprivate array. 3441 if (!isa<CXXConstructExpr>(Init) || CGF.isTrivialInitializer(Init)) { 3442 // Perform simple memcpy. 
3443 CGF.EmitAggregateAssign(PrivateLValue.getAddress(), 3444 SharedRefLValue.getAddress(), Type); 3445 } else { 3446 // Initialize firstprivate array using element-by-element 3447 // intialization. 3448 CGF.EmitOMPAggregateAssign( 3449 PrivateLValue.getAddress(), SharedRefLValue.getAddress(), Type, 3450 [&CGF, Elem, Init, &CapturesInfo](Address DestElement, 3451 Address SrcElement) { 3452 // Clean up any temporaries needed by the initialization. 3453 CodeGenFunction::OMPPrivateScope InitScope(CGF); 3454 InitScope.addPrivate( 3455 Elem, [SrcElement]() -> Address { return SrcElement; }); 3456 (void)InitScope.Privatize(); 3457 // Emit initialization for single element. 3458 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII( 3459 CGF, &CapturesInfo); 3460 CGF.EmitAnyExprToMem(Init, DestElement, 3461 Init->getType().getQualifiers(), 3462 /*IsInitializer=*/false); 3463 }); 3464 } 3465 } else { 3466 CodeGenFunction::OMPPrivateScope InitScope(CGF); 3467 InitScope.addPrivate(Elem, [SharedRefLValue]() -> Address { 3468 return SharedRefLValue.getAddress(); 3469 }); 3470 (void)InitScope.Privatize(); 3471 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo); 3472 CGF.EmitExprAsInit(Init, VD, PrivateLValue, 3473 /*capturedByInit=*/false); 3474 } 3475 } else 3476 CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false); 3477 } 3478 ++FI; 3479 } 3480 } 3481 3482 /// Check if duplication function is required for taskloops. 
3483 static bool checkInitIsRequired(CodeGenFunction &CGF, 3484 ArrayRef<PrivateDataTy> Privates) { 3485 bool InitRequired = false; 3486 for (auto &&Pair : Privates) { 3487 auto *VD = Pair.second.PrivateCopy; 3488 auto *Init = VD->getAnyInitializer(); 3489 InitRequired = InitRequired || (Init && isa<CXXConstructExpr>(Init) && 3490 !CGF.isTrivialInitializer(Init)); 3491 } 3492 return InitRequired; 3493 } 3494 3495 3496 /// Emit task_dup function (for initialization of 3497 /// private/firstprivate/lastprivate vars and last_iter flag) 3498 /// \code 3499 /// void __task_dup_entry(kmp_task_t *task_dst, const kmp_task_t *task_src, int 3500 /// lastpriv) { 3501 /// // setup lastprivate flag 3502 /// task_dst->last = lastpriv; 3503 /// // could be constructor calls here... 3504 /// } 3505 /// \endcode 3506 static llvm::Value * 3507 emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc, 3508 const OMPExecutableDirective &D, 3509 QualType KmpTaskTWithPrivatesPtrQTy, 3510 const RecordDecl *KmpTaskTWithPrivatesQTyRD, 3511 const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy, 3512 QualType SharedsPtrTy, const OMPTaskDataTy &Data, 3513 ArrayRef<PrivateDataTy> Privates, bool WithLastIter) { 3514 auto &C = CGM.getContext(); 3515 FunctionArgList Args; 3516 ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc, 3517 /*Id=*/nullptr, KmpTaskTWithPrivatesPtrQTy); 3518 ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc, 3519 /*Id=*/nullptr, KmpTaskTWithPrivatesPtrQTy); 3520 ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc, 3521 /*Id=*/nullptr, C.IntTy); 3522 Args.push_back(&DstArg); 3523 Args.push_back(&SrcArg); 3524 Args.push_back(&LastprivArg); 3525 auto &TaskDupFnInfo = 3526 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 3527 auto *TaskDupTy = CGM.getTypes().GetFunctionType(TaskDupFnInfo); 3528 auto *TaskDup = 3529 llvm::Function::Create(TaskDupTy, llvm::GlobalValue::InternalLinkage, 3530 ".omp_task_dup.", &CGM.getModule()); 3531 
CGM.SetInternalFunctionAttributes(/*D=*/nullptr, TaskDup, TaskDupFnInfo); 3532 CodeGenFunction CGF(CGM); 3533 CGF.disableDebugInfo(); 3534 CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskDup, TaskDupFnInfo, Args); 3535 3536 LValue TDBase = CGF.EmitLoadOfPointerLValue( 3537 CGF.GetAddrOfLocalVar(&DstArg), 3538 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>()); 3539 // task_dst->liter = lastpriv; 3540 if (WithLastIter) { 3541 auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter); 3542 LValue Base = CGF.EmitLValueForField( 3543 TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin()); 3544 LValue LILVal = CGF.EmitLValueForField(Base, *LIFI); 3545 llvm::Value *Lastpriv = CGF.EmitLoadOfScalar( 3546 CGF.GetAddrOfLocalVar(&LastprivArg), /*Volatile=*/false, C.IntTy, Loc); 3547 CGF.EmitStoreOfScalar(Lastpriv, LILVal); 3548 } 3549 3550 // Emit initial values for private copies (if any). 3551 assert(!Privates.empty()); 3552 Address KmpTaskSharedsPtr = Address::invalid(); 3553 if (!Data.FirstprivateVars.empty()) { 3554 LValue TDBase = CGF.EmitLoadOfPointerLValue( 3555 CGF.GetAddrOfLocalVar(&SrcArg), 3556 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>()); 3557 LValue Base = CGF.EmitLValueForField( 3558 TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin()); 3559 KmpTaskSharedsPtr = Address( 3560 CGF.EmitLoadOfScalar(CGF.EmitLValueForField( 3561 Base, *std::next(KmpTaskTQTyRD->field_begin(), 3562 KmpTaskTShareds)), 3563 Loc), 3564 CGF.getNaturalTypeAlignment(SharedsTy)); 3565 } 3566 emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase, KmpTaskTWithPrivatesQTyRD, 3567 SharedsTy, SharedsPtrTy, Data, Privates, /*ForDup=*/true); 3568 CGF.FinishFunction(); 3569 return TaskDup; 3570 } 3571 3572 /// Checks if destructor function is required to be generated. 3573 /// \return true if cleanups are required, false otherwise. 
3574 static bool 3575 checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD) { 3576 bool NeedsCleanup = false; 3577 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin()); 3578 auto *PrivateRD = cast<RecordDecl>(FI->getType()->getAsTagDecl()); 3579 for (auto *FD : PrivateRD->fields()) { 3580 NeedsCleanup = NeedsCleanup || FD->getType().isDestructedType(); 3581 if (NeedsCleanup) 3582 break; 3583 } 3584 return NeedsCleanup; 3585 } 3586 3587 CGOpenMPRuntime::TaskResultTy 3588 CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc, 3589 const OMPExecutableDirective &D, 3590 llvm::Value *TaskFunction, QualType SharedsTy, 3591 Address Shareds, const OMPTaskDataTy &Data) { 3592 auto &C = CGM.getContext(); 3593 llvm::SmallVector<PrivateDataTy, 4> Privates; 3594 // Aggregate privates and sort them by the alignment. 3595 auto I = Data.PrivateCopies.begin(); 3596 for (auto *E : Data.PrivateVars) { 3597 auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 3598 Privates.push_back(std::make_pair( 3599 C.getDeclAlign(VD), 3600 PrivateHelpersTy(VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()), 3601 /*PrivateElemInit=*/nullptr))); 3602 ++I; 3603 } 3604 I = Data.FirstprivateCopies.begin(); 3605 auto IElemInitRef = Data.FirstprivateInits.begin(); 3606 for (auto *E : Data.FirstprivateVars) { 3607 auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 3608 Privates.push_back(std::make_pair( 3609 C.getDeclAlign(VD), 3610 PrivateHelpersTy( 3611 VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()), 3612 cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl())))); 3613 ++I; 3614 ++IElemInitRef; 3615 } 3616 I = Data.LastprivateCopies.begin(); 3617 for (auto *E : Data.LastprivateVars) { 3618 auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 3619 Privates.push_back(std::make_pair( 3620 C.getDeclAlign(VD), 3621 PrivateHelpersTy(VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()), 3622 /*PrivateElemInit=*/nullptr))); 3623 ++I; 3624 } 
3625 llvm::array_pod_sort(Privates.begin(), Privates.end(), 3626 array_pod_sort_comparator); 3627 auto KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1); 3628 // Build type kmp_routine_entry_t (if not built yet). 3629 emitKmpRoutineEntryT(KmpInt32Ty); 3630 // Build type kmp_task_t (if not built yet). 3631 if (KmpTaskTQTy.isNull()) { 3632 KmpTaskTQTy = C.getRecordType(createKmpTaskTRecordDecl( 3633 CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy)); 3634 } 3635 auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl()); 3636 // Build particular struct kmp_task_t for the given task. 3637 auto *KmpTaskTWithPrivatesQTyRD = 3638 createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates); 3639 auto KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD); 3640 QualType KmpTaskTWithPrivatesPtrQTy = 3641 C.getPointerType(KmpTaskTWithPrivatesQTy); 3642 auto *KmpTaskTWithPrivatesTy = CGF.ConvertType(KmpTaskTWithPrivatesQTy); 3643 auto *KmpTaskTWithPrivatesPtrTy = KmpTaskTWithPrivatesTy->getPointerTo(); 3644 auto *KmpTaskTWithPrivatesTySize = CGF.getTypeSize(KmpTaskTWithPrivatesQTy); 3645 QualType SharedsPtrTy = C.getPointerType(SharedsTy); 3646 3647 // Emit initial values for private copies (if any). 
3648 llvm::Value *TaskPrivatesMap = nullptr; 3649 auto *TaskPrivatesMapTy = 3650 std::next(cast<llvm::Function>(TaskFunction)->getArgumentList().begin(), 3651 3) 3652 ->getType(); 3653 if (!Privates.empty()) { 3654 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin()); 3655 TaskPrivatesMap = emitTaskPrivateMappingFunction( 3656 CGM, Loc, Data.PrivateVars, Data.FirstprivateVars, Data.LastprivateVars, 3657 FI->getType(), Privates); 3658 TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 3659 TaskPrivatesMap, TaskPrivatesMapTy); 3660 } else { 3661 TaskPrivatesMap = llvm::ConstantPointerNull::get( 3662 cast<llvm::PointerType>(TaskPrivatesMapTy)); 3663 } 3664 // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid, 3665 // kmp_task_t *tt); 3666 auto *TaskEntry = emitProxyTaskFunction( 3667 CGM, Loc, D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy, 3668 KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction, 3669 TaskPrivatesMap); 3670 3671 // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid, 3672 // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, 3673 // kmp_routine_entry_t *task_entry); 3674 // Task flags. Format is taken from 3675 // http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp.h, 3676 // description of kmp_tasking_flags struct. 3677 enum { 3678 TiedFlag = 0x1, 3679 FinalFlag = 0x2, 3680 DestructorsFlag = 0x8, 3681 PriorityFlag = 0x20 3682 }; 3683 unsigned Flags = Data.Tied ? TiedFlag : 0; 3684 bool NeedsCleanup = false; 3685 if (!Privates.empty()) { 3686 NeedsCleanup = checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD); 3687 if (NeedsCleanup) 3688 Flags = Flags | DestructorsFlag; 3689 } 3690 if (Data.Priority.getInt()) 3691 Flags = Flags | PriorityFlag; 3692 auto *TaskFlags = 3693 Data.Final.getPointer() 3694 ? 
CGF.Builder.CreateSelect(Data.Final.getPointer(), 3695 CGF.Builder.getInt32(FinalFlag), 3696 CGF.Builder.getInt32(/*C=*/0)) 3697 : CGF.Builder.getInt32(Data.Final.getInt() ? FinalFlag : 0); 3698 TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags)); 3699 auto *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy)); 3700 llvm::Value *AllocArgs[] = {emitUpdateLocation(CGF, Loc), 3701 getThreadID(CGF, Loc), TaskFlags, 3702 KmpTaskTWithPrivatesTySize, SharedsSize, 3703 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 3704 TaskEntry, KmpRoutineEntryPtrTy)}; 3705 auto *NewTask = CGF.EmitRuntimeCall( 3706 createRuntimeFunction(OMPRTL__kmpc_omp_task_alloc), AllocArgs); 3707 auto *NewTaskNewTaskTTy = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 3708 NewTask, KmpTaskTWithPrivatesPtrTy); 3709 LValue Base = CGF.MakeNaturalAlignAddrLValue(NewTaskNewTaskTTy, 3710 KmpTaskTWithPrivatesQTy); 3711 LValue TDBase = 3712 CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin()); 3713 // Fill the data in the resulting kmp_task_t record. 3714 // Copy shareds if there are any. 3715 Address KmpTaskSharedsPtr = Address::invalid(); 3716 if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) { 3717 KmpTaskSharedsPtr = 3718 Address(CGF.EmitLoadOfScalar( 3719 CGF.EmitLValueForField( 3720 TDBase, *std::next(KmpTaskTQTyRD->field_begin(), 3721 KmpTaskTShareds)), 3722 Loc), 3723 CGF.getNaturalTypeAlignment(SharedsTy)); 3724 CGF.EmitAggregateCopy(KmpTaskSharedsPtr, Shareds, SharedsTy); 3725 } 3726 // Emit initial values for private copies (if any). 
3727 TaskResultTy Result; 3728 if (!Privates.empty()) { 3729 emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, Base, KmpTaskTWithPrivatesQTyRD, 3730 SharedsTy, SharedsPtrTy, Data, Privates, 3731 /*ForDup=*/false); 3732 if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) && 3733 (!Data.LastprivateVars.empty() || checkInitIsRequired(CGF, Privates))) { 3734 Result.TaskDupFn = emitTaskDupFunction( 3735 CGM, Loc, D, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTyRD, 3736 KmpTaskTQTyRD, SharedsTy, SharedsPtrTy, Data, Privates, 3737 /*WithLastIter=*/!Data.LastprivateVars.empty()); 3738 } 3739 } 3740 // Provide pointer to function with destructors for privates. 3741 llvm::Value *DestructorFn = 3742 NeedsCleanup ? emitDestructorsFunction(CGM, Loc, KmpInt32Ty, 3743 KmpTaskTWithPrivatesPtrQTy, 3744 KmpTaskTWithPrivatesQTy) 3745 : llvm::ConstantPointerNull::get( 3746 cast<llvm::PointerType>(KmpRoutineEntryPtrTy)); 3747 LValue Destructor = CGF.EmitLValueForField( 3748 TDBase, *std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTDestructors)); 3749 CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 3750 DestructorFn, KmpRoutineEntryPtrTy), 3751 Destructor); 3752 Result.NewTask = NewTask; 3753 Result.TaskEntry = TaskEntry; 3754 Result.NewTaskNewTaskTTy = NewTaskNewTaskTTy; 3755 Result.TDBase = TDBase; 3756 Result.KmpTaskTQTyRD = KmpTaskTQTyRD; 3757 return Result; 3758 } 3759 3760 void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc, 3761 const OMPExecutableDirective &D, 3762 llvm::Value *TaskFunction, 3763 QualType SharedsTy, Address Shareds, 3764 const Expr *IfCond, 3765 const OMPTaskDataTy &Data) { 3766 if (!CGF.HaveInsertPoint()) 3767 return; 3768 3769 TaskResultTy Result = 3770 emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data); 3771 llvm::Value *NewTask = Result.NewTask; 3772 llvm::Value *TaskEntry = Result.TaskEntry; 3773 llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy; 3774 LValue TDBase = Result.TDBase; 3775 
RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD; 3776 auto &C = CGM.getContext(); 3777 // Process list of dependences. 3778 Address DependenciesArray = Address::invalid(); 3779 unsigned NumDependencies = Data.Dependences.size(); 3780 if (NumDependencies) { 3781 // Dependence kind for RTL. 3782 enum RTLDependenceKindTy { DepIn = 0x01, DepInOut = 0x3 }; 3783 enum RTLDependInfoFieldsTy { BaseAddr, Len, Flags }; 3784 RecordDecl *KmpDependInfoRD; 3785 QualType FlagsTy = 3786 C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false); 3787 llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy); 3788 if (KmpDependInfoTy.isNull()) { 3789 KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info"); 3790 KmpDependInfoRD->startDefinition(); 3791 addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType()); 3792 addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType()); 3793 addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy); 3794 KmpDependInfoRD->completeDefinition(); 3795 KmpDependInfoTy = C.getRecordType(KmpDependInfoRD); 3796 } else 3797 KmpDependInfoRD = cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl()); 3798 CharUnits DependencySize = C.getTypeSizeInChars(KmpDependInfoTy); 3799 // Define type kmp_depend_info[<Dependences.size()>]; 3800 QualType KmpDependInfoArrayTy = C.getConstantArrayType( 3801 KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies), 3802 ArrayType::Normal, /*IndexTypeQuals=*/0); 3803 // kmp_depend_info[<Dependences.size()>] deps; 3804 DependenciesArray = 3805 CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr"); 3806 for (unsigned i = 0; i < NumDependencies; ++i) { 3807 const Expr *E = Data.Dependences[i].second; 3808 auto Addr = CGF.EmitLValue(E); 3809 llvm::Value *Size; 3810 QualType Ty = E->getType(); 3811 if (auto *ASE = dyn_cast<OMPArraySectionExpr>(E->IgnoreParenImpCasts())) { 3812 LValue UpAddrLVal = 3813 CGF.EmitOMPArraySectionExpr(ASE, /*LowerBound=*/false); 3814 llvm::Value *UpAddr = 3815 
CGF.Builder.CreateConstGEP1_32(UpAddrLVal.getPointer(), /*Idx0=*/1); 3816 llvm::Value *LowIntPtr = 3817 CGF.Builder.CreatePtrToInt(Addr.getPointer(), CGM.SizeTy); 3818 llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGM.SizeTy); 3819 Size = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr); 3820 } else 3821 Size = CGF.getTypeSize(Ty); 3822 auto Base = CGF.MakeAddrLValue( 3823 CGF.Builder.CreateConstArrayGEP(DependenciesArray, i, DependencySize), 3824 KmpDependInfoTy); 3825 // deps[i].base_addr = &<Dependences[i].second>; 3826 auto BaseAddrLVal = CGF.EmitLValueForField( 3827 Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr)); 3828 CGF.EmitStoreOfScalar( 3829 CGF.Builder.CreatePtrToInt(Addr.getPointer(), CGF.IntPtrTy), 3830 BaseAddrLVal); 3831 // deps[i].len = sizeof(<Dependences[i].second>); 3832 auto LenLVal = CGF.EmitLValueForField( 3833 Base, *std::next(KmpDependInfoRD->field_begin(), Len)); 3834 CGF.EmitStoreOfScalar(Size, LenLVal); 3835 // deps[i].flags = <Dependences[i].first>; 3836 RTLDependenceKindTy DepKind; 3837 switch (Data.Dependences[i].first) { 3838 case OMPC_DEPEND_in: 3839 DepKind = DepIn; 3840 break; 3841 // Out and InOut dependencies must use the same code. 3842 case OMPC_DEPEND_out: 3843 case OMPC_DEPEND_inout: 3844 DepKind = DepInOut; 3845 break; 3846 case OMPC_DEPEND_source: 3847 case OMPC_DEPEND_sink: 3848 case OMPC_DEPEND_unknown: 3849 llvm_unreachable("Unknown task dependence type"); 3850 } 3851 auto FlagsLVal = CGF.EmitLValueForField( 3852 Base, *std::next(KmpDependInfoRD->field_begin(), Flags)); 3853 CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind), 3854 FlagsLVal); 3855 } 3856 DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 3857 CGF.Builder.CreateStructGEP(DependenciesArray, 0, CharUnits::Zero()), 3858 CGF.VoidPtrTy); 3859 } 3860 3861 // NOTE: routine and part_id fields are intialized by __kmpc_omp_task_alloc() 3862 // libcall. 
3863 // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid, 3864 // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list, 3865 // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if dependence 3866 // list is not empty 3867 auto *ThreadID = getThreadID(CGF, Loc); 3868 auto *UpLoc = emitUpdateLocation(CGF, Loc); 3869 llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask }; 3870 llvm::Value *DepTaskArgs[7]; 3871 if (NumDependencies) { 3872 DepTaskArgs[0] = UpLoc; 3873 DepTaskArgs[1] = ThreadID; 3874 DepTaskArgs[2] = NewTask; 3875 DepTaskArgs[3] = CGF.Builder.getInt32(NumDependencies); 3876 DepTaskArgs[4] = DependenciesArray.getPointer(); 3877 DepTaskArgs[5] = CGF.Builder.getInt32(0); 3878 DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); 3879 } 3880 auto &&ThenCodeGen = [this, Loc, &Data, TDBase, KmpTaskTQTyRD, 3881 NumDependencies, &TaskArgs, 3882 &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) { 3883 if (!Data.Tied) { 3884 auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId); 3885 auto PartIdLVal = CGF.EmitLValueForField(TDBase, *PartIdFI); 3886 CGF.EmitStoreOfScalar(CGF.Builder.getInt32(0), PartIdLVal); 3887 } 3888 if (NumDependencies) { 3889 CGF.EmitRuntimeCall( 3890 createRuntimeFunction(OMPRTL__kmpc_omp_task_with_deps), DepTaskArgs); 3891 } else { 3892 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task), 3893 TaskArgs); 3894 } 3895 // Check if parent region is untied and build return for untied task; 3896 if (auto *Region = 3897 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) 3898 Region->emitUntiedSwitch(CGF); 3899 }; 3900 3901 llvm::Value *DepWaitTaskArgs[6]; 3902 if (NumDependencies) { 3903 DepWaitTaskArgs[0] = UpLoc; 3904 DepWaitTaskArgs[1] = ThreadID; 3905 DepWaitTaskArgs[2] = CGF.Builder.getInt32(NumDependencies); 3906 DepWaitTaskArgs[3] = DependenciesArray.getPointer(); 3907 DepWaitTaskArgs[4] = CGF.Builder.getInt32(0); 3908 DepWaitTaskArgs[5] = 
llvm::ConstantPointerNull::get(CGF.VoidPtrTy); 3909 } 3910 auto &&ElseCodeGen = [&TaskArgs, ThreadID, NewTaskNewTaskTTy, TaskEntry, 3911 NumDependencies, &DepWaitTaskArgs](CodeGenFunction &CGF, 3912 PrePostActionTy &) { 3913 auto &RT = CGF.CGM.getOpenMPRuntime(); 3914 CodeGenFunction::RunCleanupsScope LocalScope(CGF); 3915 // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid, 3916 // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 3917 // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info 3918 // is specified. 3919 if (NumDependencies) 3920 CGF.EmitRuntimeCall(RT.createRuntimeFunction(OMPRTL__kmpc_omp_wait_deps), 3921 DepWaitTaskArgs); 3922 // Call proxy_task_entry(gtid, new_task); 3923 auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy]( 3924 CodeGenFunction &CGF, PrePostActionTy &Action) { 3925 Action.Enter(CGF); 3926 llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy}; 3927 CGF.EmitCallOrInvoke(TaskEntry, OutlinedFnArgs); 3928 }; 3929 3930 // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid, 3931 // kmp_task_t *new_task); 3932 // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid, 3933 // kmp_task_t *new_task); 3934 RegionCodeGenTy RCG(CodeGen); 3935 CommonActionTy Action( 3936 RT.createRuntimeFunction(OMPRTL__kmpc_omp_task_begin_if0), TaskArgs, 3937 RT.createRuntimeFunction(OMPRTL__kmpc_omp_task_complete_if0), TaskArgs); 3938 RCG.setAction(Action); 3939 RCG(CGF); 3940 }; 3941 3942 if (IfCond) 3943 emitOMPIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen); 3944 else { 3945 RegionCodeGenTy ThenRCG(ThenCodeGen); 3946 ThenRCG(CGF); 3947 } 3948 } 3949 3950 void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc, 3951 const OMPLoopDirective &D, 3952 llvm::Value *TaskFunction, 3953 QualType SharedsTy, Address Shareds, 3954 const Expr *IfCond, 3955 const OMPTaskDataTy &Data) { 3956 if (!CGF.HaveInsertPoint()) 3957 return; 3958 TaskResultTy Result = 3959 
emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data); 3960 // NOTE: routine and part_id fields are intialized by __kmpc_omp_task_alloc() 3961 // libcall. 3962 // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int 3963 // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int 3964 // sched, kmp_uint64 grainsize, void *task_dup); 3965 llvm::Value *ThreadID = getThreadID(CGF, Loc); 3966 llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc); 3967 llvm::Value *IfVal; 3968 if (IfCond) { 3969 IfVal = CGF.Builder.CreateIntCast(CGF.EvaluateExprAsBool(IfCond), CGF.IntTy, 3970 /*isSigned=*/true); 3971 } else 3972 IfVal = llvm::ConstantInt::getSigned(CGF.IntTy, /*V=*/1); 3973 3974 LValue LBLVal = CGF.EmitLValueForField( 3975 Result.TDBase, 3976 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound)); 3977 auto *LBVar = 3978 cast<VarDecl>(cast<DeclRefExpr>(D.getLowerBoundVariable())->getDecl()); 3979 CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(), LBLVal.getQuals(), 3980 /*IsInitializer=*/true); 3981 LValue UBLVal = CGF.EmitLValueForField( 3982 Result.TDBase, 3983 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound)); 3984 auto *UBVar = 3985 cast<VarDecl>(cast<DeclRefExpr>(D.getUpperBoundVariable())->getDecl()); 3986 CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(), UBLVal.getQuals(), 3987 /*IsInitializer=*/true); 3988 LValue StLVal = CGF.EmitLValueForField( 3989 Result.TDBase, 3990 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTStride)); 3991 auto *StVar = 3992 cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl()); 3993 CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(), StLVal.getQuals(), 3994 /*IsInitializer=*/true); 3995 enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 }; 3996 llvm::Value *TaskArgs[] = { 3997 UpLoc, ThreadID, Result.NewTask, IfVal, LBLVal.getPointer(), 3998 UBLVal.getPointer(), CGF.EmitLoadOfScalar(StLVal, SourceLocation()), 
3999 llvm::ConstantInt::getSigned(CGF.IntTy, Data.Nogroup ? 1 : 0), 4000 llvm::ConstantInt::getSigned( 4001 CGF.IntTy, Data.Schedule.getPointer() 4002 ? Data.Schedule.getInt() ? NumTasks : Grainsize 4003 : NoSchedule), 4004 Data.Schedule.getPointer() 4005 ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty, 4006 /*isSigned=*/false) 4007 : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0), 4008 Result.TaskDupFn 4009 ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Result.TaskDupFn, 4010 CGF.VoidPtrTy) 4011 : llvm::ConstantPointerNull::get(CGF.VoidPtrTy)}; 4012 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_taskloop), TaskArgs); 4013 } 4014 4015 /// \brief Emit reduction operation for each element of array (required for 4016 /// array sections) LHS op = RHS. 4017 /// \param Type Type of array. 4018 /// \param LHSVar Variable on the left side of the reduction operation 4019 /// (references element of array in original variable). 4020 /// \param RHSVar Variable on the right side of the reduction operation 4021 /// (references element of array in original variable). 4022 /// \param RedOpGen Generator of reduction operation with use of LHSVar and 4023 /// RHSVar. 4024 static void EmitOMPAggregateReduction( 4025 CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar, 4026 const VarDecl *RHSVar, 4027 const llvm::function_ref<void(CodeGenFunction &CGF, const Expr *, 4028 const Expr *, const Expr *)> &RedOpGen, 4029 const Expr *XExpr = nullptr, const Expr *EExpr = nullptr, 4030 const Expr *UpExpr = nullptr) { 4031 // Perform element-by-element initialization. 4032 QualType ElementTy; 4033 Address LHSAddr = CGF.GetAddrOfLocalVar(LHSVar); 4034 Address RHSAddr = CGF.GetAddrOfLocalVar(RHSVar); 4035 4036 // Drill down to the base element type on both arrays. 
4037 auto ArrayTy = Type->getAsArrayTypeUnsafe(); 4038 auto NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, LHSAddr); 4039 4040 auto RHSBegin = RHSAddr.getPointer(); 4041 auto LHSBegin = LHSAddr.getPointer(); 4042 // Cast from pointer to array type to pointer to single element. 4043 auto LHSEnd = CGF.Builder.CreateGEP(LHSBegin, NumElements); 4044 // The basic structure here is a while-do loop. 4045 auto BodyBB = CGF.createBasicBlock("omp.arraycpy.body"); 4046 auto DoneBB = CGF.createBasicBlock("omp.arraycpy.done"); 4047 auto IsEmpty = 4048 CGF.Builder.CreateICmpEQ(LHSBegin, LHSEnd, "omp.arraycpy.isempty"); 4049 CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB); 4050 4051 // Enter the loop body, making that address the current address. 4052 auto EntryBB = CGF.Builder.GetInsertBlock(); 4053 CGF.EmitBlock(BodyBB); 4054 4055 CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy); 4056 4057 llvm::PHINode *RHSElementPHI = CGF.Builder.CreatePHI( 4058 RHSBegin->getType(), 2, "omp.arraycpy.srcElementPast"); 4059 RHSElementPHI->addIncoming(RHSBegin, EntryBB); 4060 Address RHSElementCurrent = 4061 Address(RHSElementPHI, 4062 RHSAddr.getAlignment().alignmentOfArrayElement(ElementSize)); 4063 4064 llvm::PHINode *LHSElementPHI = CGF.Builder.CreatePHI( 4065 LHSBegin->getType(), 2, "omp.arraycpy.destElementPast"); 4066 LHSElementPHI->addIncoming(LHSBegin, EntryBB); 4067 Address LHSElementCurrent = 4068 Address(LHSElementPHI, 4069 LHSAddr.getAlignment().alignmentOfArrayElement(ElementSize)); 4070 4071 // Emit copy. 4072 CodeGenFunction::OMPPrivateScope Scope(CGF); 4073 Scope.addPrivate(LHSVar, [=]() -> Address { return LHSElementCurrent; }); 4074 Scope.addPrivate(RHSVar, [=]() -> Address { return RHSElementCurrent; }); 4075 Scope.Privatize(); 4076 RedOpGen(CGF, XExpr, EExpr, UpExpr); 4077 Scope.ForceCleanup(); 4078 4079 // Shift the address forward by one element. 
4080 auto LHSElementNext = CGF.Builder.CreateConstGEP1_32( 4081 LHSElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element"); 4082 auto RHSElementNext = CGF.Builder.CreateConstGEP1_32( 4083 RHSElementPHI, /*Idx0=*/1, "omp.arraycpy.src.element"); 4084 // Check whether we've reached the end. 4085 auto Done = 4086 CGF.Builder.CreateICmpEQ(LHSElementNext, LHSEnd, "omp.arraycpy.done"); 4087 CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB); 4088 LHSElementPHI->addIncoming(LHSElementNext, CGF.Builder.GetInsertBlock()); 4089 RHSElementPHI->addIncoming(RHSElementNext, CGF.Builder.GetInsertBlock()); 4090 4091 // Done. 4092 CGF.EmitBlock(DoneBB, /*IsFinished=*/true); 4093 } 4094 4095 /// Emit reduction combiner. If the combiner is a simple expression emit it as 4096 /// is, otherwise consider it as combiner of UDR decl and emit it as a call of 4097 /// UDR combiner function. 4098 static void emitReductionCombiner(CodeGenFunction &CGF, 4099 const Expr *ReductionOp) { 4100 if (auto *CE = dyn_cast<CallExpr>(ReductionOp)) 4101 if (auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee())) 4102 if (auto *DRE = 4103 dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts())) 4104 if (auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) { 4105 std::pair<llvm::Function *, llvm::Function *> Reduction = 4106 CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD); 4107 RValue Func = RValue::get(Reduction.first); 4108 CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func); 4109 CGF.EmitIgnoredExpr(ReductionOp); 4110 return; 4111 } 4112 CGF.EmitIgnoredExpr(ReductionOp); 4113 } 4114 4115 static llvm::Value *emitReductionFunction(CodeGenModule &CGM, 4116 llvm::Type *ArgsType, 4117 ArrayRef<const Expr *> Privates, 4118 ArrayRef<const Expr *> LHSExprs, 4119 ArrayRef<const Expr *> RHSExprs, 4120 ArrayRef<const Expr *> ReductionOps) { 4121 auto &C = CGM.getContext(); 4122 4123 // void reduction_func(void *LHSArg, void *RHSArg); 4124 FunctionArgList Args; 4125 ImplicitParamDecl LHSArg(C, 
/*DC=*/nullptr, SourceLocation(), /*Id=*/nullptr, 4126 C.VoidPtrTy); 4127 ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, SourceLocation(), /*Id=*/nullptr, 4128 C.VoidPtrTy); 4129 Args.push_back(&LHSArg); 4130 Args.push_back(&RHSArg); 4131 auto &CGFI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 4132 auto *Fn = llvm::Function::Create( 4133 CGM.getTypes().GetFunctionType(CGFI), llvm::GlobalValue::InternalLinkage, 4134 ".omp.reduction.reduction_func", &CGM.getModule()); 4135 CGM.SetInternalFunctionAttributes(/*D=*/nullptr, Fn, CGFI); 4136 CodeGenFunction CGF(CGM); 4137 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args); 4138 4139 // Dst = (void*[n])(LHSArg); 4140 // Src = (void*[n])(RHSArg); 4141 Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4142 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)), 4143 ArgsType), CGF.getPointerAlign()); 4144 Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4145 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)), 4146 ArgsType), CGF.getPointerAlign()); 4147 4148 // ... 4149 // *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]); 4150 // ... 4151 CodeGenFunction::OMPPrivateScope Scope(CGF); 4152 auto IPriv = Privates.begin(); 4153 unsigned Idx = 0; 4154 for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) { 4155 auto RHSVar = cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl()); 4156 Scope.addPrivate(RHSVar, [&]() -> Address { 4157 return emitAddrOfVarFromArray(CGF, RHS, Idx, RHSVar); 4158 }); 4159 auto LHSVar = cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl()); 4160 Scope.addPrivate(LHSVar, [&]() -> Address { 4161 return emitAddrOfVarFromArray(CGF, LHS, Idx, LHSVar); 4162 }); 4163 QualType PrivTy = (*IPriv)->getType(); 4164 if (PrivTy->isVariablyModifiedType()) { 4165 // Get array size and emit VLA type. 
4166 ++Idx; 4167 Address Elem = 4168 CGF.Builder.CreateConstArrayGEP(LHS, Idx, CGF.getPointerSize()); 4169 llvm::Value *Ptr = CGF.Builder.CreateLoad(Elem); 4170 auto *VLA = CGF.getContext().getAsVariableArrayType(PrivTy); 4171 auto *OVE = cast<OpaqueValueExpr>(VLA->getSizeExpr()); 4172 CodeGenFunction::OpaqueValueMapping OpaqueMap( 4173 CGF, OVE, RValue::get(CGF.Builder.CreatePtrToInt(Ptr, CGF.SizeTy))); 4174 CGF.EmitVariablyModifiedType(PrivTy); 4175 } 4176 } 4177 Scope.Privatize(); 4178 IPriv = Privates.begin(); 4179 auto ILHS = LHSExprs.begin(); 4180 auto IRHS = RHSExprs.begin(); 4181 for (auto *E : ReductionOps) { 4182 if ((*IPriv)->getType()->isArrayType()) { 4183 // Emit reduction for array section. 4184 auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl()); 4185 auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl()); 4186 EmitOMPAggregateReduction( 4187 CGF, (*IPriv)->getType(), LHSVar, RHSVar, 4188 [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) { 4189 emitReductionCombiner(CGF, E); 4190 }); 4191 } else 4192 // Emit reduction for array subscript or single variable. 4193 emitReductionCombiner(CGF, E); 4194 ++IPriv; 4195 ++ILHS; 4196 ++IRHS; 4197 } 4198 Scope.ForceCleanup(); 4199 CGF.FinishFunction(); 4200 return Fn; 4201 } 4202 4203 static void emitSingleReductionCombiner(CodeGenFunction &CGF, 4204 const Expr *ReductionOp, 4205 const Expr *PrivateRef, 4206 const DeclRefExpr *LHS, 4207 const DeclRefExpr *RHS) { 4208 if (PrivateRef->getType()->isArrayType()) { 4209 // Emit reduction for array section. 4210 auto *LHSVar = cast<VarDecl>(LHS->getDecl()); 4211 auto *RHSVar = cast<VarDecl>(RHS->getDecl()); 4212 EmitOMPAggregateReduction( 4213 CGF, PrivateRef->getType(), LHSVar, RHSVar, 4214 [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) { 4215 emitReductionCombiner(CGF, ReductionOp); 4216 }); 4217 } else 4218 // Emit reduction for array subscript or single variable. 
emitReductionCombiner(CGF, ReductionOp);
}

/// \brief Emit the code that combines private reduction copies into the
/// original reduction items.
///
/// \param Privates Private copies of the reduction items; their types decide
/// whether an item is handled as an array and whether an extra list slot for
/// a VLA size is needed.
/// \param LHSExprs References to the variables used as left operands.
/// \param RHSExprs References to the variables used as right operands; their
/// addresses populate the reduction list.
/// \param ReductionOps One combiner expression per reduction item.
/// \param WithNowait Selects the '_nowait' flavour of the runtime entry
/// points.
/// \param SimpleReduction If true, only the plain combiner code is emitted
/// and no runtime call is generated.
void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
                                    ArrayRef<const Expr *> Privates,
                                    ArrayRef<const Expr *> LHSExprs,
                                    ArrayRef<const Expr *> RHSExprs,
                                    ArrayRef<const Expr *> ReductionOps,
                                    bool WithNowait, bool SimpleReduction) {
  if (!CGF.HaveInsertPoint())
    return;
  // Next code should be emitted for reduction:
  //
  // static kmp_critical_name lock = { 0 };
  //
  // void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
  //  *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]);
  //  ...
  //  *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1],
  //  *(Type<n>-1*)rhs[<n>-1]);
  // }
  //
  // ...
  // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]};
  // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
  // RedList, reduce_func, &<lock>)) {
  // case 1:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...
  // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  // break;
  // case 2:
  //  ...
  //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
  //  ...
  // [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);]
  // break;
  // default:;
  // }
  //
  // if SimpleReduction is true, only the next code is generated:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...

  auto &C = CGM.getContext();

  if (SimpleReduction) {
    // No runtime involvement: emit each combiner in sequence and return.
    CodeGenFunction::RunCleanupsScope Scope(CGF);
    auto IPriv = Privates.begin();
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    for (auto *E : ReductionOps) {
      emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
                                  cast<DeclRefExpr>(*IRHS));
      ++IPriv;
      ++ILHS;
      ++IRHS;
    }
    return;
  }

  // 1. Build a list of reduction variables.
  // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]};
  // The list gets one slot per reduction item plus one extra slot for every
  // variably modified item, which carries the array size.
  auto Size = RHSExprs.size();
  for (auto *E : Privates) {
    if (E->getType()->isVariablyModifiedType())
      // Reserve place for array size.
      ++Size;
  }
  llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size);
  QualType ReductionArrayTy =
      C.getConstantArrayType(C.VoidPtrTy, ArraySize, ArrayType::Normal,
                             /*IndexTypeQuals=*/0);
  Address ReductionList =
      CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list");
  auto IPriv = Privates.begin();
  // Idx is the slot in ReductionList; I is the reduction item. They diverge
  // once a size slot has been written.
  unsigned Idx = 0;
  for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) {
    Address Elem =
      CGF.Builder.CreateConstArrayGEP(ReductionList, Idx, CGF.getPointerSize());
    CGF.Builder.CreateStore(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            CGF.EmitLValue(RHSExprs[I]).getPointer(), CGF.VoidPtrTy),
        Elem);
    if ((*IPriv)->getType()->isVariablyModifiedType()) {
      // Store array size.
      ++Idx;
      Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx,
                                             CGF.getPointerSize());
      // NOTE(review): this local 'Size' shadows the slot counter 'Size'
      // declared above. The size is stored in the list as a pointer-sized
      // integer converted with inttoptr.
      llvm::Value *Size = CGF.Builder.CreateIntCast(
          CGF.getVLASize(
                 CGF.getContext().getAsVariableArrayType((*IPriv)->getType()))
              .first,
          CGF.SizeTy, /*isSigned=*/false);
      CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy),
                              Elem);
    }
  }

  // 2. Emit reduce_func().
  auto *ReductionFn = emitReductionFunction(
      CGM, CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo(), Privates,
      LHSExprs, RHSExprs, ReductionOps);

  // 3. Create static kmp_critical_name lock = { 0 };
  auto *Lock = getCriticalRegionLock(".reduction");

  // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
  // RedList, reduce_func, &<lock>);
  auto *IdentTLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE);
  auto *ThreadId = getThreadID(CGF, Loc);
  auto *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy);
  auto *RL =
    CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(ReductionList.getPointer(),
                                                    CGF.VoidPtrTy);
  // Note: <n> is the number of reduction items (RHSExprs.size()); it does not
  // count the extra size slots, whereas sizeof(RedList) covers the whole list.
  llvm::Value *Args[] = {
      IdentTLoc,                             // ident_t *<loc>
      ThreadId,                              // i32 <gtid>
      CGF.Builder.getInt32(RHSExprs.size()), // i32 <n>
      ReductionArrayTySize,                  // size_type sizeof(RedList)
      RL,                                    // void *RedList
      ReductionFn, // void (*) (void *, void *) <reduce_func>
      Lock         // kmp_critical_name *&<lock>
  };
  auto Res = CGF.EmitRuntimeCall(
      createRuntimeFunction(WithNowait ? OMPRTL__kmpc_reduce_nowait
                                       : OMPRTL__kmpc_reduce),
      Args);

  // 5. Build switch(res)
  auto *DefaultBB = CGF.createBasicBlock(".omp.reduction.default");
  auto *SwInst = CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2);

  // 6. Build case 1:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...
  // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  // break;
  auto *Case1BB = CGF.createBasicBlock(".omp.reduction.case1");
  SwInst->addCase(CGF.Builder.getInt32(1), Case1BB);
  CGF.EmitBlock(Case1BB);

  // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  llvm::Value *EndArgs[] = {
      IdentTLoc, // ident_t *<loc>
      ThreadId,  // i32 <gtid>
      Lock       // kmp_critical_name *&<lock>
  };
  // Body of case 1: the non-atomic combiners, one per reduction item.
  auto &&CodeGen = [&Privates, &LHSExprs, &RHSExprs, &ReductionOps](
      CodeGenFunction &CGF, PrePostActionTy &Action) {
    auto IPriv = Privates.begin();
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    for (auto *E : ReductionOps) {
      emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
                                  cast<DeclRefExpr>(*IRHS));
      ++IPriv;
      ++ILHS;
      ++IRHS;
    }
  };
  RegionCodeGenTy RCG(CodeGen);
  // Only an exit action is installed (no enter callee): the end-reduce call.
  CommonActionTy Action(
      nullptr, llvm::None,
      createRuntimeFunction(WithNowait ? OMPRTL__kmpc_end_reduce_nowait
                                       : OMPRTL__kmpc_end_reduce),
      EndArgs);
  RCG.setAction(Action);
  RCG(CGF);

  CGF.EmitBranch(DefaultBB);

  // 7. Build case 2:
  //  ...
  //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
  //  ...
  // break;
  auto *Case2BB = CGF.createBasicBlock(".omp.reduction.case2");
  SwInst->addCase(CGF.Builder.getInt32(2), Case2BB);
  CGF.EmitBlock(Case2BB);

  // Body of case 2: each combiner is emitted either as an atomic update (when
  // it has the form 'x = <update>') or inside a critical region.
  auto &&AtomicCodeGen = [Loc, &Privates, &LHSExprs, &RHSExprs, &ReductionOps](
      CodeGenFunction &CGF, PrePostActionTy &Action) {
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    auto IPriv = Privates.begin();
    for (auto *E : ReductionOps) {
      const Expr *XExpr = nullptr;
      const Expr *EExpr = nullptr;
      const Expr *UpExpr = nullptr;
      // Opcode of the update operation; BO_Comma is the "not recognized"
      // placeholder.
      BinaryOperatorKind BO = BO_Comma;
      // NOTE(review): the pointer 'BO' declared in this condition shadows the
      // opcode 'BO' above for the extent of the if statement only.
      if (auto *BO = dyn_cast<BinaryOperator>(E)) {
        if (BO->getOpcode() == BO_Assign) {
          XExpr = BO->getLHS();
          UpExpr = BO->getRHS();
        }
      }
      // Try to emit update expression as a simple atomic.
      auto *RHSExpr = UpExpr;
      if (RHSExpr) {
        // Analyze RHS part of the whole expression.
        if (auto *ACO = dyn_cast<AbstractConditionalOperator>(
                RHSExpr->IgnoreParenImpCasts())) {
          // If this is a conditional operator, analyze its condition for
          // min/max reduction operator.
          RHSExpr = ACO->getCond();
        }
        if (auto *BORHS =
                dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) {
          EExpr = BORHS->getRHS();
          BO = BORHS->getOpcode();
        }
      }
      if (XExpr) {
        auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
        // NOTE(review): IPriv is captured by this lambda and by the nested
        // one below, but neither body reads it.
        auto &&AtomicRedGen = [BO, VD, IPriv,
                               Loc](CodeGenFunction &CGF, const Expr *XExpr,
                                    const Expr *EExpr, const Expr *UpExpr) {
          LValue X = CGF.EmitLValue(XExpr);
          RValue E;
          if (EExpr)
            E = CGF.EmitAnyExpr(EExpr);
          // The callback computes the updated value from the current value of
          // X: the current value is stored to a temporary that stands in for
          // VD while UpExpr is evaluated.
          CGF.EmitOMPAtomicSimpleUpdateExpr(
              X, E, BO, /*IsXLHSInRHSPart=*/true,
              llvm::AtomicOrdering::Monotonic, Loc,
              [&CGF, UpExpr, VD, IPriv, Loc](RValue XRValue) {
                CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
                PrivateScope.addPrivate(
                    VD, [&CGF, VD, XRValue, Loc]() -> Address {
                      Address LHSTemp = CGF.CreateMemTemp(VD->getType());
                      CGF.emitOMPSimpleStore(
                          CGF.MakeAddrLValue(LHSTemp, VD->getType()), XRValue,
                          VD->getType().getNonReferenceType(), Loc);
                      return LHSTemp;
                    });
                (void)PrivateScope.Privatize();
                return CGF.EmitAnyExpr(UpExpr);
              });
        };
        if ((*IPriv)->getType()->isArrayType()) {
          // Emit atomic reduction for array section.
          auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
          EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar,
                                    AtomicRedGen, XExpr, EExpr, UpExpr);
        } else
          // Emit atomic reduction for array subscript or single variable.
          AtomicRedGen(CGF, XExpr, EExpr, UpExpr);
      } else {
        // Emit as a critical region.
        auto &&CritRedGen = [E, Loc](CodeGenFunction &CGF, const Expr *,
                                     const Expr *, const Expr *) {
          auto &RT = CGF.CGM.getOpenMPRuntime();
          RT.emitCriticalRegion(
              CGF, ".atomic_reduction",
              [=](CodeGenFunction &CGF, PrePostActionTy &Action) {
                Action.Enter(CGF);
                emitReductionCombiner(CGF, E);
              },
              Loc);
        };
        if ((*IPriv)->getType()->isArrayType()) {
          auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
          auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
          EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar,
                                    CritRedGen);
        } else
          CritRedGen(CGF, nullptr, nullptr, nullptr);
      }
      ++ILHS;
      ++IRHS;
      ++IPriv;
    }
  };
  RegionCodeGenTy AtomicRCG(AtomicCodeGen);
  if (!WithNowait) {
    // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>);
    // With 'nowait' no end-reduce call is emitted for the atomic case.
    llvm::Value *EndArgs[] = {
        IdentTLoc, // ident_t *<loc>
        ThreadId,  // i32 <gtid>
        Lock       // kmp_critical_name *&<lock>
    };
    CommonActionTy Action(nullptr, llvm::None,
                          createRuntimeFunction(OMPRTL__kmpc_end_reduce),
                          EndArgs);
    AtomicRCG.setAction(Action);
    AtomicRCG(CGF);
  } else
    AtomicRCG(CGF);

  CGF.EmitBranch(DefaultBB);
  CGF.EmitBlock(DefaultBB, /*IsFinished=*/true);
}

void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
                                       SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
  // global_tid);
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  // Ignore return result until untied tasks are supported.
4523 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_taskwait), Args); 4524 if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) 4525 Region->emitUntiedSwitch(CGF); 4526 } 4527 4528 void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF, 4529 OpenMPDirectiveKind InnerKind, 4530 const RegionCodeGenTy &CodeGen, 4531 bool HasCancel) { 4532 if (!CGF.HaveInsertPoint()) 4533 return; 4534 InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel); 4535 CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr); 4536 } 4537 4538 namespace { 4539 enum RTCancelKind { 4540 CancelNoreq = 0, 4541 CancelParallel = 1, 4542 CancelLoop = 2, 4543 CancelSections = 3, 4544 CancelTaskgroup = 4 4545 }; 4546 } // anonymous namespace 4547 4548 static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) { 4549 RTCancelKind CancelKind = CancelNoreq; 4550 if (CancelRegion == OMPD_parallel) 4551 CancelKind = CancelParallel; 4552 else if (CancelRegion == OMPD_for) 4553 CancelKind = CancelLoop; 4554 else if (CancelRegion == OMPD_sections) 4555 CancelKind = CancelSections; 4556 else { 4557 assert(CancelRegion == OMPD_taskgroup); 4558 CancelKind = CancelTaskgroup; 4559 } 4560 return CancelKind; 4561 } 4562 4563 void CGOpenMPRuntime::emitCancellationPointCall( 4564 CodeGenFunction &CGF, SourceLocation Loc, 4565 OpenMPDirectiveKind CancelRegion) { 4566 if (!CGF.HaveInsertPoint()) 4567 return; 4568 // Build call kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32 4569 // global_tid, kmp_int32 cncl_kind); 4570 if (auto *OMPRegionInfo = 4571 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) { 4572 if (OMPRegionInfo->hasCancel()) { 4573 llvm::Value *Args[] = { 4574 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 4575 CGF.Builder.getInt32(getCancellationKind(CancelRegion))}; 4576 // Ignore return result until untied tasks are supported. 
4577 auto *Result = CGF.EmitRuntimeCall( 4578 createRuntimeFunction(OMPRTL__kmpc_cancellationpoint), Args); 4579 // if (__kmpc_cancellationpoint()) { 4580 // __kmpc_cancel_barrier(); 4581 // exit from construct; 4582 // } 4583 auto *ExitBB = CGF.createBasicBlock(".cancel.exit"); 4584 auto *ContBB = CGF.createBasicBlock(".cancel.continue"); 4585 auto *Cmp = CGF.Builder.CreateIsNotNull(Result); 4586 CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB); 4587 CGF.EmitBlock(ExitBB); 4588 // __kmpc_cancel_barrier(); 4589 emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false); 4590 // exit from construct; 4591 auto CancelDest = 4592 CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind()); 4593 CGF.EmitBranchThroughCleanup(CancelDest); 4594 CGF.EmitBlock(ContBB, /*IsFinished=*/true); 4595 } 4596 } 4597 } 4598 4599 void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc, 4600 const Expr *IfCond, 4601 OpenMPDirectiveKind CancelRegion) { 4602 if (!CGF.HaveInsertPoint()) 4603 return; 4604 // Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid, 4605 // kmp_int32 cncl_kind); 4606 if (auto *OMPRegionInfo = 4607 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) { 4608 auto &&ThenGen = [Loc, CancelRegion, OMPRegionInfo](CodeGenFunction &CGF, 4609 PrePostActionTy &) { 4610 auto &RT = CGF.CGM.getOpenMPRuntime(); 4611 llvm::Value *Args[] = { 4612 RT.emitUpdateLocation(CGF, Loc), RT.getThreadID(CGF, Loc), 4613 CGF.Builder.getInt32(getCancellationKind(CancelRegion))}; 4614 // Ignore return result until untied tasks are supported. 
4615 auto *Result = CGF.EmitRuntimeCall( 4616 RT.createRuntimeFunction(OMPRTL__kmpc_cancel), Args); 4617 // if (__kmpc_cancel()) { 4618 // __kmpc_cancel_barrier(); 4619 // exit from construct; 4620 // } 4621 auto *ExitBB = CGF.createBasicBlock(".cancel.exit"); 4622 auto *ContBB = CGF.createBasicBlock(".cancel.continue"); 4623 auto *Cmp = CGF.Builder.CreateIsNotNull(Result); 4624 CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB); 4625 CGF.EmitBlock(ExitBB); 4626 // __kmpc_cancel_barrier(); 4627 RT.emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false); 4628 // exit from construct; 4629 auto CancelDest = 4630 CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind()); 4631 CGF.EmitBranchThroughCleanup(CancelDest); 4632 CGF.EmitBlock(ContBB, /*IsFinished=*/true); 4633 }; 4634 if (IfCond) 4635 emitOMPIfClause(CGF, IfCond, ThenGen, 4636 [](CodeGenFunction &, PrePostActionTy &) {}); 4637 else { 4638 RegionCodeGenTy ThenRCG(ThenGen); 4639 ThenRCG(CGF); 4640 } 4641 } 4642 } 4643 4644 /// \brief Obtain information that uniquely identifies a target entry. This 4645 /// consists of the file and device IDs as well as line number associated with 4646 /// the relevant entry source location. 
4647 static void getTargetEntryUniqueInfo(ASTContext &C, SourceLocation Loc, 4648 unsigned &DeviceID, unsigned &FileID, 4649 unsigned &LineNum) { 4650 4651 auto &SM = C.getSourceManager(); 4652 4653 // The loc should be always valid and have a file ID (the user cannot use 4654 // #pragma directives in macros) 4655 4656 assert(Loc.isValid() && "Source location is expected to be always valid."); 4657 assert(Loc.isFileID() && "Source location is expected to refer to a file."); 4658 4659 PresumedLoc PLoc = SM.getPresumedLoc(Loc); 4660 assert(PLoc.isValid() && "Source location is expected to be always valid."); 4661 4662 llvm::sys::fs::UniqueID ID; 4663 if (llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID)) 4664 llvm_unreachable("Source file with target region no longer exists!"); 4665 4666 DeviceID = ID.getDevice(); 4667 FileID = ID.getFile(); 4668 LineNum = PLoc.getLine(); 4669 } 4670 4671 void CGOpenMPRuntime::emitTargetOutlinedFunction( 4672 const OMPExecutableDirective &D, StringRef ParentName, 4673 llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, 4674 bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) { 4675 assert(!ParentName.empty() && "Invalid target region parent name!"); 4676 4677 emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID, 4678 IsOffloadEntry, CodeGen); 4679 } 4680 4681 void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper( 4682 const OMPExecutableDirective &D, StringRef ParentName, 4683 llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, 4684 bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) { 4685 // Create a unique name for the entry function using the source location 4686 // information of the current target region. 
The name will be something like: 4687 // 4688 // __omp_offloading_DD_FFFF_PP_lBB 4689 // 4690 // where DD_FFFF is an ID unique to the file (device and file IDs), PP is the 4691 // mangled name of the function that encloses the target region and BB is the 4692 // line number of the target region. 4693 4694 unsigned DeviceID; 4695 unsigned FileID; 4696 unsigned Line; 4697 getTargetEntryUniqueInfo(CGM.getContext(), D.getLocStart(), DeviceID, FileID, 4698 Line); 4699 SmallString<64> EntryFnName; 4700 { 4701 llvm::raw_svector_ostream OS(EntryFnName); 4702 OS << "__omp_offloading" << llvm::format("_%x", DeviceID) 4703 << llvm::format("_%x_", FileID) << ParentName << "_l" << Line; 4704 } 4705 4706 const CapturedStmt &CS = *cast<CapturedStmt>(D.getAssociatedStmt()); 4707 4708 CodeGenFunction CGF(CGM, true); 4709 CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName); 4710 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 4711 4712 OutlinedFn = 4713 CGF.GenerateOpenMPCapturedStmtFunction(CS, /*CastValToPtr=*/true); 4714 4715 // If this target outline function is not an offload entry, we don't need to 4716 // register it. 4717 if (!IsOffloadEntry) 4718 return; 4719 4720 // The target region ID is used by the runtime library to identify the current 4721 // target region, so it only has to be unique and not necessarily point to 4722 // anything. It could be the pointer to the outlined function that implements 4723 // the target region, but we aren't using that so that the compiler doesn't 4724 // need to keep that, and could therefore inline the host function if proven 4725 // worthwhile during optimization. In the other hand, if emitting code for the 4726 // device, the ID has to be the function address so that it can retrieved from 4727 // the offloading entry and launched by the runtime library. 
We also mark the 4728 // outlined function to have external linkage in case we are emitting code for 4729 // the device, because these functions will be entry points to the device. 4730 4731 if (CGM.getLangOpts().OpenMPIsDevice) { 4732 OutlinedFnID = llvm::ConstantExpr::getBitCast(OutlinedFn, CGM.Int8PtrTy); 4733 OutlinedFn->setLinkage(llvm::GlobalValue::ExternalLinkage); 4734 } else 4735 OutlinedFnID = new llvm::GlobalVariable( 4736 CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true, 4737 llvm::GlobalValue::PrivateLinkage, 4738 llvm::Constant::getNullValue(CGM.Int8Ty), ".omp_offload.region_id"); 4739 4740 // Register the information for the entry associated with this target region. 4741 OffloadEntriesInfoManager.registerTargetRegionEntryInfo( 4742 DeviceID, FileID, ParentName, Line, OutlinedFn, OutlinedFnID); 4743 } 4744 4745 /// discard all CompoundStmts intervening between two constructs 4746 static const Stmt *ignoreCompoundStmts(const Stmt *Body) { 4747 while (auto *CS = dyn_cast_or_null<CompoundStmt>(Body)) 4748 Body = CS->body_front(); 4749 4750 return Body; 4751 } 4752 4753 /// \brief Emit the num_teams clause of an enclosed teams directive at the 4754 /// target region scope. If there is no teams directive associated with the 4755 /// target directive, or if there is no num_teams clause associated with the 4756 /// enclosed teams directive, return nullptr. 4757 static llvm::Value * 4758 emitNumTeamsClauseForTargetDirective(CGOpenMPRuntime &OMPRuntime, 4759 CodeGenFunction &CGF, 4760 const OMPExecutableDirective &D) { 4761 4762 assert(!CGF.getLangOpts().OpenMPIsDevice && "Clauses associated with the " 4763 "teams directive expected to be " 4764 "emitted only for the host!"); 4765 4766 // FIXME: For the moment we do not support combined directives with target and 4767 // teams, so we do not expect to get any num_teams clause in the provided 4768 // directive. 
Once we support that, this assertion can be replaced by the 4769 // actual emission of the clause expression. 4770 assert(D.getSingleClause<OMPNumTeamsClause>() == nullptr && 4771 "Not expecting clause in directive."); 4772 4773 // If the current target region has a teams region enclosed, we need to get 4774 // the number of teams to pass to the runtime function call. This is done 4775 // by generating the expression in a inlined region. This is required because 4776 // the expression is captured in the enclosing target environment when the 4777 // teams directive is not combined with target. 4778 4779 const CapturedStmt &CS = *cast<CapturedStmt>(D.getAssociatedStmt()); 4780 4781 // FIXME: Accommodate other combined directives with teams when they become 4782 // available. 4783 if (auto *TeamsDir = dyn_cast_or_null<OMPTeamsDirective>( 4784 ignoreCompoundStmts(CS.getCapturedStmt()))) { 4785 if (auto *NTE = TeamsDir->getSingleClause<OMPNumTeamsClause>()) { 4786 CGOpenMPInnerExprInfo CGInfo(CGF, CS); 4787 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 4788 llvm::Value *NumTeams = CGF.EmitScalarExpr(NTE->getNumTeams()); 4789 return CGF.Builder.CreateIntCast(NumTeams, CGF.Int32Ty, 4790 /*IsSigned=*/true); 4791 } 4792 4793 // If we have an enclosed teams directive but no num_teams clause we use 4794 // the default value 0. 4795 return CGF.Builder.getInt32(0); 4796 } 4797 4798 // No teams associated with the directive. 4799 return nullptr; 4800 } 4801 4802 /// \brief Emit the thread_limit clause of an enclosed teams directive at the 4803 /// target region scope. If there is no teams directive associated with the 4804 /// target directive, or if there is no thread_limit clause associated with the 4805 /// enclosed teams directive, return nullptr. 
4806 static llvm::Value * 4807 emitThreadLimitClauseForTargetDirective(CGOpenMPRuntime &OMPRuntime, 4808 CodeGenFunction &CGF, 4809 const OMPExecutableDirective &D) { 4810 4811 assert(!CGF.getLangOpts().OpenMPIsDevice && "Clauses associated with the " 4812 "teams directive expected to be " 4813 "emitted only for the host!"); 4814 4815 // FIXME: For the moment we do not support combined directives with target and 4816 // teams, so we do not expect to get any thread_limit clause in the provided 4817 // directive. Once we support that, this assertion can be replaced by the 4818 // actual emission of the clause expression. 4819 assert(D.getSingleClause<OMPThreadLimitClause>() == nullptr && 4820 "Not expecting clause in directive."); 4821 4822 // If the current target region has a teams region enclosed, we need to get 4823 // the thread limit to pass to the runtime function call. This is done 4824 // by generating the expression in a inlined region. This is required because 4825 // the expression is captured in the enclosing target environment when the 4826 // teams directive is not combined with target. 4827 4828 const CapturedStmt &CS = *cast<CapturedStmt>(D.getAssociatedStmt()); 4829 4830 // FIXME: Accommodate other combined directives with teams when they become 4831 // available. 4832 if (auto *TeamsDir = dyn_cast_or_null<OMPTeamsDirective>( 4833 ignoreCompoundStmts(CS.getCapturedStmt()))) { 4834 if (auto *TLE = TeamsDir->getSingleClause<OMPThreadLimitClause>()) { 4835 CGOpenMPInnerExprInfo CGInfo(CGF, CS); 4836 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 4837 llvm::Value *ThreadLimit = CGF.EmitScalarExpr(TLE->getThreadLimit()); 4838 return CGF.Builder.CreateIntCast(ThreadLimit, CGF.Int32Ty, 4839 /*IsSigned=*/true); 4840 } 4841 4842 // If we have an enclosed teams directive but no thread_limit clause we use 4843 // the default value 0. 4844 return CGF.Builder.getInt32(0); 4845 } 4846 4847 // No teams associated with the directive. 
4848 return nullptr; 4849 } 4850 4851 namespace { 4852 // \brief Utility to handle information from clauses associated with a given 4853 // construct that use mappable expressions (e.g. 'map' clause, 'to' clause). 4854 // It provides a convenient interface to obtain the information and generate 4855 // code for that information. 4856 class MappableExprsHandler { 4857 public: 4858 /// \brief Values for bit flags used to specify the mapping type for 4859 /// offloading. 4860 enum OpenMPOffloadMappingFlags { 4861 /// \brief Only allocate memory on the device, 4862 OMP_MAP_ALLOC = 0x00, 4863 /// \brief Allocate memory on the device and move data from host to device. 4864 OMP_MAP_TO = 0x01, 4865 /// \brief Allocate memory on the device and move data from device to host. 4866 OMP_MAP_FROM = 0x02, 4867 /// \brief Always perform the requested mapping action on the element, even 4868 /// if it was already mapped before. 4869 OMP_MAP_ALWAYS = 0x04, 4870 /// \brief Decrement the reference count associated with the element without 4871 /// executing any other action. 4872 OMP_MAP_RELEASE = 0x08, 4873 /// \brief Delete the element from the device environment, ignoring the 4874 /// current reference count associated with the element. 4875 OMP_MAP_DELETE = 0x10, 4876 /// \brief The element passed to the device is a pointer. 4877 OMP_MAP_PTR = 0x20, 4878 /// \brief Signal the element as extra, i.e. is not argument to the target 4879 /// region kernel. 4880 OMP_MAP_EXTRA = 0x40, 4881 /// \brief Pass the element to the device by value. 4882 OMP_MAP_BYCOPY = 0x80, 4883 }; 4884 4885 typedef SmallVector<llvm::Value *, 16> MapValuesArrayTy; 4886 typedef SmallVector<unsigned, 16> MapFlagsArrayTy; 4887 4888 private: 4889 /// \brief Directive from where the map clauses were extracted. 4890 const OMPExecutableDirective &Directive; 4891 4892 /// \brief Function the directive is being generated for. 
4893 CodeGenFunction &CGF; 4894 4895 llvm::Value *getExprTypeSize(const Expr *E) const { 4896 auto ExprTy = E->getType().getCanonicalType(); 4897 4898 // Reference types are ignored for mapping purposes. 4899 if (auto *RefTy = ExprTy->getAs<ReferenceType>()) 4900 ExprTy = RefTy->getPointeeType().getCanonicalType(); 4901 4902 // Given that an array section is considered a built-in type, we need to 4903 // do the calculation based on the length of the section instead of relying 4904 // on CGF.getTypeSize(E->getType()). 4905 if (const auto *OAE = dyn_cast<OMPArraySectionExpr>(E)) { 4906 QualType BaseTy = OMPArraySectionExpr::getBaseOriginalType( 4907 OAE->getBase()->IgnoreParenImpCasts()) 4908 .getCanonicalType(); 4909 4910 // If there is no length associated with the expression, that means we 4911 // are using the whole length of the base. 4912 if (!OAE->getLength() && OAE->getColonLoc().isValid()) 4913 return CGF.getTypeSize(BaseTy); 4914 4915 llvm::Value *ElemSize; 4916 if (auto *PTy = BaseTy->getAs<PointerType>()) 4917 ElemSize = CGF.getTypeSize(PTy->getPointeeType().getCanonicalType()); 4918 else { 4919 auto *ATy = cast<ArrayType>(BaseTy.getTypePtr()); 4920 assert(ATy && "Expecting array type if not a pointer type."); 4921 ElemSize = CGF.getTypeSize(ATy->getElementType().getCanonicalType()); 4922 } 4923 4924 // If we don't have a length at this point, that is because we have an 4925 // array section with a single element. 4926 if (!OAE->getLength()) 4927 return ElemSize; 4928 4929 auto *LengthVal = CGF.EmitScalarExpr(OAE->getLength()); 4930 LengthVal = 4931 CGF.Builder.CreateIntCast(LengthVal, CGF.SizeTy, /*isSigned=*/false); 4932 return CGF.Builder.CreateNUWMul(LengthVal, ElemSize); 4933 } 4934 return CGF.getTypeSize(ExprTy); 4935 } 4936 4937 /// \brief Return the corresponding bits for a given map clause modifier. Add 4938 /// a flag marking the map as a pointer if requested. 
Add a flag marking the 4939 /// map as extra, meaning is not an argument of the kernel. 4940 unsigned getMapTypeBits(OpenMPMapClauseKind MapType, 4941 OpenMPMapClauseKind MapTypeModifier, bool AddPtrFlag, 4942 bool AddExtraFlag) const { 4943 unsigned Bits = 0u; 4944 switch (MapType) { 4945 case OMPC_MAP_alloc: 4946 Bits = OMP_MAP_ALLOC; 4947 break; 4948 case OMPC_MAP_to: 4949 Bits = OMP_MAP_TO; 4950 break; 4951 case OMPC_MAP_from: 4952 Bits = OMP_MAP_FROM; 4953 break; 4954 case OMPC_MAP_tofrom: 4955 Bits = OMP_MAP_TO | OMP_MAP_FROM; 4956 break; 4957 case OMPC_MAP_delete: 4958 Bits = OMP_MAP_DELETE; 4959 break; 4960 case OMPC_MAP_release: 4961 Bits = OMP_MAP_RELEASE; 4962 break; 4963 default: 4964 llvm_unreachable("Unexpected map type!"); 4965 break; 4966 } 4967 if (AddPtrFlag) 4968 Bits |= OMP_MAP_PTR; 4969 if (AddExtraFlag) 4970 Bits |= OMP_MAP_EXTRA; 4971 if (MapTypeModifier == OMPC_MAP_always) 4972 Bits |= OMP_MAP_ALWAYS; 4973 return Bits; 4974 } 4975 4976 /// \brief Return true if the provided expression is a final array section. A 4977 /// final array section, is one whose length can't be proved to be one. 4978 bool isFinalArraySectionExpression(const Expr *E) const { 4979 auto *OASE = dyn_cast<OMPArraySectionExpr>(E); 4980 4981 // It is not an array section and therefore not a unity-size one. 4982 if (!OASE) 4983 return false; 4984 4985 // An array section with no colon always refer to a single element. 4986 if (OASE->getColonLoc().isInvalid()) 4987 return false; 4988 4989 auto *Length = OASE->getLength(); 4990 4991 // If we don't have a length we have to check if the array has size 1 4992 // for this dimension. Also, we should always expect a length if the 4993 // base type is pointer. 
4994 if (!Length) { 4995 auto BaseQTy = OMPArraySectionExpr::getBaseOriginalType( 4996 OASE->getBase()->IgnoreParenImpCasts()) 4997 .getCanonicalType(); 4998 if (auto *ATy = dyn_cast<ConstantArrayType>(BaseQTy.getTypePtr())) 4999 return ATy->getSize().getSExtValue() != 1; 5000 // If we don't have a constant dimension length, we have to consider 5001 // the current section as having any size, so it is not necessarily 5002 // unitary. If it happen to be unity size, that's user fault. 5003 return true; 5004 } 5005 5006 // Check if the length evaluates to 1. 5007 llvm::APSInt ConstLength; 5008 if (!Length->EvaluateAsInt(ConstLength, CGF.getContext())) 5009 return true; // Can have more that size 1. 5010 5011 return ConstLength.getSExtValue() != 1; 5012 } 5013 5014 /// \brief Generate the base pointers, section pointers, sizes and map type 5015 /// bits for the provided map type, map modifier, and expression components. 5016 /// \a IsFirstComponent should be set to true if the provided set of 5017 /// components is the first associated with a capture. 5018 void generateInfoForComponentList( 5019 OpenMPMapClauseKind MapType, OpenMPMapClauseKind MapTypeModifier, 5020 OMPClauseMappableExprCommon::MappableExprComponentListRef Components, 5021 MapValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers, 5022 MapValuesArrayTy &Sizes, MapFlagsArrayTy &Types, 5023 bool IsFirstComponentList) const { 5024 5025 // The following summarizes what has to be generated for each map and the 5026 // types bellow. The generated information is expressed in this order: 5027 // base pointer, section pointer, size, flags 5028 // (to add to the ones that come from the map type and modifier). 
5029 // 5030 // double d; 5031 // int i[100]; 5032 // float *p; 5033 // 5034 // struct S1 { 5035 // int i; 5036 // float f[50]; 5037 // } 5038 // struct S2 { 5039 // int i; 5040 // float f[50]; 5041 // S1 s; 5042 // double *p; 5043 // struct S2 *ps; 5044 // } 5045 // S2 s; 5046 // S2 *ps; 5047 // 5048 // map(d) 5049 // &d, &d, sizeof(double), noflags 5050 // 5051 // map(i) 5052 // &i, &i, 100*sizeof(int), noflags 5053 // 5054 // map(i[1:23]) 5055 // &i(=&i[0]), &i[1], 23*sizeof(int), noflags 5056 // 5057 // map(p) 5058 // &p, &p, sizeof(float*), noflags 5059 // 5060 // map(p[1:24]) 5061 // p, &p[1], 24*sizeof(float), noflags 5062 // 5063 // map(s) 5064 // &s, &s, sizeof(S2), noflags 5065 // 5066 // map(s.i) 5067 // &s, &(s.i), sizeof(int), noflags 5068 // 5069 // map(s.s.f) 5070 // &s, &(s.i.f), 50*sizeof(int), noflags 5071 // 5072 // map(s.p) 5073 // &s, &(s.p), sizeof(double*), noflags 5074 // 5075 // map(s.p[:22], s.a s.b) 5076 // &s, &(s.p), sizeof(double*), noflags 5077 // &(s.p), &(s.p[0]), 22*sizeof(double), ptr_flag + extra_flag 5078 // 5079 // map(s.ps) 5080 // &s, &(s.ps), sizeof(S2*), noflags 5081 // 5082 // map(s.ps->s.i) 5083 // &s, &(s.ps), sizeof(S2*), noflags 5084 // &(s.ps), &(s.ps->s.i), sizeof(int), ptr_flag + extra_flag 5085 // 5086 // map(s.ps->ps) 5087 // &s, &(s.ps), sizeof(S2*), noflags 5088 // &(s.ps), &(s.ps->ps), sizeof(S2*), ptr_flag + extra_flag 5089 // 5090 // map(s.ps->ps->ps) 5091 // &s, &(s.ps), sizeof(S2*), noflags 5092 // &(s.ps), &(s.ps->ps), sizeof(S2*), ptr_flag + extra_flag 5093 // &(s.ps->ps), &(s.ps->ps->ps), sizeof(S2*), ptr_flag + extra_flag 5094 // 5095 // map(s.ps->ps->s.f[:22]) 5096 // &s, &(s.ps), sizeof(S2*), noflags 5097 // &(s.ps), &(s.ps->ps), sizeof(S2*), ptr_flag + extra_flag 5098 // &(s.ps->ps), &(s.ps->ps->s.f[0]), 22*sizeof(float), ptr_flag + extra_flag 5099 // 5100 // map(ps) 5101 // &ps, &ps, sizeof(S2*), noflags 5102 // 5103 // map(ps->i) 5104 // ps, &(ps->i), sizeof(int), noflags 5105 // 5106 // 
map(ps->s.f) 5107 // ps, &(ps->s.f[0]), 50*sizeof(float), noflags 5108 // 5109 // map(ps->p) 5110 // ps, &(ps->p), sizeof(double*), noflags 5111 // 5112 // map(ps->p[:22]) 5113 // ps, &(ps->p), sizeof(double*), noflags 5114 // &(ps->p), &(ps->p[0]), 22*sizeof(double), ptr_flag + extra_flag 5115 // 5116 // map(ps->ps) 5117 // ps, &(ps->ps), sizeof(S2*), noflags 5118 // 5119 // map(ps->ps->s.i) 5120 // ps, &(ps->ps), sizeof(S2*), noflags 5121 // &(ps->ps), &(ps->ps->s.i), sizeof(int), ptr_flag + extra_flag 5122 // 5123 // map(ps->ps->ps) 5124 // ps, &(ps->ps), sizeof(S2*), noflags 5125 // &(ps->ps), &(ps->ps->ps), sizeof(S2*), ptr_flag + extra_flag 5126 // 5127 // map(ps->ps->ps->ps) 5128 // ps, &(ps->ps), sizeof(S2*), noflags 5129 // &(ps->ps), &(ps->ps->ps), sizeof(S2*), ptr_flag + extra_flag 5130 // &(ps->ps->ps), &(ps->ps->ps->ps), sizeof(S2*), ptr_flag + extra_flag 5131 // 5132 // map(ps->ps->ps->s.f[:22]) 5133 // ps, &(ps->ps), sizeof(S2*), noflags 5134 // &(ps->ps), &(ps->ps->ps), sizeof(S2*), ptr_flag + extra_flag 5135 // &(ps->ps->ps), &(ps->ps->ps->s.f[0]), 22*sizeof(float), ptr_flag + 5136 // extra_flag 5137 5138 // Track if the map information being generated is the first for a capture. 5139 bool IsCaptureFirstInfo = IsFirstComponentList; 5140 5141 // Scan the components from the base to the complete expression. 5142 auto CI = Components.rbegin(); 5143 auto CE = Components.rend(); 5144 auto I = CI; 5145 5146 // Track if the map information being generated is the first for a list of 5147 // components. 5148 bool IsExpressionFirstInfo = true; 5149 llvm::Value *BP = nullptr; 5150 5151 if (auto *ME = dyn_cast<MemberExpr>(I->getAssociatedExpression())) { 5152 // The base is the 'this' pointer. The content of the pointer is going 5153 // to be the base of the field being mapped. 5154 BP = CGF.EmitScalarExpr(ME->getBase()); 5155 } else { 5156 // The base is the reference to the variable. 5157 // BP = &Var. 
5158 BP = CGF.EmitLValue(cast<DeclRefExpr>(I->getAssociatedExpression())) 5159 .getPointer(); 5160 5161 // If the variable is a pointer and is being dereferenced (i.e. is not 5162 // the last component), the base has to be the pointer itself, not his 5163 // reference. 5164 if (I->getAssociatedDeclaration()->getType()->isAnyPointerType() && 5165 std::next(I) != CE) { 5166 auto PtrAddr = CGF.MakeNaturalAlignAddrLValue( 5167 BP, I->getAssociatedDeclaration()->getType()); 5168 BP = CGF.EmitLoadOfPointerLValue(PtrAddr.getAddress(), 5169 I->getAssociatedDeclaration() 5170 ->getType() 5171 ->getAs<PointerType>()) 5172 .getPointer(); 5173 5174 // We do not need to generate individual map information for the 5175 // pointer, it can be associated with the combined storage. 5176 ++I; 5177 } 5178 } 5179 5180 for (; I != CE; ++I) { 5181 auto Next = std::next(I); 5182 5183 // We need to generate the addresses and sizes if this is the last 5184 // component, if the component is a pointer or if it is an array section 5185 // whose length can't be proved to be one. If this is a pointer, it 5186 // becomes the base address for the following components. 5187 5188 // A final array section, is one whose length can't be proved to be one. 5189 bool IsFinalArraySection = 5190 isFinalArraySectionExpression(I->getAssociatedExpression()); 5191 5192 // Get information on whether the element is a pointer. Have to do a 5193 // special treatment for array sections given that they are built-in 5194 // types. 
5195 const auto *OASE = 5196 dyn_cast<OMPArraySectionExpr>(I->getAssociatedExpression()); 5197 bool IsPointer = 5198 (OASE && 5199 OMPArraySectionExpr::getBaseOriginalType(OASE) 5200 .getCanonicalType() 5201 ->isAnyPointerType()) || 5202 I->getAssociatedExpression()->getType()->isAnyPointerType(); 5203 5204 if (Next == CE || IsPointer || IsFinalArraySection) { 5205 5206 // If this is not the last component, we expect the pointer to be 5207 // associated with an array expression or member expression. 5208 assert((Next == CE || 5209 isa<MemberExpr>(Next->getAssociatedExpression()) || 5210 isa<ArraySubscriptExpr>(Next->getAssociatedExpression()) || 5211 isa<OMPArraySectionExpr>(Next->getAssociatedExpression())) && 5212 "Unexpected expression"); 5213 5214 // Save the base we are currently using. 5215 BasePointers.push_back(BP); 5216 5217 auto *LB = CGF.EmitLValue(I->getAssociatedExpression()).getPointer(); 5218 auto *Size = getExprTypeSize(I->getAssociatedExpression()); 5219 5220 Pointers.push_back(LB); 5221 Sizes.push_back(Size); 5222 // We need to add a pointer flag for each map that comes from the the 5223 // same expression except for the first one. We need to add the extra 5224 // flag for each map that relates with the current capture, except for 5225 // the first one (there is a set of entries for each capture). 5226 Types.push_back(getMapTypeBits(MapType, MapTypeModifier, 5227 !IsExpressionFirstInfo, 5228 !IsCaptureFirstInfo)); 5229 5230 // If we have a final array section, we are done with this expression. 5231 if (IsFinalArraySection) 5232 break; 5233 5234 // The pointer becomes the base for the next element. 
5235 if (Next != CE) 5236 BP = LB; 5237 5238 IsExpressionFirstInfo = false; 5239 IsCaptureFirstInfo = false; 5240 continue; 5241 } 5242 } 5243 } 5244 5245 public: 5246 MappableExprsHandler(const OMPExecutableDirective &Dir, CodeGenFunction &CGF) 5247 : Directive(Dir), CGF(CGF) {} 5248 5249 /// \brief Generate all the base pointers, section pointers, sizes and map 5250 /// types for the extracted mappable expressions. 5251 void generateAllInfo(MapValuesArrayTy &BasePointers, 5252 MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes, 5253 MapFlagsArrayTy &Types) const { 5254 BasePointers.clear(); 5255 Pointers.clear(); 5256 Sizes.clear(); 5257 Types.clear(); 5258 5259 struct MapInfo { 5260 OMPClauseMappableExprCommon::MappableExprComponentListRef Components; 5261 OpenMPMapClauseKind MapType; 5262 OpenMPMapClauseKind MapTypeModifier; 5263 }; 5264 5265 // We have to process the component lists that relate with the same 5266 // declaration in a single chunk so that we can generate the map flags 5267 // correctly. Therefore, we organize all lists in a map. 5268 llvm::DenseMap<const ValueDecl *, SmallVector<MapInfo, 8>> Info; 5269 for (auto *C : Directive.getClausesOfKind<OMPMapClause>()) 5270 for (auto L : C->component_lists()) { 5271 const ValueDecl *VD = 5272 L.first ? cast<ValueDecl>(L.first->getCanonicalDecl()) : nullptr; 5273 Info[VD].push_back( 5274 {L.second, C->getMapType(), C->getMapTypeModifier()}); 5275 } 5276 5277 for (auto &M : Info) { 5278 // We need to know when we generate information for the first component 5279 // associated with a capture, because the mapping flags depend on it. 
5280 bool IsFirstComponentList = true; 5281 for (MapInfo &L : M.second) { 5282 assert(!L.Components.empty() && 5283 "Not expecting declaration with no component lists."); 5284 generateInfoForComponentList(L.MapType, L.MapTypeModifier, L.Components, 5285 BasePointers, Pointers, Sizes, Types, 5286 IsFirstComponentList); 5287 IsFirstComponentList = false; 5288 } 5289 } 5290 } 5291 5292 /// \brief Generate the base pointers, section pointers, sizes and map types 5293 /// associated to a given capture. 5294 void generateInfoForCapture(const CapturedStmt::Capture *Cap, 5295 MapValuesArrayTy &BasePointers, 5296 MapValuesArrayTy &Pointers, 5297 MapValuesArrayTy &Sizes, 5298 MapFlagsArrayTy &Types) const { 5299 assert(!Cap->capturesVariableArrayType() && 5300 "Not expecting to generate map info for a variable array type!"); 5301 5302 BasePointers.clear(); 5303 Pointers.clear(); 5304 Sizes.clear(); 5305 Types.clear(); 5306 5307 const ValueDecl *VD = 5308 Cap->capturesThis() 5309 ? nullptr 5310 : cast<ValueDecl>(Cap->getCapturedVar()->getCanonicalDecl()); 5311 5312 // We need to know when we generating information for the first component 5313 // associated with a capture, because the mapping flags depend on it. 5314 bool IsFirstComponentList = true; 5315 for (auto *C : Directive.getClausesOfKind<OMPMapClause>()) 5316 for (auto L : C->decl_component_lists(VD)) { 5317 assert(L.first == VD && 5318 "We got information for the wrong declaration??"); 5319 assert(!L.second.empty() && 5320 "Not expecting declaration with no component lists."); 5321 generateInfoForComponentList(C->getMapType(), C->getMapTypeModifier(), 5322 L.second, BasePointers, Pointers, Sizes, 5323 Types, IsFirstComponentList); 5324 IsFirstComponentList = false; 5325 } 5326 5327 return; 5328 } 5329 }; 5330 5331 enum OpenMPOffloadingReservedDeviceIDs { 5332 /// \brief Device ID if the device was not defined, runtime should get it 5333 /// from environment variables in the spec. 
5334 OMP_DEVICEID_UNDEF = -1, 5335 }; 5336 } // anonymous namespace 5337 5338 /// \brief Emit the arrays used to pass the captures and map information to the 5339 /// offloading runtime library. If there is no map or capture information, 5340 /// return nullptr by reference. 5341 static void 5342 emitOffloadingArrays(CodeGenFunction &CGF, llvm::Value *&BasePointersArray, 5343 llvm::Value *&PointersArray, llvm::Value *&SizesArray, 5344 llvm::Value *&MapTypesArray, 5345 MappableExprsHandler::MapValuesArrayTy &BasePointers, 5346 MappableExprsHandler::MapValuesArrayTy &Pointers, 5347 MappableExprsHandler::MapValuesArrayTy &Sizes, 5348 MappableExprsHandler::MapFlagsArrayTy &MapTypes) { 5349 auto &CGM = CGF.CGM; 5350 auto &Ctx = CGF.getContext(); 5351 5352 BasePointersArray = PointersArray = SizesArray = MapTypesArray = nullptr; 5353 5354 if (unsigned PointerNumVal = BasePointers.size()) { 5355 // Detect if we have any capture size requiring runtime evaluation of the 5356 // size so that a constant array could be eventually used. 5357 bool hasRuntimeEvaluationCaptureSize = false; 5358 for (auto *S : Sizes) 5359 if (!isa<llvm::Constant>(S)) { 5360 hasRuntimeEvaluationCaptureSize = true; 5361 break; 5362 } 5363 5364 llvm::APInt PointerNumAP(32, PointerNumVal, /*isSigned=*/true); 5365 QualType PointerArrayType = 5366 Ctx.getConstantArrayType(Ctx.VoidPtrTy, PointerNumAP, ArrayType::Normal, 5367 /*IndexTypeQuals=*/0); 5368 5369 BasePointersArray = 5370 CGF.CreateMemTemp(PointerArrayType, ".offload_baseptrs").getPointer(); 5371 PointersArray = 5372 CGF.CreateMemTemp(PointerArrayType, ".offload_ptrs").getPointer(); 5373 5374 // If we don't have any VLA types or other types that require runtime 5375 // evaluation, we can use a constant array for the map sizes, otherwise we 5376 // need to fill up the arrays as we do for the pointers. 
5377 if (hasRuntimeEvaluationCaptureSize) { 5378 QualType SizeArrayType = Ctx.getConstantArrayType( 5379 Ctx.getSizeType(), PointerNumAP, ArrayType::Normal, 5380 /*IndexTypeQuals=*/0); 5381 SizesArray = 5382 CGF.CreateMemTemp(SizeArrayType, ".offload_sizes").getPointer(); 5383 } else { 5384 // We expect all the sizes to be constant, so we collect them to create 5385 // a constant array. 5386 SmallVector<llvm::Constant *, 16> ConstSizes; 5387 for (auto S : Sizes) 5388 ConstSizes.push_back(cast<llvm::Constant>(S)); 5389 5390 auto *SizesArrayInit = llvm::ConstantArray::get( 5391 llvm::ArrayType::get(CGM.SizeTy, ConstSizes.size()), ConstSizes); 5392 auto *SizesArrayGbl = new llvm::GlobalVariable( 5393 CGM.getModule(), SizesArrayInit->getType(), 5394 /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage, 5395 SizesArrayInit, ".offload_sizes"); 5396 SizesArrayGbl->setUnnamedAddr(true); 5397 SizesArray = SizesArrayGbl; 5398 } 5399 5400 // The map types are always constant so we don't need to generate code to 5401 // fill arrays. Instead, we create an array constant. 
5402 llvm::Constant *MapTypesArrayInit = 5403 llvm::ConstantDataArray::get(CGF.Builder.getContext(), MapTypes); 5404 auto *MapTypesArrayGbl = new llvm::GlobalVariable( 5405 CGM.getModule(), MapTypesArrayInit->getType(), 5406 /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage, 5407 MapTypesArrayInit, ".offload_maptypes"); 5408 MapTypesArrayGbl->setUnnamedAddr(true); 5409 MapTypesArray = MapTypesArrayGbl; 5410 5411 for (unsigned i = 0; i < PointerNumVal; ++i) { 5412 llvm::Value *BPVal = BasePointers[i]; 5413 if (BPVal->getType()->isPointerTy()) 5414 BPVal = CGF.Builder.CreateBitCast(BPVal, CGM.VoidPtrTy); 5415 else { 5416 assert(BPVal->getType()->isIntegerTy() && 5417 "If not a pointer, the value type must be an integer."); 5418 BPVal = CGF.Builder.CreateIntToPtr(BPVal, CGM.VoidPtrTy); 5419 } 5420 llvm::Value *BP = CGF.Builder.CreateConstInBoundsGEP2_32( 5421 llvm::ArrayType::get(CGM.VoidPtrTy, PointerNumVal), BasePointersArray, 5422 0, i); 5423 Address BPAddr(BP, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy)); 5424 CGF.Builder.CreateStore(BPVal, BPAddr); 5425 5426 llvm::Value *PVal = Pointers[i]; 5427 if (PVal->getType()->isPointerTy()) 5428 PVal = CGF.Builder.CreateBitCast(PVal, CGM.VoidPtrTy); 5429 else { 5430 assert(PVal->getType()->isIntegerTy() && 5431 "If not a pointer, the value type must be an integer."); 5432 PVal = CGF.Builder.CreateIntToPtr(PVal, CGM.VoidPtrTy); 5433 } 5434 llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32( 5435 llvm::ArrayType::get(CGM.VoidPtrTy, PointerNumVal), PointersArray, 0, 5436 i); 5437 Address PAddr(P, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy)); 5438 CGF.Builder.CreateStore(PVal, PAddr); 5439 5440 if (hasRuntimeEvaluationCaptureSize) { 5441 llvm::Value *S = CGF.Builder.CreateConstInBoundsGEP2_32( 5442 llvm::ArrayType::get(CGM.SizeTy, PointerNumVal), SizesArray, 5443 /*Idx0=*/0, 5444 /*Idx1=*/i); 5445 Address SAddr(S, Ctx.getTypeAlignInChars(Ctx.getSizeType())); 5446 CGF.Builder.CreateStore( 5447 
CGF.Builder.CreateIntCast(Sizes[i], CGM.SizeTy, /*isSigned=*/true), 5448 SAddr); 5449 } 5450 } 5451 } 5452 } 5453 /// \brief Emit the arguments to be passed to the runtime library based on the 5454 /// arrays of pointers, sizes and map types. 5455 static void emitOffloadingArraysArgument( 5456 CodeGenFunction &CGF, llvm::Value *&BasePointersArrayArg, 5457 llvm::Value *&PointersArrayArg, llvm::Value *&SizesArrayArg, 5458 llvm::Value *&MapTypesArrayArg, llvm::Value *BasePointersArray, 5459 llvm::Value *PointersArray, llvm::Value *SizesArray, 5460 llvm::Value *MapTypesArray, unsigned NumElems) { 5461 auto &CGM = CGF.CGM; 5462 if (NumElems) { 5463 BasePointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( 5464 llvm::ArrayType::get(CGM.VoidPtrTy, NumElems), BasePointersArray, 5465 /*Idx0=*/0, /*Idx1=*/0); 5466 PointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( 5467 llvm::ArrayType::get(CGM.VoidPtrTy, NumElems), PointersArray, 5468 /*Idx0=*/0, 5469 /*Idx1=*/0); 5470 SizesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( 5471 llvm::ArrayType::get(CGM.SizeTy, NumElems), SizesArray, 5472 /*Idx0=*/0, /*Idx1=*/0); 5473 MapTypesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( 5474 llvm::ArrayType::get(CGM.Int32Ty, NumElems), MapTypesArray, 5475 /*Idx0=*/0, 5476 /*Idx1=*/0); 5477 } else { 5478 BasePointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy); 5479 PointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy); 5480 SizesArrayArg = llvm::ConstantPointerNull::get(CGM.SizeTy->getPointerTo()); 5481 MapTypesArrayArg = 5482 llvm::ConstantPointerNull::get(CGM.Int32Ty->getPointerTo()); 5483 } 5484 } 5485 5486 void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF, 5487 const OMPExecutableDirective &D, 5488 llvm::Value *OutlinedFn, 5489 llvm::Value *OutlinedFnID, 5490 const Expr *IfCond, const Expr *Device, 5491 ArrayRef<llvm::Value *> CapturedVars) { 5492 if (!CGF.HaveInsertPoint()) 5493 return; 5494 5495 assert(OutlinedFn && "Invalid 
outlined function!"); 5496 5497 auto &Ctx = CGF.getContext(); 5498 5499 // Fill up the arrays with all the captured variables. 5500 MappableExprsHandler::MapValuesArrayTy KernelArgs; 5501 MappableExprsHandler::MapValuesArrayTy BasePointers; 5502 MappableExprsHandler::MapValuesArrayTy Pointers; 5503 MappableExprsHandler::MapValuesArrayTy Sizes; 5504 MappableExprsHandler::MapFlagsArrayTy MapTypes; 5505 5506 MappableExprsHandler::MapValuesArrayTy CurBasePointers; 5507 MappableExprsHandler::MapValuesArrayTy CurPointers; 5508 MappableExprsHandler::MapValuesArrayTy CurSizes; 5509 MappableExprsHandler::MapFlagsArrayTy CurMapTypes; 5510 5511 // Get map clause information. 5512 MappableExprsHandler MCHandler(D, CGF); 5513 5514 const CapturedStmt &CS = *cast<CapturedStmt>(D.getAssociatedStmt()); 5515 auto RI = CS.getCapturedRecordDecl()->field_begin(); 5516 auto CV = CapturedVars.begin(); 5517 for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(), 5518 CE = CS.capture_end(); 5519 CI != CE; ++CI, ++RI, ++CV) { 5520 StringRef Name; 5521 QualType Ty; 5522 5523 CurBasePointers.clear(); 5524 CurPointers.clear(); 5525 CurSizes.clear(); 5526 CurMapTypes.clear(); 5527 5528 // VLA sizes are passed to the outlined region by copy and do not have map 5529 // information associated. 5530 if (CI->capturesVariableArrayType()) { 5531 CurBasePointers.push_back(*CV); 5532 CurPointers.push_back(*CV); 5533 CurSizes.push_back(CGF.getTypeSize(RI->getType())); 5534 // Copy to the device as an argument. No need to retrieve it. 5535 CurMapTypes.push_back(MappableExprsHandler::OMP_MAP_BYCOPY); 5536 } else { 5537 // If we have any information in the map clause, we use it, otherwise we 5538 // just do a default mapping. 5539 MCHandler.generateInfoForCapture(CI, CurBasePointers, CurPointers, 5540 CurSizes, CurMapTypes); 5541 5542 if (CurBasePointers.empty()) { 5543 // Do the default mapping. 
5544 if (CI->capturesThis()) { 5545 CurBasePointers.push_back(*CV); 5546 CurPointers.push_back(*CV); 5547 const PointerType *PtrTy = 5548 cast<PointerType>(RI->getType().getTypePtr()); 5549 CurSizes.push_back(CGF.getTypeSize(PtrTy->getPointeeType())); 5550 // Default map type. 5551 CurMapTypes.push_back(MappableExprsHandler::OMP_MAP_TO | 5552 MappableExprsHandler::OMP_MAP_FROM); 5553 } else if (CI->capturesVariableByCopy()) { 5554 CurMapTypes.push_back(MappableExprsHandler::OMP_MAP_BYCOPY); 5555 if (!RI->getType()->isAnyPointerType()) { 5556 // If the field is not a pointer, we need to save the actual value 5557 // and load it as a void pointer. 5558 auto DstAddr = CGF.CreateMemTemp( 5559 Ctx.getUIntPtrType(), 5560 Twine(CI->getCapturedVar()->getName()) + ".casted"); 5561 LValue DstLV = CGF.MakeAddrLValue(DstAddr, Ctx.getUIntPtrType()); 5562 5563 auto *SrcAddrVal = CGF.EmitScalarConversion( 5564 DstAddr.getPointer(), Ctx.getPointerType(Ctx.getUIntPtrType()), 5565 Ctx.getPointerType(RI->getType()), SourceLocation()); 5566 LValue SrcLV = 5567 CGF.MakeNaturalAlignAddrLValue(SrcAddrVal, RI->getType()); 5568 5569 // Store the value using the source type pointer. 5570 CGF.EmitStoreThroughLValue(RValue::get(*CV), SrcLV); 5571 5572 // Load the value using the destination type pointer. 
5573 CurBasePointers.push_back( 5574 CGF.EmitLoadOfLValue(DstLV, SourceLocation()).getScalarVal()); 5575 CurPointers.push_back(CurBasePointers.back()); 5576 } else { 5577 CurBasePointers.push_back(*CV); 5578 CurPointers.push_back(*CV); 5579 } 5580 CurSizes.push_back(CGF.getTypeSize(RI->getType())); 5581 } else { 5582 assert(CI->capturesVariable() && "Expected captured reference."); 5583 CurBasePointers.push_back(*CV); 5584 CurPointers.push_back(*CV); 5585 5586 const ReferenceType *PtrTy = 5587 cast<ReferenceType>(RI->getType().getTypePtr()); 5588 QualType ElementType = PtrTy->getPointeeType(); 5589 CurSizes.push_back(CGF.getTypeSize(ElementType)); 5590 // The default map type for a scalar/complex type is 'to' because by 5591 // default the value doesn't have to be retrieved. For an aggregate 5592 // type, 5593 // the default is 'tofrom'. 5594 CurMapTypes.push_back(ElementType->isAggregateType() 5595 ? (MappableExprsHandler::OMP_MAP_TO | 5596 MappableExprsHandler::OMP_MAP_FROM) 5597 : MappableExprsHandler::OMP_MAP_TO); 5598 } 5599 } 5600 } 5601 // We expect to have at least an element of information for this capture. 5602 assert(!CurBasePointers.empty() && "Non-existing map pointer for capture!"); 5603 assert(CurBasePointers.size() == CurPointers.size() && 5604 CurBasePointers.size() == CurSizes.size() && 5605 CurBasePointers.size() == CurMapTypes.size() && 5606 "Inconsistent map information sizes!"); 5607 5608 // The kernel args are always the first elements of the base pointers 5609 // associated with a capture. 5610 KernelArgs.push_back(CurBasePointers.front()); 5611 // We need to append the results of this capture to what we already have. 
5612 BasePointers.append(CurBasePointers.begin(), CurBasePointers.end()); 5613 Pointers.append(CurPointers.begin(), CurPointers.end()); 5614 Sizes.append(CurSizes.begin(), CurSizes.end()); 5615 MapTypes.append(CurMapTypes.begin(), CurMapTypes.end()); 5616 } 5617 5618 // Keep track on whether the host function has to be executed. 5619 auto OffloadErrorQType = 5620 Ctx.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true); 5621 auto OffloadError = CGF.MakeAddrLValue( 5622 CGF.CreateMemTemp(OffloadErrorQType, ".run_host_version"), 5623 OffloadErrorQType); 5624 CGF.EmitStoreOfScalar(llvm::Constant::getNullValue(CGM.Int32Ty), 5625 OffloadError); 5626 5627 // Fill up the pointer arrays and transfer execution to the device. 5628 auto &&ThenGen = [&Ctx, &BasePointers, &Pointers, &Sizes, &MapTypes, Device, 5629 OutlinedFnID, OffloadError, OffloadErrorQType, 5630 &D](CodeGenFunction &CGF, PrePostActionTy &) { 5631 auto &RT = CGF.CGM.getOpenMPRuntime(); 5632 // Emit the offloading arrays. 5633 llvm::Value *BasePointersArray; 5634 llvm::Value *PointersArray; 5635 llvm::Value *SizesArray; 5636 llvm::Value *MapTypesArray; 5637 emitOffloadingArrays(CGF, BasePointersArray, PointersArray, SizesArray, 5638 MapTypesArray, BasePointers, Pointers, Sizes, 5639 MapTypes); 5640 emitOffloadingArraysArgument(CGF, BasePointersArray, PointersArray, 5641 SizesArray, MapTypesArray, BasePointersArray, 5642 PointersArray, SizesArray, MapTypesArray, 5643 BasePointers.size()); 5644 5645 // On top of the arrays that were filled up, the target offloading call 5646 // takes as arguments the device id as well as the host pointer. The host 5647 // pointer is used by the runtime library to identify the current target 5648 // region, so it only has to be unique and not necessarily point to 5649 // anything. 
It could be the pointer to the outlined function that 5650 // implements the target region, but we aren't using that so that the 5651 // compiler doesn't need to keep that, and could therefore inline the host 5652 // function if proven worthwhile during optimization. 5653 5654 // From this point on, we need to have an ID of the target region defined. 5655 assert(OutlinedFnID && "Invalid outlined function ID!"); 5656 5657 // Emit device ID if any. 5658 llvm::Value *DeviceID; 5659 if (Device) 5660 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device), 5661 CGF.Int32Ty, /*isSigned=*/true); 5662 else 5663 DeviceID = CGF.Builder.getInt32(OMP_DEVICEID_UNDEF); 5664 5665 // Emit the number of elements in the offloading arrays. 5666 llvm::Value *PointerNum = CGF.Builder.getInt32(BasePointers.size()); 5667 5668 // Return value of the runtime offloading call. 5669 llvm::Value *Return; 5670 5671 auto *NumTeams = emitNumTeamsClauseForTargetDirective(RT, CGF, D); 5672 auto *ThreadLimit = emitThreadLimitClauseForTargetDirective(RT, CGF, D); 5673 5674 // If we have NumTeams defined this means that we have an enclosed teams 5675 // region. Therefore we also expect to have ThreadLimit defined. These two 5676 // values should be defined in the presence of a teams directive, regardless 5677 // of having any clauses associated. If the user is using teams but no 5678 // clauses, these two values will be the default that should be passed to 5679 // the runtime library - a 32-bit integer with the value zero. 
5680 if (NumTeams) { 5681 assert(ThreadLimit && "Thread limit expression should be available along " 5682 "with number of teams."); 5683 llvm::Value *OffloadingArgs[] = { 5684 DeviceID, OutlinedFnID, PointerNum, 5685 BasePointersArray, PointersArray, SizesArray, 5686 MapTypesArray, NumTeams, ThreadLimit}; 5687 Return = CGF.EmitRuntimeCall( 5688 RT.createRuntimeFunction(OMPRTL__tgt_target_teams), OffloadingArgs); 5689 } else { 5690 llvm::Value *OffloadingArgs[] = { 5691 DeviceID, OutlinedFnID, PointerNum, BasePointersArray, 5692 PointersArray, SizesArray, MapTypesArray}; 5693 Return = CGF.EmitRuntimeCall(RT.createRuntimeFunction(OMPRTL__tgt_target), 5694 OffloadingArgs); 5695 } 5696 5697 CGF.EmitStoreOfScalar(Return, OffloadError); 5698 }; 5699 5700 // Notify that the host version must be executed. 5701 auto &&ElseGen = [OffloadError](CodeGenFunction &CGF, PrePostActionTy &) { 5702 CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.Int32Ty, /*V=*/-1u), 5703 OffloadError); 5704 }; 5705 5706 // If we have a target function ID it means that we need to support 5707 // offloading, otherwise, just execute on the host. We need to execute on host 5708 // regardless of the conditional in the if clause if, e.g., the user do not 5709 // specify target triples. 5710 if (OutlinedFnID) { 5711 if (IfCond) 5712 emitOMPIfClause(CGF, IfCond, ThenGen, ElseGen); 5713 else { 5714 RegionCodeGenTy ThenRCG(ThenGen); 5715 ThenRCG(CGF); 5716 } 5717 } else { 5718 RegionCodeGenTy ElseRCG(ElseGen); 5719 ElseRCG(CGF); 5720 } 5721 5722 // Check the error code and execute the host version if required. 
5723 auto OffloadFailedBlock = CGF.createBasicBlock("omp_offload.failed"); 5724 auto OffloadContBlock = CGF.createBasicBlock("omp_offload.cont"); 5725 auto OffloadErrorVal = CGF.EmitLoadOfScalar(OffloadError, SourceLocation()); 5726 auto Failed = CGF.Builder.CreateIsNotNull(OffloadErrorVal); 5727 CGF.Builder.CreateCondBr(Failed, OffloadFailedBlock, OffloadContBlock); 5728 5729 CGF.EmitBlock(OffloadFailedBlock); 5730 CGF.Builder.CreateCall(OutlinedFn, KernelArgs); 5731 CGF.EmitBranch(OffloadContBlock); 5732 5733 CGF.EmitBlock(OffloadContBlock, /*IsFinished=*/true); 5734 } 5735 5736 void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S, 5737 StringRef ParentName) { 5738 if (!S) 5739 return; 5740 5741 // If we find a OMP target directive, codegen the outline function and 5742 // register the result. 5743 // FIXME: Add other directives with target when they become supported. 5744 bool isTargetDirective = isa<OMPTargetDirective>(S); 5745 5746 if (isTargetDirective) { 5747 auto *E = cast<OMPExecutableDirective>(S); 5748 unsigned DeviceID; 5749 unsigned FileID; 5750 unsigned Line; 5751 getTargetEntryUniqueInfo(CGM.getContext(), E->getLocStart(), DeviceID, 5752 FileID, Line); 5753 5754 // Is this a target region that should not be emitted as an entry point? If 5755 // so just signal we are done with this target region. 
5756 if (!OffloadEntriesInfoManager.hasTargetRegionEntryInfo(DeviceID, FileID, 5757 ParentName, Line)) 5758 return; 5759 5760 llvm::Function *Fn; 5761 llvm::Constant *Addr; 5762 std::tie(Fn, Addr) = 5763 CodeGenFunction::EmitOMPTargetDirectiveOutlinedFunction( 5764 CGM, cast<OMPTargetDirective>(*E), ParentName, 5765 /*isOffloadEntry=*/true); 5766 assert(Fn && Addr && "Target region emission failed."); 5767 return; 5768 } 5769 5770 if (const OMPExecutableDirective *E = dyn_cast<OMPExecutableDirective>(S)) { 5771 if (!E->hasAssociatedStmt()) 5772 return; 5773 5774 scanForTargetRegionsFunctions( 5775 cast<CapturedStmt>(E->getAssociatedStmt())->getCapturedStmt(), 5776 ParentName); 5777 return; 5778 } 5779 5780 // If this is a lambda function, look into its body. 5781 if (auto *L = dyn_cast<LambdaExpr>(S)) 5782 S = L->getBody(); 5783 5784 // Keep looking for target regions recursively. 5785 for (auto *II : S->children()) 5786 scanForTargetRegionsFunctions(II, ParentName); 5787 } 5788 5789 bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) { 5790 auto &FD = *cast<FunctionDecl>(GD.getDecl()); 5791 5792 // If emitting code for the host, we do not process FD here. Instead we do 5793 // the normal code generation. 5794 if (!CGM.getLangOpts().OpenMPIsDevice) 5795 return false; 5796 5797 // Try to detect target regions in the function. 5798 scanForTargetRegionsFunctions(FD.getBody(), CGM.getMangledName(GD)); 5799 5800 // We should not emit any function othen that the ones created during the 5801 // scanning. Therefore, we signal that this function is completely dealt 5802 // with. 5803 return true; 5804 } 5805 5806 bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) { 5807 if (!CGM.getLangOpts().OpenMPIsDevice) 5808 return false; 5809 5810 // Check if there are Ctors/Dtors in this declaration and look for target 5811 // regions in it. We use the complete variant to produce the kernel name 5812 // mangling. 
5813 QualType RDTy = cast<VarDecl>(GD.getDecl())->getType(); 5814 if (auto *RD = RDTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl()) { 5815 for (auto *Ctor : RD->ctors()) { 5816 StringRef ParentName = 5817 CGM.getMangledName(GlobalDecl(Ctor, Ctor_Complete)); 5818 scanForTargetRegionsFunctions(Ctor->getBody(), ParentName); 5819 } 5820 auto *Dtor = RD->getDestructor(); 5821 if (Dtor) { 5822 StringRef ParentName = 5823 CGM.getMangledName(GlobalDecl(Dtor, Dtor_Complete)); 5824 scanForTargetRegionsFunctions(Dtor->getBody(), ParentName); 5825 } 5826 } 5827 5828 // If we are in target mode we do not emit any global (declare target is not 5829 // implemented yet). Therefore we signal that GD was processed in this case. 5830 return true; 5831 } 5832 5833 bool CGOpenMPRuntime::emitTargetGlobal(GlobalDecl GD) { 5834 auto *VD = GD.getDecl(); 5835 if (isa<FunctionDecl>(VD)) 5836 return emitTargetFunctions(GD); 5837 5838 return emitTargetGlobalVariable(GD); 5839 } 5840 5841 llvm::Function *CGOpenMPRuntime::emitRegistrationFunction() { 5842 // If we have offloading in the current module, we need to emit the entries 5843 // now and register the offloading descriptor. 5844 createOffloadEntriesAndInfoMetadata(); 5845 5846 // Create and register the offloading binary descriptors. This is the main 5847 // entity that captures all the information about offloading in the current 5848 // compilation unit. 
5849 return createOffloadingBinaryDescriptorRegistration(); 5850 } 5851 5852 void CGOpenMPRuntime::emitTeamsCall(CodeGenFunction &CGF, 5853 const OMPExecutableDirective &D, 5854 SourceLocation Loc, 5855 llvm::Value *OutlinedFn, 5856 ArrayRef<llvm::Value *> CapturedVars) { 5857 if (!CGF.HaveInsertPoint()) 5858 return; 5859 5860 auto *RTLoc = emitUpdateLocation(CGF, Loc); 5861 CodeGenFunction::RunCleanupsScope Scope(CGF); 5862 5863 // Build call __kmpc_fork_teams(loc, n, microtask, var1, .., varn); 5864 llvm::Value *Args[] = { 5865 RTLoc, 5866 CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars 5867 CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy())}; 5868 llvm::SmallVector<llvm::Value *, 16> RealArgs; 5869 RealArgs.append(std::begin(Args), std::end(Args)); 5870 RealArgs.append(CapturedVars.begin(), CapturedVars.end()); 5871 5872 auto RTLFn = createRuntimeFunction(OMPRTL__kmpc_fork_teams); 5873 CGF.EmitRuntimeCall(RTLFn, RealArgs); 5874 } 5875 5876 void CGOpenMPRuntime::emitNumTeamsClause(CodeGenFunction &CGF, 5877 const Expr *NumTeams, 5878 const Expr *ThreadLimit, 5879 SourceLocation Loc) { 5880 if (!CGF.HaveInsertPoint()) 5881 return; 5882 5883 auto *RTLoc = emitUpdateLocation(CGF, Loc); 5884 5885 llvm::Value *NumTeamsVal = 5886 (NumTeams) 5887 ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(NumTeams), 5888 CGF.CGM.Int32Ty, /* isSigned = */ true) 5889 : CGF.Builder.getInt32(0); 5890 5891 llvm::Value *ThreadLimitVal = 5892 (ThreadLimit) 5893 ? 
CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit), 5894 CGF.CGM.Int32Ty, /* isSigned = */ true) 5895 : CGF.Builder.getInt32(0); 5896 5897 // Build call __kmpc_push_num_teamss(&loc, global_tid, num_teams, thread_limit) 5898 llvm::Value *PushNumTeamsArgs[] = {RTLoc, getThreadID(CGF, Loc), NumTeamsVal, 5899 ThreadLimitVal}; 5900 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_num_teams), 5901 PushNumTeamsArgs); 5902 } 5903 5904 void CGOpenMPRuntime::emitTargetDataCalls(CodeGenFunction &CGF, 5905 const OMPExecutableDirective &D, 5906 const Expr *IfCond, 5907 const Expr *Device, 5908 const RegionCodeGenTy &CodeGen) { 5909 5910 if (!CGF.HaveInsertPoint()) 5911 return; 5912 5913 llvm::Value *BasePointersArray = nullptr; 5914 llvm::Value *PointersArray = nullptr; 5915 llvm::Value *SizesArray = nullptr; 5916 llvm::Value *MapTypesArray = nullptr; 5917 unsigned NumOfPtrs = 0; 5918 5919 // Generate the code for the opening of the data environment. Capture all the 5920 // arguments of the runtime call by reference because they are used in the 5921 // closing of the region. 5922 auto &&BeginThenGen = [&D, &CGF, &BasePointersArray, &PointersArray, 5923 &SizesArray, &MapTypesArray, Device, 5924 &NumOfPtrs](CodeGenFunction &CGF, PrePostActionTy &) { 5925 // Fill up the arrays with all the mapped variables. 5926 MappableExprsHandler::MapValuesArrayTy BasePointers; 5927 MappableExprsHandler::MapValuesArrayTy Pointers; 5928 MappableExprsHandler::MapValuesArrayTy Sizes; 5929 MappableExprsHandler::MapFlagsArrayTy MapTypes; 5930 5931 // Get map clause information. 5932 MappableExprsHandler MCHandler(D, CGF); 5933 MCHandler.generateAllInfo(BasePointers, Pointers, Sizes, MapTypes); 5934 NumOfPtrs = BasePointers.size(); 5935 5936 // Fill up the arrays and create the arguments. 
5937 emitOffloadingArrays(CGF, BasePointersArray, PointersArray, SizesArray, 5938 MapTypesArray, BasePointers, Pointers, Sizes, 5939 MapTypes); 5940 5941 llvm::Value *BasePointersArrayArg = nullptr; 5942 llvm::Value *PointersArrayArg = nullptr; 5943 llvm::Value *SizesArrayArg = nullptr; 5944 llvm::Value *MapTypesArrayArg = nullptr; 5945 emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg, 5946 SizesArrayArg, MapTypesArrayArg, 5947 BasePointersArray, PointersArray, SizesArray, 5948 MapTypesArray, NumOfPtrs); 5949 5950 // Emit device ID if any. 5951 llvm::Value *DeviceID = nullptr; 5952 if (Device) 5953 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device), 5954 CGF.Int32Ty, /*isSigned=*/true); 5955 else 5956 DeviceID = CGF.Builder.getInt32(OMP_DEVICEID_UNDEF); 5957 5958 // Emit the number of elements in the offloading arrays. 5959 auto *PointerNum = CGF.Builder.getInt32(NumOfPtrs); 5960 5961 llvm::Value *OffloadingArgs[] = { 5962 DeviceID, PointerNum, BasePointersArrayArg, 5963 PointersArrayArg, SizesArrayArg, MapTypesArrayArg}; 5964 auto &RT = CGF.CGM.getOpenMPRuntime(); 5965 CGF.EmitRuntimeCall(RT.createRuntimeFunction(OMPRTL__tgt_target_data_begin), 5966 OffloadingArgs); 5967 }; 5968 5969 // Generate code for the closing of the data region. 
5970 auto &&EndThenGen = [&CGF, &BasePointersArray, &PointersArray, &SizesArray, 5971 &MapTypesArray, Device, 5972 &NumOfPtrs](CodeGenFunction &CGF, PrePostActionTy &) { 5973 assert(BasePointersArray && PointersArray && SizesArray && MapTypesArray && 5974 NumOfPtrs && "Invalid data environment closing arguments."); 5975 5976 llvm::Value *BasePointersArrayArg = nullptr; 5977 llvm::Value *PointersArrayArg = nullptr; 5978 llvm::Value *SizesArrayArg = nullptr; 5979 llvm::Value *MapTypesArrayArg = nullptr; 5980 emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg, 5981 SizesArrayArg, MapTypesArrayArg, 5982 BasePointersArray, PointersArray, SizesArray, 5983 MapTypesArray, NumOfPtrs); 5984 5985 // Emit device ID if any. 5986 llvm::Value *DeviceID = nullptr; 5987 if (Device) 5988 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device), 5989 CGF.Int32Ty, /*isSigned=*/true); 5990 else 5991 DeviceID = CGF.Builder.getInt32(OMP_DEVICEID_UNDEF); 5992 5993 // Emit the number of elements in the offloading arrays. 5994 auto *PointerNum = CGF.Builder.getInt32(NumOfPtrs); 5995 5996 llvm::Value *OffloadingArgs[] = { 5997 DeviceID, PointerNum, BasePointersArrayArg, 5998 PointersArrayArg, SizesArrayArg, MapTypesArrayArg}; 5999 auto &RT = CGF.CGM.getOpenMPRuntime(); 6000 CGF.EmitRuntimeCall(RT.createRuntimeFunction(OMPRTL__tgt_target_data_end), 6001 OffloadingArgs); 6002 }; 6003 6004 // In the event we get an if clause, we don't have to take any action on the 6005 // else side. 
6006 auto &&ElseGen = [](CodeGenFunction &CGF, PrePostActionTy &) {}; 6007 6008 if (IfCond) { 6009 emitOMPIfClause(CGF, IfCond, BeginThenGen, ElseGen); 6010 } else { 6011 RegionCodeGenTy BeginThenRCG(BeginThenGen); 6012 BeginThenRCG(CGF); 6013 } 6014 6015 CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_target_data, CodeGen); 6016 6017 if (IfCond) { 6018 emitOMPIfClause(CGF, IfCond, EndThenGen, ElseGen); 6019 } else { 6020 RegionCodeGenTy EndThenRCG(EndThenGen); 6021 EndThenRCG(CGF); 6022 } 6023 } 6024 6025 void CGOpenMPRuntime::emitTargetEnterOrExitDataCall( 6026 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, 6027 const Expr *Device) { 6028 if (!CGF.HaveInsertPoint()) 6029 return; 6030 6031 assert((isa<OMPTargetEnterDataDirective>(D) || 6032 isa<OMPTargetExitDataDirective>(D)) && 6033 "Expecting either target enter or exit data directives."); 6034 6035 // Generate the code for the opening of the data environment. 6036 auto &&ThenGen = [&D, &CGF, Device](CodeGenFunction &CGF, PrePostActionTy &) { 6037 // Fill up the arrays with all the mapped variables. 6038 MappableExprsHandler::MapValuesArrayTy BasePointers; 6039 MappableExprsHandler::MapValuesArrayTy Pointers; 6040 MappableExprsHandler::MapValuesArrayTy Sizes; 6041 MappableExprsHandler::MapFlagsArrayTy MapTypes; 6042 6043 // Get map clause information. 6044 MappableExprsHandler MCHandler(D, CGF); 6045 MCHandler.generateAllInfo(BasePointers, Pointers, Sizes, MapTypes); 6046 6047 llvm::Value *BasePointersArrayArg = nullptr; 6048 llvm::Value *PointersArrayArg = nullptr; 6049 llvm::Value *SizesArrayArg = nullptr; 6050 llvm::Value *MapTypesArrayArg = nullptr; 6051 6052 // Fill up the arrays and create the arguments. 
6053 emitOffloadingArrays(CGF, BasePointersArrayArg, PointersArrayArg, 6054 SizesArrayArg, MapTypesArrayArg, BasePointers, 6055 Pointers, Sizes, MapTypes); 6056 emitOffloadingArraysArgument( 6057 CGF, BasePointersArrayArg, PointersArrayArg, SizesArrayArg, 6058 MapTypesArrayArg, BasePointersArrayArg, PointersArrayArg, SizesArrayArg, 6059 MapTypesArrayArg, BasePointers.size()); 6060 6061 // Emit device ID if any. 6062 llvm::Value *DeviceID = nullptr; 6063 if (Device) 6064 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device), 6065 CGF.Int32Ty, /*isSigned=*/true); 6066 else 6067 DeviceID = CGF.Builder.getInt32(OMP_DEVICEID_UNDEF); 6068 6069 // Emit the number of elements in the offloading arrays. 6070 auto *PointerNum = CGF.Builder.getInt32(BasePointers.size()); 6071 6072 llvm::Value *OffloadingArgs[] = { 6073 DeviceID, PointerNum, BasePointersArrayArg, 6074 PointersArrayArg, SizesArrayArg, MapTypesArrayArg}; 6075 auto &RT = CGF.CGM.getOpenMPRuntime(); 6076 CGF.EmitRuntimeCall( 6077 RT.createRuntimeFunction(isa<OMPTargetEnterDataDirective>(D) 6078 ? OMPRTL__tgt_target_data_begin 6079 : OMPRTL__tgt_target_data_end), 6080 OffloadingArgs); 6081 }; 6082 6083 // In the event we get an if clause, we don't have to take any action on the 6084 // else side. 6085 auto &&ElseGen = [](CodeGenFunction &CGF, PrePostActionTy &) {}; 6086 6087 if (IfCond) { 6088 emitOMPIfClause(CGF, IfCond, ThenGen, ElseGen); 6089 } else { 6090 RegionCodeGenTy ThenGenRCG(ThenGen); 6091 ThenGenRCG(CGF); 6092 } 6093 } 6094 6095 namespace { 6096 /// Kind of parameter in a function with 'declare simd' directive. 6097 enum ParamKindTy { LinearWithVarStride, Linear, Uniform, Vector }; 6098 /// Attribute set of the parameter. 
6099 struct ParamAttrTy { 6100 ParamKindTy Kind = Vector; 6101 llvm::APSInt StrideOrArg; 6102 llvm::APSInt Alignment; 6103 }; 6104 } // namespace 6105 6106 static unsigned evaluateCDTSize(const FunctionDecl *FD, 6107 ArrayRef<ParamAttrTy> ParamAttrs) { 6108 // Every vector variant of a SIMD-enabled function has a vector length (VLEN). 6109 // If OpenMP clause "simdlen" is used, the VLEN is the value of the argument 6110 // of that clause. The VLEN value must be power of 2. 6111 // In other case the notion of the function`s "characteristic data type" (CDT) 6112 // is used to compute the vector length. 6113 // CDT is defined in the following order: 6114 // a) For non-void function, the CDT is the return type. 6115 // b) If the function has any non-uniform, non-linear parameters, then the 6116 // CDT is the type of the first such parameter. 6117 // c) If the CDT determined by a) or b) above is struct, union, or class 6118 // type which is pass-by-value (except for the type that maps to the 6119 // built-in complex data type), the characteristic data type is int. 6120 // d) If none of the above three cases is applicable, the CDT is int. 6121 // The VLEN is then determined based on the CDT and the size of vector 6122 // register of that ISA for which current vector version is generated. The 6123 // VLEN is computed using the formula below: 6124 // VLEN = sizeof(vector_register) / sizeof(CDT), 6125 // where vector register size specified in section 3.2.1 Registers and the 6126 // Stack Frame of original AMD64 ABI document. 
6127 QualType RetType = FD->getReturnType(); 6128 if (RetType.isNull()) 6129 return 0; 6130 ASTContext &C = FD->getASTContext(); 6131 QualType CDT; 6132 if (!RetType.isNull() && !RetType->isVoidType()) 6133 CDT = RetType; 6134 else { 6135 unsigned Offset = 0; 6136 if (auto *MD = dyn_cast<CXXMethodDecl>(FD)) { 6137 if (ParamAttrs[Offset].Kind == Vector) 6138 CDT = C.getPointerType(C.getRecordType(MD->getParent())); 6139 ++Offset; 6140 } 6141 if (CDT.isNull()) { 6142 for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) { 6143 if (ParamAttrs[I + Offset].Kind == Vector) { 6144 CDT = FD->getParamDecl(I)->getType(); 6145 break; 6146 } 6147 } 6148 } 6149 } 6150 if (CDT.isNull()) 6151 CDT = C.IntTy; 6152 CDT = CDT->getCanonicalTypeUnqualified(); 6153 if (CDT->isRecordType() || CDT->isUnionType()) 6154 CDT = C.IntTy; 6155 return C.getTypeSize(CDT); 6156 } 6157 6158 static void 6159 emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn, 6160 llvm::APSInt VLENVal, 6161 ArrayRef<ParamAttrTy> ParamAttrs, 6162 OMPDeclareSimdDeclAttr::BranchStateTy State) { 6163 struct ISADataTy { 6164 char ISA; 6165 unsigned VecRegSize; 6166 }; 6167 ISADataTy ISAData[] = { 6168 { 6169 'b', 128 6170 }, // SSE 6171 { 6172 'c', 256 6173 }, // AVX 6174 { 6175 'd', 256 6176 }, // AVX2 6177 { 6178 'e', 512 6179 }, // AVX512 6180 }; 6181 llvm::SmallVector<char, 2> Masked; 6182 switch (State) { 6183 case OMPDeclareSimdDeclAttr::BS_Undefined: 6184 Masked.push_back('N'); 6185 Masked.push_back('M'); 6186 break; 6187 case OMPDeclareSimdDeclAttr::BS_Notinbranch: 6188 Masked.push_back('N'); 6189 break; 6190 case OMPDeclareSimdDeclAttr::BS_Inbranch: 6191 Masked.push_back('M'); 6192 break; 6193 } 6194 for (auto Mask : Masked) { 6195 for (auto &Data : ISAData) { 6196 SmallString<256> Buffer; 6197 llvm::raw_svector_ostream Out(Buffer); 6198 Out << "_ZGV" << Data.ISA << Mask; 6199 if (!VLENVal) { 6200 Out << llvm::APSInt::getUnsigned(Data.VecRegSize / 6201 evaluateCDTSize(FD, ParamAttrs)); 6202 
} else 6203 Out << VLENVal; 6204 for (auto &ParamAttr : ParamAttrs) { 6205 switch (ParamAttr.Kind){ 6206 case LinearWithVarStride: 6207 Out << 's' << ParamAttr.StrideOrArg; 6208 break; 6209 case Linear: 6210 Out << 'l'; 6211 if (!!ParamAttr.StrideOrArg) 6212 Out << ParamAttr.StrideOrArg; 6213 break; 6214 case Uniform: 6215 Out << 'u'; 6216 break; 6217 case Vector: 6218 Out << 'v'; 6219 break; 6220 } 6221 if (!!ParamAttr.Alignment) 6222 Out << 'a' << ParamAttr.Alignment; 6223 } 6224 Out << '_' << Fn->getName(); 6225 Fn->addFnAttr(Out.str()); 6226 } 6227 } 6228 } 6229 6230 void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD, 6231 llvm::Function *Fn) { 6232 ASTContext &C = CGM.getContext(); 6233 FD = FD->getCanonicalDecl(); 6234 // Map params to their positions in function decl. 6235 llvm::DenseMap<const Decl *, unsigned> ParamPositions; 6236 if (isa<CXXMethodDecl>(FD)) 6237 ParamPositions.insert({FD, 0}); 6238 unsigned ParamPos = ParamPositions.size(); 6239 for (auto *P : FD->params()) { 6240 ParamPositions.insert({P->getCanonicalDecl(), ParamPos}); 6241 ++ParamPos; 6242 } 6243 for (auto *Attr : FD->specific_attrs<OMPDeclareSimdDeclAttr>()) { 6244 llvm::SmallVector<ParamAttrTy, 8> ParamAttrs(ParamPositions.size()); 6245 // Mark uniform parameters. 6246 for (auto *E : Attr->uniforms()) { 6247 E = E->IgnoreParenImpCasts(); 6248 unsigned Pos; 6249 if (isa<CXXThisExpr>(E)) 6250 Pos = ParamPositions[FD]; 6251 else { 6252 auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl()) 6253 ->getCanonicalDecl(); 6254 Pos = ParamPositions[PVD]; 6255 } 6256 ParamAttrs[Pos].Kind = Uniform; 6257 } 6258 // Get alignment info. 
6259 auto NI = Attr->alignments_begin(); 6260 for (auto *E : Attr->aligneds()) { 6261 E = E->IgnoreParenImpCasts(); 6262 unsigned Pos; 6263 QualType ParmTy; 6264 if (isa<CXXThisExpr>(E)) { 6265 Pos = ParamPositions[FD]; 6266 ParmTy = E->getType(); 6267 } else { 6268 auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl()) 6269 ->getCanonicalDecl(); 6270 Pos = ParamPositions[PVD]; 6271 ParmTy = PVD->getType(); 6272 } 6273 ParamAttrs[Pos].Alignment = 6274 (*NI) ? (*NI)->EvaluateKnownConstInt(C) 6275 : llvm::APSInt::getUnsigned( 6276 C.toCharUnitsFromBits(C.getOpenMPDefaultSimdAlign(ParmTy)) 6277 .getQuantity()); 6278 ++NI; 6279 } 6280 // Mark linear parameters. 6281 auto SI = Attr->steps_begin(); 6282 auto MI = Attr->modifiers_begin(); 6283 for (auto *E : Attr->linears()) { 6284 E = E->IgnoreParenImpCasts(); 6285 unsigned Pos; 6286 if (isa<CXXThisExpr>(E)) 6287 Pos = ParamPositions[FD]; 6288 else { 6289 auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl()) 6290 ->getCanonicalDecl(); 6291 Pos = ParamPositions[PVD]; 6292 } 6293 auto &ParamAttr = ParamAttrs[Pos]; 6294 ParamAttr.Kind = Linear; 6295 if (*SI) { 6296 if (!(*SI)->EvaluateAsInt(ParamAttr.StrideOrArg, C, 6297 Expr::SE_AllowSideEffects)) { 6298 if (auto *DRE = cast<DeclRefExpr>((*SI)->IgnoreParenImpCasts())) { 6299 if (auto *StridePVD = cast<ParmVarDecl>(DRE->getDecl())) { 6300 ParamAttr.Kind = LinearWithVarStride; 6301 ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned( 6302 ParamPositions[StridePVD->getCanonicalDecl()]); 6303 } 6304 } 6305 } 6306 } 6307 ++SI; 6308 ++MI; 6309 } 6310 llvm::APSInt VLENVal; 6311 if (const Expr *VLEN = Attr->getSimdlen()) 6312 VLENVal = VLEN->EvaluateKnownConstInt(C); 6313 OMPDeclareSimdDeclAttr::BranchStateTy State = Attr->getBranchState(); 6314 if (CGM.getTriple().getArch() == llvm::Triple::x86 || 6315 CGM.getTriple().getArch() == llvm::Triple::x86_64) 6316 emitX86DeclareSimdFunction(FD, Fn, VLENVal, ParamAttrs, State); 6317 } 6318 } 6319