1 //===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This provides a class for OpenMP runtime code generation. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "CGCXXABI.h" 15 #include "CGCleanup.h" 16 #include "CGOpenMPRuntime.h" 17 #include "CodeGenFunction.h" 18 #include "clang/CodeGen/ConstantInitBuilder.h" 19 #include "clang/AST/Decl.h" 20 #include "clang/AST/StmtOpenMP.h" 21 #include "llvm/ADT/ArrayRef.h" 22 #include "llvm/ADT/BitmaskEnum.h" 23 #include "llvm/Bitcode/BitcodeReader.h" 24 #include "llvm/IR/CallSite.h" 25 #include "llvm/IR/DerivedTypes.h" 26 #include "llvm/IR/GlobalValue.h" 27 #include "llvm/IR/Value.h" 28 #include "llvm/Support/Format.h" 29 #include "llvm/Support/raw_ostream.h" 30 #include <cassert> 31 32 using namespace clang; 33 using namespace CodeGen; 34 35 namespace { 36 /// \brief Base class for handling code generation inside OpenMP regions. 37 class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo { 38 public: 39 /// \brief Kinds of OpenMP regions used in codegen. 40 enum CGOpenMPRegionKind { 41 /// \brief Region with outlined function for standalone 'parallel' 42 /// directive. 43 ParallelOutlinedRegion, 44 /// \brief Region with outlined function for standalone 'task' directive. 45 TaskOutlinedRegion, 46 /// \brief Region for constructs that do not require function outlining, 47 /// like 'for', 'sections', 'atomic' etc. directives. 48 InlinedRegion, 49 /// \brief Region with outlined function for standalone 'target' directive. 
50 TargetRegion, 51 }; 52 53 CGOpenMPRegionInfo(const CapturedStmt &CS, 54 const CGOpenMPRegionKind RegionKind, 55 const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind, 56 bool HasCancel) 57 : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind), 58 CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {} 59 60 CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind, 61 const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind, 62 bool HasCancel) 63 : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen), 64 Kind(Kind), HasCancel(HasCancel) {} 65 66 /// \brief Get a variable or parameter for storing global thread id 67 /// inside OpenMP construct. 68 virtual const VarDecl *getThreadIDVariable() const = 0; 69 70 /// \brief Emit the captured statement body. 71 void EmitBody(CodeGenFunction &CGF, const Stmt *S) override; 72 73 /// \brief Get an LValue for the current ThreadID variable. 74 /// \return LValue for thread id variable. This LValue always has type int32*. 75 virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF); 76 77 virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {} 78 79 CGOpenMPRegionKind getRegionKind() const { return RegionKind; } 80 81 OpenMPDirectiveKind getDirectiveKind() const { return Kind; } 82 83 bool hasCancel() const { return HasCancel; } 84 85 static bool classof(const CGCapturedStmtInfo *Info) { 86 return Info->getKind() == CR_OpenMP; 87 } 88 89 ~CGOpenMPRegionInfo() override = default; 90 91 protected: 92 CGOpenMPRegionKind RegionKind; 93 RegionCodeGenTy CodeGen; 94 OpenMPDirectiveKind Kind; 95 bool HasCancel; 96 }; 97 98 /// \brief API for captured statement code generation in OpenMP constructs. 
99 class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo { 100 public: 101 CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar, 102 const RegionCodeGenTy &CodeGen, 103 OpenMPDirectiveKind Kind, bool HasCancel, 104 StringRef HelperName) 105 : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind, 106 HasCancel), 107 ThreadIDVar(ThreadIDVar), HelperName(HelperName) { 108 assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region."); 109 } 110 111 /// \brief Get a variable or parameter for storing global thread id 112 /// inside OpenMP construct. 113 const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; } 114 115 /// \brief Get the name of the capture helper. 116 StringRef getHelperName() const override { return HelperName; } 117 118 static bool classof(const CGCapturedStmtInfo *Info) { 119 return CGOpenMPRegionInfo::classof(Info) && 120 cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == 121 ParallelOutlinedRegion; 122 } 123 124 private: 125 /// \brief A variable or parameter storing global thread id for OpenMP 126 /// constructs. 127 const VarDecl *ThreadIDVar; 128 StringRef HelperName; 129 }; 130 131 /// \brief API for captured statement code generation in OpenMP constructs. 132 class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo { 133 public: 134 class UntiedTaskActionTy final : public PrePostActionTy { 135 bool Untied; 136 const VarDecl *PartIDVar; 137 const RegionCodeGenTy UntiedCodeGen; 138 llvm::SwitchInst *UntiedSwitch = nullptr; 139 140 public: 141 UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar, 142 const RegionCodeGenTy &UntiedCodeGen) 143 : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {} 144 void Enter(CodeGenFunction &CGF) override { 145 if (Untied) { 146 // Emit task switching point. 
147 auto PartIdLVal = CGF.EmitLoadOfPointerLValue( 148 CGF.GetAddrOfLocalVar(PartIDVar), 149 PartIDVar->getType()->castAs<PointerType>()); 150 auto *Res = CGF.EmitLoadOfScalar(PartIdLVal, PartIDVar->getLocation()); 151 auto *DoneBB = CGF.createBasicBlock(".untied.done."); 152 UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB); 153 CGF.EmitBlock(DoneBB); 154 CGF.EmitBranchThroughCleanup(CGF.ReturnBlock); 155 CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp.")); 156 UntiedSwitch->addCase(CGF.Builder.getInt32(0), 157 CGF.Builder.GetInsertBlock()); 158 emitUntiedSwitch(CGF); 159 } 160 } 161 void emitUntiedSwitch(CodeGenFunction &CGF) const { 162 if (Untied) { 163 auto PartIdLVal = CGF.EmitLoadOfPointerLValue( 164 CGF.GetAddrOfLocalVar(PartIDVar), 165 PartIDVar->getType()->castAs<PointerType>()); 166 CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()), 167 PartIdLVal); 168 UntiedCodeGen(CGF); 169 CodeGenFunction::JumpDest CurPoint = 170 CGF.getJumpDestInCurrentScope(".untied.next."); 171 CGF.EmitBranchThroughCleanup(CGF.ReturnBlock); 172 CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp.")); 173 UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()), 174 CGF.Builder.GetInsertBlock()); 175 CGF.EmitBranchThroughCleanup(CurPoint); 176 CGF.EmitBlock(CurPoint.getBlock()); 177 } 178 } 179 unsigned getNumberOfParts() const { return UntiedSwitch->getNumCases(); } 180 }; 181 CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS, 182 const VarDecl *ThreadIDVar, 183 const RegionCodeGenTy &CodeGen, 184 OpenMPDirectiveKind Kind, bool HasCancel, 185 const UntiedTaskActionTy &Action) 186 : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel), 187 ThreadIDVar(ThreadIDVar), Action(Action) { 188 assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region."); 189 } 190 191 /// \brief Get a variable or parameter for storing global thread id 192 /// inside OpenMP construct. 
193 const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; } 194 195 /// \brief Get an LValue for the current ThreadID variable. 196 LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override; 197 198 /// \brief Get the name of the capture helper. 199 StringRef getHelperName() const override { return ".omp_outlined."; } 200 201 void emitUntiedSwitch(CodeGenFunction &CGF) override { 202 Action.emitUntiedSwitch(CGF); 203 } 204 205 static bool classof(const CGCapturedStmtInfo *Info) { 206 return CGOpenMPRegionInfo::classof(Info) && 207 cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == 208 TaskOutlinedRegion; 209 } 210 211 private: 212 /// \brief A variable or parameter storing global thread id for OpenMP 213 /// constructs. 214 const VarDecl *ThreadIDVar; 215 /// Action for emitting code for untied tasks. 216 const UntiedTaskActionTy &Action; 217 }; 218 219 /// \brief API for inlined captured statement code generation in OpenMP 220 /// constructs. 221 class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo { 222 public: 223 CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI, 224 const RegionCodeGenTy &CodeGen, 225 OpenMPDirectiveKind Kind, bool HasCancel) 226 : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel), 227 OldCSI(OldCSI), 228 OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {} 229 230 // \brief Retrieve the value of the context parameter. 231 llvm::Value *getContextValue() const override { 232 if (OuterRegionInfo) 233 return OuterRegionInfo->getContextValue(); 234 llvm_unreachable("No context value for inlined OpenMP region"); 235 } 236 237 void setContextValue(llvm::Value *V) override { 238 if (OuterRegionInfo) { 239 OuterRegionInfo->setContextValue(V); 240 return; 241 } 242 llvm_unreachable("No context value for inlined OpenMP region"); 243 } 244 245 /// \brief Lookup the captured field decl for a variable. 
246 const FieldDecl *lookup(const VarDecl *VD) const override { 247 if (OuterRegionInfo) 248 return OuterRegionInfo->lookup(VD); 249 // If there is no outer outlined region,no need to lookup in a list of 250 // captured variables, we can use the original one. 251 return nullptr; 252 } 253 254 FieldDecl *getThisFieldDecl() const override { 255 if (OuterRegionInfo) 256 return OuterRegionInfo->getThisFieldDecl(); 257 return nullptr; 258 } 259 260 /// \brief Get a variable or parameter for storing global thread id 261 /// inside OpenMP construct. 262 const VarDecl *getThreadIDVariable() const override { 263 if (OuterRegionInfo) 264 return OuterRegionInfo->getThreadIDVariable(); 265 return nullptr; 266 } 267 268 /// \brief Get an LValue for the current ThreadID variable. 269 LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override { 270 if (OuterRegionInfo) 271 return OuterRegionInfo->getThreadIDVariableLValue(CGF); 272 llvm_unreachable("No LValue for inlined OpenMP construct"); 273 } 274 275 /// \brief Get the name of the capture helper. 276 StringRef getHelperName() const override { 277 if (auto *OuterRegionInfo = getOldCSI()) 278 return OuterRegionInfo->getHelperName(); 279 llvm_unreachable("No helper name for inlined OpenMP construct"); 280 } 281 282 void emitUntiedSwitch(CodeGenFunction &CGF) override { 283 if (OuterRegionInfo) 284 OuterRegionInfo->emitUntiedSwitch(CGF); 285 } 286 287 CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; } 288 289 static bool classof(const CGCapturedStmtInfo *Info) { 290 return CGOpenMPRegionInfo::classof(Info) && 291 cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion; 292 } 293 294 ~CGOpenMPInlinedRegionInfo() override = default; 295 296 private: 297 /// \brief CodeGen info about outer OpenMP region. 298 CodeGenFunction::CGCapturedStmtInfo *OldCSI; 299 CGOpenMPRegionInfo *OuterRegionInfo; 300 }; 301 302 /// \brief API for captured statement code generation in OpenMP target 303 /// constructs. 
For this captures, implicit parameters are used instead of the 304 /// captured fields. The name of the target region has to be unique in a given 305 /// application so it is provided by the client, because only the client has 306 /// the information to generate that. 307 class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo { 308 public: 309 CGOpenMPTargetRegionInfo(const CapturedStmt &CS, 310 const RegionCodeGenTy &CodeGen, StringRef HelperName) 311 : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target, 312 /*HasCancel=*/false), 313 HelperName(HelperName) {} 314 315 /// \brief This is unused for target regions because each starts executing 316 /// with a single thread. 317 const VarDecl *getThreadIDVariable() const override { return nullptr; } 318 319 /// \brief Get the name of the capture helper. 320 StringRef getHelperName() const override { return HelperName; } 321 322 static bool classof(const CGCapturedStmtInfo *Info) { 323 return CGOpenMPRegionInfo::classof(Info) && 324 cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion; 325 } 326 327 private: 328 StringRef HelperName; 329 }; 330 331 static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) { 332 llvm_unreachable("No codegen for expressions"); 333 } 334 /// \brief API for generation of expressions captured in a innermost OpenMP 335 /// region. 336 class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo { 337 public: 338 CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS) 339 : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen, 340 OMPD_unknown, 341 /*HasCancel=*/false), 342 PrivScope(CGF) { 343 // Make sure the globals captured in the provided statement are local by 344 // using the privatization logic. We assume the same variable is not 345 // captured more than once. 
346 for (auto &C : CS.captures()) { 347 if (!C.capturesVariable() && !C.capturesVariableByCopy()) 348 continue; 349 350 const VarDecl *VD = C.getCapturedVar(); 351 if (VD->isLocalVarDeclOrParm()) 352 continue; 353 354 DeclRefExpr DRE(const_cast<VarDecl *>(VD), 355 /*RefersToEnclosingVariableOrCapture=*/false, 356 VD->getType().getNonReferenceType(), VK_LValue, 357 C.getLocation()); 358 PrivScope.addPrivate(VD, [&CGF, &DRE]() -> Address { 359 return CGF.EmitLValue(&DRE).getAddress(); 360 }); 361 } 362 (void)PrivScope.Privatize(); 363 } 364 365 /// \brief Lookup the captured field decl for a variable. 366 const FieldDecl *lookup(const VarDecl *VD) const override { 367 if (auto *FD = CGOpenMPInlinedRegionInfo::lookup(VD)) 368 return FD; 369 return nullptr; 370 } 371 372 /// \brief Emit the captured statement body. 373 void EmitBody(CodeGenFunction &CGF, const Stmt *S) override { 374 llvm_unreachable("No body for expressions"); 375 } 376 377 /// \brief Get a variable or parameter for storing global thread id 378 /// inside OpenMP construct. 379 const VarDecl *getThreadIDVariable() const override { 380 llvm_unreachable("No thread id for expressions"); 381 } 382 383 /// \brief Get the name of the capture helper. 384 StringRef getHelperName() const override { 385 llvm_unreachable("No helper name for expressions"); 386 } 387 388 static bool classof(const CGCapturedStmtInfo *Info) { return false; } 389 390 private: 391 /// Private scope to capture global variables. 392 CodeGenFunction::OMPPrivateScope PrivScope; 393 }; 394 395 /// \brief RAII for emitting code of OpenMP constructs. 396 class InlinedOpenMPRegionRAII { 397 CodeGenFunction &CGF; 398 llvm::DenseMap<const VarDecl *, FieldDecl *> LambdaCaptureFields; 399 FieldDecl *LambdaThisCaptureField = nullptr; 400 const CodeGen::CGBlockInfo *BlockInfo = nullptr; 401 402 public: 403 /// \brief Constructs region for combined constructs. 404 /// \param CodeGen Code generation sequence for combined directives. 
Includes 405 /// a list of functions used for code generation of implicitly inlined 406 /// regions. 407 InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen, 408 OpenMPDirectiveKind Kind, bool HasCancel) 409 : CGF(CGF) { 410 // Start emission for the construct. 411 CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo( 412 CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel); 413 std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields); 414 LambdaThisCaptureField = CGF.LambdaThisCaptureField; 415 CGF.LambdaThisCaptureField = nullptr; 416 BlockInfo = CGF.BlockInfo; 417 CGF.BlockInfo = nullptr; 418 } 419 420 ~InlinedOpenMPRegionRAII() { 421 // Restore original CapturedStmtInfo only if we're done with code emission. 422 auto *OldCSI = 423 cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI(); 424 delete CGF.CapturedStmtInfo; 425 CGF.CapturedStmtInfo = OldCSI; 426 std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields); 427 CGF.LambdaThisCaptureField = LambdaThisCaptureField; 428 CGF.BlockInfo = BlockInfo; 429 } 430 }; 431 432 /// \brief Values for bit flags used in the ident_t to describe the fields. 433 /// All enumeric elements are named and described in accordance with the code 434 /// from http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp.h 435 enum OpenMPLocationFlags : unsigned { 436 /// \brief Use trampoline for internal microtask. 437 OMP_IDENT_IMD = 0x01, 438 /// \brief Use c-style ident structure. 439 OMP_IDENT_KMPC = 0x02, 440 /// \brief Atomic reduction option for kmpc_reduce. 441 OMP_ATOMIC_REDUCE = 0x10, 442 /// \brief Explicit 'barrier' directive. 443 OMP_IDENT_BARRIER_EXPL = 0x20, 444 /// \brief Implicit barrier in code. 445 OMP_IDENT_BARRIER_IMPL = 0x40, 446 /// \brief Implicit barrier in 'for' directive. 447 OMP_IDENT_BARRIER_IMPL_FOR = 0x40, 448 /// \brief Implicit barrier in 'sections' directive. 449 OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0, 450 /// \brief Implicit barrier in 'single' directive. 
451 OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140, 452 /// Call of __kmp_for_static_init for static loop. 453 OMP_IDENT_WORK_LOOP = 0x200, 454 /// Call of __kmp_for_static_init for sections. 455 OMP_IDENT_WORK_SECTIONS = 0x400, 456 /// Call of __kmp_for_static_init for distribute. 457 OMP_IDENT_WORK_DISTRIBUTE = 0x800, 458 LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE) 459 }; 460 461 /// \brief Describes ident structure that describes a source location. 462 /// All descriptions are taken from 463 /// http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp.h 464 /// Original structure: 465 /// typedef struct ident { 466 /// kmp_int32 reserved_1; /**< might be used in Fortran; 467 /// see above */ 468 /// kmp_int32 flags; /**< also f.flags; KMP_IDENT_xxx flags; 469 /// KMP_IDENT_KMPC identifies this union 470 /// member */ 471 /// kmp_int32 reserved_2; /**< not really used in Fortran any more; 472 /// see above */ 473 ///#if USE_ITT_BUILD 474 /// /* but currently used for storing 475 /// region-specific ITT */ 476 /// /* contextual information. */ 477 ///#endif /* USE_ITT_BUILD */ 478 /// kmp_int32 reserved_3; /**< source[4] in Fortran, do not use for 479 /// C++ */ 480 /// char const *psource; /**< String describing the source location. 481 /// The string is composed of semi-colon separated 482 // fields which describe the source file, 483 /// the function and a pair of line numbers that 484 /// delimit the construct. 485 /// */ 486 /// } ident_t; 487 enum IdentFieldIndex { 488 /// \brief might be used in Fortran 489 IdentField_Reserved_1, 490 /// \brief OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member. 491 IdentField_Flags, 492 /// \brief Not really used in Fortran any more 493 IdentField_Reserved_2, 494 /// \brief Source[4] in Fortran, do not use for C++ 495 IdentField_Reserved_3, 496 /// \brief String describing the source location. 
The string is composed of 497 /// semi-colon separated fields which describe the source file, the function 498 /// and a pair of line numbers that delimit the construct. 499 IdentField_PSource 500 }; 501 502 /// \brief Schedule types for 'omp for' loops (these enumerators are taken from 503 /// the enum sched_type in kmp.h). 504 enum OpenMPSchedType { 505 /// \brief Lower bound for default (unordered) versions. 506 OMP_sch_lower = 32, 507 OMP_sch_static_chunked = 33, 508 OMP_sch_static = 34, 509 OMP_sch_dynamic_chunked = 35, 510 OMP_sch_guided_chunked = 36, 511 OMP_sch_runtime = 37, 512 OMP_sch_auto = 38, 513 /// static with chunk adjustment (e.g., simd) 514 OMP_sch_static_balanced_chunked = 45, 515 /// \brief Lower bound for 'ordered' versions. 516 OMP_ord_lower = 64, 517 OMP_ord_static_chunked = 65, 518 OMP_ord_static = 66, 519 OMP_ord_dynamic_chunked = 67, 520 OMP_ord_guided_chunked = 68, 521 OMP_ord_runtime = 69, 522 OMP_ord_auto = 70, 523 OMP_sch_default = OMP_sch_static, 524 /// \brief dist_schedule types 525 OMP_dist_sch_static_chunked = 91, 526 OMP_dist_sch_static = 92, 527 /// Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers. 528 /// Set if the monotonic schedule modifier was present. 529 OMP_sch_modifier_monotonic = (1 << 29), 530 /// Set if the nonmonotonic schedule modifier was present. 
531 OMP_sch_modifier_nonmonotonic = (1 << 30), 532 }; 533 534 enum OpenMPRTLFunction { 535 /// \brief Call to void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, 536 /// kmpc_micro microtask, ...); 537 OMPRTL__kmpc_fork_call, 538 /// \brief Call to void *__kmpc_threadprivate_cached(ident_t *loc, 539 /// kmp_int32 global_tid, void *data, size_t size, void ***cache); 540 OMPRTL__kmpc_threadprivate_cached, 541 /// \brief Call to void __kmpc_threadprivate_register( ident_t *, 542 /// void *data, kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor); 543 OMPRTL__kmpc_threadprivate_register, 544 // Call to __kmpc_int32 kmpc_global_thread_num(ident_t *loc); 545 OMPRTL__kmpc_global_thread_num, 546 // Call to void __kmpc_critical(ident_t *loc, kmp_int32 global_tid, 547 // kmp_critical_name *crit); 548 OMPRTL__kmpc_critical, 549 // Call to void __kmpc_critical_with_hint(ident_t *loc, kmp_int32 550 // global_tid, kmp_critical_name *crit, uintptr_t hint); 551 OMPRTL__kmpc_critical_with_hint, 552 // Call to void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid, 553 // kmp_critical_name *crit); 554 OMPRTL__kmpc_end_critical, 555 // Call to kmp_int32 __kmpc_cancel_barrier(ident_t *loc, kmp_int32 556 // global_tid); 557 OMPRTL__kmpc_cancel_barrier, 558 // Call to void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid); 559 OMPRTL__kmpc_barrier, 560 // Call to void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid); 561 OMPRTL__kmpc_for_static_fini, 562 // Call to void __kmpc_serialized_parallel(ident_t *loc, kmp_int32 563 // global_tid); 564 OMPRTL__kmpc_serialized_parallel, 565 // Call to void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32 566 // global_tid); 567 OMPRTL__kmpc_end_serialized_parallel, 568 // Call to void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid, 569 // kmp_int32 num_threads); 570 OMPRTL__kmpc_push_num_threads, 571 // Call to void __kmpc_flush(ident_t *loc); 572 OMPRTL__kmpc_flush, 573 // Call to kmp_int32 __kmpc_master(ident_t 
*, kmp_int32 global_tid); 574 OMPRTL__kmpc_master, 575 // Call to void __kmpc_end_master(ident_t *, kmp_int32 global_tid); 576 OMPRTL__kmpc_end_master, 577 // Call to kmp_int32 __kmpc_omp_taskyield(ident_t *, kmp_int32 global_tid, 578 // int end_part); 579 OMPRTL__kmpc_omp_taskyield, 580 // Call to kmp_int32 __kmpc_single(ident_t *, kmp_int32 global_tid); 581 OMPRTL__kmpc_single, 582 // Call to void __kmpc_end_single(ident_t *, kmp_int32 global_tid); 583 OMPRTL__kmpc_end_single, 584 // Call to kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid, 585 // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, 586 // kmp_routine_entry_t *task_entry); 587 OMPRTL__kmpc_omp_task_alloc, 588 // Call to kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t * 589 // new_task); 590 OMPRTL__kmpc_omp_task, 591 // Call to void __kmpc_copyprivate(ident_t *loc, kmp_int32 global_tid, 592 // size_t cpy_size, void *cpy_data, void(*cpy_func)(void *, void *), 593 // kmp_int32 didit); 594 OMPRTL__kmpc_copyprivate, 595 // Call to kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid, 596 // kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void 597 // (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name *lck); 598 OMPRTL__kmpc_reduce, 599 // Call to kmp_int32 __kmpc_reduce_nowait(ident_t *loc, kmp_int32 600 // global_tid, kmp_int32 num_vars, size_t reduce_size, void *reduce_data, 601 // void (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name 602 // *lck); 603 OMPRTL__kmpc_reduce_nowait, 604 // Call to void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid, 605 // kmp_critical_name *lck); 606 OMPRTL__kmpc_end_reduce, 607 // Call to void __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid, 608 // kmp_critical_name *lck); 609 OMPRTL__kmpc_end_reduce_nowait, 610 // Call to void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid, 611 // kmp_task_t * new_task); 612 OMPRTL__kmpc_omp_task_begin_if0, 613 // Call to 
void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid, 614 // kmp_task_t * new_task); 615 OMPRTL__kmpc_omp_task_complete_if0, 616 // Call to void __kmpc_ordered(ident_t *loc, kmp_int32 global_tid); 617 OMPRTL__kmpc_ordered, 618 // Call to void __kmpc_end_ordered(ident_t *loc, kmp_int32 global_tid); 619 OMPRTL__kmpc_end_ordered, 620 // Call to kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32 621 // global_tid); 622 OMPRTL__kmpc_omp_taskwait, 623 // Call to void __kmpc_taskgroup(ident_t *loc, kmp_int32 global_tid); 624 OMPRTL__kmpc_taskgroup, 625 // Call to void __kmpc_end_taskgroup(ident_t *loc, kmp_int32 global_tid); 626 OMPRTL__kmpc_end_taskgroup, 627 // Call to void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid, 628 // int proc_bind); 629 OMPRTL__kmpc_push_proc_bind, 630 // Call to kmp_int32 __kmpc_omp_task_with_deps(ident_t *loc_ref, kmp_int32 631 // gtid, kmp_task_t * new_task, kmp_int32 ndeps, kmp_depend_info_t 632 // *dep_list, kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list); 633 OMPRTL__kmpc_omp_task_with_deps, 634 // Call to void __kmpc_omp_wait_deps(ident_t *loc_ref, kmp_int32 635 // gtid, kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 636 // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); 637 OMPRTL__kmpc_omp_wait_deps, 638 // Call to kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32 639 // global_tid, kmp_int32 cncl_kind); 640 OMPRTL__kmpc_cancellationpoint, 641 // Call to kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid, 642 // kmp_int32 cncl_kind); 643 OMPRTL__kmpc_cancel, 644 // Call to void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid, 645 // kmp_int32 num_teams, kmp_int32 thread_limit); 646 OMPRTL__kmpc_push_num_teams, 647 // Call to void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro 648 // microtask, ...); 649 OMPRTL__kmpc_fork_teams, 650 // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int 651 // if_val, kmp_uint64 *lb, kmp_uint64 
*ub, kmp_int64 st, int nogroup, int 652 // sched, kmp_uint64 grainsize, void *task_dup); 653 OMPRTL__kmpc_taskloop, 654 // Call to void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid, kmp_int32 655 // num_dims, struct kmp_dim *dims); 656 OMPRTL__kmpc_doacross_init, 657 // Call to void __kmpc_doacross_fini(ident_t *loc, kmp_int32 gtid); 658 OMPRTL__kmpc_doacross_fini, 659 // Call to void __kmpc_doacross_post(ident_t *loc, kmp_int32 gtid, kmp_int64 660 // *vec); 661 OMPRTL__kmpc_doacross_post, 662 // Call to void __kmpc_doacross_wait(ident_t *loc, kmp_int32 gtid, kmp_int64 663 // *vec); 664 OMPRTL__kmpc_doacross_wait, 665 // Call to void *__kmpc_task_reduction_init(int gtid, int num_data, void 666 // *data); 667 OMPRTL__kmpc_task_reduction_init, 668 // Call to void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void 669 // *d); 670 OMPRTL__kmpc_task_reduction_get_th_data, 671 672 // 673 // Offloading related calls 674 // 675 // Call to int32_t __tgt_target(int64_t device_id, void *host_ptr, int32_t 676 // arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t 677 // *arg_types); 678 OMPRTL__tgt_target, 679 // Call to int32_t __tgt_target_nowait(int64_t device_id, void *host_ptr, 680 // int32_t arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t 681 // *arg_types); 682 OMPRTL__tgt_target_nowait, 683 // Call to int32_t __tgt_target_teams(int64_t device_id, void *host_ptr, 684 // int32_t arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t 685 // *arg_types, int32_t num_teams, int32_t thread_limit); 686 OMPRTL__tgt_target_teams, 687 // Call to int32_t __tgt_target_teams_nowait(int64_t device_id, void 688 // *host_ptr, int32_t arg_num, void** args_base, void **args, size_t 689 // *arg_sizes, int64_t *arg_types, int32_t num_teams, int32_t thread_limit); 690 OMPRTL__tgt_target_teams_nowait, 691 // Call to void __tgt_register_lib(__tgt_bin_desc *desc); 692 OMPRTL__tgt_register_lib, 693 // Call to void 
__tgt_unregister_lib(__tgt_bin_desc *desc); 694 OMPRTL__tgt_unregister_lib, 695 // Call to void __tgt_target_data_begin(int64_t device_id, int32_t arg_num, 696 // void** args_base, void **args, size_t *arg_sizes, int64_t *arg_types); 697 OMPRTL__tgt_target_data_begin, 698 // Call to void __tgt_target_data_begin_nowait(int64_t device_id, int32_t 699 // arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t 700 // *arg_types); 701 OMPRTL__tgt_target_data_begin_nowait, 702 // Call to void __tgt_target_data_end(int64_t device_id, int32_t arg_num, 703 // void** args_base, void **args, size_t *arg_sizes, int64_t *arg_types); 704 OMPRTL__tgt_target_data_end, 705 // Call to void __tgt_target_data_end_nowait(int64_t device_id, int32_t 706 // arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t 707 // *arg_types); 708 OMPRTL__tgt_target_data_end_nowait, 709 // Call to void __tgt_target_data_update(int64_t device_id, int32_t arg_num, 710 // void** args_base, void **args, size_t *arg_sizes, int64_t *arg_types); 711 OMPRTL__tgt_target_data_update, 712 // Call to void __tgt_target_data_update_nowait(int64_t device_id, int32_t 713 // arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t 714 // *arg_types); 715 OMPRTL__tgt_target_data_update_nowait, 716 }; 717 718 /// A basic class for pre|post-action for advanced codegen sequence for OpenMP 719 /// region. 
720 class CleanupTy final : public EHScopeStack::Cleanup { 721 PrePostActionTy *Action; 722 723 public: 724 explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {} 725 void Emit(CodeGenFunction &CGF, Flags /*flags*/) override { 726 if (!CGF.HaveInsertPoint()) 727 return; 728 Action->Exit(CGF); 729 } 730 }; 731 732 } // anonymous namespace 733 734 void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const { 735 CodeGenFunction::RunCleanupsScope Scope(CGF); 736 if (PrePostAction) { 737 CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction); 738 Callback(CodeGen, CGF, *PrePostAction); 739 } else { 740 PrePostActionTy Action; 741 Callback(CodeGen, CGF, Action); 742 } 743 } 744 745 /// Check if the combiner is a call to UDR combiner and if it is so return the 746 /// UDR decl used for reduction. 747 static const OMPDeclareReductionDecl * 748 getReductionInit(const Expr *ReductionOp) { 749 if (auto *CE = dyn_cast<CallExpr>(ReductionOp)) 750 if (auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee())) 751 if (auto *DRE = 752 dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts())) 753 if (auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) 754 return DRD; 755 return nullptr; 756 } 757 758 static void emitInitWithReductionInitializer(CodeGenFunction &CGF, 759 const OMPDeclareReductionDecl *DRD, 760 const Expr *InitOp, 761 Address Private, Address Original, 762 QualType Ty) { 763 if (DRD->getInitializer()) { 764 std::pair<llvm::Function *, llvm::Function *> Reduction = 765 CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD); 766 auto *CE = cast<CallExpr>(InitOp); 767 auto *OVE = cast<OpaqueValueExpr>(CE->getCallee()); 768 const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts(); 769 const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts(); 770 auto *LHSDRE = cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr()); 771 auto *RHSDRE = cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr()); 772 
CodeGenFunction::OMPPrivateScope PrivateScope(CGF); 773 PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()), 774 [=]() -> Address { return Private; }); 775 PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()), 776 [=]() -> Address { return Original; }); 777 (void)PrivateScope.Privatize(); 778 RValue Func = RValue::get(Reduction.second); 779 CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func); 780 CGF.EmitIgnoredExpr(InitOp); 781 } else { 782 llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty); 783 auto *GV = new llvm::GlobalVariable( 784 CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true, 785 llvm::GlobalValue::PrivateLinkage, Init, ".init"); 786 LValue LV = CGF.MakeNaturalAlignAddrLValue(GV, Ty); 787 RValue InitRVal; 788 switch (CGF.getEvaluationKind(Ty)) { 789 case TEK_Scalar: 790 InitRVal = CGF.EmitLoadOfLValue(LV, DRD->getLocation()); 791 break; 792 case TEK_Complex: 793 InitRVal = 794 RValue::getComplex(CGF.EmitLoadOfComplex(LV, DRD->getLocation())); 795 break; 796 case TEK_Aggregate: 797 InitRVal = RValue::getAggregate(LV.getAddress()); 798 break; 799 } 800 OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_RValue); 801 CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal); 802 CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(), 803 /*IsInitializer=*/false); 804 } 805 } 806 807 /// \brief Emit initialization of arrays of complex types. 808 /// \param DestAddr Address of the array. 809 /// \param Type Type of array. 810 /// \param Init Initial expression of array. 811 /// \param SrcAddr Address of the original array. 812 static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr, 813 QualType Type, bool EmitDeclareReductionInit, 814 const Expr *Init, 815 const OMPDeclareReductionDecl *DRD, 816 Address SrcAddr = Address::invalid()) { 817 // Perform element-by-element initialization. 818 QualType ElementTy; 819 820 // Drill down to the base element type on both arrays. 
  // emitArrayLength fills in ElementTy and may update DestAddr (both are
  // passed by reference); it yields the number of elements to initialize.
  auto ArrayTy = Type->getAsArrayTypeUnsafe();
  auto NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr);
  DestAddr =
      CGF.Builder.CreateElementBitCast(DestAddr, DestAddr.getElementType());
  // The source array is only touched when a declare-reduction decl is given.
  if (DRD)
    SrcAddr =
        CGF.Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType());

  llvm::Value *SrcBegin = nullptr;
  if (DRD)
    SrcBegin = SrcAddr.getPointer();
  auto DestBegin = DestAddr.getPointer();
  // Cast from pointer to array type to pointer to single element.
  auto DestEnd = CGF.Builder.CreateGEP(DestBegin, NumElements);
  // The basic structure here is a while-do loop.
  auto BodyBB = CGF.createBasicBlock("omp.arrayinit.body");
  auto DoneBB = CGF.createBasicBlock("omp.arrayinit.done");
  // Branch straight to the exit block when the array is empty.
  auto IsEmpty =
      CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  auto EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);

  // PHI nodes carry the current source/destination element pointer; the
  // incoming value from the entry block is the start of the array, the
  // back-edge value is added after the body has been emitted.
  llvm::PHINode *SrcElementPHI = nullptr;
  Address SrcElementCurrent = Address::invalid();
  if (DRD) {
    SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2,
                                          "omp.arraycpy.srcElementPast");
    SrcElementPHI->addIncoming(SrcBegin, EntryBB);
    SrcElementCurrent =
        Address(SrcElementPHI,
                SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));
  }
  llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI(
      DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
  DestElementPHI->addIncoming(DestBegin, EntryBB);
  Address DestElementCurrent =
      Address(DestElementPHI,
              DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  {
    // Cleanups created while initializing one element are run before the
    // pointers are advanced.
    CodeGenFunction::RunCleanupsScope InitScope(CGF);
    if (EmitDeclareReductionInit) {
      emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent,
                                       SrcElementCurrent, ElementTy);
    } else
      CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(),
                           /*IsInitializer=*/false);
  }

  if (DRD) {
    // Shift the address forward by one element.
    // NOTE(review): the value name below says "dest" although this is the
    // source pointer; it only affects the name of the emitted IR value.
    auto SrcElementNext = CGF.Builder.CreateConstGEP1_32(
        SrcElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
    SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock());
  }

  // Shift the address forward by one element.
  auto DestElementNext = CGF.Builder.CreateConstGEP1_32(
      DestElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
  // Check whether we've reached the end.
  auto Done =
      CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}

/// Emit the lvalue of the shared expression \p E through
/// CodeGenFunction::EmitOMPSharedLValue.
LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) {
  return CGF.EmitOMPSharedLValue(E);
}

/// For an array section, emit the lvalue obtained with IsLowerBound=false;
/// for any other expression return a default-constructed LValue.
LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF,
                                            const Expr *E) {
  if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E))
    return CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false);
  return LValue();
}

/// Initialize the private copy of the N-th reduction item element by element
/// using EmitOMPAggregateInit.
void ReductionCodeGen::emitAggregateInitialization(
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal,
    const OMPDeclareReductionDecl *DRD) {
  // Emit VarDecl with copy init for arrays.
  // Get the address of the original variable captured in current
  // captured region.
  auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  // Use the declare-reduction initializer path when a UDR is present and
  // either it has an initializer or the private variable has none of its own.
  bool EmitDeclareReductionInit =
      DRD && (DRD->getInitializer() || !PrivateVD->hasInit());
  EmitOMPAggregateInit(CGF, PrivateAddr, PrivateVD->getType(),
                       EmitDeclareReductionInit,
                       EmitDeclareReductionInit ? ClausesData[N].ReductionOp
                                                : PrivateVD->getInit(),
                       DRD, SharedLVal.getAddress());
}

/// Collect the per-item clause data for a reduction.
/// \p Privates and \p ReductionOps are walked in lock step with \p Shareds;
/// no bounds check is performed here, so they are assumed to have at least
/// Shareds.size() elements.
ReductionCodeGen::ReductionCodeGen(ArrayRef<const Expr *> Shareds,
                                   ArrayRef<const Expr *> Privates,
                                   ArrayRef<const Expr *> ReductionOps) {
  ClausesData.reserve(Shareds.size());
  SharedAddresses.reserve(Shareds.size());
  Sizes.reserve(Shareds.size());
  BaseDecls.reserve(Shareds.size());
  auto IPriv = Privates.begin();
  auto IRed = ReductionOps.begin();
  for (const auto *Ref : Shareds) {
    ClausesData.emplace_back(Ref, *IPriv, *IRed);
    std::advance(IPriv, 1);
    std::advance(IRed, 1);
  }
}

/// Emit and record the pair of lvalues for the N-th reduction item.
/// Must be called for items in increasing order: the assert requires that
/// exactly N entries have been recorded so far.
void ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, unsigned N) {
  assert(SharedAddresses.size() == N &&
         "Number of generated lvalues must be exactly N.");
  LValue First = emitSharedLValue(CGF, ClausesData[N].Ref);
  LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Ref);
  SharedAddresses.emplace_back(First, Second);
}

/// Compute and record in Sizes the size of the N-th reduction item, and for
/// variably modified private types bind the array size expression to the
/// computed element count before emitting the type.
void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) {
  auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  QualType PrivateType = PrivateVD->getType();
  bool AsArraySection = isa<OMPArraySectionExpr>(ClausesData[N].Ref);
  if (!PrivateType->isVariablyModifiedType()) {
    // Fixed-size item: only the byte size is recorded, the element count
    // slot stays null.
    Sizes.emplace_back(
        CGF.getTypeSize(
            SharedAddresses[N].first.getType().getNonReferenceType()),
        nullptr);
    return;
  }
  llvm::Value *Size;
  llvm::Value *SizeInChars;
  llvm::Type *ElemType =
      cast<llvm::PointerType>(SharedAddresses[N].first.getPointer()->getType())
          ->getElementType();
  auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType);
  if (AsArraySection) {
    // Element count is the pointer distance between the two recorded lvalues
    // plus one; the byte size is derived from it.
    Size = CGF.Builder.CreatePtrDiff(SharedAddresses[N].second.getPointer(),
                                     SharedAddresses[N].first.getPointer());
    Size = CGF.Builder.CreateNUWAdd(
        Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1));
    SizeInChars = CGF.Builder.CreateNUWMul(Size, ElemSizeOf);
  } else {
    // Otherwise start from the byte size of the shared type and divide by the
    // element size to obtain the element count.
    SizeInChars = CGF.getTypeSize(
        SharedAddresses[N].first.getType().getNonReferenceType());
    Size = CGF.Builder.CreateExactUDiv(SizeInChars, ElemSizeOf);
  }
  Sizes.emplace_back(SizeInChars, Size);
  // Bind the array's (opaque) size expression to the computed element count
  // while the variably modified type is emitted.
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}

/// Emit the variably modified private type of the N-th item using an
/// externally supplied element count.
/// \param Size Element count to bind to the array size expression; must be
/// null when the private type is not variably modified (asserted).
void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N,
                                         llvm::Value *Size) {
  auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  QualType PrivateType = PrivateVD->getType();
  if (!PrivateType->isVariablyModifiedType()) {
    assert(!Size && !Sizes[N].second &&
           "Size should be nullptr for non-variably modified reduction "
           "items.");
    return;
  }
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}

/// Emit the initialization of the private copy of the N-th reduction item.
/// \param PrivateAddr Address of the private copy.
/// \param SharedLVal LValue of the original (shared) item.
/// \param DefaultInit Callback consulted only when neither the array path nor
/// the declare-reduction path applies; if it returns true, no further
/// initialization is emitted here.
void ReductionCodeGen::emitInitialization(
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal,
    llvm::function_ref<bool(CodeGenFunction &)> DefaultInit) {
  assert(SharedAddresses.size() > N && "No variable was generated");
  auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  auto *DRD = getReductionInit(ClausesData[N].ReductionOp);
  QualType PrivateType = PrivateVD->getType();
  // Retype both addresses to the memory representation of their clang types
  // before emitting any stores.
  PrivateAddr = CGF.Builder.CreateElementBitCast(
      PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
  QualType SharedType = SharedAddresses[N].first.getType();
  SharedLVal = CGF.MakeAddrLValue(
      CGF.Builder.CreateElementBitCast(SharedLVal.getAddress(),
                                       CGF.ConvertTypeForMem(SharedType)),
      SharedType, SharedAddresses[N].first.getBaseInfo(),
      CGF.CGM.getTBAAInfoForSubobject(SharedAddresses[N].first, SharedType));
  // Three mutually exclusive strategies: element-wise array init, the
  // declare-reduction initializer, or the private variable's own
  // non-trivial initializer (only if DefaultInit did not handle it).
  if (CGF.getContext().getAsArrayType(PrivateVD->getType())) {
    emitAggregateInitialization(CGF, N, PrivateAddr, SharedLVal, DRD);
  } else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) {
    emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp,
                                     PrivateAddr, SharedLVal.getAddress(),
                                     SharedLVal.getType());
  } else if (!DefaultInit(CGF) && PrivateVD->hasInit() &&
             !CGF.isTrivialInitializer(PrivateVD->getInit())) {
    CGF.EmitAnyExprToMem(PrivateVD->getInit(), PrivateAddr,
                         PrivateVD->getType().getQualifiers(),
                         /*IsInitializer=*/false);
  }
}

/// \return true if the private type of the N-th item has a destruction kind
/// other than DK_none.
bool ReductionCodeGen::needCleanups(unsigned N) {
  auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  QualType PrivateType = PrivateVD->getType();
  QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
  return DTorKind != QualType::DK_none;
}

/// Push a destroy cleanup for the private copy of the N-th item if its type
/// needs one (see needCleanups).
void ReductionCodeGen::emitCleanups(CodeGenFunction &CGF, unsigned N,
                                    Address PrivateAddr) {
  auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  QualType PrivateType = PrivateVD->getType();
  QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
  if (needCleanups(N)) {
    PrivateAddr = CGF.Builder.CreateElementBitCast(
        PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
    CGF.pushDestroy(DTorKind, PrivateAddr, PrivateType);
  }
}

/// Starting from \p BaseLV of type \p BaseTy, load through pointer and
/// reference levels until the type matches \p ElTy (or is no longer a
/// pointer/reference), then return an lvalue whose address is cast to the
/// memory type of \p ElTy.
static LValue loadToBegin(CodeGenFunction &CGF,
                          QualType BaseTy, QualType ElTy,
                          LValue BaseLV) {
  BaseTy = BaseTy.getNonReferenceType();
  // Peel one level of indirection per iteration.
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    if (auto *PtrTy = BaseTy->getAs<PointerType>())
      BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(), PtrTy);
    else {
      LValue RefLVal = CGF.MakeAddrLValue(BaseLV.getAddress(), BaseTy);
      BaseLV = CGF.EmitLoadOfReferenceLValue(RefLVal);
    }
    BaseTy = BaseTy->getPointeeType();
  }
  return CGF.MakeAddrLValue(
      CGF.Builder.CreateElementBitCast(BaseLV.getAddress(),
                                       CGF.ConvertTypeForMem(ElTy)),
      BaseLV.getType(), BaseLV.getBaseInfo(),
      CGF.CGM.getTBAAInfoForSubobject(BaseLV, BaseLV.getType()));
}

/// Wrap \p Addr in as many levels of indirection as separate \p BaseTy from
/// \p ElTy.
/// One memory temporary is created per pointer/reference level; each outer
/// temporary stores the pointer to the next inner one and the innermost
/// stores \p Addr (cast to its element type). The outermost temporary is
/// returned. If there are no such levels, \p Addr cast to \p BaseLVType is
/// returned with alignment \p BaseLVAlignment.
static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          llvm::Type *BaseLVType, CharUnits BaseLVAlignment,
                          llvm::Value *Addr) {
  Address Tmp = Address::invalid();        // Temporary created most recently.
  Address TopTmp = Address::invalid();     // Temporary of the previous level.
  Address MostTopTmp = Address::invalid(); // First (outermost) temporary.
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    Tmp = CGF.CreateMemTemp(BaseTy);
    if (TopTmp.isValid())
      CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp);
    else
      MostTopTmp = Tmp;
    TopTmp = Tmp;
    BaseTy = BaseTy->getPointeeType();
  }
  llvm::Type *Ty = BaseLVType;
  if (Tmp.isValid())
    Ty = Tmp.getElementType();
  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Addr, Ty);
  if (Tmp.isValid()) {
    CGF.Builder.CreateStore(Addr, Tmp);
    return MostTopTmp;
  }
  return Address(Addr, BaseLVAlignment);
}

/// If \p Ref is an array section or an array subscript, strip the nested
/// sections/subscripts to reach the underlying DeclRefExpr, store it in
/// \p DE and return the VarDecl it refers to. Otherwise return nullptr and
/// leave \p DE untouched.
static const VarDecl *getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE) {
  const VarDecl *OrigVD = nullptr;
  if (auto *OASE = dyn_cast<OMPArraySectionExpr>(Ref)) {
    auto *Base =
        OASE->getBase()->IgnoreParenImpCasts();
    // Sections may be nested in sections, which in turn may sit on top of
    // array subscripts; strip both layers in that order.
    while (auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base))
      Base = TempOASE->getBase()->IgnoreParenImpCasts();
    while (auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
      Base = TempASE->getBase()->IgnoreParenImpCasts();
    DE = cast<DeclRefExpr>(Base);
    OrigVD = cast<VarDecl>(DE->getDecl());
  } else if (auto *ASE = dyn_cast<ArraySubscriptExpr>(Ref)) {
    auto *Base = ASE->getBase()->IgnoreParenImpCasts();
    while (auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
      Base = TempASE->getBase()->IgnoreParenImpCasts();
    DE = cast<DeclRefExpr>(Base);
    OrigVD = cast<VarDecl>(DE->getDecl());
  }
  return OrigVD;
}

/// Return the address to use for the N-th item's base variable in place of
/// \p PrivateAddr, and record that base variable in BaseDecls.
/// For array sections/subscripts the private pointer is offset by the pointer
/// difference between the beginning of the base and the shared item, then
/// wrapped via castToBase. For any other item \p PrivateAddr is returned
/// unchanged.
Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N,
                                               Address PrivateAddr) {
  // Only assigned (and only read) when getBaseDecl returns a declaration.
  const DeclRefExpr *DE;
  if (const VarDecl *OrigVD = ::getBaseDecl(ClausesData[N].Ref, DE)) {
    BaseDecls.emplace_back(OrigVD);
    auto OriginalBaseLValue = CGF.EmitLValue(DE);
    LValue BaseLValue =
        loadToBegin(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(),
                    OriginalBaseLValue);
    llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff(
        BaseLValue.getPointer(), SharedAddresses[N].first.getPointer());
    llvm::Value *PrivatePointer =
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            PrivateAddr.getPointer(),
            SharedAddresses[N].first.getAddress().getType());
    llvm::Value *Ptr = CGF.Builder.CreateGEP(PrivatePointer, Adjustment);
    return castToBase(CGF, OrigVD->getType(),
                      SharedAddresses[N].first.getType(),
                      OriginalBaseLValue.getAddress().getType(),
                      OriginalBaseLValue.getAlignment(), Ptr);
  }
  BaseDecls.emplace_back(
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Ref)->getDecl()));
  return PrivateAddr;
}

/// \return true if the N-th item's reduction operation refers to a
/// declare-reduction declaration that has an initializer.
bool ReductionCodeGen::usesReductionInitializer(unsigned N) const {
  auto *DRD = getReductionInit(ClausesData[N].ReductionOp);
  return DRD && DRD->getInitializer();
}

/// Default implementation: the thread id variable has pointer type, so the
/// lvalue is obtained by loading through it.
LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) {
  return CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(getThreadIDVariable()),
      getThreadIDVariable()->getType()->castAs<PointerType>());
}

/// Emit the region body through the stored CodeGen callback, bracketed by a
/// terminate scope on the EH stack. Does nothing without an insertion point.
void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt * /*S*/) {
  if (!CGF.HaveInsertPoint())
    return;
  // 1.2.2 OpenMP Language Terminology
  // Structured block - An executable statement with a single entry at the
  // top and a single exit at the bottom.
  // The point of exit cannot be a branch out of the structured block.
  // longjmp() and throw() must not violate the entry/exit criteria.
  CGF.EHStack.pushTerminate();
  CodeGen(CGF);
  CGF.EHStack.popTerminate();
}

/// Task regions use the thread id variable directly: the lvalue is the
/// variable's own address rather than a load through a pointer.
LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue(
    CodeGenFunction &CGF) {
  return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()),
                            getThreadIDVariable()->getType(),
                            AlignmentSource::Decl);
}

/// Create the LLVM types used for runtime calls (the "ident_t" struct of four
/// i32 fields followed by an i8* and the 8 x i32 critical-name array) and
/// load the offload info metadata.
CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM)
    : CGM(CGM), OffloadEntriesInfoManager(CGM) {
  IdentTy = llvm::StructType::create(
      "ident_t", CGM.Int32Ty /* reserved_1 */, CGM.Int32Ty /* flags */,
      CGM.Int32Ty /* reserved_2 */, CGM.Int32Ty /* reserved_3 */,
      CGM.Int8PtrTy /* psource */);
  KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8);

  loadOffloadInfoMetadata();
}

/// Drop all entries of InternalVars.
void CGOpenMPRuntime::clear() {
  InternalVars.clear();
}

/// Emit an internal, always-inline helper function for a user-defined
/// reduction: ".omp_combiner." when \p IsCombiner is true, otherwise
/// ".omp_initializer.".
/// \param Ty Reduction type; both parameters are restrict pointers to it.
/// \param CombinerInitializer Expression emitted as the function body, may be
/// null.
/// \param In Variable mapped to the pointee of the "in" parameter.
/// \param Out Variable mapped to the pointee of the "out" parameter.
/// \return The created function.
static llvm::Function *
emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty,
                          const Expr *CombinerInitializer, const VarDecl *In,
                          const VarDecl *Out, bool IsCombiner) {
  // void .omp_combiner.(Ty *in, Ty *out);
  auto &C = CGM.getContext();
  QualType PtrTy = C.getPointerType(Ty).withRestrict();
  FunctionArgList Args;
  ImplicitParamDecl
      OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(),
                 /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(),
                              /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
  // NOTE(review): the "out" parameter is pushed first, so the emitted
  // function takes (out, in) in that order.
  Args.push_back(&OmpOutParm);
  Args.push_back(&OmpInParm);
  auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  auto *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  auto *Fn = llvm::Function::Create(
      FnTy, llvm::GlobalValue::InternalLinkage,
      IsCombiner ? ".omp_combiner." : ".omp_initializer.", &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  // Replace any noinline/optnone attributes with alwaysinline.
  Fn->removeFnAttr(llvm::Attribute::NoInline);
  Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
  Fn->addFnAttr(llvm::Attribute::AlwaysInline);
  CodeGenFunction CGF(CGM);
  // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions.
  // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions.
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, In->getLocation(),
                    Out->getLocation());
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  // Each variable is privatized to the pointee of its parameter: the lambda
  // loads the pointer parameter and returns the address it points to.
  Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm);
  Scope.addPrivate(In, [&CGF, AddrIn, PtrTy]() -> Address {
    return CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>())
        .getAddress();
  });
  Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm);
  Scope.addPrivate(Out, [&CGF, AddrOut, PtrTy]() -> Address {
    return CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>())
        .getAddress();
  });
  (void)Scope.Privatize();
  // For initializers only: emit the non-trivial initializer of the "out"
  // variable, if it has one, into its (privatized) storage.
  if (!IsCombiner && Out->hasInit() &&
      !CGF.isTrivialInitializer(Out->getInit())) {
    CGF.EmitAnyExprToMem(Out->getInit(), CGF.GetAddrOfLocalVar(Out),
                         Out->getType().getQualifiers(),
                         /*IsInitializer=*/true);
  }
  if (CombinerInitializer)
    CGF.EmitIgnoredExpr(CombinerInitializer);
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}

/// Emit the combiner (and, if the declaration has one, the initializer)
/// helper for the declare-reduction \p D and record the pair in UDRMap.
/// Does nothing if \p D is already present in UDRMap.
/// \param CGF If non-null, \p D is additionally recorded in FunctionUDRMap
/// under CGF->CurFn.
void CGOpenMPRuntime::emitUserDefinedReduction(
    CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) {
  if (UDRMap.count(D) > 0)
    return;
  auto &C = CGM.getContext();
  // Look the identifiers up only once and keep them in the members.
  if (!In || !Out) {
    In = &C.Idents.get("omp_in");
    Out = &C.Idents.get("omp_out");
  }
  llvm::Function *Combiner = emitCombinerOrInitializer(
      CGM, D->getType(), D->getCombiner(), cast<VarDecl>(D->lookup(In).front()),
      cast<VarDecl>(D->lookup(Out).front()),
      /*IsCombiner=*/true);
  llvm::Function *Initializer = nullptr;
  if (auto *Init = D->getInitializer()) {
    if (!Priv || !Orig) {
      Priv = &C.Idents.get("omp_priv");
      Orig = &C.Idents.get("omp_orig");
    }
    // The initializer expression is emitted as the helper body only for
    // call-style initializers; otherwise a null expression is passed.
    Initializer = emitCombinerOrInitializer(
        CGM, D->getType(),
        D->getInitializerKind() == OMPDeclareReductionDecl::CallInit ?
            Init
            : nullptr,
        // For the initializer helper the "in" slot is omp_orig and the "out"
        // slot is omp_priv.
        cast<VarDecl>(D->lookup(Orig).front()),
        cast<VarDecl>(D->lookup(Priv).front()),
        /*IsCombiner=*/false);
  }
  UDRMap.insert(std::make_pair(D, std::make_pair(Combiner, Initializer)));
  if (CGF) {
    auto &Decls = FunctionUDRMap.FindAndConstruct(CGF->CurFn);
    Decls.second.push_back(D);
  }
}

/// Return the (combiner, initializer) pair for \p D, emitting the helpers
/// first if they are not yet in UDRMap.
std::pair<llvm::Function *, llvm::Function *>
CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) {
  auto I = UDRMap.find(D);
  if (I != UDRMap.end())
    return I->second;
  emitUserDefinedReduction(/*CGF=*/nullptr, D);
  return UDRMap.lookup(D);
}

// Layout information for ident_t.
/// Alignment of ident_t: the pointer alignment.
static CharUnits getIdentAlign(CodeGenModule &CGM) {
  return CGM.getPointerAlign();
}
/// Size of ident_t: 16 bytes plus the size of one pointer.
static CharUnits getIdentSize(CodeGenModule &CGM) {
  assert((4 * CGM.getPointerSize()).isMultipleOf(CGM.getPointerAlign()));
  return CharUnits::fromQuantity(16) + CGM.getPointerSize();
}
/// Byte offset of \p Field inside ident_t: 4 bytes per preceding field.
static CharUnits getOffsetOfIdentField(IdentFieldIndex Field) {
  // All the fields except the last are i32, so this works beautifully.
  return unsigned(Field) * CharUnits::fromQuantity(4);
}
/// Build a struct GEP to \p Field of the ident_t object at \p Addr.
static Address createIdentFieldGEP(CodeGenFunction &CGF, Address Addr,
                                   IdentFieldIndex Field,
                                   const llvm::Twine &Name = "") {
  auto Offset = getOffsetOfIdentField(Field);
  return CGF.Builder.CreateStructGEP(Addr, Field, Offset, Name);
}

/// Shared implementation for outlining 'parallel' and 'teams' regions.
/// \param D Directive being outlined; used to determine whether the region
/// has a cancel construct.
/// \param CS Captured statement to outline.
/// \param ThreadIDVar Thread id variable; must have pointer type (asserted).
/// \param InnermostKind Directive kind forwarded to the region info.
/// \param OutlinedHelperName Name forwarded to the region info.
/// \param CodeGen Region body generator.
/// \return The function generated for \p CS.
static llvm::Value *emitParallelOrTeamsOutlinedFunction(
    CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS,
    const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
    const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen) {
  assert(ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 *");
  CodeGenFunction CGF(CGM, true);
  // Only the directive classes listed below are queried for hasCancel(); for
  // any other directive the flag stays false.
  bool HasCancel = false;
  if (auto *OPD = dyn_cast<OMPParallelDirective>(&D))
    HasCancel = OPD->hasCancel();
  else if (auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D))
    HasCancel = OPSD->hasCancel();
  else if (auto *OPFD = dyn_cast<OMPParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (auto *OPFD = dyn_cast<OMPTargetParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (auto *OPFD = dyn_cast<OMPDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (auto *OPFD = dyn_cast<OMPTeamsDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (auto *OPFD =
               dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind,
                                    HasCancel, OutlinedHelperName);
  // Install the region info on CGF for the duration of the outlining.
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  return CGF.GenerateOpenMPCapturedStmtFunction(*CS);
}

/// Outline the OMPD_parallel captured statement of \p D.
llvm::Value *CGOpenMPRuntime::emitParallelOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const
    RegionCodeGenTy &CodeGen) {
  const CapturedStmt *CS = D.getCapturedStmt(OMPD_parallel);
  return emitParallelOrTeamsOutlinedFunction(
      CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
}

/// Outline the OMPD_teams captured statement of \p D.
llvm::Value *CGOpenMPRuntime::emitTeamsOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  const CapturedStmt *CS = D.getCapturedStmt(OMPD_teams);
  return emitParallelOrTeamsOutlinedFunction(
      CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
}

/// Outline the captured statement of a task or taskloop directive.
/// \param ThreadIDVar Thread id variable; must NOT have pointer type
/// (asserted).
/// \param PartIDVar Variable passed to the untied-task action.
/// \param TaskTVar Pointer-typed variable whose pointee is passed as the last
/// argument of the __kmpc_omp_task call emitted by the untied-task code.
/// \param Tied Whether the task is tied.
/// \param NumberOfParts Written only for untied tasks, with the number of
/// parts reported by the action.
/// \return The function generated for the captured statement.
llvm::Value *CGOpenMPRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  // Code generator handed to the untied-task action: emits a call to
  // __kmpc_omp_task(loc, thread id, task pointer).
  auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF,
                                              PrePostActionTy &) {
    auto *ThreadID = getThreadID(CGF, D.getLocStart());
    auto *UpLoc = emitUpdateLocation(CGF, D.getLocStart());
    llvm::Value *TaskArgs[] = {
        UpLoc, ThreadID,
        CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar),
                                    TaskTVar->getType()->castAs<PointerType>())
            .getPointer()};
    CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task), TaskArgs);
  };
  CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar,
                                                            UntiedCodeGen);
  CodeGen.setAction(Action);
  assert(!ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 for tasks");
  // Taskloop directives capture under OMPD_taskloop, everything else under
  // OMPD_task.
  const OpenMPDirectiveKind Region =
      isOpenMPTaskLoopDirective(D.getDirectiveKind()) ?
          OMPD_taskloop
          : OMPD_task;
  auto *CS = D.getCapturedStmt(Region);
  // Only OMPTaskDirective is queried for hasCancel(); other directives get
  // false.
  auto *TD = dyn_cast<OMPTaskDirective>(&D);
  CodeGenFunction CGF(CGM, true);
  CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen,
                                        InnermostKind,
                                        TD ? TD->hasCancel() : false, Action);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  auto *Res = CGF.GenerateCapturedStmtFunction(*CS);
  if (!Tied)
    NumberOfParts = Action.getNumberOfParts();
  return Res;
}

/// Return the address of the constant default ident_t object for \p Flags,
/// creating and caching it in OpenMPDefaultLocMap on first use.
Address CGOpenMPRuntime::getOrCreateDefaultLocation(unsigned Flags) {
  CharUnits Align = getIdentAlign(CGM);
  llvm::Value *Entry = OpenMPDefaultLocMap.lookup(Flags);
  if (!Entry) {
    if (!DefaultOpenMPPSource) {
      // Initialize default location for psource field of ident_t structure of
      // all ident_t objects. Format is ";file;function;line;column;;".
      // Taken from
      // http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp_str.c
      DefaultOpenMPPSource =
          CGM.GetAddrOfConstantCString(";unknown;unknown;0;0;;").getPointer();
      DefaultOpenMPPSource =
          llvm::ConstantExpr::getBitCast(DefaultOpenMPPSource, CGM.Int8PtrTy);
    }

    // Field order matches the ident_t struct type: reserved_1, flags,
    // reserved_2, reserved_3, psource.
    ConstantInitBuilder builder(CGM);
    auto fields = builder.beginStruct(IdentTy);
    fields.addInt(CGM.Int32Ty, 0);
    fields.addInt(CGM.Int32Ty, Flags);
    fields.addInt(CGM.Int32Ty, 0);
    fields.addInt(CGM.Int32Ty, 0);
    fields.add(DefaultOpenMPPSource);
    auto DefaultOpenMPLocation =
      fields.finishAndCreateGlobal("", Align, /*isConstant*/ true,
                                   llvm::GlobalValue::PrivateLinkage);
    DefaultOpenMPLocation->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);

    OpenMPDefaultLocMap[Flags] = Entry = DefaultOpenMPLocation;
  }
  return Address(Entry, Align);
}

/// Return a pointer to an ident_t object describing \p Loc for use as the
/// location argument of a runtime call.
/// \param Flags Flags for the ident_t object; OMP_IDENT_KMPC is always added.
/// \return The shared default location when no debug info is requested or
/// \p Loc is invalid; otherwise a per-function ident_t whose psource field is
/// updated for \p Loc.
llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF,
                                                 SourceLocation Loc,
                                                 unsigned Flags) {
  Flags |= OMP_IDENT_KMPC;
  // If no
  // debug info is generated - return global default location.
  if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo ||
      Loc.isInvalid())
    return getOrCreateDefaultLocation(Flags).getPointer();

  assert(CGF.CurFn && "No function in current CodeGenFunction.");

  // Reuse the per-function ident_t slot if one was already created.
  Address LocValue = Address::invalid();
  auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
  if (I != OpenMPLocThreadIDMap.end())
    LocValue = Address(I->second.DebugLoc, getIdentAlign(CGF.CGM));

  // OpenMPLocThreadIDMap may have null DebugLoc and non-null ThreadID, if
  // GetOpenMPThreadID was called before this routine.
  if (!LocValue.isValid()) {
    // Generate "ident_t .kmpc_loc.addr;"
    Address AI = CGF.CreateTempAlloca(IdentTy, getIdentAlign(CGF.CGM),
                                      ".kmpc_loc.addr");
    auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
    Elem.second.DebugLoc = AI.getPointer();
    LocValue = AI;

    // Initialize the slot from the default location at the alloca insertion
    // point; the guard restores the builder's insertion point afterwards.
    CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
    CGF.Builder.SetInsertPoint(CGF.AllocaInsertPt);
    CGF.Builder.CreateMemCpy(LocValue, getOrCreateDefaultLocation(Flags),
                             CGM.getSize(getIdentSize(CGF.CGM)));
  }

  // char **psource = &.kmpc_loc_<flags>.addr.psource;
  Address PSource = createIdentFieldGEP(CGF, LocValue, IdentField_PSource);

  // Location strings are cached per raw source-location encoding.
  auto OMPDebugLoc = OpenMPDebugLocMap.lookup(Loc.getRawEncoding());
  if (OMPDebugLoc == nullptr) {
    SmallString<128> Buffer2;
    llvm::raw_svector_ostream OS2(Buffer2);
    // Build debug location
    PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
    OS2 << ";" << PLoc.getFilename() << ";";
    // The function field stays empty when the current declaration is not a
    // FunctionDecl.
    if (const FunctionDecl *FD =
            dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl)) {
      OS2 << FD->getQualifiedNameAsString();
    }
    OS2 << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;";
    OMPDebugLoc = CGF.Builder.CreateGlobalStringPtr(OS2.str());
    OpenMPDebugLocMap[Loc.getRawEncoding()] =
        OMPDebugLoc;
  }
  // *psource = ";<File>;<Function>;<Line>;<Column>;;";
  CGF.Builder.CreateStore(OMPDebugLoc, PSource);

  // Our callers always pass this to a runtime function, so for
  // convenience, go ahead and return a naked pointer.
  return LocValue.getPointer();
}

/// Return the value holding the OpenMP global thread id for the current
/// function.
/// A value cached in OpenMPLocThreadIDMap is reused when present. Otherwise
/// the id is either loaded from the region's thread id variable or obtained
/// from a __kmpc_global_thread_num call emitted at the alloca insertion
/// point.
llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF,
                                          SourceLocation Loc) {
  assert(CGF.CurFn && "No function in current CodeGenFunction.");

  llvm::Value *ThreadID = nullptr;
  // Check whether we've already cached a load of the thread id in this
  // function.
  auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
  if (I != OpenMPLocThreadIDMap.end()) {
    ThreadID = I->second.ThreadID;
    if (ThreadID != nullptr)
      return ThreadID;
  }
  // If exceptions are enabled, do not use parameter to avoid possible crash.
  if (!CGF.EHStack.requiresLandingPad() || !CGF.getLangOpts().Exceptions ||
      !CGF.getLangOpts().CXXExceptions ||
      CGF.Builder.GetInsertBlock() == CGF.AllocaInsertPt->getParent()) {
    if (auto *OMPRegionInfo =
            dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
      if (OMPRegionInfo->getThreadIDVariable()) {
        // Check if this an outlined function with thread id passed as argument.
        auto LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF);
        ThreadID = CGF.EmitLoadOfScalar(LVal, Loc);
        // If value loaded in entry block, cache it and use it everywhere in
        // function.
        if (CGF.Builder.GetInsertBlock() == CGF.AllocaInsertPt->getParent()) {
          auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
          Elem.second.ThreadID = ThreadID;
        }
        return ThreadID;
      }
    }
  }

  // This is not an outlined function region - need to call __kmpc_int32
  // kmpc_global_thread_num(ident_t *loc).
  // Generate thread id value and cache this value for use across the
  // function.
  // The call is placed at the alloca insertion point; the guard restores the
  // builder's previous insertion point on scope exit.
  CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
  CGF.Builder.SetInsertPoint(CGF.AllocaInsertPt);
  auto *Call = CGF.Builder.CreateCall(
      createRuntimeFunction(OMPRTL__kmpc_global_thread_num),
      emitUpdateLocation(CGF, Loc));
  Call->setCallingConv(CGF.getRuntimeCC());
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
  Elem.second.ThreadID = Call;
  return Call;
}

/// Drop the per-function state kept for CGF.CurFn: its entry in
/// OpenMPLocThreadIDMap, and every declare-reduction recorded for it in
/// FunctionUDRMap (each is also erased from UDRMap).
void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) {
  assert(CGF.CurFn && "No function in current CodeGenFunction.");
  if (OpenMPLocThreadIDMap.count(CGF.CurFn))
    OpenMPLocThreadIDMap.erase(CGF.CurFn);
  if (FunctionUDRMap.count(CGF.CurFn) > 0) {
    for(auto *D : FunctionUDRMap[CGF.CurFn]) {
      UDRMap.erase(D);
    }
    FunctionUDRMap.erase(CGF.CurFn);
  }
}

/// \return Unqualified pointer type to IdentTy.
llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() {
  // NOTE(review): this check has an empty body and therefore no effect.
  if (!IdentTy) {
  }
  return llvm::PointerType::getUnqual(IdentTy);
}

/// \return Unqualified pointer type to the kmpc_micro function type, which is
/// created on first use and kept in Kmpc_MicroTy.
llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() {
  if (!Kmpc_MicroTy) {
    // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...)
    llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty),
                                 llvm::PointerType::getUnqual(CGM.Int32Ty)};
    // Two i32* parameters followed by varargs.
    Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true);
  }
  return llvm::PointerType::getUnqual(Kmpc_MicroTy);
}

/// Build the declaration of the runtime entry point selected by \p Function
/// (interpreted as an OpenMPRTLFunction). Each case constructs the
/// llvm::FunctionType of the entry point and obtains the function through
/// CGM.CreateRuntimeFunction.
llvm::Constant *
CGOpenMPRuntime::createRuntimeFunction(unsigned Function) {
  llvm::Constant *RTLFn = nullptr;
  switch (static_cast<OpenMPRTLFunction>(Function)) {
  case OMPRTL__kmpc_fork_call: {
    // Build void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro
    // microtask, ...);
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
                                getKmpc_MicroPointerTy()};
    llvm::FunctionType *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ true);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_fork_call");
    break;
  }
  case OMPRTL__kmpc_global_thread_num: {
    // Build kmp_int32 __kmpc_global_thread_num(ident_t *loc);
    llvm::Type *TypeParams[] = {getIdentTyPointerTy()};
    llvm::FunctionType *FnTy =
        llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_global_thread_num");
    break;
  }
  case OMPRTL__kmpc_threadprivate_cached: {
    // Build void *__kmpc_threadprivate_cached(ident_t *loc,
    // kmp_int32 global_tid, void *data, size_t size, void ***cache);
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
                                CGM.VoidPtrTy, CGM.SizeTy,
                                CGM.VoidPtrTy->getPointerTo()->getPointerTo()};
    llvm::FunctionType *FnTy =
        llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg*/ false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_cached");
    break;
  }
  case OMPRTL__kmpc_critical: {
    // Build void __kmpc_critical(ident_t *loc, kmp_int32 global_tid,
    // kmp_critical_name *crit);
    llvm::Type *TypeParams[] = {
        getIdentTyPointerTy(),
CGM.Int32Ty, 1599 llvm::PointerType::getUnqual(KmpCriticalNameTy)}; 1600 llvm::FunctionType *FnTy = 1601 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1602 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_critical"); 1603 break; 1604 } 1605 case OMPRTL__kmpc_critical_with_hint: { 1606 // Build void __kmpc_critical_with_hint(ident_t *loc, kmp_int32 global_tid, 1607 // kmp_critical_name *crit, uintptr_t hint); 1608 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 1609 llvm::PointerType::getUnqual(KmpCriticalNameTy), 1610 CGM.IntPtrTy}; 1611 llvm::FunctionType *FnTy = 1612 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1613 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_critical_with_hint"); 1614 break; 1615 } 1616 case OMPRTL__kmpc_threadprivate_register: { 1617 // Build void __kmpc_threadprivate_register(ident_t *, void *data, 1618 // kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor); 1619 // typedef void *(*kmpc_ctor)(void *); 1620 auto KmpcCtorTy = 1621 llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy, 1622 /*isVarArg*/ false)->getPointerTo(); 1623 // typedef void *(*kmpc_cctor)(void *, void *); 1624 llvm::Type *KmpcCopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy}; 1625 auto KmpcCopyCtorTy = 1626 llvm::FunctionType::get(CGM.VoidPtrTy, KmpcCopyCtorTyArgs, 1627 /*isVarArg*/ false)->getPointerTo(); 1628 // typedef void (*kmpc_dtor)(void *); 1629 auto KmpcDtorTy = 1630 llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy, /*isVarArg*/ false) 1631 ->getPointerTo(); 1632 llvm::Type *FnTyArgs[] = {getIdentTyPointerTy(), CGM.VoidPtrTy, KmpcCtorTy, 1633 KmpcCopyCtorTy, KmpcDtorTy}; 1634 auto FnTy = llvm::FunctionType::get(CGM.VoidTy, FnTyArgs, 1635 /*isVarArg*/ false); 1636 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_register"); 1637 break; 1638 } 1639 case OMPRTL__kmpc_end_critical: { 1640 // Build void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid, 1641 // kmp_critical_name *crit); 
1642 llvm::Type *TypeParams[] = { 1643 getIdentTyPointerTy(), CGM.Int32Ty, 1644 llvm::PointerType::getUnqual(KmpCriticalNameTy)}; 1645 llvm::FunctionType *FnTy = 1646 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1647 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_critical"); 1648 break; 1649 } 1650 case OMPRTL__kmpc_cancel_barrier: { 1651 // Build kmp_int32 __kmpc_cancel_barrier(ident_t *loc, kmp_int32 1652 // global_tid); 1653 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1654 llvm::FunctionType *FnTy = 1655 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 1656 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_cancel_barrier"); 1657 break; 1658 } 1659 case OMPRTL__kmpc_barrier: { 1660 // Build void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid); 1661 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1662 llvm::FunctionType *FnTy = 1663 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1664 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_barrier"); 1665 break; 1666 } 1667 case OMPRTL__kmpc_for_static_fini: { 1668 // Build void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid); 1669 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1670 llvm::FunctionType *FnTy = 1671 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1672 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_for_static_fini"); 1673 break; 1674 } 1675 case OMPRTL__kmpc_push_num_threads: { 1676 // Build void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid, 1677 // kmp_int32 num_threads) 1678 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 1679 CGM.Int32Ty}; 1680 llvm::FunctionType *FnTy = 1681 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1682 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_num_threads"); 1683 break; 1684 } 1685 case OMPRTL__kmpc_serialized_parallel: { 1686 // Build void 
__kmpc_serialized_parallel(ident_t *loc, kmp_int32 1687 // global_tid); 1688 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1689 llvm::FunctionType *FnTy = 1690 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1691 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_serialized_parallel"); 1692 break; 1693 } 1694 case OMPRTL__kmpc_end_serialized_parallel: { 1695 // Build void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32 1696 // global_tid); 1697 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1698 llvm::FunctionType *FnTy = 1699 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1700 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_serialized_parallel"); 1701 break; 1702 } 1703 case OMPRTL__kmpc_flush: { 1704 // Build void __kmpc_flush(ident_t *loc); 1705 llvm::Type *TypeParams[] = {getIdentTyPointerTy()}; 1706 llvm::FunctionType *FnTy = 1707 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1708 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_flush"); 1709 break; 1710 } 1711 case OMPRTL__kmpc_master: { 1712 // Build kmp_int32 __kmpc_master(ident_t *loc, kmp_int32 global_tid); 1713 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1714 llvm::FunctionType *FnTy = 1715 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 1716 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_master"); 1717 break; 1718 } 1719 case OMPRTL__kmpc_end_master: { 1720 // Build void __kmpc_end_master(ident_t *loc, kmp_int32 global_tid); 1721 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1722 llvm::FunctionType *FnTy = 1723 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 1724 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_master"); 1725 break; 1726 } 1727 case OMPRTL__kmpc_omp_taskyield: { 1728 // Build kmp_int32 __kmpc_omp_taskyield(ident_t *, kmp_int32 global_tid, 1729 // int end_part); 1730 
llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy}; 1731 llvm::FunctionType *FnTy = 1732 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 1733 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_taskyield"); 1734 break; 1735 } 1736 case OMPRTL__kmpc_single: { 1737 // Build kmp_int32 __kmpc_single(ident_t *loc, kmp_int32 global_tid); 1738 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1739 llvm::FunctionType *FnTy = 1740 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 1741 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_single"); 1742 break; 1743 } 1744 case OMPRTL__kmpc_end_single: { 1745 // Build void __kmpc_end_single(ident_t *loc, kmp_int32 global_tid); 1746 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1747 llvm::FunctionType *FnTy = 1748 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 1749 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_single"); 1750 break; 1751 } 1752 case OMPRTL__kmpc_omp_task_alloc: { 1753 // Build kmp_task_t *__kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid, 1754 // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, 1755 // kmp_routine_entry_t *task_entry); 1756 assert(KmpRoutineEntryPtrTy != nullptr && 1757 "Type kmp_routine_entry_t must be created."); 1758 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, 1759 CGM.SizeTy, CGM.SizeTy, KmpRoutineEntryPtrTy}; 1760 // Return void * and then cast to particular kmp_task_t type. 
1761 llvm::FunctionType *FnTy = 1762 llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false); 1763 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_alloc"); 1764 break; 1765 } 1766 case OMPRTL__kmpc_omp_task: { 1767 // Build kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t 1768 // *new_task); 1769 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 1770 CGM.VoidPtrTy}; 1771 llvm::FunctionType *FnTy = 1772 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 1773 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task"); 1774 break; 1775 } 1776 case OMPRTL__kmpc_copyprivate: { 1777 // Build void __kmpc_copyprivate(ident_t *loc, kmp_int32 global_tid, 1778 // size_t cpy_size, void *cpy_data, void(*cpy_func)(void *, void *), 1779 // kmp_int32 didit); 1780 llvm::Type *CpyTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy}; 1781 auto *CpyFnTy = 1782 llvm::FunctionType::get(CGM.VoidTy, CpyTypeParams, /*isVarArg=*/false); 1783 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.SizeTy, 1784 CGM.VoidPtrTy, CpyFnTy->getPointerTo(), 1785 CGM.Int32Ty}; 1786 llvm::FunctionType *FnTy = 1787 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 1788 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_copyprivate"); 1789 break; 1790 } 1791 case OMPRTL__kmpc_reduce: { 1792 // Build kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid, 1793 // kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void 1794 // (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name *lck); 1795 llvm::Type *ReduceTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy}; 1796 auto *ReduceFnTy = llvm::FunctionType::get(CGM.VoidTy, ReduceTypeParams, 1797 /*isVarArg=*/false); 1798 llvm::Type *TypeParams[] = { 1799 getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, CGM.SizeTy, 1800 CGM.VoidPtrTy, ReduceFnTy->getPointerTo(), 1801 llvm::PointerType::getUnqual(KmpCriticalNameTy)}; 1802 
llvm::FunctionType *FnTy = 1803 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 1804 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_reduce"); 1805 break; 1806 } 1807 case OMPRTL__kmpc_reduce_nowait: { 1808 // Build kmp_int32 __kmpc_reduce_nowait(ident_t *loc, kmp_int32 1809 // global_tid, kmp_int32 num_vars, size_t reduce_size, void *reduce_data, 1810 // void (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name 1811 // *lck); 1812 llvm::Type *ReduceTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy}; 1813 auto *ReduceFnTy = llvm::FunctionType::get(CGM.VoidTy, ReduceTypeParams, 1814 /*isVarArg=*/false); 1815 llvm::Type *TypeParams[] = { 1816 getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, CGM.SizeTy, 1817 CGM.VoidPtrTy, ReduceFnTy->getPointerTo(), 1818 llvm::PointerType::getUnqual(KmpCriticalNameTy)}; 1819 llvm::FunctionType *FnTy = 1820 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 1821 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_reduce_nowait"); 1822 break; 1823 } 1824 case OMPRTL__kmpc_end_reduce: { 1825 // Build void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid, 1826 // kmp_critical_name *lck); 1827 llvm::Type *TypeParams[] = { 1828 getIdentTyPointerTy(), CGM.Int32Ty, 1829 llvm::PointerType::getUnqual(KmpCriticalNameTy)}; 1830 llvm::FunctionType *FnTy = 1831 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 1832 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_reduce"); 1833 break; 1834 } 1835 case OMPRTL__kmpc_end_reduce_nowait: { 1836 // Build __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid, 1837 // kmp_critical_name *lck); 1838 llvm::Type *TypeParams[] = { 1839 getIdentTyPointerTy(), CGM.Int32Ty, 1840 llvm::PointerType::getUnqual(KmpCriticalNameTy)}; 1841 llvm::FunctionType *FnTy = 1842 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 1843 RTLFn = 1844 CGM.CreateRuntimeFunction(FnTy, 
/*Name=*/"__kmpc_end_reduce_nowait"); 1845 break; 1846 } 1847 case OMPRTL__kmpc_omp_task_begin_if0: { 1848 // Build void __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t 1849 // *new_task); 1850 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 1851 CGM.VoidPtrTy}; 1852 llvm::FunctionType *FnTy = 1853 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 1854 RTLFn = 1855 CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_begin_if0"); 1856 break; 1857 } 1858 case OMPRTL__kmpc_omp_task_complete_if0: { 1859 // Build void __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t 1860 // *new_task); 1861 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 1862 CGM.VoidPtrTy}; 1863 llvm::FunctionType *FnTy = 1864 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 1865 RTLFn = CGM.CreateRuntimeFunction(FnTy, 1866 /*Name=*/"__kmpc_omp_task_complete_if0"); 1867 break; 1868 } 1869 case OMPRTL__kmpc_ordered: { 1870 // Build void __kmpc_ordered(ident_t *loc, kmp_int32 global_tid); 1871 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1872 llvm::FunctionType *FnTy = 1873 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 1874 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_ordered"); 1875 break; 1876 } 1877 case OMPRTL__kmpc_end_ordered: { 1878 // Build void __kmpc_end_ordered(ident_t *loc, kmp_int32 global_tid); 1879 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1880 llvm::FunctionType *FnTy = 1881 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 1882 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_ordered"); 1883 break; 1884 } 1885 case OMPRTL__kmpc_omp_taskwait: { 1886 // Build kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32 global_tid); 1887 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1888 llvm::FunctionType *FnTy = 1889 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 1890 RTLFn = 
CGM.CreateRuntimeFunction(FnTy, "__kmpc_omp_taskwait"); 1891 break; 1892 } 1893 case OMPRTL__kmpc_taskgroup: { 1894 // Build void __kmpc_taskgroup(ident_t *loc, kmp_int32 global_tid); 1895 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1896 llvm::FunctionType *FnTy = 1897 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 1898 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_taskgroup"); 1899 break; 1900 } 1901 case OMPRTL__kmpc_end_taskgroup: { 1902 // Build void __kmpc_end_taskgroup(ident_t *loc, kmp_int32 global_tid); 1903 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1904 llvm::FunctionType *FnTy = 1905 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 1906 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_taskgroup"); 1907 break; 1908 } 1909 case OMPRTL__kmpc_push_proc_bind: { 1910 // Build void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid, 1911 // int proc_bind) 1912 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy}; 1913 llvm::FunctionType *FnTy = 1914 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1915 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_proc_bind"); 1916 break; 1917 } 1918 case OMPRTL__kmpc_omp_task_with_deps: { 1919 // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid, 1920 // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list, 1921 // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list); 1922 llvm::Type *TypeParams[] = { 1923 getIdentTyPointerTy(), CGM.Int32Ty, CGM.VoidPtrTy, CGM.Int32Ty, 1924 CGM.VoidPtrTy, CGM.Int32Ty, CGM.VoidPtrTy}; 1925 llvm::FunctionType *FnTy = 1926 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 1927 RTLFn = 1928 CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_with_deps"); 1929 break; 1930 } 1931 case OMPRTL__kmpc_omp_wait_deps: { 1932 // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid, 1933 // 
kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 ndeps_noalias, 1934 // kmp_depend_info_t *noalias_dep_list); 1935 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 1936 CGM.Int32Ty, CGM.VoidPtrTy, 1937 CGM.Int32Ty, CGM.VoidPtrTy}; 1938 llvm::FunctionType *FnTy = 1939 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 1940 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_wait_deps"); 1941 break; 1942 } 1943 case OMPRTL__kmpc_cancellationpoint: { 1944 // Build kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32 1945 // global_tid, kmp_int32 cncl_kind) 1946 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy}; 1947 llvm::FunctionType *FnTy = 1948 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 1949 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_cancellationpoint"); 1950 break; 1951 } 1952 case OMPRTL__kmpc_cancel: { 1953 // Build kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid, 1954 // kmp_int32 cncl_kind) 1955 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy}; 1956 llvm::FunctionType *FnTy = 1957 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 1958 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_cancel"); 1959 break; 1960 } 1961 case OMPRTL__kmpc_push_num_teams: { 1962 // Build void kmpc_push_num_teams (ident_t loc, kmp_int32 global_tid, 1963 // kmp_int32 num_teams, kmp_int32 num_threads) 1964 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, 1965 CGM.Int32Ty}; 1966 llvm::FunctionType *FnTy = 1967 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 1968 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_num_teams"); 1969 break; 1970 } 1971 case OMPRTL__kmpc_fork_teams: { 1972 // Build void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro 1973 // microtask, ...); 1974 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 1975 
getKmpc_MicroPointerTy()}; 1976 llvm::FunctionType *FnTy = 1977 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ true); 1978 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_fork_teams"); 1979 break; 1980 } 1981 case OMPRTL__kmpc_taskloop: { 1982 // Build void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int 1983 // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int 1984 // sched, kmp_uint64 grainsize, void *task_dup); 1985 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), 1986 CGM.IntTy, 1987 CGM.VoidPtrTy, 1988 CGM.IntTy, 1989 CGM.Int64Ty->getPointerTo(), 1990 CGM.Int64Ty->getPointerTo(), 1991 CGM.Int64Ty, 1992 CGM.IntTy, 1993 CGM.IntTy, 1994 CGM.Int64Ty, 1995 CGM.VoidPtrTy}; 1996 llvm::FunctionType *FnTy = 1997 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 1998 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_taskloop"); 1999 break; 2000 } 2001 case OMPRTL__kmpc_doacross_init: { 2002 // Build void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid, kmp_int32 2003 // num_dims, struct kmp_dim *dims); 2004 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), 2005 CGM.Int32Ty, 2006 CGM.Int32Ty, 2007 CGM.VoidPtrTy}; 2008 llvm::FunctionType *FnTy = 2009 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2010 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_init"); 2011 break; 2012 } 2013 case OMPRTL__kmpc_doacross_fini: { 2014 // Build void __kmpc_doacross_fini(ident_t *loc, kmp_int32 gtid); 2015 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 2016 llvm::FunctionType *FnTy = 2017 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2018 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_fini"); 2019 break; 2020 } 2021 case OMPRTL__kmpc_doacross_post: { 2022 // Build void __kmpc_doacross_post(ident_t *loc, kmp_int32 gtid, kmp_int64 2023 // *vec); 2024 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 
2025 CGM.Int64Ty->getPointerTo()}; 2026 llvm::FunctionType *FnTy = 2027 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2028 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_post"); 2029 break; 2030 } 2031 case OMPRTL__kmpc_doacross_wait: { 2032 // Build void __kmpc_doacross_wait(ident_t *loc, kmp_int32 gtid, kmp_int64 2033 // *vec); 2034 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 2035 CGM.Int64Ty->getPointerTo()}; 2036 llvm::FunctionType *FnTy = 2037 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2038 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_wait"); 2039 break; 2040 } 2041 case OMPRTL__kmpc_task_reduction_init: { 2042 // Build void *__kmpc_task_reduction_init(int gtid, int num_data, void 2043 // *data); 2044 llvm::Type *TypeParams[] = {CGM.IntTy, CGM.IntTy, CGM.VoidPtrTy}; 2045 llvm::FunctionType *FnTy = 2046 llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false); 2047 RTLFn = 2048 CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_task_reduction_init"); 2049 break; 2050 } 2051 case OMPRTL__kmpc_task_reduction_get_th_data: { 2052 // Build void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void 2053 // *d); 2054 llvm::Type *TypeParams[] = {CGM.IntTy, CGM.VoidPtrTy, CGM.VoidPtrTy}; 2055 llvm::FunctionType *FnTy = 2056 llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false); 2057 RTLFn = CGM.CreateRuntimeFunction( 2058 FnTy, /*Name=*/"__kmpc_task_reduction_get_th_data"); 2059 break; 2060 } 2061 case OMPRTL__tgt_target: { 2062 // Build int32_t __tgt_target(int64_t device_id, void *host_ptr, int32_t 2063 // arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t 2064 // *arg_types); 2065 llvm::Type *TypeParams[] = {CGM.Int64Ty, 2066 CGM.VoidPtrTy, 2067 CGM.Int32Ty, 2068 CGM.VoidPtrPtrTy, 2069 CGM.VoidPtrPtrTy, 2070 CGM.SizeTy->getPointerTo(), 2071 CGM.Int64Ty->getPointerTo()}; 2072 llvm::FunctionType *FnTy = 2073 
llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 2074 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target"); 2075 break; 2076 } 2077 case OMPRTL__tgt_target_nowait: { 2078 // Build int32_t __tgt_target_nowait(int64_t device_id, void *host_ptr, 2079 // int32_t arg_num, void** args_base, void **args, size_t *arg_sizes, 2080 // int64_t *arg_types); 2081 llvm::Type *TypeParams[] = {CGM.Int64Ty, 2082 CGM.VoidPtrTy, 2083 CGM.Int32Ty, 2084 CGM.VoidPtrPtrTy, 2085 CGM.VoidPtrPtrTy, 2086 CGM.SizeTy->getPointerTo(), 2087 CGM.Int64Ty->getPointerTo()}; 2088 llvm::FunctionType *FnTy = 2089 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 2090 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_nowait"); 2091 break; 2092 } 2093 case OMPRTL__tgt_target_teams: { 2094 // Build int32_t __tgt_target_teams(int64_t device_id, void *host_ptr, 2095 // int32_t arg_num, void** args_base, void **args, size_t *arg_sizes, 2096 // int64_t *arg_types, int32_t num_teams, int32_t thread_limit); 2097 llvm::Type *TypeParams[] = {CGM.Int64Ty, 2098 CGM.VoidPtrTy, 2099 CGM.Int32Ty, 2100 CGM.VoidPtrPtrTy, 2101 CGM.VoidPtrPtrTy, 2102 CGM.SizeTy->getPointerTo(), 2103 CGM.Int64Ty->getPointerTo(), 2104 CGM.Int32Ty, 2105 CGM.Int32Ty}; 2106 llvm::FunctionType *FnTy = 2107 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 2108 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_teams"); 2109 break; 2110 } 2111 case OMPRTL__tgt_target_teams_nowait: { 2112 // Build int32_t __tgt_target_teams_nowait(int64_t device_id, void 2113 // *host_ptr, int32_t arg_num, void** args_base, void **args, size_t 2114 // *arg_sizes, int64_t *arg_types, int32_t num_teams, int32_t thread_limit); 2115 llvm::Type *TypeParams[] = {CGM.Int64Ty, 2116 CGM.VoidPtrTy, 2117 CGM.Int32Ty, 2118 CGM.VoidPtrPtrTy, 2119 CGM.VoidPtrPtrTy, 2120 CGM.SizeTy->getPointerTo(), 2121 CGM.Int64Ty->getPointerTo(), 2122 CGM.Int32Ty, 2123 CGM.Int32Ty}; 2124 llvm::FunctionType *FnTy = 2125 
llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 2126 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_teams_nowait"); 2127 break; 2128 } 2129 case OMPRTL__tgt_register_lib: { 2130 // Build void __tgt_register_lib(__tgt_bin_desc *desc); 2131 QualType ParamTy = 2132 CGM.getContext().getPointerType(getTgtBinaryDescriptorQTy()); 2133 llvm::Type *TypeParams[] = {CGM.getTypes().ConvertTypeForMem(ParamTy)}; 2134 llvm::FunctionType *FnTy = 2135 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 2136 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_register_lib"); 2137 break; 2138 } 2139 case OMPRTL__tgt_unregister_lib: { 2140 // Build void __tgt_unregister_lib(__tgt_bin_desc *desc); 2141 QualType ParamTy = 2142 CGM.getContext().getPointerType(getTgtBinaryDescriptorQTy()); 2143 llvm::Type *TypeParams[] = {CGM.getTypes().ConvertTypeForMem(ParamTy)}; 2144 llvm::FunctionType *FnTy = 2145 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 2146 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_unregister_lib"); 2147 break; 2148 } 2149 case OMPRTL__tgt_target_data_begin: { 2150 // Build void __tgt_target_data_begin(int64_t device_id, int32_t arg_num, 2151 // void** args_base, void **args, size_t *arg_sizes, int64_t *arg_types); 2152 llvm::Type *TypeParams[] = {CGM.Int64Ty, 2153 CGM.Int32Ty, 2154 CGM.VoidPtrPtrTy, 2155 CGM.VoidPtrPtrTy, 2156 CGM.SizeTy->getPointerTo(), 2157 CGM.Int64Ty->getPointerTo()}; 2158 llvm::FunctionType *FnTy = 2159 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 2160 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_begin"); 2161 break; 2162 } 2163 case OMPRTL__tgt_target_data_begin_nowait: { 2164 // Build void __tgt_target_data_begin_nowait(int64_t device_id, int32_t 2165 // arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t 2166 // *arg_types); 2167 llvm::Type *TypeParams[] = {CGM.Int64Ty, 2168 CGM.Int32Ty, 2169 CGM.VoidPtrPtrTy, 2170 
CGM.VoidPtrPtrTy, 2171 CGM.SizeTy->getPointerTo(), 2172 CGM.Int64Ty->getPointerTo()}; 2173 auto *FnTy = 2174 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2175 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_begin_nowait"); 2176 break; 2177 } 2178 case OMPRTL__tgt_target_data_end: { 2179 // Build void __tgt_target_data_end(int64_t device_id, int32_t arg_num, 2180 // void** args_base, void **args, size_t *arg_sizes, int64_t *arg_types); 2181 llvm::Type *TypeParams[] = {CGM.Int64Ty, 2182 CGM.Int32Ty, 2183 CGM.VoidPtrPtrTy, 2184 CGM.VoidPtrPtrTy, 2185 CGM.SizeTy->getPointerTo(), 2186 CGM.Int64Ty->getPointerTo()}; 2187 llvm::FunctionType *FnTy = 2188 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 2189 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_end"); 2190 break; 2191 } 2192 case OMPRTL__tgt_target_data_end_nowait: { 2193 // Build void __tgt_target_data_end_nowait(int64_t device_id, int32_t 2194 // arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t 2195 // *arg_types); 2196 llvm::Type *TypeParams[] = {CGM.Int64Ty, 2197 CGM.Int32Ty, 2198 CGM.VoidPtrPtrTy, 2199 CGM.VoidPtrPtrTy, 2200 CGM.SizeTy->getPointerTo(), 2201 CGM.Int64Ty->getPointerTo()}; 2202 auto *FnTy = 2203 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2204 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_end_nowait"); 2205 break; 2206 } 2207 case OMPRTL__tgt_target_data_update: { 2208 // Build void __tgt_target_data_update(int64_t device_id, int32_t arg_num, 2209 // void** args_base, void **args, size_t *arg_sizes, int64_t *arg_types); 2210 llvm::Type *TypeParams[] = {CGM.Int64Ty, 2211 CGM.Int32Ty, 2212 CGM.VoidPtrPtrTy, 2213 CGM.VoidPtrPtrTy, 2214 CGM.SizeTy->getPointerTo(), 2215 CGM.Int64Ty->getPointerTo()}; 2216 llvm::FunctionType *FnTy = 2217 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 2218 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_update"); 2219 
break; 2220 } 2221 case OMPRTL__tgt_target_data_update_nowait: { 2222 // Build void __tgt_target_data_update_nowait(int64_t device_id, int32_t 2223 // arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t 2224 // *arg_types); 2225 llvm::Type *TypeParams[] = {CGM.Int64Ty, 2226 CGM.Int32Ty, 2227 CGM.VoidPtrPtrTy, 2228 CGM.VoidPtrPtrTy, 2229 CGM.SizeTy->getPointerTo(), 2230 CGM.Int64Ty->getPointerTo()}; 2231 auto *FnTy = 2232 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2233 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_update_nowait"); 2234 break; 2235 } 2236 } 2237 assert(RTLFn && "Unable to find OpenMP runtime function"); 2238 return RTLFn; 2239 } 2240 2241 llvm::Constant *CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize, 2242 bool IVSigned) { 2243 assert((IVSize == 32 || IVSize == 64) && 2244 "IV size is not compatible with the omp runtime"); 2245 auto Name = IVSize == 32 ? (IVSigned ? "__kmpc_for_static_init_4" 2246 : "__kmpc_for_static_init_4u") 2247 : (IVSigned ? "__kmpc_for_static_init_8" 2248 : "__kmpc_for_static_init_8u"); 2249 auto ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty; 2250 auto PtrTy = llvm::PointerType::getUnqual(ITy); 2251 llvm::Type *TypeParams[] = { 2252 getIdentTyPointerTy(), // loc 2253 CGM.Int32Ty, // tid 2254 CGM.Int32Ty, // schedtype 2255 llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter 2256 PtrTy, // p_lower 2257 PtrTy, // p_upper 2258 PtrTy, // p_stride 2259 ITy, // incr 2260 ITy // chunk 2261 }; 2262 llvm::FunctionType *FnTy = 2263 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 2264 return CGM.CreateRuntimeFunction(FnTy, Name); 2265 } 2266 2267 llvm::Constant *CGOpenMPRuntime::createDispatchInitFunction(unsigned IVSize, 2268 bool IVSigned) { 2269 assert((IVSize == 32 || IVSize == 64) && 2270 "IV size is not compatible with the omp runtime"); 2271 auto Name = 2272 IVSize == 32 2273 ? (IVSigned ? 
"__kmpc_dispatch_init_4" : "__kmpc_dispatch_init_4u") 2274 : (IVSigned ? "__kmpc_dispatch_init_8" : "__kmpc_dispatch_init_8u"); 2275 auto ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty; 2276 llvm::Type *TypeParams[] = { getIdentTyPointerTy(), // loc 2277 CGM.Int32Ty, // tid 2278 CGM.Int32Ty, // schedtype 2279 ITy, // lower 2280 ITy, // upper 2281 ITy, // stride 2282 ITy // chunk 2283 }; 2284 llvm::FunctionType *FnTy = 2285 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 2286 return CGM.CreateRuntimeFunction(FnTy, Name); 2287 } 2288 2289 llvm::Constant *CGOpenMPRuntime::createDispatchFiniFunction(unsigned IVSize, 2290 bool IVSigned) { 2291 assert((IVSize == 32 || IVSize == 64) && 2292 "IV size is not compatible with the omp runtime"); 2293 auto Name = 2294 IVSize == 32 2295 ? (IVSigned ? "__kmpc_dispatch_fini_4" : "__kmpc_dispatch_fini_4u") 2296 : (IVSigned ? "__kmpc_dispatch_fini_8" : "__kmpc_dispatch_fini_8u"); 2297 llvm::Type *TypeParams[] = { 2298 getIdentTyPointerTy(), // loc 2299 CGM.Int32Ty, // tid 2300 }; 2301 llvm::FunctionType *FnTy = 2302 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2303 return CGM.CreateRuntimeFunction(FnTy, Name); 2304 } 2305 2306 llvm::Constant *CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize, 2307 bool IVSigned) { 2308 assert((IVSize == 32 || IVSize == 64) && 2309 "IV size is not compatible with the omp runtime"); 2310 auto Name = 2311 IVSize == 32 2312 ? (IVSigned ? "__kmpc_dispatch_next_4" : "__kmpc_dispatch_next_4u") 2313 : (IVSigned ? "__kmpc_dispatch_next_8" : "__kmpc_dispatch_next_8u"); 2314 auto ITy = IVSize == 32 ? 
CGM.Int32Ty : CGM.Int64Ty; 2315 auto PtrTy = llvm::PointerType::getUnqual(ITy); 2316 llvm::Type *TypeParams[] = { 2317 getIdentTyPointerTy(), // loc 2318 CGM.Int32Ty, // tid 2319 llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter 2320 PtrTy, // p_lower 2321 PtrTy, // p_upper 2322 PtrTy // p_stride 2323 }; 2324 llvm::FunctionType *FnTy = 2325 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 2326 return CGM.CreateRuntimeFunction(FnTy, Name); 2327 } 2328 2329 llvm::Constant * 2330 CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) { 2331 assert(!CGM.getLangOpts().OpenMPUseTLS || 2332 !CGM.getContext().getTargetInfo().isTLSSupported()); 2333 // Lookup the entry, lazily creating it if necessary. 2334 return getOrCreateInternalVariable(CGM.Int8PtrPtrTy, 2335 Twine(CGM.getMangledName(VD)) + ".cache."); 2336 } 2337 2338 Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF, 2339 const VarDecl *VD, 2340 Address VDAddr, 2341 SourceLocation Loc) { 2342 if (CGM.getLangOpts().OpenMPUseTLS && 2343 CGM.getContext().getTargetInfo().isTLSSupported()) 2344 return VDAddr; 2345 2346 auto VarTy = VDAddr.getElementType(); 2347 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 2348 CGF.Builder.CreatePointerCast(VDAddr.getPointer(), 2349 CGM.Int8PtrTy), 2350 CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)), 2351 getOrCreateThreadPrivateCache(VD)}; 2352 return Address(CGF.EmitRuntimeCall( 2353 createRuntimeFunction(OMPRTL__kmpc_threadprivate_cached), Args), 2354 VDAddr.getAlignment()); 2355 } 2356 2357 void CGOpenMPRuntime::emitThreadPrivateVarInit( 2358 CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor, 2359 llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) { 2360 // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime 2361 // library. 
2362 auto OMPLoc = emitUpdateLocation(CGF, Loc); 2363 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_global_thread_num), 2364 OMPLoc); 2365 // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor) 2366 // to register constructor/destructor for variable. 2367 llvm::Value *Args[] = {OMPLoc, 2368 CGF.Builder.CreatePointerCast(VDAddr.getPointer(), 2369 CGM.VoidPtrTy), 2370 Ctor, CopyCtor, Dtor}; 2371 CGF.EmitRuntimeCall( 2372 createRuntimeFunction(OMPRTL__kmpc_threadprivate_register), Args); 2373 } 2374 2375 llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition( 2376 const VarDecl *VD, Address VDAddr, SourceLocation Loc, 2377 bool PerformInit, CodeGenFunction *CGF) { 2378 if (CGM.getLangOpts().OpenMPUseTLS && 2379 CGM.getContext().getTargetInfo().isTLSSupported()) 2380 return nullptr; 2381 2382 VD = VD->getDefinition(CGM.getContext()); 2383 if (VD && ThreadPrivateWithDefinition.count(VD) == 0) { 2384 ThreadPrivateWithDefinition.insert(VD); 2385 QualType ASTTy = VD->getType(); 2386 2387 llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr; 2388 auto Init = VD->getAnyInitializer(); 2389 if (CGM.getLangOpts().CPlusPlus && PerformInit) { 2390 // Generate function that re-emits the declaration's initializer into the 2391 // threadprivate copy of the variable VD 2392 CodeGenFunction CtorCGF(CGM); 2393 FunctionArgList Args; 2394 ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc, 2395 /*Id=*/nullptr, CGM.getContext().VoidPtrTy, 2396 ImplicitParamDecl::Other); 2397 Args.push_back(&Dst); 2398 2399 auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration( 2400 CGM.getContext().VoidPtrTy, Args); 2401 auto FTy = CGM.getTypes().GetFunctionType(FI); 2402 auto Fn = CGM.CreateGlobalInitOrDestructFunction( 2403 FTy, ".__kmpc_global_ctor_.", FI, Loc); 2404 CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI, 2405 Args, Loc, Loc); 2406 auto ArgVal = CtorCGF.EmitLoadOfScalar( 2407 
CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false, 2408 CGM.getContext().VoidPtrTy, Dst.getLocation()); 2409 Address Arg = Address(ArgVal, VDAddr.getAlignment()); 2410 Arg = CtorCGF.Builder.CreateElementBitCast( 2411 Arg, CtorCGF.ConvertTypeForMem(ASTTy)); 2412 CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(), 2413 /*IsInitializer=*/true); 2414 ArgVal = CtorCGF.EmitLoadOfScalar( 2415 CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false, 2416 CGM.getContext().VoidPtrTy, Dst.getLocation()); 2417 CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue); 2418 CtorCGF.FinishFunction(); 2419 Ctor = Fn; 2420 } 2421 if (VD->getType().isDestructedType() != QualType::DK_none) { 2422 // Generate function that emits destructor call for the threadprivate copy 2423 // of the variable VD 2424 CodeGenFunction DtorCGF(CGM); 2425 FunctionArgList Args; 2426 ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc, 2427 /*Id=*/nullptr, CGM.getContext().VoidPtrTy, 2428 ImplicitParamDecl::Other); 2429 Args.push_back(&Dst); 2430 2431 auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration( 2432 CGM.getContext().VoidTy, Args); 2433 auto FTy = CGM.getTypes().GetFunctionType(FI); 2434 auto Fn = CGM.CreateGlobalInitOrDestructFunction( 2435 FTy, ".__kmpc_global_dtor_.", FI, Loc); 2436 auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF); 2437 DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args, 2438 Loc, Loc); 2439 // Create a scope with an artificial location for the body of this function. 
2440 auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF); 2441 auto ArgVal = DtorCGF.EmitLoadOfScalar( 2442 DtorCGF.GetAddrOfLocalVar(&Dst), 2443 /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation()); 2444 DtorCGF.emitDestroy(Address(ArgVal, VDAddr.getAlignment()), ASTTy, 2445 DtorCGF.getDestroyer(ASTTy.isDestructedType()), 2446 DtorCGF.needsEHCleanup(ASTTy.isDestructedType())); 2447 DtorCGF.FinishFunction(); 2448 Dtor = Fn; 2449 } 2450 // Do not emit init function if it is not required. 2451 if (!Ctor && !Dtor) 2452 return nullptr; 2453 2454 llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy}; 2455 auto CopyCtorTy = 2456 llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs, 2457 /*isVarArg=*/false)->getPointerTo(); 2458 // Copying constructor for the threadprivate variable. 2459 // Must be NULL - reserved by runtime, but currently it requires that this 2460 // parameter is always NULL. Otherwise it fires assertion. 2461 CopyCtor = llvm::Constant::getNullValue(CopyCtorTy); 2462 if (Ctor == nullptr) { 2463 auto CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy, 2464 /*isVarArg=*/false)->getPointerTo(); 2465 Ctor = llvm::Constant::getNullValue(CtorTy); 2466 } 2467 if (Dtor == nullptr) { 2468 auto DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy, 2469 /*isVarArg=*/false)->getPointerTo(); 2470 Dtor = llvm::Constant::getNullValue(DtorTy); 2471 } 2472 if (!CGF) { 2473 auto InitFunctionTy = 2474 llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false); 2475 auto InitFunction = CGM.CreateGlobalInitOrDestructFunction( 2476 InitFunctionTy, ".__omp_threadprivate_init_.", 2477 CGM.getTypes().arrangeNullaryFunction()); 2478 CodeGenFunction InitCGF(CGM); 2479 FunctionArgList ArgList; 2480 InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction, 2481 CGM.getTypes().arrangeNullaryFunction(), ArgList, 2482 Loc, Loc); 2483 emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc); 2484 
InitCGF.FinishFunction(); 2485 return InitFunction; 2486 } 2487 emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc); 2488 } 2489 return nullptr; 2490 } 2491 2492 Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF, 2493 QualType VarType, 2494 StringRef Name) { 2495 llvm::Twine VarName(Name, ".artificial."); 2496 llvm::Type *VarLVType = CGF.ConvertTypeForMem(VarType); 2497 llvm::Value *GAddr = getOrCreateInternalVariable(VarLVType, VarName); 2498 llvm::Value *Args[] = { 2499 emitUpdateLocation(CGF, SourceLocation()), 2500 getThreadID(CGF, SourceLocation()), 2501 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(GAddr, CGM.VoidPtrTy), 2502 CGF.Builder.CreateIntCast(CGF.getTypeSize(VarType), CGM.SizeTy, 2503 /*IsSigned=*/false), 2504 getOrCreateInternalVariable(CGM.VoidPtrPtrTy, VarName + ".cache.")}; 2505 return Address( 2506 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 2507 CGF.EmitRuntimeCall( 2508 createRuntimeFunction(OMPRTL__kmpc_threadprivate_cached), Args), 2509 VarLVType->getPointerTo(/*AddrSpace=*/0)), 2510 CGM.getPointerAlign()); 2511 } 2512 2513 /// \brief Emits code for OpenMP 'if' clause using specified \a CodeGen 2514 /// function. Here is the logic: 2515 /// if (Cond) { 2516 /// ThenGen(); 2517 /// } else { 2518 /// ElseGen(); 2519 /// } 2520 void CGOpenMPRuntime::emitOMPIfClause(CodeGenFunction &CGF, const Expr *Cond, 2521 const RegionCodeGenTy &ThenGen, 2522 const RegionCodeGenTy &ElseGen) { 2523 CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange()); 2524 2525 // If the condition constant folds and can be elided, try to avoid emitting 2526 // the condition and the dead arm of the if/else. 2527 bool CondConstant; 2528 if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) { 2529 if (CondConstant) 2530 ThenGen(CGF); 2531 else 2532 ElseGen(CGF); 2533 return; 2534 } 2535 2536 // Otherwise, the condition did not fold, or we couldn't elide it. Just 2537 // emit the conditional branch. 
2538 auto ThenBlock = CGF.createBasicBlock("omp_if.then"); 2539 auto ElseBlock = CGF.createBasicBlock("omp_if.else"); 2540 auto ContBlock = CGF.createBasicBlock("omp_if.end"); 2541 CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0); 2542 2543 // Emit the 'then' code. 2544 CGF.EmitBlock(ThenBlock); 2545 ThenGen(CGF); 2546 CGF.EmitBranch(ContBlock); 2547 // Emit the 'else' code if present. 2548 // There is no need to emit line number for unconditional branch. 2549 (void)ApplyDebugLocation::CreateEmpty(CGF); 2550 CGF.EmitBlock(ElseBlock); 2551 ElseGen(CGF); 2552 // There is no need to emit line number for unconditional branch. 2553 (void)ApplyDebugLocation::CreateEmpty(CGF); 2554 CGF.EmitBranch(ContBlock); 2555 // Emit the continuation block for code after the if. 2556 CGF.EmitBlock(ContBlock, /*IsFinished=*/true); 2557 } 2558 2559 void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc, 2560 llvm::Value *OutlinedFn, 2561 ArrayRef<llvm::Value *> CapturedVars, 2562 const Expr *IfCond) { 2563 if (!CGF.HaveInsertPoint()) 2564 return; 2565 auto *RTLoc = emitUpdateLocation(CGF, Loc); 2566 auto &&ThenGen = [OutlinedFn, CapturedVars, RTLoc](CodeGenFunction &CGF, 2567 PrePostActionTy &) { 2568 // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn); 2569 auto &RT = CGF.CGM.getOpenMPRuntime(); 2570 llvm::Value *Args[] = { 2571 RTLoc, 2572 CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars 2573 CGF.Builder.CreateBitCast(OutlinedFn, RT.getKmpc_MicroPointerTy())}; 2574 llvm::SmallVector<llvm::Value *, 16> RealArgs; 2575 RealArgs.append(std::begin(Args), std::end(Args)); 2576 RealArgs.append(CapturedVars.begin(), CapturedVars.end()); 2577 2578 auto RTLFn = RT.createRuntimeFunction(OMPRTL__kmpc_fork_call); 2579 CGF.EmitRuntimeCall(RTLFn, RealArgs); 2580 }; 2581 auto &&ElseGen = [OutlinedFn, CapturedVars, RTLoc, Loc](CodeGenFunction &CGF, 2582 PrePostActionTy &) { 2583 auto &RT = CGF.CGM.getOpenMPRuntime(); 2584 
auto ThreadID = RT.getThreadID(CGF, Loc); 2585 // Build calls: 2586 // __kmpc_serialized_parallel(&Loc, GTid); 2587 llvm::Value *Args[] = {RTLoc, ThreadID}; 2588 CGF.EmitRuntimeCall( 2589 RT.createRuntimeFunction(OMPRTL__kmpc_serialized_parallel), Args); 2590 2591 // OutlinedFn(>id, &zero, CapturedStruct); 2592 auto ThreadIDAddr = RT.emitThreadIDAddress(CGF, Loc); 2593 Address ZeroAddr = 2594 CGF.CreateTempAlloca(CGF.Int32Ty, CharUnits::fromQuantity(4), 2595 /*Name*/ ".zero.addr"); 2596 CGF.InitTempAlloca(ZeroAddr, CGF.Builder.getInt32(/*C*/ 0)); 2597 llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs; 2598 OutlinedFnArgs.push_back(ThreadIDAddr.getPointer()); 2599 OutlinedFnArgs.push_back(ZeroAddr.getPointer()); 2600 OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end()); 2601 RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs); 2602 2603 // __kmpc_end_serialized_parallel(&Loc, GTid); 2604 llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID}; 2605 CGF.EmitRuntimeCall( 2606 RT.createRuntimeFunction(OMPRTL__kmpc_end_serialized_parallel), 2607 EndArgs); 2608 }; 2609 if (IfCond) 2610 emitOMPIfClause(CGF, IfCond, ThenGen, ElseGen); 2611 else { 2612 RegionCodeGenTy ThenRCG(ThenGen); 2613 ThenRCG(CGF); 2614 } 2615 } 2616 2617 // If we're inside an (outlined) parallel region, use the region info's 2618 // thread-ID variable (it is passed in a first argument of the outlined function 2619 // as "kmp_int32 *gtid"). Otherwise, if we're not inside parallel region, but in 2620 // regular serial code region, get thread ID by calling kmp_int32 2621 // kmpc_global_thread_num(ident_t *loc), stash this thread ID in a temporary and 2622 // return the address of that temp. 
2623 Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF, 2624 SourceLocation Loc) { 2625 if (auto *OMPRegionInfo = 2626 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) 2627 if (OMPRegionInfo->getThreadIDVariable()) 2628 return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress(); 2629 2630 auto ThreadID = getThreadID(CGF, Loc); 2631 auto Int32Ty = 2632 CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true); 2633 auto ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp."); 2634 CGF.EmitStoreOfScalar(ThreadID, 2635 CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty)); 2636 2637 return ThreadIDTemp; 2638 } 2639 2640 llvm::Constant * 2641 CGOpenMPRuntime::getOrCreateInternalVariable(llvm::Type *Ty, 2642 const llvm::Twine &Name) { 2643 SmallString<256> Buffer; 2644 llvm::raw_svector_ostream Out(Buffer); 2645 Out << Name; 2646 auto RuntimeName = Out.str(); 2647 auto &Elem = *InternalVars.insert(std::make_pair(RuntimeName, nullptr)).first; 2648 if (Elem.second) { 2649 assert(Elem.second->getType()->getPointerElementType() == Ty && 2650 "OMP internal variable has different type than requested"); 2651 return &*Elem.second; 2652 } 2653 2654 return Elem.second = new llvm::GlobalVariable( 2655 CGM.getModule(), Ty, /*IsConstant*/ false, 2656 llvm::GlobalValue::CommonLinkage, llvm::Constant::getNullValue(Ty), 2657 Elem.first()); 2658 } 2659 2660 llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) { 2661 llvm::Twine Name(".gomp_critical_user_", CriticalName); 2662 return getOrCreateInternalVariable(KmpCriticalNameTy, Name.concat(".var")); 2663 } 2664 2665 namespace { 2666 /// Common pre(post)-action for different OpenMP constructs. 
2667 class CommonActionTy final : public PrePostActionTy { 2668 llvm::Value *EnterCallee; 2669 ArrayRef<llvm::Value *> EnterArgs; 2670 llvm::Value *ExitCallee; 2671 ArrayRef<llvm::Value *> ExitArgs; 2672 bool Conditional; 2673 llvm::BasicBlock *ContBlock = nullptr; 2674 2675 public: 2676 CommonActionTy(llvm::Value *EnterCallee, ArrayRef<llvm::Value *> EnterArgs, 2677 llvm::Value *ExitCallee, ArrayRef<llvm::Value *> ExitArgs, 2678 bool Conditional = false) 2679 : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee), 2680 ExitArgs(ExitArgs), Conditional(Conditional) {} 2681 void Enter(CodeGenFunction &CGF) override { 2682 llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs); 2683 if (Conditional) { 2684 llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes); 2685 auto *ThenBlock = CGF.createBasicBlock("omp_if.then"); 2686 ContBlock = CGF.createBasicBlock("omp_if.end"); 2687 // Generate the branch (If-stmt) 2688 CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock); 2689 CGF.EmitBlock(ThenBlock); 2690 } 2691 } 2692 void Done(CodeGenFunction &CGF) { 2693 // Emit the rest of blocks/branches 2694 CGF.EmitBranch(ContBlock); 2695 CGF.EmitBlock(ContBlock, true); 2696 } 2697 void Exit(CodeGenFunction &CGF) override { 2698 CGF.EmitRuntimeCall(ExitCallee, ExitArgs); 2699 } 2700 }; 2701 } // anonymous namespace 2702 2703 void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF, 2704 StringRef CriticalName, 2705 const RegionCodeGenTy &CriticalOpGen, 2706 SourceLocation Loc, const Expr *Hint) { 2707 // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]); 2708 // CriticalOpGen(); 2709 // __kmpc_end_critical(ident_t *, gtid, Lock); 2710 // Prepare arguments and build a call to __kmpc_critical 2711 if (!CGF.HaveInsertPoint()) 2712 return; 2713 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 2714 getCriticalRegionLock(CriticalName)}; 2715 llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args), 
2716 std::end(Args)); 2717 if (Hint) { 2718 EnterArgs.push_back(CGF.Builder.CreateIntCast( 2719 CGF.EmitScalarExpr(Hint), CGM.IntPtrTy, /*isSigned=*/false)); 2720 } 2721 CommonActionTy Action( 2722 createRuntimeFunction(Hint ? OMPRTL__kmpc_critical_with_hint 2723 : OMPRTL__kmpc_critical), 2724 EnterArgs, createRuntimeFunction(OMPRTL__kmpc_end_critical), Args); 2725 CriticalOpGen.setAction(Action); 2726 emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen); 2727 } 2728 2729 void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF, 2730 const RegionCodeGenTy &MasterOpGen, 2731 SourceLocation Loc) { 2732 if (!CGF.HaveInsertPoint()) 2733 return; 2734 // if(__kmpc_master(ident_t *, gtid)) { 2735 // MasterOpGen(); 2736 // __kmpc_end_master(ident_t *, gtid); 2737 // } 2738 // Prepare arguments and build a call to __kmpc_master 2739 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 2740 CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_master), Args, 2741 createRuntimeFunction(OMPRTL__kmpc_end_master), Args, 2742 /*Conditional=*/true); 2743 MasterOpGen.setAction(Action); 2744 emitInlinedDirective(CGF, OMPD_master, MasterOpGen); 2745 Action.Done(CGF); 2746 } 2747 2748 void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF, 2749 SourceLocation Loc) { 2750 if (!CGF.HaveInsertPoint()) 2751 return; 2752 // Build call __kmpc_omp_taskyield(loc, thread_id, 0); 2753 llvm::Value *Args[] = { 2754 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 2755 llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)}; 2756 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_taskyield), Args); 2757 if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) 2758 Region->emitUntiedSwitch(CGF); 2759 } 2760 2761 void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF, 2762 const RegionCodeGenTy &TaskgroupOpGen, 2763 SourceLocation Loc) { 2764 if (!CGF.HaveInsertPoint()) 2765 return; 2766 // 
__kmpc_taskgroup(ident_t *, gtid); 2767 // TaskgroupOpGen(); 2768 // __kmpc_end_taskgroup(ident_t *, gtid); 2769 // Prepare arguments and build a call to __kmpc_taskgroup 2770 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 2771 CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_taskgroup), Args, 2772 createRuntimeFunction(OMPRTL__kmpc_end_taskgroup), 2773 Args); 2774 TaskgroupOpGen.setAction(Action); 2775 emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen); 2776 } 2777 2778 /// Given an array of pointers to variables, project the address of a 2779 /// given variable. 2780 static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array, 2781 unsigned Index, const VarDecl *Var) { 2782 // Pull out the pointer to the variable. 2783 Address PtrAddr = 2784 CGF.Builder.CreateConstArrayGEP(Array, Index, CGF.getPointerSize()); 2785 llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr); 2786 2787 Address Addr = Address(Ptr, CGF.getContext().getDeclAlign(Var)); 2788 Addr = CGF.Builder.CreateElementBitCast( 2789 Addr, CGF.ConvertTypeForMem(Var->getType())); 2790 return Addr; 2791 } 2792 2793 static llvm::Value *emitCopyprivateCopyFunction( 2794 CodeGenModule &CGM, llvm::Type *ArgsType, 2795 ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs, 2796 ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps, 2797 SourceLocation Loc) { 2798 auto &C = CGM.getContext(); 2799 // void copy_func(void *LHSArg, void *RHSArg); 2800 FunctionArgList Args; 2801 ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 2802 ImplicitParamDecl::Other); 2803 ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 2804 ImplicitParamDecl::Other); 2805 Args.push_back(&LHSArg); 2806 Args.push_back(&RHSArg); 2807 auto &CGFI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 2808 auto *Fn = llvm::Function::Create( 2809 CGM.getTypes().GetFunctionType(CGFI), 
llvm::GlobalValue::InternalLinkage, 2810 ".omp.copyprivate.copy_func", &CGM.getModule()); 2811 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI); 2812 CodeGenFunction CGF(CGM); 2813 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc); 2814 // Dest = (void*[n])(LHSArg); 2815 // Src = (void*[n])(RHSArg); 2816 Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 2817 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)), 2818 ArgsType), CGF.getPointerAlign()); 2819 Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 2820 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)), 2821 ArgsType), CGF.getPointerAlign()); 2822 // *(Type0*)Dst[0] = *(Type0*)Src[0]; 2823 // *(Type1*)Dst[1] = *(Type1*)Src[1]; 2824 // ... 2825 // *(Typen*)Dst[n] = *(Typen*)Src[n]; 2826 for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) { 2827 auto DestVar = cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl()); 2828 Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar); 2829 2830 auto SrcVar = cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl()); 2831 Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar); 2832 2833 auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl(); 2834 QualType Type = VD->getType(); 2835 CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]); 2836 } 2837 CGF.FinishFunction(); 2838 return Fn; 2839 } 2840 2841 void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF, 2842 const RegionCodeGenTy &SingleOpGen, 2843 SourceLocation Loc, 2844 ArrayRef<const Expr *> CopyprivateVars, 2845 ArrayRef<const Expr *> SrcExprs, 2846 ArrayRef<const Expr *> DstExprs, 2847 ArrayRef<const Expr *> AssignmentOps) { 2848 if (!CGF.HaveInsertPoint()) 2849 return; 2850 assert(CopyprivateVars.size() == SrcExprs.size() && 2851 CopyprivateVars.size() == DstExprs.size() && 2852 CopyprivateVars.size() == AssignmentOps.size()); 2853 auto &C = CGM.getContext(); 2854 // int32 did_it = 0; 2855 // 
if(__kmpc_single(ident_t *, gtid)) { 2856 // SingleOpGen(); 2857 // __kmpc_end_single(ident_t *, gtid); 2858 // did_it = 1; 2859 // } 2860 // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>, 2861 // <copy_func>, did_it); 2862 2863 Address DidIt = Address::invalid(); 2864 if (!CopyprivateVars.empty()) { 2865 // int32 did_it = 0; 2866 auto KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1); 2867 DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it"); 2868 CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt); 2869 } 2870 // Prepare arguments and build a call to __kmpc_single 2871 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 2872 CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_single), Args, 2873 createRuntimeFunction(OMPRTL__kmpc_end_single), Args, 2874 /*Conditional=*/true); 2875 SingleOpGen.setAction(Action); 2876 emitInlinedDirective(CGF, OMPD_single, SingleOpGen); 2877 if (DidIt.isValid()) { 2878 // did_it = 1; 2879 CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt); 2880 } 2881 Action.Done(CGF); 2882 // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>, 2883 // <copy_func>, did_it); 2884 if (DidIt.isValid()) { 2885 llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size()); 2886 auto CopyprivateArrayTy = 2887 C.getConstantArrayType(C.VoidPtrTy, ArraySize, ArrayType::Normal, 2888 /*IndexTypeQuals=*/0); 2889 // Create a list of all private variables for copyprivate. 
2890 Address CopyprivateList = 2891 CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list"); 2892 for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) { 2893 Address Elem = CGF.Builder.CreateConstArrayGEP( 2894 CopyprivateList, I, CGF.getPointerSize()); 2895 CGF.Builder.CreateStore( 2896 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 2897 CGF.EmitLValue(CopyprivateVars[I]).getPointer(), CGF.VoidPtrTy), 2898 Elem); 2899 } 2900 // Build function that copies private values from single region to all other 2901 // threads in the corresponding parallel region. 2902 auto *CpyFn = emitCopyprivateCopyFunction( 2903 CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy)->getPointerTo(), 2904 CopyprivateVars, SrcExprs, DstExprs, AssignmentOps, Loc); 2905 auto *BufSize = CGF.getTypeSize(CopyprivateArrayTy); 2906 Address CL = 2907 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(CopyprivateList, 2908 CGF.VoidPtrTy); 2909 auto *DidItVal = CGF.Builder.CreateLoad(DidIt); 2910 llvm::Value *Args[] = { 2911 emitUpdateLocation(CGF, Loc), // ident_t *<loc> 2912 getThreadID(CGF, Loc), // i32 <gtid> 2913 BufSize, // size_t <buf_size> 2914 CL.getPointer(), // void *<copyprivate list> 2915 CpyFn, // void (*) (void *, void *) <copy_func> 2916 DidItVal // i32 did_it 2917 }; 2918 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_copyprivate), Args); 2919 } 2920 } 2921 2922 void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF, 2923 const RegionCodeGenTy &OrderedOpGen, 2924 SourceLocation Loc, bool IsThreads) { 2925 if (!CGF.HaveInsertPoint()) 2926 return; 2927 // __kmpc_ordered(ident_t *, gtid); 2928 // OrderedOpGen(); 2929 // __kmpc_end_ordered(ident_t *, gtid); 2930 // Prepare arguments and build a call to __kmpc_ordered 2931 if (IsThreads) { 2932 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 2933 CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_ordered), Args, 2934 createRuntimeFunction(OMPRTL__kmpc_end_ordered), 2935 Args); 
2936 OrderedOpGen.setAction(Action); 2937 emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen); 2938 return; 2939 } 2940 emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen); 2941 } 2942 2943 void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc, 2944 OpenMPDirectiveKind Kind, bool EmitChecks, 2945 bool ForceSimpleCall) { 2946 if (!CGF.HaveInsertPoint()) 2947 return; 2948 // Build call __kmpc_cancel_barrier(loc, thread_id); 2949 // Build call __kmpc_barrier(loc, thread_id); 2950 unsigned Flags; 2951 if (Kind == OMPD_for) 2952 Flags = OMP_IDENT_BARRIER_IMPL_FOR; 2953 else if (Kind == OMPD_sections) 2954 Flags = OMP_IDENT_BARRIER_IMPL_SECTIONS; 2955 else if (Kind == OMPD_single) 2956 Flags = OMP_IDENT_BARRIER_IMPL_SINGLE; 2957 else if (Kind == OMPD_barrier) 2958 Flags = OMP_IDENT_BARRIER_EXPL; 2959 else 2960 Flags = OMP_IDENT_BARRIER_IMPL; 2961 // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc, 2962 // thread_id); 2963 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags), 2964 getThreadID(CGF, Loc)}; 2965 if (auto *OMPRegionInfo = 2966 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) { 2967 if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) { 2968 auto *Result = CGF.EmitRuntimeCall( 2969 createRuntimeFunction(OMPRTL__kmpc_cancel_barrier), Args); 2970 if (EmitChecks) { 2971 // if (__kmpc_cancel_barrier()) { 2972 // exit from construct; 2973 // } 2974 auto *ExitBB = CGF.createBasicBlock(".cancel.exit"); 2975 auto *ContBB = CGF.createBasicBlock(".cancel.continue"); 2976 auto *Cmp = CGF.Builder.CreateIsNotNull(Result); 2977 CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB); 2978 CGF.EmitBlock(ExitBB); 2979 // exit from construct; 2980 auto CancelDestination = 2981 CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind()); 2982 CGF.EmitBranchThroughCleanup(CancelDestination); 2983 CGF.EmitBlock(ContBB, /*IsFinished=*/true); 2984 } 2985 return; 2986 } 2987 } 2988 
CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_barrier), Args); 2989 } 2990 2991 /// \brief Map the OpenMP loop schedule to the runtime enumeration. 2992 static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind, 2993 bool Chunked, bool Ordered) { 2994 switch (ScheduleKind) { 2995 case OMPC_SCHEDULE_static: 2996 return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked) 2997 : (Ordered ? OMP_ord_static : OMP_sch_static); 2998 case OMPC_SCHEDULE_dynamic: 2999 return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked; 3000 case OMPC_SCHEDULE_guided: 3001 return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked; 3002 case OMPC_SCHEDULE_runtime: 3003 return Ordered ? OMP_ord_runtime : OMP_sch_runtime; 3004 case OMPC_SCHEDULE_auto: 3005 return Ordered ? OMP_ord_auto : OMP_sch_auto; 3006 case OMPC_SCHEDULE_unknown: 3007 assert(!Chunked && "chunk was specified but schedule kind not known"); 3008 return Ordered ? OMP_ord_static : OMP_sch_static; 3009 } 3010 llvm_unreachable("Unexpected runtime schedule"); 3011 } 3012 3013 /// \brief Map the OpenMP distribute schedule to the runtime enumeration. 3014 static OpenMPSchedType 3015 getRuntimeSchedule(OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) { 3016 // only static is allowed for dist_schedule 3017 return Chunked ? 
OMP_dist_sch_static_chunked : OMP_dist_sch_static; 3018 } 3019 3020 bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind, 3021 bool Chunked) const { 3022 auto Schedule = getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false); 3023 return Schedule == OMP_sch_static; 3024 } 3025 3026 bool CGOpenMPRuntime::isStaticNonchunked( 3027 OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const { 3028 auto Schedule = getRuntimeSchedule(ScheduleKind, Chunked); 3029 return Schedule == OMP_dist_sch_static; 3030 } 3031 3032 3033 bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const { 3034 auto Schedule = 3035 getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false); 3036 assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here"); 3037 return Schedule != OMP_sch_static; 3038 } 3039 3040 static int addMonoNonMonoModifier(OpenMPSchedType Schedule, 3041 OpenMPScheduleClauseModifier M1, 3042 OpenMPScheduleClauseModifier M2) { 3043 int Modifier = 0; 3044 switch (M1) { 3045 case OMPC_SCHEDULE_MODIFIER_monotonic: 3046 Modifier = OMP_sch_modifier_monotonic; 3047 break; 3048 case OMPC_SCHEDULE_MODIFIER_nonmonotonic: 3049 Modifier = OMP_sch_modifier_nonmonotonic; 3050 break; 3051 case OMPC_SCHEDULE_MODIFIER_simd: 3052 if (Schedule == OMP_sch_static_chunked) 3053 Schedule = OMP_sch_static_balanced_chunked; 3054 break; 3055 case OMPC_SCHEDULE_MODIFIER_last: 3056 case OMPC_SCHEDULE_MODIFIER_unknown: 3057 break; 3058 } 3059 switch (M2) { 3060 case OMPC_SCHEDULE_MODIFIER_monotonic: 3061 Modifier = OMP_sch_modifier_monotonic; 3062 break; 3063 case OMPC_SCHEDULE_MODIFIER_nonmonotonic: 3064 Modifier = OMP_sch_modifier_nonmonotonic; 3065 break; 3066 case OMPC_SCHEDULE_MODIFIER_simd: 3067 if (Schedule == OMP_sch_static_chunked) 3068 Schedule = OMP_sch_static_balanced_chunked; 3069 break; 3070 case OMPC_SCHEDULE_MODIFIER_last: 3071 case OMPC_SCHEDULE_MODIFIER_unknown: 3072 break; 3073 } 3074 return Schedule | 
Modifier; 3075 } 3076 3077 void CGOpenMPRuntime::emitForDispatchInit( 3078 CodeGenFunction &CGF, SourceLocation Loc, 3079 const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned, 3080 bool Ordered, const DispatchRTInput &DispatchValues) { 3081 if (!CGF.HaveInsertPoint()) 3082 return; 3083 OpenMPSchedType Schedule = getRuntimeSchedule( 3084 ScheduleKind.Schedule, DispatchValues.Chunk != nullptr, Ordered); 3085 assert(Ordered || 3086 (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked && 3087 Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked && 3088 Schedule != OMP_sch_static_balanced_chunked)); 3089 // Call __kmpc_dispatch_init( 3090 // ident_t *loc, kmp_int32 tid, kmp_int32 schedule, 3091 // kmp_int[32|64] lower, kmp_int[32|64] upper, 3092 // kmp_int[32|64] stride, kmp_int[32|64] chunk); 3093 3094 // If the Chunk was not specified in the clause - use default value 1. 3095 llvm::Value *Chunk = DispatchValues.Chunk ? DispatchValues.Chunk 3096 : CGF.Builder.getIntN(IVSize, 1); 3097 llvm::Value *Args[] = { 3098 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 3099 CGF.Builder.getInt32(addMonoNonMonoModifier( 3100 Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type 3101 DispatchValues.LB, // Lower 3102 DispatchValues.UB, // Upper 3103 CGF.Builder.getIntN(IVSize, 1), // Stride 3104 Chunk // Chunk 3105 }; 3106 CGF.EmitRuntimeCall(createDispatchInitFunction(IVSize, IVSigned), Args); 3107 } 3108 3109 static void emitForStaticInitCall( 3110 CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId, 3111 llvm::Constant *ForStaticInitFunction, OpenMPSchedType Schedule, 3112 OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2, 3113 const CGOpenMPRuntime::StaticRTInput &Values) { 3114 if (!CGF.HaveInsertPoint()) 3115 return; 3116 3117 assert(!Values.Ordered); 3118 assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked || 3119 Schedule == OMP_sch_static_balanced_chunked || 3120 
Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked || 3121 Schedule == OMP_dist_sch_static || 3122 Schedule == OMP_dist_sch_static_chunked); 3123 3124 // Call __kmpc_for_static_init( 3125 // ident_t *loc, kmp_int32 tid, kmp_int32 schedtype, 3126 // kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower, 3127 // kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride, 3128 // kmp_int[32|64] incr, kmp_int[32|64] chunk); 3129 llvm::Value *Chunk = Values.Chunk; 3130 if (Chunk == nullptr) { 3131 assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static || 3132 Schedule == OMP_dist_sch_static) && 3133 "expected static non-chunked schedule"); 3134 // If the Chunk was not specified in the clause - use default value 1. 3135 Chunk = CGF.Builder.getIntN(Values.IVSize, 1); 3136 } else { 3137 assert((Schedule == OMP_sch_static_chunked || 3138 Schedule == OMP_sch_static_balanced_chunked || 3139 Schedule == OMP_ord_static_chunked || 3140 Schedule == OMP_dist_sch_static_chunked) && 3141 "expected static chunked schedule"); 3142 } 3143 llvm::Value *Args[] = { 3144 UpdateLocation, 3145 ThreadId, 3146 CGF.Builder.getInt32(addMonoNonMonoModifier(Schedule, M1, 3147 M2)), // Schedule type 3148 Values.IL.getPointer(), // &isLastIter 3149 Values.LB.getPointer(), // &LB 3150 Values.UB.getPointer(), // &UB 3151 Values.ST.getPointer(), // &Stride 3152 CGF.Builder.getIntN(Values.IVSize, 1), // Incr 3153 Chunk // Chunk 3154 }; 3155 CGF.EmitRuntimeCall(ForStaticInitFunction, Args); 3156 } 3157 3158 void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF, 3159 SourceLocation Loc, 3160 OpenMPDirectiveKind DKind, 3161 const OpenMPScheduleTy &ScheduleKind, 3162 const StaticRTInput &Values) { 3163 OpenMPSchedType ScheduleNum = getRuntimeSchedule( 3164 ScheduleKind.Schedule, Values.Chunk != nullptr, Values.Ordered); 3165 assert(isOpenMPWorksharingDirective(DKind) && 3166 "Expected loop-based or sections-based directive."); 3167 auto *UpdatedLocation = emitUpdateLocation(CGF, Loc, 3168 
isOpenMPLoopDirective(DKind) 3169 ? OMP_IDENT_WORK_LOOP 3170 : OMP_IDENT_WORK_SECTIONS); 3171 auto *ThreadId = getThreadID(CGF, Loc); 3172 auto *StaticInitFunction = 3173 createForStaticInitFunction(Values.IVSize, Values.IVSigned); 3174 emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction, 3175 ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, Values); 3176 } 3177 3178 void CGOpenMPRuntime::emitDistributeStaticInit( 3179 CodeGenFunction &CGF, SourceLocation Loc, 3180 OpenMPDistScheduleClauseKind SchedKind, 3181 const CGOpenMPRuntime::StaticRTInput &Values) { 3182 OpenMPSchedType ScheduleNum = 3183 getRuntimeSchedule(SchedKind, Values.Chunk != nullptr); 3184 auto *UpdatedLocation = 3185 emitUpdateLocation(CGF, Loc, OMP_IDENT_WORK_DISTRIBUTE); 3186 auto *ThreadId = getThreadID(CGF, Loc); 3187 auto *StaticInitFunction = 3188 createForStaticInitFunction(Values.IVSize, Values.IVSigned); 3189 emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction, 3190 ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown, 3191 OMPC_SCHEDULE_MODIFIER_unknown, Values); 3192 } 3193 3194 void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF, 3195 SourceLocation Loc, 3196 OpenMPDirectiveKind DKind) { 3197 if (!CGF.HaveInsertPoint()) 3198 return; 3199 // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid); 3200 llvm::Value *Args[] = { 3201 emitUpdateLocation(CGF, Loc, 3202 isOpenMPDistributeDirective(DKind) 3203 ? OMP_IDENT_WORK_DISTRIBUTE 3204 : isOpenMPLoopDirective(DKind) 3205 ? 
OMP_IDENT_WORK_LOOP 3206 : OMP_IDENT_WORK_SECTIONS), 3207 getThreadID(CGF, Loc)}; 3208 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_for_static_fini), 3209 Args); 3210 } 3211 3212 void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF, 3213 SourceLocation Loc, 3214 unsigned IVSize, 3215 bool IVSigned) { 3216 if (!CGF.HaveInsertPoint()) 3217 return; 3218 // Call __kmpc_for_dynamic_fini_(4|8)[u](ident_t *loc, kmp_int32 tid); 3219 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 3220 CGF.EmitRuntimeCall(createDispatchFiniFunction(IVSize, IVSigned), Args); 3221 } 3222 3223 llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF, 3224 SourceLocation Loc, unsigned IVSize, 3225 bool IVSigned, Address IL, 3226 Address LB, Address UB, 3227 Address ST) { 3228 // Call __kmpc_dispatch_next( 3229 // ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter, 3230 // kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper, 3231 // kmp_int[32|64] *p_stride); 3232 llvm::Value *Args[] = { 3233 emitUpdateLocation(CGF, Loc), 3234 getThreadID(CGF, Loc), 3235 IL.getPointer(), // &isLastIter 3236 LB.getPointer(), // &Lower 3237 UB.getPointer(), // &Upper 3238 ST.getPointer() // &Stride 3239 }; 3240 llvm::Value *Call = 3241 CGF.EmitRuntimeCall(createDispatchNextFunction(IVSize, IVSigned), Args); 3242 return CGF.EmitScalarConversion( 3243 Call, CGF.getContext().getIntTypeForBitwidth(32, /* Signed */ true), 3244 CGF.getContext().BoolTy, Loc); 3245 } 3246 3247 void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF, 3248 llvm::Value *NumThreads, 3249 SourceLocation Loc) { 3250 if (!CGF.HaveInsertPoint()) 3251 return; 3252 // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads) 3253 llvm::Value *Args[] = { 3254 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 3255 CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)}; 3256 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_num_threads), 3257 Args); 
3258 } 3259 3260 void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF, 3261 OpenMPProcBindClauseKind ProcBind, 3262 SourceLocation Loc) { 3263 if (!CGF.HaveInsertPoint()) 3264 return; 3265 // Constants for proc bind value accepted by the runtime. 3266 enum ProcBindTy { 3267 ProcBindFalse = 0, 3268 ProcBindTrue, 3269 ProcBindMaster, 3270 ProcBindClose, 3271 ProcBindSpread, 3272 ProcBindIntel, 3273 ProcBindDefault 3274 } RuntimeProcBind; 3275 switch (ProcBind) { 3276 case OMPC_PROC_BIND_master: 3277 RuntimeProcBind = ProcBindMaster; 3278 break; 3279 case OMPC_PROC_BIND_close: 3280 RuntimeProcBind = ProcBindClose; 3281 break; 3282 case OMPC_PROC_BIND_spread: 3283 RuntimeProcBind = ProcBindSpread; 3284 break; 3285 case OMPC_PROC_BIND_unknown: 3286 llvm_unreachable("Unsupported proc_bind value."); 3287 } 3288 // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind) 3289 llvm::Value *Args[] = { 3290 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 3291 llvm::ConstantInt::get(CGM.IntTy, RuntimeProcBind, /*isSigned=*/true)}; 3292 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_proc_bind), Args); 3293 } 3294 3295 void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>, 3296 SourceLocation Loc) { 3297 if (!CGF.HaveInsertPoint()) 3298 return; 3299 // Build call void __kmpc_flush(ident_t *loc) 3300 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_flush), 3301 emitUpdateLocation(CGF, Loc)); 3302 } 3303 3304 namespace { 3305 /// \brief Indexes of fields for type kmp_task_t. 3306 enum KmpTaskTFields { 3307 /// \brief List of shared variables. 3308 KmpTaskTShareds, 3309 /// \brief Task routine. 3310 KmpTaskTRoutine, 3311 /// \brief Partition id for the untied tasks. 3312 KmpTaskTPartId, 3313 /// Function with call of destructors for private variables. 3314 Data1, 3315 /// Task priority. 3316 Data2, 3317 /// (Taskloops only) Lower bound. 3318 KmpTaskTLowerBound, 3319 /// (Taskloops only) Upper bound. 
3320 KmpTaskTUpperBound, 3321 /// (Taskloops only) Stride. 3322 KmpTaskTStride, 3323 /// (Taskloops only) Is last iteration flag. 3324 KmpTaskTLastIter, 3325 /// (Taskloops only) Reduction data. 3326 KmpTaskTReductions, 3327 }; 3328 } // anonymous namespace 3329 3330 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::empty() const { 3331 // FIXME: Add other entries type when they become supported. 3332 return OffloadEntriesTargetRegion.empty(); 3333 } 3334 3335 /// \brief Initialize target region entry. 3336 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: 3337 initializeTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID, 3338 StringRef ParentName, unsigned LineNum, 3339 unsigned Order) { 3340 assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is " 3341 "only required for the device " 3342 "code generation."); 3343 OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = 3344 OffloadEntryInfoTargetRegion(Order, /*Addr=*/nullptr, /*ID=*/nullptr, 3345 /*Flags=*/0); 3346 ++OffloadingEntriesNum; 3347 } 3348 3349 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: 3350 registerTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID, 3351 StringRef ParentName, unsigned LineNum, 3352 llvm::Constant *Addr, llvm::Constant *ID, 3353 int32_t Flags) { 3354 // If we are emitting code for a target, the entry is already initialized, 3355 // only has to be registered. 
3356 if (CGM.getLangOpts().OpenMPIsDevice) { 3357 assert(hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum) && 3358 "Entry must exist."); 3359 auto &Entry = 3360 OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum]; 3361 assert(Entry.isValid() && "Entry not initialized!"); 3362 Entry.setAddress(Addr); 3363 Entry.setID(ID); 3364 Entry.setFlags(Flags); 3365 return; 3366 } else { 3367 OffloadEntryInfoTargetRegion Entry(OffloadingEntriesNum++, Addr, ID, Flags); 3368 OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = Entry; 3369 } 3370 } 3371 3372 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::hasTargetRegionEntryInfo( 3373 unsigned DeviceID, unsigned FileID, StringRef ParentName, 3374 unsigned LineNum) const { 3375 auto PerDevice = OffloadEntriesTargetRegion.find(DeviceID); 3376 if (PerDevice == OffloadEntriesTargetRegion.end()) 3377 return false; 3378 auto PerFile = PerDevice->second.find(FileID); 3379 if (PerFile == PerDevice->second.end()) 3380 return false; 3381 auto PerParentName = PerFile->second.find(ParentName); 3382 if (PerParentName == PerFile->second.end()) 3383 return false; 3384 auto PerLine = PerParentName->second.find(LineNum); 3385 if (PerLine == PerParentName->second.end()) 3386 return false; 3387 // Fail if this entry is already registered. 3388 if (PerLine->second.getAddress() || PerLine->second.getID()) 3389 return false; 3390 return true; 3391 } 3392 3393 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::actOnTargetRegionEntriesInfo( 3394 const OffloadTargetRegionEntryInfoActTy &Action) { 3395 // Scan all target region entries and perform the provided action. 3396 for (auto &D : OffloadEntriesTargetRegion) 3397 for (auto &F : D.second) 3398 for (auto &P : F.second) 3399 for (auto &L : P.second) 3400 Action(D.first, F.first, P.first(), L.first, L.second); 3401 } 3402 3403 /// \brief Create a Ctor/Dtor-like function whose body is emitted through 3404 /// \a Codegen. 
This is used to emit the two functions that register and 3405 /// unregister the descriptor of the current compilation unit. 3406 static llvm::Function * 3407 createOffloadingBinaryDescriptorFunction(CodeGenModule &CGM, StringRef Name, 3408 const RegionCodeGenTy &Codegen) { 3409 auto &C = CGM.getContext(); 3410 FunctionArgList Args; 3411 ImplicitParamDecl DummyPtr(C, C.VoidPtrTy, ImplicitParamDecl::Other); 3412 Args.push_back(&DummyPtr); 3413 3414 CodeGenFunction CGF(CGM); 3415 // Disable debug info for global (de-)initializer because they are not part of 3416 // some particular construct. 3417 CGF.disableDebugInfo(); 3418 auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 3419 auto FTy = CGM.getTypes().GetFunctionType(FI); 3420 auto *Fn = CGM.CreateGlobalInitOrDestructFunction(FTy, Name, FI); 3421 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FI, Args); 3422 Codegen(CGF); 3423 CGF.FinishFunction(); 3424 return Fn; 3425 } 3426 3427 llvm::Function * 3428 CGOpenMPRuntime::createOffloadingBinaryDescriptorRegistration() { 3429 // If we don't have entries or if we are emitting code for the device, we 3430 // don't need to do anything. 3431 if (CGM.getLangOpts().OpenMPIsDevice || OffloadEntriesInfoManager.empty()) 3432 return nullptr; 3433 3434 auto &M = CGM.getModule(); 3435 auto &C = CGM.getContext(); 3436 3437 // Get list of devices we care about 3438 auto &Devices = CGM.getLangOpts().OMPTargetTriples; 3439 3440 // We should be creating an offloading descriptor only if there are devices 3441 // specified. 3442 assert(!Devices.empty() && "No OpenMP offloading devices??"); 3443 3444 // Create the external variables that will point to the begin and end of the 3445 // host entries section. These will be defined by the linker. 
3446 auto *OffloadEntryTy = 3447 CGM.getTypes().ConvertTypeForMem(getTgtOffloadEntryQTy()); 3448 llvm::GlobalVariable *HostEntriesBegin = new llvm::GlobalVariable( 3449 M, OffloadEntryTy, /*isConstant=*/true, 3450 llvm::GlobalValue::ExternalLinkage, /*Initializer=*/nullptr, 3451 ".omp_offloading.entries_begin"); 3452 llvm::GlobalVariable *HostEntriesEnd = new llvm::GlobalVariable( 3453 M, OffloadEntryTy, /*isConstant=*/true, 3454 llvm::GlobalValue::ExternalLinkage, /*Initializer=*/nullptr, 3455 ".omp_offloading.entries_end"); 3456 3457 // Create all device images 3458 auto *DeviceImageTy = cast<llvm::StructType>( 3459 CGM.getTypes().ConvertTypeForMem(getTgtDeviceImageQTy())); 3460 ConstantInitBuilder DeviceImagesBuilder(CGM); 3461 auto DeviceImagesEntries = DeviceImagesBuilder.beginArray(DeviceImageTy); 3462 3463 for (unsigned i = 0; i < Devices.size(); ++i) { 3464 StringRef T = Devices[i].getTriple(); 3465 auto *ImgBegin = new llvm::GlobalVariable( 3466 M, CGM.Int8Ty, /*isConstant=*/true, llvm::GlobalValue::ExternalLinkage, 3467 /*Initializer=*/nullptr, 3468 Twine(".omp_offloading.img_start.") + Twine(T)); 3469 auto *ImgEnd = new llvm::GlobalVariable( 3470 M, CGM.Int8Ty, /*isConstant=*/true, llvm::GlobalValue::ExternalLinkage, 3471 /*Initializer=*/nullptr, Twine(".omp_offloading.img_end.") + Twine(T)); 3472 3473 auto Dev = DeviceImagesEntries.beginStruct(DeviceImageTy); 3474 Dev.add(ImgBegin); 3475 Dev.add(ImgEnd); 3476 Dev.add(HostEntriesBegin); 3477 Dev.add(HostEntriesEnd); 3478 Dev.finishAndAddTo(DeviceImagesEntries); 3479 } 3480 3481 // Create device images global array. 
3482 llvm::GlobalVariable *DeviceImages = 3483 DeviceImagesEntries.finishAndCreateGlobal(".omp_offloading.device_images", 3484 CGM.getPointerAlign(), 3485 /*isConstant=*/true); 3486 DeviceImages->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); 3487 3488 // This is a Zero array to be used in the creation of the constant expressions 3489 llvm::Constant *Index[] = {llvm::Constant::getNullValue(CGM.Int32Ty), 3490 llvm::Constant::getNullValue(CGM.Int32Ty)}; 3491 3492 // Create the target region descriptor. 3493 auto *BinaryDescriptorTy = cast<llvm::StructType>( 3494 CGM.getTypes().ConvertTypeForMem(getTgtBinaryDescriptorQTy())); 3495 ConstantInitBuilder DescBuilder(CGM); 3496 auto DescInit = DescBuilder.beginStruct(BinaryDescriptorTy); 3497 DescInit.addInt(CGM.Int32Ty, Devices.size()); 3498 DescInit.add(llvm::ConstantExpr::getGetElementPtr(DeviceImages->getValueType(), 3499 DeviceImages, 3500 Index)); 3501 DescInit.add(HostEntriesBegin); 3502 DescInit.add(HostEntriesEnd); 3503 3504 auto *Desc = DescInit.finishAndCreateGlobal(".omp_offloading.descriptor", 3505 CGM.getPointerAlign(), 3506 /*isConstant=*/true); 3507 3508 // Emit code to register or unregister the descriptor at execution 3509 // startup or closing, respectively. 3510 3511 // Create a variable to drive the registration and unregistration of the 3512 // descriptor, so we can reuse the logic that emits Ctors and Dtors. 
3513 auto *IdentInfo = &C.Idents.get(".omp_offloading.reg_unreg_var"); 3514 ImplicitParamDecl RegUnregVar(C, C.getTranslationUnitDecl(), SourceLocation(), 3515 IdentInfo, C.CharTy, ImplicitParamDecl::Other); 3516 3517 auto *UnRegFn = createOffloadingBinaryDescriptorFunction( 3518 CGM, ".omp_offloading.descriptor_unreg", 3519 [&](CodeGenFunction &CGF, PrePostActionTy &) { 3520 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_unregister_lib), 3521 Desc); 3522 }); 3523 auto *RegFn = createOffloadingBinaryDescriptorFunction( 3524 CGM, ".omp_offloading.descriptor_reg", 3525 [&](CodeGenFunction &CGF, PrePostActionTy &) { 3526 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_register_lib), 3527 Desc); 3528 CGM.getCXXABI().registerGlobalDtor(CGF, RegUnregVar, UnRegFn, Desc); 3529 }); 3530 if (CGM.supportsCOMDAT()) { 3531 // It is sufficient to call registration function only once, so create a 3532 // COMDAT group for registration/unregistration functions and associated 3533 // data. That would reduce startup time and code size. Registration 3534 // function serves as a COMDAT group key. 3535 auto ComdatKey = M.getOrInsertComdat(RegFn->getName()); 3536 RegFn->setLinkage(llvm::GlobalValue::LinkOnceAnyLinkage); 3537 RegFn->setVisibility(llvm::GlobalValue::HiddenVisibility); 3538 RegFn->setComdat(ComdatKey); 3539 UnRegFn->setComdat(ComdatKey); 3540 DeviceImages->setComdat(ComdatKey); 3541 Desc->setComdat(ComdatKey); 3542 } 3543 return RegFn; 3544 } 3545 3546 void CGOpenMPRuntime::createOffloadEntry(llvm::Constant *ID, 3547 llvm::Constant *Addr, uint64_t Size, 3548 int32_t Flags) { 3549 StringRef Name = Addr->getName(); 3550 auto *TgtOffloadEntryType = cast<llvm::StructType>( 3551 CGM.getTypes().ConvertTypeForMem(getTgtOffloadEntryQTy())); 3552 llvm::LLVMContext &C = CGM.getModule().getContext(); 3553 llvm::Module &M = CGM.getModule(); 3554 3555 // Make sure the address has the right type. 
3556 llvm::Constant *AddrPtr = llvm::ConstantExpr::getBitCast(ID, CGM.VoidPtrTy); 3557 3558 // Create constant string with the name. 3559 llvm::Constant *StrPtrInit = llvm::ConstantDataArray::getString(C, Name); 3560 3561 llvm::GlobalVariable *Str = 3562 new llvm::GlobalVariable(M, StrPtrInit->getType(), /*isConstant=*/true, 3563 llvm::GlobalValue::InternalLinkage, StrPtrInit, 3564 ".omp_offloading.entry_name"); 3565 Str->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); 3566 llvm::Constant *StrPtr = llvm::ConstantExpr::getBitCast(Str, CGM.Int8PtrTy); 3567 3568 // We can't have any padding between symbols, so we need to have 1-byte 3569 // alignment. 3570 auto Align = CharUnits::fromQuantity(1); 3571 3572 // Create the entry struct. 3573 ConstantInitBuilder EntryBuilder(CGM); 3574 auto EntryInit = EntryBuilder.beginStruct(TgtOffloadEntryType); 3575 EntryInit.add(AddrPtr); 3576 EntryInit.add(StrPtr); 3577 EntryInit.addInt(CGM.SizeTy, Size); 3578 EntryInit.addInt(CGM.Int32Ty, Flags); 3579 EntryInit.addInt(CGM.Int32Ty, 0); 3580 llvm::GlobalVariable *Entry = EntryInit.finishAndCreateGlobal( 3581 Twine(".omp_offloading.entry.") + Name, Align, 3582 /*constant*/ true, llvm::GlobalValue::ExternalLinkage); 3583 3584 // The entry has to be created in the section the linker expects it to be. 3585 Entry->setSection(".omp_offloading.entries"); 3586 } 3587 3588 void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() { 3589 // Emit the offloading entries and metadata so that the device codegen side 3590 // can easily figure out what to emit. The produced metadata looks like 3591 // this: 3592 // 3593 // !omp_offload.info = !{!1, ...} 3594 // 3595 // Right now we only generate metadata for function that contain target 3596 // regions. 3597 3598 // If we do not have entries, we dont need to do anything. 
3599 if (OffloadEntriesInfoManager.empty()) 3600 return; 3601 3602 llvm::Module &M = CGM.getModule(); 3603 llvm::LLVMContext &C = M.getContext(); 3604 SmallVector<OffloadEntriesInfoManagerTy::OffloadEntryInfo *, 16> 3605 OrderedEntries(OffloadEntriesInfoManager.size()); 3606 3607 // Create the offloading info metadata node. 3608 llvm::NamedMDNode *MD = M.getOrInsertNamedMetadata("omp_offload.info"); 3609 3610 // Auxiliary methods to create metadata values and strings. 3611 auto getMDInt = [&](unsigned v) { 3612 return llvm::ConstantAsMetadata::get( 3613 llvm::ConstantInt::get(llvm::Type::getInt32Ty(C), v)); 3614 }; 3615 3616 auto getMDString = [&](StringRef v) { return llvm::MDString::get(C, v); }; 3617 3618 // Create function that emits metadata for each target region entry; 3619 auto &&TargetRegionMetadataEmitter = [&]( 3620 unsigned DeviceID, unsigned FileID, StringRef ParentName, unsigned Line, 3621 OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion &E) { 3622 llvm::SmallVector<llvm::Metadata *, 32> Ops; 3623 // Generate metadata for target regions. Each entry of this metadata 3624 // contains: 3625 // - Entry 0 -> Kind of this type of metadata (0). 3626 // - Entry 1 -> Device ID of the file where the entry was identified. 3627 // - Entry 2 -> File ID of the file where the entry was identified. 3628 // - Entry 3 -> Mangled name of the function where the entry was identified. 3629 // - Entry 4 -> Line in the file where the entry was identified. 3630 // - Entry 5 -> Order the entry was created. 3631 // The first element of the metadata node is the kind. 3632 Ops.push_back(getMDInt(E.getKind())); 3633 Ops.push_back(getMDInt(DeviceID)); 3634 Ops.push_back(getMDInt(FileID)); 3635 Ops.push_back(getMDString(ParentName)); 3636 Ops.push_back(getMDInt(Line)); 3637 Ops.push_back(getMDInt(E.getOrder())); 3638 3639 // Save this entry in the right position of the ordered entries array. 
3640 OrderedEntries[E.getOrder()] = &E; 3641 3642 // Add metadata to the named metadata node. 3643 MD->addOperand(llvm::MDNode::get(C, Ops)); 3644 }; 3645 3646 OffloadEntriesInfoManager.actOnTargetRegionEntriesInfo( 3647 TargetRegionMetadataEmitter); 3648 3649 for (auto *E : OrderedEntries) { 3650 assert(E && "All ordered entries must exist!"); 3651 if (auto *CE = 3652 dyn_cast<OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion>( 3653 E)) { 3654 assert(CE->getID() && CE->getAddress() && 3655 "Entry ID and Addr are invalid!"); 3656 createOffloadEntry(CE->getID(), CE->getAddress(), /*Size=*/0); 3657 } else 3658 llvm_unreachable("Unsupported entry kind."); 3659 } 3660 } 3661 3662 /// \brief Loads all the offload entries information from the host IR 3663 /// metadata. 3664 void CGOpenMPRuntime::loadOffloadInfoMetadata() { 3665 // If we are in target mode, load the metadata from the host IR. This code has 3666 // to match the metadaata creation in createOffloadEntriesAndInfoMetadata(). 
3667 3668 if (!CGM.getLangOpts().OpenMPIsDevice) 3669 return; 3670 3671 if (CGM.getLangOpts().OMPHostIRFile.empty()) 3672 return; 3673 3674 auto Buf = llvm::MemoryBuffer::getFile(CGM.getLangOpts().OMPHostIRFile); 3675 if (Buf.getError()) 3676 return; 3677 3678 llvm::LLVMContext C; 3679 auto ME = expectedToErrorOrAndEmitErrors( 3680 C, llvm::parseBitcodeFile(Buf.get()->getMemBufferRef(), C)); 3681 3682 if (ME.getError()) 3683 return; 3684 3685 llvm::NamedMDNode *MD = ME.get()->getNamedMetadata("omp_offload.info"); 3686 if (!MD) 3687 return; 3688 3689 for (llvm::MDNode *MN : MD->operands()) { 3690 auto getMDInt = [&](unsigned Idx) { 3691 llvm::ConstantAsMetadata *V = 3692 cast<llvm::ConstantAsMetadata>(MN->getOperand(Idx)); 3693 return cast<llvm::ConstantInt>(V->getValue())->getZExtValue(); 3694 }; 3695 3696 auto getMDString = [&](unsigned Idx) { 3697 llvm::MDString *V = cast<llvm::MDString>(MN->getOperand(Idx)); 3698 return V->getString(); 3699 }; 3700 3701 switch (getMDInt(0)) { 3702 default: 3703 llvm_unreachable("Unexpected metadata!"); 3704 break; 3705 case OffloadEntriesInfoManagerTy::OffloadEntryInfo:: 3706 OFFLOAD_ENTRY_INFO_TARGET_REGION: 3707 OffloadEntriesInfoManager.initializeTargetRegionEntryInfo( 3708 /*DeviceID=*/getMDInt(1), /*FileID=*/getMDInt(2), 3709 /*ParentName=*/getMDString(3), /*Line=*/getMDInt(4), 3710 /*Order=*/getMDInt(5)); 3711 break; 3712 } 3713 } 3714 } 3715 3716 void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) { 3717 if (!KmpRoutineEntryPtrTy) { 3718 // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type. 
3719 auto &C = CGM.getContext(); 3720 QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy}; 3721 FunctionProtoType::ExtProtoInfo EPI; 3722 KmpRoutineEntryPtrQTy = C.getPointerType( 3723 C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI)); 3724 KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy); 3725 } 3726 } 3727 3728 static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC, 3729 QualType FieldTy) { 3730 auto *Field = FieldDecl::Create( 3731 C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy, 3732 C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()), 3733 /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit); 3734 Field->setAccess(AS_public); 3735 DC->addDecl(Field); 3736 return Field; 3737 } 3738 3739 QualType CGOpenMPRuntime::getTgtOffloadEntryQTy() { 3740 3741 // Make sure the type of the entry is already created. This is the type we 3742 // have to create: 3743 // struct __tgt_offload_entry{ 3744 // void *addr; // Pointer to the offload entry info. 3745 // // (function or global) 3746 // char *name; // Name of the function or global. 3747 // size_t size; // Size of the entry info (0 if it a function). 3748 // int32_t flags; // Flags associated with the entry, e.g. 'link'. 3749 // int32_t reserved; // Reserved, to use by the runtime library. 
3750 // }; 3751 if (TgtOffloadEntryQTy.isNull()) { 3752 ASTContext &C = CGM.getContext(); 3753 auto *RD = C.buildImplicitRecord("__tgt_offload_entry"); 3754 RD->startDefinition(); 3755 addFieldToRecordDecl(C, RD, C.VoidPtrTy); 3756 addFieldToRecordDecl(C, RD, C.getPointerType(C.CharTy)); 3757 addFieldToRecordDecl(C, RD, C.getSizeType()); 3758 addFieldToRecordDecl( 3759 C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true)); 3760 addFieldToRecordDecl( 3761 C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true)); 3762 RD->completeDefinition(); 3763 RD->addAttr(PackedAttr::CreateImplicit(C)); 3764 TgtOffloadEntryQTy = C.getRecordType(RD); 3765 } 3766 return TgtOffloadEntryQTy; 3767 } 3768 3769 QualType CGOpenMPRuntime::getTgtDeviceImageQTy() { 3770 // These are the types we need to build: 3771 // struct __tgt_device_image{ 3772 // void *ImageStart; // Pointer to the target code start. 3773 // void *ImageEnd; // Pointer to the target code end. 3774 // // We also add the host entries to the device image, as it may be useful 3775 // // for the target runtime to have access to that information. 3776 // __tgt_offload_entry *EntriesBegin; // Begin of the table with all 3777 // // the entries. 3778 // __tgt_offload_entry *EntriesEnd; // End of the table with all the 3779 // // entries (non inclusive). 
3780 // }; 3781 if (TgtDeviceImageQTy.isNull()) { 3782 ASTContext &C = CGM.getContext(); 3783 auto *RD = C.buildImplicitRecord("__tgt_device_image"); 3784 RD->startDefinition(); 3785 addFieldToRecordDecl(C, RD, C.VoidPtrTy); 3786 addFieldToRecordDecl(C, RD, C.VoidPtrTy); 3787 addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy())); 3788 addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy())); 3789 RD->completeDefinition(); 3790 TgtDeviceImageQTy = C.getRecordType(RD); 3791 } 3792 return TgtDeviceImageQTy; 3793 } 3794 3795 QualType CGOpenMPRuntime::getTgtBinaryDescriptorQTy() { 3796 // struct __tgt_bin_desc{ 3797 // int32_t NumDevices; // Number of devices supported. 3798 // __tgt_device_image *DeviceImages; // Arrays of device images 3799 // // (one per device). 3800 // __tgt_offload_entry *EntriesBegin; // Begin of the table with all the 3801 // // entries. 3802 // __tgt_offload_entry *EntriesEnd; // End of the table with all the 3803 // // entries (non inclusive). 
3804 // }; 3805 if (TgtBinaryDescriptorQTy.isNull()) { 3806 ASTContext &C = CGM.getContext(); 3807 auto *RD = C.buildImplicitRecord("__tgt_bin_desc"); 3808 RD->startDefinition(); 3809 addFieldToRecordDecl( 3810 C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true)); 3811 addFieldToRecordDecl(C, RD, C.getPointerType(getTgtDeviceImageQTy())); 3812 addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy())); 3813 addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy())); 3814 RD->completeDefinition(); 3815 TgtBinaryDescriptorQTy = C.getRecordType(RD); 3816 } 3817 return TgtBinaryDescriptorQTy; 3818 } 3819 3820 namespace { 3821 struct PrivateHelpersTy { 3822 PrivateHelpersTy(const VarDecl *Original, const VarDecl *PrivateCopy, 3823 const VarDecl *PrivateElemInit) 3824 : Original(Original), PrivateCopy(PrivateCopy), 3825 PrivateElemInit(PrivateElemInit) {} 3826 const VarDecl *Original; 3827 const VarDecl *PrivateCopy; 3828 const VarDecl *PrivateElemInit; 3829 }; 3830 typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy; 3831 } // anonymous namespace 3832 3833 static RecordDecl * 3834 createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) { 3835 if (!Privates.empty()) { 3836 auto &C = CGM.getContext(); 3837 // Build struct .kmp_privates_t. 
{ 3838 // /* private vars */ 3839 // }; 3840 auto *RD = C.buildImplicitRecord(".kmp_privates.t"); 3841 RD->startDefinition(); 3842 for (auto &&Pair : Privates) { 3843 auto *VD = Pair.second.Original; 3844 auto Type = VD->getType(); 3845 Type = Type.getNonReferenceType(); 3846 auto *FD = addFieldToRecordDecl(C, RD, Type); 3847 if (VD->hasAttrs()) { 3848 for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()), 3849 E(VD->getAttrs().end()); 3850 I != E; ++I) 3851 FD->addAttr(*I); 3852 } 3853 } 3854 RD->completeDefinition(); 3855 return RD; 3856 } 3857 return nullptr; 3858 } 3859 3860 static RecordDecl * 3861 createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind, 3862 QualType KmpInt32Ty, 3863 QualType KmpRoutineEntryPointerQTy) { 3864 auto &C = CGM.getContext(); 3865 // Build struct kmp_task_t { 3866 // void * shareds; 3867 // kmp_routine_entry_t routine; 3868 // kmp_int32 part_id; 3869 // kmp_cmplrdata_t data1; 3870 // kmp_cmplrdata_t data2; 3871 // For taskloops additional fields: 3872 // kmp_uint64 lb; 3873 // kmp_uint64 ub; 3874 // kmp_int64 st; 3875 // kmp_int32 liter; 3876 // void * reductions; 3877 // }; 3878 auto *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TTK_Union); 3879 UD->startDefinition(); 3880 addFieldToRecordDecl(C, UD, KmpInt32Ty); 3881 addFieldToRecordDecl(C, UD, KmpRoutineEntryPointerQTy); 3882 UD->completeDefinition(); 3883 QualType KmpCmplrdataTy = C.getRecordType(UD); 3884 auto *RD = C.buildImplicitRecord("kmp_task_t"); 3885 RD->startDefinition(); 3886 addFieldToRecordDecl(C, RD, C.VoidPtrTy); 3887 addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy); 3888 addFieldToRecordDecl(C, RD, KmpInt32Ty); 3889 addFieldToRecordDecl(C, RD, KmpCmplrdataTy); 3890 addFieldToRecordDecl(C, RD, KmpCmplrdataTy); 3891 if (isOpenMPTaskLoopDirective(Kind)) { 3892 QualType KmpUInt64Ty = 3893 CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0); 3894 QualType KmpInt64Ty = 3895 
CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1); 3896 addFieldToRecordDecl(C, RD, KmpUInt64Ty); 3897 addFieldToRecordDecl(C, RD, KmpUInt64Ty); 3898 addFieldToRecordDecl(C, RD, KmpInt64Ty); 3899 addFieldToRecordDecl(C, RD, KmpInt32Ty); 3900 addFieldToRecordDecl(C, RD, C.VoidPtrTy); 3901 } 3902 RD->completeDefinition(); 3903 return RD; 3904 } 3905 3906 static RecordDecl * 3907 createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy, 3908 ArrayRef<PrivateDataTy> Privates) { 3909 auto &C = CGM.getContext(); 3910 // Build struct kmp_task_t_with_privates { 3911 // kmp_task_t task_data; 3912 // .kmp_privates_t. privates; 3913 // }; 3914 auto *RD = C.buildImplicitRecord("kmp_task_t_with_privates"); 3915 RD->startDefinition(); 3916 addFieldToRecordDecl(C, RD, KmpTaskTQTy); 3917 if (auto *PrivateRD = createPrivatesRecordDecl(CGM, Privates)) { 3918 addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD)); 3919 } 3920 RD->completeDefinition(); 3921 return RD; 3922 } 3923 3924 /// \brief Emit a proxy function which accepts kmp_task_t as the second 3925 /// argument. 
3926 /// \code 3927 /// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) { 3928 /// TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, tt, 3929 /// For taskloops: 3930 /// tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter, 3931 /// tt->reductions, tt->shareds); 3932 /// return 0; 3933 /// } 3934 /// \endcode 3935 static llvm::Value * 3936 emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc, 3937 OpenMPDirectiveKind Kind, QualType KmpInt32Ty, 3938 QualType KmpTaskTWithPrivatesPtrQTy, 3939 QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy, 3940 QualType SharedsPtrTy, llvm::Value *TaskFunction, 3941 llvm::Value *TaskPrivatesMap) { 3942 auto &C = CGM.getContext(); 3943 FunctionArgList Args; 3944 ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty, 3945 ImplicitParamDecl::Other); 3946 ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3947 KmpTaskTWithPrivatesPtrQTy.withRestrict(), 3948 ImplicitParamDecl::Other); 3949 Args.push_back(&GtidArg); 3950 Args.push_back(&TaskTypeArg); 3951 auto &TaskEntryFnInfo = 3952 CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args); 3953 auto *TaskEntryTy = CGM.getTypes().GetFunctionType(TaskEntryFnInfo); 3954 auto *TaskEntry = 3955 llvm::Function::Create(TaskEntryTy, llvm::GlobalValue::InternalLinkage, 3956 ".omp_task_entry.", &CGM.getModule()); 3957 CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskEntry, TaskEntryFnInfo); 3958 CodeGenFunction CGF(CGM); 3959 CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args, 3960 Loc, Loc); 3961 3962 // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map, 3963 // tt, 3964 // For taskloops: 3965 // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter, 3966 // tt->task_data.shareds); 3967 auto *GtidParam = CGF.EmitLoadOfScalar( 3968 CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc); 3969 LValue 
TDBase = CGF.EmitLoadOfPointerLValue( 3970 CGF.GetAddrOfLocalVar(&TaskTypeArg), 3971 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>()); 3972 auto *KmpTaskTWithPrivatesQTyRD = 3973 cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl()); 3974 LValue Base = 3975 CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin()); 3976 auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl()); 3977 auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId); 3978 auto PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI); 3979 auto *PartidParam = PartIdLVal.getPointer(); 3980 3981 auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds); 3982 auto SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI); 3983 auto *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 3984 CGF.EmitLoadOfScalar(SharedsLVal, Loc), 3985 CGF.ConvertTypeForMem(SharedsPtrTy)); 3986 3987 auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1); 3988 llvm::Value *PrivatesParam; 3989 if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) { 3990 auto PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI); 3991 PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 3992 PrivatesLVal.getPointer(), CGF.VoidPtrTy); 3993 } else 3994 PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); 3995 3996 llvm::Value *CommonArgs[] = {GtidParam, PartidParam, PrivatesParam, 3997 TaskPrivatesMap, 3998 CGF.Builder 3999 .CreatePointerBitCastOrAddrSpaceCast( 4000 TDBase.getAddress(), CGF.VoidPtrTy) 4001 .getPointer()}; 4002 SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs), 4003 std::end(CommonArgs)); 4004 if (isOpenMPTaskLoopDirective(Kind)) { 4005 auto LBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound); 4006 auto LBLVal = CGF.EmitLValueForField(Base, *LBFI); 4007 auto *LBParam = CGF.EmitLoadOfScalar(LBLVal, Loc); 4008 auto UBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound); 
4009 auto UBLVal = CGF.EmitLValueForField(Base, *UBFI); 4010 auto *UBParam = CGF.EmitLoadOfScalar(UBLVal, Loc); 4011 auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride); 4012 auto StLVal = CGF.EmitLValueForField(Base, *StFI); 4013 auto *StParam = CGF.EmitLoadOfScalar(StLVal, Loc); 4014 auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter); 4015 auto LILVal = CGF.EmitLValueForField(Base, *LIFI); 4016 auto *LIParam = CGF.EmitLoadOfScalar(LILVal, Loc); 4017 auto RFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTReductions); 4018 auto RLVal = CGF.EmitLValueForField(Base, *RFI); 4019 auto *RParam = CGF.EmitLoadOfScalar(RLVal, Loc); 4020 CallArgs.push_back(LBParam); 4021 CallArgs.push_back(UBParam); 4022 CallArgs.push_back(StParam); 4023 CallArgs.push_back(LIParam); 4024 CallArgs.push_back(RParam); 4025 } 4026 CallArgs.push_back(SharedsParam); 4027 4028 CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskFunction, 4029 CallArgs); 4030 CGF.EmitStoreThroughLValue( 4031 RValue::get(CGF.Builder.getInt32(/*C=*/0)), 4032 CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty)); 4033 CGF.FinishFunction(); 4034 return TaskEntry; 4035 } 4036 4037 static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM, 4038 SourceLocation Loc, 4039 QualType KmpInt32Ty, 4040 QualType KmpTaskTWithPrivatesPtrQTy, 4041 QualType KmpTaskTWithPrivatesQTy) { 4042 auto &C = CGM.getContext(); 4043 FunctionArgList Args; 4044 ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty, 4045 ImplicitParamDecl::Other); 4046 ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 4047 KmpTaskTWithPrivatesPtrQTy.withRestrict(), 4048 ImplicitParamDecl::Other); 4049 Args.push_back(&GtidArg); 4050 Args.push_back(&TaskTypeArg); 4051 auto &DestructorFnInfo = 4052 CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args); 4053 auto *DestructorFnTy = CGM.getTypes().GetFunctionType(DestructorFnInfo); 4054 auto *DestructorFn = 4055 
llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage, 4056 ".omp_task_destructor.", &CGM.getModule()); 4057 CGM.SetInternalFunctionAttributes(GlobalDecl(), DestructorFn, 4058 DestructorFnInfo); 4059 CodeGenFunction CGF(CGM); 4060 CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo, 4061 Args, Loc, Loc); 4062 4063 LValue Base = CGF.EmitLoadOfPointerLValue( 4064 CGF.GetAddrOfLocalVar(&TaskTypeArg), 4065 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>()); 4066 auto *KmpTaskTWithPrivatesQTyRD = 4067 cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl()); 4068 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin()); 4069 Base = CGF.EmitLValueForField(Base, *FI); 4070 for (auto *Field : 4071 cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) { 4072 if (auto DtorKind = Field->getType().isDestructedType()) { 4073 auto FieldLValue = CGF.EmitLValueForField(Base, Field); 4074 CGF.pushDestroy(DtorKind, FieldLValue.getAddress(), Field->getType()); 4075 } 4076 } 4077 CGF.FinishFunction(); 4078 return DestructorFn; 4079 } 4080 4081 /// \brief Emit a privates mapping function for correct handling of private and 4082 /// firstprivate variables. 4083 /// \code 4084 /// void .omp_task_privates_map.(const .privates. 
*noalias privs, <ty1> 4085 /// **noalias priv1,..., <tyn> **noalias privn) { 4086 /// *priv1 = &.privates.priv1; 4087 /// ...; 4088 /// *privn = &.privates.privn; 4089 /// } 4090 /// \endcode 4091 static llvm::Value * 4092 emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc, 4093 ArrayRef<const Expr *> PrivateVars, 4094 ArrayRef<const Expr *> FirstprivateVars, 4095 ArrayRef<const Expr *> LastprivateVars, 4096 QualType PrivatesQTy, 4097 ArrayRef<PrivateDataTy> Privates) { 4098 auto &C = CGM.getContext(); 4099 FunctionArgList Args; 4100 ImplicitParamDecl TaskPrivatesArg( 4101 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 4102 C.getPointerType(PrivatesQTy).withConst().withRestrict(), 4103 ImplicitParamDecl::Other); 4104 Args.push_back(&TaskPrivatesArg); 4105 llvm::DenseMap<const VarDecl *, unsigned> PrivateVarsPos; 4106 unsigned Counter = 1; 4107 for (auto *E: PrivateVars) { 4108 Args.push_back(ImplicitParamDecl::Create( 4109 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 4110 C.getPointerType(C.getPointerType(E->getType())) 4111 .withConst() 4112 .withRestrict(), 4113 ImplicitParamDecl::Other)); 4114 auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 4115 PrivateVarsPos[VD] = Counter; 4116 ++Counter; 4117 } 4118 for (auto *E : FirstprivateVars) { 4119 Args.push_back(ImplicitParamDecl::Create( 4120 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 4121 C.getPointerType(C.getPointerType(E->getType())) 4122 .withConst() 4123 .withRestrict(), 4124 ImplicitParamDecl::Other)); 4125 auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 4126 PrivateVarsPos[VD] = Counter; 4127 ++Counter; 4128 } 4129 for (auto *E: LastprivateVars) { 4130 Args.push_back(ImplicitParamDecl::Create( 4131 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 4132 C.getPointerType(C.getPointerType(E->getType())) 4133 .withConst() 4134 .withRestrict(), 4135 ImplicitParamDecl::Other)); 4136 auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 4137 PrivateVarsPos[VD] = Counter; 4138 ++Counter; 4139 } 
4140 auto &TaskPrivatesMapFnInfo = 4141 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 4142 auto *TaskPrivatesMapTy = 4143 CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo); 4144 auto *TaskPrivatesMap = llvm::Function::Create( 4145 TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage, 4146 ".omp_task_privates_map.", &CGM.getModule()); 4147 CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskPrivatesMap, 4148 TaskPrivatesMapFnInfo); 4149 TaskPrivatesMap->removeFnAttr(llvm::Attribute::NoInline); 4150 TaskPrivatesMap->removeFnAttr(llvm::Attribute::OptimizeNone); 4151 TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline); 4152 CodeGenFunction CGF(CGM); 4153 CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap, 4154 TaskPrivatesMapFnInfo, Args, Loc, Loc); 4155 4156 // *privi = &.privates.privi; 4157 LValue Base = CGF.EmitLoadOfPointerLValue( 4158 CGF.GetAddrOfLocalVar(&TaskPrivatesArg), 4159 TaskPrivatesArg.getType()->castAs<PointerType>()); 4160 auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl()); 4161 Counter = 0; 4162 for (auto *Field : PrivatesQTyRD->fields()) { 4163 auto FieldLVal = CGF.EmitLValueForField(Base, Field); 4164 auto *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]]; 4165 auto RefLVal = CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType()); 4166 auto RefLoadLVal = CGF.EmitLoadOfPointerLValue( 4167 RefLVal.getAddress(), RefLVal.getType()->castAs<PointerType>()); 4168 CGF.EmitStoreOfScalar(FieldLVal.getPointer(), RefLoadLVal); 4169 ++Counter; 4170 } 4171 CGF.FinishFunction(); 4172 return TaskPrivatesMap; 4173 } 4174 4175 static bool stable_sort_comparator(const PrivateDataTy P1, 4176 const PrivateDataTy P2) { 4177 return P1.first > P2.first; 4178 } 4179 4180 /// Emit initialization for private variables in task-based directives. 
4181 static void emitPrivatesInit(CodeGenFunction &CGF, 4182 const OMPExecutableDirective &D, 4183 Address KmpTaskSharedsPtr, LValue TDBase, 4184 const RecordDecl *KmpTaskTWithPrivatesQTyRD, 4185 QualType SharedsTy, QualType SharedsPtrTy, 4186 const OMPTaskDataTy &Data, 4187 ArrayRef<PrivateDataTy> Privates, bool ForDup) { 4188 auto &C = CGF.getContext(); 4189 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin()); 4190 LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI); 4191 OpenMPDirectiveKind Kind = isOpenMPTaskLoopDirective(D.getDirectiveKind()) 4192 ? OMPD_taskloop 4193 : OMPD_task; 4194 const CapturedStmt &CS = *D.getCapturedStmt(Kind); 4195 CodeGenFunction::CGCapturedStmtInfo CapturesInfo(CS); 4196 LValue SrcBase; 4197 bool IsTargetTask = 4198 isOpenMPTargetDataManagementDirective(D.getDirectiveKind()) || 4199 isOpenMPTargetExecutionDirective(D.getDirectiveKind()); 4200 // For target-based directives skip 3 firstprivate arrays BasePointersArray, 4201 // PointersArray and SizesArray. The original variables for these arrays are 4202 // not captured and we get their addresses explicitly. 4203 if ((!IsTargetTask && !Data.FirstprivateVars.empty()) || 4204 (IsTargetTask && KmpTaskSharedsPtr.isValid())) { 4205 SrcBase = CGF.MakeAddrLValue( 4206 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4207 KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy)), 4208 SharedsTy); 4209 } 4210 FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin(); 4211 for (auto &&Pair : Privates) { 4212 auto *VD = Pair.second.PrivateCopy; 4213 auto *Init = VD->getAnyInitializer(); 4214 if (Init && (!ForDup || (isa<CXXConstructExpr>(Init) && 4215 !CGF.isTrivialInitializer(Init)))) { 4216 LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI); 4217 if (auto *Elem = Pair.second.PrivateElemInit) { 4218 auto *OriginalVD = Pair.second.Original; 4219 // Check if the variable is the target-based BasePointersArray, 4220 // PointersArray or SizesArray. 
4221 LValue SharedRefLValue; 4222 QualType Type = OriginalVD->getType(); 4223 auto *SharedField = CapturesInfo.lookup(OriginalVD); 4224 if (IsTargetTask && !SharedField) { 4225 assert(isa<ImplicitParamDecl>(OriginalVD) && 4226 isa<CapturedDecl>(OriginalVD->getDeclContext()) && 4227 cast<CapturedDecl>(OriginalVD->getDeclContext()) 4228 ->getNumParams() == 0 && 4229 isa<TranslationUnitDecl>( 4230 cast<CapturedDecl>(OriginalVD->getDeclContext()) 4231 ->getDeclContext()) && 4232 "Expected artificial target data variable."); 4233 SharedRefLValue = 4234 CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(OriginalVD), Type); 4235 } else { 4236 SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField); 4237 SharedRefLValue = CGF.MakeAddrLValue( 4238 Address(SharedRefLValue.getPointer(), C.getDeclAlign(OriginalVD)), 4239 SharedRefLValue.getType(), LValueBaseInfo(AlignmentSource::Decl), 4240 SharedRefLValue.getTBAAInfo()); 4241 } 4242 if (Type->isArrayType()) { 4243 // Initialize firstprivate array. 4244 if (!isa<CXXConstructExpr>(Init) || CGF.isTrivialInitializer(Init)) { 4245 // Perform simple memcpy. 4246 CGF.EmitAggregateAssign(PrivateLValue, SharedRefLValue, Type); 4247 } else { 4248 // Initialize firstprivate array using element-by-element 4249 // initialization. 4250 CGF.EmitOMPAggregateAssign( 4251 PrivateLValue.getAddress(), SharedRefLValue.getAddress(), Type, 4252 [&CGF, Elem, Init, &CapturesInfo](Address DestElement, 4253 Address SrcElement) { 4254 // Clean up any temporaries needed by the initialization. 4255 CodeGenFunction::OMPPrivateScope InitScope(CGF); 4256 InitScope.addPrivate( 4257 Elem, [SrcElement]() -> Address { return SrcElement; }); 4258 (void)InitScope.Privatize(); 4259 // Emit initialization for single element. 
4260 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII( 4261 CGF, &CapturesInfo); 4262 CGF.EmitAnyExprToMem(Init, DestElement, 4263 Init->getType().getQualifiers(), 4264 /*IsInitializer=*/false); 4265 }); 4266 } 4267 } else { 4268 CodeGenFunction::OMPPrivateScope InitScope(CGF); 4269 InitScope.addPrivate(Elem, [SharedRefLValue]() -> Address { 4270 return SharedRefLValue.getAddress(); 4271 }); 4272 (void)InitScope.Privatize(); 4273 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo); 4274 CGF.EmitExprAsInit(Init, VD, PrivateLValue, 4275 /*capturedByInit=*/false); 4276 } 4277 } else 4278 CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false); 4279 } 4280 ++FI; 4281 } 4282 } 4283 4284 /// Check if duplication function is required for taskloops. 4285 static bool checkInitIsRequired(CodeGenFunction &CGF, 4286 ArrayRef<PrivateDataTy> Privates) { 4287 bool InitRequired = false; 4288 for (auto &&Pair : Privates) { 4289 auto *VD = Pair.second.PrivateCopy; 4290 auto *Init = VD->getAnyInitializer(); 4291 InitRequired = InitRequired || (Init && isa<CXXConstructExpr>(Init) && 4292 !CGF.isTrivialInitializer(Init)); 4293 } 4294 return InitRequired; 4295 } 4296 4297 4298 /// Emit task_dup function (for initialization of 4299 /// private/firstprivate/lastprivate vars and last_iter flag) 4300 /// \code 4301 /// void __task_dup_entry(kmp_task_t *task_dst, const kmp_task_t *task_src, int 4302 /// lastpriv) { 4303 /// // setup lastprivate flag 4304 /// task_dst->last = lastpriv; 4305 /// // could be constructor calls here... 
4306 /// } 4307 /// \endcode 4308 static llvm::Value * 4309 emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc, 4310 const OMPExecutableDirective &D, 4311 QualType KmpTaskTWithPrivatesPtrQTy, 4312 const RecordDecl *KmpTaskTWithPrivatesQTyRD, 4313 const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy, 4314 QualType SharedsPtrTy, const OMPTaskDataTy &Data, 4315 ArrayRef<PrivateDataTy> Privates, bool WithLastIter) { 4316 auto &C = CGM.getContext(); 4317 FunctionArgList Args; 4318 ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 4319 KmpTaskTWithPrivatesPtrQTy, 4320 ImplicitParamDecl::Other); 4321 ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 4322 KmpTaskTWithPrivatesPtrQTy, 4323 ImplicitParamDecl::Other); 4324 ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy, 4325 ImplicitParamDecl::Other); 4326 Args.push_back(&DstArg); 4327 Args.push_back(&SrcArg); 4328 Args.push_back(&LastprivArg); 4329 auto &TaskDupFnInfo = 4330 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 4331 auto *TaskDupTy = CGM.getTypes().GetFunctionType(TaskDupFnInfo); 4332 auto *TaskDup = 4333 llvm::Function::Create(TaskDupTy, llvm::GlobalValue::InternalLinkage, 4334 ".omp_task_dup.", &CGM.getModule()); 4335 CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskDup, TaskDupFnInfo); 4336 CodeGenFunction CGF(CGM); 4337 CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskDup, TaskDupFnInfo, Args, Loc, 4338 Loc); 4339 4340 LValue TDBase = CGF.EmitLoadOfPointerLValue( 4341 CGF.GetAddrOfLocalVar(&DstArg), 4342 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>()); 4343 // task_dst->liter = lastpriv; 4344 if (WithLastIter) { 4345 auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter); 4346 LValue Base = CGF.EmitLValueForField( 4347 TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin()); 4348 LValue LILVal = CGF.EmitLValueForField(Base, *LIFI); 4349 llvm::Value *Lastpriv = CGF.EmitLoadOfScalar( 4350 
CGF.GetAddrOfLocalVar(&LastprivArg), /*Volatile=*/false, C.IntTy, Loc); 4351 CGF.EmitStoreOfScalar(Lastpriv, LILVal); 4352 } 4353 4354 // Emit initial values for private copies (if any). 4355 assert(!Privates.empty()); 4356 Address KmpTaskSharedsPtr = Address::invalid(); 4357 if (!Data.FirstprivateVars.empty()) { 4358 LValue TDBase = CGF.EmitLoadOfPointerLValue( 4359 CGF.GetAddrOfLocalVar(&SrcArg), 4360 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>()); 4361 LValue Base = CGF.EmitLValueForField( 4362 TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin()); 4363 KmpTaskSharedsPtr = Address( 4364 CGF.EmitLoadOfScalar(CGF.EmitLValueForField( 4365 Base, *std::next(KmpTaskTQTyRD->field_begin(), 4366 KmpTaskTShareds)), 4367 Loc), 4368 CGF.getNaturalTypeAlignment(SharedsTy)); 4369 } 4370 emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase, KmpTaskTWithPrivatesQTyRD, 4371 SharedsTy, SharedsPtrTy, Data, Privates, /*ForDup=*/true); 4372 CGF.FinishFunction(); 4373 return TaskDup; 4374 } 4375 4376 /// Checks if destructor function is required to be generated. 4377 /// \return true if cleanups are required, false otherwise. 4378 static bool 4379 checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD) { 4380 bool NeedsCleanup = false; 4381 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin()); 4382 auto *PrivateRD = cast<RecordDecl>(FI->getType()->getAsTagDecl()); 4383 for (auto *FD : PrivateRD->fields()) { 4384 NeedsCleanup = NeedsCleanup || FD->getType().isDestructedType(); 4385 if (NeedsCleanup) 4386 break; 4387 } 4388 return NeedsCleanup; 4389 } 4390 4391 CGOpenMPRuntime::TaskResultTy 4392 CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc, 4393 const OMPExecutableDirective &D, 4394 llvm::Value *TaskFunction, QualType SharedsTy, 4395 Address Shareds, const OMPTaskDataTy &Data) { 4396 auto &C = CGM.getContext(); 4397 llvm::SmallVector<PrivateDataTy, 4> Privates; 4398 // Aggregate privates and sort them by the alignment. 
4399 auto I = Data.PrivateCopies.begin(); 4400 for (auto *E : Data.PrivateVars) { 4401 auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 4402 Privates.push_back(std::make_pair( 4403 C.getDeclAlign(VD), 4404 PrivateHelpersTy(VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()), 4405 /*PrivateElemInit=*/nullptr))); 4406 ++I; 4407 } 4408 I = Data.FirstprivateCopies.begin(); 4409 auto IElemInitRef = Data.FirstprivateInits.begin(); 4410 for (auto *E : Data.FirstprivateVars) { 4411 auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 4412 Privates.push_back(std::make_pair( 4413 C.getDeclAlign(VD), 4414 PrivateHelpersTy( 4415 VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()), 4416 cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl())))); 4417 ++I; 4418 ++IElemInitRef; 4419 } 4420 I = Data.LastprivateCopies.begin(); 4421 for (auto *E : Data.LastprivateVars) { 4422 auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 4423 Privates.push_back(std::make_pair( 4424 C.getDeclAlign(VD), 4425 PrivateHelpersTy(VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()), 4426 /*PrivateElemInit=*/nullptr))); 4427 ++I; 4428 } 4429 std::stable_sort(Privates.begin(), Privates.end(), stable_sort_comparator); 4430 auto KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1); 4431 // Build type kmp_routine_entry_t (if not built yet). 4432 emitKmpRoutineEntryT(KmpInt32Ty); 4433 // Build type kmp_task_t (if not built yet). 
4434 if (isOpenMPTaskLoopDirective(D.getDirectiveKind())) { 4435 if (SavedKmpTaskloopTQTy.isNull()) { 4436 SavedKmpTaskloopTQTy = C.getRecordType(createKmpTaskTRecordDecl( 4437 CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy)); 4438 } 4439 KmpTaskTQTy = SavedKmpTaskloopTQTy; 4440 } else { 4441 assert((D.getDirectiveKind() == OMPD_task || 4442 isOpenMPTargetExecutionDirective(D.getDirectiveKind()) || 4443 isOpenMPTargetDataManagementDirective(D.getDirectiveKind())) && 4444 "Expected taskloop, task or target directive"); 4445 if (SavedKmpTaskTQTy.isNull()) { 4446 SavedKmpTaskTQTy = C.getRecordType(createKmpTaskTRecordDecl( 4447 CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy)); 4448 } 4449 KmpTaskTQTy = SavedKmpTaskTQTy; 4450 } 4451 auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl()); 4452 // Build particular struct kmp_task_t for the given task. 4453 auto *KmpTaskTWithPrivatesQTyRD = 4454 createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates); 4455 auto KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD); 4456 QualType KmpTaskTWithPrivatesPtrQTy = 4457 C.getPointerType(KmpTaskTWithPrivatesQTy); 4458 auto *KmpTaskTWithPrivatesTy = CGF.ConvertType(KmpTaskTWithPrivatesQTy); 4459 auto *KmpTaskTWithPrivatesPtrTy = KmpTaskTWithPrivatesTy->getPointerTo(); 4460 auto *KmpTaskTWithPrivatesTySize = CGF.getTypeSize(KmpTaskTWithPrivatesQTy); 4461 QualType SharedsPtrTy = C.getPointerType(SharedsTy); 4462 4463 // Emit initial values for private copies (if any). 
4464 llvm::Value *TaskPrivatesMap = nullptr; 4465 auto *TaskPrivatesMapTy = 4466 std::next(cast<llvm::Function>(TaskFunction)->arg_begin(), 3)->getType(); 4467 if (!Privates.empty()) { 4468 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin()); 4469 TaskPrivatesMap = emitTaskPrivateMappingFunction( 4470 CGM, Loc, Data.PrivateVars, Data.FirstprivateVars, Data.LastprivateVars, 4471 FI->getType(), Privates); 4472 TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4473 TaskPrivatesMap, TaskPrivatesMapTy); 4474 } else { 4475 TaskPrivatesMap = llvm::ConstantPointerNull::get( 4476 cast<llvm::PointerType>(TaskPrivatesMapTy)); 4477 } 4478 // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid, 4479 // kmp_task_t *tt); 4480 auto *TaskEntry = emitProxyTaskFunction( 4481 CGM, Loc, D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy, 4482 KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction, 4483 TaskPrivatesMap); 4484 4485 // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid, 4486 // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, 4487 // kmp_routine_entry_t *task_entry); 4488 // Task flags. Format is taken from 4489 // http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp.h, 4490 // description of kmp_tasking_flags struct. 4491 enum { 4492 TiedFlag = 0x1, 4493 FinalFlag = 0x2, 4494 DestructorsFlag = 0x8, 4495 PriorityFlag = 0x20 4496 }; 4497 unsigned Flags = Data.Tied ? TiedFlag : 0; 4498 bool NeedsCleanup = false; 4499 if (!Privates.empty()) { 4500 NeedsCleanup = checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD); 4501 if (NeedsCleanup) 4502 Flags = Flags | DestructorsFlag; 4503 } 4504 if (Data.Priority.getInt()) 4505 Flags = Flags | PriorityFlag; 4506 auto *TaskFlags = 4507 Data.Final.getPointer() 4508 ? 
CGF.Builder.CreateSelect(Data.Final.getPointer(), 4509 CGF.Builder.getInt32(FinalFlag), 4510 CGF.Builder.getInt32(/*C=*/0)) 4511 : CGF.Builder.getInt32(Data.Final.getInt() ? FinalFlag : 0); 4512 TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags)); 4513 auto *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy)); 4514 llvm::Value *AllocArgs[] = {emitUpdateLocation(CGF, Loc), 4515 getThreadID(CGF, Loc), TaskFlags, 4516 KmpTaskTWithPrivatesTySize, SharedsSize, 4517 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4518 TaskEntry, KmpRoutineEntryPtrTy)}; 4519 auto *NewTask = CGF.EmitRuntimeCall( 4520 createRuntimeFunction(OMPRTL__kmpc_omp_task_alloc), AllocArgs); 4521 auto *NewTaskNewTaskTTy = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4522 NewTask, KmpTaskTWithPrivatesPtrTy); 4523 LValue Base = CGF.MakeNaturalAlignAddrLValue(NewTaskNewTaskTTy, 4524 KmpTaskTWithPrivatesQTy); 4525 LValue TDBase = 4526 CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin()); 4527 // Fill the data in the resulting kmp_task_t record. 4528 // Copy shareds if there are any. 4529 Address KmpTaskSharedsPtr = Address::invalid(); 4530 if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) { 4531 KmpTaskSharedsPtr = 4532 Address(CGF.EmitLoadOfScalar( 4533 CGF.EmitLValueForField( 4534 TDBase, *std::next(KmpTaskTQTyRD->field_begin(), 4535 KmpTaskTShareds)), 4536 Loc), 4537 CGF.getNaturalTypeAlignment(SharedsTy)); 4538 LValue Dest = CGF.MakeAddrLValue(KmpTaskSharedsPtr, SharedsTy); 4539 LValue Src = CGF.MakeAddrLValue(Shareds, SharedsTy); 4540 CGF.EmitAggregateCopy(Dest, Src, SharedsTy); 4541 } 4542 // Emit initial values for private copies (if any). 
4543 TaskResultTy Result; 4544 if (!Privates.empty()) { 4545 emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, Base, KmpTaskTWithPrivatesQTyRD, 4546 SharedsTy, SharedsPtrTy, Data, Privates, 4547 /*ForDup=*/false); 4548 if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) && 4549 (!Data.LastprivateVars.empty() || checkInitIsRequired(CGF, Privates))) { 4550 Result.TaskDupFn = emitTaskDupFunction( 4551 CGM, Loc, D, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTyRD, 4552 KmpTaskTQTyRD, SharedsTy, SharedsPtrTy, Data, Privates, 4553 /*WithLastIter=*/!Data.LastprivateVars.empty()); 4554 } 4555 } 4556 // Fields of union "kmp_cmplrdata_t" for destructors and priority. 4557 enum { Priority = 0, Destructors = 1 }; 4558 // Provide pointer to function with destructors for privates. 4559 auto FI = std::next(KmpTaskTQTyRD->field_begin(), Data1); 4560 auto *KmpCmplrdataUD = (*FI)->getType()->getAsUnionType()->getDecl(); 4561 if (NeedsCleanup) { 4562 llvm::Value *DestructorFn = emitDestructorsFunction( 4563 CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy, 4564 KmpTaskTWithPrivatesQTy); 4565 LValue Data1LV = CGF.EmitLValueForField(TDBase, *FI); 4566 LValue DestructorsLV = CGF.EmitLValueForField( 4567 Data1LV, *std::next(KmpCmplrdataUD->field_begin(), Destructors)); 4568 CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4569 DestructorFn, KmpRoutineEntryPtrTy), 4570 DestructorsLV); 4571 } 4572 // Set priority. 
4573 if (Data.Priority.getInt()) { 4574 LValue Data2LV = CGF.EmitLValueForField( 4575 TDBase, *std::next(KmpTaskTQTyRD->field_begin(), Data2)); 4576 LValue PriorityLV = CGF.EmitLValueForField( 4577 Data2LV, *std::next(KmpCmplrdataUD->field_begin(), Priority)); 4578 CGF.EmitStoreOfScalar(Data.Priority.getPointer(), PriorityLV); 4579 } 4580 Result.NewTask = NewTask; 4581 Result.TaskEntry = TaskEntry; 4582 Result.NewTaskNewTaskTTy = NewTaskNewTaskTTy; 4583 Result.TDBase = TDBase; 4584 Result.KmpTaskTQTyRD = KmpTaskTQTyRD; 4585 return Result; 4586 } 4587 4588 void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc, 4589 const OMPExecutableDirective &D, 4590 llvm::Value *TaskFunction, 4591 QualType SharedsTy, Address Shareds, 4592 const Expr *IfCond, 4593 const OMPTaskDataTy &Data) { 4594 if (!CGF.HaveInsertPoint()) 4595 return; 4596 4597 TaskResultTy Result = 4598 emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data); 4599 llvm::Value *NewTask = Result.NewTask; 4600 llvm::Value *TaskEntry = Result.TaskEntry; 4601 llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy; 4602 LValue TDBase = Result.TDBase; 4603 RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD; 4604 auto &C = CGM.getContext(); 4605 // Process list of dependences. 4606 Address DependenciesArray = Address::invalid(); 4607 unsigned NumDependencies = Data.Dependences.size(); 4608 if (NumDependencies) { 4609 // Dependence kind for RTL. 
4610 enum RTLDependenceKindTy { DepIn = 0x01, DepInOut = 0x3 }; 4611 enum RTLDependInfoFieldsTy { BaseAddr, Len, Flags }; 4612 RecordDecl *KmpDependInfoRD; 4613 QualType FlagsTy = 4614 C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false); 4615 llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy); 4616 if (KmpDependInfoTy.isNull()) { 4617 KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info"); 4618 KmpDependInfoRD->startDefinition(); 4619 addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType()); 4620 addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType()); 4621 addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy); 4622 KmpDependInfoRD->completeDefinition(); 4623 KmpDependInfoTy = C.getRecordType(KmpDependInfoRD); 4624 } else 4625 KmpDependInfoRD = cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl()); 4626 CharUnits DependencySize = C.getTypeSizeInChars(KmpDependInfoTy); 4627 // Define type kmp_depend_info[<Dependences.size()>]; 4628 QualType KmpDependInfoArrayTy = C.getConstantArrayType( 4629 KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies), 4630 ArrayType::Normal, /*IndexTypeQuals=*/0); 4631 // kmp_depend_info[<Dependences.size()>] deps; 4632 DependenciesArray = 4633 CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr"); 4634 for (unsigned i = 0; i < NumDependencies; ++i) { 4635 const Expr *E = Data.Dependences[i].second; 4636 auto Addr = CGF.EmitLValue(E); 4637 llvm::Value *Size; 4638 QualType Ty = E->getType(); 4639 if (auto *ASE = dyn_cast<OMPArraySectionExpr>(E->IgnoreParenImpCasts())) { 4640 LValue UpAddrLVal = 4641 CGF.EmitOMPArraySectionExpr(ASE, /*LowerBound=*/false); 4642 llvm::Value *UpAddr = 4643 CGF.Builder.CreateConstGEP1_32(UpAddrLVal.getPointer(), /*Idx0=*/1); 4644 llvm::Value *LowIntPtr = 4645 CGF.Builder.CreatePtrToInt(Addr.getPointer(), CGM.SizeTy); 4646 llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGM.SizeTy); 4647 Size = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr); 4648 } else 4649 Size = 
CGF.getTypeSize(Ty); 4650 auto Base = CGF.MakeAddrLValue( 4651 CGF.Builder.CreateConstArrayGEP(DependenciesArray, i, DependencySize), 4652 KmpDependInfoTy); 4653 // deps[i].base_addr = &<Dependences[i].second>; 4654 auto BaseAddrLVal = CGF.EmitLValueForField( 4655 Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr)); 4656 CGF.EmitStoreOfScalar( 4657 CGF.Builder.CreatePtrToInt(Addr.getPointer(), CGF.IntPtrTy), 4658 BaseAddrLVal); 4659 // deps[i].len = sizeof(<Dependences[i].second>); 4660 auto LenLVal = CGF.EmitLValueForField( 4661 Base, *std::next(KmpDependInfoRD->field_begin(), Len)); 4662 CGF.EmitStoreOfScalar(Size, LenLVal); 4663 // deps[i].flags = <Dependences[i].first>; 4664 RTLDependenceKindTy DepKind; 4665 switch (Data.Dependences[i].first) { 4666 case OMPC_DEPEND_in: 4667 DepKind = DepIn; 4668 break; 4669 // Out and InOut dependencies must use the same code. 4670 case OMPC_DEPEND_out: 4671 case OMPC_DEPEND_inout: 4672 DepKind = DepInOut; 4673 break; 4674 case OMPC_DEPEND_source: 4675 case OMPC_DEPEND_sink: 4676 case OMPC_DEPEND_unknown: 4677 llvm_unreachable("Unknown task dependence type"); 4678 } 4679 auto FlagsLVal = CGF.EmitLValueForField( 4680 Base, *std::next(KmpDependInfoRD->field_begin(), Flags)); 4681 CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind), 4682 FlagsLVal); 4683 } 4684 DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4685 CGF.Builder.CreateStructGEP(DependenciesArray, 0, CharUnits::Zero()), 4686 CGF.VoidPtrTy); 4687 } 4688 4689 // NOTE: routine and part_id fields are intialized by __kmpc_omp_task_alloc() 4690 // libcall. 
4691 // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid, 4692 // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list, 4693 // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if dependence 4694 // list is not empty 4695 auto *ThreadID = getThreadID(CGF, Loc); 4696 auto *UpLoc = emitUpdateLocation(CGF, Loc); 4697 llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask }; 4698 llvm::Value *DepTaskArgs[7]; 4699 if (NumDependencies) { 4700 DepTaskArgs[0] = UpLoc; 4701 DepTaskArgs[1] = ThreadID; 4702 DepTaskArgs[2] = NewTask; 4703 DepTaskArgs[3] = CGF.Builder.getInt32(NumDependencies); 4704 DepTaskArgs[4] = DependenciesArray.getPointer(); 4705 DepTaskArgs[5] = CGF.Builder.getInt32(0); 4706 DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); 4707 } 4708 auto &&ThenCodeGen = [this, &Data, TDBase, KmpTaskTQTyRD, NumDependencies, 4709 &TaskArgs, 4710 &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) { 4711 if (!Data.Tied) { 4712 auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId); 4713 auto PartIdLVal = CGF.EmitLValueForField(TDBase, *PartIdFI); 4714 CGF.EmitStoreOfScalar(CGF.Builder.getInt32(0), PartIdLVal); 4715 } 4716 if (NumDependencies) { 4717 CGF.EmitRuntimeCall( 4718 createRuntimeFunction(OMPRTL__kmpc_omp_task_with_deps), DepTaskArgs); 4719 } else { 4720 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task), 4721 TaskArgs); 4722 } 4723 // Check if parent region is untied and build return for untied task; 4724 if (auto *Region = 4725 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) 4726 Region->emitUntiedSwitch(CGF); 4727 }; 4728 4729 llvm::Value *DepWaitTaskArgs[6]; 4730 if (NumDependencies) { 4731 DepWaitTaskArgs[0] = UpLoc; 4732 DepWaitTaskArgs[1] = ThreadID; 4733 DepWaitTaskArgs[2] = CGF.Builder.getInt32(NumDependencies); 4734 DepWaitTaskArgs[3] = DependenciesArray.getPointer(); 4735 DepWaitTaskArgs[4] = CGF.Builder.getInt32(0); 4736 DepWaitTaskArgs[5] = 
llvm::ConstantPointerNull::get(CGF.VoidPtrTy); 4737 } 4738 auto &&ElseCodeGen = [&TaskArgs, ThreadID, NewTaskNewTaskTTy, TaskEntry, 4739 NumDependencies, &DepWaitTaskArgs, 4740 Loc](CodeGenFunction &CGF, PrePostActionTy &) { 4741 auto &RT = CGF.CGM.getOpenMPRuntime(); 4742 CodeGenFunction::RunCleanupsScope LocalScope(CGF); 4743 // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid, 4744 // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 4745 // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info 4746 // is specified. 4747 if (NumDependencies) 4748 CGF.EmitRuntimeCall(RT.createRuntimeFunction(OMPRTL__kmpc_omp_wait_deps), 4749 DepWaitTaskArgs); 4750 // Call proxy_task_entry(gtid, new_task); 4751 auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy, 4752 Loc](CodeGenFunction &CGF, PrePostActionTy &Action) { 4753 Action.Enter(CGF); 4754 llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy}; 4755 CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskEntry, 4756 OutlinedFnArgs); 4757 }; 4758 4759 // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid, 4760 // kmp_task_t *new_task); 4761 // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid, 4762 // kmp_task_t *new_task); 4763 RegionCodeGenTy RCG(CodeGen); 4764 CommonActionTy Action( 4765 RT.createRuntimeFunction(OMPRTL__kmpc_omp_task_begin_if0), TaskArgs, 4766 RT.createRuntimeFunction(OMPRTL__kmpc_omp_task_complete_if0), TaskArgs); 4767 RCG.setAction(Action); 4768 RCG(CGF); 4769 }; 4770 4771 if (IfCond) 4772 emitOMPIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen); 4773 else { 4774 RegionCodeGenTy ThenRCG(ThenCodeGen); 4775 ThenRCG(CGF); 4776 } 4777 } 4778 4779 void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc, 4780 const OMPLoopDirective &D, 4781 llvm::Value *TaskFunction, 4782 QualType SharedsTy, Address Shareds, 4783 const Expr *IfCond, 4784 const OMPTaskDataTy &Data) { 4785 if (!CGF.HaveInsertPoint()) 4786 
return; 4787 TaskResultTy Result = 4788 emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data); 4789 // NOTE: routine and part_id fields are intialized by __kmpc_omp_task_alloc() 4790 // libcall. 4791 // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int 4792 // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int 4793 // sched, kmp_uint64 grainsize, void *task_dup); 4794 llvm::Value *ThreadID = getThreadID(CGF, Loc); 4795 llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc); 4796 llvm::Value *IfVal; 4797 if (IfCond) { 4798 IfVal = CGF.Builder.CreateIntCast(CGF.EvaluateExprAsBool(IfCond), CGF.IntTy, 4799 /*isSigned=*/true); 4800 } else 4801 IfVal = llvm::ConstantInt::getSigned(CGF.IntTy, /*V=*/1); 4802 4803 LValue LBLVal = CGF.EmitLValueForField( 4804 Result.TDBase, 4805 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound)); 4806 auto *LBVar = 4807 cast<VarDecl>(cast<DeclRefExpr>(D.getLowerBoundVariable())->getDecl()); 4808 CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(), LBLVal.getQuals(), 4809 /*IsInitializer=*/true); 4810 LValue UBLVal = CGF.EmitLValueForField( 4811 Result.TDBase, 4812 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound)); 4813 auto *UBVar = 4814 cast<VarDecl>(cast<DeclRefExpr>(D.getUpperBoundVariable())->getDecl()); 4815 CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(), UBLVal.getQuals(), 4816 /*IsInitializer=*/true); 4817 LValue StLVal = CGF.EmitLValueForField( 4818 Result.TDBase, 4819 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTStride)); 4820 auto *StVar = 4821 cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl()); 4822 CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(), StLVal.getQuals(), 4823 /*IsInitializer=*/true); 4824 // Store reductions address. 
4825 LValue RedLVal = CGF.EmitLValueForField( 4826 Result.TDBase, 4827 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTReductions)); 4828 if (Data.Reductions) 4829 CGF.EmitStoreOfScalar(Data.Reductions, RedLVal); 4830 else { 4831 CGF.EmitNullInitialization(RedLVal.getAddress(), 4832 CGF.getContext().VoidPtrTy); 4833 } 4834 enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 }; 4835 llvm::Value *TaskArgs[] = { 4836 UpLoc, 4837 ThreadID, 4838 Result.NewTask, 4839 IfVal, 4840 LBLVal.getPointer(), 4841 UBLVal.getPointer(), 4842 CGF.EmitLoadOfScalar(StLVal, Loc), 4843 llvm::ConstantInt::getNullValue( 4844 CGF.IntTy), // Always 0 because taskgroup emitted by the compiler 4845 llvm::ConstantInt::getSigned( 4846 CGF.IntTy, Data.Schedule.getPointer() 4847 ? Data.Schedule.getInt() ? NumTasks : Grainsize 4848 : NoSchedule), 4849 Data.Schedule.getPointer() 4850 ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty, 4851 /*isSigned=*/false) 4852 : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0), 4853 Result.TaskDupFn ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4854 Result.TaskDupFn, CGF.VoidPtrTy) 4855 : llvm::ConstantPointerNull::get(CGF.VoidPtrTy)}; 4856 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_taskloop), TaskArgs); 4857 } 4858 4859 /// \brief Emit reduction operation for each element of array (required for 4860 /// array sections) LHS op = RHS. 4861 /// \param Type Type of array. 4862 /// \param LHSVar Variable on the left side of the reduction operation 4863 /// (references element of array in original variable). 4864 /// \param RHSVar Variable on the right side of the reduction operation 4865 /// (references element of array in original variable). 4866 /// \param RedOpGen Generator of reduction operation with use of LHSVar and 4867 /// RHSVar. 
4868 static void EmitOMPAggregateReduction( 4869 CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar, 4870 const VarDecl *RHSVar, 4871 const llvm::function_ref<void(CodeGenFunction &CGF, const Expr *, 4872 const Expr *, const Expr *)> &RedOpGen, 4873 const Expr *XExpr = nullptr, const Expr *EExpr = nullptr, 4874 const Expr *UpExpr = nullptr) { 4875 // Perform element-by-element initialization. 4876 QualType ElementTy; 4877 Address LHSAddr = CGF.GetAddrOfLocalVar(LHSVar); 4878 Address RHSAddr = CGF.GetAddrOfLocalVar(RHSVar); 4879 4880 // Drill down to the base element type on both arrays. 4881 auto ArrayTy = Type->getAsArrayTypeUnsafe(); 4882 auto NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, LHSAddr); 4883 4884 auto RHSBegin = RHSAddr.getPointer(); 4885 auto LHSBegin = LHSAddr.getPointer(); 4886 // Cast from pointer to array type to pointer to single element. 4887 auto LHSEnd = CGF.Builder.CreateGEP(LHSBegin, NumElements); 4888 // The basic structure here is a while-do loop. 4889 auto BodyBB = CGF.createBasicBlock("omp.arraycpy.body"); 4890 auto DoneBB = CGF.createBasicBlock("omp.arraycpy.done"); 4891 auto IsEmpty = 4892 CGF.Builder.CreateICmpEQ(LHSBegin, LHSEnd, "omp.arraycpy.isempty"); 4893 CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB); 4894 4895 // Enter the loop body, making that address the current address. 
4896 auto EntryBB = CGF.Builder.GetInsertBlock(); 4897 CGF.EmitBlock(BodyBB); 4898 4899 CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy); 4900 4901 llvm::PHINode *RHSElementPHI = CGF.Builder.CreatePHI( 4902 RHSBegin->getType(), 2, "omp.arraycpy.srcElementPast"); 4903 RHSElementPHI->addIncoming(RHSBegin, EntryBB); 4904 Address RHSElementCurrent = 4905 Address(RHSElementPHI, 4906 RHSAddr.getAlignment().alignmentOfArrayElement(ElementSize)); 4907 4908 llvm::PHINode *LHSElementPHI = CGF.Builder.CreatePHI( 4909 LHSBegin->getType(), 2, "omp.arraycpy.destElementPast"); 4910 LHSElementPHI->addIncoming(LHSBegin, EntryBB); 4911 Address LHSElementCurrent = 4912 Address(LHSElementPHI, 4913 LHSAddr.getAlignment().alignmentOfArrayElement(ElementSize)); 4914 4915 // Emit copy. 4916 CodeGenFunction::OMPPrivateScope Scope(CGF); 4917 Scope.addPrivate(LHSVar, [=]() -> Address { return LHSElementCurrent; }); 4918 Scope.addPrivate(RHSVar, [=]() -> Address { return RHSElementCurrent; }); 4919 Scope.Privatize(); 4920 RedOpGen(CGF, XExpr, EExpr, UpExpr); 4921 Scope.ForceCleanup(); 4922 4923 // Shift the address forward by one element. 4924 auto LHSElementNext = CGF.Builder.CreateConstGEP1_32( 4925 LHSElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element"); 4926 auto RHSElementNext = CGF.Builder.CreateConstGEP1_32( 4927 RHSElementPHI, /*Idx0=*/1, "omp.arraycpy.src.element"); 4928 // Check whether we've reached the end. 4929 auto Done = 4930 CGF.Builder.CreateICmpEQ(LHSElementNext, LHSEnd, "omp.arraycpy.done"); 4931 CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB); 4932 LHSElementPHI->addIncoming(LHSElementNext, CGF.Builder.GetInsertBlock()); 4933 RHSElementPHI->addIncoming(RHSElementNext, CGF.Builder.GetInsertBlock()); 4934 4935 // Done. 4936 CGF.EmitBlock(DoneBB, /*IsFinished=*/true); 4937 } 4938 4939 /// Emit reduction combiner. 
If the combiner is a simple expression emit it as 4940 /// is, otherwise consider it as combiner of UDR decl and emit it as a call of 4941 /// UDR combiner function. 4942 static void emitReductionCombiner(CodeGenFunction &CGF, 4943 const Expr *ReductionOp) { 4944 if (auto *CE = dyn_cast<CallExpr>(ReductionOp)) 4945 if (auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee())) 4946 if (auto *DRE = 4947 dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts())) 4948 if (auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) { 4949 std::pair<llvm::Function *, llvm::Function *> Reduction = 4950 CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD); 4951 RValue Func = RValue::get(Reduction.first); 4952 CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func); 4953 CGF.EmitIgnoredExpr(ReductionOp); 4954 return; 4955 } 4956 CGF.EmitIgnoredExpr(ReductionOp); 4957 } 4958 4959 llvm::Value *CGOpenMPRuntime::emitReductionFunction( 4960 CodeGenModule &CGM, SourceLocation Loc, llvm::Type *ArgsType, 4961 ArrayRef<const Expr *> Privates, ArrayRef<const Expr *> LHSExprs, 4962 ArrayRef<const Expr *> RHSExprs, ArrayRef<const Expr *> ReductionOps) { 4963 auto &C = CGM.getContext(); 4964 4965 // void reduction_func(void *LHSArg, void *RHSArg); 4966 FunctionArgList Args; 4967 ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 4968 ImplicitParamDecl::Other); 4969 ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 4970 ImplicitParamDecl::Other); 4971 Args.push_back(&LHSArg); 4972 Args.push_back(&RHSArg); 4973 auto &CGFI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 4974 auto *Fn = llvm::Function::Create( 4975 CGM.getTypes().GetFunctionType(CGFI), llvm::GlobalValue::InternalLinkage, 4976 ".omp.reduction.reduction_func", &CGM.getModule()); 4977 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI); 4978 CodeGenFunction CGF(CGM); 4979 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc); 
4980 4981 // Dst = (void*[n])(LHSArg); 4982 // Src = (void*[n])(RHSArg); 4983 Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4984 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)), 4985 ArgsType), CGF.getPointerAlign()); 4986 Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4987 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)), 4988 ArgsType), CGF.getPointerAlign()); 4989 4990 // ... 4991 // *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]); 4992 // ... 4993 CodeGenFunction::OMPPrivateScope Scope(CGF); 4994 auto IPriv = Privates.begin(); 4995 unsigned Idx = 0; 4996 for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) { 4997 auto RHSVar = cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl()); 4998 Scope.addPrivate(RHSVar, [&]() -> Address { 4999 return emitAddrOfVarFromArray(CGF, RHS, Idx, RHSVar); 5000 }); 5001 auto LHSVar = cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl()); 5002 Scope.addPrivate(LHSVar, [&]() -> Address { 5003 return emitAddrOfVarFromArray(CGF, LHS, Idx, LHSVar); 5004 }); 5005 QualType PrivTy = (*IPriv)->getType(); 5006 if (PrivTy->isVariablyModifiedType()) { 5007 // Get array size and emit VLA type. 5008 ++Idx; 5009 Address Elem = 5010 CGF.Builder.CreateConstArrayGEP(LHS, Idx, CGF.getPointerSize()); 5011 llvm::Value *Ptr = CGF.Builder.CreateLoad(Elem); 5012 auto *VLA = CGF.getContext().getAsVariableArrayType(PrivTy); 5013 auto *OVE = cast<OpaqueValueExpr>(VLA->getSizeExpr()); 5014 CodeGenFunction::OpaqueValueMapping OpaqueMap( 5015 CGF, OVE, RValue::get(CGF.Builder.CreatePtrToInt(Ptr, CGF.SizeTy))); 5016 CGF.EmitVariablyModifiedType(PrivTy); 5017 } 5018 } 5019 Scope.Privatize(); 5020 IPriv = Privates.begin(); 5021 auto ILHS = LHSExprs.begin(); 5022 auto IRHS = RHSExprs.begin(); 5023 for (auto *E : ReductionOps) { 5024 if ((*IPriv)->getType()->isArrayType()) { 5025 // Emit reduction for array section. 
5026 auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl()); 5027 auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl()); 5028 EmitOMPAggregateReduction( 5029 CGF, (*IPriv)->getType(), LHSVar, RHSVar, 5030 [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) { 5031 emitReductionCombiner(CGF, E); 5032 }); 5033 } else 5034 // Emit reduction for array subscript or single variable. 5035 emitReductionCombiner(CGF, E); 5036 ++IPriv; 5037 ++ILHS; 5038 ++IRHS; 5039 } 5040 Scope.ForceCleanup(); 5041 CGF.FinishFunction(); 5042 return Fn; 5043 } 5044 5045 void CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF, 5046 const Expr *ReductionOp, 5047 const Expr *PrivateRef, 5048 const DeclRefExpr *LHS, 5049 const DeclRefExpr *RHS) { 5050 if (PrivateRef->getType()->isArrayType()) { 5051 // Emit reduction for array section. 5052 auto *LHSVar = cast<VarDecl>(LHS->getDecl()); 5053 auto *RHSVar = cast<VarDecl>(RHS->getDecl()); 5054 EmitOMPAggregateReduction( 5055 CGF, PrivateRef->getType(), LHSVar, RHSVar, 5056 [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) { 5057 emitReductionCombiner(CGF, ReductionOp); 5058 }); 5059 } else 5060 // Emit reduction for array subscript or single variable. 
5061 emitReductionCombiner(CGF, ReductionOp); 5062 } 5063 5064 void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc, 5065 ArrayRef<const Expr *> Privates, 5066 ArrayRef<const Expr *> LHSExprs, 5067 ArrayRef<const Expr *> RHSExprs, 5068 ArrayRef<const Expr *> ReductionOps, 5069 ReductionOptionsTy Options) { 5070 if (!CGF.HaveInsertPoint()) 5071 return; 5072 5073 bool WithNowait = Options.WithNowait; 5074 bool SimpleReduction = Options.SimpleReduction; 5075 5076 // Next code should be emitted for reduction: 5077 // 5078 // static kmp_critical_name lock = { 0 }; 5079 // 5080 // void reduce_func(void *lhs[<n>], void *rhs[<n>]) { 5081 // *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]); 5082 // ... 5083 // *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1], 5084 // *(Type<n>-1*)rhs[<n>-1]); 5085 // } 5086 // 5087 // ... 5088 // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]}; 5089 // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList), 5090 // RedList, reduce_func, &<lock>)) { 5091 // case 1: 5092 // ... 5093 // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]); 5094 // ... 5095 // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>); 5096 // break; 5097 // case 2: 5098 // ... 5099 // Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i])); 5100 // ... 5101 // [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);] 5102 // break; 5103 // default:; 5104 // } 5105 // 5106 // if SimpleReduction is true, only the next code is generated: 5107 // ... 5108 // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]); 5109 // ... 
5110 5111 auto &C = CGM.getContext(); 5112 5113 if (SimpleReduction) { 5114 CodeGenFunction::RunCleanupsScope Scope(CGF); 5115 auto IPriv = Privates.begin(); 5116 auto ILHS = LHSExprs.begin(); 5117 auto IRHS = RHSExprs.begin(); 5118 for (auto *E : ReductionOps) { 5119 emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS), 5120 cast<DeclRefExpr>(*IRHS)); 5121 ++IPriv; 5122 ++ILHS; 5123 ++IRHS; 5124 } 5125 return; 5126 } 5127 5128 // 1. Build a list of reduction variables. 5129 // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]}; 5130 auto Size = RHSExprs.size(); 5131 for (auto *E : Privates) { 5132 if (E->getType()->isVariablyModifiedType()) 5133 // Reserve place for array size. 5134 ++Size; 5135 } 5136 llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size); 5137 QualType ReductionArrayTy = 5138 C.getConstantArrayType(C.VoidPtrTy, ArraySize, ArrayType::Normal, 5139 /*IndexTypeQuals=*/0); 5140 Address ReductionList = 5141 CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list"); 5142 auto IPriv = Privates.begin(); 5143 unsigned Idx = 0; 5144 for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) { 5145 Address Elem = 5146 CGF.Builder.CreateConstArrayGEP(ReductionList, Idx, CGF.getPointerSize()); 5147 CGF.Builder.CreateStore( 5148 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5149 CGF.EmitLValue(RHSExprs[I]).getPointer(), CGF.VoidPtrTy), 5150 Elem); 5151 if ((*IPriv)->getType()->isVariablyModifiedType()) { 5152 // Store array size. 5153 ++Idx; 5154 Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx, 5155 CGF.getPointerSize()); 5156 llvm::Value *Size = CGF.Builder.CreateIntCast( 5157 CGF.getVLASize( 5158 CGF.getContext().getAsVariableArrayType((*IPriv)->getType())) 5159 .NumElts, 5160 CGF.SizeTy, /*isSigned=*/false); 5161 CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy), 5162 Elem); 5163 } 5164 } 5165 5166 // 2. Emit reduce_func(). 
5167 auto *ReductionFn = emitReductionFunction( 5168 CGM, Loc, CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo(), 5169 Privates, LHSExprs, RHSExprs, ReductionOps); 5170 5171 // 3. Create static kmp_critical_name lock = { 0 }; 5172 auto *Lock = getCriticalRegionLock(".reduction"); 5173 5174 // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList), 5175 // RedList, reduce_func, &<lock>); 5176 auto *IdentTLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE); 5177 auto *ThreadId = getThreadID(CGF, Loc); 5178 auto *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy); 5179 auto *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5180 ReductionList.getPointer(), CGF.VoidPtrTy); 5181 llvm::Value *Args[] = { 5182 IdentTLoc, // ident_t *<loc> 5183 ThreadId, // i32 <gtid> 5184 CGF.Builder.getInt32(RHSExprs.size()), // i32 <n> 5185 ReductionArrayTySize, // size_type sizeof(RedList) 5186 RL, // void *RedList 5187 ReductionFn, // void (*) (void *, void *) <reduce_func> 5188 Lock // kmp_critical_name *&<lock> 5189 }; 5190 auto Res = CGF.EmitRuntimeCall( 5191 createRuntimeFunction(WithNowait ? OMPRTL__kmpc_reduce_nowait 5192 : OMPRTL__kmpc_reduce), 5193 Args); 5194 5195 // 5. Build switch(res) 5196 auto *DefaultBB = CGF.createBasicBlock(".omp.reduction.default"); 5197 auto *SwInst = CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2); 5198 5199 // 6. Build case 1: 5200 // ... 5201 // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]); 5202 // ... 
5203 // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>); 5204 // break; 5205 auto *Case1BB = CGF.createBasicBlock(".omp.reduction.case1"); 5206 SwInst->addCase(CGF.Builder.getInt32(1), Case1BB); 5207 CGF.EmitBlock(Case1BB); 5208 5209 // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>); 5210 llvm::Value *EndArgs[] = { 5211 IdentTLoc, // ident_t *<loc> 5212 ThreadId, // i32 <gtid> 5213 Lock // kmp_critical_name *&<lock> 5214 }; 5215 auto &&CodeGen = [&Privates, &LHSExprs, &RHSExprs, &ReductionOps]( 5216 CodeGenFunction &CGF, PrePostActionTy &Action) { 5217 auto &RT = CGF.CGM.getOpenMPRuntime(); 5218 auto IPriv = Privates.begin(); 5219 auto ILHS = LHSExprs.begin(); 5220 auto IRHS = RHSExprs.begin(); 5221 for (auto *E : ReductionOps) { 5222 RT.emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS), 5223 cast<DeclRefExpr>(*IRHS)); 5224 ++IPriv; 5225 ++ILHS; 5226 ++IRHS; 5227 } 5228 }; 5229 RegionCodeGenTy RCG(CodeGen); 5230 CommonActionTy Action( 5231 nullptr, llvm::None, 5232 createRuntimeFunction(WithNowait ? OMPRTL__kmpc_end_reduce_nowait 5233 : OMPRTL__kmpc_end_reduce), 5234 EndArgs); 5235 RCG.setAction(Action); 5236 RCG(CGF); 5237 5238 CGF.EmitBranch(DefaultBB); 5239 5240 // 7. Build case 2: 5241 // ... 5242 // Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i])); 5243 // ... 
5244 // break; 5245 auto *Case2BB = CGF.createBasicBlock(".omp.reduction.case2"); 5246 SwInst->addCase(CGF.Builder.getInt32(2), Case2BB); 5247 CGF.EmitBlock(Case2BB); 5248 5249 auto &&AtomicCodeGen = [Loc, &Privates, &LHSExprs, &RHSExprs, &ReductionOps]( 5250 CodeGenFunction &CGF, PrePostActionTy &Action) { 5251 auto ILHS = LHSExprs.begin(); 5252 auto IRHS = RHSExprs.begin(); 5253 auto IPriv = Privates.begin(); 5254 for (auto *E : ReductionOps) { 5255 const Expr *XExpr = nullptr; 5256 const Expr *EExpr = nullptr; 5257 const Expr *UpExpr = nullptr; 5258 BinaryOperatorKind BO = BO_Comma; 5259 if (auto *BO = dyn_cast<BinaryOperator>(E)) { 5260 if (BO->getOpcode() == BO_Assign) { 5261 XExpr = BO->getLHS(); 5262 UpExpr = BO->getRHS(); 5263 } 5264 } 5265 // Try to emit update expression as a simple atomic. 5266 auto *RHSExpr = UpExpr; 5267 if (RHSExpr) { 5268 // Analyze RHS part of the whole expression. 5269 if (auto *ACO = dyn_cast<AbstractConditionalOperator>( 5270 RHSExpr->IgnoreParenImpCasts())) { 5271 // If this is a conditional operator, analyze its condition for 5272 // min/max reduction operator. 
5273 RHSExpr = ACO->getCond(); 5274 } 5275 if (auto *BORHS = 5276 dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) { 5277 EExpr = BORHS->getRHS(); 5278 BO = BORHS->getOpcode(); 5279 } 5280 } 5281 if (XExpr) { 5282 auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl()); 5283 auto &&AtomicRedGen = [BO, VD, 5284 Loc](CodeGenFunction &CGF, const Expr *XExpr, 5285 const Expr *EExpr, const Expr *UpExpr) { 5286 LValue X = CGF.EmitLValue(XExpr); 5287 RValue E; 5288 if (EExpr) 5289 E = CGF.EmitAnyExpr(EExpr); 5290 CGF.EmitOMPAtomicSimpleUpdateExpr( 5291 X, E, BO, /*IsXLHSInRHSPart=*/true, 5292 llvm::AtomicOrdering::Monotonic, Loc, 5293 [&CGF, UpExpr, VD, Loc](RValue XRValue) { 5294 CodeGenFunction::OMPPrivateScope PrivateScope(CGF); 5295 PrivateScope.addPrivate( 5296 VD, [&CGF, VD, XRValue, Loc]() -> Address { 5297 Address LHSTemp = CGF.CreateMemTemp(VD->getType()); 5298 CGF.emitOMPSimpleStore( 5299 CGF.MakeAddrLValue(LHSTemp, VD->getType()), XRValue, 5300 VD->getType().getNonReferenceType(), Loc); 5301 return LHSTemp; 5302 }); 5303 (void)PrivateScope.Privatize(); 5304 return CGF.EmitAnyExpr(UpExpr); 5305 }); 5306 }; 5307 if ((*IPriv)->getType()->isArrayType()) { 5308 // Emit atomic reduction for array section. 5309 auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl()); 5310 EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar, 5311 AtomicRedGen, XExpr, EExpr, UpExpr); 5312 } else 5313 // Emit atomic reduction for array subscript or single variable. 5314 AtomicRedGen(CGF, XExpr, EExpr, UpExpr); 5315 } else { 5316 // Emit as a critical region. 
5317 auto &&CritRedGen = [E, Loc](CodeGenFunction &CGF, const Expr *, 5318 const Expr *, const Expr *) { 5319 auto &RT = CGF.CGM.getOpenMPRuntime(); 5320 RT.emitCriticalRegion( 5321 CGF, ".atomic_reduction", 5322 [=](CodeGenFunction &CGF, PrePostActionTy &Action) { 5323 Action.Enter(CGF); 5324 emitReductionCombiner(CGF, E); 5325 }, 5326 Loc); 5327 }; 5328 if ((*IPriv)->getType()->isArrayType()) { 5329 auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl()); 5330 auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl()); 5331 EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar, 5332 CritRedGen); 5333 } else 5334 CritRedGen(CGF, nullptr, nullptr, nullptr); 5335 } 5336 ++ILHS; 5337 ++IRHS; 5338 ++IPriv; 5339 } 5340 }; 5341 RegionCodeGenTy AtomicRCG(AtomicCodeGen); 5342 if (!WithNowait) { 5343 // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>); 5344 llvm::Value *EndArgs[] = { 5345 IdentTLoc, // ident_t *<loc> 5346 ThreadId, // i32 <gtid> 5347 Lock // kmp_critical_name *&<lock> 5348 }; 5349 CommonActionTy Action(nullptr, llvm::None, 5350 createRuntimeFunction(OMPRTL__kmpc_end_reduce), 5351 EndArgs); 5352 AtomicRCG.setAction(Action); 5353 AtomicRCG(CGF); 5354 } else 5355 AtomicRCG(CGF); 5356 5357 CGF.EmitBranch(DefaultBB); 5358 CGF.EmitBlock(DefaultBB, /*IsFinished=*/true); 5359 } 5360 5361 /// Generates unique name for artificial threadprivate variables. 5362 /// Format is: <Prefix> "." <Decl_mangled_name> "_" "<Decl_start_loc_raw_enc>" 5363 static std::string generateUniqueName(CodeGenModule &CGM, StringRef Prefix, 5364 const Expr *Ref) { 5365 SmallString<256> Buffer; 5366 llvm::raw_svector_ostream Out(Buffer); 5367 const clang::DeclRefExpr *DE; 5368 const VarDecl *D = ::getBaseDecl(Ref, DE); 5369 if (!D) 5370 D = cast<VarDecl>(cast<DeclRefExpr>(Ref)->getDecl()); 5371 D = D->getCanonicalDecl(); 5372 Out << Prefix << "." 5373 << (D->isLocalVarDeclOrParm() ? 
D->getName() : CGM.getMangledName(D)) 5374 << "_" << D->getCanonicalDecl()->getLocStart().getRawEncoding(); 5375 return Out.str(); 5376 } 5377 5378 /// Emits reduction initializer function: 5379 /// \code 5380 /// void @.red_init(void* %arg) { 5381 /// %0 = bitcast void* %arg to <type>* 5382 /// store <type> <init>, <type>* %0 5383 /// ret void 5384 /// } 5385 /// \endcode 5386 static llvm::Value *emitReduceInitFunction(CodeGenModule &CGM, 5387 SourceLocation Loc, 5388 ReductionCodeGen &RCG, unsigned N) { 5389 auto &C = CGM.getContext(); 5390 FunctionArgList Args; 5391 ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 5392 ImplicitParamDecl::Other); 5393 Args.emplace_back(&Param); 5394 auto &FnInfo = 5395 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 5396 auto *FnTy = CGM.getTypes().GetFunctionType(FnInfo); 5397 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage, 5398 ".red_init.", &CGM.getModule()); 5399 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo); 5400 CodeGenFunction CGF(CGM); 5401 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc); 5402 Address PrivateAddr = CGF.EmitLoadOfPointer( 5403 CGF.GetAddrOfLocalVar(&Param), 5404 C.getPointerType(C.VoidPtrTy).castAs<PointerType>()); 5405 llvm::Value *Size = nullptr; 5406 // If the size of the reduction item is non-constant, load it from global 5407 // threadprivate variable. 
5408 if (RCG.getSizes(N).second) { 5409 Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate( 5410 CGF, CGM.getContext().getSizeType(), 5411 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N))); 5412 Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false, 5413 CGM.getContext().getSizeType(), Loc); 5414 } 5415 RCG.emitAggregateType(CGF, N, Size); 5416 LValue SharedLVal; 5417 // If initializer uses initializer from declare reduction construct, emit a 5418 // pointer to the address of the original reduction item (reuired by reduction 5419 // initializer) 5420 if (RCG.usesReductionInitializer(N)) { 5421 Address SharedAddr = 5422 CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate( 5423 CGF, CGM.getContext().VoidPtrTy, 5424 generateUniqueName(CGM, "reduction", RCG.getRefExpr(N))); 5425 SharedLVal = CGF.MakeAddrLValue(SharedAddr, CGM.getContext().VoidPtrTy); 5426 } else { 5427 SharedLVal = CGF.MakeNaturalAlignAddrLValue( 5428 llvm::ConstantPointerNull::get(CGM.VoidPtrTy), 5429 CGM.getContext().VoidPtrTy); 5430 } 5431 // Emit the initializer: 5432 // %0 = bitcast void* %arg to <type>* 5433 // store <type> <init>, <type>* %0 5434 RCG.emitInitialization(CGF, N, PrivateAddr, SharedLVal, 5435 [](CodeGenFunction &) { return false; }); 5436 CGF.FinishFunction(); 5437 return Fn; 5438 } 5439 5440 /// Emits reduction combiner function: 5441 /// \code 5442 /// void @.red_comb(void* %arg0, void* %arg1) { 5443 /// %lhs = bitcast void* %arg0 to <type>* 5444 /// %rhs = bitcast void* %arg1 to <type>* 5445 /// %2 = <ReductionOp>(<type>* %lhs, <type>* %rhs) 5446 /// store <type> %2, <type>* %lhs 5447 /// ret void 5448 /// } 5449 /// \endcode 5450 static llvm::Value *emitReduceCombFunction(CodeGenModule &CGM, 5451 SourceLocation Loc, 5452 ReductionCodeGen &RCG, unsigned N, 5453 const Expr *ReductionOp, 5454 const Expr *LHS, const Expr *RHS, 5455 const Expr *PrivateRef) { 5456 auto &C = CGM.getContext(); 5457 auto *LHSVD = 
cast<VarDecl>(cast<DeclRefExpr>(LHS)->getDecl()); 5458 auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(RHS)->getDecl()); 5459 FunctionArgList Args; 5460 ImplicitParamDecl ParamInOut(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 5461 C.VoidPtrTy, ImplicitParamDecl::Other); 5462 ImplicitParamDecl ParamIn(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 5463 ImplicitParamDecl::Other); 5464 Args.emplace_back(&ParamInOut); 5465 Args.emplace_back(&ParamIn); 5466 auto &FnInfo = 5467 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 5468 auto *FnTy = CGM.getTypes().GetFunctionType(FnInfo); 5469 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage, 5470 ".red_comb.", &CGM.getModule()); 5471 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo); 5472 CodeGenFunction CGF(CGM); 5473 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc); 5474 llvm::Value *Size = nullptr; 5475 // If the size of the reduction item is non-constant, load it from global 5476 // threadprivate variable. 5477 if (RCG.getSizes(N).second) { 5478 Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate( 5479 CGF, CGM.getContext().getSizeType(), 5480 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N))); 5481 Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false, 5482 CGM.getContext().getSizeType(), Loc); 5483 } 5484 RCG.emitAggregateType(CGF, N, Size); 5485 // Remap lhs and rhs variables to the addresses of the function arguments. 5486 // %lhs = bitcast void* %arg0 to <type>* 5487 // %rhs = bitcast void* %arg1 to <type>* 5488 CodeGenFunction::OMPPrivateScope PrivateScope(CGF); 5489 PrivateScope.addPrivate(LHSVD, [&C, &CGF, &ParamInOut, LHSVD]() -> Address { 5490 // Pull out the pointer to the variable. 
5491 Address PtrAddr = CGF.EmitLoadOfPointer( 5492 CGF.GetAddrOfLocalVar(&ParamInOut), 5493 C.getPointerType(C.VoidPtrTy).castAs<PointerType>()); 5494 return CGF.Builder.CreateElementBitCast( 5495 PtrAddr, CGF.ConvertTypeForMem(LHSVD->getType())); 5496 }); 5497 PrivateScope.addPrivate(RHSVD, [&C, &CGF, &ParamIn, RHSVD]() -> Address { 5498 // Pull out the pointer to the variable. 5499 Address PtrAddr = CGF.EmitLoadOfPointer( 5500 CGF.GetAddrOfLocalVar(&ParamIn), 5501 C.getPointerType(C.VoidPtrTy).castAs<PointerType>()); 5502 return CGF.Builder.CreateElementBitCast( 5503 PtrAddr, CGF.ConvertTypeForMem(RHSVD->getType())); 5504 }); 5505 PrivateScope.Privatize(); 5506 // Emit the combiner body: 5507 // %2 = <ReductionOp>(<type> *%lhs, <type> *%rhs) 5508 // store <type> %2, <type>* %lhs 5509 CGM.getOpenMPRuntime().emitSingleReductionCombiner( 5510 CGF, ReductionOp, PrivateRef, cast<DeclRefExpr>(LHS), 5511 cast<DeclRefExpr>(RHS)); 5512 CGF.FinishFunction(); 5513 return Fn; 5514 } 5515 5516 /// Emits reduction finalizer function: 5517 /// \code 5518 /// void @.red_fini(void* %arg) { 5519 /// %0 = bitcast void* %arg to <type>* 5520 /// <destroy>(<type>* %0) 5521 /// ret void 5522 /// } 5523 /// \endcode 5524 static llvm::Value *emitReduceFiniFunction(CodeGenModule &CGM, 5525 SourceLocation Loc, 5526 ReductionCodeGen &RCG, unsigned N) { 5527 if (!RCG.needCleanups(N)) 5528 return nullptr; 5529 auto &C = CGM.getContext(); 5530 FunctionArgList Args; 5531 ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 5532 ImplicitParamDecl::Other); 5533 Args.emplace_back(&Param); 5534 auto &FnInfo = 5535 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 5536 auto *FnTy = CGM.getTypes().GetFunctionType(FnInfo); 5537 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage, 5538 ".red_fini.", &CGM.getModule()); 5539 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo); 5540 CodeGenFunction CGF(CGM); 5541 
CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc); 5542 Address PrivateAddr = CGF.EmitLoadOfPointer( 5543 CGF.GetAddrOfLocalVar(&Param), 5544 C.getPointerType(C.VoidPtrTy).castAs<PointerType>()); 5545 llvm::Value *Size = nullptr; 5546 // If the size of the reduction item is non-constant, load it from global 5547 // threadprivate variable. 5548 if (RCG.getSizes(N).second) { 5549 Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate( 5550 CGF, CGM.getContext().getSizeType(), 5551 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N))); 5552 Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false, 5553 CGM.getContext().getSizeType(), Loc); 5554 } 5555 RCG.emitAggregateType(CGF, N, Size); 5556 // Emit the finalizer body: 5557 // <destroy>(<type>* %0) 5558 RCG.emitCleanups(CGF, N, PrivateAddr); 5559 CGF.FinishFunction(); 5560 return Fn; 5561 } 5562 5563 llvm::Value *CGOpenMPRuntime::emitTaskReductionInit( 5564 CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs, 5565 ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) { 5566 if (!CGF.HaveInsertPoint() || Data.ReductionVars.empty()) 5567 return nullptr; 5568 5569 // Build typedef struct: 5570 // kmp_task_red_input { 5571 // void *reduce_shar; // shared reduction item 5572 // size_t reduce_size; // size of data item 5573 // void *reduce_init; // data initialization routine 5574 // void *reduce_fini; // data finalization routine 5575 // void *reduce_comb; // data combiner routine 5576 // kmp_task_red_flags_t flags; // flags for additional info from compiler 5577 // } kmp_task_red_input_t; 5578 ASTContext &C = CGM.getContext(); 5579 auto *RD = C.buildImplicitRecord("kmp_task_red_input_t"); 5580 RD->startDefinition(); 5581 const FieldDecl *SharedFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy); 5582 const FieldDecl *SizeFD = addFieldToRecordDecl(C, RD, C.getSizeType()); 5583 const FieldDecl *InitFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy); 5584 
const FieldDecl *FiniFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy); 5585 const FieldDecl *CombFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy); 5586 const FieldDecl *FlagsFD = addFieldToRecordDecl( 5587 C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/false)); 5588 RD->completeDefinition(); 5589 QualType RDType = C.getRecordType(RD); 5590 unsigned Size = Data.ReductionVars.size(); 5591 llvm::APInt ArraySize(/*numBits=*/64, Size); 5592 QualType ArrayRDType = C.getConstantArrayType( 5593 RDType, ArraySize, ArrayType::Normal, /*IndexTypeQuals=*/0); 5594 // kmp_task_red_input_t .rd_input.[Size]; 5595 Address TaskRedInput = CGF.CreateMemTemp(ArrayRDType, ".rd_input."); 5596 ReductionCodeGen RCG(Data.ReductionVars, Data.ReductionCopies, 5597 Data.ReductionOps); 5598 for (unsigned Cnt = 0; Cnt < Size; ++Cnt) { 5599 // kmp_task_red_input_t &ElemLVal = .rd_input.[Cnt]; 5600 llvm::Value *Idxs[] = {llvm::ConstantInt::get(CGM.SizeTy, /*V=*/0), 5601 llvm::ConstantInt::get(CGM.SizeTy, Cnt)}; 5602 llvm::Value *GEP = CGF.EmitCheckedInBoundsGEP( 5603 TaskRedInput.getPointer(), Idxs, 5604 /*SignedIndices=*/false, /*IsSubtraction=*/false, Loc, 5605 ".rd_input.gep."); 5606 LValue ElemLVal = CGF.MakeNaturalAlignAddrLValue(GEP, RDType); 5607 // ElemLVal.reduce_shar = &Shareds[Cnt]; 5608 LValue SharedLVal = CGF.EmitLValueForField(ElemLVal, SharedFD); 5609 RCG.emitSharedLValue(CGF, Cnt); 5610 llvm::Value *CastedShared = 5611 CGF.EmitCastToVoidPtr(RCG.getSharedLValue(Cnt).getPointer()); 5612 CGF.EmitStoreOfScalar(CastedShared, SharedLVal); 5613 RCG.emitAggregateType(CGF, Cnt); 5614 llvm::Value *SizeValInChars; 5615 llvm::Value *SizeVal; 5616 std::tie(SizeValInChars, SizeVal) = RCG.getSizes(Cnt); 5617 // We use delayed creation/initialization for VLAs, array sections and 5618 // custom reduction initializations. 
It is required because runtime does not 5619 // provide the way to pass the sizes of VLAs/array sections to 5620 // initializer/combiner/finalizer functions and does not pass the pointer to 5621 // original reduction item to the initializer. Instead threadprivate global 5622 // variables are used to store these values and use them in the functions. 5623 bool DelayedCreation = !!SizeVal; 5624 SizeValInChars = CGF.Builder.CreateIntCast(SizeValInChars, CGM.SizeTy, 5625 /*isSigned=*/false); 5626 LValue SizeLVal = CGF.EmitLValueForField(ElemLVal, SizeFD); 5627 CGF.EmitStoreOfScalar(SizeValInChars, SizeLVal); 5628 // ElemLVal.reduce_init = init; 5629 LValue InitLVal = CGF.EmitLValueForField(ElemLVal, InitFD); 5630 llvm::Value *InitAddr = 5631 CGF.EmitCastToVoidPtr(emitReduceInitFunction(CGM, Loc, RCG, Cnt)); 5632 CGF.EmitStoreOfScalar(InitAddr, InitLVal); 5633 DelayedCreation = DelayedCreation || RCG.usesReductionInitializer(Cnt); 5634 // ElemLVal.reduce_fini = fini; 5635 LValue FiniLVal = CGF.EmitLValueForField(ElemLVal, FiniFD); 5636 llvm::Value *Fini = emitReduceFiniFunction(CGM, Loc, RCG, Cnt); 5637 llvm::Value *FiniAddr = Fini 5638 ? 
CGF.EmitCastToVoidPtr(Fini) 5639 : llvm::ConstantPointerNull::get(CGM.VoidPtrTy); 5640 CGF.EmitStoreOfScalar(FiniAddr, FiniLVal); 5641 // ElemLVal.reduce_comb = comb; 5642 LValue CombLVal = CGF.EmitLValueForField(ElemLVal, CombFD); 5643 llvm::Value *CombAddr = CGF.EmitCastToVoidPtr(emitReduceCombFunction( 5644 CGM, Loc, RCG, Cnt, Data.ReductionOps[Cnt], LHSExprs[Cnt], 5645 RHSExprs[Cnt], Data.ReductionCopies[Cnt])); 5646 CGF.EmitStoreOfScalar(CombAddr, CombLVal); 5647 // ElemLVal.flags = 0; 5648 LValue FlagsLVal = CGF.EmitLValueForField(ElemLVal, FlagsFD); 5649 if (DelayedCreation) { 5650 CGF.EmitStoreOfScalar( 5651 llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/1, /*IsSigned=*/true), 5652 FlagsLVal); 5653 } else 5654 CGF.EmitNullInitialization(FlagsLVal.getAddress(), FlagsLVal.getType()); 5655 } 5656 // Build call void *__kmpc_task_reduction_init(int gtid, int num_data, void 5657 // *data); 5658 llvm::Value *Args[] = { 5659 CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy, 5660 /*isSigned=*/true), 5661 llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true), 5662 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TaskRedInput.getPointer(), 5663 CGM.VoidPtrTy)}; 5664 return CGF.EmitRuntimeCall( 5665 createRuntimeFunction(OMPRTL__kmpc_task_reduction_init), Args); 5666 } 5667 5668 void CGOpenMPRuntime::emitTaskReductionFixups(CodeGenFunction &CGF, 5669 SourceLocation Loc, 5670 ReductionCodeGen &RCG, 5671 unsigned N) { 5672 auto Sizes = RCG.getSizes(N); 5673 // Emit threadprivate global variable if the type is non-constant 5674 // (Sizes.second = nullptr). 
5675 if (Sizes.second) { 5676 llvm::Value *SizeVal = CGF.Builder.CreateIntCast(Sizes.second, CGM.SizeTy, 5677 /*isSigned=*/false); 5678 Address SizeAddr = getAddrOfArtificialThreadPrivate( 5679 CGF, CGM.getContext().getSizeType(), 5680 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N))); 5681 CGF.Builder.CreateStore(SizeVal, SizeAddr, /*IsVolatile=*/false); 5682 } 5683 // Store address of the original reduction item if custom initializer is used. 5684 if (RCG.usesReductionInitializer(N)) { 5685 Address SharedAddr = getAddrOfArtificialThreadPrivate( 5686 CGF, CGM.getContext().VoidPtrTy, 5687 generateUniqueName(CGM, "reduction", RCG.getRefExpr(N))); 5688 CGF.Builder.CreateStore( 5689 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5690 RCG.getSharedLValue(N).getPointer(), CGM.VoidPtrTy), 5691 SharedAddr, /*IsVolatile=*/false); 5692 } 5693 } 5694 5695 Address CGOpenMPRuntime::getTaskReductionItem(CodeGenFunction &CGF, 5696 SourceLocation Loc, 5697 llvm::Value *ReductionsPtr, 5698 LValue SharedLVal) { 5699 // Build call void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void 5700 // *d); 5701 llvm::Value *Args[] = { 5702 CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy, 5703 /*isSigned=*/true), 5704 ReductionsPtr, 5705 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(SharedLVal.getPointer(), 5706 CGM.VoidPtrTy)}; 5707 return Address( 5708 CGF.EmitRuntimeCall( 5709 createRuntimeFunction(OMPRTL__kmpc_task_reduction_get_th_data), Args), 5710 SharedLVal.getAlignment()); 5711 } 5712 5713 void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF, 5714 SourceLocation Loc) { 5715 if (!CGF.HaveInsertPoint()) 5716 return; 5717 // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32 5718 // global_tid); 5719 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 5720 // Ignore return result until untied tasks are supported. 
5721 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_taskwait), Args); 5722 if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) 5723 Region->emitUntiedSwitch(CGF); 5724 } 5725 5726 void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF, 5727 OpenMPDirectiveKind InnerKind, 5728 const RegionCodeGenTy &CodeGen, 5729 bool HasCancel) { 5730 if (!CGF.HaveInsertPoint()) 5731 return; 5732 InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel); 5733 CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr); 5734 } 5735 5736 namespace { 5737 enum RTCancelKind { 5738 CancelNoreq = 0, 5739 CancelParallel = 1, 5740 CancelLoop = 2, 5741 CancelSections = 3, 5742 CancelTaskgroup = 4 5743 }; 5744 } // anonymous namespace 5745 5746 static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) { 5747 RTCancelKind CancelKind = CancelNoreq; 5748 if (CancelRegion == OMPD_parallel) 5749 CancelKind = CancelParallel; 5750 else if (CancelRegion == OMPD_for) 5751 CancelKind = CancelLoop; 5752 else if (CancelRegion == OMPD_sections) 5753 CancelKind = CancelSections; 5754 else { 5755 assert(CancelRegion == OMPD_taskgroup); 5756 CancelKind = CancelTaskgroup; 5757 } 5758 return CancelKind; 5759 } 5760 5761 void CGOpenMPRuntime::emitCancellationPointCall( 5762 CodeGenFunction &CGF, SourceLocation Loc, 5763 OpenMPDirectiveKind CancelRegion) { 5764 if (!CGF.HaveInsertPoint()) 5765 return; 5766 // Build call kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32 5767 // global_tid, kmp_int32 cncl_kind); 5768 if (auto *OMPRegionInfo = 5769 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) { 5770 // For 'cancellation point taskgroup', the task region info may not have a 5771 // cancel. This may instead happen in another adjacent task. 
5772 if (CancelRegion == OMPD_taskgroup || OMPRegionInfo->hasCancel()) { 5773 llvm::Value *Args[] = { 5774 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 5775 CGF.Builder.getInt32(getCancellationKind(CancelRegion))}; 5776 // Ignore return result until untied tasks are supported. 5777 auto *Result = CGF.EmitRuntimeCall( 5778 createRuntimeFunction(OMPRTL__kmpc_cancellationpoint), Args); 5779 // if (__kmpc_cancellationpoint()) { 5780 // exit from construct; 5781 // } 5782 auto *ExitBB = CGF.createBasicBlock(".cancel.exit"); 5783 auto *ContBB = CGF.createBasicBlock(".cancel.continue"); 5784 auto *Cmp = CGF.Builder.CreateIsNotNull(Result); 5785 CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB); 5786 CGF.EmitBlock(ExitBB); 5787 // exit from construct; 5788 auto CancelDest = 5789 CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind()); 5790 CGF.EmitBranchThroughCleanup(CancelDest); 5791 CGF.EmitBlock(ContBB, /*IsFinished=*/true); 5792 } 5793 } 5794 } 5795 5796 void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc, 5797 const Expr *IfCond, 5798 OpenMPDirectiveKind CancelRegion) { 5799 if (!CGF.HaveInsertPoint()) 5800 return; 5801 // Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid, 5802 // kmp_int32 cncl_kind); 5803 if (auto *OMPRegionInfo = 5804 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) { 5805 auto &&ThenGen = [Loc, CancelRegion, OMPRegionInfo](CodeGenFunction &CGF, 5806 PrePostActionTy &) { 5807 auto &RT = CGF.CGM.getOpenMPRuntime(); 5808 llvm::Value *Args[] = { 5809 RT.emitUpdateLocation(CGF, Loc), RT.getThreadID(CGF, Loc), 5810 CGF.Builder.getInt32(getCancellationKind(CancelRegion))}; 5811 // Ignore return result until untied tasks are supported. 
5812 auto *Result = CGF.EmitRuntimeCall( 5813 RT.createRuntimeFunction(OMPRTL__kmpc_cancel), Args); 5814 // if (__kmpc_cancel()) { 5815 // exit from construct; 5816 // } 5817 auto *ExitBB = CGF.createBasicBlock(".cancel.exit"); 5818 auto *ContBB = CGF.createBasicBlock(".cancel.continue"); 5819 auto *Cmp = CGF.Builder.CreateIsNotNull(Result); 5820 CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB); 5821 CGF.EmitBlock(ExitBB); 5822 // exit from construct; 5823 auto CancelDest = 5824 CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind()); 5825 CGF.EmitBranchThroughCleanup(CancelDest); 5826 CGF.EmitBlock(ContBB, /*IsFinished=*/true); 5827 }; 5828 if (IfCond) 5829 emitOMPIfClause(CGF, IfCond, ThenGen, 5830 [](CodeGenFunction &, PrePostActionTy &) {}); 5831 else { 5832 RegionCodeGenTy ThenRCG(ThenGen); 5833 ThenRCG(CGF); 5834 } 5835 } 5836 } 5837 5838 /// \brief Obtain information that uniquely identifies a target entry. This 5839 /// consists of the file and device IDs as well as line number associated with 5840 /// the relevant entry source location. 
5841 static void getTargetEntryUniqueInfo(ASTContext &C, SourceLocation Loc, 5842 unsigned &DeviceID, unsigned &FileID, 5843 unsigned &LineNum) { 5844 5845 auto &SM = C.getSourceManager(); 5846 5847 // The loc should be always valid and have a file ID (the user cannot use 5848 // #pragma directives in macros) 5849 5850 assert(Loc.isValid() && "Source location is expected to be always valid."); 5851 assert(Loc.isFileID() && "Source location is expected to refer to a file."); 5852 5853 PresumedLoc PLoc = SM.getPresumedLoc(Loc); 5854 assert(PLoc.isValid() && "Source location is expected to be always valid."); 5855 5856 llvm::sys::fs::UniqueID ID; 5857 if (llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID)) 5858 llvm_unreachable("Source file with target region no longer exists!"); 5859 5860 DeviceID = ID.getDevice(); 5861 FileID = ID.getFile(); 5862 LineNum = PLoc.getLine(); 5863 } 5864 5865 void CGOpenMPRuntime::emitTargetOutlinedFunction( 5866 const OMPExecutableDirective &D, StringRef ParentName, 5867 llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, 5868 bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) { 5869 assert(!ParentName.empty() && "Invalid target region parent name!"); 5870 5871 emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID, 5872 IsOffloadEntry, CodeGen); 5873 } 5874 5875 void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper( 5876 const OMPExecutableDirective &D, StringRef ParentName, 5877 llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, 5878 bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) { 5879 // Create a unique name for the entry function using the source location 5880 // information of the current target region. 
The name will be something like: 5881 // 5882 // __omp_offloading_DD_FFFF_PP_lBB 5883 // 5884 // where DD_FFFF is an ID unique to the file (device and file IDs), PP is the 5885 // mangled name of the function that encloses the target region and BB is the 5886 // line number of the target region. 5887 5888 unsigned DeviceID; 5889 unsigned FileID; 5890 unsigned Line; 5891 getTargetEntryUniqueInfo(CGM.getContext(), D.getLocStart(), DeviceID, FileID, 5892 Line); 5893 SmallString<64> EntryFnName; 5894 { 5895 llvm::raw_svector_ostream OS(EntryFnName); 5896 OS << "__omp_offloading" << llvm::format("_%x", DeviceID) 5897 << llvm::format("_%x_", FileID) << ParentName << "_l" << Line; 5898 } 5899 5900 const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target); 5901 5902 CodeGenFunction CGF(CGM, true); 5903 CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName); 5904 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 5905 5906 OutlinedFn = CGF.GenerateOpenMPCapturedStmtFunction(CS); 5907 5908 // If this target outline function is not an offload entry, we don't need to 5909 // register it. 5910 if (!IsOffloadEntry) 5911 return; 5912 5913 // The target region ID is used by the runtime library to identify the current 5914 // target region, so it only has to be unique and not necessarily point to 5915 // anything. It could be the pointer to the outlined function that implements 5916 // the target region, but we aren't using that so that the compiler doesn't 5917 // need to keep that, and could therefore inline the host function if proven 5918 // worthwhile during optimization. In the other hand, if emitting code for the 5919 // device, the ID has to be the function address so that it can retrieved from 5920 // the offloading entry and launched by the runtime library. We also mark the 5921 // outlined function to have external linkage in case we are emitting code for 5922 // the device, because these functions will be entry points to the device. 
5923 5924 if (CGM.getLangOpts().OpenMPIsDevice) { 5925 OutlinedFnID = llvm::ConstantExpr::getBitCast(OutlinedFn, CGM.Int8PtrTy); 5926 OutlinedFn->setLinkage(llvm::GlobalValue::ExternalLinkage); 5927 OutlinedFn->setDSOLocal(false); 5928 } else 5929 OutlinedFnID = new llvm::GlobalVariable( 5930 CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true, 5931 llvm::GlobalValue::PrivateLinkage, 5932 llvm::Constant::getNullValue(CGM.Int8Ty), ".omp_offload.region_id"); 5933 5934 // Register the information for the entry associated with this target region. 5935 OffloadEntriesInfoManager.registerTargetRegionEntryInfo( 5936 DeviceID, FileID, ParentName, Line, OutlinedFn, OutlinedFnID, 5937 /*Flags=*/0); 5938 } 5939 5940 /// discard all CompoundStmts intervening between two constructs 5941 static const Stmt *ignoreCompoundStmts(const Stmt *Body) { 5942 while (auto *CS = dyn_cast_or_null<CompoundStmt>(Body)) 5943 Body = CS->body_front(); 5944 5945 return Body; 5946 } 5947 5948 /// Emit the number of teams for a target directive. Inspect the num_teams 5949 /// clause associated with a teams construct combined or closely nested 5950 /// with the target directive. 5951 /// 5952 /// Emit a team of size one for directives such as 'target parallel' that 5953 /// have no associated teams construct. 5954 /// 5955 /// Otherwise, return nullptr. 5956 static llvm::Value * 5957 emitNumTeamsForTargetDirective(CGOpenMPRuntime &OMPRuntime, 5958 CodeGenFunction &CGF, 5959 const OMPExecutableDirective &D) { 5960 5961 assert(!CGF.getLangOpts().OpenMPIsDevice && "Clauses associated with the " 5962 "teams directive expected to be " 5963 "emitted only for the host!"); 5964 5965 auto &Bld = CGF.Builder; 5966 5967 // If the target directive is combined with a teams directive: 5968 // Return the value in the num_teams clause, if any. 5969 // Otherwise, return 0 to denote the runtime default. 
5970 if (isOpenMPTeamsDirective(D.getDirectiveKind())) { 5971 if (const auto *NumTeamsClause = D.getSingleClause<OMPNumTeamsClause>()) { 5972 CodeGenFunction::RunCleanupsScope NumTeamsScope(CGF); 5973 auto NumTeams = CGF.EmitScalarExpr(NumTeamsClause->getNumTeams(), 5974 /*IgnoreResultAssign*/ true); 5975 return Bld.CreateIntCast(NumTeams, CGF.Int32Ty, 5976 /*IsSigned=*/true); 5977 } 5978 5979 // The default value is 0. 5980 return Bld.getInt32(0); 5981 } 5982 5983 // If the target directive is combined with a parallel directive but not a 5984 // teams directive, start one team. 5985 if (isOpenMPParallelDirective(D.getDirectiveKind())) 5986 return Bld.getInt32(1); 5987 5988 // If the current target region has a teams region enclosed, we need to get 5989 // the number of teams to pass to the runtime function call. This is done 5990 // by generating the expression in a inlined region. This is required because 5991 // the expression is captured in the enclosing target environment when the 5992 // teams directive is not combined with target. 5993 5994 const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target); 5995 5996 if (auto *TeamsDir = dyn_cast_or_null<OMPExecutableDirective>( 5997 ignoreCompoundStmts(CS.getCapturedStmt()))) { 5998 if (isOpenMPTeamsDirective(TeamsDir->getDirectiveKind())) { 5999 if (auto *NTE = TeamsDir->getSingleClause<OMPNumTeamsClause>()) { 6000 CGOpenMPInnerExprInfo CGInfo(CGF, CS); 6001 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 6002 llvm::Value *NumTeams = CGF.EmitScalarExpr(NTE->getNumTeams()); 6003 return Bld.CreateIntCast(NumTeams, CGF.Int32Ty, 6004 /*IsSigned=*/true); 6005 } 6006 6007 // If we have an enclosed teams directive but no num_teams clause we use 6008 // the default value 0. 6009 return Bld.getInt32(0); 6010 } 6011 } 6012 6013 // No teams associated with the directive. 6014 return nullptr; 6015 } 6016 6017 /// Emit the number of threads for a target directive. 
Inspect the 6018 /// thread_limit clause associated with a teams construct combined or closely 6019 /// nested with the target directive. 6020 /// 6021 /// Emit the num_threads clause for directives such as 'target parallel' that 6022 /// have no associated teams construct. 6023 /// 6024 /// Otherwise, return nullptr. 6025 static llvm::Value * 6026 emitNumThreadsForTargetDirective(CGOpenMPRuntime &OMPRuntime, 6027 CodeGenFunction &CGF, 6028 const OMPExecutableDirective &D) { 6029 6030 assert(!CGF.getLangOpts().OpenMPIsDevice && "Clauses associated with the " 6031 "teams directive expected to be " 6032 "emitted only for the host!"); 6033 6034 auto &Bld = CGF.Builder; 6035 6036 // 6037 // If the target directive is combined with a teams directive: 6038 // Return the value in the thread_limit clause, if any. 6039 // 6040 // If the target directive is combined with a parallel directive: 6041 // Return the value in the num_threads clause, if any. 6042 // 6043 // If both clauses are set, select the minimum of the two. 6044 // 6045 // If neither teams or parallel combined directives set the number of threads 6046 // in a team, return 0 to denote the runtime default. 6047 // 6048 // If this is not a teams directive return nullptr. 
6049 6050 if (isOpenMPTeamsDirective(D.getDirectiveKind()) || 6051 isOpenMPParallelDirective(D.getDirectiveKind())) { 6052 llvm::Value *DefaultThreadLimitVal = Bld.getInt32(0); 6053 llvm::Value *NumThreadsVal = nullptr; 6054 llvm::Value *ThreadLimitVal = nullptr; 6055 6056 if (const auto *ThreadLimitClause = 6057 D.getSingleClause<OMPThreadLimitClause>()) { 6058 CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF); 6059 auto ThreadLimit = CGF.EmitScalarExpr(ThreadLimitClause->getThreadLimit(), 6060 /*IgnoreResultAssign*/ true); 6061 ThreadLimitVal = Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, 6062 /*IsSigned=*/true); 6063 } 6064 6065 if (const auto *NumThreadsClause = 6066 D.getSingleClause<OMPNumThreadsClause>()) { 6067 CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF); 6068 llvm::Value *NumThreads = 6069 CGF.EmitScalarExpr(NumThreadsClause->getNumThreads(), 6070 /*IgnoreResultAssign*/ true); 6071 NumThreadsVal = 6072 Bld.CreateIntCast(NumThreads, CGF.Int32Ty, /*IsSigned=*/true); 6073 } 6074 6075 // Select the lesser of thread_limit and num_threads. 6076 if (NumThreadsVal) 6077 ThreadLimitVal = ThreadLimitVal 6078 ? Bld.CreateSelect(Bld.CreateICmpSLT(NumThreadsVal, 6079 ThreadLimitVal), 6080 NumThreadsVal, ThreadLimitVal) 6081 : NumThreadsVal; 6082 6083 // Set default value passed to the runtime if either teams or a target 6084 // parallel type directive is found but no clause is specified. 6085 if (!ThreadLimitVal) 6086 ThreadLimitVal = DefaultThreadLimitVal; 6087 6088 return ThreadLimitVal; 6089 } 6090 6091 // If the current target region has a teams region enclosed, we need to get 6092 // the thread limit to pass to the runtime function call. This is done 6093 // by generating the expression in a inlined region. This is required because 6094 // the expression is captured in the enclosing target environment when the 6095 // teams directive is not combined with target. 
6096 6097 const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target); 6098 6099 if (auto *TeamsDir = dyn_cast_or_null<OMPExecutableDirective>( 6100 ignoreCompoundStmts(CS.getCapturedStmt()))) { 6101 if (isOpenMPTeamsDirective(TeamsDir->getDirectiveKind())) { 6102 if (auto *TLE = TeamsDir->getSingleClause<OMPThreadLimitClause>()) { 6103 CGOpenMPInnerExprInfo CGInfo(CGF, CS); 6104 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 6105 llvm::Value *ThreadLimit = CGF.EmitScalarExpr(TLE->getThreadLimit()); 6106 return CGF.Builder.CreateIntCast(ThreadLimit, CGF.Int32Ty, 6107 /*IsSigned=*/true); 6108 } 6109 6110 // If we have an enclosed teams directive but no thread_limit clause we 6111 // use the default value 0. 6112 return CGF.Builder.getInt32(0); 6113 } 6114 } 6115 6116 // No teams associated with the directive. 6117 return nullptr; 6118 } 6119 6120 namespace { 6121 // \brief Utility to handle information from clauses associated with a given 6122 // construct that use mappable expressions (e.g. 'map' clause, 'to' clause). 6123 // It provides a convenient interface to obtain the information and generate 6124 // code for that information. 6125 class MappableExprsHandler { 6126 public: 6127 /// \brief Values for bit flags used to specify the mapping type for 6128 /// offloading. 6129 enum OpenMPOffloadMappingFlags { 6130 /// \brief Allocate memory on the device and move data from host to device. 6131 OMP_MAP_TO = 0x01, 6132 /// \brief Allocate memory on the device and move data from device to host. 6133 OMP_MAP_FROM = 0x02, 6134 /// \brief Always perform the requested mapping action on the element, even 6135 /// if it was already mapped before. 6136 OMP_MAP_ALWAYS = 0x04, 6137 /// \brief Delete the element from the device environment, ignoring the 6138 /// current reference count associated with the element. 
6139 OMP_MAP_DELETE = 0x08, 6140 /// \brief The element being mapped is a pointer-pointee pair; both the 6141 /// pointer and the pointee should be mapped. 6142 OMP_MAP_PTR_AND_OBJ = 0x10, 6143 /// \brief This flags signals that the base address of an entry should be 6144 /// passed to the target kernel as an argument. 6145 OMP_MAP_TARGET_PARAM = 0x20, 6146 /// \brief Signal that the runtime library has to return the device pointer 6147 /// in the current position for the data being mapped. Used when we have the 6148 /// use_device_ptr clause. 6149 OMP_MAP_RETURN_PARAM = 0x40, 6150 /// \brief This flag signals that the reference being passed is a pointer to 6151 /// private data. 6152 OMP_MAP_PRIVATE = 0x80, 6153 /// \brief Pass the element to the device by value. 6154 OMP_MAP_LITERAL = 0x100, 6155 /// Implicit map 6156 OMP_MAP_IMPLICIT = 0x200, 6157 }; 6158 6159 /// Class that associates information with a base pointer to be passed to the 6160 /// runtime library. 6161 class BasePointerInfo { 6162 /// The base pointer. 6163 llvm::Value *Ptr = nullptr; 6164 /// The base declaration that refers to this device pointer, or null if 6165 /// there is none. 6166 const ValueDecl *DevPtrDecl = nullptr; 6167 6168 public: 6169 BasePointerInfo(llvm::Value *Ptr, const ValueDecl *DevPtrDecl = nullptr) 6170 : Ptr(Ptr), DevPtrDecl(DevPtrDecl) {} 6171 llvm::Value *operator*() const { return Ptr; } 6172 const ValueDecl *getDevicePtrDecl() const { return DevPtrDecl; } 6173 void setDevicePtrDecl(const ValueDecl *D) { DevPtrDecl = D; } 6174 }; 6175 6176 typedef SmallVector<BasePointerInfo, 16> MapBaseValuesArrayTy; 6177 typedef SmallVector<llvm::Value *, 16> MapValuesArrayTy; 6178 typedef SmallVector<uint64_t, 16> MapFlagsArrayTy; 6179 6180 private: 6181 /// \brief Directive from where the map clauses were extracted. 6182 const OMPExecutableDirective &CurDir; 6183 6184 /// \brief Function the directive is being generated for. 
6185 CodeGenFunction &CGF; 6186 6187 /// \brief Set of all first private variables in the current directive. 6188 llvm::SmallPtrSet<const VarDecl *, 8> FirstPrivateDecls; 6189 /// Set of all reduction variables in the current directive. 6190 llvm::SmallPtrSet<const VarDecl *, 8> ReductionDecls; 6191 6192 /// Map between device pointer declarations and their expression components. 6193 /// The key value for declarations in 'this' is null. 6194 llvm::DenseMap< 6195 const ValueDecl *, 6196 SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>> 6197 DevPointersMap; 6198 6199 llvm::Value *getExprTypeSize(const Expr *E) const { 6200 auto ExprTy = E->getType().getCanonicalType(); 6201 6202 // Reference types are ignored for mapping purposes. 6203 if (auto *RefTy = ExprTy->getAs<ReferenceType>()) 6204 ExprTy = RefTy->getPointeeType().getCanonicalType(); 6205 6206 // Given that an array section is considered a built-in type, we need to 6207 // do the calculation based on the length of the section instead of relying 6208 // on CGF.getTypeSize(E->getType()). 6209 if (const auto *OAE = dyn_cast<OMPArraySectionExpr>(E)) { 6210 QualType BaseTy = OMPArraySectionExpr::getBaseOriginalType( 6211 OAE->getBase()->IgnoreParenImpCasts()) 6212 .getCanonicalType(); 6213 6214 // If there is no length associated with the expression, that means we 6215 // are using the whole length of the base. 
6216 if (!OAE->getLength() && OAE->getColonLoc().isValid()) 6217 return CGF.getTypeSize(BaseTy); 6218 6219 llvm::Value *ElemSize; 6220 if (auto *PTy = BaseTy->getAs<PointerType>()) 6221 ElemSize = CGF.getTypeSize(PTy->getPointeeType().getCanonicalType()); 6222 else { 6223 auto *ATy = cast<ArrayType>(BaseTy.getTypePtr()); 6224 assert(ATy && "Expecting array type if not a pointer type."); 6225 ElemSize = CGF.getTypeSize(ATy->getElementType().getCanonicalType()); 6226 } 6227 6228 // If we don't have a length at this point, that is because we have an 6229 // array section with a single element. 6230 if (!OAE->getLength()) 6231 return ElemSize; 6232 6233 auto *LengthVal = CGF.EmitScalarExpr(OAE->getLength()); 6234 LengthVal = 6235 CGF.Builder.CreateIntCast(LengthVal, CGF.SizeTy, /*isSigned=*/false); 6236 return CGF.Builder.CreateNUWMul(LengthVal, ElemSize); 6237 } 6238 return CGF.getTypeSize(ExprTy); 6239 } 6240 6241 /// \brief Return the corresponding bits for a given map clause modifier. Add 6242 /// a flag marking the map as a pointer if requested. Add a flag marking the 6243 /// map as the first one of a series of maps that relate to the same map 6244 /// expression. 6245 uint64_t getMapTypeBits(OpenMPMapClauseKind MapType, 6246 OpenMPMapClauseKind MapTypeModifier, bool AddPtrFlag, 6247 bool AddIsTargetParamFlag) const { 6248 uint64_t Bits = 0u; 6249 switch (MapType) { 6250 case OMPC_MAP_alloc: 6251 case OMPC_MAP_release: 6252 // alloc and release is the default behavior in the runtime library, i.e. 6253 // if we don't pass any bits alloc/release that is what the runtime is 6254 // going to do. Therefore, we don't need to signal anything for these two 6255 // type modifiers. 
6256 break; 6257 case OMPC_MAP_to: 6258 Bits = OMP_MAP_TO; 6259 break; 6260 case OMPC_MAP_from: 6261 Bits = OMP_MAP_FROM; 6262 break; 6263 case OMPC_MAP_tofrom: 6264 Bits = OMP_MAP_TO | OMP_MAP_FROM; 6265 break; 6266 case OMPC_MAP_delete: 6267 Bits = OMP_MAP_DELETE; 6268 break; 6269 default: 6270 llvm_unreachable("Unexpected map type!"); 6271 break; 6272 } 6273 if (AddPtrFlag) 6274 Bits |= OMP_MAP_PTR_AND_OBJ; 6275 if (AddIsTargetParamFlag) 6276 Bits |= OMP_MAP_TARGET_PARAM; 6277 if (MapTypeModifier == OMPC_MAP_always) 6278 Bits |= OMP_MAP_ALWAYS; 6279 return Bits; 6280 } 6281 6282 /// \brief Return true if the provided expression is a final array section. A 6283 /// final array section, is one whose length can't be proved to be one. 6284 bool isFinalArraySectionExpression(const Expr *E) const { 6285 auto *OASE = dyn_cast<OMPArraySectionExpr>(E); 6286 6287 // It is not an array section and therefore not a unity-size one. 6288 if (!OASE) 6289 return false; 6290 6291 // An array section with no colon always refer to a single element. 6292 if (OASE->getColonLoc().isInvalid()) 6293 return false; 6294 6295 auto *Length = OASE->getLength(); 6296 6297 // If we don't have a length we have to check if the array has size 1 6298 // for this dimension. Also, we should always expect a length if the 6299 // base type is pointer. 6300 if (!Length) { 6301 auto BaseQTy = OMPArraySectionExpr::getBaseOriginalType( 6302 OASE->getBase()->IgnoreParenImpCasts()) 6303 .getCanonicalType(); 6304 if (auto *ATy = dyn_cast<ConstantArrayType>(BaseQTy.getTypePtr())) 6305 return ATy->getSize().getSExtValue() != 1; 6306 // If we don't have a constant dimension length, we have to consider 6307 // the current section as having any size, so it is not necessarily 6308 // unitary. If it happen to be unity size, that's user fault. 6309 return true; 6310 } 6311 6312 // Check if the length evaluates to 1. 
6313 llvm::APSInt ConstLength; 6314 if (!Length->EvaluateAsInt(ConstLength, CGF.getContext())) 6315 return true; // Can have more that size 1. 6316 6317 return ConstLength.getSExtValue() != 1; 6318 } 6319 6320 /// \brief Generate the base pointers, section pointers, sizes and map type 6321 /// bits for the provided map type, map modifier, and expression components. 6322 /// \a IsFirstComponent should be set to true if the provided set of 6323 /// components is the first associated with a capture. 6324 void generateInfoForComponentList( 6325 OpenMPMapClauseKind MapType, OpenMPMapClauseKind MapTypeModifier, 6326 OMPClauseMappableExprCommon::MappableExprComponentListRef Components, 6327 MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers, 6328 MapValuesArrayTy &Sizes, MapFlagsArrayTy &Types, 6329 bool IsFirstComponentList, bool IsImplicit) const { 6330 6331 // The following summarizes what has to be generated for each map and the 6332 // types bellow. The generated information is expressed in this order: 6333 // base pointer, section pointer, size, flags 6334 // (to add to the ones that come from the map type and modifier). 
6335 // 6336 // double d; 6337 // int i[100]; 6338 // float *p; 6339 // 6340 // struct S1 { 6341 // int i; 6342 // float f[50]; 6343 // } 6344 // struct S2 { 6345 // int i; 6346 // float f[50]; 6347 // S1 s; 6348 // double *p; 6349 // struct S2 *ps; 6350 // } 6351 // S2 s; 6352 // S2 *ps; 6353 // 6354 // map(d) 6355 // &d, &d, sizeof(double), noflags 6356 // 6357 // map(i) 6358 // &i, &i, 100*sizeof(int), noflags 6359 // 6360 // map(i[1:23]) 6361 // &i(=&i[0]), &i[1], 23*sizeof(int), noflags 6362 // 6363 // map(p) 6364 // &p, &p, sizeof(float*), noflags 6365 // 6366 // map(p[1:24]) 6367 // p, &p[1], 24*sizeof(float), noflags 6368 // 6369 // map(s) 6370 // &s, &s, sizeof(S2), noflags 6371 // 6372 // map(s.i) 6373 // &s, &(s.i), sizeof(int), noflags 6374 // 6375 // map(s.s.f) 6376 // &s, &(s.i.f), 50*sizeof(int), noflags 6377 // 6378 // map(s.p) 6379 // &s, &(s.p), sizeof(double*), noflags 6380 // 6381 // map(s.p[:22], s.a s.b) 6382 // &s, &(s.p), sizeof(double*), noflags 6383 // &(s.p), &(s.p[0]), 22*sizeof(double), ptr_flag 6384 // 6385 // map(s.ps) 6386 // &s, &(s.ps), sizeof(S2*), noflags 6387 // 6388 // map(s.ps->s.i) 6389 // &s, &(s.ps), sizeof(S2*), noflags 6390 // &(s.ps), &(s.ps->s.i), sizeof(int), ptr_flag 6391 // 6392 // map(s.ps->ps) 6393 // &s, &(s.ps), sizeof(S2*), noflags 6394 // &(s.ps), &(s.ps->ps), sizeof(S2*), ptr_flag 6395 // 6396 // map(s.ps->ps->ps) 6397 // &s, &(s.ps), sizeof(S2*), noflags 6398 // &(s.ps), &(s.ps->ps), sizeof(S2*), ptr_flag 6399 // &(s.ps->ps), &(s.ps->ps->ps), sizeof(S2*), ptr_flag 6400 // 6401 // map(s.ps->ps->s.f[:22]) 6402 // &s, &(s.ps), sizeof(S2*), noflags 6403 // &(s.ps), &(s.ps->ps), sizeof(S2*), ptr_flag 6404 // &(s.ps->ps), &(s.ps->ps->s.f[0]), 22*sizeof(float), ptr_flag 6405 // 6406 // map(ps) 6407 // &ps, &ps, sizeof(S2*), noflags 6408 // 6409 // map(ps->i) 6410 // ps, &(ps->i), sizeof(int), noflags 6411 // 6412 // map(ps->s.f) 6413 // ps, &(ps->s.f[0]), 50*sizeof(float), noflags 6414 // 6415 // map(ps->p) 6416 // 
ps, &(ps->p), sizeof(double*), noflags 6417 // 6418 // map(ps->p[:22]) 6419 // ps, &(ps->p), sizeof(double*), noflags 6420 // &(ps->p), &(ps->p[0]), 22*sizeof(double), ptr_flag 6421 // 6422 // map(ps->ps) 6423 // ps, &(ps->ps), sizeof(S2*), noflags 6424 // 6425 // map(ps->ps->s.i) 6426 // ps, &(ps->ps), sizeof(S2*), noflags 6427 // &(ps->ps), &(ps->ps->s.i), sizeof(int), ptr_flag 6428 // 6429 // map(ps->ps->ps) 6430 // ps, &(ps->ps), sizeof(S2*), noflags 6431 // &(ps->ps), &(ps->ps->ps), sizeof(S2*), ptr_flag 6432 // 6433 // map(ps->ps->ps->ps) 6434 // ps, &(ps->ps), sizeof(S2*), noflags 6435 // &(ps->ps), &(ps->ps->ps), sizeof(S2*), ptr_flag 6436 // &(ps->ps->ps), &(ps->ps->ps->ps), sizeof(S2*), ptr_flag 6437 // 6438 // map(ps->ps->ps->s.f[:22]) 6439 // ps, &(ps->ps), sizeof(S2*), noflags 6440 // &(ps->ps), &(ps->ps->ps), sizeof(S2*), ptr_flag 6441 // &(ps->ps->ps), &(ps->ps->ps->s.f[0]), 22*sizeof(float), ptr_flag 6442 6443 // Track if the map information being generated is the first for a capture. 6444 bool IsCaptureFirstInfo = IsFirstComponentList; 6445 6446 // Scan the components from the base to the complete expression. 6447 auto CI = Components.rbegin(); 6448 auto CE = Components.rend(); 6449 auto I = CI; 6450 6451 // Track if the map information being generated is the first for a list of 6452 // components. 6453 bool IsExpressionFirstInfo = true; 6454 llvm::Value *BP = nullptr; 6455 6456 if (auto *ME = dyn_cast<MemberExpr>(I->getAssociatedExpression())) { 6457 // The base is the 'this' pointer. The content of the pointer is going 6458 // to be the base of the field being mapped. 6459 BP = CGF.EmitScalarExpr(ME->getBase()); 6460 } else { 6461 // The base is the reference to the variable. 6462 // BP = &Var. 6463 BP = CGF.EmitOMPSharedLValue(I->getAssociatedExpression()).getPointer(); 6464 6465 // If the variable is a pointer and is being dereferenced (i.e. is not 6466 // the last component), the base has to be the pointer itself, not its 6467 // reference. 
References are ignored for mapping purposes. 6468 QualType Ty = 6469 I->getAssociatedDeclaration()->getType().getNonReferenceType(); 6470 if (Ty->isAnyPointerType() && std::next(I) != CE) { 6471 auto PtrAddr = CGF.MakeNaturalAlignAddrLValue(BP, Ty); 6472 BP = CGF.EmitLoadOfPointerLValue(PtrAddr.getAddress(), 6473 Ty->castAs<PointerType>()) 6474 .getPointer(); 6475 6476 // We do not need to generate individual map information for the 6477 // pointer, it can be associated with the combined storage. 6478 ++I; 6479 } 6480 } 6481 6482 uint64_t DefaultFlags = IsImplicit ? OMP_MAP_IMPLICIT : 0; 6483 for (; I != CE; ++I) { 6484 auto Next = std::next(I); 6485 6486 // We need to generate the addresses and sizes if this is the last 6487 // component, if the component is a pointer or if it is an array section 6488 // whose length can't be proved to be one. If this is a pointer, it 6489 // becomes the base address for the following components. 6490 6491 // A final array section, is one whose length can't be proved to be one. 6492 bool IsFinalArraySection = 6493 isFinalArraySectionExpression(I->getAssociatedExpression()); 6494 6495 // Get information on whether the element is a pointer. Have to do a 6496 // special treatment for array sections given that they are built-in 6497 // types. 6498 const auto *OASE = 6499 dyn_cast<OMPArraySectionExpr>(I->getAssociatedExpression()); 6500 bool IsPointer = 6501 (OASE && 6502 OMPArraySectionExpr::getBaseOriginalType(OASE) 6503 .getCanonicalType() 6504 ->isAnyPointerType()) || 6505 I->getAssociatedExpression()->getType()->isAnyPointerType(); 6506 6507 if (Next == CE || IsPointer || IsFinalArraySection) { 6508 6509 // If this is not the last component, we expect the pointer to be 6510 // associated with an array expression or member expression. 
6511 assert((Next == CE || 6512 isa<MemberExpr>(Next->getAssociatedExpression()) || 6513 isa<ArraySubscriptExpr>(Next->getAssociatedExpression()) || 6514 isa<OMPArraySectionExpr>(Next->getAssociatedExpression())) && 6515 "Unexpected expression"); 6516 6517 llvm::Value *LB = 6518 CGF.EmitOMPSharedLValue(I->getAssociatedExpression()).getPointer(); 6519 auto *Size = getExprTypeSize(I->getAssociatedExpression()); 6520 6521 // If we have a member expression and the current component is a 6522 // reference, we have to map the reference too. Whenever we have a 6523 // reference, the section that reference refers to is going to be a 6524 // load instruction from the storage assigned to the reference. 6525 if (isa<MemberExpr>(I->getAssociatedExpression()) && 6526 I->getAssociatedDeclaration()->getType()->isReferenceType()) { 6527 auto *LI = cast<llvm::LoadInst>(LB); 6528 auto *RefAddr = LI->getPointerOperand(); 6529 6530 BasePointers.push_back(BP); 6531 Pointers.push_back(RefAddr); 6532 Sizes.push_back(CGF.getTypeSize(CGF.getContext().VoidPtrTy)); 6533 Types.push_back(DefaultFlags | 6534 getMapTypeBits( 6535 /*MapType*/ OMPC_MAP_alloc, 6536 /*MapTypeModifier=*/OMPC_MAP_unknown, 6537 !IsExpressionFirstInfo, IsCaptureFirstInfo)); 6538 IsExpressionFirstInfo = false; 6539 IsCaptureFirstInfo = false; 6540 // The reference will be the next base address. 6541 BP = RefAddr; 6542 } 6543 6544 BasePointers.push_back(BP); 6545 Pointers.push_back(LB); 6546 Sizes.push_back(Size); 6547 6548 // We need to add a pointer flag for each map that comes from the 6549 // same expression except for the first one. We also need to signal 6550 // this map is the first one that relates with the current capture 6551 // (there is a set of entries for each capture). 6552 Types.push_back(DefaultFlags | getMapTypeBits(MapType, MapTypeModifier, 6553 !IsExpressionFirstInfo, 6554 IsCaptureFirstInfo)); 6555 6556 // If we have a final array section, we are done with this expression. 
6557 if (IsFinalArraySection) 6558 break; 6559 6560 // The pointer becomes the base for the next element. 6561 if (Next != CE) 6562 BP = LB; 6563 6564 IsExpressionFirstInfo = false; 6565 IsCaptureFirstInfo = false; 6566 } 6567 } 6568 } 6569 6570 /// \brief Return the adjusted map modifiers if the declaration a capture 6571 /// refers to appears in a first-private clause. This is expected to be used 6572 /// only with directives that start with 'target'. 6573 unsigned adjustMapModifiersForPrivateClauses(const CapturedStmt::Capture &Cap, 6574 unsigned CurrentModifiers) { 6575 assert(Cap.capturesVariable() && "Expected capture by reference only!"); 6576 6577 // A first private variable captured by reference will use only the 6578 // 'private ptr' and 'map to' flag. Return the right flags if the captured 6579 // declaration is known as first-private in this handler. 6580 if (FirstPrivateDecls.count(Cap.getCapturedVar())) 6581 return MappableExprsHandler::OMP_MAP_PRIVATE | 6582 MappableExprsHandler::OMP_MAP_TO; 6583 // Reduction variable will use only the 'private ptr' and 'map to_from' 6584 // flag. 6585 if (ReductionDecls.count(Cap.getCapturedVar())) { 6586 return MappableExprsHandler::OMP_MAP_TO | 6587 MappableExprsHandler::OMP_MAP_FROM; 6588 } 6589 6590 // We didn't modify anything. 6591 return CurrentModifiers; 6592 } 6593 6594 public: 6595 MappableExprsHandler(const OMPExecutableDirective &Dir, CodeGenFunction &CGF) 6596 : CurDir(Dir), CGF(CGF) { 6597 // Extract firstprivate clause information. 
    for (const auto *C : Dir.getClausesOfKind<OMPFirstprivateClause>())
      for (const auto *D : C->varlists())
        FirstPrivateDecls.insert(
            cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl())->getCanonicalDecl());
    // Extract reduction clause information.
    for (const auto *C : Dir.getClausesOfKind<OMPReductionClause>()) {
      for (const auto *D : C->varlists()) {
        ReductionDecls.insert(
            cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl())->getCanonicalDecl());
      }
    }
    // Extract device pointer clause information.
    for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>())
      for (auto L : C->component_lists())
        DevPointersMap[L.first].push_back(L.second);
  }

  /// \brief Generate all the base pointers, section pointers, sizes and map
  /// types for the mappable expressions found in the 'map', 'to', 'from' and
  /// 'use_device_ptr' clauses of the current directive.
  ///
  /// The four output arrays are cleared first and then filled in lock-step
  /// (entry i of each array describes the same mapping). For every item that
  /// relates to a 'use_device_ptr' clause, the matching base-pointer entry is
  /// tagged with the relevant declaration and its map type gets the
  /// OMP_MAP_RETURN_PARAM bit.
  ///
  /// \param BasePointers [out] Base pointer of each generated mapping.
  /// \param Pointers [out] Section pointer of each generated mapping.
  /// \param Sizes [out] Size of each generated mapping.
  /// \param Types [out] Map-type flags of each generated mapping.
  void generateAllInfo(MapBaseValuesArrayTy &BasePointers,
                       MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes,
                       MapFlagsArrayTy &Types) const {
    BasePointers.clear();
    Pointers.clear();
    Sizes.clear();
    Types.clear();

    // Everything we need to remember about one component list until the
    // actual entries are generated at the end of this function.
    struct MapInfo {
      /// Kind that defines how a device pointer has to be returned.
      enum ReturnPointerKind {
        // Don't have to return any pointer.
        RPK_None,
        // Pointer is the base of the declaration.
        RPK_Base,
        // Pointer is a member of the base declaration - 'this'
        RPK_Member,
        // Pointer is a reference and a member of the base declaration - 'this'
        RPK_MemberReference,
      };
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType = OMPC_MAP_unknown;
      OpenMPMapClauseKind MapTypeModifier = OMPC_MAP_unknown;
      ReturnPointerKind ReturnDevicePointer = RPK_None;
      bool IsImplicit = false;

      MapInfo() = default;
      MapInfo(
          OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
          OpenMPMapClauseKind MapType, OpenMPMapClauseKind MapTypeModifier,
          ReturnPointerKind ReturnDevicePointer, bool IsImplicit)
          : Components(Components), MapType(MapType),
            MapTypeModifier(MapTypeModifier),
            ReturnDevicePointer(ReturnDevicePointer), IsImplicit(IsImplicit) {}
    };

    // We have to process the component lists that relate with the same
    // declaration in a single chunk so that we can generate the map flags
    // correctly. Therefore, we organize all lists in a map. Lists whose
    // declaration is null are grouped under the null key.
    llvm::MapVector<const ValueDecl *, SmallVector<MapInfo, 8>> Info;

    // Helper function to fill the information map for the different supported
    // clauses. The declaration is canonicalized before it is used as a key.
    auto &&InfoGen = [&Info](
        const ValueDecl *D,
        OMPClauseMappableExprCommon::MappableExprComponentListRef L,
        OpenMPMapClauseKind MapType, OpenMPMapClauseKind MapModifier,
        MapInfo::ReturnPointerKind ReturnDevicePointer, bool IsImplicit) {
      const ValueDecl *VD =
          D ? cast<ValueDecl>(D->getCanonicalDecl()) : nullptr;
      Info[VD].emplace_back(L, MapType, MapModifier, ReturnDevicePointer,
                            IsImplicit);
    };

    // Collect the component lists of the 'map' clauses; they carry their own
    // map type and modifier.
    // FIXME: MSVC 2013 seems to require this-> to find member CurDir.
    for (auto *C : this->CurDir.getClausesOfKind<OMPMapClause>())
      for (auto L : C->component_lists()) {
        InfoGen(L.first, L.second, C->getMapType(), C->getMapTypeModifier(),
                MapInfo::RPK_None, C->isImplicit());
      }
    // 'to' clauses are recorded as map type 'to' without a modifier.
    for (auto *C : this->CurDir.getClausesOfKind<OMPToClause>())
      for (auto L : C->component_lists()) {
        InfoGen(L.first, L.second, OMPC_MAP_to, OMPC_MAP_unknown,
                MapInfo::RPK_None, C->isImplicit());
      }
    // 'from' clauses are recorded as map type 'from' without a modifier.
    for (auto *C : this->CurDir.getClausesOfKind<OMPFromClause>())
      for (auto L : C->component_lists()) {
        InfoGen(L.first, L.second, OMPC_MAP_from, OMPC_MAP_unknown,
                MapInfo::RPK_None, C->isImplicit());
      }

    // Look at the use_device_ptr clause information and mark the existing map
    // entries as such. If there is no map information for an entry in the
    // use_device_ptr list, we create one with map type 'alloc' and zero size
    // section. It is the user's fault if that was not mapped before.
    // FIXME: MSVC 2013 seems to require this-> to find member CurDir.
    for (auto *C : this->CurDir.getClausesOfKind<OMPUseDevicePtrClause>())
      for (auto L : C->component_lists()) {
        assert(!L.second.empty() && "Not expecting empty list of components!");
        const ValueDecl *VD = L.second.back().getAssociatedDeclaration();
        VD = cast<ValueDecl>(VD->getCanonicalDecl());
        auto *IE = L.second.back().getAssociatedExpression();
        // If the first component is a member expression, we have to look into
        // 'this', which maps to null in the map of map information. Otherwise
        // look directly for the information.
        auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);

        // We potentially have map information for this declaration already.
        // Look for the first set of components that refer to it.
        if (It != Info.end()) {
          auto CI = std::find_if(
              It->second.begin(), It->second.end(), [VD](const MapInfo &MI) {
                return MI.Components.back().getAssociatedDeclaration() == VD;
              });
          // If we found a map entry, signal that the pointer has to be returned
          // and move on to the next declaration.
          if (CI != It->second.end()) {
            CI->ReturnDevicePointer = isa<MemberExpr>(IE)
                                          ? (VD->getType()->isReferenceType()
                                                 ? MapInfo::RPK_MemberReference
                                                 : MapInfo::RPK_Member)
                                          : MapInfo::RPK_Base;
            continue;
          }
        }

        // We didn't find any match in our map information - generate a zero
        // size array section. This entry goes straight into the output arrays
        // (ahead of everything produced from Info below).
        // FIXME: MSVC 2013 seems to require this-> to find member CGF.
        llvm::Value *Ptr = this->CGF.EmitLoadOfScalar(this->CGF.EmitLValue(IE),
                                                      IE->getExprLoc());
        BasePointers.push_back({Ptr, VD});
        Pointers.push_back(Ptr);
        Sizes.push_back(llvm::Constant::getNullValue(this->CGF.SizeTy));
        Types.push_back(OMP_MAP_RETURN_PARAM | OMP_MAP_TARGET_PARAM);
      }

    // Now emit the entries for every declaration, one component list at a
    // time, in the order the lists were recorded.
    for (auto &M : Info) {
      // We need to know when we generate information for the first component
      // associated with a capture, because the mapping flags depend on it.
      bool IsFirstComponentList = true;
      for (MapInfo &L : M.second) {
        assert(!L.Components.empty() &&
               "Not expecting declaration with no component lists.");

        // Remember the current base pointer index, i.e. the index of the first
        // entry the call below is going to append.
        unsigned CurrentBasePointersIdx = BasePointers.size();
        // FIXME: MSVC 2013 seems to require this-> to find the member method.
        this->generateInfoForComponentList(
            L.MapType, L.MapTypeModifier, L.Components, BasePointers, Pointers,
            Sizes, Types, IsFirstComponentList, L.IsImplicit);

        // If this entry relates with a device pointer, set the relevant
        // declaration and add the 'return pointer' flag. This is only done
        // for the first component list of the declaration.
        if (IsFirstComponentList &&
            L.ReturnDevicePointer != MapInfo::RPK_None) {
          // If the pointer is not the base of the map, we need to skip the
          // base. If it is a reference in a member field, we also need to skip
          // the map of the reference.
          if (L.ReturnDevicePointer != MapInfo::RPK_Base) {
            ++CurrentBasePointersIdx;
            if (L.ReturnDevicePointer == MapInfo::RPK_MemberReference)
              ++CurrentBasePointersIdx;
          }
          assert(BasePointers.size() > CurrentBasePointersIdx &&
                 "Unexpected number of mapped base pointers.");

          auto *RelevantVD = L.Components.back().getAssociatedDeclaration();
          assert(RelevantVD &&
                 "No relevant declaration related with device pointer??");

          BasePointers[CurrentBasePointersIdx].setDevicePtrDecl(RelevantVD);
          Types[CurrentBasePointersIdx] |= OMP_MAP_RETURN_PARAM;
        }
        IsFirstComponentList = false;
      }
    }
  }

  /// \brief Generate the base pointers, section pointers, sizes and map types
  /// associated to a given capture.
  void generateInfoForCapture(const CapturedStmt::Capture *Cap,
                              llvm::Value *Arg,
                              MapBaseValuesArrayTy &BasePointers,
                              MapValuesArrayTy &Pointers,
                              MapValuesArrayTy &Sizes,
                              MapFlagsArrayTy &Types) const {
    assert(!Cap->capturesVariableArrayType() &&
           "Not expecting to generate map info for a variable array type!");

    BasePointers.clear();
    Pointers.clear();
    Sizes.clear();
    Types.clear();

    // We need to know when we are generating information for the first
    // component associated with a capture, because the mapping flags depend on
    // it.
    bool IsFirstComponentList = true;

    // A capture of 'this' is represented by a null declaration.
    const ValueDecl *VD =
        Cap->capturesThis()
            ? nullptr
            : Cap->getCapturedVar()->getCanonicalDecl();

    // If this declaration appears in an is_device_ptr clause we just have to
    // pass the pointer by value.
If it is a reference to a declaration, we just 6804 // pass its value, otherwise, if it is a member expression, we need to map 6805 // 'to' the field. 6806 if (!VD) { 6807 auto It = DevPointersMap.find(VD); 6808 if (It != DevPointersMap.end()) { 6809 for (auto L : It->second) { 6810 generateInfoForComponentList( 6811 /*MapType=*/OMPC_MAP_to, /*MapTypeModifier=*/OMPC_MAP_unknown, L, 6812 BasePointers, Pointers, Sizes, Types, IsFirstComponentList, 6813 /*IsImplicit=*/false); 6814 IsFirstComponentList = false; 6815 } 6816 return; 6817 } 6818 } else if (DevPointersMap.count(VD)) { 6819 BasePointers.push_back({Arg, VD}); 6820 Pointers.push_back(Arg); 6821 Sizes.push_back(CGF.getTypeSize(CGF.getContext().VoidPtrTy)); 6822 Types.push_back(OMP_MAP_LITERAL | OMP_MAP_TARGET_PARAM); 6823 return; 6824 } 6825 6826 // FIXME: MSVC 2013 seems to require this-> to find member CurDir. 6827 for (auto *C : this->CurDir.getClausesOfKind<OMPMapClause>()) 6828 for (auto L : C->decl_component_lists(VD)) { 6829 assert(L.first == VD && 6830 "We got information for the wrong declaration??"); 6831 assert(!L.second.empty() && 6832 "Not expecting declaration with no component lists."); 6833 generateInfoForComponentList( 6834 C->getMapType(), C->getMapTypeModifier(), L.second, BasePointers, 6835 Pointers, Sizes, Types, IsFirstComponentList, C->isImplicit()); 6836 IsFirstComponentList = false; 6837 } 6838 6839 return; 6840 } 6841 6842 /// \brief Generate the default map information for a given capture \a CI, 6843 /// record field declaration \a RI and captured value \a CV. 6844 void generateDefaultMapInfo(const CapturedStmt::Capture &CI, 6845 const FieldDecl &RI, llvm::Value *CV, 6846 MapBaseValuesArrayTy &CurBasePointers, 6847 MapValuesArrayTy &CurPointers, 6848 MapValuesArrayTy &CurSizes, 6849 MapFlagsArrayTy &CurMapTypes) { 6850 6851 // Do the default mapping. 
6852 if (CI.capturesThis()) { 6853 CurBasePointers.push_back(CV); 6854 CurPointers.push_back(CV); 6855 const PointerType *PtrTy = cast<PointerType>(RI.getType().getTypePtr()); 6856 CurSizes.push_back(CGF.getTypeSize(PtrTy->getPointeeType())); 6857 // Default map type. 6858 CurMapTypes.push_back(OMP_MAP_TO | OMP_MAP_FROM); 6859 } else if (CI.capturesVariableByCopy()) { 6860 CurBasePointers.push_back(CV); 6861 CurPointers.push_back(CV); 6862 if (!RI.getType()->isAnyPointerType()) { 6863 // We have to signal to the runtime captures passed by value that are 6864 // not pointers. 6865 CurMapTypes.push_back(OMP_MAP_LITERAL); 6866 CurSizes.push_back(CGF.getTypeSize(RI.getType())); 6867 } else { 6868 // Pointers are implicitly mapped with a zero size and no flags 6869 // (other than first map that is added for all implicit maps). 6870 CurMapTypes.push_back(0u); 6871 CurSizes.push_back(llvm::Constant::getNullValue(CGF.SizeTy)); 6872 } 6873 } else { 6874 assert(CI.capturesVariable() && "Expected captured reference."); 6875 CurBasePointers.push_back(CV); 6876 CurPointers.push_back(CV); 6877 6878 const ReferenceType *PtrTy = 6879 cast<ReferenceType>(RI.getType().getTypePtr()); 6880 QualType ElementType = PtrTy->getPointeeType(); 6881 CurSizes.push_back(CGF.getTypeSize(ElementType)); 6882 // The default map type for a scalar/complex type is 'to' because by 6883 // default the value doesn't have to be retrieved. For an aggregate 6884 // type, the default is 'tofrom'. 6885 CurMapTypes.emplace_back(adjustMapModifiersForPrivateClauses( 6886 CI, ElementType->isAggregateType() ? (OMP_MAP_TO | OMP_MAP_FROM) 6887 : OMP_MAP_TO)); 6888 } 6889 // Every default map produces a single argument which is a target parameter. 6890 CurMapTypes.back() |= OMP_MAP_TARGET_PARAM; 6891 } 6892 }; 6893 6894 enum OpenMPOffloadingReservedDeviceIDs { 6895 /// \brief Device ID if the device was not defined, runtime should get it 6896 /// from environment variables in the spec. 
  OMP_DEVICEID_UNDEF = -1,
};
} // anonymous namespace

/// \brief Emit the arrays used to pass the captures and map information to the
/// offloading runtime library.
///
/// \a Info is reset first and its NumberOfPtrs is set to the number of entries
/// in \a BasePointers. If that number is zero nothing else is emitted and the
/// array fields of \a Info stay as clearArrayInfo() left them.
///
/// \param CGF Function in which the temporaries and stores are emitted.
/// \param BasePointers Base pointer of each mapping.
/// \param Pointers Section pointer of each mapping.
/// \param Sizes Size of each mapping; a non-constant entry forces the sizes
/// array to be filled at run time.
/// \param MapTypes Map-type flags of each mapping.
/// \param Info [out] Receives the emitted arrays and the number of entries.
static void
emitOffloadingArrays(CodeGenFunction &CGF,
                     MappableExprsHandler::MapBaseValuesArrayTy &BasePointers,
                     MappableExprsHandler::MapValuesArrayTy &Pointers,
                     MappableExprsHandler::MapValuesArrayTy &Sizes,
                     MappableExprsHandler::MapFlagsArrayTy &MapTypes,
                     CGOpenMPRuntime::TargetDataInfo &Info) {
  auto &CGM = CGF.CGM;
  auto &Ctx = CGF.getContext();

  // Reset the array information.
  Info.clearArrayInfo();
  Info.NumberOfPtrs = BasePointers.size();

  if (Info.NumberOfPtrs) {
    // Detect if we have any capture size requiring runtime evaluation of the
    // size so that a constant array could be eventually used.
    bool hasRuntimeEvaluationCaptureSize = false;
    for (auto *S : Sizes)
      if (!isa<llvm::Constant>(S)) {
        hasRuntimeEvaluationCaptureSize = true;
        break;
      }

    // Both pointer arrays are 'void *[NumberOfPtrs]' temporaries.
    llvm::APInt PointerNumAP(32, Info.NumberOfPtrs, /*isSigned=*/true);
    QualType PointerArrayType =
        Ctx.getConstantArrayType(Ctx.VoidPtrTy, PointerNumAP, ArrayType::Normal,
                                 /*IndexTypeQuals=*/0);

    Info.BasePointersArray =
        CGF.CreateMemTemp(PointerArrayType, ".offload_baseptrs").getPointer();
    Info.PointersArray =
        CGF.CreateMemTemp(PointerArrayType, ".offload_ptrs").getPointer();

    // If we don't have any VLA types or other types that require runtime
    // evaluation, we can use a constant array for the map sizes, otherwise we
    // need to fill up the arrays as we do for the pointers.
    if (hasRuntimeEvaluationCaptureSize) {
      QualType SizeArrayType = Ctx.getConstantArrayType(
          Ctx.getSizeType(), PointerNumAP, ArrayType::Normal,
          /*IndexTypeQuals=*/0);
      Info.SizesArray =
          CGF.CreateMemTemp(SizeArrayType, ".offload_sizes").getPointer();
    } else {
      // We expect all the sizes to be constant, so we collect them to create
      // a constant array.
      SmallVector<llvm::Constant *, 16> ConstSizes;
      for (auto S : Sizes)
        ConstSizes.push_back(cast<llvm::Constant>(S));

      auto *SizesArrayInit = llvm::ConstantArray::get(
          llvm::ArrayType::get(CGM.SizeTy, ConstSizes.size()), ConstSizes);
      // The global is created with 'new' and handed the module as its first
      // argument; no explicit delete is needed here.
      auto *SizesArrayGbl = new llvm::GlobalVariable(
          CGM.getModule(), SizesArrayInit->getType(),
          /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
          SizesArrayInit, ".offload_sizes");
      SizesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
      Info.SizesArray = SizesArrayGbl;
    }

    // The map types are always constant so we don't need to generate code to
    // fill arrays. Instead, we create an array constant.
    llvm::Constant *MapTypesArrayInit =
        llvm::ConstantDataArray::get(CGF.Builder.getContext(), MapTypes);
    auto *MapTypesArrayGbl = new llvm::GlobalVariable(
        CGM.getModule(), MapTypesArrayInit->getType(),
        /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
        MapTypesArrayInit, ".offload_maptypes");
    MapTypesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
    Info.MapTypesArray = MapTypesArrayGbl;

    // Store every base pointer, section pointer and (when needed) size into
    // its slot of the corresponding array.
    for (unsigned i = 0; i < Info.NumberOfPtrs; ++i) {
      llvm::Value *BPVal = *BasePointers[i];
      llvm::Value *BP = CGF.Builder.CreateConstInBoundsGEP2_32(
          llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
          Info.BasePointersArray, 0, i);
      // View the slot as a pointer to the value's own type so the value can
      // be stored without converting it first.
      BP = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          BP, BPVal->getType()->getPointerTo(/*AddrSpace=*/0));
      Address BPAddr(BP, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
      CGF.Builder.CreateStore(BPVal, BPAddr);

      // Remember the address of the slot for declarations tagged as device
      // pointers, if the caller asked for that information.
      if (Info.requiresDevicePointerInfo())
        if (auto *DevVD = BasePointers[i].getDevicePtrDecl())
          Info.CaptureDeviceAddrMap.insert(std::make_pair(DevVD, BPAddr));

      llvm::Value *PVal = Pointers[i];
      llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32(
          llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
          Info.PointersArray, 0, i);
      P = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          P, PVal->getType()->getPointerTo(/*AddrSpace=*/0));
      Address PAddr(P, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
      CGF.Builder.CreateStore(PVal, PAddr);

      // The sizes array only needs stores when it is a temporary; in the
      // constant case the global initializer already holds the values.
      if (hasRuntimeEvaluationCaptureSize) {
        llvm::Value *S = CGF.Builder.CreateConstInBoundsGEP2_32(
            llvm::ArrayType::get(CGM.SizeTy, Info.NumberOfPtrs),
            Info.SizesArray,
            /*Idx0=*/0,
            /*Idx1=*/i);
        Address SAddr(S, Ctx.getTypeAlignInChars(Ctx.getSizeType()));
        CGF.Builder.CreateStore(
            CGF.Builder.CreateIntCast(Sizes[i], CGM.SizeTy, /*isSigned=*/true),
            SAddr);
      }
    }
  }
}
/// \brief Emit
the arguments to be passed to the runtime library based on the 7013 /// arrays of pointers, sizes and map types. 7014 static void emitOffloadingArraysArgument( 7015 CodeGenFunction &CGF, llvm::Value *&BasePointersArrayArg, 7016 llvm::Value *&PointersArrayArg, llvm::Value *&SizesArrayArg, 7017 llvm::Value *&MapTypesArrayArg, CGOpenMPRuntime::TargetDataInfo &Info) { 7018 auto &CGM = CGF.CGM; 7019 if (Info.NumberOfPtrs) { 7020 BasePointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( 7021 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs), 7022 Info.BasePointersArray, 7023 /*Idx0=*/0, /*Idx1=*/0); 7024 PointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( 7025 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs), 7026 Info.PointersArray, 7027 /*Idx0=*/0, 7028 /*Idx1=*/0); 7029 SizesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( 7030 llvm::ArrayType::get(CGM.SizeTy, Info.NumberOfPtrs), Info.SizesArray, 7031 /*Idx0=*/0, /*Idx1=*/0); 7032 MapTypesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( 7033 llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs), 7034 Info.MapTypesArray, 7035 /*Idx0=*/0, 7036 /*Idx1=*/0); 7037 } else { 7038 BasePointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy); 7039 PointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy); 7040 SizesArrayArg = llvm::ConstantPointerNull::get(CGM.SizeTy->getPointerTo()); 7041 MapTypesArrayArg = 7042 llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo()); 7043 } 7044 } 7045 7046 void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF, 7047 const OMPExecutableDirective &D, 7048 llvm::Value *OutlinedFn, 7049 llvm::Value *OutlinedFnID, 7050 const Expr *IfCond, const Expr *Device) { 7051 if (!CGF.HaveInsertPoint()) 7052 return; 7053 7054 assert(OutlinedFn && "Invalid outlined function!"); 7055 7056 const bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>(); 7057 llvm::SmallVector<llvm::Value *, 16> CapturedVars; 7058 const CapturedStmt &CS = 
*D.getCapturedStmt(OMPD_target); 7059 auto &&ArgsCodegen = [&CS, &CapturedVars](CodeGenFunction &CGF, 7060 PrePostActionTy &) { 7061 CGF.GenerateOpenMPCapturedVars(CS, CapturedVars); 7062 }; 7063 emitInlinedDirective(CGF, OMPD_unknown, ArgsCodegen); 7064 7065 CodeGenFunction::OMPTargetDataInfo InputInfo; 7066 llvm::Value *MapTypesArray = nullptr; 7067 // Fill up the pointer arrays and transfer execution to the device. 7068 auto &&ThenGen = [this, Device, OutlinedFn, OutlinedFnID, &D, &InputInfo, 7069 &MapTypesArray, &CS, RequiresOuterTask, 7070 &CapturedVars](CodeGenFunction &CGF, PrePostActionTy &) { 7071 // On top of the arrays that were filled up, the target offloading call 7072 // takes as arguments the device id as well as the host pointer. The host 7073 // pointer is used by the runtime library to identify the current target 7074 // region, so it only has to be unique and not necessarily point to 7075 // anything. It could be the pointer to the outlined function that 7076 // implements the target region, but we aren't using that so that the 7077 // compiler doesn't need to keep that, and could therefore inline the host 7078 // function if proven worthwhile during optimization. 7079 7080 // From this point on, we need to have an ID of the target region defined. 7081 assert(OutlinedFnID && "Invalid outlined function ID!"); 7082 7083 // Emit device ID if any. 7084 llvm::Value *DeviceID; 7085 if (Device) { 7086 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device), 7087 CGF.Int64Ty, /*isSigned=*/true); 7088 } else { 7089 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF); 7090 } 7091 7092 // Emit the number of elements in the offloading arrays. 7093 llvm::Value *PointerNum = 7094 CGF.Builder.getInt32(InputInfo.NumberOfTargetItems); 7095 7096 // Return value of the runtime offloading call. 
7097 llvm::Value *Return; 7098 7099 auto *NumTeams = emitNumTeamsForTargetDirective(*this, CGF, D); 7100 auto *NumThreads = emitNumThreadsForTargetDirective(*this, CGF, D); 7101 7102 bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>(); 7103 // The target region is an outlined function launched by the runtime 7104 // via calls __tgt_target() or __tgt_target_teams(). 7105 // 7106 // __tgt_target() launches a target region with one team and one thread, 7107 // executing a serial region. This master thread may in turn launch 7108 // more threads within its team upon encountering a parallel region, 7109 // however, no additional teams can be launched on the device. 7110 // 7111 // __tgt_target_teams() launches a target region with one or more teams, 7112 // each with one or more threads. This call is required for target 7113 // constructs such as: 7114 // 'target teams' 7115 // 'target' / 'teams' 7116 // 'target teams distribute parallel for' 7117 // 'target parallel' 7118 // and so on. 7119 // 7120 // Note that on the host and CPU targets, the runtime implementation of 7121 // these calls simply call the outlined function without forking threads. 7122 // The outlined functions themselves have runtime calls to 7123 // __kmpc_fork_teams() and __kmpc_fork() for this purpose, codegen'd by 7124 // the compiler in emitTeamsCall() and emitParallelCall(). 7125 // 7126 // In contrast, on the NVPTX target, the implementation of 7127 // __tgt_target_teams() launches a GPU kernel with the requested number 7128 // of teams and threads so no additional calls to the runtime are required. 7129 if (NumTeams) { 7130 // If we have NumTeams defined this means that we have an enclosed teams 7131 // region. Therefore we also expect to have NumThreads defined. These two 7132 // values should be defined in the presence of a teams directive, 7133 // regardless of having any clauses associated. 
If the user is using teams 7134 // but no clauses, these two values will be the default that should be 7135 // passed to the runtime library - a 32-bit integer with the value zero. 7136 assert(NumThreads && "Thread limit expression should be available along " 7137 "with number of teams."); 7138 llvm::Value *OffloadingArgs[] = {DeviceID, 7139 OutlinedFnID, 7140 PointerNum, 7141 InputInfo.BasePointersArray.getPointer(), 7142 InputInfo.PointersArray.getPointer(), 7143 InputInfo.SizesArray.getPointer(), 7144 MapTypesArray, 7145 NumTeams, 7146 NumThreads}; 7147 Return = CGF.EmitRuntimeCall( 7148 createRuntimeFunction(HasNowait ? OMPRTL__tgt_target_teams_nowait 7149 : OMPRTL__tgt_target_teams), 7150 OffloadingArgs); 7151 } else { 7152 llvm::Value *OffloadingArgs[] = {DeviceID, 7153 OutlinedFnID, 7154 PointerNum, 7155 InputInfo.BasePointersArray.getPointer(), 7156 InputInfo.PointersArray.getPointer(), 7157 InputInfo.SizesArray.getPointer(), 7158 MapTypesArray}; 7159 Return = CGF.EmitRuntimeCall( 7160 createRuntimeFunction(HasNowait ? OMPRTL__tgt_target_nowait 7161 : OMPRTL__tgt_target), 7162 OffloadingArgs); 7163 } 7164 7165 // Check the error code and execute the host version if required. 7166 llvm::BasicBlock *OffloadFailedBlock = 7167 CGF.createBasicBlock("omp_offload.failed"); 7168 llvm::BasicBlock *OffloadContBlock = 7169 CGF.createBasicBlock("omp_offload.cont"); 7170 llvm::Value *Failed = CGF.Builder.CreateIsNotNull(Return); 7171 CGF.Builder.CreateCondBr(Failed, OffloadFailedBlock, OffloadContBlock); 7172 7173 CGF.EmitBlock(OffloadFailedBlock); 7174 if (RequiresOuterTask) { 7175 CapturedVars.clear(); 7176 CGF.GenerateOpenMPCapturedVars(CS, CapturedVars); 7177 } 7178 emitOutlinedFunctionCall(CGF, D.getLocStart(), OutlinedFn, CapturedVars); 7179 CGF.EmitBranch(OffloadContBlock); 7180 7181 CGF.EmitBlock(OffloadContBlock, /*IsFinished=*/true); 7182 }; 7183 7184 // Notify that the host version must be executed. 
7185 auto &&ElseGen = [this, &D, OutlinedFn, &CS, &CapturedVars, 7186 RequiresOuterTask](CodeGenFunction &CGF, 7187 PrePostActionTy &) { 7188 if (RequiresOuterTask) { 7189 CapturedVars.clear(); 7190 CGF.GenerateOpenMPCapturedVars(CS, CapturedVars); 7191 } 7192 emitOutlinedFunctionCall(CGF, D.getLocStart(), OutlinedFn, CapturedVars); 7193 }; 7194 7195 auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray, 7196 &CapturedVars, RequiresOuterTask, 7197 &CS](CodeGenFunction &CGF, PrePostActionTy &) { 7198 // Fill up the arrays with all the captured variables. 7199 MappableExprsHandler::MapBaseValuesArrayTy BasePointers; 7200 MappableExprsHandler::MapValuesArrayTy Pointers; 7201 MappableExprsHandler::MapValuesArrayTy Sizes; 7202 MappableExprsHandler::MapFlagsArrayTy MapTypes; 7203 7204 MappableExprsHandler::MapBaseValuesArrayTy CurBasePointers; 7205 MappableExprsHandler::MapValuesArrayTy CurPointers; 7206 MappableExprsHandler::MapValuesArrayTy CurSizes; 7207 MappableExprsHandler::MapFlagsArrayTy CurMapTypes; 7208 7209 // Get mappable expression information. 7210 MappableExprsHandler MEHandler(D, CGF); 7211 7212 auto RI = CS.getCapturedRecordDecl()->field_begin(); 7213 auto CV = CapturedVars.begin(); 7214 for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(), 7215 CE = CS.capture_end(); 7216 CI != CE; ++CI, ++RI, ++CV) { 7217 CurBasePointers.clear(); 7218 CurPointers.clear(); 7219 CurSizes.clear(); 7220 CurMapTypes.clear(); 7221 7222 // VLA sizes are passed to the outlined region by copy and do not have map 7223 // information associated. 7224 if (CI->capturesVariableArrayType()) { 7225 CurBasePointers.push_back(*CV); 7226 CurPointers.push_back(*CV); 7227 CurSizes.push_back(CGF.getTypeSize(RI->getType())); 7228 // Copy to the device as an argument. No need to retrieve it. 
7229 CurMapTypes.push_back(MappableExprsHandler::OMP_MAP_LITERAL | 7230 MappableExprsHandler::OMP_MAP_TARGET_PARAM); 7231 } else { 7232 // If we have any information in the map clause, we use it, otherwise we 7233 // just do a default mapping. 7234 MEHandler.generateInfoForCapture(CI, *CV, CurBasePointers, CurPointers, 7235 CurSizes, CurMapTypes); 7236 if (CurBasePointers.empty()) 7237 MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurBasePointers, 7238 CurPointers, CurSizes, CurMapTypes); 7239 } 7240 // We expect to have at least an element of information for this capture. 7241 assert(!CurBasePointers.empty() && 7242 "Non-existing map pointer for capture!"); 7243 assert(CurBasePointers.size() == CurPointers.size() && 7244 CurBasePointers.size() == CurSizes.size() && 7245 CurBasePointers.size() == CurMapTypes.size() && 7246 "Inconsistent map information sizes!"); 7247 7248 // We need to append the results of this capture to what we already have. 7249 BasePointers.append(CurBasePointers.begin(), CurBasePointers.end()); 7250 Pointers.append(CurPointers.begin(), CurPointers.end()); 7251 Sizes.append(CurSizes.begin(), CurSizes.end()); 7252 MapTypes.append(CurMapTypes.begin(), CurMapTypes.end()); 7253 } 7254 7255 TargetDataInfo Info; 7256 // Fill up the arrays and create the arguments. 
7257 emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info); 7258 emitOffloadingArraysArgument(CGF, Info.BasePointersArray, 7259 Info.PointersArray, Info.SizesArray, 7260 Info.MapTypesArray, Info); 7261 InputInfo.NumberOfTargetItems = Info.NumberOfPtrs; 7262 InputInfo.BasePointersArray = 7263 Address(Info.BasePointersArray, CGM.getPointerAlign()); 7264 InputInfo.PointersArray = 7265 Address(Info.PointersArray, CGM.getPointerAlign()); 7266 InputInfo.SizesArray = Address(Info.SizesArray, CGM.getPointerAlign()); 7267 MapTypesArray = Info.MapTypesArray; 7268 if (RequiresOuterTask) 7269 CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo); 7270 else 7271 emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen); 7272 }; 7273 7274 auto &&TargetElseGen = [this, &ElseGen, &D, RequiresOuterTask]( 7275 CodeGenFunction &CGF, PrePostActionTy &) { 7276 if (RequiresOuterTask) { 7277 CodeGenFunction::OMPTargetDataInfo InputInfo; 7278 CGF.EmitOMPTargetTaskBasedDirective(D, ElseGen, InputInfo); 7279 } else { 7280 emitInlinedDirective(CGF, D.getDirectiveKind(), ElseGen); 7281 } 7282 }; 7283 7284 // If we have a target function ID it means that we need to support 7285 // offloading, otherwise, just execute on the host. We need to execute on host 7286 // regardless of the conditional in the if clause if, e.g., the user do not 7287 // specify target triples. 7288 if (OutlinedFnID) { 7289 if (IfCond) { 7290 emitOMPIfClause(CGF, IfCond, TargetThenGen, TargetElseGen); 7291 } else { 7292 RegionCodeGenTy ThenRCG(TargetThenGen); 7293 ThenRCG(CGF); 7294 } 7295 } else { 7296 RegionCodeGenTy ElseRCG(TargetElseGen); 7297 ElseRCG(CGF); 7298 } 7299 } 7300 7301 void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S, 7302 StringRef ParentName) { 7303 if (!S) 7304 return; 7305 7306 // Codegen OMP target directives that offload compute to the device. 
7307 bool requiresDeviceCodegen = 7308 isa<OMPExecutableDirective>(S) && 7309 isOpenMPTargetExecutionDirective( 7310 cast<OMPExecutableDirective>(S)->getDirectiveKind()); 7311 7312 if (requiresDeviceCodegen) { 7313 auto &E = *cast<OMPExecutableDirective>(S); 7314 unsigned DeviceID; 7315 unsigned FileID; 7316 unsigned Line; 7317 getTargetEntryUniqueInfo(CGM.getContext(), E.getLocStart(), DeviceID, 7318 FileID, Line); 7319 7320 // Is this a target region that should not be emitted as an entry point? If 7321 // so just signal we are done with this target region. 7322 if (!OffloadEntriesInfoManager.hasTargetRegionEntryInfo(DeviceID, FileID, 7323 ParentName, Line)) 7324 return; 7325 7326 switch (S->getStmtClass()) { 7327 case Stmt::OMPTargetDirectiveClass: 7328 CodeGenFunction::EmitOMPTargetDeviceFunction( 7329 CGM, ParentName, cast<OMPTargetDirective>(*S)); 7330 break; 7331 case Stmt::OMPTargetParallelDirectiveClass: 7332 CodeGenFunction::EmitOMPTargetParallelDeviceFunction( 7333 CGM, ParentName, cast<OMPTargetParallelDirective>(*S)); 7334 break; 7335 case Stmt::OMPTargetTeamsDirectiveClass: 7336 CodeGenFunction::EmitOMPTargetTeamsDeviceFunction( 7337 CGM, ParentName, cast<OMPTargetTeamsDirective>(*S)); 7338 break; 7339 case Stmt::OMPTargetTeamsDistributeDirectiveClass: 7340 CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction( 7341 CGM, ParentName, cast<OMPTargetTeamsDistributeDirective>(*S)); 7342 break; 7343 case Stmt::OMPTargetTeamsDistributeSimdDirectiveClass: 7344 CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction( 7345 CGM, ParentName, cast<OMPTargetTeamsDistributeSimdDirective>(*S)); 7346 break; 7347 case Stmt::OMPTargetParallelForDirectiveClass: 7348 CodeGenFunction::EmitOMPTargetParallelForDeviceFunction( 7349 CGM, ParentName, cast<OMPTargetParallelForDirective>(*S)); 7350 break; 7351 case Stmt::OMPTargetParallelForSimdDirectiveClass: 7352 CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction( 7353 CGM, ParentName, 
cast<OMPTargetParallelForSimdDirective>(*S)); 7354 break; 7355 case Stmt::OMPTargetSimdDirectiveClass: 7356 CodeGenFunction::EmitOMPTargetSimdDeviceFunction( 7357 CGM, ParentName, cast<OMPTargetSimdDirective>(*S)); 7358 break; 7359 case Stmt::OMPTargetTeamsDistributeParallelForDirectiveClass: 7360 CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction( 7361 CGM, ParentName, 7362 cast<OMPTargetTeamsDistributeParallelForDirective>(*S)); 7363 break; 7364 case Stmt::OMPTargetTeamsDistributeParallelForSimdDirectiveClass: 7365 CodeGenFunction:: 7366 EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction( 7367 CGM, ParentName, 7368 cast<OMPTargetTeamsDistributeParallelForSimdDirective>(*S)); 7369 break; 7370 default: 7371 llvm_unreachable("Unknown target directive for OpenMP device codegen."); 7372 } 7373 return; 7374 } 7375 7376 if (const OMPExecutableDirective *E = dyn_cast<OMPExecutableDirective>(S)) { 7377 if (!E->hasAssociatedStmt() || !E->getAssociatedStmt()) 7378 return; 7379 7380 scanForTargetRegionsFunctions( 7381 E->getInnermostCapturedStmt()->getCapturedStmt(), ParentName); 7382 return; 7383 } 7384 7385 // If this is a lambda function, look into its body. 7386 if (auto *L = dyn_cast<LambdaExpr>(S)) 7387 S = L->getBody(); 7388 7389 // Keep looking for target regions recursively. 7390 for (auto *II : S->children()) 7391 scanForTargetRegionsFunctions(II, ParentName); 7392 } 7393 7394 bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) { 7395 auto &FD = *cast<FunctionDecl>(GD.getDecl()); 7396 7397 // If emitting code for the host, we do not process FD here. Instead we do 7398 // the normal code generation. 7399 if (!CGM.getLangOpts().OpenMPIsDevice) 7400 return false; 7401 7402 // Try to detect target regions in the function. 7403 scanForTargetRegionsFunctions(FD.getBody(), CGM.getMangledName(GD)); 7404 7405 // We should not emit any function other that the ones created during the 7406 // scanning. 
Therefore, we signal that this function is completely dealt 7407 // with. 7408 return true; 7409 } 7410 7411 bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) { 7412 if (!CGM.getLangOpts().OpenMPIsDevice) 7413 return false; 7414 7415 // Check if there are Ctors/Dtors in this declaration and look for target 7416 // regions in it. We use the complete variant to produce the kernel name 7417 // mangling. 7418 QualType RDTy = cast<VarDecl>(GD.getDecl())->getType(); 7419 if (auto *RD = RDTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl()) { 7420 for (auto *Ctor : RD->ctors()) { 7421 StringRef ParentName = 7422 CGM.getMangledName(GlobalDecl(Ctor, Ctor_Complete)); 7423 scanForTargetRegionsFunctions(Ctor->getBody(), ParentName); 7424 } 7425 auto *Dtor = RD->getDestructor(); 7426 if (Dtor) { 7427 StringRef ParentName = 7428 CGM.getMangledName(GlobalDecl(Dtor, Dtor_Complete)); 7429 scanForTargetRegionsFunctions(Dtor->getBody(), ParentName); 7430 } 7431 } 7432 7433 // If we are in target mode, we do not emit any global (declare target is not 7434 // implemented yet). Therefore we signal that GD was processed in this case. 7435 return true; 7436 } 7437 7438 bool CGOpenMPRuntime::emitTargetGlobal(GlobalDecl GD) { 7439 auto *VD = GD.getDecl(); 7440 if (isa<FunctionDecl>(VD)) 7441 return emitTargetFunctions(GD); 7442 7443 return emitTargetGlobalVariable(GD); 7444 } 7445 7446 llvm::Function *CGOpenMPRuntime::emitRegistrationFunction() { 7447 // If we have offloading in the current module, we need to emit the entries 7448 // now and register the offloading descriptor. 7449 createOffloadEntriesAndInfoMetadata(); 7450 7451 // Create and register the offloading binary descriptors. This is the main 7452 // entity that captures all the information about offloading in the current 7453 // compilation unit. 
7454 return createOffloadingBinaryDescriptorRegistration(); 7455 } 7456 7457 void CGOpenMPRuntime::emitTeamsCall(CodeGenFunction &CGF, 7458 const OMPExecutableDirective &D, 7459 SourceLocation Loc, 7460 llvm::Value *OutlinedFn, 7461 ArrayRef<llvm::Value *> CapturedVars) { 7462 if (!CGF.HaveInsertPoint()) 7463 return; 7464 7465 auto *RTLoc = emitUpdateLocation(CGF, Loc); 7466 CodeGenFunction::RunCleanupsScope Scope(CGF); 7467 7468 // Build call __kmpc_fork_teams(loc, n, microtask, var1, .., varn); 7469 llvm::Value *Args[] = { 7470 RTLoc, 7471 CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars 7472 CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy())}; 7473 llvm::SmallVector<llvm::Value *, 16> RealArgs; 7474 RealArgs.append(std::begin(Args), std::end(Args)); 7475 RealArgs.append(CapturedVars.begin(), CapturedVars.end()); 7476 7477 auto RTLFn = createRuntimeFunction(OMPRTL__kmpc_fork_teams); 7478 CGF.EmitRuntimeCall(RTLFn, RealArgs); 7479 } 7480 7481 void CGOpenMPRuntime::emitNumTeamsClause(CodeGenFunction &CGF, 7482 const Expr *NumTeams, 7483 const Expr *ThreadLimit, 7484 SourceLocation Loc) { 7485 if (!CGF.HaveInsertPoint()) 7486 return; 7487 7488 auto *RTLoc = emitUpdateLocation(CGF, Loc); 7489 7490 llvm::Value *NumTeamsVal = 7491 (NumTeams) 7492 ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(NumTeams), 7493 CGF.CGM.Int32Ty, /* isSigned = */ true) 7494 : CGF.Builder.getInt32(0); 7495 7496 llvm::Value *ThreadLimitVal = 7497 (ThreadLimit) 7498 ? 
CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit), 7499 CGF.CGM.Int32Ty, /* isSigned = */ true) 7500 : CGF.Builder.getInt32(0); 7501 7502 // Build call __kmpc_push_num_teamss(&loc, global_tid, num_teams, thread_limit) 7503 llvm::Value *PushNumTeamsArgs[] = {RTLoc, getThreadID(CGF, Loc), NumTeamsVal, 7504 ThreadLimitVal}; 7505 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_num_teams), 7506 PushNumTeamsArgs); 7507 } 7508 7509 void CGOpenMPRuntime::emitTargetDataCalls( 7510 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, 7511 const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) { 7512 if (!CGF.HaveInsertPoint()) 7513 return; 7514 7515 // Action used to replace the default codegen action and turn privatization 7516 // off. 7517 PrePostActionTy NoPrivAction; 7518 7519 // Generate the code for the opening of the data environment. Capture all the 7520 // arguments of the runtime call by reference because they are used in the 7521 // closing of the region. 7522 auto &&BeginThenGen = [this, &D, Device, &Info, 7523 &CodeGen](CodeGenFunction &CGF, PrePostActionTy &) { 7524 // Fill up the arrays with all the mapped variables. 7525 MappableExprsHandler::MapBaseValuesArrayTy BasePointers; 7526 MappableExprsHandler::MapValuesArrayTy Pointers; 7527 MappableExprsHandler::MapValuesArrayTy Sizes; 7528 MappableExprsHandler::MapFlagsArrayTy MapTypes; 7529 7530 // Get map clause information. 7531 MappableExprsHandler MCHandler(D, CGF); 7532 MCHandler.generateAllInfo(BasePointers, Pointers, Sizes, MapTypes); 7533 7534 // Fill up the arrays and create the arguments. 
7535 emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info); 7536 7537 llvm::Value *BasePointersArrayArg = nullptr; 7538 llvm::Value *PointersArrayArg = nullptr; 7539 llvm::Value *SizesArrayArg = nullptr; 7540 llvm::Value *MapTypesArrayArg = nullptr; 7541 emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg, 7542 SizesArrayArg, MapTypesArrayArg, Info); 7543 7544 // Emit device ID if any. 7545 llvm::Value *DeviceID = nullptr; 7546 if (Device) { 7547 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device), 7548 CGF.Int64Ty, /*isSigned=*/true); 7549 } else { 7550 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF); 7551 } 7552 7553 // Emit the number of elements in the offloading arrays. 7554 auto *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs); 7555 7556 llvm::Value *OffloadingArgs[] = { 7557 DeviceID, PointerNum, BasePointersArrayArg, 7558 PointersArrayArg, SizesArrayArg, MapTypesArrayArg}; 7559 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_target_data_begin), 7560 OffloadingArgs); 7561 7562 // If device pointer privatization is required, emit the body of the region 7563 // here. It will have to be duplicated: with and without privatization. 7564 if (!Info.CaptureDeviceAddrMap.empty()) 7565 CodeGen(CGF); 7566 }; 7567 7568 // Generate code for the closing of the data region. 7569 auto &&EndThenGen = [this, Device, &Info](CodeGenFunction &CGF, 7570 PrePostActionTy &) { 7571 assert(Info.isValid() && "Invalid data environment closing arguments."); 7572 7573 llvm::Value *BasePointersArrayArg = nullptr; 7574 llvm::Value *PointersArrayArg = nullptr; 7575 llvm::Value *SizesArrayArg = nullptr; 7576 llvm::Value *MapTypesArrayArg = nullptr; 7577 emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg, 7578 SizesArrayArg, MapTypesArrayArg, Info); 7579 7580 // Emit device ID if any. 
7581 llvm::Value *DeviceID = nullptr; 7582 if (Device) { 7583 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device), 7584 CGF.Int64Ty, /*isSigned=*/true); 7585 } else { 7586 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF); 7587 } 7588 7589 // Emit the number of elements in the offloading arrays. 7590 auto *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs); 7591 7592 llvm::Value *OffloadingArgs[] = { 7593 DeviceID, PointerNum, BasePointersArrayArg, 7594 PointersArrayArg, SizesArrayArg, MapTypesArrayArg}; 7595 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_target_data_end), 7596 OffloadingArgs); 7597 }; 7598 7599 // If we need device pointer privatization, we need to emit the body of the 7600 // region with no privatization in the 'else' branch of the conditional. 7601 // Otherwise, we don't have to do anything. 7602 auto &&BeginElseGen = [&Info, &CodeGen, &NoPrivAction](CodeGenFunction &CGF, 7603 PrePostActionTy &) { 7604 if (!Info.CaptureDeviceAddrMap.empty()) { 7605 CodeGen.setAction(NoPrivAction); 7606 CodeGen(CGF); 7607 } 7608 }; 7609 7610 // We don't have to do anything to close the region if the if clause evaluates 7611 // to false. 7612 auto &&EndElseGen = [](CodeGenFunction &CGF, PrePostActionTy &) {}; 7613 7614 if (IfCond) { 7615 emitOMPIfClause(CGF, IfCond, BeginThenGen, BeginElseGen); 7616 } else { 7617 RegionCodeGenTy RCG(BeginThenGen); 7618 RCG(CGF); 7619 } 7620 7621 // If we don't require privatization of device pointers, we emit the body in 7622 // between the runtime calls. This avoids duplicating the body code. 
7623 if (Info.CaptureDeviceAddrMap.empty()) { 7624 CodeGen.setAction(NoPrivAction); 7625 CodeGen(CGF); 7626 } 7627 7628 if (IfCond) { 7629 emitOMPIfClause(CGF, IfCond, EndThenGen, EndElseGen); 7630 } else { 7631 RegionCodeGenTy RCG(EndThenGen); 7632 RCG(CGF); 7633 } 7634 } 7635 7636 void CGOpenMPRuntime::emitTargetDataStandAloneCall( 7637 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, 7638 const Expr *Device) { 7639 if (!CGF.HaveInsertPoint()) 7640 return; 7641 7642 assert((isa<OMPTargetEnterDataDirective>(D) || 7643 isa<OMPTargetExitDataDirective>(D) || 7644 isa<OMPTargetUpdateDirective>(D)) && 7645 "Expecting either target enter, exit data, or update directives."); 7646 7647 CodeGenFunction::OMPTargetDataInfo InputInfo; 7648 llvm::Value *MapTypesArray = nullptr; 7649 // Generate the code for the opening of the data environment. 7650 auto &&ThenGen = [this, &D, Device, &InputInfo, 7651 &MapTypesArray](CodeGenFunction &CGF, PrePostActionTy &) { 7652 // Emit device ID if any. 7653 llvm::Value *DeviceID = nullptr; 7654 if (Device) { 7655 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device), 7656 CGF.Int64Ty, /*isSigned=*/true); 7657 } else { 7658 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF); 7659 } 7660 7661 // Emit the number of elements in the offloading arrays. 7662 llvm::Constant *PointerNum = 7663 CGF.Builder.getInt32(InputInfo.NumberOfTargetItems); 7664 7665 llvm::Value *OffloadingArgs[] = {DeviceID, 7666 PointerNum, 7667 InputInfo.BasePointersArray.getPointer(), 7668 InputInfo.PointersArray.getPointer(), 7669 InputInfo.SizesArray.getPointer(), 7670 MapTypesArray}; 7671 7672 // Select the right runtime function call for each expected standalone 7673 // directive. 
7674 const bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>(); 7675 OpenMPRTLFunction RTLFn; 7676 switch (D.getDirectiveKind()) { 7677 default: 7678 llvm_unreachable("Unexpected standalone target data directive."); 7679 break; 7680 case OMPD_target_enter_data: 7681 RTLFn = HasNowait ? OMPRTL__tgt_target_data_begin_nowait 7682 : OMPRTL__tgt_target_data_begin; 7683 break; 7684 case OMPD_target_exit_data: 7685 RTLFn = HasNowait ? OMPRTL__tgt_target_data_end_nowait 7686 : OMPRTL__tgt_target_data_end; 7687 break; 7688 case OMPD_target_update: 7689 RTLFn = HasNowait ? OMPRTL__tgt_target_data_update_nowait 7690 : OMPRTL__tgt_target_data_update; 7691 break; 7692 } 7693 CGF.EmitRuntimeCall(createRuntimeFunction(RTLFn), OffloadingArgs); 7694 }; 7695 7696 auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray]( 7697 CodeGenFunction &CGF, PrePostActionTy &) { 7698 // Fill up the arrays with all the mapped variables. 7699 MappableExprsHandler::MapBaseValuesArrayTy BasePointers; 7700 MappableExprsHandler::MapValuesArrayTy Pointers; 7701 MappableExprsHandler::MapValuesArrayTy Sizes; 7702 MappableExprsHandler::MapFlagsArrayTy MapTypes; 7703 7704 // Get map clause information. 7705 MappableExprsHandler MEHandler(D, CGF); 7706 MEHandler.generateAllInfo(BasePointers, Pointers, Sizes, MapTypes); 7707 7708 TargetDataInfo Info; 7709 // Fill up the arrays and create the arguments. 
7710 emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info); 7711 emitOffloadingArraysArgument(CGF, Info.BasePointersArray, 7712 Info.PointersArray, Info.SizesArray, 7713 Info.MapTypesArray, Info); 7714 InputInfo.NumberOfTargetItems = Info.NumberOfPtrs; 7715 InputInfo.BasePointersArray = 7716 Address(Info.BasePointersArray, CGM.getPointerAlign()); 7717 InputInfo.PointersArray = 7718 Address(Info.PointersArray, CGM.getPointerAlign()); 7719 InputInfo.SizesArray = 7720 Address(Info.SizesArray, CGM.getPointerAlign()); 7721 MapTypesArray = Info.MapTypesArray; 7722 if (D.hasClausesOfKind<OMPDependClause>()) 7723 CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo); 7724 else 7725 emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen); 7726 }; 7727 7728 if (IfCond) 7729 emitOMPIfClause(CGF, IfCond, TargetThenGen, 7730 [](CodeGenFunction &CGF, PrePostActionTy &) {}); 7731 else { 7732 RegionCodeGenTy ThenRCG(TargetThenGen); 7733 ThenRCG(CGF); 7734 } 7735 } 7736 7737 namespace { 7738 /// Kind of parameter in a function with 'declare simd' directive. 7739 enum ParamKindTy { LinearWithVarStride, Linear, Uniform, Vector }; 7740 /// Attribute set of the parameter. 7741 struct ParamAttrTy { 7742 ParamKindTy Kind = Vector; 7743 llvm::APSInt StrideOrArg; 7744 llvm::APSInt Alignment; 7745 }; 7746 } // namespace 7747 7748 static unsigned evaluateCDTSize(const FunctionDecl *FD, 7749 ArrayRef<ParamAttrTy> ParamAttrs) { 7750 // Every vector variant of a SIMD-enabled function has a vector length (VLEN). 7751 // If OpenMP clause "simdlen" is used, the VLEN is the value of the argument 7752 // of that clause. The VLEN value must be power of 2. 7753 // In other case the notion of the function`s "characteristic data type" (CDT) 7754 // is used to compute the vector length. 7755 // CDT is defined in the following order: 7756 // a) For non-void function, the CDT is the return type. 
7757 // b) If the function has any non-uniform, non-linear parameters, then the 7758 // CDT is the type of the first such parameter. 7759 // c) If the CDT determined by a) or b) above is struct, union, or class 7760 // type which is pass-by-value (except for the type that maps to the 7761 // built-in complex data type), the characteristic data type is int. 7762 // d) If none of the above three cases is applicable, the CDT is int. 7763 // The VLEN is then determined based on the CDT and the size of vector 7764 // register of that ISA for which current vector version is generated. The 7765 // VLEN is computed using the formula below: 7766 // VLEN = sizeof(vector_register) / sizeof(CDT), 7767 // where vector register size specified in section 3.2.1 Registers and the 7768 // Stack Frame of original AMD64 ABI document. 7769 QualType RetType = FD->getReturnType(); 7770 if (RetType.isNull()) 7771 return 0; 7772 ASTContext &C = FD->getASTContext(); 7773 QualType CDT; 7774 if (!RetType.isNull() && !RetType->isVoidType()) 7775 CDT = RetType; 7776 else { 7777 unsigned Offset = 0; 7778 if (auto *MD = dyn_cast<CXXMethodDecl>(FD)) { 7779 if (ParamAttrs[Offset].Kind == Vector) 7780 CDT = C.getPointerType(C.getRecordType(MD->getParent())); 7781 ++Offset; 7782 } 7783 if (CDT.isNull()) { 7784 for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) { 7785 if (ParamAttrs[I + Offset].Kind == Vector) { 7786 CDT = FD->getParamDecl(I)->getType(); 7787 break; 7788 } 7789 } 7790 } 7791 } 7792 if (CDT.isNull()) 7793 CDT = C.IntTy; 7794 CDT = CDT->getCanonicalTypeUnqualified(); 7795 if (CDT->isRecordType() || CDT->isUnionType()) 7796 CDT = C.IntTy; 7797 return C.getTypeSize(CDT); 7798 } 7799 7800 static void 7801 emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn, 7802 const llvm::APSInt &VLENVal, 7803 ArrayRef<ParamAttrTy> ParamAttrs, 7804 OMPDeclareSimdDeclAttr::BranchStateTy State) { 7805 struct ISADataTy { 7806 char ISA; 7807 unsigned VecRegSize; 7808 }; 7809 ISADataTy 
ISAData[] = { 7810 { 7811 'b', 128 7812 }, // SSE 7813 { 7814 'c', 256 7815 }, // AVX 7816 { 7817 'd', 256 7818 }, // AVX2 7819 { 7820 'e', 512 7821 }, // AVX512 7822 }; 7823 llvm::SmallVector<char, 2> Masked; 7824 switch (State) { 7825 case OMPDeclareSimdDeclAttr::BS_Undefined: 7826 Masked.push_back('N'); 7827 Masked.push_back('M'); 7828 break; 7829 case OMPDeclareSimdDeclAttr::BS_Notinbranch: 7830 Masked.push_back('N'); 7831 break; 7832 case OMPDeclareSimdDeclAttr::BS_Inbranch: 7833 Masked.push_back('M'); 7834 break; 7835 } 7836 for (auto Mask : Masked) { 7837 for (auto &Data : ISAData) { 7838 SmallString<256> Buffer; 7839 llvm::raw_svector_ostream Out(Buffer); 7840 Out << "_ZGV" << Data.ISA << Mask; 7841 if (!VLENVal) { 7842 Out << llvm::APSInt::getUnsigned(Data.VecRegSize / 7843 evaluateCDTSize(FD, ParamAttrs)); 7844 } else 7845 Out << VLENVal; 7846 for (auto &ParamAttr : ParamAttrs) { 7847 switch (ParamAttr.Kind){ 7848 case LinearWithVarStride: 7849 Out << 's' << ParamAttr.StrideOrArg; 7850 break; 7851 case Linear: 7852 Out << 'l'; 7853 if (!!ParamAttr.StrideOrArg) 7854 Out << ParamAttr.StrideOrArg; 7855 break; 7856 case Uniform: 7857 Out << 'u'; 7858 break; 7859 case Vector: 7860 Out << 'v'; 7861 break; 7862 } 7863 if (!!ParamAttr.Alignment) 7864 Out << 'a' << ParamAttr.Alignment; 7865 } 7866 Out << '_' << Fn->getName(); 7867 Fn->addFnAttr(Out.str()); 7868 } 7869 } 7870 } 7871 7872 void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD, 7873 llvm::Function *Fn) { 7874 ASTContext &C = CGM.getContext(); 7875 FD = FD->getMostRecentDecl(); 7876 // Map params to their positions in function decl. 
7877 llvm::DenseMap<const Decl *, unsigned> ParamPositions; 7878 if (isa<CXXMethodDecl>(FD)) 7879 ParamPositions.insert({FD, 0}); 7880 unsigned ParamPos = ParamPositions.size(); 7881 for (auto *P : FD->parameters()) { 7882 ParamPositions.insert({P->getCanonicalDecl(), ParamPos}); 7883 ++ParamPos; 7884 } 7885 while (FD) { 7886 for (auto *Attr : FD->specific_attrs<OMPDeclareSimdDeclAttr>()) { 7887 llvm::SmallVector<ParamAttrTy, 8> ParamAttrs(ParamPositions.size()); 7888 // Mark uniform parameters. 7889 for (auto *E : Attr->uniforms()) { 7890 E = E->IgnoreParenImpCasts(); 7891 unsigned Pos; 7892 if (isa<CXXThisExpr>(E)) 7893 Pos = ParamPositions[FD]; 7894 else { 7895 auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl()) 7896 ->getCanonicalDecl(); 7897 Pos = ParamPositions[PVD]; 7898 } 7899 ParamAttrs[Pos].Kind = Uniform; 7900 } 7901 // Get alignment info. 7902 auto NI = Attr->alignments_begin(); 7903 for (auto *E : Attr->aligneds()) { 7904 E = E->IgnoreParenImpCasts(); 7905 unsigned Pos; 7906 QualType ParmTy; 7907 if (isa<CXXThisExpr>(E)) { 7908 Pos = ParamPositions[FD]; 7909 ParmTy = E->getType(); 7910 } else { 7911 auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl()) 7912 ->getCanonicalDecl(); 7913 Pos = ParamPositions[PVD]; 7914 ParmTy = PVD->getType(); 7915 } 7916 ParamAttrs[Pos].Alignment = 7917 (*NI) 7918 ? (*NI)->EvaluateKnownConstInt(C) 7919 : llvm::APSInt::getUnsigned( 7920 C.toCharUnitsFromBits(C.getOpenMPDefaultSimdAlign(ParmTy)) 7921 .getQuantity()); 7922 ++NI; 7923 } 7924 // Mark linear parameters. 
7925 auto SI = Attr->steps_begin(); 7926 auto MI = Attr->modifiers_begin(); 7927 for (auto *E : Attr->linears()) { 7928 E = E->IgnoreParenImpCasts(); 7929 unsigned Pos; 7930 if (isa<CXXThisExpr>(E)) 7931 Pos = ParamPositions[FD]; 7932 else { 7933 auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl()) 7934 ->getCanonicalDecl(); 7935 Pos = ParamPositions[PVD]; 7936 } 7937 auto &ParamAttr = ParamAttrs[Pos]; 7938 ParamAttr.Kind = Linear; 7939 if (*SI) { 7940 if (!(*SI)->EvaluateAsInt(ParamAttr.StrideOrArg, C, 7941 Expr::SE_AllowSideEffects)) { 7942 if (auto *DRE = cast<DeclRefExpr>((*SI)->IgnoreParenImpCasts())) { 7943 if (auto *StridePVD = cast<ParmVarDecl>(DRE->getDecl())) { 7944 ParamAttr.Kind = LinearWithVarStride; 7945 ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned( 7946 ParamPositions[StridePVD->getCanonicalDecl()]); 7947 } 7948 } 7949 } 7950 } 7951 ++SI; 7952 ++MI; 7953 } 7954 llvm::APSInt VLENVal; 7955 if (const Expr *VLEN = Attr->getSimdlen()) 7956 VLENVal = VLEN->EvaluateKnownConstInt(C); 7957 OMPDeclareSimdDeclAttr::BranchStateTy State = Attr->getBranchState(); 7958 if (CGM.getTriple().getArch() == llvm::Triple::x86 || 7959 CGM.getTriple().getArch() == llvm::Triple::x86_64) 7960 emitX86DeclareSimdFunction(FD, Fn, VLENVal, ParamAttrs, State); 7961 } 7962 FD = FD->getPreviousDecl(); 7963 } 7964 } 7965 7966 namespace { 7967 /// Cleanup action for doacross support. 
7968 class DoacrossCleanupTy final : public EHScopeStack::Cleanup { 7969 public: 7970 static const int DoacrossFinArgs = 2; 7971 7972 private: 7973 llvm::Value *RTLFn; 7974 llvm::Value *Args[DoacrossFinArgs]; 7975 7976 public: 7977 DoacrossCleanupTy(llvm::Value *RTLFn, ArrayRef<llvm::Value *> CallArgs) 7978 : RTLFn(RTLFn) { 7979 assert(CallArgs.size() == DoacrossFinArgs); 7980 std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args)); 7981 } 7982 void Emit(CodeGenFunction &CGF, Flags /*flags*/) override { 7983 if (!CGF.HaveInsertPoint()) 7984 return; 7985 CGF.EmitRuntimeCall(RTLFn, Args); 7986 } 7987 }; 7988 } // namespace 7989 7990 void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF, 7991 const OMPLoopDirective &D) { 7992 if (!CGF.HaveInsertPoint()) 7993 return; 7994 7995 ASTContext &C = CGM.getContext(); 7996 QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true); 7997 RecordDecl *RD; 7998 if (KmpDimTy.isNull()) { 7999 // Build struct kmp_dim { // loop bounds info casted to kmp_int64 8000 // kmp_int64 lo; // lower 8001 // kmp_int64 up; // upper 8002 // kmp_int64 st; // stride 8003 // }; 8004 RD = C.buildImplicitRecord("kmp_dim"); 8005 RD->startDefinition(); 8006 addFieldToRecordDecl(C, RD, Int64Ty); 8007 addFieldToRecordDecl(C, RD, Int64Ty); 8008 addFieldToRecordDecl(C, RD, Int64Ty); 8009 RD->completeDefinition(); 8010 KmpDimTy = C.getRecordType(RD); 8011 } else 8012 RD = cast<RecordDecl>(KmpDimTy->getAsTagDecl()); 8013 8014 Address DimsAddr = CGF.CreateMemTemp(KmpDimTy, "dims"); 8015 CGF.EmitNullInitialization(DimsAddr, KmpDimTy); 8016 enum { LowerFD = 0, UpperFD, StrideFD }; 8017 // Fill dims with data. 
8018 LValue DimsLVal = CGF.MakeAddrLValue(DimsAddr, KmpDimTy); 8019 // dims.upper = num_iterations; 8020 LValue UpperLVal = 8021 CGF.EmitLValueForField(DimsLVal, *std::next(RD->field_begin(), UpperFD)); 8022 llvm::Value *NumIterVal = CGF.EmitScalarConversion( 8023 CGF.EmitScalarExpr(D.getNumIterations()), D.getNumIterations()->getType(), 8024 Int64Ty, D.getNumIterations()->getExprLoc()); 8025 CGF.EmitStoreOfScalar(NumIterVal, UpperLVal); 8026 // dims.stride = 1; 8027 LValue StrideLVal = 8028 CGF.EmitLValueForField(DimsLVal, *std::next(RD->field_begin(), StrideFD)); 8029 CGF.EmitStoreOfScalar(llvm::ConstantInt::getSigned(CGM.Int64Ty, /*V=*/1), 8030 StrideLVal); 8031 8032 // Build call void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid, 8033 // kmp_int32 num_dims, struct kmp_dim * dims); 8034 llvm::Value *Args[] = {emitUpdateLocation(CGF, D.getLocStart()), 8035 getThreadID(CGF, D.getLocStart()), 8036 llvm::ConstantInt::getSigned(CGM.Int32Ty, 1), 8037 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 8038 DimsAddr.getPointer(), CGM.VoidPtrTy)}; 8039 8040 llvm::Value *RTLFn = createRuntimeFunction(OMPRTL__kmpc_doacross_init); 8041 CGF.EmitRuntimeCall(RTLFn, Args); 8042 llvm::Value *FiniArgs[DoacrossCleanupTy::DoacrossFinArgs] = { 8043 emitUpdateLocation(CGF, D.getLocEnd()), getThreadID(CGF, D.getLocEnd())}; 8044 llvm::Value *FiniRTLFn = createRuntimeFunction(OMPRTL__kmpc_doacross_fini); 8045 CGF.EHStack.pushCleanup<DoacrossCleanupTy>(NormalAndEHCleanup, FiniRTLFn, 8046 llvm::makeArrayRef(FiniArgs)); 8047 } 8048 8049 void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF, 8050 const OMPDependClause *C) { 8051 QualType Int64Ty = 8052 CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1); 8053 const Expr *CounterVal = C->getCounterValue(); 8054 assert(CounterVal); 8055 llvm::Value *CntVal = CGF.EmitScalarConversion(CGF.EmitScalarExpr(CounterVal), 8056 CounterVal->getType(), Int64Ty, 8057 CounterVal->getExprLoc()); 8058 Address CntAddr = 
CGF.CreateMemTemp(Int64Ty, ".cnt.addr"); 8059 CGF.EmitStoreOfScalar(CntVal, CntAddr, /*Volatile=*/false, Int64Ty); 8060 llvm::Value *Args[] = {emitUpdateLocation(CGF, C->getLocStart()), 8061 getThreadID(CGF, C->getLocStart()), 8062 CntAddr.getPointer()}; 8063 llvm::Value *RTLFn; 8064 if (C->getDependencyKind() == OMPC_DEPEND_source) 8065 RTLFn = createRuntimeFunction(OMPRTL__kmpc_doacross_post); 8066 else { 8067 assert(C->getDependencyKind() == OMPC_DEPEND_sink); 8068 RTLFn = createRuntimeFunction(OMPRTL__kmpc_doacross_wait); 8069 } 8070 CGF.EmitRuntimeCall(RTLFn, Args); 8071 } 8072 8073 void CGOpenMPRuntime::emitCall(CodeGenFunction &CGF, SourceLocation Loc, 8074 llvm::Value *Callee, 8075 ArrayRef<llvm::Value *> Args) const { 8076 assert(Loc.isValid() && "Outlined function call location must be valid."); 8077 auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc); 8078 8079 if (auto *Fn = dyn_cast<llvm::Function>(Callee)) { 8080 if (Fn->doesNotThrow()) { 8081 CGF.EmitNounwindRuntimeCall(Fn, Args); 8082 return; 8083 } 8084 } 8085 CGF.EmitRuntimeCall(Callee, Args); 8086 } 8087 8088 void CGOpenMPRuntime::emitOutlinedFunctionCall( 8089 CodeGenFunction &CGF, SourceLocation Loc, llvm::Value *OutlinedFn, 8090 ArrayRef<llvm::Value *> Args) const { 8091 emitCall(CGF, Loc, OutlinedFn, Args); 8092 } 8093 8094 Address CGOpenMPRuntime::getParameterAddress(CodeGenFunction &CGF, 8095 const VarDecl *NativeParam, 8096 const VarDecl *TargetParam) const { 8097 return CGF.GetAddrOfLocalVar(NativeParam); 8098 } 8099 8100 llvm::Value *CGOpenMPSIMDRuntime::emitParallelOutlinedFunction( 8101 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, 8102 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { 8103 llvm_unreachable("Not supported in SIMD-only mode"); 8104 } 8105 8106 llvm::Value *CGOpenMPSIMDRuntime::emitTeamsOutlinedFunction( 8107 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, 8108 OpenMPDirectiveKind InnermostKind, const 
RegionCodeGenTy &CodeGen) { 8109 llvm_unreachable("Not supported in SIMD-only mode"); 8110 } 8111 8112 llvm::Value *CGOpenMPSIMDRuntime::emitTaskOutlinedFunction( 8113 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, 8114 const VarDecl *PartIDVar, const VarDecl *TaskTVar, 8115 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen, 8116 bool Tied, unsigned &NumberOfParts) { 8117 llvm_unreachable("Not supported in SIMD-only mode"); 8118 } 8119 8120 void CGOpenMPSIMDRuntime::emitParallelCall(CodeGenFunction &CGF, 8121 SourceLocation Loc, 8122 llvm::Value *OutlinedFn, 8123 ArrayRef<llvm::Value *> CapturedVars, 8124 const Expr *IfCond) { 8125 llvm_unreachable("Not supported in SIMD-only mode"); 8126 } 8127 8128 void CGOpenMPSIMDRuntime::emitCriticalRegion( 8129 CodeGenFunction &CGF, StringRef CriticalName, 8130 const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc, 8131 const Expr *Hint) { 8132 llvm_unreachable("Not supported in SIMD-only mode"); 8133 } 8134 8135 void CGOpenMPSIMDRuntime::emitMasterRegion(CodeGenFunction &CGF, 8136 const RegionCodeGenTy &MasterOpGen, 8137 SourceLocation Loc) { 8138 llvm_unreachable("Not supported in SIMD-only mode"); 8139 } 8140 8141 void CGOpenMPSIMDRuntime::emitTaskyieldCall(CodeGenFunction &CGF, 8142 SourceLocation Loc) { 8143 llvm_unreachable("Not supported in SIMD-only mode"); 8144 } 8145 8146 void CGOpenMPSIMDRuntime::emitTaskgroupRegion( 8147 CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen, 8148 SourceLocation Loc) { 8149 llvm_unreachable("Not supported in SIMD-only mode"); 8150 } 8151 8152 void CGOpenMPSIMDRuntime::emitSingleRegion( 8153 CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen, 8154 SourceLocation Loc, ArrayRef<const Expr *> CopyprivateVars, 8155 ArrayRef<const Expr *> DestExprs, ArrayRef<const Expr *> SrcExprs, 8156 ArrayRef<const Expr *> AssignmentOps) { 8157 llvm_unreachable("Not supported in SIMD-only mode"); 8158 } 8159 8160 void 
CGOpenMPSIMDRuntime::emitOrderedRegion(CodeGenFunction &CGF, 8161 const RegionCodeGenTy &OrderedOpGen, 8162 SourceLocation Loc, 8163 bool IsThreads) { 8164 llvm_unreachable("Not supported in SIMD-only mode"); 8165 } 8166 8167 void CGOpenMPSIMDRuntime::emitBarrierCall(CodeGenFunction &CGF, 8168 SourceLocation Loc, 8169 OpenMPDirectiveKind Kind, 8170 bool EmitChecks, 8171 bool ForceSimpleCall) { 8172 llvm_unreachable("Not supported in SIMD-only mode"); 8173 } 8174 8175 void CGOpenMPSIMDRuntime::emitForDispatchInit( 8176 CodeGenFunction &CGF, SourceLocation Loc, 8177 const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned, 8178 bool Ordered, const DispatchRTInput &DispatchValues) { 8179 llvm_unreachable("Not supported in SIMD-only mode"); 8180 } 8181 8182 void CGOpenMPSIMDRuntime::emitForStaticInit( 8183 CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind, 8184 const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values) { 8185 llvm_unreachable("Not supported in SIMD-only mode"); 8186 } 8187 8188 void CGOpenMPSIMDRuntime::emitDistributeStaticInit( 8189 CodeGenFunction &CGF, SourceLocation Loc, 8190 OpenMPDistScheduleClauseKind SchedKind, const StaticRTInput &Values) { 8191 llvm_unreachable("Not supported in SIMD-only mode"); 8192 } 8193 8194 void CGOpenMPSIMDRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF, 8195 SourceLocation Loc, 8196 unsigned IVSize, 8197 bool IVSigned) { 8198 llvm_unreachable("Not supported in SIMD-only mode"); 8199 } 8200 8201 void CGOpenMPSIMDRuntime::emitForStaticFinish(CodeGenFunction &CGF, 8202 SourceLocation Loc, 8203 OpenMPDirectiveKind DKind) { 8204 llvm_unreachable("Not supported in SIMD-only mode"); 8205 } 8206 8207 llvm::Value *CGOpenMPSIMDRuntime::emitForNext(CodeGenFunction &CGF, 8208 SourceLocation Loc, 8209 unsigned IVSize, bool IVSigned, 8210 Address IL, Address LB, 8211 Address UB, Address ST) { 8212 llvm_unreachable("Not supported in SIMD-only mode"); 8213 } 8214 8215 void 
CGOpenMPSIMDRuntime::emitNumThreadsClause(CodeGenFunction &CGF, 8216 llvm::Value *NumThreads, 8217 SourceLocation Loc) { 8218 llvm_unreachable("Not supported in SIMD-only mode"); 8219 } 8220 8221 void CGOpenMPSIMDRuntime::emitProcBindClause(CodeGenFunction &CGF, 8222 OpenMPProcBindClauseKind ProcBind, 8223 SourceLocation Loc) { 8224 llvm_unreachable("Not supported in SIMD-only mode"); 8225 } 8226 8227 Address CGOpenMPSIMDRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF, 8228 const VarDecl *VD, 8229 Address VDAddr, 8230 SourceLocation Loc) { 8231 llvm_unreachable("Not supported in SIMD-only mode"); 8232 } 8233 8234 llvm::Function *CGOpenMPSIMDRuntime::emitThreadPrivateVarDefinition( 8235 const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit, 8236 CodeGenFunction *CGF) { 8237 llvm_unreachable("Not supported in SIMD-only mode"); 8238 } 8239 8240 Address CGOpenMPSIMDRuntime::getAddrOfArtificialThreadPrivate( 8241 CodeGenFunction &CGF, QualType VarType, StringRef Name) { 8242 llvm_unreachable("Not supported in SIMD-only mode"); 8243 } 8244 8245 void CGOpenMPSIMDRuntime::emitFlush(CodeGenFunction &CGF, 8246 ArrayRef<const Expr *> Vars, 8247 SourceLocation Loc) { 8248 llvm_unreachable("Not supported in SIMD-only mode"); 8249 } 8250 8251 void CGOpenMPSIMDRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc, 8252 const OMPExecutableDirective &D, 8253 llvm::Value *TaskFunction, 8254 QualType SharedsTy, Address Shareds, 8255 const Expr *IfCond, 8256 const OMPTaskDataTy &Data) { 8257 llvm_unreachable("Not supported in SIMD-only mode"); 8258 } 8259 8260 void CGOpenMPSIMDRuntime::emitTaskLoopCall( 8261 CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D, 8262 llvm::Value *TaskFunction, QualType SharedsTy, Address Shareds, 8263 const Expr *IfCond, const OMPTaskDataTy &Data) { 8264 llvm_unreachable("Not supported in SIMD-only mode"); 8265 } 8266 8267 void CGOpenMPSIMDRuntime::emitReduction( 8268 CodeGenFunction &CGF, SourceLocation 
Loc, ArrayRef<const Expr *> Privates, 8269 ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs, 8270 ArrayRef<const Expr *> ReductionOps, ReductionOptionsTy Options) { 8271 assert(Options.SimpleReduction && "Only simple reduction is expected."); 8272 CGOpenMPRuntime::emitReduction(CGF, Loc, Privates, LHSExprs, RHSExprs, 8273 ReductionOps, Options); 8274 } 8275 8276 llvm::Value *CGOpenMPSIMDRuntime::emitTaskReductionInit( 8277 CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs, 8278 ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) { 8279 llvm_unreachable("Not supported in SIMD-only mode"); 8280 } 8281 8282 void CGOpenMPSIMDRuntime::emitTaskReductionFixups(CodeGenFunction &CGF, 8283 SourceLocation Loc, 8284 ReductionCodeGen &RCG, 8285 unsigned N) { 8286 llvm_unreachable("Not supported in SIMD-only mode"); 8287 } 8288 8289 Address CGOpenMPSIMDRuntime::getTaskReductionItem(CodeGenFunction &CGF, 8290 SourceLocation Loc, 8291 llvm::Value *ReductionsPtr, 8292 LValue SharedLVal) { 8293 llvm_unreachable("Not supported in SIMD-only mode"); 8294 } 8295 8296 void CGOpenMPSIMDRuntime::emitTaskwaitCall(CodeGenFunction &CGF, 8297 SourceLocation Loc) { 8298 llvm_unreachable("Not supported in SIMD-only mode"); 8299 } 8300 8301 void CGOpenMPSIMDRuntime::emitCancellationPointCall( 8302 CodeGenFunction &CGF, SourceLocation Loc, 8303 OpenMPDirectiveKind CancelRegion) { 8304 llvm_unreachable("Not supported in SIMD-only mode"); 8305 } 8306 8307 void CGOpenMPSIMDRuntime::emitCancelCall(CodeGenFunction &CGF, 8308 SourceLocation Loc, const Expr *IfCond, 8309 OpenMPDirectiveKind CancelRegion) { 8310 llvm_unreachable("Not supported in SIMD-only mode"); 8311 } 8312 8313 void CGOpenMPSIMDRuntime::emitTargetOutlinedFunction( 8314 const OMPExecutableDirective &D, StringRef ParentName, 8315 llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, 8316 bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) { 8317 llvm_unreachable("Not supported in 
SIMD-only mode"); 8318 } 8319 8320 void CGOpenMPSIMDRuntime::emitTargetCall(CodeGenFunction &CGF, 8321 const OMPExecutableDirective &D, 8322 llvm::Value *OutlinedFn, 8323 llvm::Value *OutlinedFnID, 8324 const Expr *IfCond, const Expr *Device) { 8325 llvm_unreachable("Not supported in SIMD-only mode"); 8326 } 8327 8328 bool CGOpenMPSIMDRuntime::emitTargetFunctions(GlobalDecl GD) { 8329 llvm_unreachable("Not supported in SIMD-only mode"); 8330 } 8331 8332 bool CGOpenMPSIMDRuntime::emitTargetGlobalVariable(GlobalDecl GD) { 8333 llvm_unreachable("Not supported in SIMD-only mode"); 8334 } 8335 8336 bool CGOpenMPSIMDRuntime::emitTargetGlobal(GlobalDecl GD) { 8337 return false; 8338 } 8339 8340 llvm::Function *CGOpenMPSIMDRuntime::emitRegistrationFunction() { 8341 return nullptr; 8342 } 8343 8344 void CGOpenMPSIMDRuntime::emitTeamsCall(CodeGenFunction &CGF, 8345 const OMPExecutableDirective &D, 8346 SourceLocation Loc, 8347 llvm::Value *OutlinedFn, 8348 ArrayRef<llvm::Value *> CapturedVars) { 8349 llvm_unreachable("Not supported in SIMD-only mode"); 8350 } 8351 8352 void CGOpenMPSIMDRuntime::emitNumTeamsClause(CodeGenFunction &CGF, 8353 const Expr *NumTeams, 8354 const Expr *ThreadLimit, 8355 SourceLocation Loc) { 8356 llvm_unreachable("Not supported in SIMD-only mode"); 8357 } 8358 8359 void CGOpenMPSIMDRuntime::emitTargetDataCalls( 8360 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, 8361 const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) { 8362 llvm_unreachable("Not supported in SIMD-only mode"); 8363 } 8364 8365 void CGOpenMPSIMDRuntime::emitTargetDataStandAloneCall( 8366 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, 8367 const Expr *Device) { 8368 llvm_unreachable("Not supported in SIMD-only mode"); 8369 } 8370 8371 void CGOpenMPSIMDRuntime::emitDoacrossInit(CodeGenFunction &CGF, 8372 const OMPLoopDirective &D) { 8373 llvm_unreachable("Not supported in SIMD-only mode"); 8374 } 8375 8376 
void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF, 8377 const OMPDependClause *C) { 8378 llvm_unreachable("Not supported in SIMD-only mode"); 8379 } 8380 8381 const VarDecl * 8382 CGOpenMPSIMDRuntime::translateParameter(const FieldDecl *FD, 8383 const VarDecl *NativeParam) const { 8384 llvm_unreachable("Not supported in SIMD-only mode"); 8385 } 8386 8387 Address 8388 CGOpenMPSIMDRuntime::getParameterAddress(CodeGenFunction &CGF, 8389 const VarDecl *NativeParam, 8390 const VarDecl *TargetParam) const { 8391 llvm_unreachable("Not supported in SIMD-only mode"); 8392 } 8393 8394