1 //===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This provides a class for OpenMP runtime code generation. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "CGCXXABI.h" 15 #include "CGCleanup.h" 16 #include "CGOpenMPRuntime.h" 17 #include "CodeGenFunction.h" 18 #include "clang/CodeGen/ConstantInitBuilder.h" 19 #include "clang/AST/Decl.h" 20 #include "clang/AST/StmtOpenMP.h" 21 #include "llvm/ADT/ArrayRef.h" 22 #include "llvm/ADT/BitmaskEnum.h" 23 #include "llvm/Bitcode/BitcodeReader.h" 24 #include "llvm/IR/CallSite.h" 25 #include "llvm/IR/DerivedTypes.h" 26 #include "llvm/IR/GlobalValue.h" 27 #include "llvm/IR/Value.h" 28 #include "llvm/Support/Format.h" 29 #include "llvm/Support/raw_ostream.h" 30 #include <cassert> 31 32 using namespace clang; 33 using namespace CodeGen; 34 35 namespace { 36 /// \brief Base class for handling code generation inside OpenMP regions. 37 class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo { 38 public: 39 /// \brief Kinds of OpenMP regions used in codegen. 40 enum CGOpenMPRegionKind { 41 /// \brief Region with outlined function for standalone 'parallel' 42 /// directive. 43 ParallelOutlinedRegion, 44 /// \brief Region with outlined function for standalone 'task' directive. 45 TaskOutlinedRegion, 46 /// \brief Region for constructs that do not require function outlining, 47 /// like 'for', 'sections', 'atomic' etc. directives. 48 InlinedRegion, 49 /// \brief Region with outlined function for standalone 'target' directive. 
50 TargetRegion, 51 }; 52 53 CGOpenMPRegionInfo(const CapturedStmt &CS, 54 const CGOpenMPRegionKind RegionKind, 55 const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind, 56 bool HasCancel) 57 : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind), 58 CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {} 59 60 CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind, 61 const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind, 62 bool HasCancel) 63 : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen), 64 Kind(Kind), HasCancel(HasCancel) {} 65 66 /// \brief Get a variable or parameter for storing global thread id 67 /// inside OpenMP construct. 68 virtual const VarDecl *getThreadIDVariable() const = 0; 69 70 /// \brief Emit the captured statement body. 71 void EmitBody(CodeGenFunction &CGF, const Stmt *S) override; 72 73 /// \brief Get an LValue for the current ThreadID variable. 74 /// \return LValue for thread id variable. This LValue always has type int32*. 75 virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF); 76 77 virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {} 78 79 CGOpenMPRegionKind getRegionKind() const { return RegionKind; } 80 81 OpenMPDirectiveKind getDirectiveKind() const { return Kind; } 82 83 bool hasCancel() const { return HasCancel; } 84 85 static bool classof(const CGCapturedStmtInfo *Info) { 86 return Info->getKind() == CR_OpenMP; 87 } 88 89 ~CGOpenMPRegionInfo() override = default; 90 91 protected: 92 CGOpenMPRegionKind RegionKind; 93 RegionCodeGenTy CodeGen; 94 OpenMPDirectiveKind Kind; 95 bool HasCancel; 96 }; 97 98 /// \brief API for captured statement code generation in OpenMP constructs. 
99 class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo { 100 public: 101 CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar, 102 const RegionCodeGenTy &CodeGen, 103 OpenMPDirectiveKind Kind, bool HasCancel, 104 StringRef HelperName) 105 : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind, 106 HasCancel), 107 ThreadIDVar(ThreadIDVar), HelperName(HelperName) { 108 assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region."); 109 } 110 111 /// \brief Get a variable or parameter for storing global thread id 112 /// inside OpenMP construct. 113 const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; } 114 115 /// \brief Get the name of the capture helper. 116 StringRef getHelperName() const override { return HelperName; } 117 118 static bool classof(const CGCapturedStmtInfo *Info) { 119 return CGOpenMPRegionInfo::classof(Info) && 120 cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == 121 ParallelOutlinedRegion; 122 } 123 124 private: 125 /// \brief A variable or parameter storing global thread id for OpenMP 126 /// constructs. 127 const VarDecl *ThreadIDVar; 128 StringRef HelperName; 129 }; 130 131 /// \brief API for captured statement code generation in OpenMP constructs. 132 class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo { 133 public: 134 class UntiedTaskActionTy final : public PrePostActionTy { 135 bool Untied; 136 const VarDecl *PartIDVar; 137 const RegionCodeGenTy UntiedCodeGen; 138 llvm::SwitchInst *UntiedSwitch = nullptr; 139 140 public: 141 UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar, 142 const RegionCodeGenTy &UntiedCodeGen) 143 : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {} 144 void Enter(CodeGenFunction &CGF) override { 145 if (Untied) { 146 // Emit task switching point. 
147 auto PartIdLVal = CGF.EmitLoadOfPointerLValue( 148 CGF.GetAddrOfLocalVar(PartIDVar), 149 PartIDVar->getType()->castAs<PointerType>()); 150 auto *Res = CGF.EmitLoadOfScalar(PartIdLVal, SourceLocation()); 151 auto *DoneBB = CGF.createBasicBlock(".untied.done."); 152 UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB); 153 CGF.EmitBlock(DoneBB); 154 CGF.EmitBranchThroughCleanup(CGF.ReturnBlock); 155 CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp.")); 156 UntiedSwitch->addCase(CGF.Builder.getInt32(0), 157 CGF.Builder.GetInsertBlock()); 158 emitUntiedSwitch(CGF); 159 } 160 } 161 void emitUntiedSwitch(CodeGenFunction &CGF) const { 162 if (Untied) { 163 auto PartIdLVal = CGF.EmitLoadOfPointerLValue( 164 CGF.GetAddrOfLocalVar(PartIDVar), 165 PartIDVar->getType()->castAs<PointerType>()); 166 CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()), 167 PartIdLVal); 168 UntiedCodeGen(CGF); 169 CodeGenFunction::JumpDest CurPoint = 170 CGF.getJumpDestInCurrentScope(".untied.next."); 171 CGF.EmitBranchThroughCleanup(CGF.ReturnBlock); 172 CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp.")); 173 UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()), 174 CGF.Builder.GetInsertBlock()); 175 CGF.EmitBranchThroughCleanup(CurPoint); 176 CGF.EmitBlock(CurPoint.getBlock()); 177 } 178 } 179 unsigned getNumberOfParts() const { return UntiedSwitch->getNumCases(); } 180 }; 181 CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS, 182 const VarDecl *ThreadIDVar, 183 const RegionCodeGenTy &CodeGen, 184 OpenMPDirectiveKind Kind, bool HasCancel, 185 const UntiedTaskActionTy &Action) 186 : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel), 187 ThreadIDVar(ThreadIDVar), Action(Action) { 188 assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region."); 189 } 190 191 /// \brief Get a variable or parameter for storing global thread id 192 /// inside OpenMP construct. 
193 const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; } 194 195 /// \brief Get an LValue for the current ThreadID variable. 196 LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override; 197 198 /// \brief Get the name of the capture helper. 199 StringRef getHelperName() const override { return ".omp_outlined."; } 200 201 void emitUntiedSwitch(CodeGenFunction &CGF) override { 202 Action.emitUntiedSwitch(CGF); 203 } 204 205 static bool classof(const CGCapturedStmtInfo *Info) { 206 return CGOpenMPRegionInfo::classof(Info) && 207 cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == 208 TaskOutlinedRegion; 209 } 210 211 private: 212 /// \brief A variable or parameter storing global thread id for OpenMP 213 /// constructs. 214 const VarDecl *ThreadIDVar; 215 /// Action for emitting code for untied tasks. 216 const UntiedTaskActionTy &Action; 217 }; 218 219 /// \brief API for inlined captured statement code generation in OpenMP 220 /// constructs. 221 class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo { 222 public: 223 CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI, 224 const RegionCodeGenTy &CodeGen, 225 OpenMPDirectiveKind Kind, bool HasCancel) 226 : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel), 227 OldCSI(OldCSI), 228 OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {} 229 230 // \brief Retrieve the value of the context parameter. 231 llvm::Value *getContextValue() const override { 232 if (OuterRegionInfo) 233 return OuterRegionInfo->getContextValue(); 234 llvm_unreachable("No context value for inlined OpenMP region"); 235 } 236 237 void setContextValue(llvm::Value *V) override { 238 if (OuterRegionInfo) { 239 OuterRegionInfo->setContextValue(V); 240 return; 241 } 242 llvm_unreachable("No context value for inlined OpenMP region"); 243 } 244 245 /// \brief Lookup the captured field decl for a variable. 
246 const FieldDecl *lookup(const VarDecl *VD) const override { 247 if (OuterRegionInfo) 248 return OuterRegionInfo->lookup(VD); 249 // If there is no outer outlined region,no need to lookup in a list of 250 // captured variables, we can use the original one. 251 return nullptr; 252 } 253 254 FieldDecl *getThisFieldDecl() const override { 255 if (OuterRegionInfo) 256 return OuterRegionInfo->getThisFieldDecl(); 257 return nullptr; 258 } 259 260 /// \brief Get a variable or parameter for storing global thread id 261 /// inside OpenMP construct. 262 const VarDecl *getThreadIDVariable() const override { 263 if (OuterRegionInfo) 264 return OuterRegionInfo->getThreadIDVariable(); 265 return nullptr; 266 } 267 268 /// \brief Get an LValue for the current ThreadID variable. 269 LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override { 270 if (OuterRegionInfo) 271 return OuterRegionInfo->getThreadIDVariableLValue(CGF); 272 llvm_unreachable("No LValue for inlined OpenMP construct"); 273 } 274 275 /// \brief Get the name of the capture helper. 276 StringRef getHelperName() const override { 277 if (auto *OuterRegionInfo = getOldCSI()) 278 return OuterRegionInfo->getHelperName(); 279 llvm_unreachable("No helper name for inlined OpenMP construct"); 280 } 281 282 void emitUntiedSwitch(CodeGenFunction &CGF) override { 283 if (OuterRegionInfo) 284 OuterRegionInfo->emitUntiedSwitch(CGF); 285 } 286 287 CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; } 288 289 static bool classof(const CGCapturedStmtInfo *Info) { 290 return CGOpenMPRegionInfo::classof(Info) && 291 cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion; 292 } 293 294 ~CGOpenMPInlinedRegionInfo() override = default; 295 296 private: 297 /// \brief CodeGen info about outer OpenMP region. 298 CodeGenFunction::CGCapturedStmtInfo *OldCSI; 299 CGOpenMPRegionInfo *OuterRegionInfo; 300 }; 301 302 /// \brief API for captured statement code generation in OpenMP target 303 /// constructs. 
For this captures, implicit parameters are used instead of the 304 /// captured fields. The name of the target region has to be unique in a given 305 /// application so it is provided by the client, because only the client has 306 /// the information to generate that. 307 class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo { 308 public: 309 CGOpenMPTargetRegionInfo(const CapturedStmt &CS, 310 const RegionCodeGenTy &CodeGen, StringRef HelperName) 311 : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target, 312 /*HasCancel=*/false), 313 HelperName(HelperName) {} 314 315 /// \brief This is unused for target regions because each starts executing 316 /// with a single thread. 317 const VarDecl *getThreadIDVariable() const override { return nullptr; } 318 319 /// \brief Get the name of the capture helper. 320 StringRef getHelperName() const override { return HelperName; } 321 322 static bool classof(const CGCapturedStmtInfo *Info) { 323 return CGOpenMPRegionInfo::classof(Info) && 324 cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion; 325 } 326 327 private: 328 StringRef HelperName; 329 }; 330 331 static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) { 332 llvm_unreachable("No codegen for expressions"); 333 } 334 /// \brief API for generation of expressions captured in a innermost OpenMP 335 /// region. 336 class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo { 337 public: 338 CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS) 339 : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen, 340 OMPD_unknown, 341 /*HasCancel=*/false), 342 PrivScope(CGF) { 343 // Make sure the globals captured in the provided statement are local by 344 // using the privatization logic. We assume the same variable is not 345 // captured more than once. 
346 for (auto &C : CS.captures()) { 347 if (!C.capturesVariable() && !C.capturesVariableByCopy()) 348 continue; 349 350 const VarDecl *VD = C.getCapturedVar(); 351 if (VD->isLocalVarDeclOrParm()) 352 continue; 353 354 DeclRefExpr DRE(const_cast<VarDecl *>(VD), 355 /*RefersToEnclosingVariableOrCapture=*/false, 356 VD->getType().getNonReferenceType(), VK_LValue, 357 SourceLocation()); 358 PrivScope.addPrivate(VD, [&CGF, &DRE]() -> Address { 359 return CGF.EmitLValue(&DRE).getAddress(); 360 }); 361 } 362 (void)PrivScope.Privatize(); 363 } 364 365 /// \brief Lookup the captured field decl for a variable. 366 const FieldDecl *lookup(const VarDecl *VD) const override { 367 if (auto *FD = CGOpenMPInlinedRegionInfo::lookup(VD)) 368 return FD; 369 return nullptr; 370 } 371 372 /// \brief Emit the captured statement body. 373 void EmitBody(CodeGenFunction &CGF, const Stmt *S) override { 374 llvm_unreachable("No body for expressions"); 375 } 376 377 /// \brief Get a variable or parameter for storing global thread id 378 /// inside OpenMP construct. 379 const VarDecl *getThreadIDVariable() const override { 380 llvm_unreachable("No thread id for expressions"); 381 } 382 383 /// \brief Get the name of the capture helper. 384 StringRef getHelperName() const override { 385 llvm_unreachable("No helper name for expressions"); 386 } 387 388 static bool classof(const CGCapturedStmtInfo *Info) { return false; } 389 390 private: 391 /// Private scope to capture global variables. 392 CodeGenFunction::OMPPrivateScope PrivScope; 393 }; 394 395 /// \brief RAII for emitting code of OpenMP constructs. 396 class InlinedOpenMPRegionRAII { 397 CodeGenFunction &CGF; 398 llvm::DenseMap<const VarDecl *, FieldDecl *> LambdaCaptureFields; 399 FieldDecl *LambdaThisCaptureField = nullptr; 400 const CodeGen::CGBlockInfo *BlockInfo = nullptr; 401 402 public: 403 /// \brief Constructs region for combined constructs. 404 /// \param CodeGen Code generation sequence for combined directives. 
Includes 405 /// a list of functions used for code generation of implicitly inlined 406 /// regions. 407 InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen, 408 OpenMPDirectiveKind Kind, bool HasCancel) 409 : CGF(CGF) { 410 // Start emission for the construct. 411 CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo( 412 CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel); 413 std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields); 414 LambdaThisCaptureField = CGF.LambdaThisCaptureField; 415 CGF.LambdaThisCaptureField = nullptr; 416 BlockInfo = CGF.BlockInfo; 417 CGF.BlockInfo = nullptr; 418 } 419 420 ~InlinedOpenMPRegionRAII() { 421 // Restore original CapturedStmtInfo only if we're done with code emission. 422 auto *OldCSI = 423 cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI(); 424 delete CGF.CapturedStmtInfo; 425 CGF.CapturedStmtInfo = OldCSI; 426 std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields); 427 CGF.LambdaThisCaptureField = LambdaThisCaptureField; 428 CGF.BlockInfo = BlockInfo; 429 } 430 }; 431 432 /// \brief Values for bit flags used in the ident_t to describe the fields. 433 /// All enumeric elements are named and described in accordance with the code 434 /// from http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp.h 435 enum OpenMPLocationFlags : unsigned { 436 /// \brief Use trampoline for internal microtask. 437 OMP_IDENT_IMD = 0x01, 438 /// \brief Use c-style ident structure. 439 OMP_IDENT_KMPC = 0x02, 440 /// \brief Atomic reduction option for kmpc_reduce. 441 OMP_ATOMIC_REDUCE = 0x10, 442 /// \brief Explicit 'barrier' directive. 443 OMP_IDENT_BARRIER_EXPL = 0x20, 444 /// \brief Implicit barrier in code. 445 OMP_IDENT_BARRIER_IMPL = 0x40, 446 /// \brief Implicit barrier in 'for' directive. 447 OMP_IDENT_BARRIER_IMPL_FOR = 0x40, 448 /// \brief Implicit barrier in 'sections' directive. 449 OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0, 450 /// \brief Implicit barrier in 'single' directive. 
451 OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140, 452 /// Call of __kmp_for_static_init for static loop. 453 OMP_IDENT_WORK_LOOP = 0x200, 454 /// Call of __kmp_for_static_init for sections. 455 OMP_IDENT_WORK_SECTIONS = 0x400, 456 /// Call of __kmp_for_static_init for distribute. 457 OMP_IDENT_WORK_DISTRIBUTE = 0x800, 458 LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE) 459 }; 460 461 /// \brief Describes ident structure that describes a source location. 462 /// All descriptions are taken from 463 /// http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp.h 464 /// Original structure: 465 /// typedef struct ident { 466 /// kmp_int32 reserved_1; /**< might be used in Fortran; 467 /// see above */ 468 /// kmp_int32 flags; /**< also f.flags; KMP_IDENT_xxx flags; 469 /// KMP_IDENT_KMPC identifies this union 470 /// member */ 471 /// kmp_int32 reserved_2; /**< not really used in Fortran any more; 472 /// see above */ 473 ///#if USE_ITT_BUILD 474 /// /* but currently used for storing 475 /// region-specific ITT */ 476 /// /* contextual information. */ 477 ///#endif /* USE_ITT_BUILD */ 478 /// kmp_int32 reserved_3; /**< source[4] in Fortran, do not use for 479 /// C++ */ 480 /// char const *psource; /**< String describing the source location. 481 /// The string is composed of semi-colon separated 482 // fields which describe the source file, 483 /// the function and a pair of line numbers that 484 /// delimit the construct. 485 /// */ 486 /// } ident_t; 487 enum IdentFieldIndex { 488 /// \brief might be used in Fortran 489 IdentField_Reserved_1, 490 /// \brief OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member. 491 IdentField_Flags, 492 /// \brief Not really used in Fortran any more 493 IdentField_Reserved_2, 494 /// \brief Source[4] in Fortran, do not use for C++ 495 IdentField_Reserved_3, 496 /// \brief String describing the source location. 
The string is composed of 497 /// semi-colon separated fields which describe the source file, the function 498 /// and a pair of line numbers that delimit the construct. 499 IdentField_PSource 500 }; 501 502 /// \brief Schedule types for 'omp for' loops (these enumerators are taken from 503 /// the enum sched_type in kmp.h). 504 enum OpenMPSchedType { 505 /// \brief Lower bound for default (unordered) versions. 506 OMP_sch_lower = 32, 507 OMP_sch_static_chunked = 33, 508 OMP_sch_static = 34, 509 OMP_sch_dynamic_chunked = 35, 510 OMP_sch_guided_chunked = 36, 511 OMP_sch_runtime = 37, 512 OMP_sch_auto = 38, 513 /// static with chunk adjustment (e.g., simd) 514 OMP_sch_static_balanced_chunked = 45, 515 /// \brief Lower bound for 'ordered' versions. 516 OMP_ord_lower = 64, 517 OMP_ord_static_chunked = 65, 518 OMP_ord_static = 66, 519 OMP_ord_dynamic_chunked = 67, 520 OMP_ord_guided_chunked = 68, 521 OMP_ord_runtime = 69, 522 OMP_ord_auto = 70, 523 OMP_sch_default = OMP_sch_static, 524 /// \brief dist_schedule types 525 OMP_dist_sch_static_chunked = 91, 526 OMP_dist_sch_static = 92, 527 /// Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers. 528 /// Set if the monotonic schedule modifier was present. 529 OMP_sch_modifier_monotonic = (1 << 29), 530 /// Set if the nonmonotonic schedule modifier was present. 
531 OMP_sch_modifier_nonmonotonic = (1 << 30), 532 }; 533 534 enum OpenMPRTLFunction { 535 /// \brief Call to void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, 536 /// kmpc_micro microtask, ...); 537 OMPRTL__kmpc_fork_call, 538 /// \brief Call to void *__kmpc_threadprivate_cached(ident_t *loc, 539 /// kmp_int32 global_tid, void *data, size_t size, void ***cache); 540 OMPRTL__kmpc_threadprivate_cached, 541 /// \brief Call to void __kmpc_threadprivate_register( ident_t *, 542 /// void *data, kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor); 543 OMPRTL__kmpc_threadprivate_register, 544 // Call to __kmpc_int32 kmpc_global_thread_num(ident_t *loc); 545 OMPRTL__kmpc_global_thread_num, 546 // Call to void __kmpc_critical(ident_t *loc, kmp_int32 global_tid, 547 // kmp_critical_name *crit); 548 OMPRTL__kmpc_critical, 549 // Call to void __kmpc_critical_with_hint(ident_t *loc, kmp_int32 550 // global_tid, kmp_critical_name *crit, uintptr_t hint); 551 OMPRTL__kmpc_critical_with_hint, 552 // Call to void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid, 553 // kmp_critical_name *crit); 554 OMPRTL__kmpc_end_critical, 555 // Call to kmp_int32 __kmpc_cancel_barrier(ident_t *loc, kmp_int32 556 // global_tid); 557 OMPRTL__kmpc_cancel_barrier, 558 // Call to void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid); 559 OMPRTL__kmpc_barrier, 560 // Call to void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid); 561 OMPRTL__kmpc_for_static_fini, 562 // Call to void __kmpc_serialized_parallel(ident_t *loc, kmp_int32 563 // global_tid); 564 OMPRTL__kmpc_serialized_parallel, 565 // Call to void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32 566 // global_tid); 567 OMPRTL__kmpc_end_serialized_parallel, 568 // Call to void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid, 569 // kmp_int32 num_threads); 570 OMPRTL__kmpc_push_num_threads, 571 // Call to void __kmpc_flush(ident_t *loc); 572 OMPRTL__kmpc_flush, 573 // Call to kmp_int32 __kmpc_master(ident_t 
*, kmp_int32 global_tid); 574 OMPRTL__kmpc_master, 575 // Call to void __kmpc_end_master(ident_t *, kmp_int32 global_tid); 576 OMPRTL__kmpc_end_master, 577 // Call to kmp_int32 __kmpc_omp_taskyield(ident_t *, kmp_int32 global_tid, 578 // int end_part); 579 OMPRTL__kmpc_omp_taskyield, 580 // Call to kmp_int32 __kmpc_single(ident_t *, kmp_int32 global_tid); 581 OMPRTL__kmpc_single, 582 // Call to void __kmpc_end_single(ident_t *, kmp_int32 global_tid); 583 OMPRTL__kmpc_end_single, 584 // Call to kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid, 585 // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, 586 // kmp_routine_entry_t *task_entry); 587 OMPRTL__kmpc_omp_task_alloc, 588 // Call to kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t * 589 // new_task); 590 OMPRTL__kmpc_omp_task, 591 // Call to void __kmpc_copyprivate(ident_t *loc, kmp_int32 global_tid, 592 // size_t cpy_size, void *cpy_data, void(*cpy_func)(void *, void *), 593 // kmp_int32 didit); 594 OMPRTL__kmpc_copyprivate, 595 // Call to kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid, 596 // kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void 597 // (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name *lck); 598 OMPRTL__kmpc_reduce, 599 // Call to kmp_int32 __kmpc_reduce_nowait(ident_t *loc, kmp_int32 600 // global_tid, kmp_int32 num_vars, size_t reduce_size, void *reduce_data, 601 // void (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name 602 // *lck); 603 OMPRTL__kmpc_reduce_nowait, 604 // Call to void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid, 605 // kmp_critical_name *lck); 606 OMPRTL__kmpc_end_reduce, 607 // Call to void __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid, 608 // kmp_critical_name *lck); 609 OMPRTL__kmpc_end_reduce_nowait, 610 // Call to void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid, 611 // kmp_task_t * new_task); 612 OMPRTL__kmpc_omp_task_begin_if0, 613 // Call to 
void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid, 614 // kmp_task_t * new_task); 615 OMPRTL__kmpc_omp_task_complete_if0, 616 // Call to void __kmpc_ordered(ident_t *loc, kmp_int32 global_tid); 617 OMPRTL__kmpc_ordered, 618 // Call to void __kmpc_end_ordered(ident_t *loc, kmp_int32 global_tid); 619 OMPRTL__kmpc_end_ordered, 620 // Call to kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32 621 // global_tid); 622 OMPRTL__kmpc_omp_taskwait, 623 // Call to void __kmpc_taskgroup(ident_t *loc, kmp_int32 global_tid); 624 OMPRTL__kmpc_taskgroup, 625 // Call to void __kmpc_end_taskgroup(ident_t *loc, kmp_int32 global_tid); 626 OMPRTL__kmpc_end_taskgroup, 627 // Call to void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid, 628 // int proc_bind); 629 OMPRTL__kmpc_push_proc_bind, 630 // Call to kmp_int32 __kmpc_omp_task_with_deps(ident_t *loc_ref, kmp_int32 631 // gtid, kmp_task_t * new_task, kmp_int32 ndeps, kmp_depend_info_t 632 // *dep_list, kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list); 633 OMPRTL__kmpc_omp_task_with_deps, 634 // Call to void __kmpc_omp_wait_deps(ident_t *loc_ref, kmp_int32 635 // gtid, kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 636 // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); 637 OMPRTL__kmpc_omp_wait_deps, 638 // Call to kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32 639 // global_tid, kmp_int32 cncl_kind); 640 OMPRTL__kmpc_cancellationpoint, 641 // Call to kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid, 642 // kmp_int32 cncl_kind); 643 OMPRTL__kmpc_cancel, 644 // Call to void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid, 645 // kmp_int32 num_teams, kmp_int32 thread_limit); 646 OMPRTL__kmpc_push_num_teams, 647 // Call to void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro 648 // microtask, ...); 649 OMPRTL__kmpc_fork_teams, 650 // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int 651 // if_val, kmp_uint64 *lb, kmp_uint64 
*ub, kmp_int64 st, int nogroup, int 652 // sched, kmp_uint64 grainsize, void *task_dup); 653 OMPRTL__kmpc_taskloop, 654 // Call to void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid, kmp_int32 655 // num_dims, struct kmp_dim *dims); 656 OMPRTL__kmpc_doacross_init, 657 // Call to void __kmpc_doacross_fini(ident_t *loc, kmp_int32 gtid); 658 OMPRTL__kmpc_doacross_fini, 659 // Call to void __kmpc_doacross_post(ident_t *loc, kmp_int32 gtid, kmp_int64 660 // *vec); 661 OMPRTL__kmpc_doacross_post, 662 // Call to void __kmpc_doacross_wait(ident_t *loc, kmp_int32 gtid, kmp_int64 663 // *vec); 664 OMPRTL__kmpc_doacross_wait, 665 // Call to void *__kmpc_task_reduction_init(int gtid, int num_data, void 666 // *data); 667 OMPRTL__kmpc_task_reduction_init, 668 // Call to void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void 669 // *d); 670 OMPRTL__kmpc_task_reduction_get_th_data, 671 672 // 673 // Offloading related calls 674 // 675 // Call to int32_t __tgt_target(int64_t device_id, void *host_ptr, int32_t 676 // arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t 677 // *arg_types); 678 OMPRTL__tgt_target, 679 // Call to int32_t __tgt_target_nowait(int64_t device_id, void *host_ptr, 680 // int32_t arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t 681 // *arg_types); 682 OMPRTL__tgt_target_nowait, 683 // Call to int32_t __tgt_target_teams(int64_t device_id, void *host_ptr, 684 // int32_t arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t 685 // *arg_types, int32_t num_teams, int32_t thread_limit); 686 OMPRTL__tgt_target_teams, 687 // Call to int32_t __tgt_target_teams_nowait(int64_t device_id, void 688 // *host_ptr, int32_t arg_num, void** args_base, void **args, size_t 689 // *arg_sizes, int64_t *arg_types, int32_t num_teams, int32_t thread_limit); 690 OMPRTL__tgt_target_teams_nowait, 691 // Call to void __tgt_register_lib(__tgt_bin_desc *desc); 692 OMPRTL__tgt_register_lib, 693 // Call to void 
__tgt_unregister_lib(__tgt_bin_desc *desc); 694 OMPRTL__tgt_unregister_lib, 695 // Call to void __tgt_target_data_begin(int64_t device_id, int32_t arg_num, 696 // void** args_base, void **args, size_t *arg_sizes, int64_t *arg_types); 697 OMPRTL__tgt_target_data_begin, 698 // Call to void __tgt_target_data_begin_nowait(int64_t device_id, int32_t 699 // arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t 700 // *arg_types); 701 OMPRTL__tgt_target_data_begin_nowait, 702 // Call to void __tgt_target_data_end(int64_t device_id, int32_t arg_num, 703 // void** args_base, void **args, size_t *arg_sizes, int64_t *arg_types); 704 OMPRTL__tgt_target_data_end, 705 // Call to void __tgt_target_data_end_nowait(int64_t device_id, int32_t 706 // arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t 707 // *arg_types); 708 OMPRTL__tgt_target_data_end_nowait, 709 // Call to void __tgt_target_data_update(int64_t device_id, int32_t arg_num, 710 // void** args_base, void **args, size_t *arg_sizes, int64_t *arg_types); 711 OMPRTL__tgt_target_data_update, 712 // Call to void __tgt_target_data_update_nowait(int64_t device_id, int32_t 713 // arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t 714 // *arg_types); 715 OMPRTL__tgt_target_data_update_nowait, 716 }; 717 718 /// A basic class for pre|post-action for advanced codegen sequence for OpenMP 719 /// region. 
720 class CleanupTy final : public EHScopeStack::Cleanup { 721 PrePostActionTy *Action; 722 723 public: 724 explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {} 725 void Emit(CodeGenFunction &CGF, Flags /*flags*/) override { 726 if (!CGF.HaveInsertPoint()) 727 return; 728 Action->Exit(CGF); 729 } 730 }; 731 732 } // anonymous namespace 733 734 void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const { 735 CodeGenFunction::RunCleanupsScope Scope(CGF); 736 if (PrePostAction) { 737 CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction); 738 Callback(CodeGen, CGF, *PrePostAction); 739 } else { 740 PrePostActionTy Action; 741 Callback(CodeGen, CGF, Action); 742 } 743 } 744 745 /// Check if the combiner is a call to UDR combiner and if it is so return the 746 /// UDR decl used for reduction. 747 static const OMPDeclareReductionDecl * 748 getReductionInit(const Expr *ReductionOp) { 749 if (auto *CE = dyn_cast<CallExpr>(ReductionOp)) 750 if (auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee())) 751 if (auto *DRE = 752 dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts())) 753 if (auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) 754 return DRD; 755 return nullptr; 756 } 757 758 static void emitInitWithReductionInitializer(CodeGenFunction &CGF, 759 const OMPDeclareReductionDecl *DRD, 760 const Expr *InitOp, 761 Address Private, Address Original, 762 QualType Ty) { 763 if (DRD->getInitializer()) { 764 std::pair<llvm::Function *, llvm::Function *> Reduction = 765 CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD); 766 auto *CE = cast<CallExpr>(InitOp); 767 auto *OVE = cast<OpaqueValueExpr>(CE->getCallee()); 768 const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts(); 769 const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts(); 770 auto *LHSDRE = cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr()); 771 auto *RHSDRE = cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr()); 772 
/// \brief Emit element-by-element initialization of an array.
///
/// Emits a loop that walks the destination array (and, when \p DRD is set, the
/// source array in lockstep) and initializes one element per iteration.
/// \param CGF Function being emitted into.
/// \param DestAddr Address of the array to initialize.
/// \param Type Type of the array; it is passed to getAsArrayTypeUnsafe(), so it
/// must be an array type.
/// \param EmitDeclareReductionInit If true, every element is initialized via
/// emitInitWithReductionInitializer(); otherwise \p Init is emitted directly
/// into each element.
/// \param Init Initializer expression (the reduction operation when
/// \p EmitDeclareReductionInit is true).
/// \param DRD User-defined reduction declaration. When non-null, \p SrcAddr is
/// iterated in parallel with \p DestAddr; when null, \p SrcAddr is never read.
/// \param SrcAddr Address of the original array.
static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr,
                                 QualType Type, bool EmitDeclareReductionInit,
                                 const Expr *Init,
                                 const OMPDeclareReductionDecl *DRD,
                                 Address SrcAddr = Address::invalid()) {
  // Perform element-by-element initialization.
  // ElementTy is default-constructed here and filled in by emitArrayLength().
  QualType ElementTy;

  // Drill down to the base element type on both arrays.
  auto ArrayTy = Type->getAsArrayTypeUnsafe();
  auto NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr);
  // NOTE(review): this casts DestAddr to its own element type, which looks
  // like a no-op; presumably emitArrayLength() already rewrote DestAddr to
  // point at the base element - confirm.
  DestAddr =
      CGF.Builder.CreateElementBitCast(DestAddr, DestAddr.getElementType());
  // The source array is viewed with the same element type as the destination.
  if (DRD)
    SrcAddr =
        CGF.Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType());

  llvm::Value *SrcBegin = nullptr;
  if (DRD)
    SrcBegin = SrcAddr.getPointer();
  auto DestBegin = DestAddr.getPointer();
  // Compute the one-past-the-end pointer of the destination array.
  auto DestEnd = CGF.Builder.CreateGEP(DestBegin, NumElements);
  // The loop is a do-while guarded by an up-front emptiness check: if the
  // begin and end pointers are equal the body is skipped entirely.
  auto BodyBB = CGF.createBasicBlock("omp.arrayinit.body");
  auto DoneBB = CGF.createBasicBlock("omp.arrayinit.done");
  auto IsEmpty =
      CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Remember the block we branch from (it feeds the PHIs below), then start
  // emitting the loop body.
  auto EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);

  // Current-element pointers are PHIs: the begin pointer on entry, the
  // advanced pointer on the back edge (added after the body is emitted).
  llvm::PHINode *SrcElementPHI = nullptr;
  Address SrcElementCurrent = Address::invalid();
  if (DRD) {
    SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2,
                                          "omp.arraycpy.srcElementPast");
    SrcElementPHI->addIncoming(SrcBegin, EntryBB);
    SrcElementCurrent =
        Address(SrcElementPHI,
                SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));
  }
  llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI(
      DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
  DestElementPHI->addIncoming(DestBegin, EntryBB);
  Address DestElementCurrent =
      Address(DestElementPHI,
              DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Initialize the current element inside its own RunCleanupsScope.
  {
    CodeGenFunction::RunCleanupsScope InitScope(CGF);
    if (EmitDeclareReductionInit) {
      emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent,
                                       SrcElementCurrent, ElementTy);
    } else
      CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(),
                           /*IsInitializer=*/false);
  }

  if (DRD) {
    // Shift the source address forward by one element.
    // NOTE(review): the value name below says "dest" although this is the
    // source pointer - looks like a copy-paste slip; left unchanged because
    // it is a runtime string.
    auto SrcElementNext = CGF.Builder.CreateConstGEP1_32(
        SrcElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
    SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock());
  }

  // Shift the destination address forward by one element.
  auto DestElementNext = CGF.Builder.CreateConstGEP1_32(
      DestElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
  // Check whether we've reached the end; loop back to the body otherwise.
  auto Done =
      CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}
913 auto *PrivateVD = 914 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl()); 915 bool EmitDeclareReductionInit = 916 DRD && (DRD->getInitializer() || !PrivateVD->hasInit()); 917 EmitOMPAggregateInit(CGF, PrivateAddr, PrivateVD->getType(), 918 EmitDeclareReductionInit, 919 EmitDeclareReductionInit ? ClausesData[N].ReductionOp 920 : PrivateVD->getInit(), 921 DRD, SharedLVal.getAddress()); 922 } 923 924 ReductionCodeGen::ReductionCodeGen(ArrayRef<const Expr *> Shareds, 925 ArrayRef<const Expr *> Privates, 926 ArrayRef<const Expr *> ReductionOps) { 927 ClausesData.reserve(Shareds.size()); 928 SharedAddresses.reserve(Shareds.size()); 929 Sizes.reserve(Shareds.size()); 930 BaseDecls.reserve(Shareds.size()); 931 auto IPriv = Privates.begin(); 932 auto IRed = ReductionOps.begin(); 933 for (const auto *Ref : Shareds) { 934 ClausesData.emplace_back(Ref, *IPriv, *IRed); 935 std::advance(IPriv, 1); 936 std::advance(IRed, 1); 937 } 938 } 939 940 void ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, unsigned N) { 941 assert(SharedAddresses.size() == N && 942 "Number of generated lvalues must be exactly N."); 943 LValue First = emitSharedLValue(CGF, ClausesData[N].Ref); 944 LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Ref); 945 SharedAddresses.emplace_back(First, Second); 946 } 947 948 void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) { 949 auto *PrivateVD = 950 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl()); 951 QualType PrivateType = PrivateVD->getType(); 952 bool AsArraySection = isa<OMPArraySectionExpr>(ClausesData[N].Ref); 953 if (!PrivateType->isVariablyModifiedType()) { 954 Sizes.emplace_back( 955 CGF.getTypeSize( 956 SharedAddresses[N].first.getType().getNonReferenceType()), 957 nullptr); 958 return; 959 } 960 llvm::Value *Size; 961 llvm::Value *SizeInChars; 962 llvm::Type *ElemType = 963 cast<llvm::PointerType>(SharedAddresses[N].first.getPointer()->getType()) 964 ->getElementType(); 965 
auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType); 966 if (AsArraySection) { 967 Size = CGF.Builder.CreatePtrDiff(SharedAddresses[N].second.getPointer(), 968 SharedAddresses[N].first.getPointer()); 969 Size = CGF.Builder.CreateNUWAdd( 970 Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1)); 971 SizeInChars = CGF.Builder.CreateNUWMul(Size, ElemSizeOf); 972 } else { 973 SizeInChars = CGF.getTypeSize( 974 SharedAddresses[N].first.getType().getNonReferenceType()); 975 Size = CGF.Builder.CreateExactUDiv(SizeInChars, ElemSizeOf); 976 } 977 Sizes.emplace_back(SizeInChars, Size); 978 CodeGenFunction::OpaqueValueMapping OpaqueMap( 979 CGF, 980 cast<OpaqueValueExpr>( 981 CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()), 982 RValue::get(Size)); 983 CGF.EmitVariablyModifiedType(PrivateType); 984 } 985 986 void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N, 987 llvm::Value *Size) { 988 auto *PrivateVD = 989 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl()); 990 QualType PrivateType = PrivateVD->getType(); 991 if (!PrivateType->isVariablyModifiedType()) { 992 assert(!Size && !Sizes[N].second && 993 "Size should be nullptr for non-variably modified reduction " 994 "items."); 995 return; 996 } 997 CodeGenFunction::OpaqueValueMapping OpaqueMap( 998 CGF, 999 cast<OpaqueValueExpr>( 1000 CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()), 1001 RValue::get(Size)); 1002 CGF.EmitVariablyModifiedType(PrivateType); 1003 } 1004 1005 void ReductionCodeGen::emitInitialization( 1006 CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal, 1007 llvm::function_ref<bool(CodeGenFunction &)> DefaultInit) { 1008 assert(SharedAddresses.size() > N && "No variable was generated"); 1009 auto *PrivateVD = 1010 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl()); 1011 auto *DRD = getReductionInit(ClausesData[N].ReductionOp); 1012 QualType PrivateType = PrivateVD->getType(); 1013 
PrivateAddr = CGF.Builder.CreateElementBitCast( 1014 PrivateAddr, CGF.ConvertTypeForMem(PrivateType)); 1015 QualType SharedType = SharedAddresses[N].first.getType(); 1016 SharedLVal = CGF.MakeAddrLValue( 1017 CGF.Builder.CreateElementBitCast(SharedLVal.getAddress(), 1018 CGF.ConvertTypeForMem(SharedType)), 1019 SharedType, SharedAddresses[N].first.getBaseInfo(), 1020 CGF.CGM.getTBAAInfoForSubobject(SharedAddresses[N].first, SharedType)); 1021 if (CGF.getContext().getAsArrayType(PrivateVD->getType())) { 1022 emitAggregateInitialization(CGF, N, PrivateAddr, SharedLVal, DRD); 1023 } else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) { 1024 emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp, 1025 PrivateAddr, SharedLVal.getAddress(), 1026 SharedLVal.getType()); 1027 } else if (!DefaultInit(CGF) && PrivateVD->hasInit() && 1028 !CGF.isTrivialInitializer(PrivateVD->getInit())) { 1029 CGF.EmitAnyExprToMem(PrivateVD->getInit(), PrivateAddr, 1030 PrivateVD->getType().getQualifiers(), 1031 /*IsInitializer=*/false); 1032 } 1033 } 1034 1035 bool ReductionCodeGen::needCleanups(unsigned N) { 1036 auto *PrivateVD = 1037 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl()); 1038 QualType PrivateType = PrivateVD->getType(); 1039 QualType::DestructionKind DTorKind = PrivateType.isDestructedType(); 1040 return DTorKind != QualType::DK_none; 1041 } 1042 1043 void ReductionCodeGen::emitCleanups(CodeGenFunction &CGF, unsigned N, 1044 Address PrivateAddr) { 1045 auto *PrivateVD = 1046 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl()); 1047 QualType PrivateType = PrivateVD->getType(); 1048 QualType::DestructionKind DTorKind = PrivateType.isDestructedType(); 1049 if (needCleanups(N)) { 1050 PrivateAddr = CGF.Builder.CreateElementBitCast( 1051 PrivateAddr, CGF.ConvertTypeForMem(PrivateType)); 1052 CGF.pushDestroy(DTorKind, PrivateAddr, PrivateType); 1053 } 1054 } 1055 1056 static LValue loadToBegin(CodeGenFunction &CGF, 
QualType BaseTy, QualType ElTy, 1057 LValue BaseLV) { 1058 BaseTy = BaseTy.getNonReferenceType(); 1059 while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) && 1060 !CGF.getContext().hasSameType(BaseTy, ElTy)) { 1061 if (auto *PtrTy = BaseTy->getAs<PointerType>()) 1062 BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(), PtrTy); 1063 else { 1064 LValue RefLVal = CGF.MakeAddrLValue(BaseLV.getAddress(), BaseTy); 1065 BaseLV = CGF.EmitLoadOfReferenceLValue(RefLVal); 1066 } 1067 BaseTy = BaseTy->getPointeeType(); 1068 } 1069 return CGF.MakeAddrLValue( 1070 CGF.Builder.CreateElementBitCast(BaseLV.getAddress(), 1071 CGF.ConvertTypeForMem(ElTy)), 1072 BaseLV.getType(), BaseLV.getBaseInfo(), 1073 CGF.CGM.getTBAAInfoForSubobject(BaseLV, BaseLV.getType())); 1074 } 1075 1076 static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy, 1077 llvm::Type *BaseLVType, CharUnits BaseLVAlignment, 1078 llvm::Value *Addr) { 1079 Address Tmp = Address::invalid(); 1080 Address TopTmp = Address::invalid(); 1081 Address MostTopTmp = Address::invalid(); 1082 BaseTy = BaseTy.getNonReferenceType(); 1083 while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) && 1084 !CGF.getContext().hasSameType(BaseTy, ElTy)) { 1085 Tmp = CGF.CreateMemTemp(BaseTy); 1086 if (TopTmp.isValid()) 1087 CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp); 1088 else 1089 MostTopTmp = Tmp; 1090 TopTmp = Tmp; 1091 BaseTy = BaseTy->getPointeeType(); 1092 } 1093 llvm::Type *Ty = BaseLVType; 1094 if (Tmp.isValid()) 1095 Ty = Tmp.getElementType(); 1096 Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Addr, Ty); 1097 if (Tmp.isValid()) { 1098 CGF.Builder.CreateStore(Addr, Tmp); 1099 return MostTopTmp; 1100 } 1101 return Address(Addr, BaseLVAlignment); 1102 } 1103 1104 Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N, 1105 Address PrivateAddr) { 1106 const DeclRefExpr *DE; 1107 const VarDecl *OrigVD = nullptr; 1108 if (auto *OASE = 
dyn_cast<OMPArraySectionExpr>(ClausesData[N].Ref)) { 1109 auto *Base = OASE->getBase()->IgnoreParenImpCasts(); 1110 while (auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base)) 1111 Base = TempOASE->getBase()->IgnoreParenImpCasts(); 1112 while (auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base)) 1113 Base = TempASE->getBase()->IgnoreParenImpCasts(); 1114 DE = cast<DeclRefExpr>(Base); 1115 OrigVD = cast<VarDecl>(DE->getDecl()); 1116 } else if (auto *ASE = dyn_cast<ArraySubscriptExpr>(ClausesData[N].Ref)) { 1117 auto *Base = ASE->getBase()->IgnoreParenImpCasts(); 1118 while (auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base)) 1119 Base = TempASE->getBase()->IgnoreParenImpCasts(); 1120 DE = cast<DeclRefExpr>(Base); 1121 OrigVD = cast<VarDecl>(DE->getDecl()); 1122 } 1123 if (OrigVD) { 1124 BaseDecls.emplace_back(OrigVD); 1125 auto OriginalBaseLValue = CGF.EmitLValue(DE); 1126 LValue BaseLValue = 1127 loadToBegin(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(), 1128 OriginalBaseLValue); 1129 llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff( 1130 BaseLValue.getPointer(), SharedAddresses[N].first.getPointer()); 1131 llvm::Value *PrivatePointer = 1132 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 1133 PrivateAddr.getPointer(), 1134 SharedAddresses[N].first.getAddress().getType()); 1135 llvm::Value *Ptr = CGF.Builder.CreateGEP(PrivatePointer, Adjustment); 1136 return castToBase(CGF, OrigVD->getType(), 1137 SharedAddresses[N].first.getType(), 1138 OriginalBaseLValue.getAddress().getType(), 1139 OriginalBaseLValue.getAlignment(), Ptr); 1140 } 1141 BaseDecls.emplace_back( 1142 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Ref)->getDecl())); 1143 return PrivateAddr; 1144 } 1145 1146 bool ReductionCodeGen::usesReductionInitializer(unsigned N) const { 1147 auto *DRD = getReductionInit(ClausesData[N].ReductionOp); 1148 return DRD && DRD->getInitializer(); 1149 } 1150 1151 LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) { 1152 return 
/// \brief Emit the outlined helper for the combiner or the initializer of a
/// user-defined reduction.
///
/// The helper has internal linkage, is named ".omp_combiner." or
/// ".omp_initializer." depending on \p IsCombiner, and is marked always-inline.
/// \param CGM Module the helper is added to.
/// \param Ty Reduction type; both helper parameters are restrict-qualified
/// pointers to it.
/// \param CombinerInitializer Expression emitted as the helper's body; may be
/// null, in which case no expression is emitted.
/// \param In Variable mapped to the pointee of the helper's second parameter.
/// \param Out Variable mapped to the pointee of the helper's first parameter.
/// \param IsCombiner True for a combiner. When false, a non-trivial
/// initializer attached to \p Out is emitted before \p CombinerInitializer.
/// \return The emitted function.
static llvm::Function *
emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty,
                          const Expr *CombinerInitializer, const VarDecl *In,
                          const VarDecl *Out, bool IsCombiner) {
  // void .omp_combiner.(Ty *out, Ty *in);
  // (the 'out' parameter is pushed first below, so it is the first argument)
  auto &C = CGM.getContext();
  QualType PtrTy = C.getPointerType(Ty).withRestrict();
  FunctionArgList Args;
  ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(),
                               /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(),
                              /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
  Args.push_back(&OmpOutParm);
  Args.push_back(&OmpInParm);
  auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  auto *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  auto *Fn = llvm::Function::Create(
      FnTy, llvm::GlobalValue::InternalLinkage,
      IsCombiner ? ".omp_combiner." : ".omp_initializer.", &CGM.getModule());
  CGM.SetInternalFunctionAttributes(/*D=*/nullptr, Fn, FnInfo);
  // Replace any noinline/optnone attributes with alwaysinline.
  Fn->removeFnAttr(llvm::Attribute::NoInline);
  Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
  Fn->addFnAttr(llvm::Attribute::AlwaysInline);
  CodeGenFunction CGF(CGM);
  // Map the In variable ("T omp_in;" for a combiner) to "*omp_in_parm" in all
  // expressions.
  // Map the Out variable ("T omp_out;" for a combiner) to "*omp_out_parm" in
  // all expressions.
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args);
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm);
  Scope.addPrivate(In, [&CGF, AddrIn, PtrTy]() -> Address {
    return CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>())
        .getAddress();
  });
  Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm);
  Scope.addPrivate(Out, [&CGF, AddrOut, PtrTy]() -> Address {
    return CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>())
        .getAddress();
  });
  (void)Scope.Privatize();
  // Initializer helpers only: if the Out variable carries a non-trivial
  // initializer, emit it into the (now remapped) Out variable first.
  if (!IsCombiner && Out->hasInit() &&
      !CGF.isTrivialInitializer(Out->getInit())) {
    CGF.EmitAnyExprToMem(Out->getInit(), CGF.GetAddrOfLocalVar(Out),
                         Out->getType().getQualifiers(),
                         /*IsInitializer=*/true);
  }
  if (CombinerInitializer)
    CGF.EmitIgnoredExpr(CombinerInitializer);
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}
if (UDRMap.count(D) > 0) 1248 return; 1249 auto &C = CGM.getContext(); 1250 if (!In || !Out) { 1251 In = &C.Idents.get("omp_in"); 1252 Out = &C.Idents.get("omp_out"); 1253 } 1254 llvm::Function *Combiner = emitCombinerOrInitializer( 1255 CGM, D->getType(), D->getCombiner(), cast<VarDecl>(D->lookup(In).front()), 1256 cast<VarDecl>(D->lookup(Out).front()), 1257 /*IsCombiner=*/true); 1258 llvm::Function *Initializer = nullptr; 1259 if (auto *Init = D->getInitializer()) { 1260 if (!Priv || !Orig) { 1261 Priv = &C.Idents.get("omp_priv"); 1262 Orig = &C.Idents.get("omp_orig"); 1263 } 1264 Initializer = emitCombinerOrInitializer( 1265 CGM, D->getType(), 1266 D->getInitializerKind() == OMPDeclareReductionDecl::CallInit ? Init 1267 : nullptr, 1268 cast<VarDecl>(D->lookup(Orig).front()), 1269 cast<VarDecl>(D->lookup(Priv).front()), 1270 /*IsCombiner=*/false); 1271 } 1272 UDRMap.insert(std::make_pair(D, std::make_pair(Combiner, Initializer))); 1273 if (CGF) { 1274 auto &Decls = FunctionUDRMap.FindAndConstruct(CGF->CurFn); 1275 Decls.second.push_back(D); 1276 } 1277 } 1278 1279 std::pair<llvm::Function *, llvm::Function *> 1280 CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) { 1281 auto I = UDRMap.find(D); 1282 if (I != UDRMap.end()) 1283 return I->second; 1284 emitUserDefinedReduction(/*CGF=*/nullptr, D); 1285 return UDRMap.lookup(D); 1286 } 1287 1288 // Layout information for ident_t. 1289 static CharUnits getIdentAlign(CodeGenModule &CGM) { 1290 return CGM.getPointerAlign(); 1291 } 1292 static CharUnits getIdentSize(CodeGenModule &CGM) { 1293 assert((4 * CGM.getPointerSize()).isMultipleOf(CGM.getPointerAlign())); 1294 return CharUnits::fromQuantity(16) + CGM.getPointerSize(); 1295 } 1296 static CharUnits getOffsetOfIdentField(IdentFieldIndex Field) { 1297 // All the fields except the last are i32, so this works beautifully. 
1298 return unsigned(Field) * CharUnits::fromQuantity(4); 1299 } 1300 static Address createIdentFieldGEP(CodeGenFunction &CGF, Address Addr, 1301 IdentFieldIndex Field, 1302 const llvm::Twine &Name = "") { 1303 auto Offset = getOffsetOfIdentField(Field); 1304 return CGF.Builder.CreateStructGEP(Addr, Field, Offset, Name); 1305 } 1306 1307 static llvm::Value *emitParallelOrTeamsOutlinedFunction( 1308 CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS, 1309 const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, 1310 const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen) { 1311 assert(ThreadIDVar->getType()->isPointerType() && 1312 "thread id variable must be of type kmp_int32 *"); 1313 CodeGenFunction CGF(CGM, true); 1314 bool HasCancel = false; 1315 if (auto *OPD = dyn_cast<OMPParallelDirective>(&D)) 1316 HasCancel = OPD->hasCancel(); 1317 else if (auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D)) 1318 HasCancel = OPSD->hasCancel(); 1319 else if (auto *OPFD = dyn_cast<OMPParallelForDirective>(&D)) 1320 HasCancel = OPFD->hasCancel(); 1321 else if (auto *OPFD = dyn_cast<OMPTargetParallelForDirective>(&D)) 1322 HasCancel = OPFD->hasCancel(); 1323 else if (auto *OPFD = dyn_cast<OMPDistributeParallelForDirective>(&D)) 1324 HasCancel = OPFD->hasCancel(); 1325 else if (auto *OPFD = dyn_cast<OMPTeamsDistributeParallelForDirective>(&D)) 1326 HasCancel = OPFD->hasCancel(); 1327 else if (auto *OPFD = 1328 dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&D)) 1329 HasCancel = OPFD->hasCancel(); 1330 CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind, 1331 HasCancel, OutlinedHelperName); 1332 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 1333 return CGF.GenerateOpenMPCapturedStmtFunction(*CS); 1334 } 1335 1336 llvm::Value *CGOpenMPRuntime::emitParallelOutlinedFunction( 1337 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, 1338 OpenMPDirectiveKind InnermostKind, const 
RegionCodeGenTy &CodeGen) { 1339 const CapturedStmt *CS = D.getCapturedStmt(OMPD_parallel); 1340 return emitParallelOrTeamsOutlinedFunction( 1341 CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen); 1342 } 1343 1344 llvm::Value *CGOpenMPRuntime::emitTeamsOutlinedFunction( 1345 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, 1346 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { 1347 const CapturedStmt *CS = D.getCapturedStmt(OMPD_teams); 1348 return emitParallelOrTeamsOutlinedFunction( 1349 CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen); 1350 } 1351 1352 llvm::Value *CGOpenMPRuntime::emitTaskOutlinedFunction( 1353 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, 1354 const VarDecl *PartIDVar, const VarDecl *TaskTVar, 1355 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen, 1356 bool Tied, unsigned &NumberOfParts) { 1357 auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF, 1358 PrePostActionTy &) { 1359 auto *ThreadID = getThreadID(CGF, D.getLocStart()); 1360 auto *UpLoc = emitUpdateLocation(CGF, D.getLocStart()); 1361 llvm::Value *TaskArgs[] = { 1362 UpLoc, ThreadID, 1363 CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar), 1364 TaskTVar->getType()->castAs<PointerType>()) 1365 .getPointer()}; 1366 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task), TaskArgs); 1367 }; 1368 CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar, 1369 UntiedCodeGen); 1370 CodeGen.setAction(Action); 1371 assert(!ThreadIDVar->getType()->isPointerType() && 1372 "thread id variable must be of type kmp_int32 for tasks"); 1373 auto *CS = cast<CapturedStmt>(D.getAssociatedStmt()); 1374 auto *TD = dyn_cast<OMPTaskDirective>(&D); 1375 CodeGenFunction CGF(CGM, true); 1376 CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, 1377 InnermostKind, 1378 TD ? 
/// \brief Emit (or reuse) the ident_t location record for \p Loc and return a
/// pointer to it.
///
/// OMP_IDENT_KMPC is always added to \p Flags. When debug info is disabled or
/// \p Loc is invalid, the shared default location for those flags is returned.
/// Otherwise a per-function ".kmpc_loc.addr" temporary is used: it is created
/// on first use, initialized from the default location, and its psource field
/// is overwritten with a ";file;function;line;column;;" string for \p Loc.
/// \param CGF Function being emitted into.
/// \param Loc Source location to describe.
/// \param Flags ident_t flags.
/// \return Pointer to the ident_t object to pass to the runtime.
llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF,
                                                 SourceLocation Loc,
                                                 unsigned Flags) {
  Flags |= OMP_IDENT_KMPC;
  // If no debug info is generated - return global default location.
  if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo ||
      Loc.isInvalid())
    return getOrCreateDefaultLocation(Flags).getPointer();

  assert(CGF.CurFn && "No function in current CodeGenFunction.");

  // Reuse the location temporary already recorded for this function, if any.
  Address LocValue = Address::invalid();
  auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
  if (I != OpenMPLocThreadIDMap.end())
    LocValue = Address(I->second.DebugLoc, getIdentAlign(CGF.CGM));

  // OpenMPLocThreadIDMap may have null DebugLoc and non-null ThreadID, if
  // getThreadID was called before this routine.
  if (!LocValue.isValid()) {
    // Generate "ident_t .kmpc_loc.addr;"
    Address AI = CGF.CreateTempAlloca(IdentTy, getIdentAlign(CGF.CGM),
                                      ".kmpc_loc.addr");
    auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
    Elem.second.DebugLoc = AI.getPointer();
    LocValue = AI;

    // Emit the copy of the default location at the alloca insertion point;
    // the guard restores the builder's insert point when this scope ends.
    // NOTE(review): Flags only reach the temporary through this one-time
    // copy, so a later call in the same function with different Flags reuses
    // the record initialized with the first call's flags - confirm intended.
    CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
    CGF.Builder.SetInsertPoint(CGF.AllocaInsertPt);
    CGF.Builder.CreateMemCpy(LocValue, getOrCreateDefaultLocation(Flags),
                             CGM.getSize(getIdentSize(CGF.CGM)));
  }

  // char **psource = &.kmpc_loc_<flags>.addr.psource;
  Address PSource = createIdentFieldGEP(CGF, LocValue, IdentField_PSource);

  // The location string is cached per raw source-location encoding.
  auto OMPDebugLoc = OpenMPDebugLocMap.lookup(Loc.getRawEncoding());
  if (OMPDebugLoc == nullptr) {
    SmallString<128> Buffer2;
    llvm::raw_svector_ostream OS2(Buffer2);
    // Build debug location
    PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
    OS2 << ";" << PLoc.getFilename() << ";";
    // The function field stays empty when the current declaration is not a
    // FunctionDecl (or there is none).
    if (const FunctionDecl *FD =
            dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl)) {
      OS2 << FD->getQualifiedNameAsString();
    }
    OS2 << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;";
    OMPDebugLoc = CGF.Builder.CreateGlobalStringPtr(OS2.str());
    OpenMPDebugLocMap[Loc.getRawEncoding()] = OMPDebugLoc;
  }
  // *psource = ";<File>;<Function>;<Line>;<Column>;;";
  CGF.Builder.CreateStore(OMPDebugLoc, PSource);

  // Our callers always pass this to a runtime function, so for
  // convenience, go ahead and return a naked pointer.
  return LocValue.getPointer();
}
1514 CGBuilderTy::InsertPointGuard IPG(CGF.Builder); 1515 CGF.Builder.SetInsertPoint(CGF.AllocaInsertPt); 1516 auto *Call = CGF.Builder.CreateCall( 1517 createRuntimeFunction(OMPRTL__kmpc_global_thread_num), 1518 emitUpdateLocation(CGF, Loc)); 1519 Call->setCallingConv(CGF.getRuntimeCC()); 1520 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); 1521 Elem.second.ThreadID = Call; 1522 return Call; 1523 } 1524 1525 void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) { 1526 assert(CGF.CurFn && "No function in current CodeGenFunction."); 1527 if (OpenMPLocThreadIDMap.count(CGF.CurFn)) 1528 OpenMPLocThreadIDMap.erase(CGF.CurFn); 1529 if (FunctionUDRMap.count(CGF.CurFn) > 0) { 1530 for(auto *D : FunctionUDRMap[CGF.CurFn]) { 1531 UDRMap.erase(D); 1532 } 1533 FunctionUDRMap.erase(CGF.CurFn); 1534 } 1535 } 1536 1537 llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() { 1538 if (!IdentTy) { 1539 } 1540 return llvm::PointerType::getUnqual(IdentTy); 1541 } 1542 1543 llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() { 1544 if (!Kmpc_MicroTy) { 1545 // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...) 
1546 llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty), 1547 llvm::PointerType::getUnqual(CGM.Int32Ty)}; 1548 Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true); 1549 } 1550 return llvm::PointerType::getUnqual(Kmpc_MicroTy); 1551 } 1552 1553 llvm::Constant * 1554 CGOpenMPRuntime::createRuntimeFunction(unsigned Function) { 1555 llvm::Constant *RTLFn = nullptr; 1556 switch (static_cast<OpenMPRTLFunction>(Function)) { 1557 case OMPRTL__kmpc_fork_call: { 1558 // Build void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro 1559 // microtask, ...); 1560 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 1561 getKmpc_MicroPointerTy()}; 1562 llvm::FunctionType *FnTy = 1563 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ true); 1564 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_fork_call"); 1565 break; 1566 } 1567 case OMPRTL__kmpc_global_thread_num: { 1568 // Build kmp_int32 __kmpc_global_thread_num(ident_t *loc); 1569 llvm::Type *TypeParams[] = {getIdentTyPointerTy()}; 1570 llvm::FunctionType *FnTy = 1571 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 1572 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_global_thread_num"); 1573 break; 1574 } 1575 case OMPRTL__kmpc_threadprivate_cached: { 1576 // Build void *__kmpc_threadprivate_cached(ident_t *loc, 1577 // kmp_int32 global_tid, void *data, size_t size, void ***cache); 1578 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 1579 CGM.VoidPtrTy, CGM.SizeTy, 1580 CGM.VoidPtrTy->getPointerTo()->getPointerTo()}; 1581 llvm::FunctionType *FnTy = 1582 llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg*/ false); 1583 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_cached"); 1584 break; 1585 } 1586 case OMPRTL__kmpc_critical: { 1587 // Build void __kmpc_critical(ident_t *loc, kmp_int32 global_tid, 1588 // kmp_critical_name *crit); 1589 llvm::Type *TypeParams[] = { 1590 getIdentTyPointerTy(), 
CGM.Int32Ty, 1591 llvm::PointerType::getUnqual(KmpCriticalNameTy)}; 1592 llvm::FunctionType *FnTy = 1593 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1594 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_critical"); 1595 break; 1596 } 1597 case OMPRTL__kmpc_critical_with_hint: { 1598 // Build void __kmpc_critical_with_hint(ident_t *loc, kmp_int32 global_tid, 1599 // kmp_critical_name *crit, uintptr_t hint); 1600 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 1601 llvm::PointerType::getUnqual(KmpCriticalNameTy), 1602 CGM.IntPtrTy}; 1603 llvm::FunctionType *FnTy = 1604 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1605 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_critical_with_hint"); 1606 break; 1607 } 1608 case OMPRTL__kmpc_threadprivate_register: { 1609 // Build void __kmpc_threadprivate_register(ident_t *, void *data, 1610 // kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor); 1611 // typedef void *(*kmpc_ctor)(void *); 1612 auto KmpcCtorTy = 1613 llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy, 1614 /*isVarArg*/ false)->getPointerTo(); 1615 // typedef void *(*kmpc_cctor)(void *, void *); 1616 llvm::Type *KmpcCopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy}; 1617 auto KmpcCopyCtorTy = 1618 llvm::FunctionType::get(CGM.VoidPtrTy, KmpcCopyCtorTyArgs, 1619 /*isVarArg*/ false)->getPointerTo(); 1620 // typedef void (*kmpc_dtor)(void *); 1621 auto KmpcDtorTy = 1622 llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy, /*isVarArg*/ false) 1623 ->getPointerTo(); 1624 llvm::Type *FnTyArgs[] = {getIdentTyPointerTy(), CGM.VoidPtrTy, KmpcCtorTy, 1625 KmpcCopyCtorTy, KmpcDtorTy}; 1626 auto FnTy = llvm::FunctionType::get(CGM.VoidTy, FnTyArgs, 1627 /*isVarArg*/ false); 1628 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_register"); 1629 break; 1630 } 1631 case OMPRTL__kmpc_end_critical: { 1632 // Build void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid, 1633 // kmp_critical_name *crit); 
1634 llvm::Type *TypeParams[] = { 1635 getIdentTyPointerTy(), CGM.Int32Ty, 1636 llvm::PointerType::getUnqual(KmpCriticalNameTy)}; 1637 llvm::FunctionType *FnTy = 1638 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1639 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_critical"); 1640 break; 1641 } 1642 case OMPRTL__kmpc_cancel_barrier: { 1643 // Build kmp_int32 __kmpc_cancel_barrier(ident_t *loc, kmp_int32 1644 // global_tid); 1645 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1646 llvm::FunctionType *FnTy = 1647 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 1648 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_cancel_barrier"); 1649 break; 1650 } 1651 case OMPRTL__kmpc_barrier: { 1652 // Build void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid); 1653 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1654 llvm::FunctionType *FnTy = 1655 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1656 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_barrier"); 1657 break; 1658 } 1659 case OMPRTL__kmpc_for_static_fini: { 1660 // Build void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid); 1661 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1662 llvm::FunctionType *FnTy = 1663 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1664 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_for_static_fini"); 1665 break; 1666 } 1667 case OMPRTL__kmpc_push_num_threads: { 1668 // Build void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid, 1669 // kmp_int32 num_threads) 1670 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 1671 CGM.Int32Ty}; 1672 llvm::FunctionType *FnTy = 1673 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1674 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_num_threads"); 1675 break; 1676 } 1677 case OMPRTL__kmpc_serialized_parallel: { 1678 // Build void 
__kmpc_serialized_parallel(ident_t *loc, kmp_int32 1679 // global_tid); 1680 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1681 llvm::FunctionType *FnTy = 1682 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1683 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_serialized_parallel"); 1684 break; 1685 } 1686 case OMPRTL__kmpc_end_serialized_parallel: { 1687 // Build void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32 1688 // global_tid); 1689 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1690 llvm::FunctionType *FnTy = 1691 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1692 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_serialized_parallel"); 1693 break; 1694 } 1695 case OMPRTL__kmpc_flush: { 1696 // Build void __kmpc_flush(ident_t *loc); 1697 llvm::Type *TypeParams[] = {getIdentTyPointerTy()}; 1698 llvm::FunctionType *FnTy = 1699 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1700 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_flush"); 1701 break; 1702 } 1703 case OMPRTL__kmpc_master: { 1704 // Build kmp_int32 __kmpc_master(ident_t *loc, kmp_int32 global_tid); 1705 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1706 llvm::FunctionType *FnTy = 1707 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 1708 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_master"); 1709 break; 1710 } 1711 case OMPRTL__kmpc_end_master: { 1712 // Build void __kmpc_end_master(ident_t *loc, kmp_int32 global_tid); 1713 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1714 llvm::FunctionType *FnTy = 1715 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 1716 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_master"); 1717 break; 1718 } 1719 case OMPRTL__kmpc_omp_taskyield: { 1720 // Build kmp_int32 __kmpc_omp_taskyield(ident_t *, kmp_int32 global_tid, 1721 // int end_part); 1722 
llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy}; 1723 llvm::FunctionType *FnTy = 1724 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 1725 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_taskyield"); 1726 break; 1727 } 1728 case OMPRTL__kmpc_single: { 1729 // Build kmp_int32 __kmpc_single(ident_t *loc, kmp_int32 global_tid); 1730 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1731 llvm::FunctionType *FnTy = 1732 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 1733 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_single"); 1734 break; 1735 } 1736 case OMPRTL__kmpc_end_single: { 1737 // Build void __kmpc_end_single(ident_t *loc, kmp_int32 global_tid); 1738 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1739 llvm::FunctionType *FnTy = 1740 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 1741 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_single"); 1742 break; 1743 } 1744 case OMPRTL__kmpc_omp_task_alloc: { 1745 // Build kmp_task_t *__kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid, 1746 // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, 1747 // kmp_routine_entry_t *task_entry); 1748 assert(KmpRoutineEntryPtrTy != nullptr && 1749 "Type kmp_routine_entry_t must be created."); 1750 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, 1751 CGM.SizeTy, CGM.SizeTy, KmpRoutineEntryPtrTy}; 1752 // Return void * and then cast to particular kmp_task_t type. 
1753 llvm::FunctionType *FnTy = 1754 llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false); 1755 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_alloc"); 1756 break; 1757 } 1758 case OMPRTL__kmpc_omp_task: { 1759 // Build kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t 1760 // *new_task); 1761 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 1762 CGM.VoidPtrTy}; 1763 llvm::FunctionType *FnTy = 1764 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 1765 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task"); 1766 break; 1767 } 1768 case OMPRTL__kmpc_copyprivate: { 1769 // Build void __kmpc_copyprivate(ident_t *loc, kmp_int32 global_tid, 1770 // size_t cpy_size, void *cpy_data, void(*cpy_func)(void *, void *), 1771 // kmp_int32 didit); 1772 llvm::Type *CpyTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy}; 1773 auto *CpyFnTy = 1774 llvm::FunctionType::get(CGM.VoidTy, CpyTypeParams, /*isVarArg=*/false); 1775 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.SizeTy, 1776 CGM.VoidPtrTy, CpyFnTy->getPointerTo(), 1777 CGM.Int32Ty}; 1778 llvm::FunctionType *FnTy = 1779 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 1780 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_copyprivate"); 1781 break; 1782 } 1783 case OMPRTL__kmpc_reduce: { 1784 // Build kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid, 1785 // kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void 1786 // (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name *lck); 1787 llvm::Type *ReduceTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy}; 1788 auto *ReduceFnTy = llvm::FunctionType::get(CGM.VoidTy, ReduceTypeParams, 1789 /*isVarArg=*/false); 1790 llvm::Type *TypeParams[] = { 1791 getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, CGM.SizeTy, 1792 CGM.VoidPtrTy, ReduceFnTy->getPointerTo(), 1793 llvm::PointerType::getUnqual(KmpCriticalNameTy)}; 1794 
llvm::FunctionType *FnTy = 1795 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 1796 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_reduce"); 1797 break; 1798 } 1799 case OMPRTL__kmpc_reduce_nowait: { 1800 // Build kmp_int32 __kmpc_reduce_nowait(ident_t *loc, kmp_int32 1801 // global_tid, kmp_int32 num_vars, size_t reduce_size, void *reduce_data, 1802 // void (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name 1803 // *lck); 1804 llvm::Type *ReduceTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy}; 1805 auto *ReduceFnTy = llvm::FunctionType::get(CGM.VoidTy, ReduceTypeParams, 1806 /*isVarArg=*/false); 1807 llvm::Type *TypeParams[] = { 1808 getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, CGM.SizeTy, 1809 CGM.VoidPtrTy, ReduceFnTy->getPointerTo(), 1810 llvm::PointerType::getUnqual(KmpCriticalNameTy)}; 1811 llvm::FunctionType *FnTy = 1812 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 1813 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_reduce_nowait"); 1814 break; 1815 } 1816 case OMPRTL__kmpc_end_reduce: { 1817 // Build void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid, 1818 // kmp_critical_name *lck); 1819 llvm::Type *TypeParams[] = { 1820 getIdentTyPointerTy(), CGM.Int32Ty, 1821 llvm::PointerType::getUnqual(KmpCriticalNameTy)}; 1822 llvm::FunctionType *FnTy = 1823 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 1824 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_reduce"); 1825 break; 1826 } 1827 case OMPRTL__kmpc_end_reduce_nowait: { 1828 // Build __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid, 1829 // kmp_critical_name *lck); 1830 llvm::Type *TypeParams[] = { 1831 getIdentTyPointerTy(), CGM.Int32Ty, 1832 llvm::PointerType::getUnqual(KmpCriticalNameTy)}; 1833 llvm::FunctionType *FnTy = 1834 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 1835 RTLFn = 1836 CGM.CreateRuntimeFunction(FnTy, 
/*Name=*/"__kmpc_end_reduce_nowait"); 1837 break; 1838 } 1839 case OMPRTL__kmpc_omp_task_begin_if0: { 1840 // Build void __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t 1841 // *new_task); 1842 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 1843 CGM.VoidPtrTy}; 1844 llvm::FunctionType *FnTy = 1845 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 1846 RTLFn = 1847 CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_begin_if0"); 1848 break; 1849 } 1850 case OMPRTL__kmpc_omp_task_complete_if0: { 1851 // Build void __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t 1852 // *new_task); 1853 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 1854 CGM.VoidPtrTy}; 1855 llvm::FunctionType *FnTy = 1856 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 1857 RTLFn = CGM.CreateRuntimeFunction(FnTy, 1858 /*Name=*/"__kmpc_omp_task_complete_if0"); 1859 break; 1860 } 1861 case OMPRTL__kmpc_ordered: { 1862 // Build void __kmpc_ordered(ident_t *loc, kmp_int32 global_tid); 1863 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1864 llvm::FunctionType *FnTy = 1865 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 1866 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_ordered"); 1867 break; 1868 } 1869 case OMPRTL__kmpc_end_ordered: { 1870 // Build void __kmpc_end_ordered(ident_t *loc, kmp_int32 global_tid); 1871 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1872 llvm::FunctionType *FnTy = 1873 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 1874 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_ordered"); 1875 break; 1876 } 1877 case OMPRTL__kmpc_omp_taskwait: { 1878 // Build kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32 global_tid); 1879 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1880 llvm::FunctionType *FnTy = 1881 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 1882 RTLFn = 
CGM.CreateRuntimeFunction(FnTy, "__kmpc_omp_taskwait"); 1883 break; 1884 } 1885 case OMPRTL__kmpc_taskgroup: { 1886 // Build void __kmpc_taskgroup(ident_t *loc, kmp_int32 global_tid); 1887 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1888 llvm::FunctionType *FnTy = 1889 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 1890 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_taskgroup"); 1891 break; 1892 } 1893 case OMPRTL__kmpc_end_taskgroup: { 1894 // Build void __kmpc_end_taskgroup(ident_t *loc, kmp_int32 global_tid); 1895 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1896 llvm::FunctionType *FnTy = 1897 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 1898 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_taskgroup"); 1899 break; 1900 } 1901 case OMPRTL__kmpc_push_proc_bind: { 1902 // Build void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid, 1903 // int proc_bind) 1904 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy}; 1905 llvm::FunctionType *FnTy = 1906 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1907 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_proc_bind"); 1908 break; 1909 } 1910 case OMPRTL__kmpc_omp_task_with_deps: { 1911 // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid, 1912 // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list, 1913 // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list); 1914 llvm::Type *TypeParams[] = { 1915 getIdentTyPointerTy(), CGM.Int32Ty, CGM.VoidPtrTy, CGM.Int32Ty, 1916 CGM.VoidPtrTy, CGM.Int32Ty, CGM.VoidPtrTy}; 1917 llvm::FunctionType *FnTy = 1918 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 1919 RTLFn = 1920 CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_with_deps"); 1921 break; 1922 } 1923 case OMPRTL__kmpc_omp_wait_deps: { 1924 // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid, 1925 // 
kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 ndeps_noalias, 1926 // kmp_depend_info_t *noalias_dep_list); 1927 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 1928 CGM.Int32Ty, CGM.VoidPtrTy, 1929 CGM.Int32Ty, CGM.VoidPtrTy}; 1930 llvm::FunctionType *FnTy = 1931 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 1932 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_wait_deps"); 1933 break; 1934 } 1935 case OMPRTL__kmpc_cancellationpoint: { 1936 // Build kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32 1937 // global_tid, kmp_int32 cncl_kind) 1938 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy}; 1939 llvm::FunctionType *FnTy = 1940 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 1941 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_cancellationpoint"); 1942 break; 1943 } 1944 case OMPRTL__kmpc_cancel: { 1945 // Build kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid, 1946 // kmp_int32 cncl_kind) 1947 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy}; 1948 llvm::FunctionType *FnTy = 1949 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 1950 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_cancel"); 1951 break; 1952 } 1953 case OMPRTL__kmpc_push_num_teams: { 1954 // Build void kmpc_push_num_teams (ident_t loc, kmp_int32 global_tid, 1955 // kmp_int32 num_teams, kmp_int32 num_threads) 1956 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, 1957 CGM.Int32Ty}; 1958 llvm::FunctionType *FnTy = 1959 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 1960 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_num_teams"); 1961 break; 1962 } 1963 case OMPRTL__kmpc_fork_teams: { 1964 // Build void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro 1965 // microtask, ...); 1966 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 1967 
getKmpc_MicroPointerTy()}; 1968 llvm::FunctionType *FnTy = 1969 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ true); 1970 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_fork_teams"); 1971 break; 1972 } 1973 case OMPRTL__kmpc_taskloop: { 1974 // Build void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int 1975 // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int 1976 // sched, kmp_uint64 grainsize, void *task_dup); 1977 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), 1978 CGM.IntTy, 1979 CGM.VoidPtrTy, 1980 CGM.IntTy, 1981 CGM.Int64Ty->getPointerTo(), 1982 CGM.Int64Ty->getPointerTo(), 1983 CGM.Int64Ty, 1984 CGM.IntTy, 1985 CGM.IntTy, 1986 CGM.Int64Ty, 1987 CGM.VoidPtrTy}; 1988 llvm::FunctionType *FnTy = 1989 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 1990 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_taskloop"); 1991 break; 1992 } 1993 case OMPRTL__kmpc_doacross_init: { 1994 // Build void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid, kmp_int32 1995 // num_dims, struct kmp_dim *dims); 1996 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), 1997 CGM.Int32Ty, 1998 CGM.Int32Ty, 1999 CGM.VoidPtrTy}; 2000 llvm::FunctionType *FnTy = 2001 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2002 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_init"); 2003 break; 2004 } 2005 case OMPRTL__kmpc_doacross_fini: { 2006 // Build void __kmpc_doacross_fini(ident_t *loc, kmp_int32 gtid); 2007 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 2008 llvm::FunctionType *FnTy = 2009 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2010 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_fini"); 2011 break; 2012 } 2013 case OMPRTL__kmpc_doacross_post: { 2014 // Build void __kmpc_doacross_post(ident_t *loc, kmp_int32 gtid, kmp_int64 2015 // *vec); 2016 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 
2017 CGM.Int64Ty->getPointerTo()}; 2018 llvm::FunctionType *FnTy = 2019 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2020 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_post"); 2021 break; 2022 } 2023 case OMPRTL__kmpc_doacross_wait: { 2024 // Build void __kmpc_doacross_wait(ident_t *loc, kmp_int32 gtid, kmp_int64 2025 // *vec); 2026 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 2027 CGM.Int64Ty->getPointerTo()}; 2028 llvm::FunctionType *FnTy = 2029 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2030 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_wait"); 2031 break; 2032 } 2033 case OMPRTL__kmpc_task_reduction_init: { 2034 // Build void *__kmpc_task_reduction_init(int gtid, int num_data, void 2035 // *data); 2036 llvm::Type *TypeParams[] = {CGM.IntTy, CGM.IntTy, CGM.VoidPtrTy}; 2037 llvm::FunctionType *FnTy = 2038 llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false); 2039 RTLFn = 2040 CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_task_reduction_init"); 2041 break; 2042 } 2043 case OMPRTL__kmpc_task_reduction_get_th_data: { 2044 // Build void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void 2045 // *d); 2046 llvm::Type *TypeParams[] = {CGM.IntTy, CGM.VoidPtrTy, CGM.VoidPtrTy}; 2047 llvm::FunctionType *FnTy = 2048 llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false); 2049 RTLFn = CGM.CreateRuntimeFunction( 2050 FnTy, /*Name=*/"__kmpc_task_reduction_get_th_data"); 2051 break; 2052 } 2053 case OMPRTL__tgt_target: { 2054 // Build int32_t __tgt_target(int64_t device_id, void *host_ptr, int32_t 2055 // arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t 2056 // *arg_types); 2057 llvm::Type *TypeParams[] = {CGM.Int64Ty, 2058 CGM.VoidPtrTy, 2059 CGM.Int32Ty, 2060 CGM.VoidPtrPtrTy, 2061 CGM.VoidPtrPtrTy, 2062 CGM.SizeTy->getPointerTo(), 2063 CGM.Int64Ty->getPointerTo()}; 2064 llvm::FunctionType *FnTy = 2065 
llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 2066 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target"); 2067 break; 2068 } 2069 case OMPRTL__tgt_target_nowait: { 2070 // Build int32_t __tgt_target_nowait(int64_t device_id, void *host_ptr, 2071 // int32_t arg_num, void** args_base, void **args, size_t *arg_sizes, 2072 // int64_t *arg_types); 2073 llvm::Type *TypeParams[] = {CGM.Int64Ty, 2074 CGM.VoidPtrTy, 2075 CGM.Int32Ty, 2076 CGM.VoidPtrPtrTy, 2077 CGM.VoidPtrPtrTy, 2078 CGM.SizeTy->getPointerTo(), 2079 CGM.Int64Ty->getPointerTo()}; 2080 llvm::FunctionType *FnTy = 2081 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 2082 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_nowait"); 2083 break; 2084 } 2085 case OMPRTL__tgt_target_teams: { 2086 // Build int32_t __tgt_target_teams(int64_t device_id, void *host_ptr, 2087 // int32_t arg_num, void** args_base, void **args, size_t *arg_sizes, 2088 // int64_t *arg_types, int32_t num_teams, int32_t thread_limit); 2089 llvm::Type *TypeParams[] = {CGM.Int64Ty, 2090 CGM.VoidPtrTy, 2091 CGM.Int32Ty, 2092 CGM.VoidPtrPtrTy, 2093 CGM.VoidPtrPtrTy, 2094 CGM.SizeTy->getPointerTo(), 2095 CGM.Int64Ty->getPointerTo(), 2096 CGM.Int32Ty, 2097 CGM.Int32Ty}; 2098 llvm::FunctionType *FnTy = 2099 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 2100 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_teams"); 2101 break; 2102 } 2103 case OMPRTL__tgt_target_teams_nowait: { 2104 // Build int32_t __tgt_target_teams_nowait(int64_t device_id, void 2105 // *host_ptr, int32_t arg_num, void** args_base, void **args, size_t 2106 // *arg_sizes, int64_t *arg_types, int32_t num_teams, int32_t thread_limit); 2107 llvm::Type *TypeParams[] = {CGM.Int64Ty, 2108 CGM.VoidPtrTy, 2109 CGM.Int32Ty, 2110 CGM.VoidPtrPtrTy, 2111 CGM.VoidPtrPtrTy, 2112 CGM.SizeTy->getPointerTo(), 2113 CGM.Int64Ty->getPointerTo(), 2114 CGM.Int32Ty, 2115 CGM.Int32Ty}; 2116 llvm::FunctionType *FnTy = 2117 
llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 2118 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_teams_nowait"); 2119 break; 2120 } 2121 case OMPRTL__tgt_register_lib: { 2122 // Build void __tgt_register_lib(__tgt_bin_desc *desc); 2123 QualType ParamTy = 2124 CGM.getContext().getPointerType(getTgtBinaryDescriptorQTy()); 2125 llvm::Type *TypeParams[] = {CGM.getTypes().ConvertTypeForMem(ParamTy)}; 2126 llvm::FunctionType *FnTy = 2127 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 2128 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_register_lib"); 2129 break; 2130 } 2131 case OMPRTL__tgt_unregister_lib: { 2132 // Build void __tgt_unregister_lib(__tgt_bin_desc *desc); 2133 QualType ParamTy = 2134 CGM.getContext().getPointerType(getTgtBinaryDescriptorQTy()); 2135 llvm::Type *TypeParams[] = {CGM.getTypes().ConvertTypeForMem(ParamTy)}; 2136 llvm::FunctionType *FnTy = 2137 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 2138 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_unregister_lib"); 2139 break; 2140 } 2141 case OMPRTL__tgt_target_data_begin: { 2142 // Build void __tgt_target_data_begin(int64_t device_id, int32_t arg_num, 2143 // void** args_base, void **args, size_t *arg_sizes, int64_t *arg_types); 2144 llvm::Type *TypeParams[] = {CGM.Int64Ty, 2145 CGM.Int32Ty, 2146 CGM.VoidPtrPtrTy, 2147 CGM.VoidPtrPtrTy, 2148 CGM.SizeTy->getPointerTo(), 2149 CGM.Int64Ty->getPointerTo()}; 2150 llvm::FunctionType *FnTy = 2151 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 2152 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_begin"); 2153 break; 2154 } 2155 case OMPRTL__tgt_target_data_begin_nowait: { 2156 // Build void __tgt_target_data_begin_nowait(int64_t device_id, int32_t 2157 // arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t 2158 // *arg_types); 2159 llvm::Type *TypeParams[] = {CGM.Int64Ty, 2160 CGM.Int32Ty, 2161 CGM.VoidPtrPtrTy, 2162 
CGM.VoidPtrPtrTy, 2163 CGM.SizeTy->getPointerTo(), 2164 CGM.Int64Ty->getPointerTo()}; 2165 auto *FnTy = 2166 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2167 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_begin_nowait"); 2168 break; 2169 } 2170 case OMPRTL__tgt_target_data_end: { 2171 // Build void __tgt_target_data_end(int64_t device_id, int32_t arg_num, 2172 // void** args_base, void **args, size_t *arg_sizes, int64_t *arg_types); 2173 llvm::Type *TypeParams[] = {CGM.Int64Ty, 2174 CGM.Int32Ty, 2175 CGM.VoidPtrPtrTy, 2176 CGM.VoidPtrPtrTy, 2177 CGM.SizeTy->getPointerTo(), 2178 CGM.Int64Ty->getPointerTo()}; 2179 llvm::FunctionType *FnTy = 2180 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 2181 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_end"); 2182 break; 2183 } 2184 case OMPRTL__tgt_target_data_end_nowait: { 2185 // Build void __tgt_target_data_end_nowait(int64_t device_id, int32_t 2186 // arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t 2187 // *arg_types); 2188 llvm::Type *TypeParams[] = {CGM.Int64Ty, 2189 CGM.Int32Ty, 2190 CGM.VoidPtrPtrTy, 2191 CGM.VoidPtrPtrTy, 2192 CGM.SizeTy->getPointerTo(), 2193 CGM.Int64Ty->getPointerTo()}; 2194 auto *FnTy = 2195 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2196 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_end_nowait"); 2197 break; 2198 } 2199 case OMPRTL__tgt_target_data_update: { 2200 // Build void __tgt_target_data_update(int64_t device_id, int32_t arg_num, 2201 // void** args_base, void **args, size_t *arg_sizes, int64_t *arg_types); 2202 llvm::Type *TypeParams[] = {CGM.Int64Ty, 2203 CGM.Int32Ty, 2204 CGM.VoidPtrPtrTy, 2205 CGM.VoidPtrPtrTy, 2206 CGM.SizeTy->getPointerTo(), 2207 CGM.Int64Ty->getPointerTo()}; 2208 llvm::FunctionType *FnTy = 2209 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 2210 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_update"); 2211 
break; 2212 } 2213 case OMPRTL__tgt_target_data_update_nowait: { 2214 // Build void __tgt_target_data_update_nowait(int64_t device_id, int32_t 2215 // arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t 2216 // *arg_types); 2217 llvm::Type *TypeParams[] = {CGM.Int64Ty, 2218 CGM.Int32Ty, 2219 CGM.VoidPtrPtrTy, 2220 CGM.VoidPtrPtrTy, 2221 CGM.SizeTy->getPointerTo(), 2222 CGM.Int64Ty->getPointerTo()}; 2223 auto *FnTy = 2224 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2225 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_update_nowait"); 2226 break; 2227 } 2228 } 2229 assert(RTLFn && "Unable to find OpenMP runtime function"); 2230 return RTLFn; 2231 } 2232 2233 llvm::Constant *CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize, 2234 bool IVSigned) { 2235 assert((IVSize == 32 || IVSize == 64) && 2236 "IV size is not compatible with the omp runtime"); 2237 auto Name = IVSize == 32 ? (IVSigned ? "__kmpc_for_static_init_4" 2238 : "__kmpc_for_static_init_4u") 2239 : (IVSigned ? "__kmpc_for_static_init_8" 2240 : "__kmpc_for_static_init_8u"); 2241 auto ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty; 2242 auto PtrTy = llvm::PointerType::getUnqual(ITy); 2243 llvm::Type *TypeParams[] = { 2244 getIdentTyPointerTy(), // loc 2245 CGM.Int32Ty, // tid 2246 CGM.Int32Ty, // schedtype 2247 llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter 2248 PtrTy, // p_lower 2249 PtrTy, // p_upper 2250 PtrTy, // p_stride 2251 ITy, // incr 2252 ITy // chunk 2253 }; 2254 llvm::FunctionType *FnTy = 2255 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 2256 return CGM.CreateRuntimeFunction(FnTy, Name); 2257 } 2258 2259 llvm::Constant *CGOpenMPRuntime::createDispatchInitFunction(unsigned IVSize, 2260 bool IVSigned) { 2261 assert((IVSize == 32 || IVSize == 64) && 2262 "IV size is not compatible with the omp runtime"); 2263 auto Name = 2264 IVSize == 32 2265 ? (IVSigned ? 
"__kmpc_dispatch_init_4" : "__kmpc_dispatch_init_4u")
          : (IVSigned ? "__kmpc_dispatch_init_8" : "__kmpc_dispatch_init_8u");
  auto ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
  llvm::Type *TypeParams[] = { getIdentTyPointerTy(), // loc
                               CGM.Int32Ty,           // tid
                               CGM.Int32Ty,           // schedtype
                               ITy,                   // lower
                               ITy,                   // upper
                               ITy,                   // stride
                               ITy                    // chunk
  };
  llvm::FunctionType *FnTy =
      llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
  return CGM.CreateRuntimeFunction(FnTy, Name);
}

/// \brief Returns the declaration of the '__kmpc_dispatch_fini_*' runtime
/// entry that matches the induction variable.
///
/// The entry name suffix is chosen from \a IVSize (32 -> "4", 64 -> "8") and
/// \a IVSigned (unsigned variants carry a trailing "u"). The declared type is
/// void(loc, tid), where 'loc' has the type returned by getIdentTyPointerTy()
/// and 'tid' is a 32-bit integer.
/// \param IVSize Width of the induction variable in bits; must be 32 or 64.
/// \param IVSigned true if the induction variable is signed.
llvm::Constant *CGOpenMPRuntime::createDispatchFiniFunction(unsigned IVSize,
                                                            bool IVSigned) {
  assert((IVSize == 32 || IVSize == 64) &&
         "IV size is not compatible with the omp runtime");
  auto Name =
      IVSize == 32
          ? (IVSigned ? "__kmpc_dispatch_fini_4" : "__kmpc_dispatch_fini_4u")
          : (IVSigned ? "__kmpc_dispatch_fini_8" : "__kmpc_dispatch_fini_8u");
  llvm::Type *TypeParams[] = {
      getIdentTyPointerTy(), // loc
      CGM.Int32Ty,           // tid
  };
  llvm::FunctionType *FnTy =
      llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
  return CGM.CreateRuntimeFunction(FnTy, Name);
}

/// \brief Returns the declaration of the '__kmpc_dispatch_next_*' runtime
/// entry that matches the induction variable.
///
/// The entry name suffix is chosen exactly as in createDispatchFiniFunction().
/// The declared type is i32(loc, tid, i32 *p_lastiter, IV *p_lower,
/// IV *p_upper, IV *p_stride), where IV is a 32- or 64-bit integer type
/// selected by \a IVSize.
/// \param IVSize Width of the induction variable in bits; must be 32 or 64.
/// \param IVSigned true if the induction variable is signed.
llvm::Constant *CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize,
                                                            bool IVSigned) {
  assert((IVSize == 32 || IVSize == 64) &&
         "IV size is not compatible with the omp runtime");
  auto Name =
      IVSize == 32
          ? (IVSigned ? "__kmpc_dispatch_next_4" : "__kmpc_dispatch_next_4u")
          : (IVSigned ? "__kmpc_dispatch_next_8" : "__kmpc_dispatch_next_8u");
  auto ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
  auto PtrTy = llvm::PointerType::getUnqual(ITy);
  llvm::Type *TypeParams[] = {
    getIdentTyPointerTy(),                     // loc
    CGM.Int32Ty,                               // tid
    llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
    PtrTy,                                     // p_lower
    PtrTy,                                     // p_upper
    PtrTy                                      // p_stride
  };
  llvm::FunctionType *FnTy =
      llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
  return CGM.CreateRuntimeFunction(FnTy, Name);
}

/// \brief Returns the internal cache variable associated with the
/// threadprivate variable \a VD.
///
/// The variable has type CGM.Int8PtrPtrTy and is named
/// "<mangled name of VD>.cache."; it is looked up, or created on first use, by
/// getOrCreateInternalVariable(). Must only be called when the TLS-based
/// implementation is not in effect (see the assertion).
llvm::Constant *
CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) {
  assert(!CGM.getLangOpts().OpenMPUseTLS ||
         !CGM.getContext().getTargetInfo().isTLSSupported());
  // Lookup the entry, lazily creating it if necessary.
  return getOrCreateInternalVariable(CGM.Int8PtrPtrTy,
                                     Twine(CGM.getMangledName(VD)) + ".cache.");
}

/// \brief Returns the address to use for the threadprivate variable \a VD in
/// the current function.
///
/// When OpenMPUseTLS is enabled and the target supports TLS, \a VDAddr is
/// returned unchanged. Otherwise a call to the
/// OMPRTL__kmpc_threadprivate_cached runtime entry is emitted with
/// (loc, tid, &var as i8*, store size of the variable, cache) and the value it
/// returns is wrapped in an Address that reuses the alignment of \a VDAddr.
/// \param CGF Function in which the runtime call is emitted.
/// \param VD The threadprivate variable declaration.
/// \param VDAddr Address of the original variable.
/// \param Loc Source location used for the 'loc' and 'tid' arguments.
Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
                                                const VarDecl *VD,
                                                Address VDAddr,
                                                SourceLocation Loc) {
  if (CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getContext().getTargetInfo().isTLSSupported())
    return VDAddr;

  auto VarTy = VDAddr.getElementType();
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
                         CGF.Builder.CreatePointerCast(VDAddr.getPointer(),
                                                       CGM.Int8PtrTy),
                         CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)),
                         getOrCreateThreadPrivateCache(VD)};
  return Address(CGF.EmitRuntimeCall(
      createRuntimeFunction(OMPRTL__kmpc_threadprivate_cached), Args),
                 VDAddr.getAlignment());
}

/// \brief Emits the runtime calls that register the constructor, copy
/// constructor and destructor of a threadprivate variable.
///
/// Two calls are emitted into \a CGF: first OMPRTL__kmpc_global_thread_num
/// (its result is not used here), then OMPRTL__kmpc_threadprivate_register
/// with (loc, &var as void*, Ctor, CopyCtor, Dtor).
/// \param CGF Function in which the calls are emitted.
/// \param VDAddr Address of the threadprivate variable.
/// \param Ctor, CopyCtor, Dtor Values passed through to the register call.
/// \param Loc Source location used to build the 'loc' argument.
void CGOpenMPRuntime::emitThreadPrivateVarInit(
    CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor,
    llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) {
  // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime
  // library.
  auto OMPLoc = emitUpdateLocation(CGF, Loc);
  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_global_thread_num),
                      OMPLoc);
  // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor)
  // to register constructor/destructor for variable.
  llvm::Value *Args[] = {OMPLoc,
                         CGF.Builder.CreatePointerCast(VDAddr.getPointer(),
                                                       CGM.VoidPtrTy),
                         Ctor, CopyCtor, Dtor};
  CGF.EmitRuntimeCall(
      createRuntimeFunction(OMPRTL__kmpc_threadprivate_register), Args);
}

/// \brief Emits the helper functions needed to initialize and destroy the
/// per-thread copies of the threadprivate variable \a VD and registers them
/// with the runtime.
///
/// Nothing is emitted (and nullptr is returned) when the TLS-based
/// implementation is in effect, when \a VD has no definition, when \a VD was
/// already processed (tracked in ThreadPrivateWithDefinition), or when neither
/// a constructor nor a destructor helper is required.
///
/// Otherwise:
///  - for C++ with \a PerformInit, a ".__kmpc_global_ctor_." function taking
///    and returning void* is generated; it evaluates the initializer into the
///    memory passed as its argument and returns that argument;
///  - if the variable's type is a destructed type, a ".__kmpc_global_dtor_."
///    function taking void* is generated that destroys the object passed in;
///  - the registration calls are emitted via emitThreadPrivateVarInit(),
///    either into \a CGF when it is non-null, or into a newly created
///    ".__omp_threadprivate_init_." function.
///
/// \param VD Threadprivate variable.
/// \param VDAddr Address of the original variable.
/// \param Loc Source location of the variable / directive.
/// \param PerformInit true if the initializer must be re-emitted per thread.
/// \param CGF Optional function to emit the registration into.
/// \return The newly created ".__omp_threadprivate_init_." function when
/// \a CGF is null and registration code was emitted; nullptr otherwise.
llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition(
    const VarDecl *VD, Address VDAddr, SourceLocation Loc,
    bool PerformInit, CodeGenFunction *CGF) {
  if (CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getContext().getTargetInfo().isTLSSupported())
    return nullptr;

  VD = VD->getDefinition(CGM.getContext());
  // Process each defined variable only once.
  if (VD && ThreadPrivateWithDefinition.count(VD) == 0) {
    ThreadPrivateWithDefinition.insert(VD);
    QualType ASTTy = VD->getType();

    llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr;
    // NOTE(review): Init is dereferenced below without a null check;
    // presumably PerformInit is only set when an initializer exists - confirm
    // against callers.
    auto Init = VD->getAnyInitializer();
    if (CGM.getLangOpts().CPlusPlus && PerformInit) {
      // Generate function that re-emits the declaration's initializer into the
      // threadprivate copy of the variable VD
      CodeGenFunction CtorCGF(CGM);
      FunctionArgList Args;
      ImplicitParamDecl Dst(CGM.getContext(), CGM.getContext().VoidPtrTy,
                            ImplicitParamDecl::Other);
      Args.push_back(&Dst);

      auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          CGM.getContext().VoidPtrTy, Args);
      auto FTy = CGM.getTypes().GetFunctionType(FI);
      auto Fn = CGM.CreateGlobalInitOrDestructFunction(
          FTy, ".__kmpc_global_ctor_.", FI, Loc);
      CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI,
                            Args, SourceLocation());
      // Load the void* argument and view it as a pointer to the variable's
      // memory type, keeping the original variable's alignment.
      auto ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      Address Arg = Address(ArgVal, VDAddr.getAlignment());
      Arg = CtorCGF.Builder.CreateElementBitCast(Arg,
                                             CtorCGF.ConvertTypeForMem(ASTTy));
      CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(),
                               /*IsInitializer=*/true);
      // Reload the argument and store it as the function's return value.
      ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue);
      CtorCGF.FinishFunction();
      Ctor = Fn;
    }
    if (VD->getType().isDestructedType() != QualType::DK_none) {
      // Generate function that emits destructor call for the threadprivate copy
      // of the variable VD
      CodeGenFunction DtorCGF(CGM);
      FunctionArgList Args;
      ImplicitParamDecl Dst(CGM.getContext(), CGM.getContext().VoidPtrTy,
                            ImplicitParamDecl::Other);
      Args.push_back(&Dst);

      auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          CGM.getContext().VoidTy, Args);
      auto FTy = CGM.getTypes().GetFunctionType(FI);
      auto Fn = CGM.CreateGlobalInitOrDestructFunction(
          FTy, ".__kmpc_global_dtor_.", FI, Loc);
      // NL and AL are named so that they stay alive until the end of this
      // scope.
      auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
      DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args,
                            SourceLocation());
      // Create a scope with an artificial location for the body of this function.
      auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
      auto ArgVal = DtorCGF.EmitLoadOfScalar(
          DtorCGF.GetAddrOfLocalVar(&Dst),
          /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation());
      DtorCGF.emitDestroy(Address(ArgVal, VDAddr.getAlignment()), ASTTy,
                          DtorCGF.getDestroyer(ASTTy.isDestructedType()),
                          DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
      DtorCGF.FinishFunction();
      Dtor = Fn;
    }
    // Do not emit init function if it is not required.
    if (!Ctor && !Dtor)
      return nullptr;

    llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
    auto CopyCtorTy =
        llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs,
                                /*isVarArg=*/false)->getPointerTo();
    // Copying constructor for the threadprivate variable.
    // Must be NULL - reserved by runtime, but currently it requires that this
    // parameter is always NULL. Otherwise it fires assertion.
    CopyCtor = llvm::Constant::getNullValue(CopyCtorTy);
    // Whichever of Ctor/Dtor was not generated is passed as a typed null
    // function pointer.
    if (Ctor == nullptr) {
      auto CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
                                            /*isVarArg=*/false)->getPointerTo();
      Ctor = llvm::Constant::getNullValue(CtorTy);
    }
    if (Dtor == nullptr) {
      auto DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy,
                                            /*isVarArg=*/false)->getPointerTo();
      Dtor = llvm::Constant::getNullValue(DtorTy);
    }
    if (!CGF) {
      // No enclosing function was supplied: wrap the registration calls in a
      // dedicated nullary function and hand it back to the caller.
      auto InitFunctionTy =
          llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false);
      auto InitFunction = CGM.CreateGlobalInitOrDestructFunction(
          InitFunctionTy, ".__omp_threadprivate_init_.",
          CGM.getTypes().arrangeNullaryFunction());
      CodeGenFunction InitCGF(CGM);
      FunctionArgList ArgList;
      InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction,
                            CGM.getTypes().arrangeNullaryFunction(), ArgList,
                            Loc);
      emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
      InitCGF.FinishFunction();
      return InitFunction;
    }
    emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
  }
  return nullptr;
}

/// \brief Returns the address of a compiler-generated threadprivate variable
/// identified by \a Name.
///
/// The backing global is the internal variable "<Name>.artificial." of the
/// memory type of \a VarType; its cache is the internal variable
/// "<Name>.artificial..cache." of type CGM.VoidPtrPtrTy. A call to
/// OMPRTL__kmpc_threadprivate_cached is emitted with default-constructed
/// source locations, and its result is cast to a pointer to the variable's
/// memory type in address space 0. The returned Address uses pointer
/// alignment.
/// \param CGF Function in which the call is emitted.
/// \param VarType Type of the artificial variable.
/// \param Name Base name used to build the internal variable names.
Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF,
                                                          QualType VarType,
                                                          StringRef Name) {
  llvm::Twine VarName(Name, ".artificial.");
  llvm::Type *VarLVType = CGF.ConvertTypeForMem(VarType);
  llvm::Value *GAddr = getOrCreateInternalVariable(VarLVType, VarName);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, SourceLocation()),
      getThreadID(CGF, SourceLocation()),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(GAddr, CGM.VoidPtrTy),
      CGF.Builder.CreateIntCast(CGF.getTypeSize(VarType), CGM.SizeTy,
                                /*IsSigned=*/false),
      getOrCreateInternalVariable(CGM.VoidPtrPtrTy, VarName + ".cache.")};
  return Address(
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          CGF.EmitRuntimeCall(
              createRuntimeFunction(OMPRTL__kmpc_threadprivate_cached), Args),
          VarLVType->getPointerTo(/*AddrSpace=*/0)),
      CGM.getPointerAlign());
}

/// \brief Emits code for OpenMP 'if' clause using specified \a CodeGen
/// function. Here is the logic:
/// if (Cond) {
///   ThenGen();
/// } else {
///   ElseGen();
/// }
///
/// When \a Cond constant-folds to a simple integer, only the selected arm is
/// emitted and no branch is generated. Otherwise three blocks are created
/// ("omp_if.then", "omp_if.else", "omp_if.end") and both arms are emitted.
void CGOpenMPRuntime::emitOMPIfClause(CodeGenFunction &CGF, const Expr *Cond,
                                      const RegionCodeGenTy &ThenGen,
                                      const RegionCodeGenTy &ElseGen) {
  CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange());

  // If the condition constant folds and can be elided, try to avoid emitting
  // the condition and the dead arm of the if/else.
  bool CondConstant;
  if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) {
    if (CondConstant)
      ThenGen(CGF);
    else
      ElseGen(CGF);
    return;
  }

  // Otherwise, the condition did not fold, or we couldn't elide it.  Just
  // emit the conditional branch.
  auto ThenBlock = CGF.createBasicBlock("omp_if.then");
  auto ElseBlock = CGF.createBasicBlock("omp_if.else");
  auto ContBlock = CGF.createBasicBlock("omp_if.end");
  CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0);

  // Emit the 'then' code.
  CGF.EmitBlock(ThenBlock);
  ThenGen(CGF);
  CGF.EmitBranch(ContBlock);
  // Emit the 'else' code if present.
  // There is no need to emit line number for unconditional branch.
  // NOTE(review): the object returned by CreateEmpty() is discarded at the end
  // of this statement, unlike the named 'NL'/'AL' objects used elsewhere in
  // this file; if ApplyDebugLocation restores the previous location on
  // destruction this statement has no lasting effect - confirm.
  (void)ApplyDebugLocation::CreateEmpty(CGF);
  CGF.EmitBlock(ElseBlock);
  ElseGen(CGF);
  // There is no need to emit line number for unconditional branch.
  // NOTE(review): same concern as above about the discarded temporary.
  (void)ApplyDebugLocation::CreateEmpty(CGF);
  CGF.EmitBranch(ContBlock);
  // Emit the continuation block for code after the if.
  CGF.EmitBlock(ContBlock, /*IsFinished=*/true);
}

/// \brief Emits the code that runs the outlined parallel region
/// \a OutlinedFn.
///
/// Without an 'if' clause (\a IfCond is null) a call to
/// OMPRTL__kmpc_fork_call(loc, n, microtask, var1, .., varn) is emitted.
/// With an 'if' clause, emitOMPIfClause() selects between that fork call and
/// a serialized path that brackets a direct call to \a OutlinedFn with
/// OMPRTL__kmpc_serialized_parallel / OMPRTL__kmpc_end_serialized_parallel.
/// Does nothing when \a CGF has no insertion point.
/// \param CGF Function in which the code is emitted.
/// \param Loc Source location of the directive.
/// \param OutlinedFn The outlined function for the parallel region.
/// \param CapturedVars Values passed on to \a OutlinedFn after its first two
/// arguments.
/// \param IfCond Condition of the 'if' clause, or null.
void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       llvm::Value *OutlinedFn,
                                       ArrayRef<llvm::Value *> CapturedVars,
                                       const Expr *IfCond) {
  if (!CGF.HaveInsertPoint())
    return;
  // The location value is emitted once here and shared by both lambdas.
  auto *RTLoc = emitUpdateLocation(CGF, Loc);
  auto &&ThenGen = [OutlinedFn, CapturedVars, RTLoc](CodeGenFunction &CGF,
                                                     PrePostActionTy &) {
    // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn);
    auto &RT = CGF.CGM.getOpenMPRuntime();
    llvm::Value *Args[] = {
        RTLoc,
        CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
        CGF.Builder.CreateBitCast(OutlinedFn, RT.getKmpc_MicroPointerTy())};
    llvm::SmallVector<llvm::Value *, 16> RealArgs;
    RealArgs.append(std::begin(Args), std::end(Args));
    RealArgs.append(CapturedVars.begin(), CapturedVars.end());

    auto RTLFn = RT.createRuntimeFunction(OMPRTL__kmpc_fork_call);
    CGF.EmitRuntimeCall(RTLFn, RealArgs);
  };
  auto &&ElseGen = [OutlinedFn, CapturedVars, RTLoc, Loc](CodeGenFunction &CGF,
                                                          PrePostActionTy &) {
    auto &RT = CGF.CGM.getOpenMPRuntime();
    auto ThreadID = RT.getThreadID(CGF, Loc);
    // Build calls:
    // __kmpc_serialized_parallel(&Loc, GTid);
    llvm::Value *Args[] = {RTLoc, ThreadID};
    CGF.EmitRuntimeCall(
        RT.createRuntimeFunction(OMPRTL__kmpc_serialized_parallel), Args);

    // OutlinedFn(&gtid, &zero, CapturedStruct);
    auto ThreadIDAddr = RT.emitThreadIDAddress(CGF, Loc);
    // Second argument of the outlined function: address of a 4-byte-aligned
    // i32 temporary initialized to zero.
    Address ZeroAddr =
        CGF.CreateTempAlloca(CGF.Int32Ty, CharUnits::fromQuantity(4),
                             /*Name*/ ".zero.addr");
    CGF.InitTempAlloca(ZeroAddr, CGF.Builder.getInt32(/*C*/ 0));
    llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
    OutlinedFnArgs.push_back(ThreadIDAddr.getPointer());
    OutlinedFnArgs.push_back(ZeroAddr.getPointer());
    OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());
    RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs);

    // __kmpc_end_serialized_parallel(&Loc, GTid);
    llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID};
    CGF.EmitRuntimeCall(
        RT.createRuntimeFunction(OMPRTL__kmpc_end_serialized_parallel),
        EndArgs);
  };
  if (IfCond)
    emitOMPIfClause(CGF, IfCond, ThenGen, ElseGen);
  else {
    RegionCodeGenTy ThenRCG(ThenGen);
    ThenRCG(CGF);
  }
}

// If we're inside an (outlined) parallel region, use the region info's
// thread-ID variable (it is passed in a first argument of the outlined function
// as "kmp_int32 *gtid"). Otherwise, if we're not inside parallel region, but in
// regular serial code region, get thread ID by calling kmp_int32
// kmpc_global_thread_num(ident_t *loc), stash this thread ID in a temporary and
// return the address of that temp.
Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF,
                                             SourceLocation Loc) {
  // Prefer the thread-ID variable of the enclosing OpenMP region, if the
  // region provides one.
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
    if (OMPRegionInfo->getThreadIDVariable())
      return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress();

  // Otherwise materialize the thread ID into a signed 32-bit temporary named
  // ".threadid_temp." and return the address of that temporary.
  auto ThreadID = getThreadID(CGF, Loc);
  auto Int32Ty =
      CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true);
  auto ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp.");
  CGF.EmitStoreOfScalar(ThreadID,
                        CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty));

  return ThreadIDTemp;
}

/// \brief Returns the internal global variable named \a Name, creating it on
/// first use.
///
/// Variables are tracked in InternalVars keyed by the rendered name. A newly
/// created variable is a non-constant global of type \a Ty with common
/// linkage and a null initializer. Requesting an existing name with a
/// different type trips the assertion below.
/// \param Ty Value type of the variable.
/// \param Name Name of the variable.
llvm::Constant *
CGOpenMPRuntime::getOrCreateInternalVariable(llvm::Type *Ty,
                                             const llvm::Twine &Name) {
  // Render the Twine into a local buffer to obtain a lookup key.
  SmallString<256> Buffer;
  llvm::raw_svector_ostream Out(Buffer);
  Out << Name;
  auto RuntimeName = Out.str();
  // insert() leaves an already present entry untouched; either way Elem refers
  // to the map entry for this name.
  auto &Elem = *InternalVars.insert(std::make_pair(RuntimeName, nullptr)).first;
  if (Elem.second) {
    assert(Elem.second->getType()->getPointerElementType() == Ty &&
           "OMP internal variable has different type than requested");
    return &*Elem.second;
  }

  // Name the global after the key stored in the map entry rather than the
  // local buffer.
  return Elem.second = new llvm::GlobalVariable(
             CGM.getModule(), Ty, /*IsConstant*/ false,
             llvm::GlobalValue::CommonLinkage, llvm::Constant::getNullValue(Ty),
             Elem.first());
}

/// \brief Returns the lock variable for the critical region named
/// \a CriticalName.
///
/// The lock is the internal variable ".gomp_critical_user_<CriticalName>.var"
/// of type KmpCriticalNameTy.
llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) {
  llvm::Twine Name(".gomp_critical_user_", CriticalName);
  return getOrCreateInternalVariable(KmpCriticalNameTy, Name.concat(".var"));
}

namespace {
/// Common pre(post)-action for different OpenMP constructs.
///
/// Enter() emits a call to the 'enter' runtime function and Exit() emits a
/// call to the 'exit' runtime function. In conditional mode the result of the
/// enter call is tested and the code following Enter() is placed in an
/// "omp_if.then" block; the owner must then call Done() to branch to and emit
/// the "omp_if.end" continuation block.
///
/// The argument lists are held as ArrayRef, i.e. they are not copied; the
/// arrays passed to the constructor must outlive this object.
class CommonActionTy final : public PrePostActionTy {
  llvm::Value *EnterCallee;
  ArrayRef<llvm::Value *> EnterArgs;
  llvm::Value *ExitCallee;
  ArrayRef<llvm::Value *> ExitArgs;
  bool Conditional;
  // Continuation block; only set by Enter() when Conditional is true.
  llvm::BasicBlock *ContBlock = nullptr;

public:
  CommonActionTy(llvm::Value *EnterCallee, ArrayRef<llvm::Value *> EnterArgs,
                 llvm::Value *ExitCallee, ArrayRef<llvm::Value *> ExitArgs,
                 bool Conditional = false)
      : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee),
        ExitArgs(ExitArgs), Conditional(Conditional) {}
  /// Emits the enter call and, in conditional mode, the branch that guards the
  /// region body on a non-null result.
  void Enter(CodeGenFunction &CGF) override {
    llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs);
    if (Conditional) {
      llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes);
      auto *ThenBlock = CGF.createBasicBlock("omp_if.then");
      ContBlock = CGF.createBasicBlock("omp_if.end");
      // Generate the branch (If-stmt)
      CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock);
      CGF.EmitBlock(ThenBlock);
    }
  }
  /// Closes the conditional region opened by Enter().
  /// NOTE(review): ContBlock is null unless Enter() ran in conditional mode;
  /// the callers visible in this part of the file only call Done() on actions
  /// constructed with Conditional=true - confirm for any other callers.
  void Done(CodeGenFunction &CGF) {
    // Emit the rest of blocks/branches
    CGF.EmitBranch(ContBlock);
    CGF.EmitBlock(ContBlock, true);
  }
  /// Emits the exit call.
  void Exit(CodeGenFunction &CGF) override {
    CGF.EmitRuntimeCall(ExitCallee, ExitArgs);
  }
};
} // anonymous namespace

/// \brief Emits a 'critical' region guarded by the lock associated with
/// \a CriticalName.
///
/// The region body \a CriticalOpGen is emitted inline between the enter and
/// exit runtime calls. When \a Hint is non-null the enter call is
/// OMPRTL__kmpc_critical_with_hint and receives the hint, cast (unsigned) to
/// CGM.IntPtrTy, as an extra trailing argument; the exit call never receives
/// the hint. Does nothing when \a CGF has no insertion point.
void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF,
                                         StringRef CriticalName,
                                         const RegionCodeGenTy &CriticalOpGen,
                                         SourceLocation Loc, const Expr *Hint) {
  // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]);
  // CriticalOpGen();
  // __kmpc_end_critical(ident_t *, gtid, Lock);
  // Prepare arguments and build a call to __kmpc_critical
  if (!CGF.HaveInsertPoint())
    return;
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
                         getCriticalRegionLock(CriticalName)};
  // EnterArgs starts as a copy of Args; Args itself is reused unchanged for
  // the exit call.
  llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args),
                                                std::end(Args));
  if (Hint) {
    EnterArgs.push_back(CGF.Builder.CreateIntCast(
        CGF.EmitScalarExpr(Hint), CGM.IntPtrTy, /*isSigned=*/false));
  }
  CommonActionTy Action(
      createRuntimeFunction(Hint ? OMPRTL__kmpc_critical_with_hint
                                 : OMPRTL__kmpc_critical),
      EnterArgs, createRuntimeFunction(OMPRTL__kmpc_end_critical), Args);
  CriticalOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen);
}

/// \brief Emits a 'master' region.
///
/// The body \a MasterOpGen is emitted inline and guarded by the result of the
/// OMPRTL__kmpc_master call; OMPRTL__kmpc_end_master is used as the exit
/// call. Does nothing when \a CGF has no insertion point.
void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF,
                                       const RegionCodeGenTy &MasterOpGen,
                                       SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // if(__kmpc_master(ident_t *, gtid)) {
  //   MasterOpGen();
  //   __kmpc_end_master(ident_t *, gtid);
  // }
  // Prepare arguments and build a call to __kmpc_master
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_master), Args,
                        createRuntimeFunction(OMPRTL__kmpc_end_master), Args,
                        /*Conditional=*/true);
  MasterOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_master, MasterOpGen);
  // Close the conditional block opened by the action's Enter().
  Action.Done(CGF);
}

/// \brief Emits a call to OMPRTL__kmpc_omp_taskyield(loc, thread_id, 0).
///
/// If the current captured-statement info is an OpenMP region, its
/// emitUntiedSwitch() hook is invoked afterwards. Does nothing when \a CGF has
/// no insertion point.
void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
                                        SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call __kmpc_omp_taskyield(loc, thread_id, 0);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
      llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)};
  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_taskyield), Args);
  if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
    Region->emitUntiedSwitch(CGF);
}

/// \brief Emits a 'taskgroup' region.
///
/// The body \a TaskgroupOpGen is emitted inline between calls to
/// OMPRTL__kmpc_taskgroup and OMPRTL__kmpc_end_taskgroup. Does nothing when
/// \a CGF has no insertion point.
void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF,
                                          const RegionCodeGenTy &TaskgroupOpGen,
                                          SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // __kmpc_taskgroup(ident_t *, gtid);
  // TaskgroupOpGen();
  // __kmpc_end_taskgroup(ident_t *, gtid);
  // Prepare arguments and build a call to __kmpc_taskgroup
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_taskgroup), Args,
                        createRuntimeFunction(OMPRTL__kmpc_end_taskgroup),
                        Args);
  TaskgroupOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen);
}

/// Given an array of pointers to variables, project the address of a
/// given variable.
///
/// Loads element \a Index of \a Array and returns it as an Address with the
/// declared alignment of \a Var and the memory type of \a Var as its element
/// type.
static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array,
                                      unsigned Index, const VarDecl *Var) {
  // Pull out the pointer to the variable.
  Address PtrAddr =
      CGF.Builder.CreateConstArrayGEP(Array, Index, CGF.getPointerSize());
  llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr);

  Address Addr = Address(Ptr, CGF.getContext().getDeclAlign(Var));
  Addr = CGF.Builder.CreateElementBitCast(
      Addr, CGF.ConvertTypeForMem(Var->getType()));
  return Addr;
}

/// \brief Generates the internal function
/// 'void .omp.copyprivate.copy_func(void *LHSArg, void *RHSArg)'.
///
/// Both arguments are cast to \a ArgsType and treated as arrays of pointers.
/// For every entry I of \a AssignmentOps the generated function copies from
/// the object pointed to by RHS[I] to the object pointed to by LHS[I] using
/// EmitOMPCopy() with the type of CopyprivateVars[I].
/// \param CGM Module in which the function is created.
/// \param ArgsType Pointer type both arguments are cast to.
/// \param CopyprivateVars, DestExprs, SrcExprs Parallel lists; every element
/// is cast to DeclRefExpr, and the Dest/Src declarations to VarDecl.
/// \param AssignmentOps Copy operations; its size drives the loop.
/// \return The generated function.
static llvm::Value *emitCopyprivateCopyFunction(
    CodeGenModule &CGM, llvm::Type *ArgsType,
    ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs,
    ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps) {
  auto &C = CGM.getContext();
  // void copy_func(void *LHSArg, void *RHSArg);
  FunctionArgList Args;
  ImplicitParamDecl LHSArg(C, C.VoidPtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl RHSArg(C, C.VoidPtrTy, ImplicitParamDecl::Other);
  Args.push_back(&LHSArg);
  Args.push_back(&RHSArg);
  auto &CGFI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  auto *Fn = llvm::Function::Create(
      CGM.getTypes().GetFunctionType(CGFI), llvm::GlobalValue::InternalLinkage,
      ".omp.copyprivate.copy_func", &CGM.getModule());
  CGM.SetInternalFunctionAttributes(/*D=*/nullptr, Fn, CGFI);
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args);
  // Dest = (void*[n])(LHSArg);
  // Src = (void*[n])(RHSArg);
  Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
      ArgsType), CGF.getPointerAlign());
  Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
      ArgsType), CGF.getPointerAlign());
  // *(Type0*)Dst[0] = *(Type0*)Src[0];
  // *(Type1*)Dst[1] = *(Type1*)Src[1];
  // ...
  // *(Typen*)Dst[n] = *(Typen*)Src[n];
  for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) {
    auto DestVar = cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl());
    Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar);

    auto SrcVar = cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl());
    Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar);

    auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl();
    QualType Type = VD->getType();
    CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]);
  }
  CGF.FinishFunction();
  return Fn;
}

/// \brief Emits a 'single' region, including 'copyprivate' handling.
///
/// The body \a SingleOpGen is emitted inline and guarded by the result of
/// OMPRTL__kmpc_single; OMPRTL__kmpc_end_single is the exit call. When
/// \a CopyprivateVars is non-empty, a 'did_it' flag is set to 1 inside the
/// guarded block, an array holding the addresses of the copyprivate variables
/// is built, a copy function is generated, and OMPRTL__kmpc_copyprivate is
/// called with (loc, gtid, buf_size, list, copy_func, did_it). Does nothing
/// when \a CGF has no insertion point.
/// \param CopyprivateVars, SrcExprs, DstExprs, AssignmentOps Parallel lists of
/// equal length (asserted).
void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF,
                                       const RegionCodeGenTy &SingleOpGen,
                                       SourceLocation Loc,
                                       ArrayRef<const Expr *> CopyprivateVars,
                                       ArrayRef<const Expr *> SrcExprs,
                                       ArrayRef<const Expr *> DstExprs,
                                       ArrayRef<const Expr *> AssignmentOps) {
  if (!CGF.HaveInsertPoint())
    return;
  assert(CopyprivateVars.size() == SrcExprs.size() &&
         CopyprivateVars.size() == DstExprs.size() &&
         CopyprivateVars.size() == AssignmentOps.size());
  auto &C = CGM.getContext();
  // int32 did_it = 0;
  // if(__kmpc_single(ident_t *, gtid)) {
  //   SingleOpGen();
  //   __kmpc_end_single(ident_t *, gtid);
  //   did_it = 1;
  // }
  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
  // <copy_func>, did_it);

  // DidIt stays invalid when there is nothing to copy; its validity is used
  // below as the "has copyprivate" flag.
  Address DidIt = Address::invalid();
  if (!CopyprivateVars.empty()) {
    // int32 did_it = 0;
    auto KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
    DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it");
    CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt);
  }
  // Prepare arguments and build a call to __kmpc_single
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_single), Args,
                        createRuntimeFunction(OMPRTL__kmpc_end_single), Args,
                        /*Conditional=*/true);
  SingleOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_single, SingleOpGen);
  if (DidIt.isValid()) {
    // did_it = 1;
    CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt);
  }
  // Close the conditional block opened by the action's Enter(); the store
  // above is therefore still inside the guarded block.
  Action.Done(CGF);
  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
  // <copy_func>, did_it);
  if (DidIt.isValid()) {
    llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size());
    auto CopyprivateArrayTy =
        C.getConstantArrayType(C.VoidPtrTy, ArraySize, ArrayType::Normal,
                               /*IndexTypeQuals=*/0);
    // Create a list of all private variables for copyprivate.
    Address CopyprivateList =
        CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list");
    for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) {
      Address Elem = CGF.Builder.CreateConstArrayGEP(
          CopyprivateList, I, CGF.getPointerSize());
      CGF.Builder.CreateStore(
          CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
              CGF.EmitLValue(CopyprivateVars[I]).getPointer(), CGF.VoidPtrTy),
          Elem);
    }
    // Build function that copies private values from single region to all other
    // threads in the corresponding parallel region.
    // NOTE(review): SrcExprs is passed in the position of the callee's
    // 'DestExprs' parameter and DstExprs in the position of 'SrcExprs';
    // presumably callers of this function pass their lists in the matching
    // swapped order - verify against callers before changing either side.
    auto *CpyFn = emitCopyprivateCopyFunction(
        CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy)->getPointerTo(),
        CopyprivateVars, SrcExprs, DstExprs, AssignmentOps);
    auto *BufSize = CGF.getTypeSize(CopyprivateArrayTy);
    Address CL =
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(CopyprivateList,
                                                      CGF.VoidPtrTy);
    auto *DidItVal = CGF.Builder.CreateLoad(DidIt);
    llvm::Value *Args[] = {
        emitUpdateLocation(CGF, Loc), // ident_t *<loc>
        getThreadID(CGF, Loc),        // i32 <gtid>
        BufSize,                      // size_t <buf_size>
        CL.getPointer(),              // void *<copyprivate list>
        CpyFn,                        // void (*) (void *, void *) <copy_func>
        DidItVal                      // i32 did_it
    };
    CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_copyprivate), Args);
  }
}

/// \brief Emits an 'ordered' region.
///
/// When \a IsThreads is true the body \a OrderedOpGen is emitted inline
/// between calls to OMPRTL__kmpc_ordered and OMPRTL__kmpc_end_ordered;
/// otherwise the body is emitted inline without any runtime calls. Does
/// nothing when \a CGF has no insertion point.
void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF,
                                        const RegionCodeGenTy &OrderedOpGen,
                                        SourceLocation Loc, bool IsThreads) {
  if (!CGF.HaveInsertPoint())
    return;
  // __kmpc_ordered(ident_t *, gtid);
  // OrderedOpGen();
  // __kmpc_end_ordered(ident_t *, gtid);
  // Prepare arguments and build a call to __kmpc_ordered
  if (IsThreads) {
    llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
    CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_ordered), Args,
                          createRuntimeFunction(OMPRTL__kmpc_end_ordered),
                          Args);
    OrderedOpGen.setAction(Action);
    emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
    return;
  }
  emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
}

/// \brief Emits a barrier.
///
/// The location flags are derived from \a Kind ('for', 'sections', 'single',
/// explicit 'barrier', or a generic implicit barrier for anything else).
/// Inside an OpenMP region whose info reports hasCancel(), and unless
/// \a ForceSimpleCall is set, OMPRTL__kmpc_cancel_barrier is called instead of
/// OMPRTL__kmpc_barrier; if \a EmitChecks is also set, a non-null result
/// branches through cleanups to the cancel destination of the enclosing
/// region's directive. Does nothing when \a CGF has no insertion point.
void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
                                      OpenMPDirectiveKind Kind, bool EmitChecks,
                                      bool ForceSimpleCall) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call __kmpc_cancel_barrier(loc, thread_id);
  // Build call __kmpc_barrier(loc, thread_id);
  unsigned Flags;
  if (Kind == OMPD_for)
    Flags = OMP_IDENT_BARRIER_IMPL_FOR;
  else if (Kind == OMPD_sections)
    Flags = OMP_IDENT_BARRIER_IMPL_SECTIONS;
  else if (Kind == OMPD_single)
    Flags = OMP_IDENT_BARRIER_IMPL_SINGLE;
  else if (Kind == OMPD_barrier)
    Flags = OMP_IDENT_BARRIER_EXPL;
  else
    Flags = OMP_IDENT_BARRIER_IMPL;
  // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc,
  // thread_id);
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags),
                         getThreadID(CGF, Loc)};
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) {
      auto *Result = CGF.EmitRuntimeCall(
          createRuntimeFunction(OMPRTL__kmpc_cancel_barrier), Args);
      if (EmitChecks) {
        // if (__kmpc_cancel_barrier()) {
        //   exit from construct;
        // }
        auto *ExitBB = CGF.createBasicBlock(".cancel.exit");
        auto *ContBB = CGF.createBasicBlock(".cancel.continue");
        auto *Cmp = CGF.Builder.CreateIsNotNull(Result);
        CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
        CGF.EmitBlock(ExitBB);
        //   exit from construct;
        auto CancelDestination =
            CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
        CGF.EmitBranchThroughCleanup(CancelDestination);
        CGF.EmitBlock(ContBB, /*IsFinished=*/true);
      }
      // The cancel barrier replaces the plain barrier below.
      return;
    }
  }
  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_barrier), Args);
}

/// \brief Map the OpenMP loop schedule to the runtime enumeration.
///
/// \a Chunked only affects the 'static' kind; 'dynamic' and 'guided' always
/// map to their '_chunked' enumerators. An unknown kind maps to the
/// non-chunked static schedule and must not be combined with a chunk.
static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind,
                                          bool Chunked, bool Ordered) {
  switch (ScheduleKind) {
  case OMPC_SCHEDULE_static:
    return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked)
                   : (Ordered ? OMP_ord_static : OMP_sch_static);
  case OMPC_SCHEDULE_dynamic:
    return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked;
  case OMPC_SCHEDULE_guided:
    return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked;
  case OMPC_SCHEDULE_runtime:
    return Ordered ? OMP_ord_runtime : OMP_sch_runtime;
  case OMPC_SCHEDULE_auto:
    return Ordered ? OMP_ord_auto : OMP_sch_auto;
  case OMPC_SCHEDULE_unknown:
    assert(!Chunked && "chunk was specified but schedule kind not known");
    return Ordered ? OMP_ord_static : OMP_sch_static;
  }
  llvm_unreachable("Unexpected runtime schedule");
}

/// \brief Map the OpenMP distribute schedule to the runtime enumeration.
///
/// \a ScheduleKind is not inspected; only \a Chunked selects the result.
static OpenMPSchedType
getRuntimeSchedule(OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) {
  // only static is allowed for dist_schedule
  return Chunked ? OMP_dist_sch_static_chunked : OMP_dist_sch_static;
}

/// \brief Returns true if the loop schedule described by \a ScheduleKind and
/// \a Chunked maps to the non-chunked static runtime schedule.
bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind,
                                         bool Chunked) const {
  auto Schedule = getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
  return Schedule == OMP_sch_static;
}

/// \brief Returns true if the distribute schedule is not chunked, i.e. maps
/// to OMP_dist_sch_static.
bool CGOpenMPRuntime::isStaticNonchunked(
    OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
  auto Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
  return Schedule == OMP_dist_sch_static;
}


/// \brief Returns true if \a ScheduleKind, evaluated as non-chunked and
/// non-ordered, maps to anything other than the static runtime schedule.
bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const {
  auto Schedule =
      getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false);
  assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here");
  return Schedule != OMP_sch_static;
}

/// \brief Combines a runtime schedule with the effect of up to two schedule
/// clause modifiers.
///
/// 'monotonic' / 'nonmonotonic' select the modifier bits OR-ed into the
/// result; \a M2 is processed after \a M1, so when both carry such a modifier
/// the one from \a M2 wins. 'simd' replaces OMP_sch_static_chunked with
/// OMP_sch_static_balanced_chunked and leaves other schedules unchanged.
/// \return Schedule | Modifier as a plain int.
static int addMonoNonMonoModifier(OpenMPSchedType Schedule,
                                  OpenMPScheduleClauseModifier M1,
                                  OpenMPScheduleClauseModifier M2) {
  int Modifier = 0;
  switch (M1) {
  case OMPC_SCHEDULE_MODIFIER_monotonic:
    Modifier = OMP_sch_modifier_monotonic;
    break;
  case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
    Modifier = OMP_sch_modifier_nonmonotonic;
    break;
  case OMPC_SCHEDULE_MODIFIER_simd:
    if (Schedule == OMP_sch_static_chunked)
      Schedule = OMP_sch_static_balanced_chunked;
    break;
  case OMPC_SCHEDULE_MODIFIER_last:
  case OMPC_SCHEDULE_MODIFIER_unknown:
    break;
  }
  switch (M2) {
  case OMPC_SCHEDULE_MODIFIER_monotonic:
    Modifier = OMP_sch_modifier_monotonic;
    break;
  case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
    Modifier = OMP_sch_modifier_nonmonotonic;
    break;
  case OMPC_SCHEDULE_MODIFIER_simd:
    if (Schedule == OMP_sch_static_chunked)
      Schedule = OMP_sch_static_balanced_chunked;
    break;
  case OMPC_SCHEDULE_MODIFIER_last:
  case OMPC_SCHEDULE_MODIFIER_unknown:
    break;
  }
  return Schedule | Modifier;
}

/// \brief Emits the '__kmpc_dispatch_init_*' call that starts a dynamically
/// dispatched loop.
///
/// The runtime schedule is derived from \a ScheduleKind, the presence of a
/// chunk in \a DispatchValues and \a Ordered. The stride argument is always
/// the constant 1; the chunk defaults to 1 when none was specified. Unless
/// \a Ordered is set, static schedules are rejected by the assertion. Does
/// nothing when \a CGF has no insertion point.
/// \param IVSize Width of the induction variable in bits.
/// \param IVSigned true if the induction variable is signed.
/// \param DispatchValues Lower bound, upper bound and optional chunk.
void CGOpenMPRuntime::emitForDispatchInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
    bool Ordered, const DispatchRTInput &DispatchValues) {
  if (!CGF.HaveInsertPoint())
    return;
  OpenMPSchedType Schedule = getRuntimeSchedule(
      ScheduleKind.Schedule, DispatchValues.Chunk != nullptr, Ordered);
  assert(Ordered ||
         (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked &&
          Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked &&
          Schedule != OMP_sch_static_balanced_chunked));
  // Call __kmpc_dispatch_init(
  //          ident_t *loc, kmp_int32 tid, kmp_int32 schedule,
  //          kmp_int[32|64] lower, kmp_int[32|64] upper,
  //          kmp_int[32|64] stride, kmp_int[32|64] chunk);

  // If the Chunk was not specified in the clause - use default value 1.
  llvm::Value *Chunk = DispatchValues.Chunk ? DispatchValues.Chunk
                                            : CGF.Builder.getIntN(IVSize, 1);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
      CGF.Builder.getInt32(addMonoNonMonoModifier(
          Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type
      DispatchValues.LB,                                // Lower
      DispatchValues.UB,                                // Upper
      CGF.Builder.getIntN(IVSize, 1),                   // Stride
      Chunk                                             // Chunk
  };
  CGF.EmitRuntimeCall(createDispatchInitFunction(IVSize, IVSigned), Args);
}

/// \brief Emits the call to \a ForStaticInitFunction shared by the 'for' and
/// 'distribute' static-init paths.
///
/// \a Values must not be ordered and \a Schedule must be one of the static
/// schedules (asserted). The increment argument is always the constant 1; the
/// chunk defaults to 1 for non-chunked schedules. Does nothing when \a CGF has
/// no insertion point.
/// \param UpdateLocation Already emitted 'loc' argument.
/// \param ThreadId Already emitted 'tid' argument.
/// \param ForStaticInitFunction Runtime function to call.
/// \param Schedule Runtime schedule, combined with \a M1 / \a M2.
/// \param Values Addresses of the last-iteration flag, bounds and stride, plus
/// IV size and optional chunk.
static void emitForStaticInitCall(
    CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId,
    llvm::Constant *ForStaticInitFunction, OpenMPSchedType Schedule,
    OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2,
    const CGOpenMPRuntime::StaticRTInput &Values) {
  if (!CGF.HaveInsertPoint())
    return;

  assert(!Values.Ordered);
  assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked ||
         Schedule == OMP_sch_static_balanced_chunked ||
         Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked ||
         Schedule == OMP_dist_sch_static ||
         Schedule == OMP_dist_sch_static_chunked);

  // Call __kmpc_for_static_init(
  //          ident_t *loc, kmp_int32 tid, kmp_int32 schedtype,
  //          kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower,
  //          kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
  //          kmp_int[32|64] incr, kmp_int[32|64] chunk);
  llvm::Value *Chunk = Values.Chunk;
  if (Chunk == nullptr) {
    assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static ||
            Schedule == OMP_dist_sch_static) &&
           "expected static non-chunked schedule");
    // If the Chunk was not specified in the clause - use default value 1.
    Chunk = CGF.Builder.getIntN(Values.IVSize, 1);
  } else {
    assert((Schedule == OMP_sch_static_chunked ||
            Schedule == OMP_sch_static_balanced_chunked ||
            Schedule == OMP_ord_static_chunked ||
            Schedule == OMP_dist_sch_static_chunked) &&
           "expected static chunked schedule");
  }
  llvm::Value *Args[] = {
      UpdateLocation,
      ThreadId,
      CGF.Builder.getInt32(addMonoNonMonoModifier(Schedule, M1,
                                                  M2)), // Schedule type
      Values.IL.getPointer(),                           // &isLastIter
      Values.LB.getPointer(),                           // &LB
      Values.UB.getPointer(),                           // &UB
      Values.ST.getPointer(),                           // &Stride
      CGF.Builder.getIntN(Values.IVSize, 1),            // Incr
      Chunk                                             // Chunk
  };
  CGF.EmitRuntimeCall(ForStaticInitFunction, Args);
}

/// \brief Emits the static-init runtime call for a worksharing directive.
///
/// \a DKind must be a worksharing directive (asserted). The location is
/// flagged OMP_IDENT_WORK_LOOP for loop directives and
/// OMP_IDENT_WORK_SECTIONS otherwise. The schedule is derived from
/// \a ScheduleKind and \a Values, and the call itself is emitted by
/// emitForStaticInitCall().
void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF,
                                        SourceLocation Loc,
                                        OpenMPDirectiveKind DKind,
                                        const OpenMPScheduleTy &ScheduleKind,
                                        const StaticRTInput &Values) {
  OpenMPSchedType ScheduleNum = getRuntimeSchedule(
      ScheduleKind.Schedule, Values.Chunk != nullptr, Values.Ordered);
  assert(isOpenMPWorksharingDirective(DKind) &&
         "Expected loop-based or sections-based directive.");
  auto *UpdatedLocation = emitUpdateLocation(CGF, Loc,
                                             isOpenMPLoopDirective(DKind)
                                                 ? OMP_IDENT_WORK_LOOP
                                                 : OMP_IDENT_WORK_SECTIONS);
  auto *ThreadId = getThreadID(CGF, Loc);
  auto *StaticInitFunction =
      createForStaticInitFunction(Values.IVSize, Values.IVSigned);
  emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
                        ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, Values);
}

/// \brief Emits the static-init runtime call for a 'distribute' directive.
///
/// The location is flagged OMP_IDENT_WORK_DISTRIBUTE, the schedule is derived
/// from whether \a Values carries a chunk, and no schedule modifiers are
/// applied. The call itself is emitted by emitForStaticInitCall().
void CGOpenMPRuntime::emitDistributeStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDistScheduleClauseKind SchedKind,
    const CGOpenMPRuntime::StaticRTInput &Values) {
  OpenMPSchedType ScheduleNum =
      getRuntimeSchedule(SchedKind, Values.Chunk != nullptr);
  auto *UpdatedLocation =
      emitUpdateLocation(CGF, Loc, OMP_IDENT_WORK_DISTRIBUTE);
  auto *ThreadId = getThreadID(CGF, Loc);
  auto *StaticInitFunction =
      createForStaticInitFunction(Values.IVSize, Values.IVSigned);
  emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
                        ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown,
                        OMPC_SCHEDULE_MODIFIER_unknown, Values);
}

void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF,
                                          SourceLocation Loc,
                                          OpenMPDirectiveKind DKind) {
  if (!CGF.HaveInsertPoint())
    return;
  // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc,
                         isOpenMPDistributeDirective(DKind)
                             ? OMP_IDENT_WORK_DISTRIBUTE
                             : isOpenMPLoopDirective(DKind)
                                   ?
OMP_IDENT_WORK_LOOP 3193 : OMP_IDENT_WORK_SECTIONS), 3194 getThreadID(CGF, Loc)}; 3195 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_for_static_fini), 3196 Args); 3197 } 3198 3199 void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF, 3200 SourceLocation Loc, 3201 unsigned IVSize, 3202 bool IVSigned) { 3203 if (!CGF.HaveInsertPoint()) 3204 return; 3205 // Call __kmpc_for_dynamic_fini_(4|8)[u](ident_t *loc, kmp_int32 tid); 3206 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 3207 CGF.EmitRuntimeCall(createDispatchFiniFunction(IVSize, IVSigned), Args); 3208 } 3209 3210 llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF, 3211 SourceLocation Loc, unsigned IVSize, 3212 bool IVSigned, Address IL, 3213 Address LB, Address UB, 3214 Address ST) { 3215 // Call __kmpc_dispatch_next( 3216 // ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter, 3217 // kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper, 3218 // kmp_int[32|64] *p_stride); 3219 llvm::Value *Args[] = { 3220 emitUpdateLocation(CGF, Loc), 3221 getThreadID(CGF, Loc), 3222 IL.getPointer(), // &isLastIter 3223 LB.getPointer(), // &Lower 3224 UB.getPointer(), // &Upper 3225 ST.getPointer() // &Stride 3226 }; 3227 llvm::Value *Call = 3228 CGF.EmitRuntimeCall(createDispatchNextFunction(IVSize, IVSigned), Args); 3229 return CGF.EmitScalarConversion( 3230 Call, CGF.getContext().getIntTypeForBitwidth(32, /* Signed */ true), 3231 CGF.getContext().BoolTy, Loc); 3232 } 3233 3234 void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF, 3235 llvm::Value *NumThreads, 3236 SourceLocation Loc) { 3237 if (!CGF.HaveInsertPoint()) 3238 return; 3239 // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads) 3240 llvm::Value *Args[] = { 3241 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 3242 CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)}; 3243 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_num_threads), 3244 Args); 
3245 } 3246 3247 void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF, 3248 OpenMPProcBindClauseKind ProcBind, 3249 SourceLocation Loc) { 3250 if (!CGF.HaveInsertPoint()) 3251 return; 3252 // Constants for proc bind value accepted by the runtime. 3253 enum ProcBindTy { 3254 ProcBindFalse = 0, 3255 ProcBindTrue, 3256 ProcBindMaster, 3257 ProcBindClose, 3258 ProcBindSpread, 3259 ProcBindIntel, 3260 ProcBindDefault 3261 } RuntimeProcBind; 3262 switch (ProcBind) { 3263 case OMPC_PROC_BIND_master: 3264 RuntimeProcBind = ProcBindMaster; 3265 break; 3266 case OMPC_PROC_BIND_close: 3267 RuntimeProcBind = ProcBindClose; 3268 break; 3269 case OMPC_PROC_BIND_spread: 3270 RuntimeProcBind = ProcBindSpread; 3271 break; 3272 case OMPC_PROC_BIND_unknown: 3273 llvm_unreachable("Unsupported proc_bind value."); 3274 } 3275 // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind) 3276 llvm::Value *Args[] = { 3277 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 3278 llvm::ConstantInt::get(CGM.IntTy, RuntimeProcBind, /*isSigned=*/true)}; 3279 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_proc_bind), Args); 3280 } 3281 3282 void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>, 3283 SourceLocation Loc) { 3284 if (!CGF.HaveInsertPoint()) 3285 return; 3286 // Build call void __kmpc_flush(ident_t *loc) 3287 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_flush), 3288 emitUpdateLocation(CGF, Loc)); 3289 } 3290 3291 namespace { 3292 /// \brief Indexes of fields for type kmp_task_t. 3293 enum KmpTaskTFields { 3294 /// \brief List of shared variables. 3295 KmpTaskTShareds, 3296 /// \brief Task routine. 3297 KmpTaskTRoutine, 3298 /// \brief Partition id for the untied tasks. 3299 KmpTaskTPartId, 3300 /// Function with call of destructors for private variables. 3301 Data1, 3302 /// Task priority. 3303 Data2, 3304 /// (Taskloops only) Lower bound. 3305 KmpTaskTLowerBound, 3306 /// (Taskloops only) Upper bound. 
3307 KmpTaskTUpperBound, 3308 /// (Taskloops only) Stride. 3309 KmpTaskTStride, 3310 /// (Taskloops only) Is last iteration flag. 3311 KmpTaskTLastIter, 3312 /// (Taskloops only) Reduction data. 3313 KmpTaskTReductions, 3314 }; 3315 } // anonymous namespace 3316 3317 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::empty() const { 3318 // FIXME: Add other entries type when they become supported. 3319 return OffloadEntriesTargetRegion.empty(); 3320 } 3321 3322 /// \brief Initialize target region entry. 3323 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: 3324 initializeTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID, 3325 StringRef ParentName, unsigned LineNum, 3326 unsigned Order) { 3327 assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is " 3328 "only required for the device " 3329 "code generation."); 3330 OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = 3331 OffloadEntryInfoTargetRegion(Order, /*Addr=*/nullptr, /*ID=*/nullptr, 3332 /*Flags=*/0); 3333 ++OffloadingEntriesNum; 3334 } 3335 3336 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: 3337 registerTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID, 3338 StringRef ParentName, unsigned LineNum, 3339 llvm::Constant *Addr, llvm::Constant *ID, 3340 int32_t Flags) { 3341 // If we are emitting code for a target, the entry is already initialized, 3342 // only has to be registered. 
3343 if (CGM.getLangOpts().OpenMPIsDevice) { 3344 assert(hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum) && 3345 "Entry must exist."); 3346 auto &Entry = 3347 OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum]; 3348 assert(Entry.isValid() && "Entry not initialized!"); 3349 Entry.setAddress(Addr); 3350 Entry.setID(ID); 3351 Entry.setFlags(Flags); 3352 return; 3353 } else { 3354 OffloadEntryInfoTargetRegion Entry(OffloadingEntriesNum++, Addr, ID, Flags); 3355 OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = Entry; 3356 } 3357 } 3358 3359 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::hasTargetRegionEntryInfo( 3360 unsigned DeviceID, unsigned FileID, StringRef ParentName, 3361 unsigned LineNum) const { 3362 auto PerDevice = OffloadEntriesTargetRegion.find(DeviceID); 3363 if (PerDevice == OffloadEntriesTargetRegion.end()) 3364 return false; 3365 auto PerFile = PerDevice->second.find(FileID); 3366 if (PerFile == PerDevice->second.end()) 3367 return false; 3368 auto PerParentName = PerFile->second.find(ParentName); 3369 if (PerParentName == PerFile->second.end()) 3370 return false; 3371 auto PerLine = PerParentName->second.find(LineNum); 3372 if (PerLine == PerParentName->second.end()) 3373 return false; 3374 // Fail if this entry is already registered. 3375 if (PerLine->second.getAddress() || PerLine->second.getID()) 3376 return false; 3377 return true; 3378 } 3379 3380 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::actOnTargetRegionEntriesInfo( 3381 const OffloadTargetRegionEntryInfoActTy &Action) { 3382 // Scan all target region entries and perform the provided action. 3383 for (auto &D : OffloadEntriesTargetRegion) 3384 for (auto &F : D.second) 3385 for (auto &P : F.second) 3386 for (auto &L : P.second) 3387 Action(D.first, F.first, P.first(), L.first, L.second); 3388 } 3389 3390 /// \brief Create a Ctor/Dtor-like function whose body is emitted through 3391 /// \a Codegen. 
This is used to emit the two functions that register and 3392 /// unregister the descriptor of the current compilation unit. 3393 static llvm::Function * 3394 createOffloadingBinaryDescriptorFunction(CodeGenModule &CGM, StringRef Name, 3395 const RegionCodeGenTy &Codegen) { 3396 auto &C = CGM.getContext(); 3397 FunctionArgList Args; 3398 ImplicitParamDecl DummyPtr(C, C.VoidPtrTy, ImplicitParamDecl::Other); 3399 Args.push_back(&DummyPtr); 3400 3401 CodeGenFunction CGF(CGM); 3402 auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 3403 auto FTy = CGM.getTypes().GetFunctionType(FI); 3404 auto *Fn = 3405 CGM.CreateGlobalInitOrDestructFunction(FTy, Name, FI, SourceLocation()); 3406 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FI, Args, SourceLocation()); 3407 Codegen(CGF); 3408 CGF.FinishFunction(); 3409 return Fn; 3410 } 3411 3412 llvm::Function * 3413 CGOpenMPRuntime::createOffloadingBinaryDescriptorRegistration() { 3414 3415 // If we don't have entries or if we are emitting code for the device, we 3416 // don't need to do anything. 3417 if (CGM.getLangOpts().OpenMPIsDevice || OffloadEntriesInfoManager.empty()) 3418 return nullptr; 3419 3420 auto &M = CGM.getModule(); 3421 auto &C = CGM.getContext(); 3422 3423 // Get list of devices we care about 3424 auto &Devices = CGM.getLangOpts().OMPTargetTriples; 3425 3426 // We should be creating an offloading descriptor only if there are devices 3427 // specified. 3428 assert(!Devices.empty() && "No OpenMP offloading devices??"); 3429 3430 // Create the external variables that will point to the begin and end of the 3431 // host entries section. These will be defined by the linker. 
3432 auto *OffloadEntryTy = 3433 CGM.getTypes().ConvertTypeForMem(getTgtOffloadEntryQTy()); 3434 llvm::GlobalVariable *HostEntriesBegin = new llvm::GlobalVariable( 3435 M, OffloadEntryTy, /*isConstant=*/true, 3436 llvm::GlobalValue::ExternalLinkage, /*Initializer=*/nullptr, 3437 ".omp_offloading.entries_begin"); 3438 llvm::GlobalVariable *HostEntriesEnd = new llvm::GlobalVariable( 3439 M, OffloadEntryTy, /*isConstant=*/true, 3440 llvm::GlobalValue::ExternalLinkage, /*Initializer=*/nullptr, 3441 ".omp_offloading.entries_end"); 3442 3443 // Create all device images 3444 auto *DeviceImageTy = cast<llvm::StructType>( 3445 CGM.getTypes().ConvertTypeForMem(getTgtDeviceImageQTy())); 3446 ConstantInitBuilder DeviceImagesBuilder(CGM); 3447 auto DeviceImagesEntries = DeviceImagesBuilder.beginArray(DeviceImageTy); 3448 3449 for (unsigned i = 0; i < Devices.size(); ++i) { 3450 StringRef T = Devices[i].getTriple(); 3451 auto *ImgBegin = new llvm::GlobalVariable( 3452 M, CGM.Int8Ty, /*isConstant=*/true, llvm::GlobalValue::ExternalLinkage, 3453 /*Initializer=*/nullptr, 3454 Twine(".omp_offloading.img_start.") + Twine(T)); 3455 auto *ImgEnd = new llvm::GlobalVariable( 3456 M, CGM.Int8Ty, /*isConstant=*/true, llvm::GlobalValue::ExternalLinkage, 3457 /*Initializer=*/nullptr, Twine(".omp_offloading.img_end.") + Twine(T)); 3458 3459 auto Dev = DeviceImagesEntries.beginStruct(DeviceImageTy); 3460 Dev.add(ImgBegin); 3461 Dev.add(ImgEnd); 3462 Dev.add(HostEntriesBegin); 3463 Dev.add(HostEntriesEnd); 3464 Dev.finishAndAddTo(DeviceImagesEntries); 3465 } 3466 3467 // Create device images global array. 
3468 llvm::GlobalVariable *DeviceImages = 3469 DeviceImagesEntries.finishAndCreateGlobal(".omp_offloading.device_images", 3470 CGM.getPointerAlign(), 3471 /*isConstant=*/true); 3472 DeviceImages->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); 3473 3474 // This is a Zero array to be used in the creation of the constant expressions 3475 llvm::Constant *Index[] = {llvm::Constant::getNullValue(CGM.Int32Ty), 3476 llvm::Constant::getNullValue(CGM.Int32Ty)}; 3477 3478 // Create the target region descriptor. 3479 auto *BinaryDescriptorTy = cast<llvm::StructType>( 3480 CGM.getTypes().ConvertTypeForMem(getTgtBinaryDescriptorQTy())); 3481 ConstantInitBuilder DescBuilder(CGM); 3482 auto DescInit = DescBuilder.beginStruct(BinaryDescriptorTy); 3483 DescInit.addInt(CGM.Int32Ty, Devices.size()); 3484 DescInit.add(llvm::ConstantExpr::getGetElementPtr(DeviceImages->getValueType(), 3485 DeviceImages, 3486 Index)); 3487 DescInit.add(HostEntriesBegin); 3488 DescInit.add(HostEntriesEnd); 3489 3490 auto *Desc = DescInit.finishAndCreateGlobal(".omp_offloading.descriptor", 3491 CGM.getPointerAlign(), 3492 /*isConstant=*/true); 3493 3494 // Emit code to register or unregister the descriptor at execution 3495 // startup or closing, respectively. 3496 3497 // Create a variable to drive the registration and unregistration of the 3498 // descriptor, so we can reuse the logic that emits Ctors and Dtors. 
3499 auto *IdentInfo = &C.Idents.get(".omp_offloading.reg_unreg_var"); 3500 ImplicitParamDecl RegUnregVar(C, C.getTranslationUnitDecl(), SourceLocation(), 3501 IdentInfo, C.CharTy, ImplicitParamDecl::Other); 3502 3503 auto *UnRegFn = createOffloadingBinaryDescriptorFunction( 3504 CGM, ".omp_offloading.descriptor_unreg", 3505 [&](CodeGenFunction &CGF, PrePostActionTy &) { 3506 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_unregister_lib), 3507 Desc); 3508 }); 3509 auto *RegFn = createOffloadingBinaryDescriptorFunction( 3510 CGM, ".omp_offloading.descriptor_reg", 3511 [&](CodeGenFunction &CGF, PrePostActionTy &) { 3512 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_register_lib), 3513 Desc); 3514 CGM.getCXXABI().registerGlobalDtor(CGF, RegUnregVar, UnRegFn, Desc); 3515 }); 3516 if (CGM.supportsCOMDAT()) { 3517 // It is sufficient to call registration function only once, so create a 3518 // COMDAT group for registration/unregistration functions and associated 3519 // data. That would reduce startup time and code size. Registration 3520 // function serves as a COMDAT group key. 3521 auto ComdatKey = M.getOrInsertComdat(RegFn->getName()); 3522 RegFn->setLinkage(llvm::GlobalValue::LinkOnceAnyLinkage); 3523 RegFn->setVisibility(llvm::GlobalValue::HiddenVisibility); 3524 RegFn->setComdat(ComdatKey); 3525 UnRegFn->setComdat(ComdatKey); 3526 DeviceImages->setComdat(ComdatKey); 3527 Desc->setComdat(ComdatKey); 3528 } 3529 return RegFn; 3530 } 3531 3532 void CGOpenMPRuntime::createOffloadEntry(llvm::Constant *ID, 3533 llvm::Constant *Addr, uint64_t Size, 3534 int32_t Flags) { 3535 StringRef Name = Addr->getName(); 3536 auto *TgtOffloadEntryType = cast<llvm::StructType>( 3537 CGM.getTypes().ConvertTypeForMem(getTgtOffloadEntryQTy())); 3538 llvm::LLVMContext &C = CGM.getModule().getContext(); 3539 llvm::Module &M = CGM.getModule(); 3540 3541 // Make sure the address has the right type. 
3542 llvm::Constant *AddrPtr = llvm::ConstantExpr::getBitCast(ID, CGM.VoidPtrTy); 3543 3544 // Create constant string with the name. 3545 llvm::Constant *StrPtrInit = llvm::ConstantDataArray::getString(C, Name); 3546 3547 llvm::GlobalVariable *Str = 3548 new llvm::GlobalVariable(M, StrPtrInit->getType(), /*isConstant=*/true, 3549 llvm::GlobalValue::InternalLinkage, StrPtrInit, 3550 ".omp_offloading.entry_name"); 3551 Str->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); 3552 llvm::Constant *StrPtr = llvm::ConstantExpr::getBitCast(Str, CGM.Int8PtrTy); 3553 3554 // We can't have any padding between symbols, so we need to have 1-byte 3555 // alignment. 3556 auto Align = CharUnits::fromQuantity(1); 3557 3558 // Create the entry struct. 3559 ConstantInitBuilder EntryBuilder(CGM); 3560 auto EntryInit = EntryBuilder.beginStruct(TgtOffloadEntryType); 3561 EntryInit.add(AddrPtr); 3562 EntryInit.add(StrPtr); 3563 EntryInit.addInt(CGM.SizeTy, Size); 3564 EntryInit.addInt(CGM.Int32Ty, Flags); 3565 EntryInit.addInt(CGM.Int32Ty, 0); 3566 llvm::GlobalVariable *Entry = 3567 EntryInit.finishAndCreateGlobal(".omp_offloading.entry", 3568 Align, 3569 /*constant*/ true, 3570 llvm::GlobalValue::ExternalLinkage); 3571 3572 // The entry has to be created in the section the linker expects it to be. 3573 Entry->setSection(".omp_offloading.entries"); 3574 } 3575 3576 void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() { 3577 // Emit the offloading entries and metadata so that the device codegen side 3578 // can easily figure out what to emit. The produced metadata looks like 3579 // this: 3580 // 3581 // !omp_offload.info = !{!1, ...} 3582 // 3583 // Right now we only generate metadata for function that contain target 3584 // regions. 3585 3586 // If we do not have entries, we dont need to do anything. 
3587 if (OffloadEntriesInfoManager.empty()) 3588 return; 3589 3590 llvm::Module &M = CGM.getModule(); 3591 llvm::LLVMContext &C = M.getContext(); 3592 SmallVector<OffloadEntriesInfoManagerTy::OffloadEntryInfo *, 16> 3593 OrderedEntries(OffloadEntriesInfoManager.size()); 3594 3595 // Create the offloading info metadata node. 3596 llvm::NamedMDNode *MD = M.getOrInsertNamedMetadata("omp_offload.info"); 3597 3598 // Auxiliary methods to create metadata values and strings. 3599 auto getMDInt = [&](unsigned v) { 3600 return llvm::ConstantAsMetadata::get( 3601 llvm::ConstantInt::get(llvm::Type::getInt32Ty(C), v)); 3602 }; 3603 3604 auto getMDString = [&](StringRef v) { return llvm::MDString::get(C, v); }; 3605 3606 // Create function that emits metadata for each target region entry; 3607 auto &&TargetRegionMetadataEmitter = [&]( 3608 unsigned DeviceID, unsigned FileID, StringRef ParentName, unsigned Line, 3609 OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion &E) { 3610 llvm::SmallVector<llvm::Metadata *, 32> Ops; 3611 // Generate metadata for target regions. Each entry of this metadata 3612 // contains: 3613 // - Entry 0 -> Kind of this type of metadata (0). 3614 // - Entry 1 -> Device ID of the file where the entry was identified. 3615 // - Entry 2 -> File ID of the file where the entry was identified. 3616 // - Entry 3 -> Mangled name of the function where the entry was identified. 3617 // - Entry 4 -> Line in the file where the entry was identified. 3618 // - Entry 5 -> Order the entry was created. 3619 // The first element of the metadata node is the kind. 3620 Ops.push_back(getMDInt(E.getKind())); 3621 Ops.push_back(getMDInt(DeviceID)); 3622 Ops.push_back(getMDInt(FileID)); 3623 Ops.push_back(getMDString(ParentName)); 3624 Ops.push_back(getMDInt(Line)); 3625 Ops.push_back(getMDInt(E.getOrder())); 3626 3627 // Save this entry in the right position of the ordered entries array. 
3628 OrderedEntries[E.getOrder()] = &E; 3629 3630 // Add metadata to the named metadata node. 3631 MD->addOperand(llvm::MDNode::get(C, Ops)); 3632 }; 3633 3634 OffloadEntriesInfoManager.actOnTargetRegionEntriesInfo( 3635 TargetRegionMetadataEmitter); 3636 3637 for (auto *E : OrderedEntries) { 3638 assert(E && "All ordered entries must exist!"); 3639 if (auto *CE = 3640 dyn_cast<OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion>( 3641 E)) { 3642 assert(CE->getID() && CE->getAddress() && 3643 "Entry ID and Addr are invalid!"); 3644 createOffloadEntry(CE->getID(), CE->getAddress(), /*Size=*/0); 3645 } else 3646 llvm_unreachable("Unsupported entry kind."); 3647 } 3648 } 3649 3650 /// \brief Loads all the offload entries information from the host IR 3651 /// metadata. 3652 void CGOpenMPRuntime::loadOffloadInfoMetadata() { 3653 // If we are in target mode, load the metadata from the host IR. This code has 3654 // to match the metadaata creation in createOffloadEntriesAndInfoMetadata(). 
3655 3656 if (!CGM.getLangOpts().OpenMPIsDevice) 3657 return; 3658 3659 if (CGM.getLangOpts().OMPHostIRFile.empty()) 3660 return; 3661 3662 auto Buf = llvm::MemoryBuffer::getFile(CGM.getLangOpts().OMPHostIRFile); 3663 if (Buf.getError()) 3664 return; 3665 3666 llvm::LLVMContext C; 3667 auto ME = expectedToErrorOrAndEmitErrors( 3668 C, llvm::parseBitcodeFile(Buf.get()->getMemBufferRef(), C)); 3669 3670 if (ME.getError()) 3671 return; 3672 3673 llvm::NamedMDNode *MD = ME.get()->getNamedMetadata("omp_offload.info"); 3674 if (!MD) 3675 return; 3676 3677 for (auto I : MD->operands()) { 3678 llvm::MDNode *MN = cast<llvm::MDNode>(I); 3679 3680 auto getMDInt = [&](unsigned Idx) { 3681 llvm::ConstantAsMetadata *V = 3682 cast<llvm::ConstantAsMetadata>(MN->getOperand(Idx)); 3683 return cast<llvm::ConstantInt>(V->getValue())->getZExtValue(); 3684 }; 3685 3686 auto getMDString = [&](unsigned Idx) { 3687 llvm::MDString *V = cast<llvm::MDString>(MN->getOperand(Idx)); 3688 return V->getString(); 3689 }; 3690 3691 switch (getMDInt(0)) { 3692 default: 3693 llvm_unreachable("Unexpected metadata!"); 3694 break; 3695 case OffloadEntriesInfoManagerTy::OffloadEntryInfo:: 3696 OFFLOAD_ENTRY_INFO_TARGET_REGION: 3697 OffloadEntriesInfoManager.initializeTargetRegionEntryInfo( 3698 /*DeviceID=*/getMDInt(1), /*FileID=*/getMDInt(2), 3699 /*ParentName=*/getMDString(3), /*Line=*/getMDInt(4), 3700 /*Order=*/getMDInt(5)); 3701 break; 3702 } 3703 } 3704 } 3705 3706 void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) { 3707 if (!KmpRoutineEntryPtrTy) { 3708 // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type. 
3709 auto &C = CGM.getContext(); 3710 QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy}; 3711 FunctionProtoType::ExtProtoInfo EPI; 3712 KmpRoutineEntryPtrQTy = C.getPointerType( 3713 C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI)); 3714 KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy); 3715 } 3716 } 3717 3718 static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC, 3719 QualType FieldTy) { 3720 auto *Field = FieldDecl::Create( 3721 C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy, 3722 C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()), 3723 /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit); 3724 Field->setAccess(AS_public); 3725 DC->addDecl(Field); 3726 return Field; 3727 } 3728 3729 QualType CGOpenMPRuntime::getTgtOffloadEntryQTy() { 3730 3731 // Make sure the type of the entry is already created. This is the type we 3732 // have to create: 3733 // struct __tgt_offload_entry{ 3734 // void *addr; // Pointer to the offload entry info. 3735 // // (function or global) 3736 // char *name; // Name of the function or global. 3737 // size_t size; // Size of the entry info (0 if it a function). 3738 // int32_t flags; // Flags associated with the entry, e.g. 'link'. 3739 // int32_t reserved; // Reserved, to use by the runtime library. 
3740 // }; 3741 if (TgtOffloadEntryQTy.isNull()) { 3742 ASTContext &C = CGM.getContext(); 3743 auto *RD = C.buildImplicitRecord("__tgt_offload_entry"); 3744 RD->startDefinition(); 3745 addFieldToRecordDecl(C, RD, C.VoidPtrTy); 3746 addFieldToRecordDecl(C, RD, C.getPointerType(C.CharTy)); 3747 addFieldToRecordDecl(C, RD, C.getSizeType()); 3748 addFieldToRecordDecl( 3749 C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true)); 3750 addFieldToRecordDecl( 3751 C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true)); 3752 RD->completeDefinition(); 3753 TgtOffloadEntryQTy = C.getRecordType(RD); 3754 } 3755 return TgtOffloadEntryQTy; 3756 } 3757 3758 QualType CGOpenMPRuntime::getTgtDeviceImageQTy() { 3759 // These are the types we need to build: 3760 // struct __tgt_device_image{ 3761 // void *ImageStart; // Pointer to the target code start. 3762 // void *ImageEnd; // Pointer to the target code end. 3763 // // We also add the host entries to the device image, as it may be useful 3764 // // for the target runtime to have access to that information. 3765 // __tgt_offload_entry *EntriesBegin; // Begin of the table with all 3766 // // the entries. 3767 // __tgt_offload_entry *EntriesEnd; // End of the table with all the 3768 // // entries (non inclusive). 3769 // }; 3770 if (TgtDeviceImageQTy.isNull()) { 3771 ASTContext &C = CGM.getContext(); 3772 auto *RD = C.buildImplicitRecord("__tgt_device_image"); 3773 RD->startDefinition(); 3774 addFieldToRecordDecl(C, RD, C.VoidPtrTy); 3775 addFieldToRecordDecl(C, RD, C.VoidPtrTy); 3776 addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy())); 3777 addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy())); 3778 RD->completeDefinition(); 3779 TgtDeviceImageQTy = C.getRecordType(RD); 3780 } 3781 return TgtDeviceImageQTy; 3782 } 3783 3784 QualType CGOpenMPRuntime::getTgtBinaryDescriptorQTy() { 3785 // struct __tgt_bin_desc{ 3786 // int32_t NumDevices; // Number of devices supported. 
3787 // __tgt_device_image *DeviceImages; // Arrays of device images 3788 // // (one per device). 3789 // __tgt_offload_entry *EntriesBegin; // Begin of the table with all the 3790 // // entries. 3791 // __tgt_offload_entry *EntriesEnd; // End of the table with all the 3792 // // entries (non inclusive). 3793 // }; 3794 if (TgtBinaryDescriptorQTy.isNull()) { 3795 ASTContext &C = CGM.getContext(); 3796 auto *RD = C.buildImplicitRecord("__tgt_bin_desc"); 3797 RD->startDefinition(); 3798 addFieldToRecordDecl( 3799 C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true)); 3800 addFieldToRecordDecl(C, RD, C.getPointerType(getTgtDeviceImageQTy())); 3801 addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy())); 3802 addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy())); 3803 RD->completeDefinition(); 3804 TgtBinaryDescriptorQTy = C.getRecordType(RD); 3805 } 3806 return TgtBinaryDescriptorQTy; 3807 } 3808 3809 namespace { 3810 struct PrivateHelpersTy { 3811 PrivateHelpersTy(const VarDecl *Original, const VarDecl *PrivateCopy, 3812 const VarDecl *PrivateElemInit) 3813 : Original(Original), PrivateCopy(PrivateCopy), 3814 PrivateElemInit(PrivateElemInit) {} 3815 const VarDecl *Original; 3816 const VarDecl *PrivateCopy; 3817 const VarDecl *PrivateElemInit; 3818 }; 3819 typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy; 3820 } // anonymous namespace 3821 3822 static RecordDecl * 3823 createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) { 3824 if (!Privates.empty()) { 3825 auto &C = CGM.getContext(); 3826 // Build struct .kmp_privates_t. 
{ 3827 // /* private vars */ 3828 // }; 3829 auto *RD = C.buildImplicitRecord(".kmp_privates.t"); 3830 RD->startDefinition(); 3831 for (auto &&Pair : Privates) { 3832 auto *VD = Pair.second.Original; 3833 auto Type = VD->getType(); 3834 Type = Type.getNonReferenceType(); 3835 auto *FD = addFieldToRecordDecl(C, RD, Type); 3836 if (VD->hasAttrs()) { 3837 for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()), 3838 E(VD->getAttrs().end()); 3839 I != E; ++I) 3840 FD->addAttr(*I); 3841 } 3842 } 3843 RD->completeDefinition(); 3844 return RD; 3845 } 3846 return nullptr; 3847 } 3848 3849 static RecordDecl * 3850 createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind, 3851 QualType KmpInt32Ty, 3852 QualType KmpRoutineEntryPointerQTy) { 3853 auto &C = CGM.getContext(); 3854 // Build struct kmp_task_t { 3855 // void * shareds; 3856 // kmp_routine_entry_t routine; 3857 // kmp_int32 part_id; 3858 // kmp_cmplrdata_t data1; 3859 // kmp_cmplrdata_t data2; 3860 // For taskloops additional fields: 3861 // kmp_uint64 lb; 3862 // kmp_uint64 ub; 3863 // kmp_int64 st; 3864 // kmp_int32 liter; 3865 // void * reductions; 3866 // }; 3867 auto *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TTK_Union); 3868 UD->startDefinition(); 3869 addFieldToRecordDecl(C, UD, KmpInt32Ty); 3870 addFieldToRecordDecl(C, UD, KmpRoutineEntryPointerQTy); 3871 UD->completeDefinition(); 3872 QualType KmpCmplrdataTy = C.getRecordType(UD); 3873 auto *RD = C.buildImplicitRecord("kmp_task_t"); 3874 RD->startDefinition(); 3875 addFieldToRecordDecl(C, RD, C.VoidPtrTy); 3876 addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy); 3877 addFieldToRecordDecl(C, RD, KmpInt32Ty); 3878 addFieldToRecordDecl(C, RD, KmpCmplrdataTy); 3879 addFieldToRecordDecl(C, RD, KmpCmplrdataTy); 3880 if (isOpenMPTaskLoopDirective(Kind)) { 3881 QualType KmpUInt64Ty = 3882 CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0); 3883 QualType KmpInt64Ty = 3884 
CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1); 3885 addFieldToRecordDecl(C, RD, KmpUInt64Ty); 3886 addFieldToRecordDecl(C, RD, KmpUInt64Ty); 3887 addFieldToRecordDecl(C, RD, KmpInt64Ty); 3888 addFieldToRecordDecl(C, RD, KmpInt32Ty); 3889 addFieldToRecordDecl(C, RD, C.VoidPtrTy); 3890 } 3891 RD->completeDefinition(); 3892 return RD; 3893 } 3894 3895 static RecordDecl * 3896 createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy, 3897 ArrayRef<PrivateDataTy> Privates) { 3898 auto &C = CGM.getContext(); 3899 // Build struct kmp_task_t_with_privates { 3900 // kmp_task_t task_data; 3901 // .kmp_privates_t. privates; 3902 // }; 3903 auto *RD = C.buildImplicitRecord("kmp_task_t_with_privates"); 3904 RD->startDefinition(); 3905 addFieldToRecordDecl(C, RD, KmpTaskTQTy); 3906 if (auto *PrivateRD = createPrivatesRecordDecl(CGM, Privates)) { 3907 addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD)); 3908 } 3909 RD->completeDefinition(); 3910 return RD; 3911 } 3912 3913 /// \brief Emit a proxy function which accepts kmp_task_t as the second 3914 /// argument. 
/// \code
/// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
///   TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, tt,
///   For taskloops:
///   tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
///   tt->reductions, tt->shareds);
///   return 0;
/// }
/// \endcode
/// \param Kind Directive kind; taskloop directives get the extra loop-related
/// arguments listed above.
/// \param KmpTaskTWithPrivatesQTy Record whose first field is the kmp_task_t
/// part and whose optional second field is the privates record.
/// \param SharedsPtrTy Pointer type the loaded shareds pointer is cast to.
/// \param TaskFunction Outlined task function called by the proxy.
/// \param TaskPrivatesMap Value forwarded as the privates-mapping argument.
/// \return The emitted internal-linkage '.omp_task_entry.' function.
static llvm::Value *
emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc,
                      OpenMPDirectiveKind Kind, QualType KmpInt32Ty,
                      QualType KmpTaskTWithPrivatesPtrQTy,
                      QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy,
                      QualType SharedsPtrTy, llvm::Value *TaskFunction,
                      llvm::Value *TaskPrivatesMap) {
  auto &C = CGM.getContext();
  // Implicit parameters: the global thread id and a restrict-qualified pointer
  // to the task record.
  FunctionArgList Args;
  ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                                KmpTaskTWithPrivatesPtrQTy.withRestrict(),
                                ImplicitParamDecl::Other);
  Args.push_back(&GtidArg);
  Args.push_back(&TaskTypeArg);
  // Create the internal function and start emitting its body.
  auto &TaskEntryFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
  auto *TaskEntryTy = CGM.getTypes().GetFunctionType(TaskEntryFnInfo);
  auto *TaskEntry =
      llvm::Function::Create(TaskEntryTy, llvm::GlobalValue::InternalLinkage,
                             ".omp_task_entry.", &CGM.getModule());
  CGM.SetInternalFunctionAttributes(/*D=*/nullptr, TaskEntry, TaskEntryFnInfo);
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args);

  // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map,
  // tt,
  // For taskloops:
  // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
  // tt->task_data.shareds);
  auto *GtidParam = CGF.EmitLoadOfScalar(
      CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc);
  // TDBase is '*tt' (the whole record); Base is its first field, the
  // kmp_task_t part.
  LValue TDBase = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&TaskTypeArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  auto *KmpTaskTWithPrivatesQTyRD =
      cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
  LValue Base =
      CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
  auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
  // part_id is passed by address (pointer to the field), not by value.
  auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
  auto PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI);
  auto *PartidParam = PartIdLVal.getPointer();

  // Load the shareds pointer and cast it to the type given by SharedsPtrTy.
  auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds);
  auto SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI);
  auto *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.EmitLoadOfLValue(SharedsLVal, Loc).getScalarVal(),
      CGF.ConvertTypeForMem(SharedsPtrTy));

  // The privates field is optional: pass its address as void*, or a null
  // void* when the record consists of the kmp_task_t field only.
  auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
  llvm::Value *PrivatesParam;
  if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) {
    auto PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI);
    PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        PrivatesLVal.getPointer(), CGF.VoidPtrTy);
  } else
    PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);

  // Arguments shared by tasks and taskloops; the last one is the task record
  // address cast to void*.
  llvm::Value *CommonArgs[] = {GtidParam, PartidParam, PrivatesParam,
                               TaskPrivatesMap,
                               CGF.Builder
                                   .CreatePointerBitCastOrAddrSpaceCast(
                                       TDBase.getAddress(), CGF.VoidPtrTy)
                                   .getPointer()};
  SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs),
                                          std::end(CommonArgs));
  if (isOpenMPTaskLoopDirective(Kind)) {
    // Taskloops additionally pass the values loaded from the lower bound,
    // upper bound, stride, last-iteration and reductions fields of kmp_task_t,
    // in that order.
    auto LBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound);
    auto LBLVal = CGF.EmitLValueForField(Base, *LBFI);
    auto *LBParam = CGF.EmitLoadOfLValue(LBLVal, Loc).getScalarVal();
    auto UBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound);
    auto UBLVal = CGF.EmitLValueForField(Base, *UBFI);
    auto *UBParam = CGF.EmitLoadOfLValue(UBLVal, Loc).getScalarVal();
    auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride);
    auto StLVal = CGF.EmitLValueForField(Base, *StFI);
    auto *StParam = CGF.EmitLoadOfLValue(StLVal, Loc).getScalarVal();
    auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
    auto LILVal = CGF.EmitLValueForField(Base, *LIFI);
    auto *LIParam = CGF.EmitLoadOfLValue(LILVal, Loc).getScalarVal();
    auto RFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTReductions);
    auto RLVal = CGF.EmitLValueForField(Base, *RFI);
    auto *RParam = CGF.EmitLoadOfLValue(RLVal, Loc).getScalarVal();
    CallArgs.push_back(LBParam);
    CallArgs.push_back(UBParam);
    CallArgs.push_back(StParam);
    CallArgs.push_back(LIParam);
    CallArgs.push_back(RParam);
  }
  // The shareds pointer is always the trailing argument.
  CallArgs.push_back(SharedsParam);

  CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskFunction,
                                                  CallArgs);
  // The proxy always returns 0.
  CGF.EmitStoreThroughLValue(
      RValue::get(CGF.Builder.getInt32(/*C=*/0)),
      CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty));
  CGF.FinishFunction();
  return TaskEntry;
}

/// \brief Emit the internal function '.omp_task_destructor.' taking
/// (kmp_int32 gtid, kmp_task_t_with_privates *restrict tt). Its body pushes a
/// destroy cleanup for every field of the privates record (the second field of
/// \p KmpTaskTWithPrivatesQTy) whose type requires destruction.
/// \return The emitted function.
static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM,
                                            SourceLocation Loc,
                                            QualType KmpInt32Ty,
                                            QualType KmpTaskTWithPrivatesPtrQTy,
                                            QualType KmpTaskTWithPrivatesQTy) {
  auto &C = CGM.getContext();
  FunctionArgList Args;
  ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                                KmpTaskTWithPrivatesPtrQTy.withRestrict(),
                                ImplicitParamDecl::Other);
  Args.push_back(&GtidArg);
  Args.push_back(&TaskTypeArg);
  auto &DestructorFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
  auto *DestructorFnTy =
CGM.getTypes().GetFunctionType(DestructorFnInfo);
  auto *DestructorFn =
      llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage,
                             ".omp_task_destructor.", &CGM.getModule());
  CGM.SetInternalFunctionAttributes(/*D=*/nullptr, DestructorFn,
                                    DestructorFnInfo);
  CodeGenFunction CGF(CGM);
  // No debug info is emitted for this compiler-generated helper.
  CGF.disableDebugInfo();
  CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo,
                    Args);

  // Base starts as '*tt' and is then narrowed to the privates record, i.e. the
  // second field of kmp_task_t_with_privates.
  // NOTE(review): this dereferences the second field unconditionally, so the
  // record is assumed to have a privates field - confirm callers guarantee it.
  LValue Base = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&TaskTypeArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  auto *KmpTaskTWithPrivatesQTyRD =
      cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
  Base = CGF.EmitLValueForField(Base, *FI);
  // Push a destroy cleanup for each private whose type needs destruction.
  for (auto *Field :
       cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) {
    if (auto DtorKind = Field->getType().isDestructedType()) {
      auto FieldLValue = CGF.EmitLValueForField(Base, Field);
      CGF.pushDestroy(DtorKind, FieldLValue.getAddress(), Field->getType());
    }
  }
  CGF.FinishFunction();
  return DestructorFn;
}

/// \brief Emit a privates mapping function for correct handling of private and
/// firstprivate variables.
/// \code
/// void .omp_task_privates_map.(const .privates.
///     *noalias privs, <ty1>
/// **noalias priv1,..., <tyn> **noalias privn) {
///   *priv1 = &.privates.priv1;
///   ...;
///   *privn = &.privates.privn;
/// }
/// \endcode
/// \param PrivateVars Private variables; each contributes one pointer-to-
/// pointer parameter, in this order.
/// \param FirstprivateVars Firstprivate variables; parameters follow those of
/// \p PrivateVars.
/// \param LastprivateVars Lastprivate variables; parameters follow those of
/// \p FirstprivateVars.
/// \param PrivatesQTy Type of the privates record that the first parameter
/// points to.
/// \param Privates Privates in the same order as the fields of \p PrivatesQTy.
static llvm::Value *
emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc,
                               ArrayRef<const Expr *> PrivateVars,
                               ArrayRef<const Expr *> FirstprivateVars,
                               ArrayRef<const Expr *> LastprivateVars,
                               QualType PrivatesQTy,
                               ArrayRef<PrivateDataTy> Privates) {
  auto &C = CGM.getContext();
  FunctionArgList Args;
  // Args[0]: const restrict pointer to the privates record.
  ImplicitParamDecl TaskPrivatesArg(
      C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
      C.getPointerType(PrivatesQTy).withConst().withRestrict(),
      ImplicitParamDecl::Other);
  Args.push_back(&TaskPrivatesArg);
  // Maps each original variable to the index of its parameter in Args.
  // Counter starts at 1 because index 0 is taken by TaskPrivatesArg.
  llvm::DenseMap<const VarDecl *, unsigned> PrivateVarsPos;
  unsigned Counter = 1;
  // One '<ty> **' parameter per private variable.
  for (auto *E: PrivateVars) {
    Args.push_back(ImplicitParamDecl::Create(
        C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
        C.getPointerType(C.getPointerType(E->getType()))
            .withConst()
            .withRestrict(),
        ImplicitParamDecl::Other));
    auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    PrivateVarsPos[VD] = Counter;
    ++Counter;
  }
  // One '<ty> **' parameter per firstprivate variable.
  for (auto *E : FirstprivateVars) {
    Args.push_back(ImplicitParamDecl::Create(
        C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
        C.getPointerType(C.getPointerType(E->getType()))
            .withConst()
            .withRestrict(),
        ImplicitParamDecl::Other));
    auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    PrivateVarsPos[VD] = Counter;
    ++Counter;
  }
  // One '<ty> **' parameter per lastprivate variable.
  for (auto *E: LastprivateVars) {
    Args.push_back(ImplicitParamDecl::Create(
        C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
        C.getPointerType(C.getPointerType(E->getType()))
            .withConst()
            .withRestrict(),
        ImplicitParamDecl::Other));
    auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    PrivateVarsPos[VD] = Counter;
    ++Counter;
  }
4129 auto &TaskPrivatesMapFnInfo = 4130 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 4131 auto *TaskPrivatesMapTy = 4132 CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo); 4133 auto *TaskPrivatesMap = llvm::Function::Create( 4134 TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage, 4135 ".omp_task_privates_map.", &CGM.getModule()); 4136 CGM.SetInternalFunctionAttributes(/*D=*/nullptr, TaskPrivatesMap, 4137 TaskPrivatesMapFnInfo); 4138 TaskPrivatesMap->removeFnAttr(llvm::Attribute::NoInline); 4139 TaskPrivatesMap->removeFnAttr(llvm::Attribute::OptimizeNone); 4140 TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline); 4141 CodeGenFunction CGF(CGM); 4142 CGF.disableDebugInfo(); 4143 CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap, 4144 TaskPrivatesMapFnInfo, Args); 4145 4146 // *privi = &.privates.privi; 4147 LValue Base = CGF.EmitLoadOfPointerLValue( 4148 CGF.GetAddrOfLocalVar(&TaskPrivatesArg), 4149 TaskPrivatesArg.getType()->castAs<PointerType>()); 4150 auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl()); 4151 Counter = 0; 4152 for (auto *Field : PrivatesQTyRD->fields()) { 4153 auto FieldLVal = CGF.EmitLValueForField(Base, Field); 4154 auto *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]]; 4155 auto RefLVal = CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType()); 4156 auto RefLoadLVal = CGF.EmitLoadOfPointerLValue( 4157 RefLVal.getAddress(), RefLVal.getType()->castAs<PointerType>()); 4158 CGF.EmitStoreOfScalar(FieldLVal.getPointer(), RefLoadLVal); 4159 ++Counter; 4160 } 4161 CGF.FinishFunction(); 4162 return TaskPrivatesMap; 4163 } 4164 4165 static bool stable_sort_comparator(const PrivateDataTy P1, 4166 const PrivateDataTy P2) { 4167 return P1.first > P2.first; 4168 } 4169 4170 /// Emit initialization for private variables in task-based directives. 
///
/// Walks \p Privates in lock-step with the fields of the privates record (the
/// second field of \p KmpTaskTWithPrivatesQTyRD) and emits the initializer of
/// each private copy that has one.
/// \param KmpTaskSharedsPtr Address of the shareds; used as the source of
/// firstprivate values that are read from captured fields.
/// \param TDBase LValue of the kmp_task_t_with_privates record to initialize.
/// \param ForDup If true, only initializers that are non-trivial constructor
/// calls are emitted; otherwise every initializer is emitted.
static void emitPrivatesInit(CodeGenFunction &CGF,
                             const OMPExecutableDirective &D,
                             Address KmpTaskSharedsPtr, LValue TDBase,
                             const RecordDecl *KmpTaskTWithPrivatesQTyRD,
                             QualType SharedsTy, QualType SharedsPtrTy,
                             const OMPTaskDataTy &Data,
                             ArrayRef<PrivateDataTy> Privates, bool ForDup) {
  auto &C = CGF.getContext();
  // Second field of kmp_task_t_with_privates: the privates record.
  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
  LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI);
  LValue SrcBase;
  bool IsTargetTask =
      isOpenMPTargetDataManagementDirective(D.getDirectiveKind()) ||
      isOpenMPTargetExecutionDirective(D.getDirectiveKind());
  // For target-based directives skip 3 firstprivate arrays BasePointersArray,
  // PointersArray and SizesArray. The original variables for these arrays are
  // not captured and we get their addresses explicitly.
  if ((!IsTargetTask && !Data.FirstprivateVars.empty()) ||
      (IsTargetTask && Data.FirstprivateVars.size() > 3)) {
    SrcBase = CGF.MakeAddrLValue(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy)),
        SharedsTy);
  }
  // Captured-field lookups below go through the captured statement of the
  // 'taskloop' or 'task' region of the directive.
  OpenMPDirectiveKind Kind = isOpenMPTaskLoopDirective(D.getDirectiveKind())
                                 ? OMPD_taskloop
                                 : OMPD_task;
  CodeGenFunction::CGCapturedStmtInfo CapturesInfo(*D.getCapturedStmt(Kind));
  // From here on FI iterates over the fields of the privates record itself.
  FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin();
  for (auto &&Pair : Privates) {
    auto *VD = Pair.second.PrivateCopy;
    auto *Init = VD->getAnyInitializer();
    if (Init && (!ForDup || (isa<CXXConstructExpr>(Init) &&
                             !CGF.isTrivialInitializer(Init)))) {
      LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI);
      // A non-null PrivateElemInit marks an entry whose initializer reads the
      // original (shared) value through the helper variable Elem.
      if (auto *Elem = Pair.second.PrivateElemInit) {
        auto *OriginalVD = Pair.second.Original;
        // Check if the variable is the target-based BasePointersArray,
        // PointersArray or SizesArray.
        LValue SharedRefLValue;
        QualType Type = OriginalVD->getType();
        if (IsTargetTask && isa<ImplicitParamDecl>(OriginalVD) &&
            isa<CapturedDecl>(OriginalVD->getDeclContext()) &&
            cast<CapturedDecl>(OriginalVD->getDeclContext())->getNumParams() ==
                0 &&
            isa<TranslationUnitDecl>(
                cast<CapturedDecl>(OriginalVD->getDeclContext())
                    ->getDeclContext())) {
          // Not captured: take the address of the local variable directly.
          SharedRefLValue =
              CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(OriginalVD), Type);
        } else {
          // Captured: read the field from the shareds and rebuild the lvalue
          // with the declared alignment of the original variable.
          auto *SharedField = CapturesInfo.lookup(OriginalVD);
          SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField);
          SharedRefLValue = CGF.MakeAddrLValue(
              Address(SharedRefLValue.getPointer(), C.getDeclAlign(OriginalVD)),
              SharedRefLValue.getType(), LValueBaseInfo(AlignmentSource::Decl),
              SharedRefLValue.getTBAAInfo());
        }
        if (Type->isArrayType()) {
          // Initialize firstprivate array.
          if (!isa<CXXConstructExpr>(Init) || CGF.isTrivialInitializer(Init)) {
            // Perform simple memcpy.
            CGF.EmitAggregateAssign(PrivateLValue.getAddress(),
                                    SharedRefLValue.getAddress(), Type);
          } else {
            // Initialize firstprivate array using element-by-element
            // initialization.
            CGF.EmitOMPAggregateAssign(
                PrivateLValue.getAddress(), SharedRefLValue.getAddress(), Type,
                [&CGF, Elem, Init, &CapturesInfo](Address DestElement,
                                                  Address SrcElement) {
                  // Clean up any temporaries needed by the initialization.
                  CodeGenFunction::OMPPrivateScope InitScope(CGF);
                  InitScope.addPrivate(
                      Elem, [SrcElement]() -> Address { return SrcElement; });
                  (void)InitScope.Privatize();
                  // Emit initialization for single element.
                  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(
                      CGF, &CapturesInfo);
                  CGF.EmitAnyExprToMem(Init, DestElement,
                                       Init->getType().getQualifiers(),
                                       /*IsInitializer=*/false);
                });
          }
        } else {
          // Non-array: map Elem to the shared value and emit the initializer.
          CodeGenFunction::OMPPrivateScope InitScope(CGF);
          InitScope.addPrivate(Elem, [SharedRefLValue]() -> Address {
            return SharedRefLValue.getAddress();
          });
          (void)InitScope.Privatize();
          CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo);
          CGF.EmitExprAsInit(Init, VD, PrivateLValue,
                             /*capturedByInit=*/false);
        }
      } else
        CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false);
    }
    // Advance even when nothing was emitted to keep FI aligned with Privates.
    ++FI;
  }
}

/// Check if duplication function is required for taskloops.
/// \return true if at least one private copy is initialized by a non-trivial
/// constructor call.
static bool checkInitIsRequired(CodeGenFunction &CGF,
                                ArrayRef<PrivateDataTy> Privates) {
  bool InitRequired = false;
  for (auto &&Pair : Privates) {
    auto *VD = Pair.second.PrivateCopy;
    auto *Init = VD->getAnyInitializer();
    InitRequired = InitRequired || (Init && isa<CXXConstructExpr>(Init) &&
                                    !CGF.isTrivialInitializer(Init));
  }
  return InitRequired;
}


/// Emit task_dup function (for initialization of
/// private/firstprivate/lastprivate vars and last_iter flag)
/// \code
/// void __task_dup_entry(kmp_task_t *task_dst, const kmp_task_t *task_src, int
/// lastpriv) {
/// // setup lastprivate flag
/// task_dst->last = lastpriv;
/// // could be constructor calls here...
/// }
/// \endcode
/// \param WithLastIter If true, the emitted function stores its 'lastpriv'
/// argument to the last-iteration field of the destination task.
/// \return The emitted internal-linkage '.omp_task_dup.' function.
static llvm::Value *
emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc,
                    const OMPExecutableDirective &D,
                    QualType KmpTaskTWithPrivatesPtrQTy,
                    const RecordDecl *KmpTaskTWithPrivatesQTyRD,
                    const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy,
                    QualType SharedsPtrTy, const OMPTaskDataTy &Data,
                    ArrayRef<PrivateDataTy> Privates, bool WithLastIter) {
  auto &C = CGM.getContext();
  // Parameters: destination task, source task, lastprivate flag.
  FunctionArgList Args;
  ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                           KmpTaskTWithPrivatesPtrQTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                           KmpTaskTWithPrivatesPtrQTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy,
                                ImplicitParamDecl::Other);
  Args.push_back(&DstArg);
  Args.push_back(&SrcArg);
  Args.push_back(&LastprivArg);
  auto &TaskDupFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  auto *TaskDupTy = CGM.getTypes().GetFunctionType(TaskDupFnInfo);
  auto *TaskDup =
      llvm::Function::Create(TaskDupTy, llvm::GlobalValue::InternalLinkage,
                             ".omp_task_dup.", &CGM.getModule());
  CGM.SetInternalFunctionAttributes(/*D=*/nullptr, TaskDup, TaskDupFnInfo);
  CodeGenFunction CGF(CGM);
  CGF.disableDebugInfo();
  CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskDup, TaskDupFnInfo, Args);

  // TDBase is the destination task record ('*task_dst').
  LValue TDBase = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&DstArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  // task_dst->liter = lastpriv;
  if (WithLastIter) {
    auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
    LValue Base = CGF.EmitLValueForField(
        TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
    LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
    llvm::Value *Lastpriv = CGF.EmitLoadOfScalar(
        CGF.GetAddrOfLocalVar(&LastprivArg), /*Volatile=*/false, C.IntTy, Loc);
    CGF.EmitStoreOfScalar(Lastpriv, LILVal);
  }

  // Emit initial values for private copies (if any).
  assert(!Privates.empty());
  Address KmpTaskSharedsPtr = Address::invalid();
  if (!Data.FirstprivateVars.empty()) {
    // This TDBase shadows the outer one: it is the *source* task record, from
    // which the shareds pointer for firstprivate initialization is loaded.
    LValue TDBase = CGF.EmitLoadOfPointerLValue(
        CGF.GetAddrOfLocalVar(&SrcArg),
        KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
    LValue Base = CGF.EmitLValueForField(
        TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
    KmpTaskSharedsPtr = Address(
        CGF.EmitLoadOfScalar(CGF.EmitLValueForField(
                                 Base, *std::next(KmpTaskTQTyRD->field_begin(),
                                                  KmpTaskTShareds)),
                             Loc),
        CGF.getNaturalTypeAlignment(SharedsTy));
  }
  // Initialize the privates of the destination task (outer TDBase).
  emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase, KmpTaskTWithPrivatesQTyRD,
                   SharedsTy, SharedsPtrTy, Data, Privates, /*ForDup=*/true);
  CGF.FinishFunction();
  return TaskDup;
}

/// Checks if destructor function is required to be generated.
/// The check inspects the fields of the privates record, i.e. the second field
/// of \p KmpTaskTWithPrivatesQTyRD, which is dereferenced unconditionally.
/// \return true if cleanups are required, false otherwise.
static bool
checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD) {
  bool NeedsCleanup = false;
  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
  auto *PrivateRD = cast<RecordDecl>(FI->getType()->getAsTagDecl());
  for (auto *FD : PrivateRD->fields()) {
    NeedsCleanup = NeedsCleanup || FD->getType().isDestructedType();
    // Stop at the first field that needs destruction.
    if (NeedsCleanup)
      break;
  }
  return NeedsCleanup;
}

/// \brief Emit allocation and initialization of a task object: collect and
/// sort the privates, build the task record types, emit the privates mapping
/// and proxy entry functions, call __kmpc_omp_task_alloc, copy the shareds,
/// initialize private copies and, when needed, set up the task duplication
/// function, the destructors function and the priority.
/// \return The values describing the new task (see TaskResultTy).
CGOpenMPRuntime::TaskResultTy
CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,
                              const OMPExecutableDirective &D,
                              llvm::Value *TaskFunction, QualType SharedsTy,
                              Address Shareds, const OMPTaskDataTy &Data) {
  auto &C = CGM.getContext();
  llvm::SmallVector<PrivateDataTy, 4> Privates;
  // Aggregate privates and sort them by the alignment.
auto I = Data.PrivateCopies.begin();
  // Each entry is (declared alignment of the original variable,
  // {original variable, private copy, element-init helper or null}).
  for (auto *E : Data.PrivateVars) {
    auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.push_back(std::make_pair(
        C.getDeclAlign(VD),
        PrivateHelpersTy(VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
                         /*PrivateElemInit=*/nullptr)));
    ++I;
  }
  // Only firstprivates carry an element-init helper variable.
  I = Data.FirstprivateCopies.begin();
  auto IElemInitRef = Data.FirstprivateInits.begin();
  for (auto *E : Data.FirstprivateVars) {
    auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.push_back(std::make_pair(
        C.getDeclAlign(VD),
        PrivateHelpersTy(
            VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
            cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl()))));
    ++I;
    ++IElemInitRef;
  }
  I = Data.LastprivateCopies.begin();
  for (auto *E : Data.LastprivateVars) {
    auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.push_back(std::make_pair(
        C.getDeclAlign(VD),
        PrivateHelpersTy(VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
                         /*PrivateElemInit=*/nullptr)));
    ++I;
  }
  // Stable sort keeps the private/firstprivate/lastprivate order among entries
  // with equal alignment.
  std::stable_sort(Privates.begin(), Privates.end(), stable_sort_comparator);
  auto KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
  // Build type kmp_routine_entry_t (if not built yet).
  emitKmpRoutineEntryT(KmpInt32Ty);
  // Build type kmp_task_t (if not built yet).
  // Taskloop and non-taskloop directives use separately cached record types.
  if (isOpenMPTaskLoopDirective(D.getDirectiveKind())) {
    if (SavedKmpTaskloopTQTy.isNull()) {
      SavedKmpTaskloopTQTy = C.getRecordType(createKmpTaskTRecordDecl(
          CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
    }
    KmpTaskTQTy = SavedKmpTaskloopTQTy;
  } else {
    assert((D.getDirectiveKind() == OMPD_task ||
            isOpenMPTargetExecutionDirective(D.getDirectiveKind()) ||
            isOpenMPTargetDataManagementDirective(D.getDirectiveKind())) &&
           "Expected taskloop, task or target directive");
    if (SavedKmpTaskTQTy.isNull()) {
      SavedKmpTaskTQTy = C.getRecordType(createKmpTaskTRecordDecl(
          CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
    }
    KmpTaskTQTy = SavedKmpTaskTQTy;
  }
  auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
  // Build particular struct kmp_task_t for the given task.
  auto *KmpTaskTWithPrivatesQTyRD =
      createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates);
  auto KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD);
  QualType KmpTaskTWithPrivatesPtrQTy =
      C.getPointerType(KmpTaskTWithPrivatesQTy);
  auto *KmpTaskTWithPrivatesTy = CGF.ConvertType(KmpTaskTWithPrivatesQTy);
  auto *KmpTaskTWithPrivatesPtrTy = KmpTaskTWithPrivatesTy->getPointerTo();
  auto *KmpTaskTWithPrivatesTySize = CGF.getTypeSize(KmpTaskTWithPrivatesQTy);
  QualType SharedsPtrTy = C.getPointerType(SharedsTy);

  // Emit the privates mapping function (if there are any privates); otherwise
  // pass a null pointer of the type of the fourth parameter of TaskFunction.
  llvm::Value *TaskPrivatesMap = nullptr;
  auto *TaskPrivatesMapTy =
      std::next(cast<llvm::Function>(TaskFunction)->arg_begin(), 3)->getType();
  if (!Privates.empty()) {
    auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
    TaskPrivatesMap = emitTaskPrivateMappingFunction(
        CGM, Loc, Data.PrivateVars, Data.FirstprivateVars, Data.LastprivateVars,
        FI->getType(), Privates);
    TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        TaskPrivatesMap, TaskPrivatesMapTy);
  } else {
    TaskPrivatesMap = llvm::ConstantPointerNull::get(
        cast<llvm::PointerType>(TaskPrivatesMapTy));
  }
  // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid,
  // kmp_task_t *tt);
  auto *TaskEntry = emitProxyTaskFunction(
      CGM, Loc, D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
      KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction,
      TaskPrivatesMap);

  // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
  // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
  // kmp_routine_entry_t *task_entry);
  // Task flags. Format is taken from
  // http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp.h,
  // description of kmp_tasking_flags struct.
  enum {
    TiedFlag = 0x1,
    FinalFlag = 0x2,
    DestructorsFlag = 0x8,
    PriorityFlag = 0x20
  };
  // Flags known at compile time are accumulated in 'Flags'.
  unsigned Flags = Data.Tied ? TiedFlag : 0;
  bool NeedsCleanup = false;
  if (!Privates.empty()) {
    NeedsCleanup = checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD);
    if (NeedsCleanup)
      Flags = Flags | DestructorsFlag;
  }
  if (Data.Priority.getInt())
    Flags = Flags | PriorityFlag;
  // The 'final' flag may depend on a runtime value: if Data.Final carries a
  // pointer, select FinalFlag or 0 on it; otherwise use its constant int part.
  auto *TaskFlags =
      Data.Final.getPointer()
          ? CGF.Builder.CreateSelect(Data.Final.getPointer(),
                                     CGF.Builder.getInt32(FinalFlag),
                                     CGF.Builder.getInt32(/*C=*/0))
          : CGF.Builder.getInt32(Data.Final.getInt() ? FinalFlag : 0);
  TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags));
  auto *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy));
  llvm::Value *AllocArgs[] = {emitUpdateLocation(CGF, Loc),
                              getThreadID(CGF, Loc), TaskFlags,
                              KmpTaskTWithPrivatesTySize, SharedsSize,
                              CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                                  TaskEntry, KmpRoutineEntryPtrTy)};
  auto *NewTask = CGF.EmitRuntimeCall(
      createRuntimeFunction(OMPRTL__kmpc_omp_task_alloc), AllocArgs);
  // View the allocated task as kmp_task_t_with_privates. Base is the whole
  // record, TDBase is its first field (the kmp_task_t part).
  auto *NewTaskNewTaskTTy = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      NewTask, KmpTaskTWithPrivatesPtrTy);
  LValue Base = CGF.MakeNaturalAlignAddrLValue(NewTaskNewTaskTTy,
                                               KmpTaskTWithPrivatesQTy);
  LValue TDBase =
      CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin());
  // Fill the data in the resulting kmp_task_t record.
  // Copy shareds if there are any.
  Address KmpTaskSharedsPtr = Address::invalid();
  if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) {
    // The destination is the address loaded from the shareds field of the
    // allocated task.
    KmpTaskSharedsPtr =
        Address(CGF.EmitLoadOfScalar(
                    CGF.EmitLValueForField(
                        TDBase, *std::next(KmpTaskTQTyRD->field_begin(),
                                           KmpTaskTShareds)),
                    Loc),
                CGF.getNaturalTypeAlignment(SharedsTy));
    CGF.EmitAggregateCopy(KmpTaskSharedsPtr, Shareds, SharedsTy);
  }
  // Emit initial values for private copies (if any).
  TaskResultTy Result;
  if (!Privates.empty()) {
    emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, Base, KmpTaskTWithPrivatesQTyRD,
                     SharedsTy, SharedsPtrTy, Data, Privates,
                     /*ForDup=*/false);
    // A task duplication function is emitted only for taskloops that have
    // lastprivates or privates with non-trivial constructor initializers.
    if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) &&
        (!Data.LastprivateVars.empty() || checkInitIsRequired(CGF, Privates))) {
      Result.TaskDupFn = emitTaskDupFunction(
          CGM, Loc, D, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTyRD,
          KmpTaskTQTyRD, SharedsTy, SharedsPtrTy, Data, Privates,
          /*WithLastIter=*/!Data.LastprivateVars.empty());
    }
  }
  // Fields of union "kmp_cmplrdata_t" for destructors and priority.
  enum { Priority = 0, Destructors = 1 };
  // Provide pointer to function with destructors for privates.
  auto FI = std::next(KmpTaskTQTyRD->field_begin(), Data1);
  auto *KmpCmplrdataUD = (*FI)->getType()->getAsUnionType()->getDecl();
  if (NeedsCleanup) {
    llvm::Value *DestructorFn = emitDestructorsFunction(
        CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
        KmpTaskTWithPrivatesQTy);
    LValue Data1LV = CGF.EmitLValueForField(TDBase, *FI);
    LValue DestructorsLV = CGF.EmitLValueForField(
        Data1LV, *std::next(KmpCmplrdataUD->field_begin(), Destructors));
    CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                              DestructorFn, KmpRoutineEntryPtrTy),
                          DestructorsLV);
  }
  // Set priority.
  if (Data.Priority.getInt()) {
    // The priority value is stored into the Data2 field, using the union
    // declaration obtained from the Data1 field above.
    LValue Data2LV = CGF.EmitLValueForField(
        TDBase, *std::next(KmpTaskTQTyRD->field_begin(), Data2));
    LValue PriorityLV = CGF.EmitLValueForField(
        Data2LV, *std::next(KmpCmplrdataUD->field_begin(), Priority));
    CGF.EmitStoreOfScalar(Data.Priority.getPointer(), PriorityLV);
  }
  Result.NewTask = NewTask;
  Result.TaskEntry = TaskEntry;
  Result.NewTaskNewTaskTTy = NewTaskNewTaskTTy;
  Result.TDBase = TDBase;
  Result.KmpTaskTQTyRD = KmpTaskTQTyRD;
  return Result;
}

/// \brief Emit code for a task directive: initialize the task via
/// emitTaskInit(), build the dependence array (if any) and either enqueue the
/// task through the runtime or, when the 'if' clause evaluates to false, run
/// the task entry directly.
void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
                                   const OMPExecutableDirective &D,
                                   llvm::Value *TaskFunction,
                                   QualType SharedsTy, Address Shareds,
                                   const Expr *IfCond,
                                   const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint())
    return;

  TaskResultTy Result =
      emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
  llvm::Value *NewTask = Result.NewTask;
  llvm::Value *TaskEntry = Result.TaskEntry;
  llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy;
  LValue TDBase = Result.TDBase;
  RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD;
  auto &C = CGM.getContext();
  // Process list of dependences.
  Address DependenciesArray = Address::invalid();
  unsigned NumDependencies = Data.Dependences.size();
  if (NumDependencies) {
    // Dependence kind for RTL.
enum RTLDependenceKindTy { DepIn = 0x01, DepInOut = 0x3 };
    // Field indices of the kmp_depend_info record built below.
    enum RTLDependInfoFieldsTy { BaseAddr, Len, Flags };
    RecordDecl *KmpDependInfoRD;
    // The flags field is an unsigned integer as wide as 'bool'.
    QualType FlagsTy =
        C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false);
    llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
    // Build struct kmp_depend_info { intptr base_addr; size_t len; flags; }
    // once and cache it in KmpDependInfoTy.
    if (KmpDependInfoTy.isNull()) {
      KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info");
      KmpDependInfoRD->startDefinition();
      addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType());
      addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType());
      addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy);
      KmpDependInfoRD->completeDefinition();
      KmpDependInfoTy = C.getRecordType(KmpDependInfoRD);
    } else
      KmpDependInfoRD = cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
    CharUnits DependencySize = C.getTypeSizeInChars(KmpDependInfoTy);
    // Define type kmp_depend_info[<Dependences.size()>];
    QualType KmpDependInfoArrayTy = C.getConstantArrayType(
        KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies),
        ArrayType::Normal, /*IndexTypeQuals=*/0);
    // kmp_depend_info[<Dependences.size()>] deps;
    DependenciesArray =
        CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr");
    for (unsigned i = 0; i < NumDependencies; ++i) {
      const Expr *E = Data.Dependences[i].second;
      auto Addr = CGF.EmitLValue(E);
      llvm::Value *Size;
      QualType Ty = E->getType();
      if (auto *ASE = dyn_cast<OMPArraySectionExpr>(E->IgnoreParenImpCasts())) {
        // Array section: the length is the distance in bytes between the
        // section start and the address one element past the lvalue emitted
        // with LowerBound=false.
        LValue UpAddrLVal =
            CGF.EmitOMPArraySectionExpr(ASE, /*LowerBound=*/false);
        llvm::Value *UpAddr =
            CGF.Builder.CreateConstGEP1_32(UpAddrLVal.getPointer(), /*Idx0=*/1);
        llvm::Value *LowIntPtr =
            CGF.Builder.CreatePtrToInt(Addr.getPointer(), CGM.SizeTy);
        llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGM.SizeTy);
        Size = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr);
      } else
        Size = CGF.getTypeSize(Ty);
      auto Base = CGF.MakeAddrLValue(
          CGF.Builder.CreateConstArrayGEP(DependenciesArray, i, DependencySize),
          KmpDependInfoTy);
      // deps[i].base_addr = &<Dependences[i].second>;
      auto BaseAddrLVal = CGF.EmitLValueForField(
          Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
      CGF.EmitStoreOfScalar(
          CGF.Builder.CreatePtrToInt(Addr.getPointer(), CGF.IntPtrTy),
          BaseAddrLVal);
      // deps[i].len = sizeof(<Dependences[i].second>);
      auto LenLVal = CGF.EmitLValueForField(
          Base, *std::next(KmpDependInfoRD->field_begin(), Len));
      CGF.EmitStoreOfScalar(Size, LenLVal);
      // deps[i].flags = <Dependences[i].first>;
      RTLDependenceKindTy DepKind;
      switch (Data.Dependences[i].first) {
      case OMPC_DEPEND_in:
        DepKind = DepIn;
        break;
      // Out and InOut dependencies must use the same code.
      case OMPC_DEPEND_out:
      case OMPC_DEPEND_inout:
        DepKind = DepInOut;
        break;
      case OMPC_DEPEND_source:
      case OMPC_DEPEND_sink:
      case OMPC_DEPEND_unknown:
        llvm_unreachable("Unknown task dependence type");
      }
      auto FlagsLVal = CGF.EmitLValueForField(
          Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
      CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
                            FlagsLVal);
    }
    // From here on DependenciesArray is the address of the first element cast
    // to void*, the form in which it is passed to the runtime below.
    DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        CGF.Builder.CreateStructGEP(DependenciesArray, 0, CharUnits::Zero()),
        CGF.VoidPtrTy);
  }

  // NOTE: routine and part_id fields are initialized by
  // __kmpc_omp_task_alloc() libcall.
  // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
  // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
  // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if dependence
  // list is not empty
  auto *ThreadID = getThreadID(CGF, Loc);
  auto *UpLoc = emitUpdateLocation(CGF, Loc);
  llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask };
  // DepTaskArgs is filled only when there are dependences; it is read only
  // under the same condition in ThenCodeGen.
  llvm::Value *DepTaskArgs[7];
  if (NumDependencies) {
    DepTaskArgs[0] = UpLoc;
    DepTaskArgs[1] = ThreadID;
    DepTaskArgs[2] = NewTask;
    DepTaskArgs[3] = CGF.Builder.getInt32(NumDependencies);
    DepTaskArgs[4] = DependenciesArray.getPointer();
    // No noalias dependences are passed.
    DepTaskArgs[5] = CGF.Builder.getInt32(0);
    DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }
  // 'Then' branch (also used when there is no 'if' clause): hand the task to
  // the runtime.
  auto &&ThenCodeGen = [this, &Data, TDBase, KmpTaskTQTyRD, NumDependencies,
                        &TaskArgs,
                        &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) {
    if (!Data.Tied) {
      // For untied tasks store 0 to the part_id field before the task is
      // passed to the runtime.
      auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
      auto PartIdLVal = CGF.EmitLValueForField(TDBase, *PartIdFI);
      CGF.EmitStoreOfScalar(CGF.Builder.getInt32(0), PartIdLVal);
    }
    if (NumDependencies) {
      CGF.EmitRuntimeCall(
          createRuntimeFunction(OMPRTL__kmpc_omp_task_with_deps), DepTaskArgs);
    } else {
      CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task),
                          TaskArgs);
    }
    // Check if parent region is untied and build return for untied task;
    if (auto *Region =
            dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
      Region->emitUntiedSwitch(CGF);
  };

  // As with DepTaskArgs: filled and read only when there are dependences.
  llvm::Value *DepWaitTaskArgs[6];
  if (NumDependencies) {
    DepWaitTaskArgs[0] = UpLoc;
    DepWaitTaskArgs[1] = ThreadID;
    DepWaitTaskArgs[2] = CGF.Builder.getInt32(NumDependencies);
    DepWaitTaskArgs[3] = DependenciesArray.getPointer();
    DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
    DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }
  // 'Else' branch ('if' clause is false): wait for the dependences and call
  // the task entry directly.
  auto &&ElseCodeGen = [&TaskArgs, ThreadID, NewTaskNewTaskTTy, TaskEntry,
                        NumDependencies, &DepWaitTaskArgs,
                        Loc](CodeGenFunction &CGF, PrePostActionTy &) {
    auto &RT = CGF.CGM.getOpenMPRuntime();
    CodeGenFunction::RunCleanupsScope LocalScope(CGF);
    // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
    // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
    // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info
    // is specified.
    if (NumDependencies)
      CGF.EmitRuntimeCall(RT.createRuntimeFunction(OMPRTL__kmpc_omp_wait_deps),
                          DepWaitTaskArgs);
    // Call proxy_task_entry(gtid, new_task);
    auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy,
                      Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
      Action.Enter(CGF);
      llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy};
      CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskEntry,
                                                          OutlinedFnArgs);
    };

    // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
    // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
    // NOTE(review): CommonActionTy is defined elsewhere; presumably it emits
    // the first call on entry to the region and the second on exit - confirm.
    RegionCodeGenTy RCG(CodeGen);
    CommonActionTy Action(
        RT.createRuntimeFunction(OMPRTL__kmpc_omp_task_begin_if0), TaskArgs,
        RT.createRuntimeFunction(OMPRTL__kmpc_omp_task_complete_if0), TaskArgs);
    RCG.setAction(Action);
    RCG(CGF);
  };

  // With an 'if' clause choose between the branches at run time; otherwise the
  // task is always handed to the runtime.
  if (IfCond)
    emitOMPIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen);
  else {
    RegionCodeGenTy ThenRCG(ThenCodeGen);
    ThenRCG(CGF);
  }
}

void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPLoopDirective &D,
                                       llvm::Value *TaskFunction,
                                       QualType SharedsTy, Address Shareds,
                                       const Expr *IfCond,
                                       const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint())
return; 4773 TaskResultTy Result = 4774 emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data); 4775 // NOTE: routine and part_id fields are intialized by __kmpc_omp_task_alloc() 4776 // libcall. 4777 // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int 4778 // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int 4779 // sched, kmp_uint64 grainsize, void *task_dup); 4780 llvm::Value *ThreadID = getThreadID(CGF, Loc); 4781 llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc); 4782 llvm::Value *IfVal; 4783 if (IfCond) { 4784 IfVal = CGF.Builder.CreateIntCast(CGF.EvaluateExprAsBool(IfCond), CGF.IntTy, 4785 /*isSigned=*/true); 4786 } else 4787 IfVal = llvm::ConstantInt::getSigned(CGF.IntTy, /*V=*/1); 4788 4789 LValue LBLVal = CGF.EmitLValueForField( 4790 Result.TDBase, 4791 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound)); 4792 auto *LBVar = 4793 cast<VarDecl>(cast<DeclRefExpr>(D.getLowerBoundVariable())->getDecl()); 4794 CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(), LBLVal.getQuals(), 4795 /*IsInitializer=*/true); 4796 LValue UBLVal = CGF.EmitLValueForField( 4797 Result.TDBase, 4798 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound)); 4799 auto *UBVar = 4800 cast<VarDecl>(cast<DeclRefExpr>(D.getUpperBoundVariable())->getDecl()); 4801 CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(), UBLVal.getQuals(), 4802 /*IsInitializer=*/true); 4803 LValue StLVal = CGF.EmitLValueForField( 4804 Result.TDBase, 4805 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTStride)); 4806 auto *StVar = 4807 cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl()); 4808 CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(), StLVal.getQuals(), 4809 /*IsInitializer=*/true); 4810 // Store reductions address. 
4811 LValue RedLVal = CGF.EmitLValueForField( 4812 Result.TDBase, 4813 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTReductions)); 4814 if (Data.Reductions) 4815 CGF.EmitStoreOfScalar(Data.Reductions, RedLVal); 4816 else { 4817 CGF.EmitNullInitialization(RedLVal.getAddress(), 4818 CGF.getContext().VoidPtrTy); 4819 } 4820 enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 }; 4821 llvm::Value *TaskArgs[] = { 4822 UpLoc, 4823 ThreadID, 4824 Result.NewTask, 4825 IfVal, 4826 LBLVal.getPointer(), 4827 UBLVal.getPointer(), 4828 CGF.EmitLoadOfScalar(StLVal, SourceLocation()), 4829 llvm::ConstantInt::getNullValue( 4830 CGF.IntTy), // Always 0 because taskgroup emitted by the compiler 4831 llvm::ConstantInt::getSigned( 4832 CGF.IntTy, Data.Schedule.getPointer() 4833 ? Data.Schedule.getInt() ? NumTasks : Grainsize 4834 : NoSchedule), 4835 Data.Schedule.getPointer() 4836 ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty, 4837 /*isSigned=*/false) 4838 : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0), 4839 Result.TaskDupFn ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4840 Result.TaskDupFn, CGF.VoidPtrTy) 4841 : llvm::ConstantPointerNull::get(CGF.VoidPtrTy)}; 4842 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_taskloop), TaskArgs); 4843 } 4844 4845 /// \brief Emit reduction operation for each element of array (required for 4846 /// array sections) LHS op = RHS. 4847 /// \param Type Type of array. 4848 /// \param LHSVar Variable on the left side of the reduction operation 4849 /// (references element of array in original variable). 4850 /// \param RHSVar Variable on the right side of the reduction operation 4851 /// (references element of array in original variable). 4852 /// \param RedOpGen Generator of reduction operation with use of LHSVar and 4853 /// RHSVar. 
4854 static void EmitOMPAggregateReduction( 4855 CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar, 4856 const VarDecl *RHSVar, 4857 const llvm::function_ref<void(CodeGenFunction &CGF, const Expr *, 4858 const Expr *, const Expr *)> &RedOpGen, 4859 const Expr *XExpr = nullptr, const Expr *EExpr = nullptr, 4860 const Expr *UpExpr = nullptr) { 4861 // Perform element-by-element initialization. 4862 QualType ElementTy; 4863 Address LHSAddr = CGF.GetAddrOfLocalVar(LHSVar); 4864 Address RHSAddr = CGF.GetAddrOfLocalVar(RHSVar); 4865 4866 // Drill down to the base element type on both arrays. 4867 auto ArrayTy = Type->getAsArrayTypeUnsafe(); 4868 auto NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, LHSAddr); 4869 4870 auto RHSBegin = RHSAddr.getPointer(); 4871 auto LHSBegin = LHSAddr.getPointer(); 4872 // Cast from pointer to array type to pointer to single element. 4873 auto LHSEnd = CGF.Builder.CreateGEP(LHSBegin, NumElements); 4874 // The basic structure here is a while-do loop. 4875 auto BodyBB = CGF.createBasicBlock("omp.arraycpy.body"); 4876 auto DoneBB = CGF.createBasicBlock("omp.arraycpy.done"); 4877 auto IsEmpty = 4878 CGF.Builder.CreateICmpEQ(LHSBegin, LHSEnd, "omp.arraycpy.isempty"); 4879 CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB); 4880 4881 // Enter the loop body, making that address the current address. 
4882 auto EntryBB = CGF.Builder.GetInsertBlock(); 4883 CGF.EmitBlock(BodyBB); 4884 4885 CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy); 4886 4887 llvm::PHINode *RHSElementPHI = CGF.Builder.CreatePHI( 4888 RHSBegin->getType(), 2, "omp.arraycpy.srcElementPast"); 4889 RHSElementPHI->addIncoming(RHSBegin, EntryBB); 4890 Address RHSElementCurrent = 4891 Address(RHSElementPHI, 4892 RHSAddr.getAlignment().alignmentOfArrayElement(ElementSize)); 4893 4894 llvm::PHINode *LHSElementPHI = CGF.Builder.CreatePHI( 4895 LHSBegin->getType(), 2, "omp.arraycpy.destElementPast"); 4896 LHSElementPHI->addIncoming(LHSBegin, EntryBB); 4897 Address LHSElementCurrent = 4898 Address(LHSElementPHI, 4899 LHSAddr.getAlignment().alignmentOfArrayElement(ElementSize)); 4900 4901 // Emit copy. 4902 CodeGenFunction::OMPPrivateScope Scope(CGF); 4903 Scope.addPrivate(LHSVar, [=]() -> Address { return LHSElementCurrent; }); 4904 Scope.addPrivate(RHSVar, [=]() -> Address { return RHSElementCurrent; }); 4905 Scope.Privatize(); 4906 RedOpGen(CGF, XExpr, EExpr, UpExpr); 4907 Scope.ForceCleanup(); 4908 4909 // Shift the address forward by one element. 4910 auto LHSElementNext = CGF.Builder.CreateConstGEP1_32( 4911 LHSElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element"); 4912 auto RHSElementNext = CGF.Builder.CreateConstGEP1_32( 4913 RHSElementPHI, /*Idx0=*/1, "omp.arraycpy.src.element"); 4914 // Check whether we've reached the end. 4915 auto Done = 4916 CGF.Builder.CreateICmpEQ(LHSElementNext, LHSEnd, "omp.arraycpy.done"); 4917 CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB); 4918 LHSElementPHI->addIncoming(LHSElementNext, CGF.Builder.GetInsertBlock()); 4919 RHSElementPHI->addIncoming(RHSElementNext, CGF.Builder.GetInsertBlock()); 4920 4921 // Done. 4922 CGF.EmitBlock(DoneBB, /*IsFinished=*/true); 4923 } 4924 4925 /// Emit reduction combiner. 
If the combiner is a simple expression emit it as 4926 /// is, otherwise consider it as combiner of UDR decl and emit it as a call of 4927 /// UDR combiner function. 4928 static void emitReductionCombiner(CodeGenFunction &CGF, 4929 const Expr *ReductionOp) { 4930 if (auto *CE = dyn_cast<CallExpr>(ReductionOp)) 4931 if (auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee())) 4932 if (auto *DRE = 4933 dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts())) 4934 if (auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) { 4935 std::pair<llvm::Function *, llvm::Function *> Reduction = 4936 CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD); 4937 RValue Func = RValue::get(Reduction.first); 4938 CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func); 4939 CGF.EmitIgnoredExpr(ReductionOp); 4940 return; 4941 } 4942 CGF.EmitIgnoredExpr(ReductionOp); 4943 } 4944 4945 llvm::Value *CGOpenMPRuntime::emitReductionFunction( 4946 CodeGenModule &CGM, llvm::Type *ArgsType, ArrayRef<const Expr *> Privates, 4947 ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs, 4948 ArrayRef<const Expr *> ReductionOps) { 4949 auto &C = CGM.getContext(); 4950 4951 // void reduction_func(void *LHSArg, void *RHSArg); 4952 FunctionArgList Args; 4953 ImplicitParamDecl LHSArg(C, C.VoidPtrTy, ImplicitParamDecl::Other); 4954 ImplicitParamDecl RHSArg(C, C.VoidPtrTy, ImplicitParamDecl::Other); 4955 Args.push_back(&LHSArg); 4956 Args.push_back(&RHSArg); 4957 auto &CGFI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 4958 auto *Fn = llvm::Function::Create( 4959 CGM.getTypes().GetFunctionType(CGFI), llvm::GlobalValue::InternalLinkage, 4960 ".omp.reduction.reduction_func", &CGM.getModule()); 4961 CGM.SetInternalFunctionAttributes(/*D=*/nullptr, Fn, CGFI); 4962 CodeGenFunction CGF(CGM); 4963 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args); 4964 4965 // Dst = (void*[n])(LHSArg); 4966 // Src = (void*[n])(RHSArg); 4967 Address 
LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4968 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)), 4969 ArgsType), CGF.getPointerAlign()); 4970 Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4971 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)), 4972 ArgsType), CGF.getPointerAlign()); 4973 4974 // ... 4975 // *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]); 4976 // ... 4977 CodeGenFunction::OMPPrivateScope Scope(CGF); 4978 auto IPriv = Privates.begin(); 4979 unsigned Idx = 0; 4980 for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) { 4981 auto RHSVar = cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl()); 4982 Scope.addPrivate(RHSVar, [&]() -> Address { 4983 return emitAddrOfVarFromArray(CGF, RHS, Idx, RHSVar); 4984 }); 4985 auto LHSVar = cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl()); 4986 Scope.addPrivate(LHSVar, [&]() -> Address { 4987 return emitAddrOfVarFromArray(CGF, LHS, Idx, LHSVar); 4988 }); 4989 QualType PrivTy = (*IPriv)->getType(); 4990 if (PrivTy->isVariablyModifiedType()) { 4991 // Get array size and emit VLA type. 4992 ++Idx; 4993 Address Elem = 4994 CGF.Builder.CreateConstArrayGEP(LHS, Idx, CGF.getPointerSize()); 4995 llvm::Value *Ptr = CGF.Builder.CreateLoad(Elem); 4996 auto *VLA = CGF.getContext().getAsVariableArrayType(PrivTy); 4997 auto *OVE = cast<OpaqueValueExpr>(VLA->getSizeExpr()); 4998 CodeGenFunction::OpaqueValueMapping OpaqueMap( 4999 CGF, OVE, RValue::get(CGF.Builder.CreatePtrToInt(Ptr, CGF.SizeTy))); 5000 CGF.EmitVariablyModifiedType(PrivTy); 5001 } 5002 } 5003 Scope.Privatize(); 5004 IPriv = Privates.begin(); 5005 auto ILHS = LHSExprs.begin(); 5006 auto IRHS = RHSExprs.begin(); 5007 for (auto *E : ReductionOps) { 5008 if ((*IPriv)->getType()->isArrayType()) { 5009 // Emit reduction for array section. 
5010 auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl()); 5011 auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl()); 5012 EmitOMPAggregateReduction( 5013 CGF, (*IPriv)->getType(), LHSVar, RHSVar, 5014 [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) { 5015 emitReductionCombiner(CGF, E); 5016 }); 5017 } else 5018 // Emit reduction for array subscript or single variable. 5019 emitReductionCombiner(CGF, E); 5020 ++IPriv; 5021 ++ILHS; 5022 ++IRHS; 5023 } 5024 Scope.ForceCleanup(); 5025 CGF.FinishFunction(); 5026 return Fn; 5027 } 5028 5029 void CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF, 5030 const Expr *ReductionOp, 5031 const Expr *PrivateRef, 5032 const DeclRefExpr *LHS, 5033 const DeclRefExpr *RHS) { 5034 if (PrivateRef->getType()->isArrayType()) { 5035 // Emit reduction for array section. 5036 auto *LHSVar = cast<VarDecl>(LHS->getDecl()); 5037 auto *RHSVar = cast<VarDecl>(RHS->getDecl()); 5038 EmitOMPAggregateReduction( 5039 CGF, PrivateRef->getType(), LHSVar, RHSVar, 5040 [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) { 5041 emitReductionCombiner(CGF, ReductionOp); 5042 }); 5043 } else 5044 // Emit reduction for array subscript or single variable. 
5045 emitReductionCombiner(CGF, ReductionOp); 5046 } 5047 5048 void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc, 5049 ArrayRef<const Expr *> Privates, 5050 ArrayRef<const Expr *> LHSExprs, 5051 ArrayRef<const Expr *> RHSExprs, 5052 ArrayRef<const Expr *> ReductionOps, 5053 ReductionOptionsTy Options) { 5054 if (!CGF.HaveInsertPoint()) 5055 return; 5056 5057 bool WithNowait = Options.WithNowait; 5058 bool SimpleReduction = Options.SimpleReduction; 5059 5060 // Next code should be emitted for reduction: 5061 // 5062 // static kmp_critical_name lock = { 0 }; 5063 // 5064 // void reduce_func(void *lhs[<n>], void *rhs[<n>]) { 5065 // *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]); 5066 // ... 5067 // *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1], 5068 // *(Type<n>-1*)rhs[<n>-1]); 5069 // } 5070 // 5071 // ... 5072 // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]}; 5073 // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList), 5074 // RedList, reduce_func, &<lock>)) { 5075 // case 1: 5076 // ... 5077 // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]); 5078 // ... 5079 // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>); 5080 // break; 5081 // case 2: 5082 // ... 5083 // Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i])); 5084 // ... 5085 // [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);] 5086 // break; 5087 // default:; 5088 // } 5089 // 5090 // if SimpleReduction is true, only the next code is generated: 5091 // ... 5092 // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]); 5093 // ... 
5094 5095 auto &C = CGM.getContext(); 5096 5097 if (SimpleReduction) { 5098 CodeGenFunction::RunCleanupsScope Scope(CGF); 5099 auto IPriv = Privates.begin(); 5100 auto ILHS = LHSExprs.begin(); 5101 auto IRHS = RHSExprs.begin(); 5102 for (auto *E : ReductionOps) { 5103 emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS), 5104 cast<DeclRefExpr>(*IRHS)); 5105 ++IPriv; 5106 ++ILHS; 5107 ++IRHS; 5108 } 5109 return; 5110 } 5111 5112 // 1. Build a list of reduction variables. 5113 // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]}; 5114 auto Size = RHSExprs.size(); 5115 for (auto *E : Privates) { 5116 if (E->getType()->isVariablyModifiedType()) 5117 // Reserve place for array size. 5118 ++Size; 5119 } 5120 llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size); 5121 QualType ReductionArrayTy = 5122 C.getConstantArrayType(C.VoidPtrTy, ArraySize, ArrayType::Normal, 5123 /*IndexTypeQuals=*/0); 5124 Address ReductionList = 5125 CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list"); 5126 auto IPriv = Privates.begin(); 5127 unsigned Idx = 0; 5128 for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) { 5129 Address Elem = 5130 CGF.Builder.CreateConstArrayGEP(ReductionList, Idx, CGF.getPointerSize()); 5131 CGF.Builder.CreateStore( 5132 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5133 CGF.EmitLValue(RHSExprs[I]).getPointer(), CGF.VoidPtrTy), 5134 Elem); 5135 if ((*IPriv)->getType()->isVariablyModifiedType()) { 5136 // Store array size. 5137 ++Idx; 5138 Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx, 5139 CGF.getPointerSize()); 5140 llvm::Value *Size = CGF.Builder.CreateIntCast( 5141 CGF.getVLASize( 5142 CGF.getContext().getAsVariableArrayType((*IPriv)->getType())) 5143 .first, 5144 CGF.SizeTy, /*isSigned=*/false); 5145 CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy), 5146 Elem); 5147 } 5148 } 5149 5150 // 2. Emit reduce_func(). 
5151 auto *ReductionFn = emitReductionFunction( 5152 CGM, CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo(), Privates, 5153 LHSExprs, RHSExprs, ReductionOps); 5154 5155 // 3. Create static kmp_critical_name lock = { 0 }; 5156 auto *Lock = getCriticalRegionLock(".reduction"); 5157 5158 // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList), 5159 // RedList, reduce_func, &<lock>); 5160 auto *IdentTLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE); 5161 auto *ThreadId = getThreadID(CGF, Loc); 5162 auto *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy); 5163 auto *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5164 ReductionList.getPointer(), CGF.VoidPtrTy); 5165 llvm::Value *Args[] = { 5166 IdentTLoc, // ident_t *<loc> 5167 ThreadId, // i32 <gtid> 5168 CGF.Builder.getInt32(RHSExprs.size()), // i32 <n> 5169 ReductionArrayTySize, // size_type sizeof(RedList) 5170 RL, // void *RedList 5171 ReductionFn, // void (*) (void *, void *) <reduce_func> 5172 Lock // kmp_critical_name *&<lock> 5173 }; 5174 auto Res = CGF.EmitRuntimeCall( 5175 createRuntimeFunction(WithNowait ? OMPRTL__kmpc_reduce_nowait 5176 : OMPRTL__kmpc_reduce), 5177 Args); 5178 5179 // 5. Build switch(res) 5180 auto *DefaultBB = CGF.createBasicBlock(".omp.reduction.default"); 5181 auto *SwInst = CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2); 5182 5183 // 6. Build case 1: 5184 // ... 5185 // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]); 5186 // ... 
5187 // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>); 5188 // break; 5189 auto *Case1BB = CGF.createBasicBlock(".omp.reduction.case1"); 5190 SwInst->addCase(CGF.Builder.getInt32(1), Case1BB); 5191 CGF.EmitBlock(Case1BB); 5192 5193 // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>); 5194 llvm::Value *EndArgs[] = { 5195 IdentTLoc, // ident_t *<loc> 5196 ThreadId, // i32 <gtid> 5197 Lock // kmp_critical_name *&<lock> 5198 }; 5199 auto &&CodeGen = [&Privates, &LHSExprs, &RHSExprs, &ReductionOps]( 5200 CodeGenFunction &CGF, PrePostActionTy &Action) { 5201 auto &RT = CGF.CGM.getOpenMPRuntime(); 5202 auto IPriv = Privates.begin(); 5203 auto ILHS = LHSExprs.begin(); 5204 auto IRHS = RHSExprs.begin(); 5205 for (auto *E : ReductionOps) { 5206 RT.emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS), 5207 cast<DeclRefExpr>(*IRHS)); 5208 ++IPriv; 5209 ++ILHS; 5210 ++IRHS; 5211 } 5212 }; 5213 RegionCodeGenTy RCG(CodeGen); 5214 CommonActionTy Action( 5215 nullptr, llvm::None, 5216 createRuntimeFunction(WithNowait ? OMPRTL__kmpc_end_reduce_nowait 5217 : OMPRTL__kmpc_end_reduce), 5218 EndArgs); 5219 RCG.setAction(Action); 5220 RCG(CGF); 5221 5222 CGF.EmitBranch(DefaultBB); 5223 5224 // 7. Build case 2: 5225 // ... 5226 // Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i])); 5227 // ... 
5228 // break; 5229 auto *Case2BB = CGF.createBasicBlock(".omp.reduction.case2"); 5230 SwInst->addCase(CGF.Builder.getInt32(2), Case2BB); 5231 CGF.EmitBlock(Case2BB); 5232 5233 auto &&AtomicCodeGen = [Loc, &Privates, &LHSExprs, &RHSExprs, &ReductionOps]( 5234 CodeGenFunction &CGF, PrePostActionTy &Action) { 5235 auto ILHS = LHSExprs.begin(); 5236 auto IRHS = RHSExprs.begin(); 5237 auto IPriv = Privates.begin(); 5238 for (auto *E : ReductionOps) { 5239 const Expr *XExpr = nullptr; 5240 const Expr *EExpr = nullptr; 5241 const Expr *UpExpr = nullptr; 5242 BinaryOperatorKind BO = BO_Comma; 5243 if (auto *BO = dyn_cast<BinaryOperator>(E)) { 5244 if (BO->getOpcode() == BO_Assign) { 5245 XExpr = BO->getLHS(); 5246 UpExpr = BO->getRHS(); 5247 } 5248 } 5249 // Try to emit update expression as a simple atomic. 5250 auto *RHSExpr = UpExpr; 5251 if (RHSExpr) { 5252 // Analyze RHS part of the whole expression. 5253 if (auto *ACO = dyn_cast<AbstractConditionalOperator>( 5254 RHSExpr->IgnoreParenImpCasts())) { 5255 // If this is a conditional operator, analyze its condition for 5256 // min/max reduction operator. 
5257 RHSExpr = ACO->getCond(); 5258 } 5259 if (auto *BORHS = 5260 dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) { 5261 EExpr = BORHS->getRHS(); 5262 BO = BORHS->getOpcode(); 5263 } 5264 } 5265 if (XExpr) { 5266 auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl()); 5267 auto &&AtomicRedGen = [BO, VD, 5268 Loc](CodeGenFunction &CGF, const Expr *XExpr, 5269 const Expr *EExpr, const Expr *UpExpr) { 5270 LValue X = CGF.EmitLValue(XExpr); 5271 RValue E; 5272 if (EExpr) 5273 E = CGF.EmitAnyExpr(EExpr); 5274 CGF.EmitOMPAtomicSimpleUpdateExpr( 5275 X, E, BO, /*IsXLHSInRHSPart=*/true, 5276 llvm::AtomicOrdering::Monotonic, Loc, 5277 [&CGF, UpExpr, VD, Loc](RValue XRValue) { 5278 CodeGenFunction::OMPPrivateScope PrivateScope(CGF); 5279 PrivateScope.addPrivate( 5280 VD, [&CGF, VD, XRValue, Loc]() -> Address { 5281 Address LHSTemp = CGF.CreateMemTemp(VD->getType()); 5282 CGF.emitOMPSimpleStore( 5283 CGF.MakeAddrLValue(LHSTemp, VD->getType()), XRValue, 5284 VD->getType().getNonReferenceType(), Loc); 5285 return LHSTemp; 5286 }); 5287 (void)PrivateScope.Privatize(); 5288 return CGF.EmitAnyExpr(UpExpr); 5289 }); 5290 }; 5291 if ((*IPriv)->getType()->isArrayType()) { 5292 // Emit atomic reduction for array section. 5293 auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl()); 5294 EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar, 5295 AtomicRedGen, XExpr, EExpr, UpExpr); 5296 } else 5297 // Emit atomic reduction for array subscript or single variable. 5298 AtomicRedGen(CGF, XExpr, EExpr, UpExpr); 5299 } else { 5300 // Emit as a critical region. 
5301 auto &&CritRedGen = [E, Loc](CodeGenFunction &CGF, const Expr *, 5302 const Expr *, const Expr *) { 5303 auto &RT = CGF.CGM.getOpenMPRuntime(); 5304 RT.emitCriticalRegion( 5305 CGF, ".atomic_reduction", 5306 [=](CodeGenFunction &CGF, PrePostActionTy &Action) { 5307 Action.Enter(CGF); 5308 emitReductionCombiner(CGF, E); 5309 }, 5310 Loc); 5311 }; 5312 if ((*IPriv)->getType()->isArrayType()) { 5313 auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl()); 5314 auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl()); 5315 EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar, 5316 CritRedGen); 5317 } else 5318 CritRedGen(CGF, nullptr, nullptr, nullptr); 5319 } 5320 ++ILHS; 5321 ++IRHS; 5322 ++IPriv; 5323 } 5324 }; 5325 RegionCodeGenTy AtomicRCG(AtomicCodeGen); 5326 if (!WithNowait) { 5327 // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>); 5328 llvm::Value *EndArgs[] = { 5329 IdentTLoc, // ident_t *<loc> 5330 ThreadId, // i32 <gtid> 5331 Lock // kmp_critical_name *&<lock> 5332 }; 5333 CommonActionTy Action(nullptr, llvm::None, 5334 createRuntimeFunction(OMPRTL__kmpc_end_reduce), 5335 EndArgs); 5336 AtomicRCG.setAction(Action); 5337 AtomicRCG(CGF); 5338 } else 5339 AtomicRCG(CGF); 5340 5341 CGF.EmitBranch(DefaultBB); 5342 CGF.EmitBlock(DefaultBB, /*IsFinished=*/true); 5343 } 5344 5345 /// Generates unique name for artificial threadprivate variables. 5346 /// Format is: <Prefix> "." <Loc_raw_encoding> "_" <N> 5347 static std::string generateUniqueName(StringRef Prefix, SourceLocation Loc, 5348 unsigned N) { 5349 SmallString<256> Buffer; 5350 llvm::raw_svector_ostream Out(Buffer); 5351 Out << Prefix << "." 
<< Loc.getRawEncoding() << "_" << N; 5352 return Out.str(); 5353 } 5354 5355 /// Emits reduction initializer function: 5356 /// \code 5357 /// void @.red_init(void* %arg) { 5358 /// %0 = bitcast void* %arg to <type>* 5359 /// store <type> <init>, <type>* %0 5360 /// ret void 5361 /// } 5362 /// \endcode 5363 static llvm::Value *emitReduceInitFunction(CodeGenModule &CGM, 5364 SourceLocation Loc, 5365 ReductionCodeGen &RCG, unsigned N) { 5366 auto &C = CGM.getContext(); 5367 FunctionArgList Args; 5368 ImplicitParamDecl Param(C, C.VoidPtrTy, ImplicitParamDecl::Other); 5369 Args.emplace_back(&Param); 5370 auto &FnInfo = 5371 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 5372 auto *FnTy = CGM.getTypes().GetFunctionType(FnInfo); 5373 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage, 5374 ".red_init.", &CGM.getModule()); 5375 CGM.SetInternalFunctionAttributes(/*D=*/nullptr, Fn, FnInfo); 5376 CodeGenFunction CGF(CGM); 5377 CGF.disableDebugInfo(); 5378 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args); 5379 Address PrivateAddr = CGF.EmitLoadOfPointer( 5380 CGF.GetAddrOfLocalVar(&Param), 5381 C.getPointerType(C.VoidPtrTy).castAs<PointerType>()); 5382 llvm::Value *Size = nullptr; 5383 // If the size of the reduction item is non-constant, load it from global 5384 // threadprivate variable. 
5385 if (RCG.getSizes(N).second) { 5386 Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate( 5387 CGF, CGM.getContext().getSizeType(), 5388 generateUniqueName("reduction_size", Loc, N)); 5389 Size = 5390 CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false, 5391 CGM.getContext().getSizeType(), SourceLocation()); 5392 } 5393 RCG.emitAggregateType(CGF, N, Size); 5394 LValue SharedLVal; 5395 // If initializer uses initializer from declare reduction construct, emit a 5396 // pointer to the address of the original reduction item (reuired by reduction 5397 // initializer) 5398 if (RCG.usesReductionInitializer(N)) { 5399 Address SharedAddr = 5400 CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate( 5401 CGF, CGM.getContext().VoidPtrTy, 5402 generateUniqueName("reduction", Loc, N)); 5403 SharedLVal = CGF.MakeAddrLValue(SharedAddr, CGM.getContext().VoidPtrTy); 5404 } else { 5405 SharedLVal = CGF.MakeNaturalAlignAddrLValue( 5406 llvm::ConstantPointerNull::get(CGM.VoidPtrTy), 5407 CGM.getContext().VoidPtrTy); 5408 } 5409 // Emit the initializer: 5410 // %0 = bitcast void* %arg to <type>* 5411 // store <type> <init>, <type>* %0 5412 RCG.emitInitialization(CGF, N, PrivateAddr, SharedLVal, 5413 [](CodeGenFunction &) { return false; }); 5414 CGF.FinishFunction(); 5415 return Fn; 5416 } 5417 5418 /// Emits reduction combiner function: 5419 /// \code 5420 /// void @.red_comb(void* %arg0, void* %arg1) { 5421 /// %lhs = bitcast void* %arg0 to <type>* 5422 /// %rhs = bitcast void* %arg1 to <type>* 5423 /// %2 = <ReductionOp>(<type>* %lhs, <type>* %rhs) 5424 /// store <type> %2, <type>* %lhs 5425 /// ret void 5426 /// } 5427 /// \endcode 5428 static llvm::Value *emitReduceCombFunction(CodeGenModule &CGM, 5429 SourceLocation Loc, 5430 ReductionCodeGen &RCG, unsigned N, 5431 const Expr *ReductionOp, 5432 const Expr *LHS, const Expr *RHS, 5433 const Expr *PrivateRef) { 5434 auto &C = CGM.getContext(); 5435 auto *LHSVD = 
cast<VarDecl>(cast<DeclRefExpr>(LHS)->getDecl()); 5436 auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(RHS)->getDecl()); 5437 FunctionArgList Args; 5438 ImplicitParamDecl ParamInOut(C, C.VoidPtrTy, ImplicitParamDecl::Other); 5439 ImplicitParamDecl ParamIn(C, C.VoidPtrTy, ImplicitParamDecl::Other); 5440 Args.emplace_back(&ParamInOut); 5441 Args.emplace_back(&ParamIn); 5442 auto &FnInfo = 5443 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 5444 auto *FnTy = CGM.getTypes().GetFunctionType(FnInfo); 5445 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage, 5446 ".red_comb.", &CGM.getModule()); 5447 CGM.SetInternalFunctionAttributes(/*D=*/nullptr, Fn, FnInfo); 5448 CodeGenFunction CGF(CGM); 5449 CGF.disableDebugInfo(); 5450 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args); 5451 llvm::Value *Size = nullptr; 5452 // If the size of the reduction item is non-constant, load it from global 5453 // threadprivate variable. 5454 if (RCG.getSizes(N).second) { 5455 Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate( 5456 CGF, CGM.getContext().getSizeType(), 5457 generateUniqueName("reduction_size", Loc, N)); 5458 Size = 5459 CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false, 5460 CGM.getContext().getSizeType(), SourceLocation()); 5461 } 5462 RCG.emitAggregateType(CGF, N, Size); 5463 // Remap lhs and rhs variables to the addresses of the function arguments. 5464 // %lhs = bitcast void* %arg0 to <type>* 5465 // %rhs = bitcast void* %arg1 to <type>* 5466 CodeGenFunction::OMPPrivateScope PrivateScope(CGF); 5467 PrivateScope.addPrivate(LHSVD, [&C, &CGF, &ParamInOut, LHSVD]() -> Address { 5468 // Pull out the pointer to the variable. 
5469 Address PtrAddr = CGF.EmitLoadOfPointer( 5470 CGF.GetAddrOfLocalVar(&ParamInOut), 5471 C.getPointerType(C.VoidPtrTy).castAs<PointerType>()); 5472 return CGF.Builder.CreateElementBitCast( 5473 PtrAddr, CGF.ConvertTypeForMem(LHSVD->getType())); 5474 }); 5475 PrivateScope.addPrivate(RHSVD, [&C, &CGF, &ParamIn, RHSVD]() -> Address { 5476 // Pull out the pointer to the variable. 5477 Address PtrAddr = CGF.EmitLoadOfPointer( 5478 CGF.GetAddrOfLocalVar(&ParamIn), 5479 C.getPointerType(C.VoidPtrTy).castAs<PointerType>()); 5480 return CGF.Builder.CreateElementBitCast( 5481 PtrAddr, CGF.ConvertTypeForMem(RHSVD->getType())); 5482 }); 5483 PrivateScope.Privatize(); 5484 // Emit the combiner body: 5485 // %2 = <ReductionOp>(<type> *%lhs, <type> *%rhs) 5486 // store <type> %2, <type>* %lhs 5487 CGM.getOpenMPRuntime().emitSingleReductionCombiner( 5488 CGF, ReductionOp, PrivateRef, cast<DeclRefExpr>(LHS), 5489 cast<DeclRefExpr>(RHS)); 5490 CGF.FinishFunction(); 5491 return Fn; 5492 } 5493 5494 /// Emits reduction finalizer function: 5495 /// \code 5496 /// void @.red_fini(void* %arg) { 5497 /// %0 = bitcast void* %arg to <type>* 5498 /// <destroy>(<type>* %0) 5499 /// ret void 5500 /// } 5501 /// \endcode 5502 static llvm::Value *emitReduceFiniFunction(CodeGenModule &CGM, 5503 SourceLocation Loc, 5504 ReductionCodeGen &RCG, unsigned N) { 5505 if (!RCG.needCleanups(N)) 5506 return nullptr; 5507 auto &C = CGM.getContext(); 5508 FunctionArgList Args; 5509 ImplicitParamDecl Param(C, C.VoidPtrTy, ImplicitParamDecl::Other); 5510 Args.emplace_back(&Param); 5511 auto &FnInfo = 5512 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 5513 auto *FnTy = CGM.getTypes().GetFunctionType(FnInfo); 5514 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage, 5515 ".red_fini.", &CGM.getModule()); 5516 CGM.SetInternalFunctionAttributes(/*D=*/nullptr, Fn, FnInfo); 5517 CodeGenFunction CGF(CGM); 5518 CGF.disableDebugInfo(); 5519 CGF.StartFunction(GlobalDecl(), 
C.VoidTy, Fn, FnInfo, Args); 5520 Address PrivateAddr = CGF.EmitLoadOfPointer( 5521 CGF.GetAddrOfLocalVar(&Param), 5522 C.getPointerType(C.VoidPtrTy).castAs<PointerType>()); 5523 llvm::Value *Size = nullptr; 5524 // If the size of the reduction item is non-constant, load it from global 5525 // threadprivate variable. 5526 if (RCG.getSizes(N).second) { 5527 Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate( 5528 CGF, CGM.getContext().getSizeType(), 5529 generateUniqueName("reduction_size", Loc, N)); 5530 Size = 5531 CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false, 5532 CGM.getContext().getSizeType(), SourceLocation()); 5533 } 5534 RCG.emitAggregateType(CGF, N, Size); 5535 // Emit the finalizer body: 5536 // <destroy>(<type>* %0) 5537 RCG.emitCleanups(CGF, N, PrivateAddr); 5538 CGF.FinishFunction(); 5539 return Fn; 5540 } 5541 5542 llvm::Value *CGOpenMPRuntime::emitTaskReductionInit( 5543 CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs, 5544 ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) { 5545 if (!CGF.HaveInsertPoint() || Data.ReductionVars.empty()) 5546 return nullptr; 5547 5548 // Build typedef struct: 5549 // kmp_task_red_input { 5550 // void *reduce_shar; // shared reduction item 5551 // size_t reduce_size; // size of data item 5552 // void *reduce_init; // data initialization routine 5553 // void *reduce_fini; // data finalization routine 5554 // void *reduce_comb; // data combiner routine 5555 // kmp_task_red_flags_t flags; // flags for additional info from compiler 5556 // } kmp_task_red_input_t; 5557 ASTContext &C = CGM.getContext(); 5558 auto *RD = C.buildImplicitRecord("kmp_task_red_input_t"); 5559 RD->startDefinition(); 5560 const FieldDecl *SharedFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy); 5561 const FieldDecl *SizeFD = addFieldToRecordDecl(C, RD, C.getSizeType()); 5562 const FieldDecl *InitFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy); 5563 const FieldDecl *FiniFD = 
addFieldToRecordDecl(C, RD, C.VoidPtrTy); 5564 const FieldDecl *CombFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy); 5565 const FieldDecl *FlagsFD = addFieldToRecordDecl( 5566 C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/false)); 5567 RD->completeDefinition(); 5568 QualType RDType = C.getRecordType(RD); 5569 unsigned Size = Data.ReductionVars.size(); 5570 llvm::APInt ArraySize(/*numBits=*/64, Size); 5571 QualType ArrayRDType = C.getConstantArrayType( 5572 RDType, ArraySize, ArrayType::Normal, /*IndexTypeQuals=*/0); 5573 // kmp_task_red_input_t .rd_input.[Size]; 5574 Address TaskRedInput = CGF.CreateMemTemp(ArrayRDType, ".rd_input."); 5575 ReductionCodeGen RCG(Data.ReductionVars, Data.ReductionCopies, 5576 Data.ReductionOps); 5577 for (unsigned Cnt = 0; Cnt < Size; ++Cnt) { 5578 // kmp_task_red_input_t &ElemLVal = .rd_input.[Cnt]; 5579 llvm::Value *Idxs[] = {llvm::ConstantInt::get(CGM.SizeTy, /*V=*/0), 5580 llvm::ConstantInt::get(CGM.SizeTy, Cnt)}; 5581 llvm::Value *GEP = CGF.EmitCheckedInBoundsGEP( 5582 TaskRedInput.getPointer(), Idxs, 5583 /*SignedIndices=*/false, /*IsSubtraction=*/false, Loc, 5584 ".rd_input.gep."); 5585 LValue ElemLVal = CGF.MakeNaturalAlignAddrLValue(GEP, RDType); 5586 // ElemLVal.reduce_shar = &Shareds[Cnt]; 5587 LValue SharedLVal = CGF.EmitLValueForField(ElemLVal, SharedFD); 5588 RCG.emitSharedLValue(CGF, Cnt); 5589 llvm::Value *CastedShared = 5590 CGF.EmitCastToVoidPtr(RCG.getSharedLValue(Cnt).getPointer()); 5591 CGF.EmitStoreOfScalar(CastedShared, SharedLVal); 5592 RCG.emitAggregateType(CGF, Cnt); 5593 llvm::Value *SizeValInChars; 5594 llvm::Value *SizeVal; 5595 std::tie(SizeValInChars, SizeVal) = RCG.getSizes(Cnt); 5596 // We use delayed creation/initialization for VLAs, array sections and 5597 // custom reduction initializations. 
It is required because runtime does not 5598 // provide the way to pass the sizes of VLAs/array sections to 5599 // initializer/combiner/finalizer functions and does not pass the pointer to 5600 // original reduction item to the initializer. Instead threadprivate global 5601 // variables are used to store these values and use them in the functions. 5602 bool DelayedCreation = !!SizeVal; 5603 SizeValInChars = CGF.Builder.CreateIntCast(SizeValInChars, CGM.SizeTy, 5604 /*isSigned=*/false); 5605 LValue SizeLVal = CGF.EmitLValueForField(ElemLVal, SizeFD); 5606 CGF.EmitStoreOfScalar(SizeValInChars, SizeLVal); 5607 // ElemLVal.reduce_init = init; 5608 LValue InitLVal = CGF.EmitLValueForField(ElemLVal, InitFD); 5609 llvm::Value *InitAddr = 5610 CGF.EmitCastToVoidPtr(emitReduceInitFunction(CGM, Loc, RCG, Cnt)); 5611 CGF.EmitStoreOfScalar(InitAddr, InitLVal); 5612 DelayedCreation = DelayedCreation || RCG.usesReductionInitializer(Cnt); 5613 // ElemLVal.reduce_fini = fini; 5614 LValue FiniLVal = CGF.EmitLValueForField(ElemLVal, FiniFD); 5615 llvm::Value *Fini = emitReduceFiniFunction(CGM, Loc, RCG, Cnt); 5616 llvm::Value *FiniAddr = Fini 5617 ? 
CGF.EmitCastToVoidPtr(Fini) 5618 : llvm::ConstantPointerNull::get(CGM.VoidPtrTy); 5619 CGF.EmitStoreOfScalar(FiniAddr, FiniLVal); 5620 // ElemLVal.reduce_comb = comb; 5621 LValue CombLVal = CGF.EmitLValueForField(ElemLVal, CombFD); 5622 llvm::Value *CombAddr = CGF.EmitCastToVoidPtr(emitReduceCombFunction( 5623 CGM, Loc, RCG, Cnt, Data.ReductionOps[Cnt], LHSExprs[Cnt], 5624 RHSExprs[Cnt], Data.ReductionCopies[Cnt])); 5625 CGF.EmitStoreOfScalar(CombAddr, CombLVal); 5626 // ElemLVal.flags = 0; 5627 LValue FlagsLVal = CGF.EmitLValueForField(ElemLVal, FlagsFD); 5628 if (DelayedCreation) { 5629 CGF.EmitStoreOfScalar( 5630 llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/1, /*IsSigned=*/true), 5631 FlagsLVal); 5632 } else 5633 CGF.EmitNullInitialization(FlagsLVal.getAddress(), FlagsLVal.getType()); 5634 } 5635 // Build call void *__kmpc_task_reduction_init(int gtid, int num_data, void 5636 // *data); 5637 llvm::Value *Args[] = { 5638 CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy, 5639 /*isSigned=*/true), 5640 llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true), 5641 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TaskRedInput.getPointer(), 5642 CGM.VoidPtrTy)}; 5643 return CGF.EmitRuntimeCall( 5644 createRuntimeFunction(OMPRTL__kmpc_task_reduction_init), Args); 5645 } 5646 5647 void CGOpenMPRuntime::emitTaskReductionFixups(CodeGenFunction &CGF, 5648 SourceLocation Loc, 5649 ReductionCodeGen &RCG, 5650 unsigned N) { 5651 auto Sizes = RCG.getSizes(N); 5652 // Emit threadprivate global variable if the type is non-constant 5653 // (Sizes.second = nullptr). 
5654 if (Sizes.second) { 5655 llvm::Value *SizeVal = CGF.Builder.CreateIntCast(Sizes.second, CGM.SizeTy, 5656 /*isSigned=*/false); 5657 Address SizeAddr = getAddrOfArtificialThreadPrivate( 5658 CGF, CGM.getContext().getSizeType(), 5659 generateUniqueName("reduction_size", Loc, N)); 5660 CGF.Builder.CreateStore(SizeVal, SizeAddr, /*IsVolatile=*/false); 5661 } 5662 // Store address of the original reduction item if custom initializer is used. 5663 if (RCG.usesReductionInitializer(N)) { 5664 Address SharedAddr = getAddrOfArtificialThreadPrivate( 5665 CGF, CGM.getContext().VoidPtrTy, 5666 generateUniqueName("reduction", Loc, N)); 5667 CGF.Builder.CreateStore( 5668 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5669 RCG.getSharedLValue(N).getPointer(), CGM.VoidPtrTy), 5670 SharedAddr, /*IsVolatile=*/false); 5671 } 5672 } 5673 5674 Address CGOpenMPRuntime::getTaskReductionItem(CodeGenFunction &CGF, 5675 SourceLocation Loc, 5676 llvm::Value *ReductionsPtr, 5677 LValue SharedLVal) { 5678 // Build call void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void 5679 // *d); 5680 llvm::Value *Args[] = { 5681 CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy, 5682 /*isSigned=*/true), 5683 ReductionsPtr, 5684 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(SharedLVal.getPointer(), 5685 CGM.VoidPtrTy)}; 5686 return Address( 5687 CGF.EmitRuntimeCall( 5688 createRuntimeFunction(OMPRTL__kmpc_task_reduction_get_th_data), Args), 5689 SharedLVal.getAlignment()); 5690 } 5691 5692 void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF, 5693 SourceLocation Loc) { 5694 if (!CGF.HaveInsertPoint()) 5695 return; 5696 // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32 5697 // global_tid); 5698 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 5699 // Ignore return result until untied tasks are supported. 
5700 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_taskwait), Args); 5701 if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) 5702 Region->emitUntiedSwitch(CGF); 5703 } 5704 5705 void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF, 5706 OpenMPDirectiveKind InnerKind, 5707 const RegionCodeGenTy &CodeGen, 5708 bool HasCancel) { 5709 if (!CGF.HaveInsertPoint()) 5710 return; 5711 InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel); 5712 CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr); 5713 } 5714 5715 namespace { 5716 enum RTCancelKind { 5717 CancelNoreq = 0, 5718 CancelParallel = 1, 5719 CancelLoop = 2, 5720 CancelSections = 3, 5721 CancelTaskgroup = 4 5722 }; 5723 } // anonymous namespace 5724 5725 static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) { 5726 RTCancelKind CancelKind = CancelNoreq; 5727 if (CancelRegion == OMPD_parallel) 5728 CancelKind = CancelParallel; 5729 else if (CancelRegion == OMPD_for) 5730 CancelKind = CancelLoop; 5731 else if (CancelRegion == OMPD_sections) 5732 CancelKind = CancelSections; 5733 else { 5734 assert(CancelRegion == OMPD_taskgroup); 5735 CancelKind = CancelTaskgroup; 5736 } 5737 return CancelKind; 5738 } 5739 5740 void CGOpenMPRuntime::emitCancellationPointCall( 5741 CodeGenFunction &CGF, SourceLocation Loc, 5742 OpenMPDirectiveKind CancelRegion) { 5743 if (!CGF.HaveInsertPoint()) 5744 return; 5745 // Build call kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32 5746 // global_tid, kmp_int32 cncl_kind); 5747 if (auto *OMPRegionInfo = 5748 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) { 5749 // For 'cancellation point taskgroup', the task region info may not have a 5750 // cancel. This may instead happen in another adjacent task. 
5751 if (CancelRegion == OMPD_taskgroup || OMPRegionInfo->hasCancel()) { 5752 llvm::Value *Args[] = { 5753 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 5754 CGF.Builder.getInt32(getCancellationKind(CancelRegion))}; 5755 // Ignore return result until untied tasks are supported. 5756 auto *Result = CGF.EmitRuntimeCall( 5757 createRuntimeFunction(OMPRTL__kmpc_cancellationpoint), Args); 5758 // if (__kmpc_cancellationpoint()) { 5759 // exit from construct; 5760 // } 5761 auto *ExitBB = CGF.createBasicBlock(".cancel.exit"); 5762 auto *ContBB = CGF.createBasicBlock(".cancel.continue"); 5763 auto *Cmp = CGF.Builder.CreateIsNotNull(Result); 5764 CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB); 5765 CGF.EmitBlock(ExitBB); 5766 // exit from construct; 5767 auto CancelDest = 5768 CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind()); 5769 CGF.EmitBranchThroughCleanup(CancelDest); 5770 CGF.EmitBlock(ContBB, /*IsFinished=*/true); 5771 } 5772 } 5773 } 5774 5775 void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc, 5776 const Expr *IfCond, 5777 OpenMPDirectiveKind CancelRegion) { 5778 if (!CGF.HaveInsertPoint()) 5779 return; 5780 // Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid, 5781 // kmp_int32 cncl_kind); 5782 if (auto *OMPRegionInfo = 5783 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) { 5784 auto &&ThenGen = [Loc, CancelRegion, OMPRegionInfo](CodeGenFunction &CGF, 5785 PrePostActionTy &) { 5786 auto &RT = CGF.CGM.getOpenMPRuntime(); 5787 llvm::Value *Args[] = { 5788 RT.emitUpdateLocation(CGF, Loc), RT.getThreadID(CGF, Loc), 5789 CGF.Builder.getInt32(getCancellationKind(CancelRegion))}; 5790 // Ignore return result until untied tasks are supported. 
5791 auto *Result = CGF.EmitRuntimeCall( 5792 RT.createRuntimeFunction(OMPRTL__kmpc_cancel), Args); 5793 // if (__kmpc_cancel()) { 5794 // exit from construct; 5795 // } 5796 auto *ExitBB = CGF.createBasicBlock(".cancel.exit"); 5797 auto *ContBB = CGF.createBasicBlock(".cancel.continue"); 5798 auto *Cmp = CGF.Builder.CreateIsNotNull(Result); 5799 CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB); 5800 CGF.EmitBlock(ExitBB); 5801 // exit from construct; 5802 auto CancelDest = 5803 CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind()); 5804 CGF.EmitBranchThroughCleanup(CancelDest); 5805 CGF.EmitBlock(ContBB, /*IsFinished=*/true); 5806 }; 5807 if (IfCond) 5808 emitOMPIfClause(CGF, IfCond, ThenGen, 5809 [](CodeGenFunction &, PrePostActionTy &) {}); 5810 else { 5811 RegionCodeGenTy ThenRCG(ThenGen); 5812 ThenRCG(CGF); 5813 } 5814 } 5815 } 5816 5817 /// \brief Obtain information that uniquely identifies a target entry. This 5818 /// consists of the file and device IDs as well as line number associated with 5819 /// the relevant entry source location. 
5820 static void getTargetEntryUniqueInfo(ASTContext &C, SourceLocation Loc, 5821 unsigned &DeviceID, unsigned &FileID, 5822 unsigned &LineNum) { 5823 5824 auto &SM = C.getSourceManager(); 5825 5826 // The loc should be always valid and have a file ID (the user cannot use 5827 // #pragma directives in macros) 5828 5829 assert(Loc.isValid() && "Source location is expected to be always valid."); 5830 assert(Loc.isFileID() && "Source location is expected to refer to a file."); 5831 5832 PresumedLoc PLoc = SM.getPresumedLoc(Loc); 5833 assert(PLoc.isValid() && "Source location is expected to be always valid."); 5834 5835 llvm::sys::fs::UniqueID ID; 5836 if (llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID)) 5837 llvm_unreachable("Source file with target region no longer exists!"); 5838 5839 DeviceID = ID.getDevice(); 5840 FileID = ID.getFile(); 5841 LineNum = PLoc.getLine(); 5842 } 5843 5844 void CGOpenMPRuntime::emitTargetOutlinedFunction( 5845 const OMPExecutableDirective &D, StringRef ParentName, 5846 llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, 5847 bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) { 5848 assert(!ParentName.empty() && "Invalid target region parent name!"); 5849 5850 emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID, 5851 IsOffloadEntry, CodeGen); 5852 } 5853 5854 void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper( 5855 const OMPExecutableDirective &D, StringRef ParentName, 5856 llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, 5857 bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) { 5858 // Create a unique name for the entry function using the source location 5859 // information of the current target region. 
The name will be something like: 5860 // 5861 // __omp_offloading_DD_FFFF_PP_lBB 5862 // 5863 // where DD_FFFF is an ID unique to the file (device and file IDs), PP is the 5864 // mangled name of the function that encloses the target region and BB is the 5865 // line number of the target region. 5866 5867 unsigned DeviceID; 5868 unsigned FileID; 5869 unsigned Line; 5870 getTargetEntryUniqueInfo(CGM.getContext(), D.getLocStart(), DeviceID, FileID, 5871 Line); 5872 SmallString<64> EntryFnName; 5873 { 5874 llvm::raw_svector_ostream OS(EntryFnName); 5875 OS << "__omp_offloading" << llvm::format("_%x", DeviceID) 5876 << llvm::format("_%x_", FileID) << ParentName << "_l" << Line; 5877 } 5878 5879 const CapturedStmt &CS = *cast<CapturedStmt>(D.getAssociatedStmt()); 5880 5881 CodeGenFunction CGF(CGM, true); 5882 CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName); 5883 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 5884 5885 OutlinedFn = CGF.GenerateOpenMPCapturedStmtFunction(CS); 5886 5887 // If this target outline function is not an offload entry, we don't need to 5888 // register it. 5889 if (!IsOffloadEntry) 5890 return; 5891 5892 // The target region ID is used by the runtime library to identify the current 5893 // target region, so it only has to be unique and not necessarily point to 5894 // anything. It could be the pointer to the outlined function that implements 5895 // the target region, but we aren't using that so that the compiler doesn't 5896 // need to keep that, and could therefore inline the host function if proven 5897 // worthwhile during optimization. In the other hand, if emitting code for the 5898 // device, the ID has to be the function address so that it can retrieved from 5899 // the offloading entry and launched by the runtime library. We also mark the 5900 // outlined function to have external linkage in case we are emitting code for 5901 // the device, because these functions will be entry points to the device. 
5902 5903 if (CGM.getLangOpts().OpenMPIsDevice) { 5904 OutlinedFnID = llvm::ConstantExpr::getBitCast(OutlinedFn, CGM.Int8PtrTy); 5905 OutlinedFn->setLinkage(llvm::GlobalValue::ExternalLinkage); 5906 } else 5907 OutlinedFnID = new llvm::GlobalVariable( 5908 CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true, 5909 llvm::GlobalValue::PrivateLinkage, 5910 llvm::Constant::getNullValue(CGM.Int8Ty), ".omp_offload.region_id"); 5911 5912 // Register the information for the entry associated with this target region. 5913 OffloadEntriesInfoManager.registerTargetRegionEntryInfo( 5914 DeviceID, FileID, ParentName, Line, OutlinedFn, OutlinedFnID, 5915 /*Flags=*/0); 5916 } 5917 5918 /// discard all CompoundStmts intervening between two constructs 5919 static const Stmt *ignoreCompoundStmts(const Stmt *Body) { 5920 while (auto *CS = dyn_cast_or_null<CompoundStmt>(Body)) 5921 Body = CS->body_front(); 5922 5923 return Body; 5924 } 5925 5926 /// Emit the number of teams for a target directive. Inspect the num_teams 5927 /// clause associated with a teams construct combined or closely nested 5928 /// with the target directive. 5929 /// 5930 /// Emit a team of size one for directives such as 'target parallel' that 5931 /// have no associated teams construct. 5932 /// 5933 /// Otherwise, return nullptr. 5934 static llvm::Value * 5935 emitNumTeamsForTargetDirective(CGOpenMPRuntime &OMPRuntime, 5936 CodeGenFunction &CGF, 5937 const OMPExecutableDirective &D) { 5938 5939 assert(!CGF.getLangOpts().OpenMPIsDevice && "Clauses associated with the " 5940 "teams directive expected to be " 5941 "emitted only for the host!"); 5942 5943 auto &Bld = CGF.Builder; 5944 5945 // If the target directive is combined with a teams directive: 5946 // Return the value in the num_teams clause, if any. 5947 // Otherwise, return 0 to denote the runtime default. 
5948 if (isOpenMPTeamsDirective(D.getDirectiveKind())) { 5949 if (const auto *NumTeamsClause = D.getSingleClause<OMPNumTeamsClause>()) { 5950 CodeGenFunction::RunCleanupsScope NumTeamsScope(CGF); 5951 auto NumTeams = CGF.EmitScalarExpr(NumTeamsClause->getNumTeams(), 5952 /*IgnoreResultAssign*/ true); 5953 return Bld.CreateIntCast(NumTeams, CGF.Int32Ty, 5954 /*IsSigned=*/true); 5955 } 5956 5957 // The default value is 0. 5958 return Bld.getInt32(0); 5959 } 5960 5961 // If the target directive is combined with a parallel directive but not a 5962 // teams directive, start one team. 5963 if (isOpenMPParallelDirective(D.getDirectiveKind())) 5964 return Bld.getInt32(1); 5965 5966 // If the current target region has a teams region enclosed, we need to get 5967 // the number of teams to pass to the runtime function call. This is done 5968 // by generating the expression in a inlined region. This is required because 5969 // the expression is captured in the enclosing target environment when the 5970 // teams directive is not combined with target. 5971 5972 const CapturedStmt &CS = *cast<CapturedStmt>(D.getAssociatedStmt()); 5973 5974 if (auto *TeamsDir = dyn_cast_or_null<OMPExecutableDirective>( 5975 ignoreCompoundStmts(CS.getCapturedStmt()))) { 5976 if (isOpenMPTeamsDirective(TeamsDir->getDirectiveKind())) { 5977 if (auto *NTE = TeamsDir->getSingleClause<OMPNumTeamsClause>()) { 5978 CGOpenMPInnerExprInfo CGInfo(CGF, CS); 5979 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 5980 llvm::Value *NumTeams = CGF.EmitScalarExpr(NTE->getNumTeams()); 5981 return Bld.CreateIntCast(NumTeams, CGF.Int32Ty, 5982 /*IsSigned=*/true); 5983 } 5984 5985 // If we have an enclosed teams directive but no num_teams clause we use 5986 // the default value 0. 5987 return Bld.getInt32(0); 5988 } 5989 } 5990 5991 // No teams associated with the directive. 5992 return nullptr; 5993 } 5994 5995 /// Emit the number of threads for a target directive. 
Inspect the 5996 /// thread_limit clause associated with a teams construct combined or closely 5997 /// nested with the target directive. 5998 /// 5999 /// Emit the num_threads clause for directives such as 'target parallel' that 6000 /// have no associated teams construct. 6001 /// 6002 /// Otherwise, return nullptr. 6003 static llvm::Value * 6004 emitNumThreadsForTargetDirective(CGOpenMPRuntime &OMPRuntime, 6005 CodeGenFunction &CGF, 6006 const OMPExecutableDirective &D) { 6007 6008 assert(!CGF.getLangOpts().OpenMPIsDevice && "Clauses associated with the " 6009 "teams directive expected to be " 6010 "emitted only for the host!"); 6011 6012 auto &Bld = CGF.Builder; 6013 6014 // 6015 // If the target directive is combined with a teams directive: 6016 // Return the value in the thread_limit clause, if any. 6017 // 6018 // If the target directive is combined with a parallel directive: 6019 // Return the value in the num_threads clause, if any. 6020 // 6021 // If both clauses are set, select the minimum of the two. 6022 // 6023 // If neither teams or parallel combined directives set the number of threads 6024 // in a team, return 0 to denote the runtime default. 6025 // 6026 // If this is not a teams directive return nullptr. 
6027 6028 if (isOpenMPTeamsDirective(D.getDirectiveKind()) || 6029 isOpenMPParallelDirective(D.getDirectiveKind())) { 6030 llvm::Value *DefaultThreadLimitVal = Bld.getInt32(0); 6031 llvm::Value *NumThreadsVal = nullptr; 6032 llvm::Value *ThreadLimitVal = nullptr; 6033 6034 if (const auto *ThreadLimitClause = 6035 D.getSingleClause<OMPThreadLimitClause>()) { 6036 CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF); 6037 auto ThreadLimit = CGF.EmitScalarExpr(ThreadLimitClause->getThreadLimit(), 6038 /*IgnoreResultAssign*/ true); 6039 ThreadLimitVal = Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, 6040 /*IsSigned=*/true); 6041 } 6042 6043 if (const auto *NumThreadsClause = 6044 D.getSingleClause<OMPNumThreadsClause>()) { 6045 CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF); 6046 llvm::Value *NumThreads = 6047 CGF.EmitScalarExpr(NumThreadsClause->getNumThreads(), 6048 /*IgnoreResultAssign*/ true); 6049 NumThreadsVal = 6050 Bld.CreateIntCast(NumThreads, CGF.Int32Ty, /*IsSigned=*/true); 6051 } 6052 6053 // Select the lesser of thread_limit and num_threads. 6054 if (NumThreadsVal) 6055 ThreadLimitVal = ThreadLimitVal 6056 ? Bld.CreateSelect(Bld.CreateICmpSLT(NumThreadsVal, 6057 ThreadLimitVal), 6058 NumThreadsVal, ThreadLimitVal) 6059 : NumThreadsVal; 6060 6061 // Set default value passed to the runtime if either teams or a target 6062 // parallel type directive is found but no clause is specified. 6063 if (!ThreadLimitVal) 6064 ThreadLimitVal = DefaultThreadLimitVal; 6065 6066 return ThreadLimitVal; 6067 } 6068 6069 // If the current target region has a teams region enclosed, we need to get 6070 // the thread limit to pass to the runtime function call. This is done 6071 // by generating the expression in a inlined region. This is required because 6072 // the expression is captured in the enclosing target environment when the 6073 // teams directive is not combined with target. 
6074 6075 const CapturedStmt &CS = *cast<CapturedStmt>(D.getAssociatedStmt()); 6076 6077 if (auto *TeamsDir = dyn_cast_or_null<OMPExecutableDirective>( 6078 ignoreCompoundStmts(CS.getCapturedStmt()))) { 6079 if (isOpenMPTeamsDirective(TeamsDir->getDirectiveKind())) { 6080 if (auto *TLE = TeamsDir->getSingleClause<OMPThreadLimitClause>()) { 6081 CGOpenMPInnerExprInfo CGInfo(CGF, CS); 6082 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 6083 llvm::Value *ThreadLimit = CGF.EmitScalarExpr(TLE->getThreadLimit()); 6084 return CGF.Builder.CreateIntCast(ThreadLimit, CGF.Int32Ty, 6085 /*IsSigned=*/true); 6086 } 6087 6088 // If we have an enclosed teams directive but no thread_limit clause we 6089 // use the default value 0. 6090 return CGF.Builder.getInt32(0); 6091 } 6092 } 6093 6094 // No teams associated with the directive. 6095 return nullptr; 6096 } 6097 6098 namespace { 6099 // \brief Utility to handle information from clauses associated with a given 6100 // construct that use mappable expressions (e.g. 'map' clause, 'to' clause). 6101 // It provides a convenient interface to obtain the information and generate 6102 // code for that information. 6103 class MappableExprsHandler { 6104 public: 6105 /// \brief Values for bit flags used to specify the mapping type for 6106 /// offloading. 6107 enum OpenMPOffloadMappingFlags { 6108 /// \brief Allocate memory on the device and move data from host to device. 6109 OMP_MAP_TO = 0x01, 6110 /// \brief Allocate memory on the device and move data from device to host. 6111 OMP_MAP_FROM = 0x02, 6112 /// \brief Always perform the requested mapping action on the element, even 6113 /// if it was already mapped before. 6114 OMP_MAP_ALWAYS = 0x04, 6115 /// \brief Delete the element from the device environment, ignoring the 6116 /// current reference count associated with the element. 
6117 OMP_MAP_DELETE = 0x08, 6118 /// \brief The element being mapped is a pointer-pointee pair; both the 6119 /// pointer and the pointee should be mapped. 6120 OMP_MAP_PTR_AND_OBJ = 0x10, 6121 /// \brief This flags signals that the base address of an entry should be 6122 /// passed to the target kernel as an argument. 6123 OMP_MAP_TARGET_PARAM = 0x20, 6124 /// \brief Signal that the runtime library has to return the device pointer 6125 /// in the current position for the data being mapped. Used when we have the 6126 /// use_device_ptr clause. 6127 OMP_MAP_RETURN_PARAM = 0x40, 6128 /// \brief This flag signals that the reference being passed is a pointer to 6129 /// private data. 6130 OMP_MAP_PRIVATE = 0x80, 6131 /// \brief Pass the element to the device by value. 6132 OMP_MAP_LITERAL = 0x100, 6133 /// Implicit map 6134 OMP_MAP_IMPLICIT = 0x200, 6135 }; 6136 6137 /// Class that associates information with a base pointer to be passed to the 6138 /// runtime library. 6139 class BasePointerInfo { 6140 /// The base pointer. 6141 llvm::Value *Ptr = nullptr; 6142 /// The base declaration that refers to this device pointer, or null if 6143 /// there is none. 6144 const ValueDecl *DevPtrDecl = nullptr; 6145 6146 public: 6147 BasePointerInfo(llvm::Value *Ptr, const ValueDecl *DevPtrDecl = nullptr) 6148 : Ptr(Ptr), DevPtrDecl(DevPtrDecl) {} 6149 llvm::Value *operator*() const { return Ptr; } 6150 const ValueDecl *getDevicePtrDecl() const { return DevPtrDecl; } 6151 void setDevicePtrDecl(const ValueDecl *D) { DevPtrDecl = D; } 6152 }; 6153 6154 typedef SmallVector<BasePointerInfo, 16> MapBaseValuesArrayTy; 6155 typedef SmallVector<llvm::Value *, 16> MapValuesArrayTy; 6156 typedef SmallVector<uint64_t, 16> MapFlagsArrayTy; 6157 6158 private: 6159 /// \brief Directive from where the map clauses were extracted. 6160 const OMPExecutableDirective &CurDir; 6161 6162 /// \brief Function the directive is being generated for. 
6163 CodeGenFunction &CGF; 6164 6165 /// \brief Set of all first private variables in the current directive. 6166 llvm::SmallPtrSet<const VarDecl *, 8> FirstPrivateDecls; 6167 /// Set of all reduction variables in the current directive. 6168 llvm::SmallPtrSet<const VarDecl *, 8> ReductionDecls; 6169 6170 /// Map between device pointer declarations and their expression components. 6171 /// The key value for declarations in 'this' is null. 6172 llvm::DenseMap< 6173 const ValueDecl *, 6174 SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>> 6175 DevPointersMap; 6176 6177 llvm::Value *getExprTypeSize(const Expr *E) const { 6178 auto ExprTy = E->getType().getCanonicalType(); 6179 6180 // Reference types are ignored for mapping purposes. 6181 if (auto *RefTy = ExprTy->getAs<ReferenceType>()) 6182 ExprTy = RefTy->getPointeeType().getCanonicalType(); 6183 6184 // Given that an array section is considered a built-in type, we need to 6185 // do the calculation based on the length of the section instead of relying 6186 // on CGF.getTypeSize(E->getType()). 6187 if (const auto *OAE = dyn_cast<OMPArraySectionExpr>(E)) { 6188 QualType BaseTy = OMPArraySectionExpr::getBaseOriginalType( 6189 OAE->getBase()->IgnoreParenImpCasts()) 6190 .getCanonicalType(); 6191 6192 // If there is no length associated with the expression, that means we 6193 // are using the whole length of the base. 
6194 if (!OAE->getLength() && OAE->getColonLoc().isValid()) 6195 return CGF.getTypeSize(BaseTy); 6196 6197 llvm::Value *ElemSize; 6198 if (auto *PTy = BaseTy->getAs<PointerType>()) 6199 ElemSize = CGF.getTypeSize(PTy->getPointeeType().getCanonicalType()); 6200 else { 6201 auto *ATy = cast<ArrayType>(BaseTy.getTypePtr()); 6202 assert(ATy && "Expecting array type if not a pointer type."); 6203 ElemSize = CGF.getTypeSize(ATy->getElementType().getCanonicalType()); 6204 } 6205 6206 // If we don't have a length at this point, that is because we have an 6207 // array section with a single element. 6208 if (!OAE->getLength()) 6209 return ElemSize; 6210 6211 auto *LengthVal = CGF.EmitScalarExpr(OAE->getLength()); 6212 LengthVal = 6213 CGF.Builder.CreateIntCast(LengthVal, CGF.SizeTy, /*isSigned=*/false); 6214 return CGF.Builder.CreateNUWMul(LengthVal, ElemSize); 6215 } 6216 return CGF.getTypeSize(ExprTy); 6217 } 6218 6219 /// \brief Return the corresponding bits for a given map clause modifier. Add 6220 /// a flag marking the map as a pointer if requested. Add a flag marking the 6221 /// map as the first one of a series of maps that relate to the same map 6222 /// expression. 6223 uint64_t getMapTypeBits(OpenMPMapClauseKind MapType, 6224 OpenMPMapClauseKind MapTypeModifier, bool AddPtrFlag, 6225 bool AddIsTargetParamFlag) const { 6226 uint64_t Bits = 0u; 6227 switch (MapType) { 6228 case OMPC_MAP_alloc: 6229 case OMPC_MAP_release: 6230 // alloc and release is the default behavior in the runtime library, i.e. 6231 // if we don't pass any bits alloc/release that is what the runtime is 6232 // going to do. Therefore, we don't need to signal anything for these two 6233 // type modifiers. 
6234 break; 6235 case OMPC_MAP_to: 6236 Bits = OMP_MAP_TO; 6237 break; 6238 case OMPC_MAP_from: 6239 Bits = OMP_MAP_FROM; 6240 break; 6241 case OMPC_MAP_tofrom: 6242 Bits = OMP_MAP_TO | OMP_MAP_FROM; 6243 break; 6244 case OMPC_MAP_delete: 6245 Bits = OMP_MAP_DELETE; 6246 break; 6247 default: 6248 llvm_unreachable("Unexpected map type!"); 6249 break; 6250 } 6251 if (AddPtrFlag) 6252 Bits |= OMP_MAP_PTR_AND_OBJ; 6253 if (AddIsTargetParamFlag) 6254 Bits |= OMP_MAP_TARGET_PARAM; 6255 if (MapTypeModifier == OMPC_MAP_always) 6256 Bits |= OMP_MAP_ALWAYS; 6257 return Bits; 6258 } 6259 6260 /// \brief Return true if the provided expression is a final array section. A 6261 /// final array section, is one whose length can't be proved to be one. 6262 bool isFinalArraySectionExpression(const Expr *E) const { 6263 auto *OASE = dyn_cast<OMPArraySectionExpr>(E); 6264 6265 // It is not an array section and therefore not a unity-size one. 6266 if (!OASE) 6267 return false; 6268 6269 // An array section with no colon always refer to a single element. 6270 if (OASE->getColonLoc().isInvalid()) 6271 return false; 6272 6273 auto *Length = OASE->getLength(); 6274 6275 // If we don't have a length we have to check if the array has size 1 6276 // for this dimension. Also, we should always expect a length if the 6277 // base type is pointer. 6278 if (!Length) { 6279 auto BaseQTy = OMPArraySectionExpr::getBaseOriginalType( 6280 OASE->getBase()->IgnoreParenImpCasts()) 6281 .getCanonicalType(); 6282 if (auto *ATy = dyn_cast<ConstantArrayType>(BaseQTy.getTypePtr())) 6283 return ATy->getSize().getSExtValue() != 1; 6284 // If we don't have a constant dimension length, we have to consider 6285 // the current section as having any size, so it is not necessarily 6286 // unitary. If it happen to be unity size, that's user fault. 6287 return true; 6288 } 6289 6290 // Check if the length evaluates to 1. 
6291 llvm::APSInt ConstLength; 6292 if (!Length->EvaluateAsInt(ConstLength, CGF.getContext())) 6293 return true; // Can have more that size 1. 6294 6295 return ConstLength.getSExtValue() != 1; 6296 } 6297 6298 /// \brief Generate the base pointers, section pointers, sizes and map type 6299 /// bits for the provided map type, map modifier, and expression components. 6300 /// \a IsFirstComponent should be set to true if the provided set of 6301 /// components is the first associated with a capture. 6302 void generateInfoForComponentList( 6303 OpenMPMapClauseKind MapType, OpenMPMapClauseKind MapTypeModifier, 6304 OMPClauseMappableExprCommon::MappableExprComponentListRef Components, 6305 MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers, 6306 MapValuesArrayTy &Sizes, MapFlagsArrayTy &Types, 6307 bool IsFirstComponentList, bool IsImplicit) const { 6308 6309 // The following summarizes what has to be generated for each map and the 6310 // types bellow. The generated information is expressed in this order: 6311 // base pointer, section pointer, size, flags 6312 // (to add to the ones that come from the map type and modifier). 
6313 // 6314 // double d; 6315 // int i[100]; 6316 // float *p; 6317 // 6318 // struct S1 { 6319 // int i; 6320 // float f[50]; 6321 // } 6322 // struct S2 { 6323 // int i; 6324 // float f[50]; 6325 // S1 s; 6326 // double *p; 6327 // struct S2 *ps; 6328 // } 6329 // S2 s; 6330 // S2 *ps; 6331 // 6332 // map(d) 6333 // &d, &d, sizeof(double), noflags 6334 // 6335 // map(i) 6336 // &i, &i, 100*sizeof(int), noflags 6337 // 6338 // map(i[1:23]) 6339 // &i(=&i[0]), &i[1], 23*sizeof(int), noflags 6340 // 6341 // map(p) 6342 // &p, &p, sizeof(float*), noflags 6343 // 6344 // map(p[1:24]) 6345 // p, &p[1], 24*sizeof(float), noflags 6346 // 6347 // map(s) 6348 // &s, &s, sizeof(S2), noflags 6349 // 6350 // map(s.i) 6351 // &s, &(s.i), sizeof(int), noflags 6352 // 6353 // map(s.s.f) 6354 // &s, &(s.i.f), 50*sizeof(int), noflags 6355 // 6356 // map(s.p) 6357 // &s, &(s.p), sizeof(double*), noflags 6358 // 6359 // map(s.p[:22], s.a s.b) 6360 // &s, &(s.p), sizeof(double*), noflags 6361 // &(s.p), &(s.p[0]), 22*sizeof(double), ptr_flag 6362 // 6363 // map(s.ps) 6364 // &s, &(s.ps), sizeof(S2*), noflags 6365 // 6366 // map(s.ps->s.i) 6367 // &s, &(s.ps), sizeof(S2*), noflags 6368 // &(s.ps), &(s.ps->s.i), sizeof(int), ptr_flag 6369 // 6370 // map(s.ps->ps) 6371 // &s, &(s.ps), sizeof(S2*), noflags 6372 // &(s.ps), &(s.ps->ps), sizeof(S2*), ptr_flag 6373 // 6374 // map(s.ps->ps->ps) 6375 // &s, &(s.ps), sizeof(S2*), noflags 6376 // &(s.ps), &(s.ps->ps), sizeof(S2*), ptr_flag 6377 // &(s.ps->ps), &(s.ps->ps->ps), sizeof(S2*), ptr_flag 6378 // 6379 // map(s.ps->ps->s.f[:22]) 6380 // &s, &(s.ps), sizeof(S2*), noflags 6381 // &(s.ps), &(s.ps->ps), sizeof(S2*), ptr_flag 6382 // &(s.ps->ps), &(s.ps->ps->s.f[0]), 22*sizeof(float), ptr_flag 6383 // 6384 // map(ps) 6385 // &ps, &ps, sizeof(S2*), noflags 6386 // 6387 // map(ps->i) 6388 // ps, &(ps->i), sizeof(int), noflags 6389 // 6390 // map(ps->s.f) 6391 // ps, &(ps->s.f[0]), 50*sizeof(float), noflags 6392 // 6393 // map(ps->p) 6394 // 
ps, &(ps->p), sizeof(double*), noflags 6395 // 6396 // map(ps->p[:22]) 6397 // ps, &(ps->p), sizeof(double*), noflags 6398 // &(ps->p), &(ps->p[0]), 22*sizeof(double), ptr_flag 6399 // 6400 // map(ps->ps) 6401 // ps, &(ps->ps), sizeof(S2*), noflags 6402 // 6403 // map(ps->ps->s.i) 6404 // ps, &(ps->ps), sizeof(S2*), noflags 6405 // &(ps->ps), &(ps->ps->s.i), sizeof(int), ptr_flag 6406 // 6407 // map(ps->ps->ps) 6408 // ps, &(ps->ps), sizeof(S2*), noflags 6409 // &(ps->ps), &(ps->ps->ps), sizeof(S2*), ptr_flag 6410 // 6411 // map(ps->ps->ps->ps) 6412 // ps, &(ps->ps), sizeof(S2*), noflags 6413 // &(ps->ps), &(ps->ps->ps), sizeof(S2*), ptr_flag 6414 // &(ps->ps->ps), &(ps->ps->ps->ps), sizeof(S2*), ptr_flag 6415 // 6416 // map(ps->ps->ps->s.f[:22]) 6417 // ps, &(ps->ps), sizeof(S2*), noflags 6418 // &(ps->ps), &(ps->ps->ps), sizeof(S2*), ptr_flag 6419 // &(ps->ps->ps), &(ps->ps->ps->s.f[0]), 22*sizeof(float), ptr_flag 6420 6421 // Track if the map information being generated is the first for a capture. 6422 bool IsCaptureFirstInfo = IsFirstComponentList; 6423 6424 // Scan the components from the base to the complete expression. 6425 auto CI = Components.rbegin(); 6426 auto CE = Components.rend(); 6427 auto I = CI; 6428 6429 // Track if the map information being generated is the first for a list of 6430 // components. 6431 bool IsExpressionFirstInfo = true; 6432 llvm::Value *BP = nullptr; 6433 6434 if (auto *ME = dyn_cast<MemberExpr>(I->getAssociatedExpression())) { 6435 // The base is the 'this' pointer. The content of the pointer is going 6436 // to be the base of the field being mapped. 6437 BP = CGF.EmitScalarExpr(ME->getBase()); 6438 } else { 6439 // The base is the reference to the variable. 6440 // BP = &Var. 6441 BP = CGF.EmitOMPSharedLValue(I->getAssociatedExpression()).getPointer(); 6442 6443 // If the variable is a pointer and is being dereferenced (i.e. is not 6444 // the last component), the base has to be the pointer itself, not its 6445 // reference. 
References are ignored for mapping purposes. 6446 QualType Ty = 6447 I->getAssociatedDeclaration()->getType().getNonReferenceType(); 6448 if (Ty->isAnyPointerType() && std::next(I) != CE) { 6449 auto PtrAddr = CGF.MakeNaturalAlignAddrLValue(BP, Ty); 6450 BP = CGF.EmitLoadOfPointerLValue(PtrAddr.getAddress(), 6451 Ty->castAs<PointerType>()) 6452 .getPointer(); 6453 6454 // We do not need to generate individual map information for the 6455 // pointer, it can be associated with the combined storage. 6456 ++I; 6457 } 6458 } 6459 6460 uint64_t DefaultFlags = IsImplicit ? OMP_MAP_IMPLICIT : 0; 6461 for (; I != CE; ++I) { 6462 auto Next = std::next(I); 6463 6464 // We need to generate the addresses and sizes if this is the last 6465 // component, if the component is a pointer or if it is an array section 6466 // whose length can't be proved to be one. If this is a pointer, it 6467 // becomes the base address for the following components. 6468 6469 // A final array section, is one whose length can't be proved to be one. 6470 bool IsFinalArraySection = 6471 isFinalArraySectionExpression(I->getAssociatedExpression()); 6472 6473 // Get information on whether the element is a pointer. Have to do a 6474 // special treatment for array sections given that they are built-in 6475 // types. 6476 const auto *OASE = 6477 dyn_cast<OMPArraySectionExpr>(I->getAssociatedExpression()); 6478 bool IsPointer = 6479 (OASE && 6480 OMPArraySectionExpr::getBaseOriginalType(OASE) 6481 .getCanonicalType() 6482 ->isAnyPointerType()) || 6483 I->getAssociatedExpression()->getType()->isAnyPointerType(); 6484 6485 if (Next == CE || IsPointer || IsFinalArraySection) { 6486 6487 // If this is not the last component, we expect the pointer to be 6488 // associated with an array expression or member expression. 
6489 assert((Next == CE || 6490 isa<MemberExpr>(Next->getAssociatedExpression()) || 6491 isa<ArraySubscriptExpr>(Next->getAssociatedExpression()) || 6492 isa<OMPArraySectionExpr>(Next->getAssociatedExpression())) && 6493 "Unexpected expression"); 6494 6495 llvm::Value *LB = 6496 CGF.EmitOMPSharedLValue(I->getAssociatedExpression()).getPointer(); 6497 auto *Size = getExprTypeSize(I->getAssociatedExpression()); 6498 6499 // If we have a member expression and the current component is a 6500 // reference, we have to map the reference too. Whenever we have a 6501 // reference, the section that reference refers to is going to be a 6502 // load instruction from the storage assigned to the reference. 6503 if (isa<MemberExpr>(I->getAssociatedExpression()) && 6504 I->getAssociatedDeclaration()->getType()->isReferenceType()) { 6505 auto *LI = cast<llvm::LoadInst>(LB); 6506 auto *RefAddr = LI->getPointerOperand(); 6507 6508 BasePointers.push_back(BP); 6509 Pointers.push_back(RefAddr); 6510 Sizes.push_back(CGF.getTypeSize(CGF.getContext().VoidPtrTy)); 6511 Types.push_back(DefaultFlags | 6512 getMapTypeBits( 6513 /*MapType*/ OMPC_MAP_alloc, 6514 /*MapTypeModifier=*/OMPC_MAP_unknown, 6515 !IsExpressionFirstInfo, IsCaptureFirstInfo)); 6516 IsExpressionFirstInfo = false; 6517 IsCaptureFirstInfo = false; 6518 // The reference will be the next base address. 6519 BP = RefAddr; 6520 } 6521 6522 BasePointers.push_back(BP); 6523 Pointers.push_back(LB); 6524 Sizes.push_back(Size); 6525 6526 // We need to add a pointer flag for each map that comes from the 6527 // same expression except for the first one. We also need to signal 6528 // this map is the first one that relates with the current capture 6529 // (there is a set of entries for each capture). 6530 Types.push_back(DefaultFlags | getMapTypeBits(MapType, MapTypeModifier, 6531 !IsExpressionFirstInfo, 6532 IsCaptureFirstInfo)); 6533 6534 // If we have a final array section, we are done with this expression. 
6535 if (IsFinalArraySection) 6536 break; 6537 6538 // The pointer becomes the base for the next element. 6539 if (Next != CE) 6540 BP = LB; 6541 6542 IsExpressionFirstInfo = false; 6543 IsCaptureFirstInfo = false; 6544 } 6545 } 6546 } 6547 6548 /// \brief Return the adjusted map modifiers if the declaration a capture 6549 /// refers to appears in a first-private clause. This is expected to be used 6550 /// only with directives that start with 'target'. 6551 unsigned adjustMapModifiersForPrivateClauses(const CapturedStmt::Capture &Cap, 6552 unsigned CurrentModifiers) { 6553 assert(Cap.capturesVariable() && "Expected capture by reference only!"); 6554 6555 // A first private variable captured by reference will use only the 6556 // 'private ptr' and 'map to' flag. Return the right flags if the captured 6557 // declaration is known as first-private in this handler. 6558 if (FirstPrivateDecls.count(Cap.getCapturedVar())) 6559 return MappableExprsHandler::OMP_MAP_PRIVATE | 6560 MappableExprsHandler::OMP_MAP_TO; 6561 // Reduction variable will use only the 'private ptr' and 'map to_from' 6562 // flag. 6563 if (ReductionDecls.count(Cap.getCapturedVar())) { 6564 return MappableExprsHandler::OMP_MAP_TO | 6565 MappableExprsHandler::OMP_MAP_FROM; 6566 } 6567 6568 // We didn't modify anything. 6569 return CurrentModifiers; 6570 } 6571 6572 public: 6573 MappableExprsHandler(const OMPExecutableDirective &Dir, CodeGenFunction &CGF) 6574 : CurDir(Dir), CGF(CGF) { 6575 // Extract firstprivate clause information. 
  /// \brief Generate all the base pointers, section pointers, sizes and map
  /// types for the extracted mappable expressions. Also, for each item that
  /// relates with a device pointer, the relevant declaration is attached to the
  /// matching base pointer entry and that entry's map type is flagged with
  /// OMP_MAP_RETURN_PARAM.
  ///
  /// The component lists of the 'map', 'to' and 'from' clauses of the current
  /// directive are first grouped per canonical declaration. The
  /// 'use_device_ptr' items are then matched against those groups; an item
  /// without a match immediately produces a zero-size entry. Finally every
  /// grouped list is expanded through generateInfoForComponentList().
  ///
  /// \param BasePointers [out] Base pointer of every generated entry.
  /// \param Pointers [out] Section pointer of every generated entry.
  /// \param Sizes [out] Size of every generated entry.
  /// \param Types [out] Map-type flags of every generated entry.
  ///
  /// All four arrays are cleared on entry.
  void generateAllInfo(MapBaseValuesArrayTy &BasePointers,
                       MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes,
                       MapFlagsArrayTy &Types) const {
    BasePointers.clear();
    Pointers.clear();
    Sizes.clear();
    Types.clear();

    /// One component list together with the clause attributes needed to
    /// expand it later.
    struct MapInfo {
      /// Kind that defines how a device pointer has to be returned.
      enum ReturnPointerKind {
        // Don't have to return any pointer.
        RPK_None,
        // Pointer is the base of the declaration.
        RPK_Base,
        // Pointer is a member of the base declaration - 'this'
        RPK_Member,
        // Pointer is a reference and a member of the base declaration - 'this'
        RPK_MemberReference,
      };
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType = OMPC_MAP_unknown;
      OpenMPMapClauseKind MapTypeModifier = OMPC_MAP_unknown;
      ReturnPointerKind ReturnDevicePointer = RPK_None;
      bool IsImplicit = false;

      MapInfo() = default;
      MapInfo(
          OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
          OpenMPMapClauseKind MapType, OpenMPMapClauseKind MapTypeModifier,
          ReturnPointerKind ReturnDevicePointer, bool IsImplicit)
          : Components(Components), MapType(MapType),
            MapTypeModifier(MapTypeModifier),
            ReturnDevicePointer(ReturnDevicePointer), IsImplicit(IsImplicit) {}
    };

    // We have to process the component lists that relate with the same
    // declaration in a single chunk so that we can generate the map flags
    // correctly. Therefore, we organize all lists in a map. A MapVector is
    // used, so the groups are later visited in insertion order.
    llvm::MapVector<const ValueDecl *, SmallVector<MapInfo, 8>> Info;

    // Helper function to fill the information map for the different supported
    // clauses. The key is the canonical declaration, or null when the clause
    // reports no declaration for the list.
    auto &&InfoGen = [&Info](
        const ValueDecl *D,
        OMPClauseMappableExprCommon::MappableExprComponentListRef L,
        OpenMPMapClauseKind MapType, OpenMPMapClauseKind MapModifier,
        MapInfo::ReturnPointerKind ReturnDevicePointer, bool IsImplicit) {
      const ValueDecl *VD =
          D ? cast<ValueDecl>(D->getCanonicalDecl()) : nullptr;
      Info[VD].emplace_back(L, MapType, MapModifier, ReturnDevicePointer,
                            IsImplicit);
    };

    // Gather the lists of the 'map' clauses with their own type and modifier.
    // FIXME: MSVC 2013 seems to require this-> to find member CurDir.
    for (auto *C : this->CurDir.getClausesOfKind<OMPMapClause>())
      for (auto L : C->component_lists()) {
        InfoGen(L.first, L.second, C->getMapType(), C->getMapTypeModifier(),
                MapInfo::RPK_None, C->isImplicit());
      }
    // Lists of a 'to' clause are recorded as map type 'to' without modifier.
    for (auto *C : this->CurDir.getClausesOfKind<OMPToClause>())
      for (auto L : C->component_lists()) {
        InfoGen(L.first, L.second, OMPC_MAP_to, OMPC_MAP_unknown,
                MapInfo::RPK_None, C->isImplicit());
      }
    // Lists of a 'from' clause are recorded as map type 'from' without
    // modifier.
    for (auto *C : this->CurDir.getClausesOfKind<OMPFromClause>())
      for (auto L : C->component_lists()) {
        InfoGen(L.first, L.second, OMPC_MAP_from, OMPC_MAP_unknown,
                MapInfo::RPK_None, C->isImplicit());
      }

    // Look at the use_device_ptr clause information and mark the existing map
    // entries as such. If there is no map information for an entry in the
    // use_device_ptr list, we create one with map type 'alloc' and zero size
    // section. It is the user's fault if that was not mapped before.
    // FIXME: MSVC 2013 seems to require this-> to find member CurDir.
    for (auto *C : this->CurDir.getClausesOfKind<OMPUseDevicePtrClause>())
      for (auto L : C->component_lists()) {
        assert(!L.second.empty() && "Not expecting empty list of components!");
        // The last stored component supplies both the declaration and the
        // expression used for the lookup below.
        const ValueDecl *VD = L.second.back().getAssociatedDeclaration();
        VD = cast<ValueDecl>(VD->getCanonicalDecl());
        auto *IE = L.second.back().getAssociatedExpression();
        // If the first component is a member expression, we have to look into
        // 'this', which maps to null in the map of map information. Otherwise
        // look directly for the information.
        auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);

        // We potentially have map information for this declaration already.
        // Look for the first set of components that refer to it.
        if (It != Info.end()) {
          auto CI = std::find_if(
              It->second.begin(), It->second.end(), [VD](const MapInfo &MI) {
                return MI.Components.back().getAssociatedDeclaration() == VD;
              });
          // If we found a map entry, signal that the pointer has to be returned
          // and move on to the next declaration.
          if (CI != It->second.end()) {
            CI->ReturnDevicePointer = isa<MemberExpr>(IE)
                                          ? (VD->getType()->isReferenceType()
                                                 ? MapInfo::RPK_MemberReference
                                                 : MapInfo::RPK_Member)
                                          : MapInfo::RPK_Base;
            continue;
          }
        }

        // We didn't find any match in our map information - generate a zero
        // size array section. The loaded pointer value serves as both the base
        // pointer and the section pointer of the new entry.
        // FIXME: MSVC 2013 seems to require this-> to find member CGF.
        llvm::Value *Ptr =
            this->CGF
                .EmitLoadOfLValue(this->CGF.EmitLValue(IE), SourceLocation())
                .getScalarVal();
        BasePointers.push_back({Ptr, VD});
        Pointers.push_back(Ptr);
        Sizes.push_back(llvm::Constant::getNullValue(this->CGF.SizeTy));
        Types.push_back(OMP_MAP_RETURN_PARAM | OMP_MAP_TARGET_PARAM);
      }

    for (auto &M : Info) {
      // We need to know when we generate information for the first component
      // associated with a capture, because the mapping flags depend on it.
      bool IsFirstComponentList = true;
      for (MapInfo &L : M.second) {
        assert(!L.Components.empty() &&
               "Not expecting declaration with no component lists.");

        // Remember the current base pointer index: it is the index of the
        // first entry the call below appends for this list.
        unsigned CurrentBasePointersIdx = BasePointers.size();
        // FIXME: MSVC 2013 seems to require this-> to find the member method.
        this->generateInfoForComponentList(
            L.MapType, L.MapTypeModifier, L.Components, BasePointers, Pointers,
            Sizes, Types, IsFirstComponentList, L.IsImplicit);

        // If this entry relates with a device pointer, set the relevant
        // declaration and add the 'return pointer' flag.
        // NOTE(review): this is only done for the first list of a group. For
        // members of 'this' all lists share the null key, so the list marked
        // in the use_device_ptr loop above is not necessarily the first one
        // of its group - confirm that skipping it here is intended.
        if (IsFirstComponentList &&
            L.ReturnDevicePointer != MapInfo::RPK_None) {
          // If the pointer is not the base of the map, we need to skip the
          // base. If it is a reference in a member field, we also need to skip
          // the map of the reference.
          if (L.ReturnDevicePointer != MapInfo::RPK_Base) {
            ++CurrentBasePointersIdx;
            if (L.ReturnDevicePointer == MapInfo::RPK_MemberReference)
              ++CurrentBasePointersIdx;
          }
          assert(BasePointers.size() > CurrentBasePointersIdx &&
                 "Unexpected number of mapped base pointers.");

          auto *RelevantVD = L.Components.back().getAssociatedDeclaration();
          assert(RelevantVD &&
                 "No relevant declaration related with device pointer??");

          BasePointers[CurrentBasePointersIdx].setDevicePtrDecl(RelevantVD);
          Types[CurrentBasePointersIdx] |= OMP_MAP_RETURN_PARAM;
        }
        IsFirstComponentList = false;
      }
    }
  }
If it is a reference to a declaration, we just 6784 // pass its value, otherwise, if it is a member expression, we need to map 6785 // 'to' the field. 6786 if (!VD) { 6787 auto It = DevPointersMap.find(VD); 6788 if (It != DevPointersMap.end()) { 6789 for (auto L : It->second) { 6790 generateInfoForComponentList( 6791 /*MapType=*/OMPC_MAP_to, /*MapTypeModifier=*/OMPC_MAP_unknown, L, 6792 BasePointers, Pointers, Sizes, Types, IsFirstComponentList, 6793 /*IsImplicit=*/false); 6794 IsFirstComponentList = false; 6795 } 6796 return; 6797 } 6798 } else if (DevPointersMap.count(VD)) { 6799 BasePointers.push_back({Arg, VD}); 6800 Pointers.push_back(Arg); 6801 Sizes.push_back(CGF.getTypeSize(CGF.getContext().VoidPtrTy)); 6802 Types.push_back(OMP_MAP_LITERAL | OMP_MAP_TARGET_PARAM); 6803 return; 6804 } 6805 6806 // FIXME: MSVC 2013 seems to require this-> to find member CurDir. 6807 for (auto *C : this->CurDir.getClausesOfKind<OMPMapClause>()) 6808 for (auto L : C->decl_component_lists(VD)) { 6809 assert(L.first == VD && 6810 "We got information for the wrong declaration??"); 6811 assert(!L.second.empty() && 6812 "Not expecting declaration with no component lists."); 6813 generateInfoForComponentList( 6814 C->getMapType(), C->getMapTypeModifier(), L.second, BasePointers, 6815 Pointers, Sizes, Types, IsFirstComponentList, C->isImplicit()); 6816 IsFirstComponentList = false; 6817 } 6818 6819 return; 6820 } 6821 6822 /// \brief Generate the default map information for a given capture \a CI, 6823 /// record field declaration \a RI and captured value \a CV. 6824 void generateDefaultMapInfo(const CapturedStmt::Capture &CI, 6825 const FieldDecl &RI, llvm::Value *CV, 6826 MapBaseValuesArrayTy &CurBasePointers, 6827 MapValuesArrayTy &CurPointers, 6828 MapValuesArrayTy &CurSizes, 6829 MapFlagsArrayTy &CurMapTypes) { 6830 6831 // Do the default mapping. 
6832 if (CI.capturesThis()) { 6833 CurBasePointers.push_back(CV); 6834 CurPointers.push_back(CV); 6835 const PointerType *PtrTy = cast<PointerType>(RI.getType().getTypePtr()); 6836 CurSizes.push_back(CGF.getTypeSize(PtrTy->getPointeeType())); 6837 // Default map type. 6838 CurMapTypes.push_back(OMP_MAP_TO | OMP_MAP_FROM); 6839 } else if (CI.capturesVariableByCopy()) { 6840 CurBasePointers.push_back(CV); 6841 CurPointers.push_back(CV); 6842 if (!RI.getType()->isAnyPointerType()) { 6843 // We have to signal to the runtime captures passed by value that are 6844 // not pointers. 6845 CurMapTypes.push_back(OMP_MAP_LITERAL); 6846 CurSizes.push_back(CGF.getTypeSize(RI.getType())); 6847 } else { 6848 // Pointers are implicitly mapped with a zero size and no flags 6849 // (other than first map that is added for all implicit maps). 6850 CurMapTypes.push_back(0u); 6851 CurSizes.push_back(llvm::Constant::getNullValue(CGF.SizeTy)); 6852 } 6853 } else { 6854 assert(CI.capturesVariable() && "Expected captured reference."); 6855 CurBasePointers.push_back(CV); 6856 CurPointers.push_back(CV); 6857 6858 const ReferenceType *PtrTy = 6859 cast<ReferenceType>(RI.getType().getTypePtr()); 6860 QualType ElementType = PtrTy->getPointeeType(); 6861 CurSizes.push_back(CGF.getTypeSize(ElementType)); 6862 // The default map type for a scalar/complex type is 'to' because by 6863 // default the value doesn't have to be retrieved. For an aggregate 6864 // type, the default is 'tofrom'. 6865 CurMapTypes.emplace_back(adjustMapModifiersForPrivateClauses( 6866 CI, ElementType->isAggregateType() ? (OMP_MAP_TO | OMP_MAP_FROM) 6867 : OMP_MAP_TO)); 6868 } 6869 // Every default map produces a single argument which is a target parameter. 6870 CurMapTypes.back() |= OMP_MAP_TARGET_PARAM; 6871 } 6872 }; 6873 6874 enum OpenMPOffloadingReservedDeviceIDs { 6875 /// \brief Device ID if the device was not defined, runtime should get it 6876 /// from environment variables in the spec. 
6877 OMP_DEVICEID_UNDEF = -1, 6878 }; 6879 } // anonymous namespace 6880 6881 /// \brief Emit the arrays used to pass the captures and map information to the 6882 /// offloading runtime library. If there is no map or capture information, 6883 /// return nullptr by reference. 6884 static void 6885 emitOffloadingArrays(CodeGenFunction &CGF, 6886 MappableExprsHandler::MapBaseValuesArrayTy &BasePointers, 6887 MappableExprsHandler::MapValuesArrayTy &Pointers, 6888 MappableExprsHandler::MapValuesArrayTy &Sizes, 6889 MappableExprsHandler::MapFlagsArrayTy &MapTypes, 6890 CGOpenMPRuntime::TargetDataInfo &Info) { 6891 auto &CGM = CGF.CGM; 6892 auto &Ctx = CGF.getContext(); 6893 6894 // Reset the array information. 6895 Info.clearArrayInfo(); 6896 Info.NumberOfPtrs = BasePointers.size(); 6897 6898 if (Info.NumberOfPtrs) { 6899 // Detect if we have any capture size requiring runtime evaluation of the 6900 // size so that a constant array could be eventually used. 6901 bool hasRuntimeEvaluationCaptureSize = false; 6902 for (auto *S : Sizes) 6903 if (!isa<llvm::Constant>(S)) { 6904 hasRuntimeEvaluationCaptureSize = true; 6905 break; 6906 } 6907 6908 llvm::APInt PointerNumAP(32, Info.NumberOfPtrs, /*isSigned=*/true); 6909 QualType PointerArrayType = 6910 Ctx.getConstantArrayType(Ctx.VoidPtrTy, PointerNumAP, ArrayType::Normal, 6911 /*IndexTypeQuals=*/0); 6912 6913 Info.BasePointersArray = 6914 CGF.CreateMemTemp(PointerArrayType, ".offload_baseptrs").getPointer(); 6915 Info.PointersArray = 6916 CGF.CreateMemTemp(PointerArrayType, ".offload_ptrs").getPointer(); 6917 6918 // If we don't have any VLA types or other types that require runtime 6919 // evaluation, we can use a constant array for the map sizes, otherwise we 6920 // need to fill up the arrays as we do for the pointers. 
6921 if (hasRuntimeEvaluationCaptureSize) { 6922 QualType SizeArrayType = Ctx.getConstantArrayType( 6923 Ctx.getSizeType(), PointerNumAP, ArrayType::Normal, 6924 /*IndexTypeQuals=*/0); 6925 Info.SizesArray = 6926 CGF.CreateMemTemp(SizeArrayType, ".offload_sizes").getPointer(); 6927 } else { 6928 // We expect all the sizes to be constant, so we collect them to create 6929 // a constant array. 6930 SmallVector<llvm::Constant *, 16> ConstSizes; 6931 for (auto S : Sizes) 6932 ConstSizes.push_back(cast<llvm::Constant>(S)); 6933 6934 auto *SizesArrayInit = llvm::ConstantArray::get( 6935 llvm::ArrayType::get(CGM.SizeTy, ConstSizes.size()), ConstSizes); 6936 auto *SizesArrayGbl = new llvm::GlobalVariable( 6937 CGM.getModule(), SizesArrayInit->getType(), 6938 /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage, 6939 SizesArrayInit, ".offload_sizes"); 6940 SizesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); 6941 Info.SizesArray = SizesArrayGbl; 6942 } 6943 6944 // The map types are always constant so we don't need to generate code to 6945 // fill arrays. Instead, we create an array constant. 
6946 llvm::Constant *MapTypesArrayInit = 6947 llvm::ConstantDataArray::get(CGF.Builder.getContext(), MapTypes); 6948 auto *MapTypesArrayGbl = new llvm::GlobalVariable( 6949 CGM.getModule(), MapTypesArrayInit->getType(), 6950 /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage, 6951 MapTypesArrayInit, ".offload_maptypes"); 6952 MapTypesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); 6953 Info.MapTypesArray = MapTypesArrayGbl; 6954 6955 for (unsigned i = 0; i < Info.NumberOfPtrs; ++i) { 6956 llvm::Value *BPVal = *BasePointers[i]; 6957 llvm::Value *BP = CGF.Builder.CreateConstInBoundsGEP2_32( 6958 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs), 6959 Info.BasePointersArray, 0, i); 6960 BP = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 6961 BP, BPVal->getType()->getPointerTo(/*AddrSpace=*/0)); 6962 Address BPAddr(BP, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy)); 6963 CGF.Builder.CreateStore(BPVal, BPAddr); 6964 6965 if (Info.requiresDevicePointerInfo()) 6966 if (auto *DevVD = BasePointers[i].getDevicePtrDecl()) 6967 Info.CaptureDeviceAddrMap.insert(std::make_pair(DevVD, BPAddr)); 6968 6969 llvm::Value *PVal = Pointers[i]; 6970 llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32( 6971 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs), 6972 Info.PointersArray, 0, i); 6973 P = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 6974 P, PVal->getType()->getPointerTo(/*AddrSpace=*/0)); 6975 Address PAddr(P, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy)); 6976 CGF.Builder.CreateStore(PVal, PAddr); 6977 6978 if (hasRuntimeEvaluationCaptureSize) { 6979 llvm::Value *S = CGF.Builder.CreateConstInBoundsGEP2_32( 6980 llvm::ArrayType::get(CGM.SizeTy, Info.NumberOfPtrs), 6981 Info.SizesArray, 6982 /*Idx0=*/0, 6983 /*Idx1=*/i); 6984 Address SAddr(S, Ctx.getTypeAlignInChars(Ctx.getSizeType())); 6985 CGF.Builder.CreateStore( 6986 CGF.Builder.CreateIntCast(Sizes[i], CGM.SizeTy, /*isSigned=*/true), 6987 SAddr); 6988 } 6989 } 6990 } 6991 } 6992 /// \brief Emit 
/// \brief Emit the arguments to be passed to the runtime library based on the
/// arrays of pointers, sizes and map types.
///
/// When \a Info describes at least one entry, each output receives a GEP to
/// element 0 of the corresponding array held by \a Info. Otherwise each
/// output receives a null pointer constant of the matching pointer type.
///
/// \param CGF Function in which the GEPs are emitted.
/// \param BasePointersArrayArg [out] Argument for the base pointers array.
/// \param PointersArrayArg [out] Argument for the pointers array.
/// \param SizesArrayArg [out] Argument for the sizes array.
/// \param MapTypesArrayArg [out] Argument for the map types array.
/// \param Info Arrays and entry count to build the arguments from.
static void emitOffloadingArraysArgument(
    CodeGenFunction &CGF, llvm::Value *&BasePointersArrayArg,
    llvm::Value *&PointersArrayArg, llvm::Value *&SizesArrayArg,
    llvm::Value *&MapTypesArrayArg, CGOpenMPRuntime::TargetDataInfo &Info) {
  auto &CGM = CGF.CGM;
  if (Info.NumberOfPtrs) {
    BasePointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
        llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
        Info.BasePointersArray,
        /*Idx0=*/0, /*Idx1=*/0);
    PointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
        llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
        Info.PointersArray,
        /*Idx0=*/0,
        /*Idx1=*/0);
    SizesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
        llvm::ArrayType::get(CGM.SizeTy, Info.NumberOfPtrs), Info.SizesArray,
        /*Idx0=*/0, /*Idx1=*/0);
    MapTypesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
        llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs),
        Info.MapTypesArray,
        /*Idx0=*/0,
        /*Idx1=*/0);
  } else {
    // No entries: pass typed null pointers instead of array addresses.
    BasePointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
    PointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
    SizesArrayArg = llvm::ConstantPointerNull::get(CGM.SizeTy->getPointerTo());
    MapTypesArrayArg =
        llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo());
  }
}

/// \brief Emit the code that launches a target region: an offloading runtime
/// call with a host fallback, or a plain host call when offloading is not
/// available.
///
/// Map information is collected for every capture of the directive's
/// captured statement. If \a OutlinedFnID is non-null, the offloading path is
/// emitted (guarded by \a IfCond when present): the offloading arrays are
/// built, one of the __tgt_target* runtime entry points is called, and the
/// host version \a OutlinedFn is called if that runtime call returns a
/// non-null value. If \a OutlinedFnID is null, only the host call is emitted.
/// Nothing is emitted when \a CGF has no insertion point.
///
/// \param CGF Function in which the code is emitted.
/// \param D Target directive being lowered.
/// \param OutlinedFn Host version of the outlined region; must be non-null.
/// \param OutlinedFnID ID of the target region, or null if offloading is not
/// supported for it.
/// \param IfCond Expression of the 'if' clause, or null.
/// \param Device Expression of the 'device' clause, or null.
/// \param CapturedVars Values of the captures, in capture order.
void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF,
                                     const OMPExecutableDirective &D,
                                     llvm::Value *OutlinedFn,
                                     llvm::Value *OutlinedFnID,
                                     const Expr *IfCond, const Expr *Device,
                                     ArrayRef<llvm::Value *> CapturedVars) {
  if (!CGF.HaveInsertPoint())
    return;

  assert(OutlinedFn && "Invalid outlined function!");

  // Fill up the arrays with all the captured variables.
  MappableExprsHandler::MapValuesArrayTy KernelArgs;
  MappableExprsHandler::MapBaseValuesArrayTy BasePointers;
  MappableExprsHandler::MapValuesArrayTy Pointers;
  MappableExprsHandler::MapValuesArrayTy Sizes;
  MappableExprsHandler::MapFlagsArrayTy MapTypes;

  // Scratch arrays holding the entries of the capture currently processed.
  MappableExprsHandler::MapBaseValuesArrayTy CurBasePointers;
  MappableExprsHandler::MapValuesArrayTy CurPointers;
  MappableExprsHandler::MapValuesArrayTy CurSizes;
  MappableExprsHandler::MapFlagsArrayTy CurMapTypes;

  // Get mappable expression information.
  MappableExprsHandler MEHandler(D, CGF);

  // Walk the captures, the fields of the captured record and the captured
  // values in lock step.
  const CapturedStmt &CS = *cast<CapturedStmt>(D.getAssociatedStmt());
  auto RI = CS.getCapturedRecordDecl()->field_begin();
  auto CV = CapturedVars.begin();
  for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(),
                                            CE = CS.capture_end();
       CI != CE; ++CI, ++RI, ++CV) {
    CurBasePointers.clear();
    CurPointers.clear();
    CurSizes.clear();
    CurMapTypes.clear();

    // VLA sizes are passed to the outlined region by copy and do not have map
    // information associated.
    if (CI->capturesVariableArrayType()) {
      CurBasePointers.push_back(*CV);
      CurPointers.push_back(*CV);
      CurSizes.push_back(CGF.getTypeSize(RI->getType()));
      // Copy to the device as an argument. No need to retrieve it.
      CurMapTypes.push_back(MappableExprsHandler::OMP_MAP_LITERAL |
                            MappableExprsHandler::OMP_MAP_TARGET_PARAM);
    } else {
      // If we have any information in the map clause, we use it, otherwise we
      // just do a default mapping.
      MEHandler.generateInfoForCapture(CI, *CV, CurBasePointers, CurPointers,
                                       CurSizes, CurMapTypes);
      if (CurBasePointers.empty())
        MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurBasePointers,
                                         CurPointers, CurSizes, CurMapTypes);
    }
    // We expect to have at least an element of information for this capture.
    assert(!CurBasePointers.empty() && "Non-existing map pointer for capture!");
    assert(CurBasePointers.size() == CurPointers.size() &&
           CurBasePointers.size() == CurSizes.size() &&
           CurBasePointers.size() == CurMapTypes.size() &&
           "Inconsistent map information sizes!");

    // The kernel args are always the first elements of the base pointers
    // associated with a capture.
    KernelArgs.push_back(*CurBasePointers.front());
    // We need to append the results of this capture to what we already have.
    BasePointers.append(CurBasePointers.begin(), CurBasePointers.end());
    Pointers.append(CurPointers.begin(), CurPointers.end());
    Sizes.append(CurSizes.begin(), CurSizes.end());
    MapTypes.append(CurMapTypes.begin(), CurMapTypes.end());
  }

  // Fill up the pointer arrays and transfer execution to the device.
  auto &&ThenGen = [this, &BasePointers, &Pointers, &Sizes, &MapTypes, Device,
                    OutlinedFn, OutlinedFnID, &D,
                    &KernelArgs](CodeGenFunction &CGF, PrePostActionTy &) {
    auto &RT = CGF.CGM.getOpenMPRuntime();
    // Emit the offloading arrays. The array fields of Info are passed as the
    // output arguments, so they end up holding the runtime call arguments.
    TargetDataInfo Info;
    emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info);
    emitOffloadingArraysArgument(CGF, Info.BasePointersArray,
                                 Info.PointersArray, Info.SizesArray,
                                 Info.MapTypesArray, Info);

    // On top of the arrays that were filled up, the target offloading call
    // takes as arguments the device id as well as the host pointer. The host
    // pointer is used by the runtime library to identify the current target
    // region, so it only has to be unique and not necessarily point to
    // anything. It could be the pointer to the outlined function that
    // implements the target region, but we aren't using that so that the
    // compiler doesn't need to keep that, and could therefore inline the host
    // function if proven worthwhile during optimization.

    // From this point on, we need to have an ID of the target region defined.
    assert(OutlinedFnID && "Invalid outlined function ID!");

    // Emit device ID if any. Without a 'device' clause the reserved
    // "undefined" ID is passed.
    llvm::Value *DeviceID;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Value *PointerNum = CGF.Builder.getInt32(BasePointers.size());

    // Return value of the runtime offloading call. It is assigned on both
    // branches below.
    llvm::Value *Return;

    auto *NumTeams = emitNumTeamsForTargetDirective(RT, CGF, D);
    auto *NumThreads = emitNumThreadsForTargetDirective(RT, CGF, D);

    // A 'nowait' clause selects the *_nowait variant of the entry point.
    bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
    // The target region is an outlined function launched by the runtime
    // via calls __tgt_target() or __tgt_target_teams().
    //
    // __tgt_target() launches a target region with one team and one thread,
    // executing a serial region.  This master thread may in turn launch
    // more threads within its team upon encountering a parallel region,
    // however, no additional teams can be launched on the device.
    //
    // __tgt_target_teams() launches a target region with one or more teams,
    // each with one or more threads.  This call is required for target
    // constructs such as:
    //  'target teams'
    //  'target' / 'teams'
    //  'target teams distribute parallel for'
    //  'target parallel'
    // and so on.
    //
    // Note that on the host and CPU targets, the runtime implementation of
    // these calls simply call the outlined function without forking threads.
    // The outlined functions themselves have runtime calls to
    // __kmpc_fork_teams() and __kmpc_fork() for this purpose, codegen'd by
    // the compiler in emitTeamsCall() and emitParallelCall().
    //
    // In contrast, on the NVPTX target, the implementation of
    // __tgt_target_teams() launches a GPU kernel with the requested number
    // of teams and threads so no additional calls to the runtime are required.
    if (NumTeams) {
      // If we have NumTeams defined this means that we have an enclosed teams
      // region. Therefore we also expect to have NumThreads defined. These two
      // values should be defined in the presence of a teams directive,
      // regardless of having any clauses associated. If the user is using teams
      // but no clauses, these two values will be the default that should be
      // passed to the runtime library - a 32-bit integer with the value zero.
      assert(NumThreads && "Thread limit expression should be available along "
                           "with number of teams.");
      llvm::Value *OffloadingArgs[] = {
          DeviceID,           OutlinedFnID,
          PointerNum,         Info.BasePointersArray,
          Info.PointersArray, Info.SizesArray,
          Info.MapTypesArray, NumTeams,
          NumThreads};
      Return = CGF.EmitRuntimeCall(
          RT.createRuntimeFunction(HasNowait ? OMPRTL__tgt_target_teams_nowait
                                             : OMPRTL__tgt_target_teams),
          OffloadingArgs);
    } else {
      // No teams information: use the plain entry point, which takes neither
      // a team count nor a thread limit.
      llvm::Value *OffloadingArgs[] = {
          DeviceID,           OutlinedFnID,
          PointerNum,         Info.BasePointersArray,
          Info.PointersArray, Info.SizesArray,
          Info.MapTypesArray};
      Return = CGF.EmitRuntimeCall(
          RT.createRuntimeFunction(HasNowait ? OMPRTL__tgt_target_nowait
                                             : OMPRTL__tgt_target),
          OffloadingArgs);
    }

    // Check the error code and execute the host version if required. A
    // non-null return value is treated as a failed offload.
    llvm::BasicBlock *OffloadFailedBlock =
        CGF.createBasicBlock("omp_offload.failed");
    llvm::BasicBlock *OffloadContBlock =
        CGF.createBasicBlock("omp_offload.cont");
    llvm::Value *Failed = CGF.Builder.CreateIsNotNull(Return);
    CGF.Builder.CreateCondBr(Failed, OffloadFailedBlock, OffloadContBlock);

    CGF.EmitBlock(OffloadFailedBlock);
    emitOutlinedFunctionCall(CGF, D.getLocStart(), OutlinedFn, KernelArgs);
    CGF.EmitBranch(OffloadContBlock);

    CGF.EmitBlock(OffloadContBlock, /*IsFinished=*/true);
  };

  // Notify that the host version must be executed.
  auto &&ElseGen = [this, &D, OutlinedFn, &KernelArgs](CodeGenFunction &CGF,
                                                       PrePostActionTy &) {
    emitOutlinedFunctionCall(CGF, D.getLocStart(), OutlinedFn,
                             KernelArgs);
  };

  // If we have a target function ID it means that we need to support
  // offloading, otherwise, just execute on the host. We need to execute on host
  // regardless of the conditional in the if clause if, e.g., the user does not
  // specify target triples.
  if (OutlinedFnID) {
    if (IfCond)
      emitOMPIfClause(CGF, IfCond, ThenGen, ElseGen);
    else {
      RegionCodeGenTy ThenRCG(ThenGen);
      ThenRCG(CGF);
    }
  } else {
    RegionCodeGenTy ElseRCG(ElseGen);
    ElseRCG(CGF);
  }
}
7243 bool requiresDeviceCodegen = 7244 isa<OMPExecutableDirective>(S) && 7245 isOpenMPTargetExecutionDirective( 7246 cast<OMPExecutableDirective>(S)->getDirectiveKind()); 7247 7248 if (requiresDeviceCodegen) { 7249 auto &E = *cast<OMPExecutableDirective>(S); 7250 unsigned DeviceID; 7251 unsigned FileID; 7252 unsigned Line; 7253 getTargetEntryUniqueInfo(CGM.getContext(), E.getLocStart(), DeviceID, 7254 FileID, Line); 7255 7256 // Is this a target region that should not be emitted as an entry point? If 7257 // so just signal we are done with this target region. 7258 if (!OffloadEntriesInfoManager.hasTargetRegionEntryInfo(DeviceID, FileID, 7259 ParentName, Line)) 7260 return; 7261 7262 switch (S->getStmtClass()) { 7263 case Stmt::OMPTargetDirectiveClass: 7264 CodeGenFunction::EmitOMPTargetDeviceFunction( 7265 CGM, ParentName, cast<OMPTargetDirective>(*S)); 7266 break; 7267 case Stmt::OMPTargetParallelDirectiveClass: 7268 CodeGenFunction::EmitOMPTargetParallelDeviceFunction( 7269 CGM, ParentName, cast<OMPTargetParallelDirective>(*S)); 7270 break; 7271 case Stmt::OMPTargetTeamsDirectiveClass: 7272 CodeGenFunction::EmitOMPTargetTeamsDeviceFunction( 7273 CGM, ParentName, cast<OMPTargetTeamsDirective>(*S)); 7274 break; 7275 case Stmt::OMPTargetTeamsDistributeDirectiveClass: 7276 CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction( 7277 CGM, ParentName, cast<OMPTargetTeamsDistributeDirective>(*S)); 7278 break; 7279 case Stmt::OMPTargetTeamsDistributeSimdDirectiveClass: 7280 CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction( 7281 CGM, ParentName, cast<OMPTargetTeamsDistributeSimdDirective>(*S)); 7282 break; 7283 case Stmt::OMPTargetParallelForDirectiveClass: 7284 CodeGenFunction::EmitOMPTargetParallelForDeviceFunction( 7285 CGM, ParentName, cast<OMPTargetParallelForDirective>(*S)); 7286 break; 7287 case Stmt::OMPTargetParallelForSimdDirectiveClass: 7288 CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction( 7289 CGM, ParentName, 
cast<OMPTargetParallelForSimdDirective>(*S)); 7290 break; 7291 case Stmt::OMPTargetSimdDirectiveClass: 7292 CodeGenFunction::EmitOMPTargetSimdDeviceFunction( 7293 CGM, ParentName, cast<OMPTargetSimdDirective>(*S)); 7294 break; 7295 case Stmt::OMPTargetTeamsDistributeParallelForDirectiveClass: 7296 CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction( 7297 CGM, ParentName, 7298 cast<OMPTargetTeamsDistributeParallelForDirective>(*S)); 7299 break; 7300 default: 7301 llvm_unreachable("Unknown target directive for OpenMP device codegen."); 7302 } 7303 return; 7304 } 7305 7306 if (const OMPExecutableDirective *E = dyn_cast<OMPExecutableDirective>(S)) { 7307 if (!E->hasAssociatedStmt()) 7308 return; 7309 7310 scanForTargetRegionsFunctions( 7311 cast<CapturedStmt>(E->getAssociatedStmt())->getCapturedStmt(), 7312 ParentName); 7313 return; 7314 } 7315 7316 // If this is a lambda function, look into its body. 7317 if (auto *L = dyn_cast<LambdaExpr>(S)) 7318 S = L->getBody(); 7319 7320 // Keep looking for target regions recursively. 7321 for (auto *II : S->children()) 7322 scanForTargetRegionsFunctions(II, ParentName); 7323 } 7324 7325 bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) { 7326 auto &FD = *cast<FunctionDecl>(GD.getDecl()); 7327 7328 // If emitting code for the host, we do not process FD here. Instead we do 7329 // the normal code generation. 7330 if (!CGM.getLangOpts().OpenMPIsDevice) 7331 return false; 7332 7333 // Try to detect target regions in the function. 7334 scanForTargetRegionsFunctions(FD.getBody(), CGM.getMangledName(GD)); 7335 7336 // We should not emit any function other that the ones created during the 7337 // scanning. Therefore, we signal that this function is completely dealt 7338 // with. 
7339 return true; 7340 } 7341 7342 bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) { 7343 if (!CGM.getLangOpts().OpenMPIsDevice) 7344 return false; 7345 7346 // Check if there are Ctors/Dtors in this declaration and look for target 7347 // regions in it. We use the complete variant to produce the kernel name 7348 // mangling. 7349 QualType RDTy = cast<VarDecl>(GD.getDecl())->getType(); 7350 if (auto *RD = RDTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl()) { 7351 for (auto *Ctor : RD->ctors()) { 7352 StringRef ParentName = 7353 CGM.getMangledName(GlobalDecl(Ctor, Ctor_Complete)); 7354 scanForTargetRegionsFunctions(Ctor->getBody(), ParentName); 7355 } 7356 auto *Dtor = RD->getDestructor(); 7357 if (Dtor) { 7358 StringRef ParentName = 7359 CGM.getMangledName(GlobalDecl(Dtor, Dtor_Complete)); 7360 scanForTargetRegionsFunctions(Dtor->getBody(), ParentName); 7361 } 7362 } 7363 7364 // If we are in target mode, we do not emit any global (declare target is not 7365 // implemented yet). Therefore we signal that GD was processed in this case. 7366 return true; 7367 } 7368 7369 bool CGOpenMPRuntime::emitTargetGlobal(GlobalDecl GD) { 7370 auto *VD = GD.getDecl(); 7371 if (isa<FunctionDecl>(VD)) 7372 return emitTargetFunctions(GD); 7373 7374 return emitTargetGlobalVariable(GD); 7375 } 7376 7377 llvm::Function *CGOpenMPRuntime::emitRegistrationFunction() { 7378 // If we have offloading in the current module, we need to emit the entries 7379 // now and register the offloading descriptor. 7380 createOffloadEntriesAndInfoMetadata(); 7381 7382 // Create and register the offloading binary descriptors. This is the main 7383 // entity that captures all the information about offloading in the current 7384 // compilation unit. 
7385 return createOffloadingBinaryDescriptorRegistration(); 7386 } 7387 7388 void CGOpenMPRuntime::emitTeamsCall(CodeGenFunction &CGF, 7389 const OMPExecutableDirective &D, 7390 SourceLocation Loc, 7391 llvm::Value *OutlinedFn, 7392 ArrayRef<llvm::Value *> CapturedVars) { 7393 if (!CGF.HaveInsertPoint()) 7394 return; 7395 7396 auto *RTLoc = emitUpdateLocation(CGF, Loc); 7397 CodeGenFunction::RunCleanupsScope Scope(CGF); 7398 7399 // Build call __kmpc_fork_teams(loc, n, microtask, var1, .., varn); 7400 llvm::Value *Args[] = { 7401 RTLoc, 7402 CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars 7403 CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy())}; 7404 llvm::SmallVector<llvm::Value *, 16> RealArgs; 7405 RealArgs.append(std::begin(Args), std::end(Args)); 7406 RealArgs.append(CapturedVars.begin(), CapturedVars.end()); 7407 7408 auto RTLFn = createRuntimeFunction(OMPRTL__kmpc_fork_teams); 7409 CGF.EmitRuntimeCall(RTLFn, RealArgs); 7410 } 7411 7412 void CGOpenMPRuntime::emitNumTeamsClause(CodeGenFunction &CGF, 7413 const Expr *NumTeams, 7414 const Expr *ThreadLimit, 7415 SourceLocation Loc) { 7416 if (!CGF.HaveInsertPoint()) 7417 return; 7418 7419 auto *RTLoc = emitUpdateLocation(CGF, Loc); 7420 7421 llvm::Value *NumTeamsVal = 7422 (NumTeams) 7423 ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(NumTeams), 7424 CGF.CGM.Int32Ty, /* isSigned = */ true) 7425 : CGF.Builder.getInt32(0); 7426 7427 llvm::Value *ThreadLimitVal = 7428 (ThreadLimit) 7429 ? 
CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit), 7430 CGF.CGM.Int32Ty, /* isSigned = */ true) 7431 : CGF.Builder.getInt32(0); 7432 7433 // Build call __kmpc_push_num_teamss(&loc, global_tid, num_teams, thread_limit) 7434 llvm::Value *PushNumTeamsArgs[] = {RTLoc, getThreadID(CGF, Loc), NumTeamsVal, 7435 ThreadLimitVal}; 7436 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_num_teams), 7437 PushNumTeamsArgs); 7438 } 7439 7440 void CGOpenMPRuntime::emitTargetDataCalls( 7441 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, 7442 const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) { 7443 if (!CGF.HaveInsertPoint()) 7444 return; 7445 7446 // Action used to replace the default codegen action and turn privatization 7447 // off. 7448 PrePostActionTy NoPrivAction; 7449 7450 // Generate the code for the opening of the data environment. Capture all the 7451 // arguments of the runtime call by reference because they are used in the 7452 // closing of the region. 7453 auto &&BeginThenGen = [this, &D, Device, &Info, 7454 &CodeGen](CodeGenFunction &CGF, PrePostActionTy &) { 7455 // Fill up the arrays with all the mapped variables. 7456 MappableExprsHandler::MapBaseValuesArrayTy BasePointers; 7457 MappableExprsHandler::MapValuesArrayTy Pointers; 7458 MappableExprsHandler::MapValuesArrayTy Sizes; 7459 MappableExprsHandler::MapFlagsArrayTy MapTypes; 7460 7461 // Get map clause information. 7462 MappableExprsHandler MCHandler(D, CGF); 7463 MCHandler.generateAllInfo(BasePointers, Pointers, Sizes, MapTypes); 7464 7465 // Fill up the arrays and create the arguments. 
7466 emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info); 7467 7468 llvm::Value *BasePointersArrayArg = nullptr; 7469 llvm::Value *PointersArrayArg = nullptr; 7470 llvm::Value *SizesArrayArg = nullptr; 7471 llvm::Value *MapTypesArrayArg = nullptr; 7472 emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg, 7473 SizesArrayArg, MapTypesArrayArg, Info); 7474 7475 // Emit device ID if any. 7476 llvm::Value *DeviceID = nullptr; 7477 if (Device) { 7478 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device), 7479 CGF.Int64Ty, /*isSigned=*/true); 7480 } else { 7481 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF); 7482 } 7483 7484 // Emit the number of elements in the offloading arrays. 7485 auto *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs); 7486 7487 llvm::Value *OffloadingArgs[] = { 7488 DeviceID, PointerNum, BasePointersArrayArg, 7489 PointersArrayArg, SizesArrayArg, MapTypesArrayArg}; 7490 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_target_data_begin), 7491 OffloadingArgs); 7492 7493 // If device pointer privatization is required, emit the body of the region 7494 // here. It will have to be duplicated: with and without privatization. 7495 if (!Info.CaptureDeviceAddrMap.empty()) 7496 CodeGen(CGF); 7497 }; 7498 7499 // Generate code for the closing of the data region. 7500 auto &&EndThenGen = [this, Device, &Info](CodeGenFunction &CGF, 7501 PrePostActionTy &) { 7502 assert(Info.isValid() && "Invalid data environment closing arguments."); 7503 7504 llvm::Value *BasePointersArrayArg = nullptr; 7505 llvm::Value *PointersArrayArg = nullptr; 7506 llvm::Value *SizesArrayArg = nullptr; 7507 llvm::Value *MapTypesArrayArg = nullptr; 7508 emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg, 7509 SizesArrayArg, MapTypesArrayArg, Info); 7510 7511 // Emit device ID if any. 
7512 llvm::Value *DeviceID = nullptr; 7513 if (Device) { 7514 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device), 7515 CGF.Int64Ty, /*isSigned=*/true); 7516 } else { 7517 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF); 7518 } 7519 7520 // Emit the number of elements in the offloading arrays. 7521 auto *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs); 7522 7523 llvm::Value *OffloadingArgs[] = { 7524 DeviceID, PointerNum, BasePointersArrayArg, 7525 PointersArrayArg, SizesArrayArg, MapTypesArrayArg}; 7526 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_target_data_end), 7527 OffloadingArgs); 7528 }; 7529 7530 // If we need device pointer privatization, we need to emit the body of the 7531 // region with no privatization in the 'else' branch of the conditional. 7532 // Otherwise, we don't have to do anything. 7533 auto &&BeginElseGen = [&Info, &CodeGen, &NoPrivAction](CodeGenFunction &CGF, 7534 PrePostActionTy &) { 7535 if (!Info.CaptureDeviceAddrMap.empty()) { 7536 CodeGen.setAction(NoPrivAction); 7537 CodeGen(CGF); 7538 } 7539 }; 7540 7541 // We don't have to do anything to close the region if the if clause evaluates 7542 // to false. 7543 auto &&EndElseGen = [](CodeGenFunction &CGF, PrePostActionTy &) {}; 7544 7545 if (IfCond) { 7546 emitOMPIfClause(CGF, IfCond, BeginThenGen, BeginElseGen); 7547 } else { 7548 RegionCodeGenTy RCG(BeginThenGen); 7549 RCG(CGF); 7550 } 7551 7552 // If we don't require privatization of device pointers, we emit the body in 7553 // between the runtime calls. This avoids duplicating the body code. 
7554 if (Info.CaptureDeviceAddrMap.empty()) { 7555 CodeGen.setAction(NoPrivAction); 7556 CodeGen(CGF); 7557 } 7558 7559 if (IfCond) { 7560 emitOMPIfClause(CGF, IfCond, EndThenGen, EndElseGen); 7561 } else { 7562 RegionCodeGenTy RCG(EndThenGen); 7563 RCG(CGF); 7564 } 7565 } 7566 7567 void CGOpenMPRuntime::emitTargetDataStandAloneCall( 7568 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, 7569 const Expr *Device) { 7570 if (!CGF.HaveInsertPoint()) 7571 return; 7572 7573 assert((isa<OMPTargetEnterDataDirective>(D) || 7574 isa<OMPTargetExitDataDirective>(D) || 7575 isa<OMPTargetUpdateDirective>(D)) && 7576 "Expecting either target enter, exit data, or update directives."); 7577 7578 CodeGenFunction::OMPTargetDataInfo InputInfo; 7579 llvm::Value *MapTypesArray = nullptr; 7580 // Generate the code for the opening of the data environment. 7581 auto &&ThenGen = [this, &D, Device, &InputInfo, 7582 &MapTypesArray](CodeGenFunction &CGF, PrePostActionTy &) { 7583 // Emit device ID if any. 7584 llvm::Value *DeviceID = nullptr; 7585 if (Device) { 7586 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device), 7587 CGF.Int64Ty, /*isSigned=*/true); 7588 } else { 7589 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF); 7590 } 7591 7592 // Emit the number of elements in the offloading arrays. 7593 llvm::Constant *PointerNum = 7594 CGF.Builder.getInt32(InputInfo.NumberOfTargetItems); 7595 7596 llvm::Value *OffloadingArgs[] = {DeviceID, 7597 PointerNum, 7598 InputInfo.BasePointersArray.getPointer(), 7599 InputInfo.PointersArray.getPointer(), 7600 InputInfo.SizesArray.getPointer(), 7601 MapTypesArray}; 7602 7603 // Select the right runtime function call for each expected standalone 7604 // directive. 
7605 const bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>(); 7606 OpenMPRTLFunction RTLFn; 7607 switch (D.getDirectiveKind()) { 7608 default: 7609 llvm_unreachable("Unexpected standalone target data directive."); 7610 break; 7611 case OMPD_target_enter_data: 7612 RTLFn = HasNowait ? OMPRTL__tgt_target_data_begin_nowait 7613 : OMPRTL__tgt_target_data_begin; 7614 break; 7615 case OMPD_target_exit_data: 7616 RTLFn = HasNowait ? OMPRTL__tgt_target_data_end_nowait 7617 : OMPRTL__tgt_target_data_end; 7618 break; 7619 case OMPD_target_update: 7620 RTLFn = HasNowait ? OMPRTL__tgt_target_data_update_nowait 7621 : OMPRTL__tgt_target_data_update; 7622 break; 7623 } 7624 CGF.EmitRuntimeCall(createRuntimeFunction(RTLFn), OffloadingArgs); 7625 }; 7626 7627 auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray]( 7628 CodeGenFunction &CGF, PrePostActionTy &) { 7629 // Fill up the arrays with all the mapped variables. 7630 MappableExprsHandler::MapBaseValuesArrayTy BasePointers; 7631 MappableExprsHandler::MapValuesArrayTy Pointers; 7632 MappableExprsHandler::MapValuesArrayTy Sizes; 7633 MappableExprsHandler::MapFlagsArrayTy MapTypes; 7634 7635 // Get map clause information. 7636 MappableExprsHandler MEHandler(D, CGF); 7637 MEHandler.generateAllInfo(BasePointers, Pointers, Sizes, MapTypes); 7638 7639 TargetDataInfo Info; 7640 // Fill up the arrays and create the arguments. 
7641 emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info); 7642 emitOffloadingArraysArgument(CGF, Info.BasePointersArray, 7643 Info.PointersArray, Info.SizesArray, 7644 Info.MapTypesArray, Info); 7645 InputInfo.NumberOfTargetItems = Info.NumberOfPtrs; 7646 InputInfo.BasePointersArray = 7647 Address(Info.BasePointersArray, CGM.getPointerAlign()); 7648 InputInfo.PointersArray = 7649 Address(Info.PointersArray, CGM.getPointerAlign()); 7650 InputInfo.SizesArray = 7651 Address(Info.SizesArray, CGM.getPointerAlign()); 7652 MapTypesArray = Info.MapTypesArray; 7653 if (D.hasClausesOfKind<OMPDependClause>()) 7654 CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo); 7655 else 7656 emitInlinedDirective(CGF, OMPD_target_update, ThenGen); 7657 }; 7658 7659 if (IfCond) 7660 emitOMPIfClause(CGF, IfCond, TargetThenGen, 7661 [](CodeGenFunction &CGF, PrePostActionTy &) {}); 7662 else { 7663 RegionCodeGenTy ThenRCG(TargetThenGen); 7664 ThenRCG(CGF); 7665 } 7666 } 7667 7668 namespace { 7669 /// Kind of parameter in a function with 'declare simd' directive. 7670 enum ParamKindTy { LinearWithVarStride, Linear, Uniform, Vector }; 7671 /// Attribute set of the parameter. 7672 struct ParamAttrTy { 7673 ParamKindTy Kind = Vector; 7674 llvm::APSInt StrideOrArg; 7675 llvm::APSInt Alignment; 7676 }; 7677 } // namespace 7678 7679 static unsigned evaluateCDTSize(const FunctionDecl *FD, 7680 ArrayRef<ParamAttrTy> ParamAttrs) { 7681 // Every vector variant of a SIMD-enabled function has a vector length (VLEN). 7682 // If OpenMP clause "simdlen" is used, the VLEN is the value of the argument 7683 // of that clause. The VLEN value must be power of 2. 7684 // In other case the notion of the function`s "characteristic data type" (CDT) 7685 // is used to compute the vector length. 7686 // CDT is defined in the following order: 7687 // a) For non-void function, the CDT is the return type. 
7688 // b) If the function has any non-uniform, non-linear parameters, then the 7689 // CDT is the type of the first such parameter. 7690 // c) If the CDT determined by a) or b) above is struct, union, or class 7691 // type which is pass-by-value (except for the type that maps to the 7692 // built-in complex data type), the characteristic data type is int. 7693 // d) If none of the above three cases is applicable, the CDT is int. 7694 // The VLEN is then determined based on the CDT and the size of vector 7695 // register of that ISA for which current vector version is generated. The 7696 // VLEN is computed using the formula below: 7697 // VLEN = sizeof(vector_register) / sizeof(CDT), 7698 // where vector register size specified in section 3.2.1 Registers and the 7699 // Stack Frame of original AMD64 ABI document. 7700 QualType RetType = FD->getReturnType(); 7701 if (RetType.isNull()) 7702 return 0; 7703 ASTContext &C = FD->getASTContext(); 7704 QualType CDT; 7705 if (!RetType.isNull() && !RetType->isVoidType()) 7706 CDT = RetType; 7707 else { 7708 unsigned Offset = 0; 7709 if (auto *MD = dyn_cast<CXXMethodDecl>(FD)) { 7710 if (ParamAttrs[Offset].Kind == Vector) 7711 CDT = C.getPointerType(C.getRecordType(MD->getParent())); 7712 ++Offset; 7713 } 7714 if (CDT.isNull()) { 7715 for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) { 7716 if (ParamAttrs[I + Offset].Kind == Vector) { 7717 CDT = FD->getParamDecl(I)->getType(); 7718 break; 7719 } 7720 } 7721 } 7722 } 7723 if (CDT.isNull()) 7724 CDT = C.IntTy; 7725 CDT = CDT->getCanonicalTypeUnqualified(); 7726 if (CDT->isRecordType() || CDT->isUnionType()) 7727 CDT = C.IntTy; 7728 return C.getTypeSize(CDT); 7729 } 7730 7731 static void 7732 emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn, 7733 const llvm::APSInt &VLENVal, 7734 ArrayRef<ParamAttrTy> ParamAttrs, 7735 OMPDeclareSimdDeclAttr::BranchStateTy State) { 7736 struct ISADataTy { 7737 char ISA; 7738 unsigned VecRegSize; 7739 }; 7740 ISADataTy 
ISAData[] = { 7741 { 7742 'b', 128 7743 }, // SSE 7744 { 7745 'c', 256 7746 }, // AVX 7747 { 7748 'd', 256 7749 }, // AVX2 7750 { 7751 'e', 512 7752 }, // AVX512 7753 }; 7754 llvm::SmallVector<char, 2> Masked; 7755 switch (State) { 7756 case OMPDeclareSimdDeclAttr::BS_Undefined: 7757 Masked.push_back('N'); 7758 Masked.push_back('M'); 7759 break; 7760 case OMPDeclareSimdDeclAttr::BS_Notinbranch: 7761 Masked.push_back('N'); 7762 break; 7763 case OMPDeclareSimdDeclAttr::BS_Inbranch: 7764 Masked.push_back('M'); 7765 break; 7766 } 7767 for (auto Mask : Masked) { 7768 for (auto &Data : ISAData) { 7769 SmallString<256> Buffer; 7770 llvm::raw_svector_ostream Out(Buffer); 7771 Out << "_ZGV" << Data.ISA << Mask; 7772 if (!VLENVal) { 7773 Out << llvm::APSInt::getUnsigned(Data.VecRegSize / 7774 evaluateCDTSize(FD, ParamAttrs)); 7775 } else 7776 Out << VLENVal; 7777 for (auto &ParamAttr : ParamAttrs) { 7778 switch (ParamAttr.Kind){ 7779 case LinearWithVarStride: 7780 Out << 's' << ParamAttr.StrideOrArg; 7781 break; 7782 case Linear: 7783 Out << 'l'; 7784 if (!!ParamAttr.StrideOrArg) 7785 Out << ParamAttr.StrideOrArg; 7786 break; 7787 case Uniform: 7788 Out << 'u'; 7789 break; 7790 case Vector: 7791 Out << 'v'; 7792 break; 7793 } 7794 if (!!ParamAttr.Alignment) 7795 Out << 'a' << ParamAttr.Alignment; 7796 } 7797 Out << '_' << Fn->getName(); 7798 Fn->addFnAttr(Out.str()); 7799 } 7800 } 7801 } 7802 7803 void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD, 7804 llvm::Function *Fn) { 7805 ASTContext &C = CGM.getContext(); 7806 FD = FD->getCanonicalDecl(); 7807 // Map params to their positions in function decl. 
7808 llvm::DenseMap<const Decl *, unsigned> ParamPositions; 7809 if (isa<CXXMethodDecl>(FD)) 7810 ParamPositions.insert({FD, 0}); 7811 unsigned ParamPos = ParamPositions.size(); 7812 for (auto *P : FD->parameters()) { 7813 ParamPositions.insert({P->getCanonicalDecl(), ParamPos}); 7814 ++ParamPos; 7815 } 7816 for (auto *Attr : FD->specific_attrs<OMPDeclareSimdDeclAttr>()) { 7817 llvm::SmallVector<ParamAttrTy, 8> ParamAttrs(ParamPositions.size()); 7818 // Mark uniform parameters. 7819 for (auto *E : Attr->uniforms()) { 7820 E = E->IgnoreParenImpCasts(); 7821 unsigned Pos; 7822 if (isa<CXXThisExpr>(E)) 7823 Pos = ParamPositions[FD]; 7824 else { 7825 auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl()) 7826 ->getCanonicalDecl(); 7827 Pos = ParamPositions[PVD]; 7828 } 7829 ParamAttrs[Pos].Kind = Uniform; 7830 } 7831 // Get alignment info. 7832 auto NI = Attr->alignments_begin(); 7833 for (auto *E : Attr->aligneds()) { 7834 E = E->IgnoreParenImpCasts(); 7835 unsigned Pos; 7836 QualType ParmTy; 7837 if (isa<CXXThisExpr>(E)) { 7838 Pos = ParamPositions[FD]; 7839 ParmTy = E->getType(); 7840 } else { 7841 auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl()) 7842 ->getCanonicalDecl(); 7843 Pos = ParamPositions[PVD]; 7844 ParmTy = PVD->getType(); 7845 } 7846 ParamAttrs[Pos].Alignment = 7847 (*NI) ? (*NI)->EvaluateKnownConstInt(C) 7848 : llvm::APSInt::getUnsigned( 7849 C.toCharUnitsFromBits(C.getOpenMPDefaultSimdAlign(ParmTy)) 7850 .getQuantity()); 7851 ++NI; 7852 } 7853 // Mark linear parameters. 
7854 auto SI = Attr->steps_begin(); 7855 auto MI = Attr->modifiers_begin(); 7856 for (auto *E : Attr->linears()) { 7857 E = E->IgnoreParenImpCasts(); 7858 unsigned Pos; 7859 if (isa<CXXThisExpr>(E)) 7860 Pos = ParamPositions[FD]; 7861 else { 7862 auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl()) 7863 ->getCanonicalDecl(); 7864 Pos = ParamPositions[PVD]; 7865 } 7866 auto &ParamAttr = ParamAttrs[Pos]; 7867 ParamAttr.Kind = Linear; 7868 if (*SI) { 7869 if (!(*SI)->EvaluateAsInt(ParamAttr.StrideOrArg, C, 7870 Expr::SE_AllowSideEffects)) { 7871 if (auto *DRE = cast<DeclRefExpr>((*SI)->IgnoreParenImpCasts())) { 7872 if (auto *StridePVD = cast<ParmVarDecl>(DRE->getDecl())) { 7873 ParamAttr.Kind = LinearWithVarStride; 7874 ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned( 7875 ParamPositions[StridePVD->getCanonicalDecl()]); 7876 } 7877 } 7878 } 7879 } 7880 ++SI; 7881 ++MI; 7882 } 7883 llvm::APSInt VLENVal; 7884 if (const Expr *VLEN = Attr->getSimdlen()) 7885 VLENVal = VLEN->EvaluateKnownConstInt(C); 7886 OMPDeclareSimdDeclAttr::BranchStateTy State = Attr->getBranchState(); 7887 if (CGM.getTriple().getArch() == llvm::Triple::x86 || 7888 CGM.getTriple().getArch() == llvm::Triple::x86_64) 7889 emitX86DeclareSimdFunction(FD, Fn, VLENVal, ParamAttrs, State); 7890 } 7891 } 7892 7893 namespace { 7894 /// Cleanup action for doacross support. 
7895 class DoacrossCleanupTy final : public EHScopeStack::Cleanup { 7896 public: 7897 static const int DoacrossFinArgs = 2; 7898 7899 private: 7900 llvm::Value *RTLFn; 7901 llvm::Value *Args[DoacrossFinArgs]; 7902 7903 public: 7904 DoacrossCleanupTy(llvm::Value *RTLFn, ArrayRef<llvm::Value *> CallArgs) 7905 : RTLFn(RTLFn) { 7906 assert(CallArgs.size() == DoacrossFinArgs); 7907 std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args)); 7908 } 7909 void Emit(CodeGenFunction &CGF, Flags /*flags*/) override { 7910 if (!CGF.HaveInsertPoint()) 7911 return; 7912 CGF.EmitRuntimeCall(RTLFn, Args); 7913 } 7914 }; 7915 } // namespace 7916 7917 void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF, 7918 const OMPLoopDirective &D) { 7919 if (!CGF.HaveInsertPoint()) 7920 return; 7921 7922 ASTContext &C = CGM.getContext(); 7923 QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true); 7924 RecordDecl *RD; 7925 if (KmpDimTy.isNull()) { 7926 // Build struct kmp_dim { // loop bounds info casted to kmp_int64 7927 // kmp_int64 lo; // lower 7928 // kmp_int64 up; // upper 7929 // kmp_int64 st; // stride 7930 // }; 7931 RD = C.buildImplicitRecord("kmp_dim"); 7932 RD->startDefinition(); 7933 addFieldToRecordDecl(C, RD, Int64Ty); 7934 addFieldToRecordDecl(C, RD, Int64Ty); 7935 addFieldToRecordDecl(C, RD, Int64Ty); 7936 RD->completeDefinition(); 7937 KmpDimTy = C.getRecordType(RD); 7938 } else 7939 RD = cast<RecordDecl>(KmpDimTy->getAsTagDecl()); 7940 7941 Address DimsAddr = CGF.CreateMemTemp(KmpDimTy, "dims"); 7942 CGF.EmitNullInitialization(DimsAddr, KmpDimTy); 7943 enum { LowerFD = 0, UpperFD, StrideFD }; 7944 // Fill dims with data. 
7945 LValue DimsLVal = CGF.MakeAddrLValue(DimsAddr, KmpDimTy); 7946 // dims.upper = num_iterations; 7947 LValue UpperLVal = 7948 CGF.EmitLValueForField(DimsLVal, *std::next(RD->field_begin(), UpperFD)); 7949 llvm::Value *NumIterVal = CGF.EmitScalarConversion( 7950 CGF.EmitScalarExpr(D.getNumIterations()), D.getNumIterations()->getType(), 7951 Int64Ty, D.getNumIterations()->getExprLoc()); 7952 CGF.EmitStoreOfScalar(NumIterVal, UpperLVal); 7953 // dims.stride = 1; 7954 LValue StrideLVal = 7955 CGF.EmitLValueForField(DimsLVal, *std::next(RD->field_begin(), StrideFD)); 7956 CGF.EmitStoreOfScalar(llvm::ConstantInt::getSigned(CGM.Int64Ty, /*V=*/1), 7957 StrideLVal); 7958 7959 // Build call void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid, 7960 // kmp_int32 num_dims, struct kmp_dim * dims); 7961 llvm::Value *Args[] = {emitUpdateLocation(CGF, D.getLocStart()), 7962 getThreadID(CGF, D.getLocStart()), 7963 llvm::ConstantInt::getSigned(CGM.Int32Ty, 1), 7964 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 7965 DimsAddr.getPointer(), CGM.VoidPtrTy)}; 7966 7967 llvm::Value *RTLFn = createRuntimeFunction(OMPRTL__kmpc_doacross_init); 7968 CGF.EmitRuntimeCall(RTLFn, Args); 7969 llvm::Value *FiniArgs[DoacrossCleanupTy::DoacrossFinArgs] = { 7970 emitUpdateLocation(CGF, D.getLocEnd()), getThreadID(CGF, D.getLocEnd())}; 7971 llvm::Value *FiniRTLFn = createRuntimeFunction(OMPRTL__kmpc_doacross_fini); 7972 CGF.EHStack.pushCleanup<DoacrossCleanupTy>(NormalAndEHCleanup, FiniRTLFn, 7973 llvm::makeArrayRef(FiniArgs)); 7974 } 7975 7976 void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF, 7977 const OMPDependClause *C) { 7978 QualType Int64Ty = 7979 CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1); 7980 const Expr *CounterVal = C->getCounterValue(); 7981 assert(CounterVal); 7982 llvm::Value *CntVal = CGF.EmitScalarConversion(CGF.EmitScalarExpr(CounterVal), 7983 CounterVal->getType(), Int64Ty, 7984 CounterVal->getExprLoc()); 7985 Address CntAddr = 
CGF.CreateMemTemp(Int64Ty, ".cnt.addr"); 7986 CGF.EmitStoreOfScalar(CntVal, CntAddr, /*Volatile=*/false, Int64Ty); 7987 llvm::Value *Args[] = {emitUpdateLocation(CGF, C->getLocStart()), 7988 getThreadID(CGF, C->getLocStart()), 7989 CntAddr.getPointer()}; 7990 llvm::Value *RTLFn; 7991 if (C->getDependencyKind() == OMPC_DEPEND_source) 7992 RTLFn = createRuntimeFunction(OMPRTL__kmpc_doacross_post); 7993 else { 7994 assert(C->getDependencyKind() == OMPC_DEPEND_sink); 7995 RTLFn = createRuntimeFunction(OMPRTL__kmpc_doacross_wait); 7996 } 7997 CGF.EmitRuntimeCall(RTLFn, Args); 7998 } 7999 8000 void CGOpenMPRuntime::emitCall(CodeGenFunction &CGF, llvm::Value *Callee, 8001 ArrayRef<llvm::Value *> Args, 8002 SourceLocation Loc) const { 8003 auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc); 8004 8005 if (auto *Fn = dyn_cast<llvm::Function>(Callee)) { 8006 if (Fn->doesNotThrow()) { 8007 CGF.EmitNounwindRuntimeCall(Fn, Args); 8008 return; 8009 } 8010 } 8011 CGF.EmitRuntimeCall(Callee, Args); 8012 } 8013 8014 void CGOpenMPRuntime::emitOutlinedFunctionCall( 8015 CodeGenFunction &CGF, SourceLocation Loc, llvm::Value *OutlinedFn, 8016 ArrayRef<llvm::Value *> Args) const { 8017 assert(Loc.isValid() && "Outlined function call location must be valid."); 8018 emitCall(CGF, OutlinedFn, Args, Loc); 8019 } 8020 8021 Address CGOpenMPRuntime::getParameterAddress(CodeGenFunction &CGF, 8022 const VarDecl *NativeParam, 8023 const VarDecl *TargetParam) const { 8024 return CGF.GetAddrOfLocalVar(NativeParam); 8025 } 8026 8027 llvm::Value *CGOpenMPSIMDRuntime::emitParallelOutlinedFunction( 8028 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, 8029 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { 8030 llvm_unreachable("Not supported in SIMD-only mode"); 8031 } 8032 8033 llvm::Value *CGOpenMPSIMDRuntime::emitTeamsOutlinedFunction( 8034 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, 8035 OpenMPDirectiveKind InnermostKind, const 
RegionCodeGenTy &CodeGen) { 8036 llvm_unreachable("Not supported in SIMD-only mode"); 8037 } 8038 8039 llvm::Value *CGOpenMPSIMDRuntime::emitTaskOutlinedFunction( 8040 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, 8041 const VarDecl *PartIDVar, const VarDecl *TaskTVar, 8042 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen, 8043 bool Tied, unsigned &NumberOfParts) { 8044 llvm_unreachable("Not supported in SIMD-only mode"); 8045 } 8046 8047 void CGOpenMPSIMDRuntime::emitParallelCall(CodeGenFunction &CGF, 8048 SourceLocation Loc, 8049 llvm::Value *OutlinedFn, 8050 ArrayRef<llvm::Value *> CapturedVars, 8051 const Expr *IfCond) { 8052 llvm_unreachable("Not supported in SIMD-only mode"); 8053 } 8054 8055 void CGOpenMPSIMDRuntime::emitCriticalRegion( 8056 CodeGenFunction &CGF, StringRef CriticalName, 8057 const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc, 8058 const Expr *Hint) { 8059 llvm_unreachable("Not supported in SIMD-only mode"); 8060 } 8061 8062 void CGOpenMPSIMDRuntime::emitMasterRegion(CodeGenFunction &CGF, 8063 const RegionCodeGenTy &MasterOpGen, 8064 SourceLocation Loc) { 8065 llvm_unreachable("Not supported in SIMD-only mode"); 8066 } 8067 8068 void CGOpenMPSIMDRuntime::emitTaskyieldCall(CodeGenFunction &CGF, 8069 SourceLocation Loc) { 8070 llvm_unreachable("Not supported in SIMD-only mode"); 8071 } 8072 8073 void CGOpenMPSIMDRuntime::emitTaskgroupRegion( 8074 CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen, 8075 SourceLocation Loc) { 8076 llvm_unreachable("Not supported in SIMD-only mode"); 8077 } 8078 8079 void CGOpenMPSIMDRuntime::emitSingleRegion( 8080 CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen, 8081 SourceLocation Loc, ArrayRef<const Expr *> CopyprivateVars, 8082 ArrayRef<const Expr *> DestExprs, ArrayRef<const Expr *> SrcExprs, 8083 ArrayRef<const Expr *> AssignmentOps) { 8084 llvm_unreachable("Not supported in SIMD-only mode"); 8085 } 8086 8087 void 
CGOpenMPSIMDRuntime::emitOrderedRegion(CodeGenFunction &CGF, 8088 const RegionCodeGenTy &OrderedOpGen, 8089 SourceLocation Loc, 8090 bool IsThreads) { 8091 llvm_unreachable("Not supported in SIMD-only mode"); 8092 } 8093 8094 void CGOpenMPSIMDRuntime::emitBarrierCall(CodeGenFunction &CGF, 8095 SourceLocation Loc, 8096 OpenMPDirectiveKind Kind, 8097 bool EmitChecks, 8098 bool ForceSimpleCall) { 8099 llvm_unreachable("Not supported in SIMD-only mode"); 8100 } 8101 8102 void CGOpenMPSIMDRuntime::emitForDispatchInit( 8103 CodeGenFunction &CGF, SourceLocation Loc, 8104 const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned, 8105 bool Ordered, const DispatchRTInput &DispatchValues) { 8106 llvm_unreachable("Not supported in SIMD-only mode"); 8107 } 8108 8109 void CGOpenMPSIMDRuntime::emitForStaticInit( 8110 CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind, 8111 const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values) { 8112 llvm_unreachable("Not supported in SIMD-only mode"); 8113 } 8114 8115 void CGOpenMPSIMDRuntime::emitDistributeStaticInit( 8116 CodeGenFunction &CGF, SourceLocation Loc, 8117 OpenMPDistScheduleClauseKind SchedKind, const StaticRTInput &Values) { 8118 llvm_unreachable("Not supported in SIMD-only mode"); 8119 } 8120 8121 void CGOpenMPSIMDRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF, 8122 SourceLocation Loc, 8123 unsigned IVSize, 8124 bool IVSigned) { 8125 llvm_unreachable("Not supported in SIMD-only mode"); 8126 } 8127 8128 void CGOpenMPSIMDRuntime::emitForStaticFinish(CodeGenFunction &CGF, 8129 SourceLocation Loc, 8130 OpenMPDirectiveKind DKind) { 8131 llvm_unreachable("Not supported in SIMD-only mode"); 8132 } 8133 8134 llvm::Value *CGOpenMPSIMDRuntime::emitForNext(CodeGenFunction &CGF, 8135 SourceLocation Loc, 8136 unsigned IVSize, bool IVSigned, 8137 Address IL, Address LB, 8138 Address UB, Address ST) { 8139 llvm_unreachable("Not supported in SIMD-only mode"); 8140 } 8141 8142 void 
CGOpenMPSIMDRuntime::emitNumThreadsClause(CodeGenFunction &CGF, 8143 llvm::Value *NumThreads, 8144 SourceLocation Loc) { 8145 llvm_unreachable("Not supported in SIMD-only mode"); 8146 } 8147 8148 void CGOpenMPSIMDRuntime::emitProcBindClause(CodeGenFunction &CGF, 8149 OpenMPProcBindClauseKind ProcBind, 8150 SourceLocation Loc) { 8151 llvm_unreachable("Not supported in SIMD-only mode"); 8152 } 8153 8154 Address CGOpenMPSIMDRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF, 8155 const VarDecl *VD, 8156 Address VDAddr, 8157 SourceLocation Loc) { 8158 llvm_unreachable("Not supported in SIMD-only mode"); 8159 } 8160 8161 llvm::Function *CGOpenMPSIMDRuntime::emitThreadPrivateVarDefinition( 8162 const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit, 8163 CodeGenFunction *CGF) { 8164 llvm_unreachable("Not supported in SIMD-only mode"); 8165 } 8166 8167 Address CGOpenMPSIMDRuntime::getAddrOfArtificialThreadPrivate( 8168 CodeGenFunction &CGF, QualType VarType, StringRef Name) { 8169 llvm_unreachable("Not supported in SIMD-only mode"); 8170 } 8171 8172 void CGOpenMPSIMDRuntime::emitFlush(CodeGenFunction &CGF, 8173 ArrayRef<const Expr *> Vars, 8174 SourceLocation Loc) { 8175 llvm_unreachable("Not supported in SIMD-only mode"); 8176 } 8177 8178 void CGOpenMPSIMDRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc, 8179 const OMPExecutableDirective &D, 8180 llvm::Value *TaskFunction, 8181 QualType SharedsTy, Address Shareds, 8182 const Expr *IfCond, 8183 const OMPTaskDataTy &Data) { 8184 llvm_unreachable("Not supported in SIMD-only mode"); 8185 } 8186 8187 void CGOpenMPSIMDRuntime::emitTaskLoopCall( 8188 CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D, 8189 llvm::Value *TaskFunction, QualType SharedsTy, Address Shareds, 8190 const Expr *IfCond, const OMPTaskDataTy &Data) { 8191 llvm_unreachable("Not supported in SIMD-only mode"); 8192 } 8193 8194 void CGOpenMPSIMDRuntime::emitReduction( 8195 CodeGenFunction &CGF, SourceLocation 
Loc, ArrayRef<const Expr *> Privates, 8196 ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs, 8197 ArrayRef<const Expr *> ReductionOps, ReductionOptionsTy Options) { 8198 assert(Options.SimpleReduction && "Only simple reduction is expected."); 8199 CGOpenMPRuntime::emitReduction(CGF, Loc, Privates, LHSExprs, RHSExprs, 8200 ReductionOps, Options); 8201 } 8202 8203 llvm::Value *CGOpenMPSIMDRuntime::emitTaskReductionInit( 8204 CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs, 8205 ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) { 8206 llvm_unreachable("Not supported in SIMD-only mode"); 8207 } 8208 8209 void CGOpenMPSIMDRuntime::emitTaskReductionFixups(CodeGenFunction &CGF, 8210 SourceLocation Loc, 8211 ReductionCodeGen &RCG, 8212 unsigned N) { 8213 llvm_unreachable("Not supported in SIMD-only mode"); 8214 } 8215 8216 Address CGOpenMPSIMDRuntime::getTaskReductionItem(CodeGenFunction &CGF, 8217 SourceLocation Loc, 8218 llvm::Value *ReductionsPtr, 8219 LValue SharedLVal) { 8220 llvm_unreachable("Not supported in SIMD-only mode"); 8221 } 8222 8223 void CGOpenMPSIMDRuntime::emitTaskwaitCall(CodeGenFunction &CGF, 8224 SourceLocation Loc) { 8225 llvm_unreachable("Not supported in SIMD-only mode"); 8226 } 8227 8228 void CGOpenMPSIMDRuntime::emitCancellationPointCall( 8229 CodeGenFunction &CGF, SourceLocation Loc, 8230 OpenMPDirectiveKind CancelRegion) { 8231 llvm_unreachable("Not supported in SIMD-only mode"); 8232 } 8233 8234 void CGOpenMPSIMDRuntime::emitCancelCall(CodeGenFunction &CGF, 8235 SourceLocation Loc, const Expr *IfCond, 8236 OpenMPDirectiveKind CancelRegion) { 8237 llvm_unreachable("Not supported in SIMD-only mode"); 8238 } 8239 8240 void CGOpenMPSIMDRuntime::emitTargetOutlinedFunction( 8241 const OMPExecutableDirective &D, StringRef ParentName, 8242 llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, 8243 bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) { 8244 llvm_unreachable("Not supported in 
SIMD-only mode"); 8245 } 8246 8247 void CGOpenMPSIMDRuntime::emitTargetCall(CodeGenFunction &CGF, 8248 const OMPExecutableDirective &D, 8249 llvm::Value *OutlinedFn, 8250 llvm::Value *OutlinedFnID, 8251 const Expr *IfCond, const Expr *Device, 8252 ArrayRef<llvm::Value *> CapturedVars) { 8253 llvm_unreachable("Not supported in SIMD-only mode"); 8254 } 8255 8256 bool CGOpenMPSIMDRuntime::emitTargetFunctions(GlobalDecl GD) { 8257 llvm_unreachable("Not supported in SIMD-only mode"); 8258 } 8259 8260 bool CGOpenMPSIMDRuntime::emitTargetGlobalVariable(GlobalDecl GD) { 8261 llvm_unreachable("Not supported in SIMD-only mode"); 8262 } 8263 8264 bool CGOpenMPSIMDRuntime::emitTargetGlobal(GlobalDecl GD) { 8265 return false; 8266 } 8267 8268 llvm::Function *CGOpenMPSIMDRuntime::emitRegistrationFunction() { 8269 return nullptr; 8270 } 8271 8272 void CGOpenMPSIMDRuntime::emitTeamsCall(CodeGenFunction &CGF, 8273 const OMPExecutableDirective &D, 8274 SourceLocation Loc, 8275 llvm::Value *OutlinedFn, 8276 ArrayRef<llvm::Value *> CapturedVars) { 8277 llvm_unreachable("Not supported in SIMD-only mode"); 8278 } 8279 8280 void CGOpenMPSIMDRuntime::emitNumTeamsClause(CodeGenFunction &CGF, 8281 const Expr *NumTeams, 8282 const Expr *ThreadLimit, 8283 SourceLocation Loc) { 8284 llvm_unreachable("Not supported in SIMD-only mode"); 8285 } 8286 8287 void CGOpenMPSIMDRuntime::emitTargetDataCalls( 8288 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, 8289 const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) { 8290 llvm_unreachable("Not supported in SIMD-only mode"); 8291 } 8292 8293 void CGOpenMPSIMDRuntime::emitTargetDataStandAloneCall( 8294 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, 8295 const Expr *Device) { 8296 llvm_unreachable("Not supported in SIMD-only mode"); 8297 } 8298 8299 void CGOpenMPSIMDRuntime::emitDoacrossInit(CodeGenFunction &CGF, 8300 const OMPLoopDirective &D) { 8301 llvm_unreachable("Not 
supported in SIMD-only mode"); 8302 } 8303 8304 void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF, 8305 const OMPDependClause *C) { 8306 llvm_unreachable("Not supported in SIMD-only mode"); 8307 } 8308 8309 const VarDecl * 8310 CGOpenMPSIMDRuntime::translateParameter(const FieldDecl *FD, 8311 const VarDecl *NativeParam) const { 8312 llvm_unreachable("Not supported in SIMD-only mode"); 8313 } 8314 8315 Address 8316 CGOpenMPSIMDRuntime::getParameterAddress(CodeGenFunction &CGF, 8317 const VarDecl *NativeParam, 8318 const VarDecl *TargetParam) const { 8319 llvm_unreachable("Not supported in SIMD-only mode"); 8320 } 8321 8322