1 //===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This provides a class for OpenMP runtime code generation. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "CGOpenMPRuntime.h" 14 #include "CGCXXABI.h" 15 #include "CGCleanup.h" 16 #include "CGRecordLayout.h" 17 #include "CodeGenFunction.h" 18 #include "TargetInfo.h" 19 #include "clang/AST/APValue.h" 20 #include "clang/AST/Attr.h" 21 #include "clang/AST/Decl.h" 22 #include "clang/AST/OpenMPClause.h" 23 #include "clang/AST/StmtOpenMP.h" 24 #include "clang/AST/StmtVisitor.h" 25 #include "clang/Basic/BitmaskEnum.h" 26 #include "clang/Basic/FileManager.h" 27 #include "clang/Basic/OpenMPKinds.h" 28 #include "clang/Basic/SourceManager.h" 29 #include "clang/CodeGen/ConstantInitBuilder.h" 30 #include "llvm/ADT/ArrayRef.h" 31 #include "llvm/ADT/SetOperations.h" 32 #include "llvm/ADT/StringExtras.h" 33 #include "llvm/Bitcode/BitcodeReader.h" 34 #include "llvm/IR/Constants.h" 35 #include "llvm/IR/DerivedTypes.h" 36 #include "llvm/IR/GlobalValue.h" 37 #include "llvm/IR/Value.h" 38 #include "llvm/Support/AtomicOrdering.h" 39 #include "llvm/Support/Format.h" 40 #include "llvm/Support/raw_ostream.h" 41 #include <cassert> 42 #include <numeric> 43 44 using namespace clang; 45 using namespace CodeGen; 46 using namespace llvm::omp; 47 48 namespace { 49 /// Base class for handling code generation inside OpenMP regions. 50 class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo { 51 public: 52 /// Kinds of OpenMP regions used in codegen. 
53 enum CGOpenMPRegionKind { 54 /// Region with outlined function for standalone 'parallel' 55 /// directive. 56 ParallelOutlinedRegion, 57 /// Region with outlined function for standalone 'task' directive. 58 TaskOutlinedRegion, 59 /// Region for constructs that do not require function outlining, 60 /// like 'for', 'sections', 'atomic' etc. directives. 61 InlinedRegion, 62 /// Region with outlined function for standalone 'target' directive. 63 TargetRegion, 64 }; 65 66 CGOpenMPRegionInfo(const CapturedStmt &CS, 67 const CGOpenMPRegionKind RegionKind, 68 const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind, 69 bool HasCancel) 70 : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind), 71 CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {} 72 73 CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind, 74 const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind, 75 bool HasCancel) 76 : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen), 77 Kind(Kind), HasCancel(HasCancel) {} 78 79 /// Get a variable or parameter for storing global thread id 80 /// inside OpenMP construct. 81 virtual const VarDecl *getThreadIDVariable() const = 0; 82 83 /// Emit the captured statement body. 84 void EmitBody(CodeGenFunction &CGF, const Stmt *S) override; 85 86 /// Get an LValue for the current ThreadID variable. 87 /// \return LValue for thread id variable. This LValue always has type int32*. 
88 virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF); 89 90 virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {} 91 92 CGOpenMPRegionKind getRegionKind() const { return RegionKind; } 93 94 OpenMPDirectiveKind getDirectiveKind() const { return Kind; } 95 96 bool hasCancel() const { return HasCancel; } 97 98 static bool classof(const CGCapturedStmtInfo *Info) { 99 return Info->getKind() == CR_OpenMP; 100 } 101 102 ~CGOpenMPRegionInfo() override = default; 103 104 protected: 105 CGOpenMPRegionKind RegionKind; 106 RegionCodeGenTy CodeGen; 107 OpenMPDirectiveKind Kind; 108 bool HasCancel; 109 }; 110 111 /// API for captured statement code generation in OpenMP constructs. 112 class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo { 113 public: 114 CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar, 115 const RegionCodeGenTy &CodeGen, 116 OpenMPDirectiveKind Kind, bool HasCancel, 117 StringRef HelperName) 118 : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind, 119 HasCancel), 120 ThreadIDVar(ThreadIDVar), HelperName(HelperName) { 121 assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region."); 122 } 123 124 /// Get a variable or parameter for storing global thread id 125 /// inside OpenMP construct. 126 const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; } 127 128 /// Get the name of the capture helper. 129 StringRef getHelperName() const override { return HelperName; } 130 131 static bool classof(const CGCapturedStmtInfo *Info) { 132 return CGOpenMPRegionInfo::classof(Info) && 133 cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == 134 ParallelOutlinedRegion; 135 } 136 137 private: 138 /// A variable or parameter storing global thread id for OpenMP 139 /// constructs. 140 const VarDecl *ThreadIDVar; 141 StringRef HelperName; 142 }; 143 144 /// API for captured statement code generation in OpenMP constructs. 
145 class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo { 146 public: 147 class UntiedTaskActionTy final : public PrePostActionTy { 148 bool Untied; 149 const VarDecl *PartIDVar; 150 const RegionCodeGenTy UntiedCodeGen; 151 llvm::SwitchInst *UntiedSwitch = nullptr; 152 153 public: 154 UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar, 155 const RegionCodeGenTy &UntiedCodeGen) 156 : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {} 157 void Enter(CodeGenFunction &CGF) override { 158 if (Untied) { 159 // Emit task switching point. 160 LValue PartIdLVal = CGF.EmitLoadOfPointerLValue( 161 CGF.GetAddrOfLocalVar(PartIDVar), 162 PartIDVar->getType()->castAs<PointerType>()); 163 llvm::Value *Res = 164 CGF.EmitLoadOfScalar(PartIdLVal, PartIDVar->getLocation()); 165 llvm::BasicBlock *DoneBB = CGF.createBasicBlock(".untied.done."); 166 UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB); 167 CGF.EmitBlock(DoneBB); 168 CGF.EmitBranchThroughCleanup(CGF.ReturnBlock); 169 CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp.")); 170 UntiedSwitch->addCase(CGF.Builder.getInt32(0), 171 CGF.Builder.GetInsertBlock()); 172 emitUntiedSwitch(CGF); 173 } 174 } 175 void emitUntiedSwitch(CodeGenFunction &CGF) const { 176 if (Untied) { 177 LValue PartIdLVal = CGF.EmitLoadOfPointerLValue( 178 CGF.GetAddrOfLocalVar(PartIDVar), 179 PartIDVar->getType()->castAs<PointerType>()); 180 CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()), 181 PartIdLVal); 182 UntiedCodeGen(CGF); 183 CodeGenFunction::JumpDest CurPoint = 184 CGF.getJumpDestInCurrentScope(".untied.next."); 185 CGF.EmitBranch(CGF.ReturnBlock.getBlock()); 186 CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp.")); 187 UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()), 188 CGF.Builder.GetInsertBlock()); 189 CGF.EmitBranchThroughCleanup(CurPoint); 190 CGF.EmitBlock(CurPoint.getBlock()); 191 } 192 } 193 unsigned getNumberOfParts() const { return 
UntiedSwitch->getNumCases(); } 194 }; 195 CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS, 196 const VarDecl *ThreadIDVar, 197 const RegionCodeGenTy &CodeGen, 198 OpenMPDirectiveKind Kind, bool HasCancel, 199 const UntiedTaskActionTy &Action) 200 : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel), 201 ThreadIDVar(ThreadIDVar), Action(Action) { 202 assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region."); 203 } 204 205 /// Get a variable or parameter for storing global thread id 206 /// inside OpenMP construct. 207 const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; } 208 209 /// Get an LValue for the current ThreadID variable. 210 LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override; 211 212 /// Get the name of the capture helper. 213 StringRef getHelperName() const override { return ".omp_outlined."; } 214 215 void emitUntiedSwitch(CodeGenFunction &CGF) override { 216 Action.emitUntiedSwitch(CGF); 217 } 218 219 static bool classof(const CGCapturedStmtInfo *Info) { 220 return CGOpenMPRegionInfo::classof(Info) && 221 cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == 222 TaskOutlinedRegion; 223 } 224 225 private: 226 /// A variable or parameter storing global thread id for OpenMP 227 /// constructs. 228 const VarDecl *ThreadIDVar; 229 /// Action for emitting code for untied tasks. 230 const UntiedTaskActionTy &Action; 231 }; 232 233 /// API for inlined captured statement code generation in OpenMP 234 /// constructs. 235 class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo { 236 public: 237 CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI, 238 const RegionCodeGenTy &CodeGen, 239 OpenMPDirectiveKind Kind, bool HasCancel) 240 : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel), 241 OldCSI(OldCSI), 242 OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {} 243 244 // Retrieve the value of the context parameter. 
245 llvm::Value *getContextValue() const override { 246 if (OuterRegionInfo) 247 return OuterRegionInfo->getContextValue(); 248 llvm_unreachable("No context value for inlined OpenMP region"); 249 } 250 251 void setContextValue(llvm::Value *V) override { 252 if (OuterRegionInfo) { 253 OuterRegionInfo->setContextValue(V); 254 return; 255 } 256 llvm_unreachable("No context value for inlined OpenMP region"); 257 } 258 259 /// Lookup the captured field decl for a variable. 260 const FieldDecl *lookup(const VarDecl *VD) const override { 261 if (OuterRegionInfo) 262 return OuterRegionInfo->lookup(VD); 263 // If there is no outer outlined region,no need to lookup in a list of 264 // captured variables, we can use the original one. 265 return nullptr; 266 } 267 268 FieldDecl *getThisFieldDecl() const override { 269 if (OuterRegionInfo) 270 return OuterRegionInfo->getThisFieldDecl(); 271 return nullptr; 272 } 273 274 /// Get a variable or parameter for storing global thread id 275 /// inside OpenMP construct. 276 const VarDecl *getThreadIDVariable() const override { 277 if (OuterRegionInfo) 278 return OuterRegionInfo->getThreadIDVariable(); 279 return nullptr; 280 } 281 282 /// Get an LValue for the current ThreadID variable. 283 LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override { 284 if (OuterRegionInfo) 285 return OuterRegionInfo->getThreadIDVariableLValue(CGF); 286 llvm_unreachable("No LValue for inlined OpenMP construct"); 287 } 288 289 /// Get the name of the capture helper. 
290 StringRef getHelperName() const override { 291 if (auto *OuterRegionInfo = getOldCSI()) 292 return OuterRegionInfo->getHelperName(); 293 llvm_unreachable("No helper name for inlined OpenMP construct"); 294 } 295 296 void emitUntiedSwitch(CodeGenFunction &CGF) override { 297 if (OuterRegionInfo) 298 OuterRegionInfo->emitUntiedSwitch(CGF); 299 } 300 301 CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; } 302 303 static bool classof(const CGCapturedStmtInfo *Info) { 304 return CGOpenMPRegionInfo::classof(Info) && 305 cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion; 306 } 307 308 ~CGOpenMPInlinedRegionInfo() override = default; 309 310 private: 311 /// CodeGen info about outer OpenMP region. 312 CodeGenFunction::CGCapturedStmtInfo *OldCSI; 313 CGOpenMPRegionInfo *OuterRegionInfo; 314 }; 315 316 /// API for captured statement code generation in OpenMP target 317 /// constructs. For this captures, implicit parameters are used instead of the 318 /// captured fields. The name of the target region has to be unique in a given 319 /// application so it is provided by the client, because only the client has 320 /// the information to generate that. 321 class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo { 322 public: 323 CGOpenMPTargetRegionInfo(const CapturedStmt &CS, 324 const RegionCodeGenTy &CodeGen, StringRef HelperName) 325 : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target, 326 /*HasCancel=*/false), 327 HelperName(HelperName) {} 328 329 /// This is unused for target regions because each starts executing 330 /// with a single thread. 331 const VarDecl *getThreadIDVariable() const override { return nullptr; } 332 333 /// Get the name of the capture helper. 
334 StringRef getHelperName() const override { return HelperName; } 335 336 static bool classof(const CGCapturedStmtInfo *Info) { 337 return CGOpenMPRegionInfo::classof(Info) && 338 cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion; 339 } 340 341 private: 342 StringRef HelperName; 343 }; 344 345 static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) { 346 llvm_unreachable("No codegen for expressions"); 347 } 348 /// API for generation of expressions captured in a innermost OpenMP 349 /// region. 350 class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo { 351 public: 352 CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS) 353 : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen, 354 OMPD_unknown, 355 /*HasCancel=*/false), 356 PrivScope(CGF) { 357 // Make sure the globals captured in the provided statement are local by 358 // using the privatization logic. We assume the same variable is not 359 // captured more than once. 360 for (const auto &C : CS.captures()) { 361 if (!C.capturesVariable() && !C.capturesVariableByCopy()) 362 continue; 363 364 const VarDecl *VD = C.getCapturedVar(); 365 if (VD->isLocalVarDeclOrParm()) 366 continue; 367 368 DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD), 369 /*RefersToEnclosingVariableOrCapture=*/false, 370 VD->getType().getNonReferenceType(), VK_LValue, 371 C.getLocation()); 372 PrivScope.addPrivate( 373 VD, [&CGF, &DRE]() { return CGF.EmitLValue(&DRE).getAddress(CGF); }); 374 } 375 (void)PrivScope.Privatize(); 376 } 377 378 /// Lookup the captured field decl for a variable. 379 const FieldDecl *lookup(const VarDecl *VD) const override { 380 if (const FieldDecl *FD = CGOpenMPInlinedRegionInfo::lookup(VD)) 381 return FD; 382 return nullptr; 383 } 384 385 /// Emit the captured statement body. 
386 void EmitBody(CodeGenFunction &CGF, const Stmt *S) override { 387 llvm_unreachable("No body for expressions"); 388 } 389 390 /// Get a variable or parameter for storing global thread id 391 /// inside OpenMP construct. 392 const VarDecl *getThreadIDVariable() const override { 393 llvm_unreachable("No thread id for expressions"); 394 } 395 396 /// Get the name of the capture helper. 397 StringRef getHelperName() const override { 398 llvm_unreachable("No helper name for expressions"); 399 } 400 401 static bool classof(const CGCapturedStmtInfo *Info) { return false; } 402 403 private: 404 /// Private scope to capture global variables. 405 CodeGenFunction::OMPPrivateScope PrivScope; 406 }; 407 408 /// RAII for emitting code of OpenMP constructs. 409 class InlinedOpenMPRegionRAII { 410 CodeGenFunction &CGF; 411 llvm::DenseMap<const VarDecl *, FieldDecl *> LambdaCaptureFields; 412 FieldDecl *LambdaThisCaptureField = nullptr; 413 const CodeGen::CGBlockInfo *BlockInfo = nullptr; 414 bool NoInheritance = false; 415 416 public: 417 /// Constructs region for combined constructs. 418 /// \param CodeGen Code generation sequence for combined directives. Includes 419 /// a list of functions used for code generation of implicitly inlined 420 /// regions. 421 InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen, 422 OpenMPDirectiveKind Kind, bool HasCancel, 423 bool NoInheritance = true) 424 : CGF(CGF), NoInheritance(NoInheritance) { 425 // Start emission for the construct. 426 CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo( 427 CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel); 428 if (NoInheritance) { 429 std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields); 430 LambdaThisCaptureField = CGF.LambdaThisCaptureField; 431 CGF.LambdaThisCaptureField = nullptr; 432 BlockInfo = CGF.BlockInfo; 433 CGF.BlockInfo = nullptr; 434 } 435 } 436 437 ~InlinedOpenMPRegionRAII() { 438 // Restore original CapturedStmtInfo only if we're done with code emission. 
439 auto *OldCSI = 440 cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI(); 441 delete CGF.CapturedStmtInfo; 442 CGF.CapturedStmtInfo = OldCSI; 443 if (NoInheritance) { 444 std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields); 445 CGF.LambdaThisCaptureField = LambdaThisCaptureField; 446 CGF.BlockInfo = BlockInfo; 447 } 448 } 449 }; 450 451 /// Values for bit flags used in the ident_t to describe the fields. 452 /// All enumeric elements are named and described in accordance with the code 453 /// from https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h 454 enum OpenMPLocationFlags : unsigned { 455 /// Use trampoline for internal microtask. 456 OMP_IDENT_IMD = 0x01, 457 /// Use c-style ident structure. 458 OMP_IDENT_KMPC = 0x02, 459 /// Atomic reduction option for kmpc_reduce. 460 OMP_ATOMIC_REDUCE = 0x10, 461 /// Explicit 'barrier' directive. 462 OMP_IDENT_BARRIER_EXPL = 0x20, 463 /// Implicit barrier in code. 464 OMP_IDENT_BARRIER_IMPL = 0x40, 465 /// Implicit barrier in 'for' directive. 466 OMP_IDENT_BARRIER_IMPL_FOR = 0x40, 467 /// Implicit barrier in 'sections' directive. 468 OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0, 469 /// Implicit barrier in 'single' directive. 470 OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140, 471 /// Call of __kmp_for_static_init for static loop. 472 OMP_IDENT_WORK_LOOP = 0x200, 473 /// Call of __kmp_for_static_init for sections. 474 OMP_IDENT_WORK_SECTIONS = 0x400, 475 /// Call of __kmp_for_static_init for distribute. 476 OMP_IDENT_WORK_DISTRIBUTE = 0x800, 477 LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE) 478 }; 479 480 namespace { 481 LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE(); 482 /// Values for bit flags for marking which requires clauses have been used. 483 enum OpenMPOffloadingRequiresDirFlags : int64_t { 484 /// flag undefined. 485 OMP_REQ_UNDEFINED = 0x000, 486 /// no requires clause present. 487 OMP_REQ_NONE = 0x001, 488 /// reverse_offload clause. 
489 OMP_REQ_REVERSE_OFFLOAD = 0x002, 490 /// unified_address clause. 491 OMP_REQ_UNIFIED_ADDRESS = 0x004, 492 /// unified_shared_memory clause. 493 OMP_REQ_UNIFIED_SHARED_MEMORY = 0x008, 494 /// dynamic_allocators clause. 495 OMP_REQ_DYNAMIC_ALLOCATORS = 0x010, 496 LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_REQ_DYNAMIC_ALLOCATORS) 497 }; 498 499 enum OpenMPOffloadingReservedDeviceIDs { 500 /// Device ID if the device was not defined, runtime should get it 501 /// from environment variables in the spec. 502 OMP_DEVICEID_UNDEF = -1, 503 }; 504 } // anonymous namespace 505 506 /// Describes ident structure that describes a source location. 507 /// All descriptions are taken from 508 /// https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h 509 /// Original structure: 510 /// typedef struct ident { 511 /// kmp_int32 reserved_1; /**< might be used in Fortran; 512 /// see above */ 513 /// kmp_int32 flags; /**< also f.flags; KMP_IDENT_xxx flags; 514 /// KMP_IDENT_KMPC identifies this union 515 /// member */ 516 /// kmp_int32 reserved_2; /**< not really used in Fortran any more; 517 /// see above */ 518 ///#if USE_ITT_BUILD 519 /// /* but currently used for storing 520 /// region-specific ITT */ 521 /// /* contextual information. */ 522 ///#endif /* USE_ITT_BUILD */ 523 /// kmp_int32 reserved_3; /**< source[4] in Fortran, do not use for 524 /// C++ */ 525 /// char const *psource; /**< String describing the source location. 526 /// The string is composed of semi-colon separated 527 // fields which describe the source file, 528 /// the function and a pair of line numbers that 529 /// delimit the construct. 530 /// */ 531 /// } ident_t; 532 enum IdentFieldIndex { 533 /// might be used in Fortran 534 IdentField_Reserved_1, 535 /// OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member. 
536 IdentField_Flags, 537 /// Not really used in Fortran any more 538 IdentField_Reserved_2, 539 /// Source[4] in Fortran, do not use for C++ 540 IdentField_Reserved_3, 541 /// String describing the source location. The string is composed of 542 /// semi-colon separated fields which describe the source file, the function 543 /// and a pair of line numbers that delimit the construct. 544 IdentField_PSource 545 }; 546 547 /// Schedule types for 'omp for' loops (these enumerators are taken from 548 /// the enum sched_type in kmp.h). 549 enum OpenMPSchedType { 550 /// Lower bound for default (unordered) versions. 551 OMP_sch_lower = 32, 552 OMP_sch_static_chunked = 33, 553 OMP_sch_static = 34, 554 OMP_sch_dynamic_chunked = 35, 555 OMP_sch_guided_chunked = 36, 556 OMP_sch_runtime = 37, 557 OMP_sch_auto = 38, 558 /// static with chunk adjustment (e.g., simd) 559 OMP_sch_static_balanced_chunked = 45, 560 /// Lower bound for 'ordered' versions. 561 OMP_ord_lower = 64, 562 OMP_ord_static_chunked = 65, 563 OMP_ord_static = 66, 564 OMP_ord_dynamic_chunked = 67, 565 OMP_ord_guided_chunked = 68, 566 OMP_ord_runtime = 69, 567 OMP_ord_auto = 70, 568 OMP_sch_default = OMP_sch_static, 569 /// dist_schedule types 570 OMP_dist_sch_static_chunked = 91, 571 OMP_dist_sch_static = 92, 572 /// Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers. 573 /// Set if the monotonic schedule modifier was present. 574 OMP_sch_modifier_monotonic = (1 << 29), 575 /// Set if the nonmonotonic schedule modifier was present. 576 OMP_sch_modifier_nonmonotonic = (1 << 30), 577 }; 578 579 /// A basic class for pre|post-action for advanced codegen sequence for OpenMP 580 /// region. 
581 class CleanupTy final : public EHScopeStack::Cleanup { 582 PrePostActionTy *Action; 583 584 public: 585 explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {} 586 void Emit(CodeGenFunction &CGF, Flags /*flags*/) override { 587 if (!CGF.HaveInsertPoint()) 588 return; 589 Action->Exit(CGF); 590 } 591 }; 592 593 } // anonymous namespace 594 595 void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const { 596 CodeGenFunction::RunCleanupsScope Scope(CGF); 597 if (PrePostAction) { 598 CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction); 599 Callback(CodeGen, CGF, *PrePostAction); 600 } else { 601 PrePostActionTy Action; 602 Callback(CodeGen, CGF, Action); 603 } 604 } 605 606 /// Check if the combiner is a call to UDR combiner and if it is so return the 607 /// UDR decl used for reduction. 608 static const OMPDeclareReductionDecl * 609 getReductionInit(const Expr *ReductionOp) { 610 if (const auto *CE = dyn_cast<CallExpr>(ReductionOp)) 611 if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee())) 612 if (const auto *DRE = 613 dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts())) 614 if (const auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) 615 return DRD; 616 return nullptr; 617 } 618 619 static void emitInitWithReductionInitializer(CodeGenFunction &CGF, 620 const OMPDeclareReductionDecl *DRD, 621 const Expr *InitOp, 622 Address Private, Address Original, 623 QualType Ty) { 624 if (DRD->getInitializer()) { 625 std::pair<llvm::Function *, llvm::Function *> Reduction = 626 CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD); 627 const auto *CE = cast<CallExpr>(InitOp); 628 const auto *OVE = cast<OpaqueValueExpr>(CE->getCallee()); 629 const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts(); 630 const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts(); 631 const auto *LHSDRE = 632 cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr()); 633 const auto *RHSDRE = 634 
cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr()); 635 CodeGenFunction::OMPPrivateScope PrivateScope(CGF); 636 PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()), 637 [=]() { return Private; }); 638 PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()), 639 [=]() { return Original; }); 640 (void)PrivateScope.Privatize(); 641 RValue Func = RValue::get(Reduction.second); 642 CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func); 643 CGF.EmitIgnoredExpr(InitOp); 644 } else { 645 llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty); 646 std::string Name = CGF.CGM.getOpenMPRuntime().getName({"init"}); 647 auto *GV = new llvm::GlobalVariable( 648 CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true, 649 llvm::GlobalValue::PrivateLinkage, Init, Name); 650 LValue LV = CGF.MakeNaturalAlignAddrLValue(GV, Ty); 651 RValue InitRVal; 652 switch (CGF.getEvaluationKind(Ty)) { 653 case TEK_Scalar: 654 InitRVal = CGF.EmitLoadOfLValue(LV, DRD->getLocation()); 655 break; 656 case TEK_Complex: 657 InitRVal = 658 RValue::getComplex(CGF.EmitLoadOfComplex(LV, DRD->getLocation())); 659 break; 660 case TEK_Aggregate: { 661 OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_LValue); 662 CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, LV); 663 CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(), 664 /*IsInitializer=*/false); 665 return; 666 } 667 } 668 OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_PRValue); 669 CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal); 670 CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(), 671 /*IsInitializer=*/false); 672 } 673 } 674 675 /// Emit initialization of arrays of complex types. 676 /// \param DestAddr Address of the array. 677 /// \param Type Type of array. 678 /// \param Init Initial expression of array. 679 /// \param SrcAddr Address of the original array. 
680 static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr, 681 QualType Type, bool EmitDeclareReductionInit, 682 const Expr *Init, 683 const OMPDeclareReductionDecl *DRD, 684 Address SrcAddr = Address::invalid()) { 685 // Perform element-by-element initialization. 686 QualType ElementTy; 687 688 // Drill down to the base element type on both arrays. 689 const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe(); 690 llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr); 691 DestAddr = 692 CGF.Builder.CreateElementBitCast(DestAddr, DestAddr.getElementType()); 693 if (DRD) 694 SrcAddr = 695 CGF.Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType()); 696 697 llvm::Value *SrcBegin = nullptr; 698 if (DRD) 699 SrcBegin = SrcAddr.getPointer(); 700 llvm::Value *DestBegin = DestAddr.getPointer(); 701 // Cast from pointer to array type to pointer to single element. 702 llvm::Value *DestEnd = 703 CGF.Builder.CreateGEP(DestAddr.getElementType(), DestBegin, NumElements); 704 // The basic structure here is a while-do loop. 705 llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arrayinit.body"); 706 llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arrayinit.done"); 707 llvm::Value *IsEmpty = 708 CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty"); 709 CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB); 710 711 // Enter the loop body, making that address the current address. 
712 llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock(); 713 CGF.EmitBlock(BodyBB); 714 715 CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy); 716 717 llvm::PHINode *SrcElementPHI = nullptr; 718 Address SrcElementCurrent = Address::invalid(); 719 if (DRD) { 720 SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2, 721 "omp.arraycpy.srcElementPast"); 722 SrcElementPHI->addIncoming(SrcBegin, EntryBB); 723 SrcElementCurrent = 724 Address(SrcElementPHI, 725 SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize)); 726 } 727 llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI( 728 DestBegin->getType(), 2, "omp.arraycpy.destElementPast"); 729 DestElementPHI->addIncoming(DestBegin, EntryBB); 730 Address DestElementCurrent = 731 Address(DestElementPHI, 732 DestAddr.getAlignment().alignmentOfArrayElement(ElementSize)); 733 734 // Emit copy. 735 { 736 CodeGenFunction::RunCleanupsScope InitScope(CGF); 737 if (EmitDeclareReductionInit) { 738 emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent, 739 SrcElementCurrent, ElementTy); 740 } else 741 CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(), 742 /*IsInitializer=*/false); 743 } 744 745 if (DRD) { 746 // Shift the address forward by one element. 747 llvm::Value *SrcElementNext = CGF.Builder.CreateConstGEP1_32( 748 SrcAddr.getElementType(), SrcElementPHI, /*Idx0=*/1, 749 "omp.arraycpy.dest.element"); 750 SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock()); 751 } 752 753 // Shift the address forward by one element. 754 llvm::Value *DestElementNext = CGF.Builder.CreateConstGEP1_32( 755 DestAddr.getElementType(), DestElementPHI, /*Idx0=*/1, 756 "omp.arraycpy.dest.element"); 757 // Check whether we've reached the end. 
758 llvm::Value *Done = 759 CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done"); 760 CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB); 761 DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock()); 762 763 // Done. 764 CGF.EmitBlock(DoneBB, /*IsFinished=*/true); 765 } 766 767 LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) { 768 return CGF.EmitOMPSharedLValue(E); 769 } 770 771 LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF, 772 const Expr *E) { 773 if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E)) 774 return CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false); 775 return LValue(); 776 } 777 778 void ReductionCodeGen::emitAggregateInitialization( 779 CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal, 780 const OMPDeclareReductionDecl *DRD) { 781 // Emit VarDecl with copy init for arrays. 782 // Get the address of the original variable captured in current 783 // captured region. 784 const auto *PrivateVD = 785 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl()); 786 bool EmitDeclareReductionInit = 787 DRD && (DRD->getInitializer() || !PrivateVD->hasInit()); 788 EmitOMPAggregateInit(CGF, PrivateAddr, PrivateVD->getType(), 789 EmitDeclareReductionInit, 790 EmitDeclareReductionInit ? 
ClausesData[N].ReductionOp 791 : PrivateVD->getInit(), 792 DRD, SharedLVal.getAddress(CGF)); 793 } 794 795 ReductionCodeGen::ReductionCodeGen(ArrayRef<const Expr *> Shareds, 796 ArrayRef<const Expr *> Origs, 797 ArrayRef<const Expr *> Privates, 798 ArrayRef<const Expr *> ReductionOps) { 799 ClausesData.reserve(Shareds.size()); 800 SharedAddresses.reserve(Shareds.size()); 801 Sizes.reserve(Shareds.size()); 802 BaseDecls.reserve(Shareds.size()); 803 const auto *IOrig = Origs.begin(); 804 const auto *IPriv = Privates.begin(); 805 const auto *IRed = ReductionOps.begin(); 806 for (const Expr *Ref : Shareds) { 807 ClausesData.emplace_back(Ref, *IOrig, *IPriv, *IRed); 808 std::advance(IOrig, 1); 809 std::advance(IPriv, 1); 810 std::advance(IRed, 1); 811 } 812 } 813 814 void ReductionCodeGen::emitSharedOrigLValue(CodeGenFunction &CGF, unsigned N) { 815 assert(SharedAddresses.size() == N && OrigAddresses.size() == N && 816 "Number of generated lvalues must be exactly N."); 817 LValue First = emitSharedLValue(CGF, ClausesData[N].Shared); 818 LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Shared); 819 SharedAddresses.emplace_back(First, Second); 820 if (ClausesData[N].Shared == ClausesData[N].Ref) { 821 OrigAddresses.emplace_back(First, Second); 822 } else { 823 LValue First = emitSharedLValue(CGF, ClausesData[N].Ref); 824 LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Ref); 825 OrigAddresses.emplace_back(First, Second); 826 } 827 } 828 829 void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) { 830 const auto *PrivateVD = 831 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl()); 832 QualType PrivateType = PrivateVD->getType(); 833 bool AsArraySection = isa<OMPArraySectionExpr>(ClausesData[N].Ref); 834 if (!PrivateType->isVariablyModifiedType()) { 835 Sizes.emplace_back( 836 CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType()), 837 nullptr); 838 return; 839 } 840 llvm::Value *Size; 841 llvm::Value 
*SizeInChars; 842 auto *ElemType = 843 cast<llvm::PointerType>(OrigAddresses[N].first.getPointer(CGF)->getType()) 844 ->getElementType(); 845 auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType); 846 if (AsArraySection) { 847 Size = CGF.Builder.CreatePtrDiff(OrigAddresses[N].second.getPointer(CGF), 848 OrigAddresses[N].first.getPointer(CGF)); 849 Size = CGF.Builder.CreateNUWAdd( 850 Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1)); 851 SizeInChars = CGF.Builder.CreateNUWMul(Size, ElemSizeOf); 852 } else { 853 SizeInChars = 854 CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType()); 855 Size = CGF.Builder.CreateExactUDiv(SizeInChars, ElemSizeOf); 856 } 857 Sizes.emplace_back(SizeInChars, Size); 858 CodeGenFunction::OpaqueValueMapping OpaqueMap( 859 CGF, 860 cast<OpaqueValueExpr>( 861 CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()), 862 RValue::get(Size)); 863 CGF.EmitVariablyModifiedType(PrivateType); 864 } 865 866 void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N, 867 llvm::Value *Size) { 868 const auto *PrivateVD = 869 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl()); 870 QualType PrivateType = PrivateVD->getType(); 871 if (!PrivateType->isVariablyModifiedType()) { 872 assert(!Size && !Sizes[N].second && 873 "Size should be nullptr for non-variably modified reduction " 874 "items."); 875 return; 876 } 877 CodeGenFunction::OpaqueValueMapping OpaqueMap( 878 CGF, 879 cast<OpaqueValueExpr>( 880 CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()), 881 RValue::get(Size)); 882 CGF.EmitVariablyModifiedType(PrivateType); 883 } 884 885 void ReductionCodeGen::emitInitialization( 886 CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal, 887 llvm::function_ref<bool(CodeGenFunction &)> DefaultInit) { 888 assert(SharedAddresses.size() > N && "No variable was generated"); 889 const auto *PrivateVD = 890 
cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl()); 891 const OMPDeclareReductionDecl *DRD = 892 getReductionInit(ClausesData[N].ReductionOp); 893 QualType PrivateType = PrivateVD->getType(); 894 PrivateAddr = CGF.Builder.CreateElementBitCast( 895 PrivateAddr, CGF.ConvertTypeForMem(PrivateType)); 896 QualType SharedType = SharedAddresses[N].first.getType(); 897 SharedLVal = CGF.MakeAddrLValue( 898 CGF.Builder.CreateElementBitCast(SharedLVal.getAddress(CGF), 899 CGF.ConvertTypeForMem(SharedType)), 900 SharedType, SharedAddresses[N].first.getBaseInfo(), 901 CGF.CGM.getTBAAInfoForSubobject(SharedAddresses[N].first, SharedType)); 902 if (CGF.getContext().getAsArrayType(PrivateVD->getType())) { 903 if (DRD && DRD->getInitializer()) 904 (void)DefaultInit(CGF); 905 emitAggregateInitialization(CGF, N, PrivateAddr, SharedLVal, DRD); 906 } else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) { 907 (void)DefaultInit(CGF); 908 emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp, 909 PrivateAddr, SharedLVal.getAddress(CGF), 910 SharedLVal.getType()); 911 } else if (!DefaultInit(CGF) && PrivateVD->hasInit() && 912 !CGF.isTrivialInitializer(PrivateVD->getInit())) { 913 CGF.EmitAnyExprToMem(PrivateVD->getInit(), PrivateAddr, 914 PrivateVD->getType().getQualifiers(), 915 /*IsInitializer=*/false); 916 } 917 } 918 919 bool ReductionCodeGen::needCleanups(unsigned N) { 920 const auto *PrivateVD = 921 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl()); 922 QualType PrivateType = PrivateVD->getType(); 923 QualType::DestructionKind DTorKind = PrivateType.isDestructedType(); 924 return DTorKind != QualType::DK_none; 925 } 926 927 void ReductionCodeGen::emitCleanups(CodeGenFunction &CGF, unsigned N, 928 Address PrivateAddr) { 929 const auto *PrivateVD = 930 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl()); 931 QualType PrivateType = PrivateVD->getType(); 932 QualType::DestructionKind DTorKind = 
PrivateType.isDestructedType(); 933 if (needCleanups(N)) { 934 PrivateAddr = CGF.Builder.CreateElementBitCast( 935 PrivateAddr, CGF.ConvertTypeForMem(PrivateType)); 936 CGF.pushDestroy(DTorKind, PrivateAddr, PrivateType); 937 } 938 } 939 940 static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy, 941 LValue BaseLV) { 942 BaseTy = BaseTy.getNonReferenceType(); 943 while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) && 944 !CGF.getContext().hasSameType(BaseTy, ElTy)) { 945 if (const auto *PtrTy = BaseTy->getAs<PointerType>()) { 946 BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(CGF), PtrTy); 947 } else { 948 LValue RefLVal = CGF.MakeAddrLValue(BaseLV.getAddress(CGF), BaseTy); 949 BaseLV = CGF.EmitLoadOfReferenceLValue(RefLVal); 950 } 951 BaseTy = BaseTy->getPointeeType(); 952 } 953 return CGF.MakeAddrLValue( 954 CGF.Builder.CreateElementBitCast(BaseLV.getAddress(CGF), 955 CGF.ConvertTypeForMem(ElTy)), 956 BaseLV.getType(), BaseLV.getBaseInfo(), 957 CGF.CGM.getTBAAInfoForSubobject(BaseLV, BaseLV.getType())); 958 } 959 960 static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy, 961 llvm::Type *BaseLVType, CharUnits BaseLVAlignment, 962 llvm::Value *Addr) { 963 Address Tmp = Address::invalid(); 964 Address TopTmp = Address::invalid(); 965 Address MostTopTmp = Address::invalid(); 966 BaseTy = BaseTy.getNonReferenceType(); 967 while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) && 968 !CGF.getContext().hasSameType(BaseTy, ElTy)) { 969 Tmp = CGF.CreateMemTemp(BaseTy); 970 if (TopTmp.isValid()) 971 CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp); 972 else 973 MostTopTmp = Tmp; 974 TopTmp = Tmp; 975 BaseTy = BaseTy->getPointeeType(); 976 } 977 llvm::Type *Ty = BaseLVType; 978 if (Tmp.isValid()) 979 Ty = Tmp.getElementType(); 980 Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Addr, Ty); 981 if (Tmp.isValid()) { 982 CGF.Builder.CreateStore(Addr, Tmp); 983 return MostTopTmp; 984 } 985 return 
Address(Addr, BaseLVAlignment); 986 } 987 988 static const VarDecl *getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE) { 989 const VarDecl *OrigVD = nullptr; 990 if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(Ref)) { 991 const Expr *Base = OASE->getBase()->IgnoreParenImpCasts(); 992 while (const auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base)) 993 Base = TempOASE->getBase()->IgnoreParenImpCasts(); 994 while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base)) 995 Base = TempASE->getBase()->IgnoreParenImpCasts(); 996 DE = cast<DeclRefExpr>(Base); 997 OrigVD = cast<VarDecl>(DE->getDecl()); 998 } else if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Ref)) { 999 const Expr *Base = ASE->getBase()->IgnoreParenImpCasts(); 1000 while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base)) 1001 Base = TempASE->getBase()->IgnoreParenImpCasts(); 1002 DE = cast<DeclRefExpr>(Base); 1003 OrigVD = cast<VarDecl>(DE->getDecl()); 1004 } 1005 return OrigVD; 1006 } 1007 1008 Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N, 1009 Address PrivateAddr) { 1010 const DeclRefExpr *DE; 1011 if (const VarDecl *OrigVD = ::getBaseDecl(ClausesData[N].Ref, DE)) { 1012 BaseDecls.emplace_back(OrigVD); 1013 LValue OriginalBaseLValue = CGF.EmitLValue(DE); 1014 LValue BaseLValue = 1015 loadToBegin(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(), 1016 OriginalBaseLValue); 1017 Address SharedAddr = SharedAddresses[N].first.getAddress(CGF); 1018 llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff( 1019 BaseLValue.getPointer(CGF), SharedAddr.getPointer()); 1020 llvm::Value *PrivatePointer = 1021 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 1022 PrivateAddr.getPointer(), SharedAddr.getType()); 1023 llvm::Value *Ptr = CGF.Builder.CreateGEP( 1024 SharedAddr.getElementType(), PrivatePointer, Adjustment); 1025 return castToBase(CGF, OrigVD->getType(), 1026 SharedAddresses[N].first.getType(), 1027 OriginalBaseLValue.getAddress(CGF).getType(), 
1028 OriginalBaseLValue.getAlignment(), Ptr); 1029 } 1030 BaseDecls.emplace_back( 1031 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Ref)->getDecl())); 1032 return PrivateAddr; 1033 } 1034 1035 bool ReductionCodeGen::usesReductionInitializer(unsigned N) const { 1036 const OMPDeclareReductionDecl *DRD = 1037 getReductionInit(ClausesData[N].ReductionOp); 1038 return DRD && DRD->getInitializer(); 1039 } 1040 1041 LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) { 1042 return CGF.EmitLoadOfPointerLValue( 1043 CGF.GetAddrOfLocalVar(getThreadIDVariable()), 1044 getThreadIDVariable()->getType()->castAs<PointerType>()); 1045 } 1046 1047 void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt *S) { 1048 if (!CGF.HaveInsertPoint()) 1049 return; 1050 // 1.2.2 OpenMP Language Terminology 1051 // Structured block - An executable statement with a single entry at the 1052 // top and a single exit at the bottom. 1053 // The point of exit cannot be a branch out of the structured block. 1054 // longjmp() and throw() must not violate the entry/exit criteria. 
1055 CGF.EHStack.pushTerminate(); 1056 if (S) 1057 CGF.incrementProfileCounter(S); 1058 CodeGen(CGF); 1059 CGF.EHStack.popTerminate(); 1060 } 1061 1062 LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue( 1063 CodeGenFunction &CGF) { 1064 return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()), 1065 getThreadIDVariable()->getType(), 1066 AlignmentSource::Decl); 1067 } 1068 1069 static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC, 1070 QualType FieldTy) { 1071 auto *Field = FieldDecl::Create( 1072 C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy, 1073 C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()), 1074 /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit); 1075 Field->setAccess(AS_public); 1076 DC->addDecl(Field); 1077 return Field; 1078 } 1079 1080 CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM, StringRef FirstSeparator, 1081 StringRef Separator) 1082 : CGM(CGM), FirstSeparator(FirstSeparator), Separator(Separator), 1083 OMPBuilder(CGM.getModule()), OffloadEntriesInfoManager(CGM) { 1084 KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8); 1085 1086 // Initialize Types used in OpenMPIRBuilder from OMPKinds.def 1087 OMPBuilder.initialize(); 1088 loadOffloadInfoMetadata(); 1089 } 1090 1091 void CGOpenMPRuntime::clear() { 1092 InternalVars.clear(); 1093 // Clean non-target variable declarations possibly used only in debug info. 
1094 for (const auto &Data : EmittedNonTargetVariables) { 1095 if (!Data.getValue().pointsToAliveValue()) 1096 continue; 1097 auto *GV = dyn_cast<llvm::GlobalVariable>(Data.getValue()); 1098 if (!GV) 1099 continue; 1100 if (!GV->isDeclaration() || GV->getNumUses() > 0) 1101 continue; 1102 GV->eraseFromParent(); 1103 } 1104 } 1105 1106 std::string CGOpenMPRuntime::getName(ArrayRef<StringRef> Parts) const { 1107 SmallString<128> Buffer; 1108 llvm::raw_svector_ostream OS(Buffer); 1109 StringRef Sep = FirstSeparator; 1110 for (StringRef Part : Parts) { 1111 OS << Sep << Part; 1112 Sep = Separator; 1113 } 1114 return std::string(OS.str()); 1115 } 1116 1117 static llvm::Function * 1118 emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty, 1119 const Expr *CombinerInitializer, const VarDecl *In, 1120 const VarDecl *Out, bool IsCombiner) { 1121 // void .omp_combiner.(Ty *in, Ty *out); 1122 ASTContext &C = CGM.getContext(); 1123 QualType PtrTy = C.getPointerType(Ty).withRestrict(); 1124 FunctionArgList Args; 1125 ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(), 1126 /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other); 1127 ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(), 1128 /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other); 1129 Args.push_back(&OmpOutParm); 1130 Args.push_back(&OmpInParm); 1131 const CGFunctionInfo &FnInfo = 1132 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 1133 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo); 1134 std::string Name = CGM.getOpenMPRuntime().getName( 1135 {IsCombiner ? 
"omp_combiner" : "omp_initializer", ""}); 1136 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage, 1137 Name, &CGM.getModule()); 1138 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo); 1139 if (CGM.getLangOpts().Optimize) { 1140 Fn->removeFnAttr(llvm::Attribute::NoInline); 1141 Fn->removeFnAttr(llvm::Attribute::OptimizeNone); 1142 Fn->addFnAttr(llvm::Attribute::AlwaysInline); 1143 } 1144 CodeGenFunction CGF(CGM); 1145 // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions. 1146 // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions. 1147 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, In->getLocation(), 1148 Out->getLocation()); 1149 CodeGenFunction::OMPPrivateScope Scope(CGF); 1150 Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm); 1151 Scope.addPrivate(In, [&CGF, AddrIn, PtrTy]() { 1152 return CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>()) 1153 .getAddress(CGF); 1154 }); 1155 Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm); 1156 Scope.addPrivate(Out, [&CGF, AddrOut, PtrTy]() { 1157 return CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>()) 1158 .getAddress(CGF); 1159 }); 1160 (void)Scope.Privatize(); 1161 if (!IsCombiner && Out->hasInit() && 1162 !CGF.isTrivialInitializer(Out->getInit())) { 1163 CGF.EmitAnyExprToMem(Out->getInit(), CGF.GetAddrOfLocalVar(Out), 1164 Out->getType().getQualifiers(), 1165 /*IsInitializer=*/true); 1166 } 1167 if (CombinerInitializer) 1168 CGF.EmitIgnoredExpr(CombinerInitializer); 1169 Scope.ForceCleanup(); 1170 CGF.FinishFunction(); 1171 return Fn; 1172 } 1173 1174 void CGOpenMPRuntime::emitUserDefinedReduction( 1175 CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) { 1176 if (UDRMap.count(D) > 0) 1177 return; 1178 llvm::Function *Combiner = emitCombinerOrInitializer( 1179 CGM, D->getType(), D->getCombiner(), 1180 cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerIn())->getDecl()), 1181 
cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerOut())->getDecl()), 1182 /*IsCombiner=*/true); 1183 llvm::Function *Initializer = nullptr; 1184 if (const Expr *Init = D->getInitializer()) { 1185 Initializer = emitCombinerOrInitializer( 1186 CGM, D->getType(), 1187 D->getInitializerKind() == OMPDeclareReductionDecl::CallInit ? Init 1188 : nullptr, 1189 cast<VarDecl>(cast<DeclRefExpr>(D->getInitOrig())->getDecl()), 1190 cast<VarDecl>(cast<DeclRefExpr>(D->getInitPriv())->getDecl()), 1191 /*IsCombiner=*/false); 1192 } 1193 UDRMap.try_emplace(D, Combiner, Initializer); 1194 if (CGF) { 1195 auto &Decls = FunctionUDRMap.FindAndConstruct(CGF->CurFn); 1196 Decls.second.push_back(D); 1197 } 1198 } 1199 1200 std::pair<llvm::Function *, llvm::Function *> 1201 CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) { 1202 auto I = UDRMap.find(D); 1203 if (I != UDRMap.end()) 1204 return I->second; 1205 emitUserDefinedReduction(/*CGF=*/nullptr, D); 1206 return UDRMap.lookup(D); 1207 } 1208 1209 namespace { 1210 // Temporary RAII solution to perform a push/pop stack event on the OpenMP IR 1211 // Builder if one is present. 1212 struct PushAndPopStackRAII { 1213 PushAndPopStackRAII(llvm::OpenMPIRBuilder *OMPBuilder, CodeGenFunction &CGF, 1214 bool HasCancel, llvm::omp::Directive Kind) 1215 : OMPBuilder(OMPBuilder) { 1216 if (!OMPBuilder) 1217 return; 1218 1219 // The following callback is the crucial part of clangs cleanup process. 1220 // 1221 // NOTE: 1222 // Once the OpenMPIRBuilder is used to create parallel regions (and 1223 // similar), the cancellation destination (Dest below) is determined via 1224 // IP. That means if we have variables to finalize we split the block at IP, 1225 // use the new block (=BB) as destination to build a JumpDest (via 1226 // getJumpDestInCurrentScope(BB)) which then is fed to 1227 // EmitBranchThroughCleanup. Furthermore, there will not be the need 1228 // to push & pop an FinalizationInfo object. 
1229 // The FiniCB will still be needed but at the point where the 1230 // OpenMPIRBuilder is asked to construct a parallel (or similar) construct. 1231 auto FiniCB = [&CGF](llvm::OpenMPIRBuilder::InsertPointTy IP) { 1232 assert(IP.getBlock()->end() == IP.getPoint() && 1233 "Clang CG should cause non-terminated block!"); 1234 CGBuilderTy::InsertPointGuard IPG(CGF.Builder); 1235 CGF.Builder.restoreIP(IP); 1236 CodeGenFunction::JumpDest Dest = 1237 CGF.getOMPCancelDestination(OMPD_parallel); 1238 CGF.EmitBranchThroughCleanup(Dest); 1239 }; 1240 1241 // TODO: Remove this once we emit parallel regions through the 1242 // OpenMPIRBuilder as it can do this setup internally. 1243 llvm::OpenMPIRBuilder::FinalizationInfo FI({FiniCB, Kind, HasCancel}); 1244 OMPBuilder->pushFinalizationCB(std::move(FI)); 1245 } 1246 ~PushAndPopStackRAII() { 1247 if (OMPBuilder) 1248 OMPBuilder->popFinalizationCB(); 1249 } 1250 llvm::OpenMPIRBuilder *OMPBuilder; 1251 }; 1252 } // namespace 1253 1254 static llvm::Function *emitParallelOrTeamsOutlinedFunction( 1255 CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS, 1256 const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, 1257 const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen) { 1258 assert(ThreadIDVar->getType()->isPointerType() && 1259 "thread id variable must be of type kmp_int32 *"); 1260 CodeGenFunction CGF(CGM, true); 1261 bool HasCancel = false; 1262 if (const auto *OPD = dyn_cast<OMPParallelDirective>(&D)) 1263 HasCancel = OPD->hasCancel(); 1264 else if (const auto *OPD = dyn_cast<OMPTargetParallelDirective>(&D)) 1265 HasCancel = OPD->hasCancel(); 1266 else if (const auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D)) 1267 HasCancel = OPSD->hasCancel(); 1268 else if (const auto *OPFD = dyn_cast<OMPParallelForDirective>(&D)) 1269 HasCancel = OPFD->hasCancel(); 1270 else if (const auto *OPFD = dyn_cast<OMPTargetParallelForDirective>(&D)) 1271 HasCancel = OPFD->hasCancel(); 1272 else 
if (const auto *OPFD = dyn_cast<OMPDistributeParallelForDirective>(&D)) 1273 HasCancel = OPFD->hasCancel(); 1274 else if (const auto *OPFD = 1275 dyn_cast<OMPTeamsDistributeParallelForDirective>(&D)) 1276 HasCancel = OPFD->hasCancel(); 1277 else if (const auto *OPFD = 1278 dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&D)) 1279 HasCancel = OPFD->hasCancel(); 1280 1281 // TODO: Temporarily inform the OpenMPIRBuilder, if any, about the new 1282 // parallel region to make cancellation barriers work properly. 1283 llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder(); 1284 PushAndPopStackRAII PSR(&OMPBuilder, CGF, HasCancel, InnermostKind); 1285 CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind, 1286 HasCancel, OutlinedHelperName); 1287 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 1288 return CGF.GenerateOpenMPCapturedStmtFunction(*CS, D.getBeginLoc()); 1289 } 1290 1291 llvm::Function *CGOpenMPRuntime::emitParallelOutlinedFunction( 1292 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, 1293 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { 1294 const CapturedStmt *CS = D.getCapturedStmt(OMPD_parallel); 1295 return emitParallelOrTeamsOutlinedFunction( 1296 CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen); 1297 } 1298 1299 llvm::Function *CGOpenMPRuntime::emitTeamsOutlinedFunction( 1300 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, 1301 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { 1302 const CapturedStmt *CS = D.getCapturedStmt(OMPD_teams); 1303 return emitParallelOrTeamsOutlinedFunction( 1304 CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen); 1305 } 1306 1307 llvm::Function *CGOpenMPRuntime::emitTaskOutlinedFunction( 1308 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, 1309 const VarDecl *PartIDVar, const VarDecl *TaskTVar, 1310 OpenMPDirectiveKind InnermostKind, const 
RegionCodeGenTy &CodeGen, 1311 bool Tied, unsigned &NumberOfParts) { 1312 auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF, 1313 PrePostActionTy &) { 1314 llvm::Value *ThreadID = getThreadID(CGF, D.getBeginLoc()); 1315 llvm::Value *UpLoc = emitUpdateLocation(CGF, D.getBeginLoc()); 1316 llvm::Value *TaskArgs[] = { 1317 UpLoc, ThreadID, 1318 CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar), 1319 TaskTVar->getType()->castAs<PointerType>()) 1320 .getPointer(CGF)}; 1321 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 1322 CGM.getModule(), OMPRTL___kmpc_omp_task), 1323 TaskArgs); 1324 }; 1325 CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar, 1326 UntiedCodeGen); 1327 CodeGen.setAction(Action); 1328 assert(!ThreadIDVar->getType()->isPointerType() && 1329 "thread id variable must be of type kmp_int32 for tasks"); 1330 const OpenMPDirectiveKind Region = 1331 isOpenMPTaskLoopDirective(D.getDirectiveKind()) ? OMPD_taskloop 1332 : OMPD_task; 1333 const CapturedStmt *CS = D.getCapturedStmt(Region); 1334 bool HasCancel = false; 1335 if (const auto *TD = dyn_cast<OMPTaskDirective>(&D)) 1336 HasCancel = TD->hasCancel(); 1337 else if (const auto *TD = dyn_cast<OMPTaskLoopDirective>(&D)) 1338 HasCancel = TD->hasCancel(); 1339 else if (const auto *TD = dyn_cast<OMPMasterTaskLoopDirective>(&D)) 1340 HasCancel = TD->hasCancel(); 1341 else if (const auto *TD = dyn_cast<OMPParallelMasterTaskLoopDirective>(&D)) 1342 HasCancel = TD->hasCancel(); 1343 1344 CodeGenFunction CGF(CGM, true); 1345 CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, 1346 InnermostKind, HasCancel, Action); 1347 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 1348 llvm::Function *Res = CGF.GenerateCapturedStmtFunction(*CS); 1349 if (!Tied) 1350 NumberOfParts = Action.getNumberOfParts(); 1351 return Res; 1352 } 1353 1354 static void buildStructValue(ConstantStructBuilder &Fields, CodeGenModule &CGM, 1355 const RecordDecl *RD, 
const CGRecordLayout &RL, 1356 ArrayRef<llvm::Constant *> Data) { 1357 llvm::StructType *StructTy = RL.getLLVMType(); 1358 unsigned PrevIdx = 0; 1359 ConstantInitBuilder CIBuilder(CGM); 1360 auto DI = Data.begin(); 1361 for (const FieldDecl *FD : RD->fields()) { 1362 unsigned Idx = RL.getLLVMFieldNo(FD); 1363 // Fill the alignment. 1364 for (unsigned I = PrevIdx; I < Idx; ++I) 1365 Fields.add(llvm::Constant::getNullValue(StructTy->getElementType(I))); 1366 PrevIdx = Idx + 1; 1367 Fields.add(*DI); 1368 ++DI; 1369 } 1370 } 1371 1372 template <class... As> 1373 static llvm::GlobalVariable * 1374 createGlobalStruct(CodeGenModule &CGM, QualType Ty, bool IsConstant, 1375 ArrayRef<llvm::Constant *> Data, const Twine &Name, 1376 As &&... Args) { 1377 const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl()); 1378 const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD); 1379 ConstantInitBuilder CIBuilder(CGM); 1380 ConstantStructBuilder Fields = CIBuilder.beginStruct(RL.getLLVMType()); 1381 buildStructValue(Fields, CGM, RD, RL, Data); 1382 return Fields.finishAndCreateGlobal( 1383 Name, CGM.getContext().getAlignOfGlobalVarInChars(Ty), IsConstant, 1384 std::forward<As>(Args)...); 1385 } 1386 1387 template <typename T> 1388 static void 1389 createConstantGlobalStructAndAddToParent(CodeGenModule &CGM, QualType Ty, 1390 ArrayRef<llvm::Constant *> Data, 1391 T &Parent) { 1392 const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl()); 1393 const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD); 1394 ConstantStructBuilder Fields = Parent.beginStruct(RL.getLLVMType()); 1395 buildStructValue(Fields, CGM, RD, RL, Data); 1396 Fields.finishAndAddTo(Parent); 1397 } 1398 1399 void CGOpenMPRuntime::setLocThreadIdInsertPt(CodeGenFunction &CGF, 1400 bool AtCurrentPoint) { 1401 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); 1402 assert(!Elem.second.ServiceInsertPt && "Insert point is set already."); 1403 1404 llvm::Value *Undef = llvm::UndefValue::get(CGF.Int32Ty); 
1405 if (AtCurrentPoint) { 1406 Elem.second.ServiceInsertPt = new llvm::BitCastInst( 1407 Undef, CGF.Int32Ty, "svcpt", CGF.Builder.GetInsertBlock()); 1408 } else { 1409 Elem.second.ServiceInsertPt = 1410 new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt"); 1411 Elem.second.ServiceInsertPt->insertAfter(CGF.AllocaInsertPt); 1412 } 1413 } 1414 1415 void CGOpenMPRuntime::clearLocThreadIdInsertPt(CodeGenFunction &CGF) { 1416 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); 1417 if (Elem.second.ServiceInsertPt) { 1418 llvm::Instruction *Ptr = Elem.second.ServiceInsertPt; 1419 Elem.second.ServiceInsertPt = nullptr; 1420 Ptr->eraseFromParent(); 1421 } 1422 } 1423 1424 static StringRef getIdentStringFromSourceLocation(CodeGenFunction &CGF, 1425 SourceLocation Loc, 1426 SmallString<128> &Buffer) { 1427 llvm::raw_svector_ostream OS(Buffer); 1428 // Build debug location 1429 PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc); 1430 OS << ";" << PLoc.getFilename() << ";"; 1431 if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl)) 1432 OS << FD->getQualifiedNameAsString(); 1433 OS << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;"; 1434 return OS.str(); 1435 } 1436 1437 llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF, 1438 SourceLocation Loc, 1439 unsigned Flags) { 1440 llvm::Constant *SrcLocStr; 1441 if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo || 1442 Loc.isInvalid()) { 1443 SrcLocStr = OMPBuilder.getOrCreateDefaultSrcLocStr(); 1444 } else { 1445 std::string FunctionName = ""; 1446 if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl)) 1447 FunctionName = FD->getQualifiedNameAsString(); 1448 PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc); 1449 const char *FileName = PLoc.getFilename(); 1450 unsigned Line = PLoc.getLine(); 1451 unsigned Column = PLoc.getColumn(); 1452 SrcLocStr = 1453 
OMPBuilder.getOrCreateSrcLocStr(FunctionName, FileName, Line, Column); 1454 } 1455 unsigned Reserved2Flags = getDefaultLocationReserved2Flags(); 1456 return OMPBuilder.getOrCreateIdent(SrcLocStr, llvm::omp::IdentFlag(Flags), 1457 Reserved2Flags); 1458 } 1459 1460 llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF, 1461 SourceLocation Loc) { 1462 assert(CGF.CurFn && "No function in current CodeGenFunction."); 1463 // If the OpenMPIRBuilder is used we need to use it for all thread id calls as 1464 // the clang invariants used below might be broken. 1465 if (CGM.getLangOpts().OpenMPIRBuilder) { 1466 SmallString<128> Buffer; 1467 OMPBuilder.updateToLocation(CGF.Builder.saveIP()); 1468 auto *SrcLocStr = OMPBuilder.getOrCreateSrcLocStr( 1469 getIdentStringFromSourceLocation(CGF, Loc, Buffer)); 1470 return OMPBuilder.getOrCreateThreadID( 1471 OMPBuilder.getOrCreateIdent(SrcLocStr)); 1472 } 1473 1474 llvm::Value *ThreadID = nullptr; 1475 // Check whether we've already cached a load of the thread id in this 1476 // function. 1477 auto I = OpenMPLocThreadIDMap.find(CGF.CurFn); 1478 if (I != OpenMPLocThreadIDMap.end()) { 1479 ThreadID = I->second.ThreadID; 1480 if (ThreadID != nullptr) 1481 return ThreadID; 1482 } 1483 // If exceptions are enabled, do not use parameter to avoid possible crash. 1484 if (auto *OMPRegionInfo = 1485 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) { 1486 if (OMPRegionInfo->getThreadIDVariable()) { 1487 // Check if this an outlined function with thread id passed as argument. 
1488 LValue LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF); 1489 llvm::BasicBlock *TopBlock = CGF.AllocaInsertPt->getParent(); 1490 if (!CGF.EHStack.requiresLandingPad() || !CGF.getLangOpts().Exceptions || 1491 !CGF.getLangOpts().CXXExceptions || 1492 CGF.Builder.GetInsertBlock() == TopBlock || 1493 !isa<llvm::Instruction>(LVal.getPointer(CGF)) || 1494 cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() == 1495 TopBlock || 1496 cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() == 1497 CGF.Builder.GetInsertBlock()) { 1498 ThreadID = CGF.EmitLoadOfScalar(LVal, Loc); 1499 // If value loaded in entry block, cache it and use it everywhere in 1500 // function. 1501 if (CGF.Builder.GetInsertBlock() == TopBlock) { 1502 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); 1503 Elem.second.ThreadID = ThreadID; 1504 } 1505 return ThreadID; 1506 } 1507 } 1508 } 1509 1510 // This is not an outlined function region - need to call __kmpc_int32 1511 // kmpc_global_thread_num(ident_t *loc). 1512 // Generate thread id value and cache this value for use across the 1513 // function. 
1514 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); 1515 if (!Elem.second.ServiceInsertPt) 1516 setLocThreadIdInsertPt(CGF); 1517 CGBuilderTy::InsertPointGuard IPG(CGF.Builder); 1518 CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt); 1519 llvm::CallInst *Call = CGF.Builder.CreateCall( 1520 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), 1521 OMPRTL___kmpc_global_thread_num), 1522 emitUpdateLocation(CGF, Loc)); 1523 Call->setCallingConv(CGF.getRuntimeCC()); 1524 Elem.second.ThreadID = Call; 1525 return Call; 1526 } 1527 1528 void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) { 1529 assert(CGF.CurFn && "No function in current CodeGenFunction."); 1530 if (OpenMPLocThreadIDMap.count(CGF.CurFn)) { 1531 clearLocThreadIdInsertPt(CGF); 1532 OpenMPLocThreadIDMap.erase(CGF.CurFn); 1533 } 1534 if (FunctionUDRMap.count(CGF.CurFn) > 0) { 1535 for(const auto *D : FunctionUDRMap[CGF.CurFn]) 1536 UDRMap.erase(D); 1537 FunctionUDRMap.erase(CGF.CurFn); 1538 } 1539 auto I = FunctionUDMMap.find(CGF.CurFn); 1540 if (I != FunctionUDMMap.end()) { 1541 for(const auto *D : I->second) 1542 UDMMap.erase(D); 1543 FunctionUDMMap.erase(I); 1544 } 1545 LastprivateConditionalToTypes.erase(CGF.CurFn); 1546 FunctionToUntiedTaskStackMap.erase(CGF.CurFn); 1547 } 1548 1549 llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() { 1550 return OMPBuilder.IdentPtr; 1551 } 1552 1553 llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() { 1554 if (!Kmpc_MicroTy) { 1555 // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...) 
1556 llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty), 1557 llvm::PointerType::getUnqual(CGM.Int32Ty)}; 1558 Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true); 1559 } 1560 return llvm::PointerType::getUnqual(Kmpc_MicroTy); 1561 } 1562 1563 llvm::FunctionCallee 1564 CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize, bool IVSigned, 1565 bool IsGPUDistribute) { 1566 assert((IVSize == 32 || IVSize == 64) && 1567 "IV size is not compatible with the omp runtime"); 1568 StringRef Name; 1569 if (IsGPUDistribute) 1570 Name = IVSize == 32 ? (IVSigned ? "__kmpc_distribute_static_init_4" 1571 : "__kmpc_distribute_static_init_4u") 1572 : (IVSigned ? "__kmpc_distribute_static_init_8" 1573 : "__kmpc_distribute_static_init_8u"); 1574 else 1575 Name = IVSize == 32 ? (IVSigned ? "__kmpc_for_static_init_4" 1576 : "__kmpc_for_static_init_4u") 1577 : (IVSigned ? "__kmpc_for_static_init_8" 1578 : "__kmpc_for_static_init_8u"); 1579 1580 llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty; 1581 auto *PtrTy = llvm::PointerType::getUnqual(ITy); 1582 llvm::Type *TypeParams[] = { 1583 getIdentTyPointerTy(), // loc 1584 CGM.Int32Ty, // tid 1585 CGM.Int32Ty, // schedtype 1586 llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter 1587 PtrTy, // p_lower 1588 PtrTy, // p_upper 1589 PtrTy, // p_stride 1590 ITy, // incr 1591 ITy // chunk 1592 }; 1593 auto *FnTy = 1594 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1595 return CGM.CreateRuntimeFunction(FnTy, Name); 1596 } 1597 1598 llvm::FunctionCallee 1599 CGOpenMPRuntime::createDispatchInitFunction(unsigned IVSize, bool IVSigned) { 1600 assert((IVSize == 32 || IVSize == 64) && 1601 "IV size is not compatible with the omp runtime"); 1602 StringRef Name = 1603 IVSize == 32 1604 ? (IVSigned ? "__kmpc_dispatch_init_4" : "__kmpc_dispatch_init_4u") 1605 : (IVSigned ? "__kmpc_dispatch_init_8" : "__kmpc_dispatch_init_8u"); 1606 llvm::Type *ITy = IVSize == 32 ? 
CGM.Int32Ty : CGM.Int64Ty; 1607 llvm::Type *TypeParams[] = { getIdentTyPointerTy(), // loc 1608 CGM.Int32Ty, // tid 1609 CGM.Int32Ty, // schedtype 1610 ITy, // lower 1611 ITy, // upper 1612 ITy, // stride 1613 ITy // chunk 1614 }; 1615 auto *FnTy = 1616 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1617 return CGM.CreateRuntimeFunction(FnTy, Name); 1618 } 1619 1620 llvm::FunctionCallee 1621 CGOpenMPRuntime::createDispatchFiniFunction(unsigned IVSize, bool IVSigned) { 1622 assert((IVSize == 32 || IVSize == 64) && 1623 "IV size is not compatible with the omp runtime"); 1624 StringRef Name = 1625 IVSize == 32 1626 ? (IVSigned ? "__kmpc_dispatch_fini_4" : "__kmpc_dispatch_fini_4u") 1627 : (IVSigned ? "__kmpc_dispatch_fini_8" : "__kmpc_dispatch_fini_8u"); 1628 llvm::Type *TypeParams[] = { 1629 getIdentTyPointerTy(), // loc 1630 CGM.Int32Ty, // tid 1631 }; 1632 auto *FnTy = 1633 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 1634 return CGM.CreateRuntimeFunction(FnTy, Name); 1635 } 1636 1637 llvm::FunctionCallee 1638 CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize, bool IVSigned) { 1639 assert((IVSize == 32 || IVSize == 64) && 1640 "IV size is not compatible with the omp runtime"); 1641 StringRef Name = 1642 IVSize == 32 1643 ? (IVSigned ? "__kmpc_dispatch_next_4" : "__kmpc_dispatch_next_4u") 1644 : (IVSigned ? "__kmpc_dispatch_next_8" : "__kmpc_dispatch_next_8u"); 1645 llvm::Type *ITy = IVSize == 32 ? 
CGM.Int32Ty : CGM.Int64Ty; 1646 auto *PtrTy = llvm::PointerType::getUnqual(ITy); 1647 llvm::Type *TypeParams[] = { 1648 getIdentTyPointerTy(), // loc 1649 CGM.Int32Ty, // tid 1650 llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter 1651 PtrTy, // p_lower 1652 PtrTy, // p_upper 1653 PtrTy // p_stride 1654 }; 1655 auto *FnTy = 1656 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 1657 return CGM.CreateRuntimeFunction(FnTy, Name); 1658 } 1659 1660 /// Obtain information that uniquely identifies a target entry. This 1661 /// consists of the file and device IDs as well as line number associated with 1662 /// the relevant entry source location. 1663 static void getTargetEntryUniqueInfo(ASTContext &C, SourceLocation Loc, 1664 unsigned &DeviceID, unsigned &FileID, 1665 unsigned &LineNum) { 1666 SourceManager &SM = C.getSourceManager(); 1667 1668 // The loc should be always valid and have a file ID (the user cannot use 1669 // #pragma directives in macros) 1670 1671 assert(Loc.isValid() && "Source location is expected to be always valid."); 1672 1673 PresumedLoc PLoc = SM.getPresumedLoc(Loc); 1674 assert(PLoc.isValid() && "Source location is expected to be always valid."); 1675 1676 llvm::sys::fs::UniqueID ID; 1677 if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID)) { 1678 PLoc = SM.getPresumedLoc(Loc, /*UseLineDirectives=*/false); 1679 assert(PLoc.isValid() && "Source location is expected to be always valid."); 1680 if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID)) 1681 SM.getDiagnostics().Report(diag::err_cannot_open_file) 1682 << PLoc.getFilename() << EC.message(); 1683 } 1684 1685 DeviceID = ID.getDevice(); 1686 FileID = ID.getFile(); 1687 LineNum = PLoc.getLine(); 1688 } 1689 1690 Address CGOpenMPRuntime::getAddrOfDeclareTargetVar(const VarDecl *VD) { 1691 if (CGM.getLangOpts().OpenMPSimd) 1692 return Address::invalid(); 1693 llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 1694 
OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD); 1695 if (Res && (*Res == OMPDeclareTargetDeclAttr::MT_Link || 1696 (*Res == OMPDeclareTargetDeclAttr::MT_To && 1697 HasRequiresUnifiedSharedMemory))) { 1698 SmallString<64> PtrName; 1699 { 1700 llvm::raw_svector_ostream OS(PtrName); 1701 OS << CGM.getMangledName(GlobalDecl(VD)); 1702 if (!VD->isExternallyVisible()) { 1703 unsigned DeviceID, FileID, Line; 1704 getTargetEntryUniqueInfo(CGM.getContext(), 1705 VD->getCanonicalDecl()->getBeginLoc(), 1706 DeviceID, FileID, Line); 1707 OS << llvm::format("_%x", FileID); 1708 } 1709 OS << "_decl_tgt_ref_ptr"; 1710 } 1711 llvm::Value *Ptr = CGM.getModule().getNamedValue(PtrName); 1712 if (!Ptr) { 1713 QualType PtrTy = CGM.getContext().getPointerType(VD->getType()); 1714 Ptr = getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(PtrTy), 1715 PtrName); 1716 1717 auto *GV = cast<llvm::GlobalVariable>(Ptr); 1718 GV->setLinkage(llvm::GlobalValue::WeakAnyLinkage); 1719 1720 if (!CGM.getLangOpts().OpenMPIsDevice) 1721 GV->setInitializer(CGM.GetAddrOfGlobal(VD)); 1722 registerTargetGlobalVariable(VD, cast<llvm::Constant>(Ptr)); 1723 } 1724 return Address(Ptr, CGM.getContext().getDeclAlign(VD)); 1725 } 1726 return Address::invalid(); 1727 } 1728 1729 llvm::Constant * 1730 CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) { 1731 assert(!CGM.getLangOpts().OpenMPUseTLS || 1732 !CGM.getContext().getTargetInfo().isTLSSupported()); 1733 // Lookup the entry, lazily creating it if necessary. 
1734 std::string Suffix = getName({"cache", ""}); 1735 return getOrCreateInternalVariable( 1736 CGM.Int8PtrPtrTy, Twine(CGM.getMangledName(VD)).concat(Suffix)); 1737 } 1738 1739 Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF, 1740 const VarDecl *VD, 1741 Address VDAddr, 1742 SourceLocation Loc) { 1743 if (CGM.getLangOpts().OpenMPUseTLS && 1744 CGM.getContext().getTargetInfo().isTLSSupported()) 1745 return VDAddr; 1746 1747 llvm::Type *VarTy = VDAddr.getElementType(); 1748 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 1749 CGF.Builder.CreatePointerCast(VDAddr.getPointer(), 1750 CGM.Int8PtrTy), 1751 CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)), 1752 getOrCreateThreadPrivateCache(VD)}; 1753 return Address(CGF.EmitRuntimeCall( 1754 OMPBuilder.getOrCreateRuntimeFunction( 1755 CGM.getModule(), OMPRTL___kmpc_threadprivate_cached), 1756 Args), 1757 VDAddr.getAlignment()); 1758 } 1759 1760 void CGOpenMPRuntime::emitThreadPrivateVarInit( 1761 CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor, 1762 llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) { 1763 // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime 1764 // library. 1765 llvm::Value *OMPLoc = emitUpdateLocation(CGF, Loc); 1766 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 1767 CGM.getModule(), OMPRTL___kmpc_global_thread_num), 1768 OMPLoc); 1769 // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor) 1770 // to register constructor/destructor for variable. 
1771 llvm::Value *Args[] = { 1772 OMPLoc, CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.VoidPtrTy), 1773 Ctor, CopyCtor, Dtor}; 1774 CGF.EmitRuntimeCall( 1775 OMPBuilder.getOrCreateRuntimeFunction( 1776 CGM.getModule(), OMPRTL___kmpc_threadprivate_register), 1777 Args); 1778 } 1779 1780 llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition( 1781 const VarDecl *VD, Address VDAddr, SourceLocation Loc, 1782 bool PerformInit, CodeGenFunction *CGF) { 1783 if (CGM.getLangOpts().OpenMPUseTLS && 1784 CGM.getContext().getTargetInfo().isTLSSupported()) 1785 return nullptr; 1786 1787 VD = VD->getDefinition(CGM.getContext()); 1788 if (VD && ThreadPrivateWithDefinition.insert(CGM.getMangledName(VD)).second) { 1789 QualType ASTTy = VD->getType(); 1790 1791 llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr; 1792 const Expr *Init = VD->getAnyInitializer(); 1793 if (CGM.getLangOpts().CPlusPlus && PerformInit) { 1794 // Generate function that re-emits the declaration's initializer into the 1795 // threadprivate copy of the variable VD 1796 CodeGenFunction CtorCGF(CGM); 1797 FunctionArgList Args; 1798 ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc, 1799 /*Id=*/nullptr, CGM.getContext().VoidPtrTy, 1800 ImplicitParamDecl::Other); 1801 Args.push_back(&Dst); 1802 1803 const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration( 1804 CGM.getContext().VoidPtrTy, Args); 1805 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); 1806 std::string Name = getName({"__kmpc_global_ctor_", ""}); 1807 llvm::Function *Fn = 1808 CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc); 1809 CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI, 1810 Args, Loc, Loc); 1811 llvm::Value *ArgVal = CtorCGF.EmitLoadOfScalar( 1812 CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false, 1813 CGM.getContext().VoidPtrTy, Dst.getLocation()); 1814 Address Arg = Address(ArgVal, VDAddr.getAlignment()); 1815 Arg = 
CtorCGF.Builder.CreateElementBitCast( 1816 Arg, CtorCGF.ConvertTypeForMem(ASTTy)); 1817 CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(), 1818 /*IsInitializer=*/true); 1819 ArgVal = CtorCGF.EmitLoadOfScalar( 1820 CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false, 1821 CGM.getContext().VoidPtrTy, Dst.getLocation()); 1822 CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue); 1823 CtorCGF.FinishFunction(); 1824 Ctor = Fn; 1825 } 1826 if (VD->getType().isDestructedType() != QualType::DK_none) { 1827 // Generate function that emits destructor call for the threadprivate copy 1828 // of the variable VD 1829 CodeGenFunction DtorCGF(CGM); 1830 FunctionArgList Args; 1831 ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc, 1832 /*Id=*/nullptr, CGM.getContext().VoidPtrTy, 1833 ImplicitParamDecl::Other); 1834 Args.push_back(&Dst); 1835 1836 const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration( 1837 CGM.getContext().VoidTy, Args); 1838 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); 1839 std::string Name = getName({"__kmpc_global_dtor_", ""}); 1840 llvm::Function *Fn = 1841 CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc); 1842 auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF); 1843 DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args, 1844 Loc, Loc); 1845 // Create a scope with an artificial location for the body of this function. 1846 auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF); 1847 llvm::Value *ArgVal = DtorCGF.EmitLoadOfScalar( 1848 DtorCGF.GetAddrOfLocalVar(&Dst), 1849 /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation()); 1850 DtorCGF.emitDestroy(Address(ArgVal, VDAddr.getAlignment()), ASTTy, 1851 DtorCGF.getDestroyer(ASTTy.isDestructedType()), 1852 DtorCGF.needsEHCleanup(ASTTy.isDestructedType())); 1853 DtorCGF.FinishFunction(); 1854 Dtor = Fn; 1855 } 1856 // Do not emit init function if it is not required. 
/// Create and register the offload entries for the constructor and destructor
/// of a declare target variable.
///
/// Nothing is emitted (and false is returned) when no offload target triples
/// are configured and this is not a device compilation. Otherwise the function
/// returns CGM.getLangOpts().OpenMPIsDevice on every path, including the early
/// exits taken when \p VD is not a declare target declaration, is mapped with
/// MT_Link, is mapped with MT_To while HasRequiresUnifiedSharedMemory is set,
/// or has already been recorded in DeclareTargetWithDefinition.
///
/// When compiling for the device, real functions `<prefix>_ctor` /
/// `<prefix>_dtor` are generated that initialize / destroy the storage at
/// \p Addr. Otherwise (host side) a private constant i8 global with the same
/// name is created and used both as the entry address and as its ID.
///
/// \param VD          The variable; rebound to its definition, which must
///                    exist (asserted).
/// \param Addr        Global variable holding the storage of \p VD.
/// \param PerformInit Whether a constructor entry should be produced (only
///                    honoured for C++).
bool CGOpenMPRuntime::emitDeclareTargetVarDefinition(const VarDecl *VD,
                                                     llvm::GlobalVariable *Addr,
                                                     bool PerformInit) {
  // No offloading requested and not a device compilation: nothing to do.
  if (CGM.getLangOpts().OMPTargetTriples.empty() &&
      !CGM.getLangOpts().OpenMPIsDevice)
    return false;
  Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
  // Only 'to' variables without unified shared memory get ctor/dtor entries.
  if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
      (*Res == OMPDeclareTargetDeclAttr::MT_To &&
       HasRequiresUnifiedSharedMemory))
    return CGM.getLangOpts().OpenMPIsDevice;
  VD = VD->getDefinition(CGM.getContext());
  assert(VD && "Unknown VarDecl");

  // Each variable (by mangled name) is processed at most once.
  if (!DeclareTargetWithDefinition.insert(CGM.getMangledName(VD)).second)
    return CGM.getLangOpts().OpenMPIsDevice;

  QualType ASTTy = VD->getType();
  SourceLocation Loc = VD->getCanonicalDecl()->getBeginLoc();

  // Produce the unique prefix to identify the new target regions. We use
  // the source location of the variable declaration which we know to not
  // conflict with any target region.
  unsigned DeviceID;
  unsigned FileID;
  unsigned Line;
  getTargetEntryUniqueInfo(CGM.getContext(), Loc, DeviceID, FileID, Line);
  SmallString<128> Buffer, Out;
  {
    // Resulting prefix: "__omp_offloading_" "_<device-hex>" "_<file-hex>_"
    // "<name>" "_l<line>". The first two pieces put two consecutive
    // underscores before the device id.
    llvm::raw_svector_ostream OS(Buffer);
    OS << "__omp_offloading_" << llvm::format("_%x", DeviceID)
       << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line;
  }

  const Expr *Init = VD->getAnyInitializer();
  if (CGM.getLangOpts().CPlusPlus && PerformInit) {
    llvm::Constant *Ctor;
    llvm::Constant *ID;
    if (CGM.getLangOpts().OpenMPIsDevice) {
      // Generate a function that re-emits the declaration's initializer into
      // the storage of the declare target variable VD (at Addr).
      // NOTE(review): Init is dereferenced below without a null check;
      // presumably PerformInit implies VD has an initializer - confirm
      // against callers.
      CodeGenFunction CtorCGF(CGM);

      const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction(
          FTy, Twine(Buffer, "_ctor"), FI, Loc);
      // Start the function with an empty debug location ...
      auto NL = ApplyDebugLocation::CreateEmpty(CtorCGF);
      CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
                            FunctionArgList(), Loc, Loc);
      // ... and emit its body with an artificial location.
      auto AL = ApplyDebugLocation::CreateArtificial(CtorCGF);
      CtorCGF.EmitAnyExprToMem(Init,
                               Address(Addr, CGM.getContext().getDeclAlign(VD)),
                               Init->getType().getQualifiers(),
                               /*IsInitializer=*/true);
      CtorCGF.FinishFunction();
      Ctor = Fn;
      ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
      // Add the function to the module's "used" list.
      CGM.addUsedGlobal(cast<llvm::GlobalValue>(Ctor));
    } else {
      // Host side: a private constant i8 placeholder serves as both the entry
      // address and its ID.
      Ctor = new llvm::GlobalVariable(
          CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
          llvm::GlobalValue::PrivateLinkage,
          llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_ctor"));
      ID = Ctor;
    }

    // Register the information for the entry associated with the constructor.
    Out.clear();
    OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
        DeviceID, FileID, Twine(Buffer, "_ctor").toStringRef(Out), Line, Ctor,
        ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryCtor);
  }
  if (VD->getType().isDestructedType() != QualType::DK_none) {
    llvm::Constant *Dtor;
    llvm::Constant *ID;
    if (CGM.getLangOpts().OpenMPIsDevice) {
      // Generate a function that emits the destructor call for the storage of
      // the declare target variable VD (at Addr).
      CodeGenFunction DtorCGF(CGM);

      const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction(
          FTy, Twine(Buffer, "_dtor"), FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
      DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
                            FunctionArgList(), Loc, Loc);
      // Create a scope with an artificial location for the body of this
      // function.
      auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
      DtorCGF.emitDestroy(Address(Addr, CGM.getContext().getDeclAlign(VD)),
                          ASTTy, DtorCGF.getDestroyer(ASTTy.isDestructedType()),
                          DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
      DtorCGF.FinishFunction();
      Dtor = Fn;
      ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
      // Add the function to the module's "used" list.
      CGM.addUsedGlobal(cast<llvm::GlobalValue>(Dtor));
    } else {
      // Host side: placeholder global, same scheme as for the constructor.
      Dtor = new llvm::GlobalVariable(
          CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
          llvm::GlobalValue::PrivateLinkage,
          llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_dtor"));
      ID = Dtor;
    }
    // Register the information for the entry associated with the destructor.
    Out.clear();
    OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
        DeviceID, FileID, Twine(Buffer, "_dtor").toStringRef(Out), Line, Dtor,
        ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryDtor);
  }
  return CGM.getLangOpts().OpenMPIsDevice;
}
getOrCreateInternalVariable( 2036 CGM.VoidPtrPtrTy, Twine(Name).concat(Suffix).concat(CacheSuffix))}; 2037 return Address( 2038 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 2039 CGF.EmitRuntimeCall( 2040 OMPBuilder.getOrCreateRuntimeFunction( 2041 CGM.getModule(), OMPRTL___kmpc_threadprivate_cached), 2042 Args), 2043 VarLVType->getPointerTo(/*AddrSpace=*/0)), 2044 CGM.getContext().getTypeAlignInChars(VarType)); 2045 } 2046 2047 void CGOpenMPRuntime::emitIfClause(CodeGenFunction &CGF, const Expr *Cond, 2048 const RegionCodeGenTy &ThenGen, 2049 const RegionCodeGenTy &ElseGen) { 2050 CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange()); 2051 2052 // If the condition constant folds and can be elided, try to avoid emitting 2053 // the condition and the dead arm of the if/else. 2054 bool CondConstant; 2055 if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) { 2056 if (CondConstant) 2057 ThenGen(CGF); 2058 else 2059 ElseGen(CGF); 2060 return; 2061 } 2062 2063 // Otherwise, the condition did not fold, or we couldn't elide it. Just 2064 // emit the conditional branch. 2065 llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("omp_if.then"); 2066 llvm::BasicBlock *ElseBlock = CGF.createBasicBlock("omp_if.else"); 2067 llvm::BasicBlock *ContBlock = CGF.createBasicBlock("omp_if.end"); 2068 CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0); 2069 2070 // Emit the 'then' code. 2071 CGF.EmitBlock(ThenBlock); 2072 ThenGen(CGF); 2073 CGF.EmitBranch(ContBlock); 2074 // Emit the 'else' code if present. 2075 // There is no need to emit line number for unconditional branch. 2076 (void)ApplyDebugLocation::CreateEmpty(CGF); 2077 CGF.EmitBlock(ElseBlock); 2078 ElseGen(CGF); 2079 // There is no need to emit line number for unconditional branch. 2080 (void)ApplyDebugLocation::CreateEmpty(CGF); 2081 CGF.EmitBranch(ContBlock); 2082 // Emit the continuation block for code after the if. 
2083 CGF.EmitBlock(ContBlock, /*IsFinished=*/true); 2084 } 2085 2086 void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc, 2087 llvm::Function *OutlinedFn, 2088 ArrayRef<llvm::Value *> CapturedVars, 2089 const Expr *IfCond, 2090 llvm::Value *NumThreads) { 2091 if (!CGF.HaveInsertPoint()) 2092 return; 2093 llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc); 2094 auto &M = CGM.getModule(); 2095 auto &&ThenGen = [&M, OutlinedFn, CapturedVars, RTLoc, 2096 this](CodeGenFunction &CGF, PrePostActionTy &) { 2097 // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn); 2098 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime(); 2099 llvm::Value *Args[] = { 2100 RTLoc, 2101 CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars 2102 CGF.Builder.CreateBitCast(OutlinedFn, RT.getKmpc_MicroPointerTy())}; 2103 llvm::SmallVector<llvm::Value *, 16> RealArgs; 2104 RealArgs.append(std::begin(Args), std::end(Args)); 2105 RealArgs.append(CapturedVars.begin(), CapturedVars.end()); 2106 2107 llvm::FunctionCallee RTLFn = 2108 OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_fork_call); 2109 CGF.EmitRuntimeCall(RTLFn, RealArgs); 2110 }; 2111 auto &&ElseGen = [&M, OutlinedFn, CapturedVars, RTLoc, Loc, 2112 this](CodeGenFunction &CGF, PrePostActionTy &) { 2113 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime(); 2114 llvm::Value *ThreadID = RT.getThreadID(CGF, Loc); 2115 // Build calls: 2116 // __kmpc_serialized_parallel(&Loc, GTid); 2117 llvm::Value *Args[] = {RTLoc, ThreadID}; 2118 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 2119 M, OMPRTL___kmpc_serialized_parallel), 2120 Args); 2121 2122 // OutlinedFn(>id, &zero_bound, CapturedStruct); 2123 Address ThreadIDAddr = RT.emitThreadIDAddress(CGF, Loc); 2124 Address ZeroAddrBound = 2125 CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty, 2126 /*Name=*/".bound.zero.addr"); 2127 CGF.Builder.CreateStore(CGF.Builder.getInt32(/*C*/ 0), ZeroAddrBound); 2128 llvm::SmallVector<llvm::Value *, 16> 
OutlinedFnArgs; 2129 // ThreadId for serialized parallels is 0. 2130 OutlinedFnArgs.push_back(ThreadIDAddr.getPointer()); 2131 OutlinedFnArgs.push_back(ZeroAddrBound.getPointer()); 2132 OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end()); 2133 2134 // Ensure we do not inline the function. This is trivially true for the ones 2135 // passed to __kmpc_fork_call but the ones called in serialized regions 2136 // could be inlined. This is not a perfect but it is closer to the invariant 2137 // we want, namely, every data environment starts with a new function. 2138 // TODO: We should pass the if condition to the runtime function and do the 2139 // handling there. Much cleaner code. 2140 OutlinedFn->removeFnAttr(llvm::Attribute::AlwaysInline); 2141 OutlinedFn->addFnAttr(llvm::Attribute::NoInline); 2142 RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs); 2143 2144 // __kmpc_end_serialized_parallel(&Loc, GTid); 2145 llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID}; 2146 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 2147 M, OMPRTL___kmpc_end_serialized_parallel), 2148 EndArgs); 2149 }; 2150 if (IfCond) { 2151 emitIfClause(CGF, IfCond, ThenGen, ElseGen); 2152 } else { 2153 RegionCodeGenTy ThenRCG(ThenGen); 2154 ThenRCG(CGF); 2155 } 2156 } 2157 2158 // If we're inside an (outlined) parallel region, use the region info's 2159 // thread-ID variable (it is passed in a first argument of the outlined function 2160 // as "kmp_int32 *gtid"). Otherwise, if we're not inside parallel region, but in 2161 // regular serial code region, get thread ID by calling kmp_int32 2162 // kmpc_global_thread_num(ident_t *loc), stash this thread ID in a temporary and 2163 // return the address of that temp. 
2164 Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF, 2165 SourceLocation Loc) { 2166 if (auto *OMPRegionInfo = 2167 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) 2168 if (OMPRegionInfo->getThreadIDVariable()) 2169 return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress(CGF); 2170 2171 llvm::Value *ThreadID = getThreadID(CGF, Loc); 2172 QualType Int32Ty = 2173 CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true); 2174 Address ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp."); 2175 CGF.EmitStoreOfScalar(ThreadID, 2176 CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty)); 2177 2178 return ThreadIDTemp; 2179 } 2180 2181 llvm::GlobalVariable *CGOpenMPRuntime::getOrCreateInternalVariable( 2182 llvm::Type *Ty, const llvm::Twine &Name, unsigned AddressSpace) { 2183 SmallString<256> Buffer; 2184 llvm::raw_svector_ostream Out(Buffer); 2185 Out << Name; 2186 StringRef RuntimeName = Out.str(); 2187 auto &Elem = *InternalVars.try_emplace(RuntimeName, nullptr).first; 2188 if (Elem.second) { 2189 assert(Elem.second->getType()->isOpaqueOrPointeeTypeMatches(Ty) && 2190 "OMP internal variable has different type than requested"); 2191 return &*Elem.second; 2192 } 2193 2194 return Elem.second = new llvm::GlobalVariable( 2195 CGM.getModule(), Ty, /*IsConstant*/ false, 2196 llvm::GlobalValue::CommonLinkage, llvm::Constant::getNullValue(Ty), 2197 Elem.first(), /*InsertBefore=*/nullptr, 2198 llvm::GlobalValue::NotThreadLocal, AddressSpace); 2199 } 2200 2201 llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) { 2202 std::string Prefix = Twine("gomp_critical_user_", CriticalName).str(); 2203 std::string Name = getName({Prefix, "var"}); 2204 return getOrCreateInternalVariable(KmpCriticalNameTy, Name); 2205 } 2206 2207 namespace { 2208 /// Common pre(post)-action for different OpenMP constructs. 
2209 class CommonActionTy final : public PrePostActionTy { 2210 llvm::FunctionCallee EnterCallee; 2211 ArrayRef<llvm::Value *> EnterArgs; 2212 llvm::FunctionCallee ExitCallee; 2213 ArrayRef<llvm::Value *> ExitArgs; 2214 bool Conditional; 2215 llvm::BasicBlock *ContBlock = nullptr; 2216 2217 public: 2218 CommonActionTy(llvm::FunctionCallee EnterCallee, 2219 ArrayRef<llvm::Value *> EnterArgs, 2220 llvm::FunctionCallee ExitCallee, 2221 ArrayRef<llvm::Value *> ExitArgs, bool Conditional = false) 2222 : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee), 2223 ExitArgs(ExitArgs), Conditional(Conditional) {} 2224 void Enter(CodeGenFunction &CGF) override { 2225 llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs); 2226 if (Conditional) { 2227 llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes); 2228 auto *ThenBlock = CGF.createBasicBlock("omp_if.then"); 2229 ContBlock = CGF.createBasicBlock("omp_if.end"); 2230 // Generate the branch (If-stmt) 2231 CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock); 2232 CGF.EmitBlock(ThenBlock); 2233 } 2234 } 2235 void Done(CodeGenFunction &CGF) { 2236 // Emit the rest of blocks/branches 2237 CGF.EmitBranch(ContBlock); 2238 CGF.EmitBlock(ContBlock, true); 2239 } 2240 void Exit(CodeGenFunction &CGF) override { 2241 CGF.EmitRuntimeCall(ExitCallee, ExitArgs); 2242 } 2243 }; 2244 } // anonymous namespace 2245 2246 void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF, 2247 StringRef CriticalName, 2248 const RegionCodeGenTy &CriticalOpGen, 2249 SourceLocation Loc, const Expr *Hint) { 2250 // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]); 2251 // CriticalOpGen(); 2252 // __kmpc_end_critical(ident_t *, gtid, Lock); 2253 // Prepare arguments and build a call to __kmpc_critical 2254 if (!CGF.HaveInsertPoint()) 2255 return; 2256 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 2257 getCriticalRegionLock(CriticalName)}; 2258 
llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args), 2259 std::end(Args)); 2260 if (Hint) { 2261 EnterArgs.push_back(CGF.Builder.CreateIntCast( 2262 CGF.EmitScalarExpr(Hint), CGM.Int32Ty, /*isSigned=*/false)); 2263 } 2264 CommonActionTy Action( 2265 OMPBuilder.getOrCreateRuntimeFunction( 2266 CGM.getModule(), 2267 Hint ? OMPRTL___kmpc_critical_with_hint : OMPRTL___kmpc_critical), 2268 EnterArgs, 2269 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), 2270 OMPRTL___kmpc_end_critical), 2271 Args); 2272 CriticalOpGen.setAction(Action); 2273 emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen); 2274 } 2275 2276 void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF, 2277 const RegionCodeGenTy &MasterOpGen, 2278 SourceLocation Loc) { 2279 if (!CGF.HaveInsertPoint()) 2280 return; 2281 // if(__kmpc_master(ident_t *, gtid)) { 2282 // MasterOpGen(); 2283 // __kmpc_end_master(ident_t *, gtid); 2284 // } 2285 // Prepare arguments and build a call to __kmpc_master 2286 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 2287 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction( 2288 CGM.getModule(), OMPRTL___kmpc_master), 2289 Args, 2290 OMPBuilder.getOrCreateRuntimeFunction( 2291 CGM.getModule(), OMPRTL___kmpc_end_master), 2292 Args, 2293 /*Conditional=*/true); 2294 MasterOpGen.setAction(Action); 2295 emitInlinedDirective(CGF, OMPD_master, MasterOpGen); 2296 Action.Done(CGF); 2297 } 2298 2299 void CGOpenMPRuntime::emitMaskedRegion(CodeGenFunction &CGF, 2300 const RegionCodeGenTy &MaskedOpGen, 2301 SourceLocation Loc, const Expr *Filter) { 2302 if (!CGF.HaveInsertPoint()) 2303 return; 2304 // if(__kmpc_masked(ident_t *, gtid, filter)) { 2305 // MaskedOpGen(); 2306 // __kmpc_end_masked(iden_t *, gtid); 2307 // } 2308 // Prepare arguments and build a call to __kmpc_masked 2309 llvm::Value *FilterVal = Filter 2310 ? 
CGF.EmitScalarExpr(Filter, CGF.Int32Ty) 2311 : llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/0); 2312 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 2313 FilterVal}; 2314 llvm::Value *ArgsEnd[] = {emitUpdateLocation(CGF, Loc), 2315 getThreadID(CGF, Loc)}; 2316 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction( 2317 CGM.getModule(), OMPRTL___kmpc_masked), 2318 Args, 2319 OMPBuilder.getOrCreateRuntimeFunction( 2320 CGM.getModule(), OMPRTL___kmpc_end_masked), 2321 ArgsEnd, 2322 /*Conditional=*/true); 2323 MaskedOpGen.setAction(Action); 2324 emitInlinedDirective(CGF, OMPD_masked, MaskedOpGen); 2325 Action.Done(CGF); 2326 } 2327 2328 void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF, 2329 SourceLocation Loc) { 2330 if (!CGF.HaveInsertPoint()) 2331 return; 2332 if (CGF.CGM.getLangOpts().OpenMPIRBuilder) { 2333 OMPBuilder.createTaskyield(CGF.Builder); 2334 } else { 2335 // Build call __kmpc_omp_taskyield(loc, thread_id, 0); 2336 llvm::Value *Args[] = { 2337 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 2338 llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)}; 2339 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 2340 CGM.getModule(), OMPRTL___kmpc_omp_taskyield), 2341 Args); 2342 } 2343 2344 if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) 2345 Region->emitUntiedSwitch(CGF); 2346 } 2347 2348 void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF, 2349 const RegionCodeGenTy &TaskgroupOpGen, 2350 SourceLocation Loc) { 2351 if (!CGF.HaveInsertPoint()) 2352 return; 2353 // __kmpc_taskgroup(ident_t *, gtid); 2354 // TaskgroupOpGen(); 2355 // __kmpc_end_taskgroup(ident_t *, gtid); 2356 // Prepare arguments and build a call to __kmpc_taskgroup 2357 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 2358 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction( 2359 CGM.getModule(), OMPRTL___kmpc_taskgroup), 2360 Args, 2361 
OMPBuilder.getOrCreateRuntimeFunction( 2362 CGM.getModule(), OMPRTL___kmpc_end_taskgroup), 2363 Args); 2364 TaskgroupOpGen.setAction(Action); 2365 emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen); 2366 } 2367 2368 /// Given an array of pointers to variables, project the address of a 2369 /// given variable. 2370 static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array, 2371 unsigned Index, const VarDecl *Var) { 2372 // Pull out the pointer to the variable. 2373 Address PtrAddr = CGF.Builder.CreateConstArrayGEP(Array, Index); 2374 llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr); 2375 2376 Address Addr = Address(Ptr, CGF.getContext().getDeclAlign(Var)); 2377 Addr = CGF.Builder.CreateElementBitCast( 2378 Addr, CGF.ConvertTypeForMem(Var->getType())); 2379 return Addr; 2380 } 2381 2382 static llvm::Value *emitCopyprivateCopyFunction( 2383 CodeGenModule &CGM, llvm::Type *ArgsType, 2384 ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs, 2385 ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps, 2386 SourceLocation Loc) { 2387 ASTContext &C = CGM.getContext(); 2388 // void copy_func(void *LHSArg, void *RHSArg); 2389 FunctionArgList Args; 2390 ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 2391 ImplicitParamDecl::Other); 2392 ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 2393 ImplicitParamDecl::Other); 2394 Args.push_back(&LHSArg); 2395 Args.push_back(&RHSArg); 2396 const auto &CGFI = 2397 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 2398 std::string Name = 2399 CGM.getOpenMPRuntime().getName({"omp", "copyprivate", "copy_func"}); 2400 auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI), 2401 llvm::GlobalValue::InternalLinkage, Name, 2402 &CGM.getModule()); 2403 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI); 2404 Fn->setDoesNotRecurse(); 2405 CodeGenFunction CGF(CGM); 2406 
CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
  // Dest = (void*[n])(LHSArg);
  // Src = (void*[n])(RHSArg);
  Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                  CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
                  ArgsType), CGF.getPointerAlign());
  Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                  CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
                  ArgsType), CGF.getPointerAlign());
  // *(Type0*)Dst[0] = *(Type0*)Src[0];
  // *(Type1*)Dst[1] = *(Type1*)Src[1];
  // ...
  // *(Typen*)Dst[n] = *(Typen*)Src[n];
  for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) {
    // Destination slot: I-th pointer of the LHS array.
    const auto *DestVar =
        cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl());
    Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar);

    // Source slot: I-th pointer of the RHS array.
    const auto *SrcVar =
        cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl());
    Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar);

    // The copied type comes from the original copyprivate variable.
    const auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl();
    QualType Type = VD->getType();
    CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]);
  }
  CGF.FinishFunction();
  return Fn;
}

/// Emit a 'single' region, optionally followed by the copyprivate broadcast.
///
/// The region body is emitted inline as OMPD_single under a conditional action
/// built from __kmpc_single / __kmpc_end_single. When \p CopyprivateVars is
/// non-empty, a 32-bit 'did_it' temporary is zero-initialized before the
/// region, set to 1 inside the guarded part, and afterwards
/// __kmpc_copyprivate is called with the list of variable addresses, a
/// generated copy function and the loaded 'did_it' value.
///
/// \param CopyprivateVars Variables named in the copyprivate clause.
/// \param SrcExprs,DstExprs,AssignmentOps Per-variable helper expressions;
///        all four arrays must have the same size (asserted).
///
/// Does nothing when \p CGF has no insert point.
void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF,
                                       const RegionCodeGenTy &SingleOpGen,
                                       SourceLocation Loc,
                                       ArrayRef<const Expr *> CopyprivateVars,
                                       ArrayRef<const Expr *> SrcExprs,
                                       ArrayRef<const Expr *> DstExprs,
                                       ArrayRef<const Expr *> AssignmentOps) {
  if (!CGF.HaveInsertPoint())
    return;
  assert(CopyprivateVars.size() == SrcExprs.size() &&
         CopyprivateVars.size() == DstExprs.size() &&
         CopyprivateVars.size() == AssignmentOps.size());
  ASTContext &C = CGM.getContext();
  // int32 did_it = 0;
  // if(__kmpc_single(ident_t *, gtid)) {
  //   SingleOpGen();
  //   __kmpc_end_single(ident_t *, gtid);
  //   did_it = 1;
  // }
  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
  // <copy_func>, did_it);

  // DidIt stays invalid when there is nothing to broadcast; its validity is
  // what later selects the copyprivate code paths.
  Address DidIt = Address::invalid();
  if (!CopyprivateVars.empty()) {
    // int32 did_it = 0;
    QualType KmpInt32Ty =
        C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
    DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it");
    CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt);
  }
  // Prepare arguments and build a call to __kmpc_single
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_single),
                        Args,
                        OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_end_single),
                        Args,
                        /*Conditional=*/true);
  SingleOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_single, SingleOpGen);
  if (DidIt.isValid()) {
    // did_it = 1;
    // Emitted before Action.Done(), i.e. still inside the guarded part.
    CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt);
  }
  Action.Done(CGF);
  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
  // <copy_func>, did_it);
  if (DidIt.isValid()) {
    llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size());
    QualType CopyprivateArrayTy = C.getConstantArrayType(
        C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal,
        /*IndexTypeQuals=*/0);
    // Create a list of all private variables for copyprivate.
    Address CopyprivateList =
        CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list");
    for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) {
      Address Elem = CGF.Builder.CreateConstArrayGEP(CopyprivateList, I);
      CGF.Builder.CreateStore(
          CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
              CGF.EmitLValue(CopyprivateVars[I]).getPointer(CGF),
              CGF.VoidPtrTy),
          Elem);
    }
    // Build function that copies private values from single region to all other
    // threads in the corresponding parallel region.
    // NOTE(review): SrcExprs is passed in the position of the callee's
    // 'DestExprs' parameter and DstExprs in the position of 'SrcExprs' --
    // confirm this ordering is intended.
    llvm::Value *CpyFn = emitCopyprivateCopyFunction(
        CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy)->getPointerTo(),
        CopyprivateVars, SrcExprs, DstExprs, AssignmentOps, Loc);
    llvm::Value *BufSize = CGF.getTypeSize(CopyprivateArrayTy);
    Address CL =
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(CopyprivateList,
                                                      CGF.VoidPtrTy);
    llvm::Value *DidItVal = CGF.Builder.CreateLoad(DidIt);
    llvm::Value *Args[] = {
        emitUpdateLocation(CGF, Loc), // ident_t *<loc>
        getThreadID(CGF, Loc),        // i32 <gtid>
        BufSize,                      // size_t <buf_size>
        CL.getPointer(),              // void *<copyprivate list>
        CpyFn,                        // void (*) (void *, void *) <copy_func>
        DidItVal                      // i32 did_it
    };
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_copyprivate),
                        Args);
  }
}

/// Emit an 'ordered' region.
///
/// When \p IsThreads is true the region is emitted inline under an action
/// built from __kmpc_ordered / __kmpc_end_ordered; otherwise it is emitted
/// inline without any runtime calls. Does nothing when \p CGF has no insert
/// point.
void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF,
                                        const RegionCodeGenTy &OrderedOpGen,
                                        SourceLocation Loc, bool IsThreads) {
  if (!CGF.HaveInsertPoint())
    return;
  // __kmpc_ordered(ident_t *, gtid);
  // OrderedOpGen();
  // __kmpc_end_ordered(ident_t *, gtid);
  // Prepare arguments and build a call to __kmpc_ordered
  if (IsThreads) {
    llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
    CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(),
OMPRTL___kmpc_ordered), 2537 Args, 2538 OMPBuilder.getOrCreateRuntimeFunction( 2539 CGM.getModule(), OMPRTL___kmpc_end_ordered), 2540 Args); 2541 OrderedOpGen.setAction(Action); 2542 emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen); 2543 return; 2544 } 2545 emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen); 2546 } 2547 2548 unsigned CGOpenMPRuntime::getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind) { 2549 unsigned Flags; 2550 if (Kind == OMPD_for) 2551 Flags = OMP_IDENT_BARRIER_IMPL_FOR; 2552 else if (Kind == OMPD_sections) 2553 Flags = OMP_IDENT_BARRIER_IMPL_SECTIONS; 2554 else if (Kind == OMPD_single) 2555 Flags = OMP_IDENT_BARRIER_IMPL_SINGLE; 2556 else if (Kind == OMPD_barrier) 2557 Flags = OMP_IDENT_BARRIER_EXPL; 2558 else 2559 Flags = OMP_IDENT_BARRIER_IMPL; 2560 return Flags; 2561 } 2562 2563 void CGOpenMPRuntime::getDefaultScheduleAndChunk( 2564 CodeGenFunction &CGF, const OMPLoopDirective &S, 2565 OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const { 2566 // Check if the loop directive is actually a doacross loop directive. In this 2567 // case choose static, 1 schedule. 2568 if (llvm::any_of( 2569 S.getClausesOfKind<OMPOrderedClause>(), 2570 [](const OMPOrderedClause *C) { return C->getNumForLoops(); })) { 2571 ScheduleKind = OMPC_SCHEDULE_static; 2572 // Chunk size is 1 in this case. 
2573 llvm::APInt ChunkSize(32, 1); 2574 ChunkExpr = IntegerLiteral::Create( 2575 CGF.getContext(), ChunkSize, 2576 CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/0), 2577 SourceLocation()); 2578 } 2579 } 2580 2581 void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc, 2582 OpenMPDirectiveKind Kind, bool EmitChecks, 2583 bool ForceSimpleCall) { 2584 // Check if we should use the OMPBuilder 2585 auto *OMPRegionInfo = 2586 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo); 2587 if (CGF.CGM.getLangOpts().OpenMPIRBuilder) { 2588 CGF.Builder.restoreIP(OMPBuilder.createBarrier( 2589 CGF.Builder, Kind, ForceSimpleCall, EmitChecks)); 2590 return; 2591 } 2592 2593 if (!CGF.HaveInsertPoint()) 2594 return; 2595 // Build call __kmpc_cancel_barrier(loc, thread_id); 2596 // Build call __kmpc_barrier(loc, thread_id); 2597 unsigned Flags = getDefaultFlagsForBarriers(Kind); 2598 // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc, 2599 // thread_id); 2600 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags), 2601 getThreadID(CGF, Loc)}; 2602 if (OMPRegionInfo) { 2603 if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) { 2604 llvm::Value *Result = CGF.EmitRuntimeCall( 2605 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), 2606 OMPRTL___kmpc_cancel_barrier), 2607 Args); 2608 if (EmitChecks) { 2609 // if (__kmpc_cancel_barrier()) { 2610 // exit from construct; 2611 // } 2612 llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit"); 2613 llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue"); 2614 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result); 2615 CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB); 2616 CGF.EmitBlock(ExitBB); 2617 // exit from construct; 2618 CodeGenFunction::JumpDest CancelDestination = 2619 CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind()); 2620 CGF.EmitBranchThroughCleanup(CancelDestination); 2621 CGF.EmitBlock(ContBB, /*IsFinished=*/true); 2622 } 2623 
return; 2624 } 2625 } 2626 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 2627 CGM.getModule(), OMPRTL___kmpc_barrier), 2628 Args); 2629 } 2630 2631 /// Map the OpenMP loop schedule to the runtime enumeration. 2632 static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind, 2633 bool Chunked, bool Ordered) { 2634 switch (ScheduleKind) { 2635 case OMPC_SCHEDULE_static: 2636 return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked) 2637 : (Ordered ? OMP_ord_static : OMP_sch_static); 2638 case OMPC_SCHEDULE_dynamic: 2639 return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked; 2640 case OMPC_SCHEDULE_guided: 2641 return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked; 2642 case OMPC_SCHEDULE_runtime: 2643 return Ordered ? OMP_ord_runtime : OMP_sch_runtime; 2644 case OMPC_SCHEDULE_auto: 2645 return Ordered ? OMP_ord_auto : OMP_sch_auto; 2646 case OMPC_SCHEDULE_unknown: 2647 assert(!Chunked && "chunk was specified but schedule kind not known"); 2648 return Ordered ? OMP_ord_static : OMP_sch_static; 2649 } 2650 llvm_unreachable("Unexpected runtime schedule"); 2651 } 2652 2653 /// Map the OpenMP distribute schedule to the runtime enumeration. 2654 static OpenMPSchedType 2655 getRuntimeSchedule(OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) { 2656 // only static is allowed for dist_schedule 2657 return Chunked ? 
OMP_dist_sch_static_chunked : OMP_dist_sch_static; 2658 } 2659 2660 bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind, 2661 bool Chunked) const { 2662 OpenMPSchedType Schedule = 2663 getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false); 2664 return Schedule == OMP_sch_static; 2665 } 2666 2667 bool CGOpenMPRuntime::isStaticNonchunked( 2668 OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const { 2669 OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked); 2670 return Schedule == OMP_dist_sch_static; 2671 } 2672 2673 bool CGOpenMPRuntime::isStaticChunked(OpenMPScheduleClauseKind ScheduleKind, 2674 bool Chunked) const { 2675 OpenMPSchedType Schedule = 2676 getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false); 2677 return Schedule == OMP_sch_static_chunked; 2678 } 2679 2680 bool CGOpenMPRuntime::isStaticChunked( 2681 OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const { 2682 OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked); 2683 return Schedule == OMP_dist_sch_static_chunked; 2684 } 2685 2686 bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const { 2687 OpenMPSchedType Schedule = 2688 getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false); 2689 assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here"); 2690 return Schedule != OMP_sch_static; 2691 } 2692 2693 static int addMonoNonMonoModifier(CodeGenModule &CGM, OpenMPSchedType Schedule, 2694 OpenMPScheduleClauseModifier M1, 2695 OpenMPScheduleClauseModifier M2) { 2696 int Modifier = 0; 2697 switch (M1) { 2698 case OMPC_SCHEDULE_MODIFIER_monotonic: 2699 Modifier = OMP_sch_modifier_monotonic; 2700 break; 2701 case OMPC_SCHEDULE_MODIFIER_nonmonotonic: 2702 Modifier = OMP_sch_modifier_nonmonotonic; 2703 break; 2704 case OMPC_SCHEDULE_MODIFIER_simd: 2705 if (Schedule == OMP_sch_static_chunked) 2706 Schedule = OMP_sch_static_balanced_chunked; 2707 break; 2708 case 
OMPC_SCHEDULE_MODIFIER_last: 2709 case OMPC_SCHEDULE_MODIFIER_unknown: 2710 break; 2711 } 2712 switch (M2) { 2713 case OMPC_SCHEDULE_MODIFIER_monotonic: 2714 Modifier = OMP_sch_modifier_monotonic; 2715 break; 2716 case OMPC_SCHEDULE_MODIFIER_nonmonotonic: 2717 Modifier = OMP_sch_modifier_nonmonotonic; 2718 break; 2719 case OMPC_SCHEDULE_MODIFIER_simd: 2720 if (Schedule == OMP_sch_static_chunked) 2721 Schedule = OMP_sch_static_balanced_chunked; 2722 break; 2723 case OMPC_SCHEDULE_MODIFIER_last: 2724 case OMPC_SCHEDULE_MODIFIER_unknown: 2725 break; 2726 } 2727 // OpenMP 5.0, 2.9.2 Worksharing-Loop Construct, Desription. 2728 // If the static schedule kind is specified or if the ordered clause is 2729 // specified, and if the nonmonotonic modifier is not specified, the effect is 2730 // as if the monotonic modifier is specified. Otherwise, unless the monotonic 2731 // modifier is specified, the effect is as if the nonmonotonic modifier is 2732 // specified. 2733 if (CGM.getLangOpts().OpenMP >= 50 && Modifier == 0) { 2734 if (!(Schedule == OMP_sch_static_chunked || Schedule == OMP_sch_static || 2735 Schedule == OMP_sch_static_balanced_chunked || 2736 Schedule == OMP_ord_static_chunked || Schedule == OMP_ord_static || 2737 Schedule == OMP_dist_sch_static_chunked || 2738 Schedule == OMP_dist_sch_static)) 2739 Modifier = OMP_sch_modifier_nonmonotonic; 2740 } 2741 return Schedule | Modifier; 2742 } 2743 2744 void CGOpenMPRuntime::emitForDispatchInit( 2745 CodeGenFunction &CGF, SourceLocation Loc, 2746 const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned, 2747 bool Ordered, const DispatchRTInput &DispatchValues) { 2748 if (!CGF.HaveInsertPoint()) 2749 return; 2750 OpenMPSchedType Schedule = getRuntimeSchedule( 2751 ScheduleKind.Schedule, DispatchValues.Chunk != nullptr, Ordered); 2752 assert(Ordered || 2753 (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked && 2754 Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked && 2755 
Schedule != OMP_sch_static_balanced_chunked));
  // Call __kmpc_dispatch_init(
  //          ident_t *loc, kmp_int32 tid, kmp_int32 schedule,
  //          kmp_int[32|64] lower, kmp_int[32|64] upper,
  //          kmp_int[32|64] stride, kmp_int[32|64] chunk);

  // If the Chunk was not specified in the clause - use default value 1.
  llvm::Value *Chunk = DispatchValues.Chunk ? DispatchValues.Chunk
                                            : CGF.Builder.getIntN(IVSize, 1);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc),
      getThreadID(CGF, Loc),
      CGF.Builder.getInt32(addMonoNonMonoModifier(
          CGM, Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type
      DispatchValues.LB,                                     // Lower
      DispatchValues.UB,                                     // Upper
      CGF.Builder.getIntN(IVSize, 1),                        // Stride
      Chunk                                                  // Chunk
  };
  CGF.EmitRuntimeCall(createDispatchInitFunction(IVSize, IVSigned), Args);
}

/// Emit the call to the static-init runtime function \p ForStaticInitFunction.
///
/// \param UpdateLocation Already-emitted ident_t location argument.
/// \param ThreadId       Already-emitted thread id argument.
/// \param Schedule       Runtime schedule; must be one of the static kinds
///                       (asserted), and must agree with whether
///                       Values.Chunk is set (asserted).
/// \param M1,M2          Schedule modifiers folded into the schedule argument
///                       via addMonoNonMonoModifier.
/// \param Values         Addresses of the last-iteration flag, bounds and
///                       stride, plus IV size and optional chunk. A missing
///                       chunk is passed as the constant 1; the increment is
///                       always the constant 1.
///
/// Does nothing when \p CGF has no insert point.
static void emitForStaticInitCall(
    CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId,
    llvm::FunctionCallee ForStaticInitFunction, OpenMPSchedType Schedule,
    OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2,
    const CGOpenMPRuntime::StaticRTInput &Values) {
  if (!CGF.HaveInsertPoint())
    return;

  assert(!Values.Ordered);
  assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked ||
         Schedule == OMP_sch_static_balanced_chunked ||
         Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked ||
         Schedule == OMP_dist_sch_static ||
         Schedule == OMP_dist_sch_static_chunked);

  // Call __kmpc_for_static_init(
  //          ident_t *loc, kmp_int32 tid, kmp_int32 schedtype,
  //          kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower,
  //          kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
  //          kmp_int[32|64] incr, kmp_int[32|64] chunk);
  llvm::Value *Chunk = Values.Chunk;
  if (Chunk == nullptr) {
    assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static ||
            Schedule == OMP_dist_sch_static) &&
           "expected static non-chunked schedule");
    // If the Chunk was not specified in the clause - use default value 1.
    Chunk = CGF.Builder.getIntN(Values.IVSize, 1);
  } else {
    assert((Schedule == OMP_sch_static_chunked ||
            Schedule == OMP_sch_static_balanced_chunked ||
            Schedule == OMP_ord_static_chunked ||
            Schedule == OMP_dist_sch_static_chunked) &&
           "expected static chunked schedule");
  }
  llvm::Value *Args[] = {
      UpdateLocation,
      ThreadId,
      CGF.Builder.getInt32(addMonoNonMonoModifier(CGF.CGM, Schedule, M1,
                                                  M2)), // Schedule type
      Values.IL.getPointer(),                           // &isLastIter
      Values.LB.getPointer(),                           // &LB
      Values.UB.getPointer(),                           // &UB
      Values.ST.getPointer(),                           // &Stride
      CGF.Builder.getIntN(Values.IVSize, 1),            // Incr
      Chunk                                             // Chunk
  };
  CGF.EmitRuntimeCall(ForStaticInitFunction, Args);
}

/// Emit static-schedule initialization for a worksharing directive.
///
/// Computes the runtime schedule from \p ScheduleKind and \p Values, emits the
/// location with OMP_IDENT_WORK_LOOP for loop directives or
/// OMP_IDENT_WORK_SECTIONS otherwise, and forwards to emitForStaticInitCall.
/// \p DKind must be a worksharing directive (asserted).
void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF,
                                        SourceLocation Loc,
                                        OpenMPDirectiveKind DKind,
                                        const OpenMPScheduleTy &ScheduleKind,
                                        const StaticRTInput &Values) {
  OpenMPSchedType ScheduleNum = getRuntimeSchedule(
      ScheduleKind.Schedule, Values.Chunk != nullptr, Values.Ordered);
  assert(isOpenMPWorksharingDirective(DKind) &&
         "Expected loop-based or sections-based directive.");
  llvm::Value *UpdatedLocation = emitUpdateLocation(CGF, Loc,
                                             isOpenMPLoopDirective(DKind)
                                                 ?
OMP_IDENT_WORK_LOOP
                                                 : OMP_IDENT_WORK_SECTIONS);
  llvm::Value *ThreadId = getThreadID(CGF, Loc);
  llvm::FunctionCallee StaticInitFunction =
      createForStaticInitFunction(Values.IVSize, Values.IVSigned, false);
  auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
  emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
                        ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, Values);
}

/// Emit static-schedule initialization for a 'distribute' directive.
///
/// The location is emitted with OMP_IDENT_WORK_DISTRIBUTE and no schedule
/// modifiers are passed. The third argument of createForStaticInitFunction is
/// true only when compiling for the device and the target triple is AMDGCN or
/// NVPTX.
void CGOpenMPRuntime::emitDistributeStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDistScheduleClauseKind SchedKind,
    const CGOpenMPRuntime::StaticRTInput &Values) {
  OpenMPSchedType ScheduleNum =
      getRuntimeSchedule(SchedKind, Values.Chunk != nullptr);
  llvm::Value *UpdatedLocation =
      emitUpdateLocation(CGF, Loc, OMP_IDENT_WORK_DISTRIBUTE);
  llvm::Value *ThreadId = getThreadID(CGF, Loc);
  llvm::FunctionCallee StaticInitFunction;
  bool isGPUDistribute =
      CGM.getLangOpts().OpenMPIsDevice &&
      (CGM.getTriple().isAMDGCN() || CGM.getTriple().isNVPTX());
  StaticInitFunction = createForStaticInitFunction(
      Values.IVSize, Values.IVSigned, isGPUDistribute);

  emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
                        ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown,
                        OMPC_SCHEDULE_MODIFIER_unknown, Values);
}

/// Emit the call that finishes a statically scheduled region.
///
/// The location flag is OMP_IDENT_WORK_DISTRIBUTE for distribute directives,
/// OMP_IDENT_WORK_LOOP for other loop directives and OMP_IDENT_WORK_SECTIONS
/// otherwise. __kmpc_distribute_static_fini is called for distribute
/// directives when compiling for an AMDGCN or NVPTX device; in every other
/// case __kmpc_for_static_fini is called. Does nothing when \p CGF has no
/// insert point.
void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF,
                                          SourceLocation Loc,
                                          OpenMPDirectiveKind DKind) {
  if (!CGF.HaveInsertPoint())
    return;
  // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc,
                         isOpenMPDistributeDirective(DKind)
                             ? OMP_IDENT_WORK_DISTRIBUTE
                             : isOpenMPLoopDirective(DKind)
                                   ? OMP_IDENT_WORK_LOOP
                                   : OMP_IDENT_WORK_SECTIONS),
      getThreadID(CGF, Loc)};
  auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
  if (isOpenMPDistributeDirective(DKind) && CGM.getLangOpts().OpenMPIsDevice &&
      (CGM.getTriple().isAMDGCN() || CGM.getTriple().isNVPTX()))
    CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_distribute_static_fini),
        Args);
  else
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_for_static_fini),
                        Args);
}

/// Emit the call that ends an ordered loop iteration, using the function
/// returned by createDispatchFiniFunction(IVSize, IVSigned) with
/// {location, thread id} as arguments. Does nothing when \p CGF has no insert
/// point.
void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
                                                 SourceLocation Loc,
                                                 unsigned IVSize,
                                                 bool IVSigned) {
  if (!CGF.HaveInsertPoint())
    return;
  // Call __kmpc_for_dynamic_fini_(4|8)[u](ident_t *loc, kmp_int32 tid);
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CGF.EmitRuntimeCall(createDispatchFiniFunction(IVSize, IVSigned), Args);
}

/// Emit the call that fetches the next chunk of a dynamically dispatched loop.
///
/// \param IL,LB,UB,ST Addresses passed to the runtime for the last-iteration
///        flag, lower bound, upper bound and stride.
/// \return The runtime result converted from a signed 32-bit integer to bool.
llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF,
                                          SourceLocation Loc, unsigned IVSize,
                                          bool IVSigned, Address IL,
                                          Address LB, Address UB,
                                          Address ST) {
  // Call __kmpc_dispatch_next(
  //          ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter,
  //          kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper,
  //          kmp_int[32|64] *p_stride);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc),
      getThreadID(CGF, Loc),
      IL.getPointer(), // &isLastIter
      LB.getPointer(), // &Lower
      UB.getPointer(), // &Upper
      ST.getPointer()  // &Stride
  };
  llvm::Value *Call =
      CGF.EmitRuntimeCall(createDispatchNextFunction(IVSize, IVSigned), Args);
  return CGF.EmitScalarConversion(
      Call, CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/1),
      CGF.getContext().BoolTy, Loc);
}

/// Emit the call __kmpc_push_num_threads(&loc, global_tid, num_threads) for a
/// 'num_threads' clause; \p NumThreads is cast to a signed 32-bit integer
/// first. Does nothing when \p CGF has no insert point.
void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
                                           llvm::Value
*NumThreads, 2932 SourceLocation Loc) { 2933 if (!CGF.HaveInsertPoint()) 2934 return; 2935 // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads) 2936 llvm::Value *Args[] = { 2937 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 2938 CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)}; 2939 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 2940 CGM.getModule(), OMPRTL___kmpc_push_num_threads), 2941 Args); 2942 } 2943 2944 void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF, 2945 ProcBindKind ProcBind, 2946 SourceLocation Loc) { 2947 if (!CGF.HaveInsertPoint()) 2948 return; 2949 assert(ProcBind != OMP_PROC_BIND_unknown && "Unsupported proc_bind value."); 2950 // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind) 2951 llvm::Value *Args[] = { 2952 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 2953 llvm::ConstantInt::get(CGM.IntTy, unsigned(ProcBind), /*isSigned=*/true)}; 2954 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 2955 CGM.getModule(), OMPRTL___kmpc_push_proc_bind), 2956 Args); 2957 } 2958 2959 void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>, 2960 SourceLocation Loc, llvm::AtomicOrdering AO) { 2961 if (CGF.CGM.getLangOpts().OpenMPIRBuilder) { 2962 OMPBuilder.createFlush(CGF.Builder); 2963 } else { 2964 if (!CGF.HaveInsertPoint()) 2965 return; 2966 // Build call void __kmpc_flush(ident_t *loc) 2967 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 2968 CGM.getModule(), OMPRTL___kmpc_flush), 2969 emitUpdateLocation(CGF, Loc)); 2970 } 2971 } 2972 2973 namespace { 2974 /// Indexes of fields for type kmp_task_t. 2975 enum KmpTaskTFields { 2976 /// List of shared variables. 2977 KmpTaskTShareds, 2978 /// Task routine. 2979 KmpTaskTRoutine, 2980 /// Partition id for the untied tasks. 2981 KmpTaskTPartId, 2982 /// Function with call of destructors for private variables. 2983 Data1, 2984 /// Task priority. 
2985 Data2, 2986 /// (Taskloops only) Lower bound. 2987 KmpTaskTLowerBound, 2988 /// (Taskloops only) Upper bound. 2989 KmpTaskTUpperBound, 2990 /// (Taskloops only) Stride. 2991 KmpTaskTStride, 2992 /// (Taskloops only) Is last iteration flag. 2993 KmpTaskTLastIter, 2994 /// (Taskloops only) Reduction data. 2995 KmpTaskTReductions, 2996 }; 2997 } // anonymous namespace 2998 2999 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::empty() const { 3000 return OffloadEntriesTargetRegion.empty() && 3001 OffloadEntriesDeviceGlobalVar.empty(); 3002 } 3003 3004 /// Initialize target region entry. 3005 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: 3006 initializeTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID, 3007 StringRef ParentName, unsigned LineNum, 3008 unsigned Order) { 3009 assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is " 3010 "only required for the device " 3011 "code generation."); 3012 OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = 3013 OffloadEntryInfoTargetRegion(Order, /*Addr=*/nullptr, /*ID=*/nullptr, 3014 OMPTargetRegionEntryTargetRegion); 3015 ++OffloadingEntriesNum; 3016 } 3017 3018 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: 3019 registerTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID, 3020 StringRef ParentName, unsigned LineNum, 3021 llvm::Constant *Addr, llvm::Constant *ID, 3022 OMPTargetRegionEntryKind Flags) { 3023 // If we are emitting code for a target, the entry is already initialized, 3024 // only has to be registered. 3025 if (CGM.getLangOpts().OpenMPIsDevice) { 3026 // This could happen if the device compilation is invoked standalone. 
3027 if (!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum)) 3028 return; 3029 auto &Entry = 3030 OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum]; 3031 Entry.setAddress(Addr); 3032 Entry.setID(ID); 3033 Entry.setFlags(Flags); 3034 } else { 3035 if (Flags == 3036 OffloadEntriesInfoManagerTy::OMPTargetRegionEntryTargetRegion && 3037 hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum, 3038 /*IgnoreAddressId*/ true)) 3039 return; 3040 assert(!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum) && 3041 "Target region entry already registered!"); 3042 OffloadEntryInfoTargetRegion Entry(OffloadingEntriesNum, Addr, ID, Flags); 3043 OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = Entry; 3044 ++OffloadingEntriesNum; 3045 } 3046 } 3047 3048 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::hasTargetRegionEntryInfo( 3049 unsigned DeviceID, unsigned FileID, StringRef ParentName, unsigned LineNum, 3050 bool IgnoreAddressId) const { 3051 auto PerDevice = OffloadEntriesTargetRegion.find(DeviceID); 3052 if (PerDevice == OffloadEntriesTargetRegion.end()) 3053 return false; 3054 auto PerFile = PerDevice->second.find(FileID); 3055 if (PerFile == PerDevice->second.end()) 3056 return false; 3057 auto PerParentName = PerFile->second.find(ParentName); 3058 if (PerParentName == PerFile->second.end()) 3059 return false; 3060 auto PerLine = PerParentName->second.find(LineNum); 3061 if (PerLine == PerParentName->second.end()) 3062 return false; 3063 // Fail if this entry is already registered. 3064 if (!IgnoreAddressId && 3065 (PerLine->second.getAddress() || PerLine->second.getID())) 3066 return false; 3067 return true; 3068 } 3069 3070 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::actOnTargetRegionEntriesInfo( 3071 const OffloadTargetRegionEntryInfoActTy &Action) { 3072 // Scan all target region entries and perform the provided action. 
for (const auto &D : OffloadEntriesTargetRegion)
    for (const auto &F : D.second)
      for (const auto &P : F.second)
        for (const auto &L : P.second)
          Action(D.first, F.first, P.first(), L.first, L.second);
}

/// Initialize a device global variable entry.
///
/// Device-side only (asserted): records an entry for \p Name with the given
/// \p Order and \p Flags (try_emplace, so an existing entry is kept) and bumps
/// the entry counter.
void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
    initializeDeviceGlobalVarEntryInfo(StringRef Name,
                                       OMPTargetGlobalVarEntryKind Flags,
                                       unsigned Order) {
  assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
                                             "only required for the device "
                                             "code generation.");
  OffloadEntriesDeviceGlobalVar.try_emplace(Name, Order, Flags);
  ++OffloadingEntriesNum;
}

/// Register address, size and linkage of a device global variable entry.
///
/// On the device the entry must already exist, otherwise the call is a no-op.
/// If the entry already has an address, only a zero size is filled in
/// (together with the linkage); otherwise size, linkage and address are set.
/// On the host an existing entry only gets a zero size filled in; a missing
/// entry is created with the next order number.
void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
    registerDeviceGlobalVarEntryInfo(StringRef VarName, llvm::Constant *Addr,
                                     CharUnits VarSize,
                                     OMPTargetGlobalVarEntryKind Flags,
                                     llvm::GlobalValue::LinkageTypes Linkage) {
  if (CGM.getLangOpts().OpenMPIsDevice) {
    // This could happen if the device compilation is invoked standalone.
    if (!hasDeviceGlobalVarEntryInfo(VarName))
      return;
    auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
    // NOTE(review): the second hasDeviceGlobalVarEntryInfo() check looks
    // redundant after the early return above -- confirm before simplifying.
    if (Entry.getAddress() && hasDeviceGlobalVarEntryInfo(VarName)) {
      if (Entry.getVarSize().isZero()) {
        Entry.setVarSize(VarSize);
        Entry.setLinkage(Linkage);
      }
      return;
    }
    Entry.setVarSize(VarSize);
    Entry.setLinkage(Linkage);
    Entry.setAddress(Addr);
  } else {
    if (hasDeviceGlobalVarEntryInfo(VarName)) {
      auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
      assert(Entry.isValid() && Entry.getFlags() == Flags &&
             "Entry not initialized!");
      if (Entry.getVarSize().isZero()) {
        Entry.setVarSize(VarSize);
        Entry.setLinkage(Linkage);
      }
      return;
    }
    OffloadEntriesDeviceGlobalVar.try_emplace(
        VarName, OffloadingEntriesNum, Addr, VarSize, Flags, Linkage);
    ++OffloadingEntriesNum;
  }
}

/// Invoke \p Action with the name and entry info of every recorded device
/// global variable entry.
void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
    actOnDeviceGlobalVarEntriesInfo(
        const OffloadDeviceGlobalVarEntryInfoActTy &Action) {
  // Scan all device global variable entries and perform the provided action.
  for (const auto &E : OffloadEntriesDeviceGlobalVar)
    Action(E.getKey(), E.getValue());
}

/// Create one offload entry global for \p Addr.
///
/// Emits an internal constant string holding the name of \p Addr, then a
/// constant struct of type getTgtOffloadEntryQTy() containing
/// {ID, pointer to the name string, Size, Flags, 0}. The struct is named
/// after the entry prefix plus the name of \p Addr, has weak-any linkage and
/// is placed in the "omp_offloading_entries" section.
///
/// NOTE(review): the \p Linkage parameter is not referenced in this body; the
/// entry is always created with WeakAnyLinkage.
void CGOpenMPRuntime::createOffloadEntry(
    llvm::Constant *ID, llvm::Constant *Addr, uint64_t Size, int32_t Flags,
    llvm::GlobalValue::LinkageTypes Linkage) {
  StringRef Name = Addr->getName();
  llvm::Module &M = CGM.getModule();
  llvm::LLVMContext &C = M.getContext();

  // Create constant string with the name.
  llvm::Constant *StrPtrInit = llvm::ConstantDataArray::getString(C, Name);

  std::string StringName = getName({"omp_offloading", "entry_name"});
  auto *Str = new llvm::GlobalVariable(
      M, StrPtrInit->getType(), /*isConstant=*/true,
      llvm::GlobalValue::InternalLinkage, StrPtrInit, StringName);
  Str->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);

  llvm::Constant *Data[] = {
      llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast(ID, CGM.VoidPtrTy),
      llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast(Str, CGM.Int8PtrTy),
      llvm::ConstantInt::get(CGM.SizeTy, Size),
      llvm::ConstantInt::get(CGM.Int32Ty, Flags),
      llvm::ConstantInt::get(CGM.Int32Ty, 0)};
  std::string EntryName = getName({"omp_offloading", "entry", ""});
  llvm::GlobalVariable *Entry = createGlobalStruct(
      CGM, getTgtOffloadEntryQTy(), /*IsConstant=*/true, Data,
      Twine(EntryName).concat(Name), llvm::GlobalValue::WeakAnyLinkage);

  // The entry has to be created in the section the linker expects it to be.
  Entry->setSection("omp_offloading_entries");
}

void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() {
  // Emit the offloading entries and metadata so that the device codegen side
  // can easily figure out what to emit. The produced metadata looks like
  // this:
  //
  // !omp_offload.info = !{!1, ...}
  //
  // Right now we only generate metadata for function that contain target
  // regions.

  // If we are in simd mode or there are no entries, we don't need to do
  // anything.
3179 if (CGM.getLangOpts().OpenMPSimd || OffloadEntriesInfoManager.empty()) 3180 return; 3181 3182 llvm::Module &M = CGM.getModule(); 3183 llvm::LLVMContext &C = M.getContext(); 3184 SmallVector<std::tuple<const OffloadEntriesInfoManagerTy::OffloadEntryInfo *, 3185 SourceLocation, StringRef>, 3186 16> 3187 OrderedEntries(OffloadEntriesInfoManager.size()); 3188 llvm::SmallVector<StringRef, 16> ParentFunctions( 3189 OffloadEntriesInfoManager.size()); 3190 3191 // Auxiliary methods to create metadata values and strings. 3192 auto &&GetMDInt = [this](unsigned V) { 3193 return llvm::ConstantAsMetadata::get( 3194 llvm::ConstantInt::get(CGM.Int32Ty, V)); 3195 }; 3196 3197 auto &&GetMDString = [&C](StringRef V) { return llvm::MDString::get(C, V); }; 3198 3199 // Create the offloading info metadata node. 3200 llvm::NamedMDNode *MD = M.getOrInsertNamedMetadata("omp_offload.info"); 3201 3202 // Create function that emits metadata for each target region entry; 3203 auto &&TargetRegionMetadataEmitter = 3204 [this, &C, MD, &OrderedEntries, &ParentFunctions, &GetMDInt, 3205 &GetMDString]( 3206 unsigned DeviceID, unsigned FileID, StringRef ParentName, 3207 unsigned Line, 3208 const OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion &E) { 3209 // Generate metadata for target regions. Each entry of this metadata 3210 // contains: 3211 // - Entry 0 -> Kind of this type of metadata (0). 3212 // - Entry 1 -> Device ID of the file where the entry was identified. 3213 // - Entry 2 -> File ID of the file where the entry was identified. 3214 // - Entry 3 -> Mangled name of the function where the entry was 3215 // identified. 3216 // - Entry 4 -> Line in the file where the entry was identified. 3217 // - Entry 5 -> Order the entry was created. 3218 // The first element of the metadata node is the kind. 
3219 llvm::Metadata *Ops[] = {GetMDInt(E.getKind()), GetMDInt(DeviceID), 3220 GetMDInt(FileID), GetMDString(ParentName), 3221 GetMDInt(Line), GetMDInt(E.getOrder())}; 3222 3223 SourceLocation Loc; 3224 for (auto I = CGM.getContext().getSourceManager().fileinfo_begin(), 3225 E = CGM.getContext().getSourceManager().fileinfo_end(); 3226 I != E; ++I) { 3227 if (I->getFirst()->getUniqueID().getDevice() == DeviceID && 3228 I->getFirst()->getUniqueID().getFile() == FileID) { 3229 Loc = CGM.getContext().getSourceManager().translateFileLineCol( 3230 I->getFirst(), Line, 1); 3231 break; 3232 } 3233 } 3234 // Save this entry in the right position of the ordered entries array. 3235 OrderedEntries[E.getOrder()] = std::make_tuple(&E, Loc, ParentName); 3236 ParentFunctions[E.getOrder()] = ParentName; 3237 3238 // Add metadata to the named metadata node. 3239 MD->addOperand(llvm::MDNode::get(C, Ops)); 3240 }; 3241 3242 OffloadEntriesInfoManager.actOnTargetRegionEntriesInfo( 3243 TargetRegionMetadataEmitter); 3244 3245 // Create function that emits metadata for each device global variable entry; 3246 auto &&DeviceGlobalVarMetadataEmitter = 3247 [&C, &OrderedEntries, &GetMDInt, &GetMDString, 3248 MD](StringRef MangledName, 3249 const OffloadEntriesInfoManagerTy::OffloadEntryInfoDeviceGlobalVar 3250 &E) { 3251 // Generate metadata for global variables. Each entry of this metadata 3252 // contains: 3253 // - Entry 0 -> Kind of this type of metadata (1). 3254 // - Entry 1 -> Mangled name of the variable. 3255 // - Entry 2 -> Declare target kind. 3256 // - Entry 3 -> Order the entry was created. 3257 // The first element of the metadata node is the kind. 3258 llvm::Metadata *Ops[] = { 3259 GetMDInt(E.getKind()), GetMDString(MangledName), 3260 GetMDInt(E.getFlags()), GetMDInt(E.getOrder())}; 3261 3262 // Save this entry in the right position of the ordered entries array. 
3263 OrderedEntries[E.getOrder()] = 3264 std::make_tuple(&E, SourceLocation(), MangledName); 3265 3266 // Add metadata to the named metadata node. 3267 MD->addOperand(llvm::MDNode::get(C, Ops)); 3268 }; 3269 3270 OffloadEntriesInfoManager.actOnDeviceGlobalVarEntriesInfo( 3271 DeviceGlobalVarMetadataEmitter); 3272 3273 for (const auto &E : OrderedEntries) { 3274 assert(std::get<0>(E) && "All ordered entries must exist!"); 3275 if (const auto *CE = 3276 dyn_cast<OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion>( 3277 std::get<0>(E))) { 3278 if (!CE->getID() || !CE->getAddress()) { 3279 // Do not blame the entry if the parent funtion is not emitted. 3280 StringRef FnName = ParentFunctions[CE->getOrder()]; 3281 if (!CGM.GetGlobalValue(FnName)) 3282 continue; 3283 unsigned DiagID = CGM.getDiags().getCustomDiagID( 3284 DiagnosticsEngine::Error, 3285 "Offloading entry for target region in %0 is incorrect: either the " 3286 "address or the ID is invalid."); 3287 CGM.getDiags().Report(std::get<1>(E), DiagID) << FnName; 3288 continue; 3289 } 3290 createOffloadEntry(CE->getID(), CE->getAddress(), /*Size=*/0, 3291 CE->getFlags(), llvm::GlobalValue::WeakAnyLinkage); 3292 } else if (const auto *CE = dyn_cast<OffloadEntriesInfoManagerTy:: 3293 OffloadEntryInfoDeviceGlobalVar>( 3294 std::get<0>(E))) { 3295 OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags = 3296 static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>( 3297 CE->getFlags()); 3298 switch (Flags) { 3299 case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo: { 3300 if (CGM.getLangOpts().OpenMPIsDevice && 3301 CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory()) 3302 continue; 3303 if (!CE->getAddress()) { 3304 unsigned DiagID = CGM.getDiags().getCustomDiagID( 3305 DiagnosticsEngine::Error, "Offloading entry for declare target " 3306 "variable %0 is incorrect: the " 3307 "address is invalid."); 3308 CGM.getDiags().Report(std::get<1>(E), DiagID) << std::get<2>(E); 3309 
continue; 3310 } 3311 // The vaiable has no definition - no need to add the entry. 3312 if (CE->getVarSize().isZero()) 3313 continue; 3314 break; 3315 } 3316 case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink: 3317 assert(((CGM.getLangOpts().OpenMPIsDevice && !CE->getAddress()) || 3318 (!CGM.getLangOpts().OpenMPIsDevice && CE->getAddress())) && 3319 "Declaret target link address is set."); 3320 if (CGM.getLangOpts().OpenMPIsDevice) 3321 continue; 3322 if (!CE->getAddress()) { 3323 unsigned DiagID = CGM.getDiags().getCustomDiagID( 3324 DiagnosticsEngine::Error, 3325 "Offloading entry for declare target variable is incorrect: the " 3326 "address is invalid."); 3327 CGM.getDiags().Report(DiagID); 3328 continue; 3329 } 3330 break; 3331 } 3332 createOffloadEntry(CE->getAddress(), CE->getAddress(), 3333 CE->getVarSize().getQuantity(), Flags, 3334 CE->getLinkage()); 3335 } else { 3336 llvm_unreachable("Unsupported entry kind."); 3337 } 3338 } 3339 } 3340 3341 /// Loads all the offload entries information from the host IR 3342 /// metadata. 3343 void CGOpenMPRuntime::loadOffloadInfoMetadata() { 3344 // If we are in target mode, load the metadata from the host IR. This code has 3345 // to match the metadaata creation in createOffloadEntriesAndInfoMetadata(). 
3346 3347 if (!CGM.getLangOpts().OpenMPIsDevice) 3348 return; 3349 3350 if (CGM.getLangOpts().OMPHostIRFile.empty()) 3351 return; 3352 3353 auto Buf = llvm::MemoryBuffer::getFile(CGM.getLangOpts().OMPHostIRFile); 3354 if (auto EC = Buf.getError()) { 3355 CGM.getDiags().Report(diag::err_cannot_open_file) 3356 << CGM.getLangOpts().OMPHostIRFile << EC.message(); 3357 return; 3358 } 3359 3360 llvm::LLVMContext C; 3361 auto ME = expectedToErrorOrAndEmitErrors( 3362 C, llvm::parseBitcodeFile(Buf.get()->getMemBufferRef(), C)); 3363 3364 if (auto EC = ME.getError()) { 3365 unsigned DiagID = CGM.getDiags().getCustomDiagID( 3366 DiagnosticsEngine::Error, "Unable to parse host IR file '%0':'%1'"); 3367 CGM.getDiags().Report(DiagID) 3368 << CGM.getLangOpts().OMPHostIRFile << EC.message(); 3369 return; 3370 } 3371 3372 llvm::NamedMDNode *MD = ME.get()->getNamedMetadata("omp_offload.info"); 3373 if (!MD) 3374 return; 3375 3376 for (llvm::MDNode *MN : MD->operands()) { 3377 auto &&GetMDInt = [MN](unsigned Idx) { 3378 auto *V = cast<llvm::ConstantAsMetadata>(MN->getOperand(Idx)); 3379 return cast<llvm::ConstantInt>(V->getValue())->getZExtValue(); 3380 }; 3381 3382 auto &&GetMDString = [MN](unsigned Idx) { 3383 auto *V = cast<llvm::MDString>(MN->getOperand(Idx)); 3384 return V->getString(); 3385 }; 3386 3387 switch (GetMDInt(0)) { 3388 default: 3389 llvm_unreachable("Unexpected metadata!"); 3390 break; 3391 case OffloadEntriesInfoManagerTy::OffloadEntryInfo:: 3392 OffloadingEntryInfoTargetRegion: 3393 OffloadEntriesInfoManager.initializeTargetRegionEntryInfo( 3394 /*DeviceID=*/GetMDInt(1), /*FileID=*/GetMDInt(2), 3395 /*ParentName=*/GetMDString(3), /*Line=*/GetMDInt(4), 3396 /*Order=*/GetMDInt(5)); 3397 break; 3398 case OffloadEntriesInfoManagerTy::OffloadEntryInfo:: 3399 OffloadingEntryInfoDeviceGlobalVar: 3400 OffloadEntriesInfoManager.initializeDeviceGlobalVarEntryInfo( 3401 /*MangledName=*/GetMDString(1), 3402 
static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>( 3403 /*Flags=*/GetMDInt(2)), 3404 /*Order=*/GetMDInt(3)); 3405 break; 3406 } 3407 } 3408 } 3409 3410 void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) { 3411 if (!KmpRoutineEntryPtrTy) { 3412 // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type. 3413 ASTContext &C = CGM.getContext(); 3414 QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy}; 3415 FunctionProtoType::ExtProtoInfo EPI; 3416 KmpRoutineEntryPtrQTy = C.getPointerType( 3417 C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI)); 3418 KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy); 3419 } 3420 } 3421 3422 QualType CGOpenMPRuntime::getTgtOffloadEntryQTy() { 3423 // Make sure the type of the entry is already created. This is the type we 3424 // have to create: 3425 // struct __tgt_offload_entry{ 3426 // void *addr; // Pointer to the offload entry info. 3427 // // (function or global) 3428 // char *name; // Name of the function or global. 3429 // size_t size; // Size of the entry info (0 if it a function). 3430 // int32_t flags; // Flags associated with the entry, e.g. 'link'. 3431 // int32_t reserved; // Reserved, to use by the runtime library. 
3432 // }; 3433 if (TgtOffloadEntryQTy.isNull()) { 3434 ASTContext &C = CGM.getContext(); 3435 RecordDecl *RD = C.buildImplicitRecord("__tgt_offload_entry"); 3436 RD->startDefinition(); 3437 addFieldToRecordDecl(C, RD, C.VoidPtrTy); 3438 addFieldToRecordDecl(C, RD, C.getPointerType(C.CharTy)); 3439 addFieldToRecordDecl(C, RD, C.getSizeType()); 3440 addFieldToRecordDecl( 3441 C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true)); 3442 addFieldToRecordDecl( 3443 C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true)); 3444 RD->completeDefinition(); 3445 RD->addAttr(PackedAttr::CreateImplicit(C)); 3446 TgtOffloadEntryQTy = C.getRecordType(RD); 3447 } 3448 return TgtOffloadEntryQTy; 3449 } 3450 3451 namespace { 3452 struct PrivateHelpersTy { 3453 PrivateHelpersTy(const Expr *OriginalRef, const VarDecl *Original, 3454 const VarDecl *PrivateCopy, const VarDecl *PrivateElemInit) 3455 : OriginalRef(OriginalRef), Original(Original), PrivateCopy(PrivateCopy), 3456 PrivateElemInit(PrivateElemInit) {} 3457 PrivateHelpersTy(const VarDecl *Original) : Original(Original) {} 3458 const Expr *OriginalRef = nullptr; 3459 const VarDecl *Original = nullptr; 3460 const VarDecl *PrivateCopy = nullptr; 3461 const VarDecl *PrivateElemInit = nullptr; 3462 bool isLocalPrivate() const { 3463 return !OriginalRef && !PrivateCopy && !PrivateElemInit; 3464 } 3465 }; 3466 typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy; 3467 } // anonymous namespace 3468 3469 static bool isAllocatableDecl(const VarDecl *VD) { 3470 const VarDecl *CVD = VD->getCanonicalDecl(); 3471 if (!CVD->hasAttr<OMPAllocateDeclAttr>()) 3472 return false; 3473 const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>(); 3474 // Use the default allocation. 
3475 return !((AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc || 3476 AA->getAllocatorType() == OMPAllocateDeclAttr::OMPNullMemAlloc) && 3477 !AA->getAllocator()); 3478 } 3479 3480 static RecordDecl * 3481 createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) { 3482 if (!Privates.empty()) { 3483 ASTContext &C = CGM.getContext(); 3484 // Build struct .kmp_privates_t. { 3485 // /* private vars */ 3486 // }; 3487 RecordDecl *RD = C.buildImplicitRecord(".kmp_privates.t"); 3488 RD->startDefinition(); 3489 for (const auto &Pair : Privates) { 3490 const VarDecl *VD = Pair.second.Original; 3491 QualType Type = VD->getType().getNonReferenceType(); 3492 // If the private variable is a local variable with lvalue ref type, 3493 // allocate the pointer instead of the pointee type. 3494 if (Pair.second.isLocalPrivate()) { 3495 if (VD->getType()->isLValueReferenceType()) 3496 Type = C.getPointerType(Type); 3497 if (isAllocatableDecl(VD)) 3498 Type = C.getPointerType(Type); 3499 } 3500 FieldDecl *FD = addFieldToRecordDecl(C, RD, Type); 3501 if (VD->hasAttrs()) { 3502 for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()), 3503 E(VD->getAttrs().end()); 3504 I != E; ++I) 3505 FD->addAttr(*I); 3506 } 3507 } 3508 RD->completeDefinition(); 3509 return RD; 3510 } 3511 return nullptr; 3512 } 3513 3514 static RecordDecl * 3515 createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind, 3516 QualType KmpInt32Ty, 3517 QualType KmpRoutineEntryPointerQTy) { 3518 ASTContext &C = CGM.getContext(); 3519 // Build struct kmp_task_t { 3520 // void * shareds; 3521 // kmp_routine_entry_t routine; 3522 // kmp_int32 part_id; 3523 // kmp_cmplrdata_t data1; 3524 // kmp_cmplrdata_t data2; 3525 // For taskloops additional fields: 3526 // kmp_uint64 lb; 3527 // kmp_uint64 ub; 3528 // kmp_int64 st; 3529 // kmp_int32 liter; 3530 // void * reductions; 3531 // }; 3532 RecordDecl *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TTK_Union); 3533 
UD->startDefinition(); 3534 addFieldToRecordDecl(C, UD, KmpInt32Ty); 3535 addFieldToRecordDecl(C, UD, KmpRoutineEntryPointerQTy); 3536 UD->completeDefinition(); 3537 QualType KmpCmplrdataTy = C.getRecordType(UD); 3538 RecordDecl *RD = C.buildImplicitRecord("kmp_task_t"); 3539 RD->startDefinition(); 3540 addFieldToRecordDecl(C, RD, C.VoidPtrTy); 3541 addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy); 3542 addFieldToRecordDecl(C, RD, KmpInt32Ty); 3543 addFieldToRecordDecl(C, RD, KmpCmplrdataTy); 3544 addFieldToRecordDecl(C, RD, KmpCmplrdataTy); 3545 if (isOpenMPTaskLoopDirective(Kind)) { 3546 QualType KmpUInt64Ty = 3547 CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0); 3548 QualType KmpInt64Ty = 3549 CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1); 3550 addFieldToRecordDecl(C, RD, KmpUInt64Ty); 3551 addFieldToRecordDecl(C, RD, KmpUInt64Ty); 3552 addFieldToRecordDecl(C, RD, KmpInt64Ty); 3553 addFieldToRecordDecl(C, RD, KmpInt32Ty); 3554 addFieldToRecordDecl(C, RD, C.VoidPtrTy); 3555 } 3556 RD->completeDefinition(); 3557 return RD; 3558 } 3559 3560 static RecordDecl * 3561 createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy, 3562 ArrayRef<PrivateDataTy> Privates) { 3563 ASTContext &C = CGM.getContext(); 3564 // Build struct kmp_task_t_with_privates { 3565 // kmp_task_t task_data; 3566 // .kmp_privates_t. privates; 3567 // }; 3568 RecordDecl *RD = C.buildImplicitRecord("kmp_task_t_with_privates"); 3569 RD->startDefinition(); 3570 addFieldToRecordDecl(C, RD, KmpTaskTQTy); 3571 if (const RecordDecl *PrivateRD = createPrivatesRecordDecl(CGM, Privates)) 3572 addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD)); 3573 RD->completeDefinition(); 3574 return RD; 3575 } 3576 3577 /// Emit a proxy function which accepts kmp_task_t as the second 3578 /// argument. 
/// \code
/// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
///   TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, tt,
///   For taskloops:
///   tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
///   tt->reductions, tt->shareds);
///   return 0;
/// }
/// \endcode
static llvm::Function *
emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc,
                      OpenMPDirectiveKind Kind, QualType KmpInt32Ty,
                      QualType KmpTaskTWithPrivatesPtrQTy,
                      QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy,
                      QualType SharedsPtrTy, llvm::Function *TaskFunction,
                      llvm::Value *TaskPrivatesMap) {
  ASTContext &C = CGM.getContext();
  // Parameters of the proxy: (kmp_int32 gtid, kmp_task_t_with_privates
  // *restrict tt).
  FunctionArgList Args;
  ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                                KmpTaskTWithPrivatesPtrQTy.withRestrict(),
                                ImplicitParamDecl::Other);
  Args.push_back(&GtidArg);
  Args.push_back(&TaskTypeArg);
  const auto &TaskEntryFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
  llvm::FunctionType *TaskEntryTy =
      CGM.getTypes().GetFunctionType(TaskEntryFnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_entry", ""});
  auto *TaskEntry = llvm::Function::Create(
      TaskEntryTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskEntry, TaskEntryFnInfo);
  TaskEntry->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args,
                    Loc, Loc);

  // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map,
  // tt,
  // For taskloops:
  // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
  // tt->task_data.shareds);
  llvm::Value *GtidParam = CGF.EmitLoadOfScalar(
      CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc);
  // TDBase is the whole kmp_task_t_with_privates object; Base is its first
  // field, the embedded kmp_task_t.
  LValue TDBase = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&TaskTypeArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  const auto *KmpTaskTWithPrivatesQTyRD =
      cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
  LValue Base =
      CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
  const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
  // part_id is passed by address, not by value.
  auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
  LValue PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI);
  llvm::Value *PartidParam = PartIdLVal.getPointer(CGF);

  // Load the shareds pointer and cast it to the expected pointer type.
  auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds);
  LValue SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI);
  llvm::Value *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.EmitLoadOfScalar(SharedsLVal, Loc),
      CGF.ConvertTypeForMem(SharedsPtrTy));

  // The privates record is the optional second field; pass a null pointer
  // when the task has no privates.
  auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
  llvm::Value *PrivatesParam;
  if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) {
    LValue PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI);
    PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        PrivatesLVal.getPointer(CGF), CGF.VoidPtrTy);
  } else {
    PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }

  // Arguments common to tasks and taskloops; the last one is the task object
  // itself as a void pointer.
  llvm::Value *CommonArgs[] = {GtidParam, PartidParam, PrivatesParam,
                               TaskPrivatesMap,
                               CGF.Builder
                                   .CreatePointerBitCastOrAddrSpaceCast(
                                       TDBase.getAddress(CGF), CGF.VoidPtrTy)
                                   .getPointer()};
  SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs),
                                          std::end(CommonArgs));
  if (isOpenMPTaskLoopDirective(Kind)) {
    // Taskloops additionally pass lb, ub, st, liter and reductions, each
    // loaded from the corresponding kmp_task_t field.
    auto LBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound);
    LValue LBLVal = CGF.EmitLValueForField(Base, *LBFI);
    llvm::Value *LBParam = CGF.EmitLoadOfScalar(LBLVal, Loc);
    auto UBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound);
    LValue UBLVal = CGF.EmitLValueForField(Base, *UBFI);
    llvm::Value *UBParam = CGF.EmitLoadOfScalar(UBLVal, Loc);
    auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride);
    LValue StLVal = CGF.EmitLValueForField(Base, *StFI);
    llvm::Value *StParam = CGF.EmitLoadOfScalar(StLVal, Loc);
    auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
    LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
    llvm::Value *LIParam = CGF.EmitLoadOfScalar(LILVal, Loc);
    auto RFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTReductions);
    LValue RLVal = CGF.EmitLValueForField(Base, *RFI);
    llvm::Value *RParam = CGF.EmitLoadOfScalar(RLVal, Loc);
    CallArgs.push_back(LBParam);
    CallArgs.push_back(UBParam);
    CallArgs.push_back(StParam);
    CallArgs.push_back(LIParam);
    CallArgs.push_back(RParam);
  }
  // The shareds pointer is always the last argument.
  CallArgs.push_back(SharedsParam);

  CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskFunction,
                                                  CallArgs);
  // The proxy always returns 0.
  CGF.EmitStoreThroughLValue(RValue::get(CGF.Builder.getInt32(/*C=*/0)),
                             CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty));
  CGF.FinishFunction();
  return TaskEntry;
}

/// Emit an internal function (kmp_int32 gtid, kmp_task_t_with_privates *tt)
/// that registers a destroy cleanup for every field of the task's privates
/// record whose type requires destruction.
/// NOTE(review): the second field of \p KmpTaskTWithPrivatesQTy is accessed
/// unconditionally, so the record is assumed to contain a privates field -
/// confirm against the caller.
/// \return the emitted destructor function.
static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM,
                                            SourceLocation Loc,
                                            QualType KmpInt32Ty,
                                            QualType KmpTaskTWithPrivatesPtrQTy,
                                            QualType KmpTaskTWithPrivatesQTy) {
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                                KmpTaskTWithPrivatesPtrQTy.withRestrict(),
                                ImplicitParamDecl::Other);
  Args.push_back(&GtidArg);
  Args.push_back(&TaskTypeArg);
  const auto &DestructorFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
  llvm::FunctionType *DestructorFnTy =
      CGM.getTypes().GetFunctionType(DestructorFnInfo);
  std::string Name =
      CGM.getOpenMPRuntime().getName({"omp_task_destructor", ""});
  auto *DestructorFn =
      llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage,
                             Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), DestructorFn,
                                    DestructorFnInfo);
  DestructorFn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo,
                    Args, Loc, Loc);

  LValue Base = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&TaskTypeArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  const auto *KmpTaskTWithPrivatesQTyRD =
      cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
  // Step to the second field (the privates record) and rebase on it.
  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
  Base = CGF.EmitLValueForField(Base, *FI);
  for (const auto *Field :
       cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) {
    // Only fields whose type needs destruction get a cleanup.
    if (QualType::DestructionKind DtorKind =
            Field->getType().isDestructedType()) {
      LValue FieldLValue = CGF.EmitLValueForField(Base, Field);
      CGF.pushDestroy(DtorKind, FieldLValue.getAddress(CGF), Field->getType());
    }
  }
  CGF.FinishFunction();
  return DestructorFn;
}

/// Emit a privates mapping function for correct handling of private and
/// firstprivate variables.
/// \code
/// void .omp_task_privates_map.(const .privates. *noalias privs, <ty1>
/// **noalias priv1,...,  <tyn> **noalias privn) {
///   *priv1 = &.privates.priv1;
///   ...;
///   *privn = &.privates.privn;
/// }
/// \endcode
static llvm::Value *
emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc,
                               const OMPTaskDataTy &Data, QualType PrivatesQTy,
                               ArrayRef<PrivateDataTy> Privates) {
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  // First parameter: const restrict pointer to the privates record.
  ImplicitParamDecl TaskPrivatesArg(
      C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
      C.getPointerType(PrivatesQTy).withConst().withRestrict(),
      ImplicitParamDecl::Other);
  Args.push_back(&TaskPrivatesArg);
  // Maps each privatized variable to the index of its output parameter in
  // Args. Counting starts at 1 because Args[0] is the privates pointer.
  llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, unsigned> PrivateVarsPos;
  unsigned Counter = 1;
  // One pointer-to-pointer output parameter per private, firstprivate and
  // lastprivate variable, in that order.
  for (const Expr *E : Data.PrivateVars) {
    Args.push_back(ImplicitParamDecl::Create(
        C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
        C.getPointerType(C.getPointerType(E->getType()))
            .withConst()
            .withRestrict(),
        ImplicitParamDecl::Other));
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    PrivateVarsPos[VD] = Counter;
    ++Counter;
  }
  for (const Expr *E : Data.FirstprivateVars) {
    Args.push_back(ImplicitParamDecl::Create(
        C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
        C.getPointerType(C.getPointerType(E->getType()))
            .withConst()
            .withRestrict(),
        ImplicitParamDecl::Other));
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    PrivateVarsPos[VD] = Counter;
    ++Counter;
  }
  for (const Expr *E : Data.LastprivateVars) {
    Args.push_back(ImplicitParamDecl::Create(
        C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
        C.getPointerType(C.getPointerType(E->getType()))
            .withConst()
            .withRestrict(),
        ImplicitParamDecl::Other));
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    PrivateVarsPos[VD] = Counter;
    ++Counter;
  }
  // Local privates: a reference-typed local is stored as a pointer, and an
  // allocatable one gets one more level of indirection, before the
  // pointer-to-pointer parameter type is formed.
  for (const VarDecl *VD : Data.PrivateLocals) {
    QualType Ty = VD->getType().getNonReferenceType();
    if (VD->getType()->isLValueReferenceType())
      Ty = C.getPointerType(Ty);
    if (isAllocatableDecl(VD))
      Ty = C.getPointerType(Ty);
    Args.push_back(ImplicitParamDecl::Create(
        C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
        C.getPointerType(C.getPointerType(Ty)).withConst().withRestrict(),
        ImplicitParamDecl::Other));
    PrivateVarsPos[VD] = Counter;
    ++Counter;
  }
  const auto &TaskPrivatesMapFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *TaskPrivatesMapTy =
      CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo);
  std::string Name =
      CGM.getOpenMPRuntime().getName({"omp_task_privates_map", ""});
  auto *TaskPrivatesMap = llvm::Function::Create(
      TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage, Name,
      &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskPrivatesMap,
                                    TaskPrivatesMapFnInfo);
  // When optimizing, mark the mapping function always-inline.
  if (CGM.getLangOpts().Optimize) {
    TaskPrivatesMap->removeFnAttr(llvm::Attribute::NoInline);
    TaskPrivatesMap->removeFnAttr(llvm::Attribute::OptimizeNone);
    TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline);
  }
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap,
                    TaskPrivatesMapFnInfo, Args, Loc, Loc);

  // *privi = &.privates.privi;
  LValue Base = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&TaskPrivatesArg),
      TaskPrivatesArg.getType()->castAs<PointerType>());
  const auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl());
  // Counter now indexes Privates; the i-th field of the privates record is
  // taken to belong to Privates[i].
  Counter = 0;
  for (const FieldDecl *Field : PrivatesQTyRD->fields()) {
    LValue FieldLVal = CGF.EmitLValueForField(Base, Field);
    // Look up the output parameter that belongs to this field's variable.
    const VarDecl *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]];
    LValue RefLVal =
        CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType());
    LValue RefLoadLVal = CGF.EmitLoadOfPointerLValue(
        RefLVal.getAddress(CGF), RefLVal.getType()->castAs<PointerType>());
    // Store the address of the field through the output parameter.
    CGF.EmitStoreOfScalar(FieldLVal.getPointer(CGF), RefLoadLVal);
    ++Counter;
  }
  CGF.FinishFunction();
  return TaskPrivatesMap;
}

/// Emit initialization for private variables in task-based directives.
/// \param KmpTaskSharedsPtr Address of the shareds block; may be invalid, in
/// which case no source base is formed from it.
/// \param TDBase LValue of the kmp_task_t_with_privates object whose privates
/// are initialized.
/// \param ForDup true when emitting into the task duplication function; then
/// only privates with a non-trivial constructor initializer are initialized.
static void emitPrivatesInit(CodeGenFunction &CGF,
                             const OMPExecutableDirective &D,
                             Address KmpTaskSharedsPtr, LValue TDBase,
                             const RecordDecl *KmpTaskTWithPrivatesQTyRD,
                             QualType SharedsTy, QualType SharedsPtrTy,
                             const OMPTaskDataTy &Data,
                             ArrayRef<PrivateDataTy> Privates, bool ForDup) {
  ASTContext &C = CGF.getContext();
  // The privates record is the second field of kmp_task_t_with_privates.
  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
  LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI);
  OpenMPDirectiveKind Kind = isOpenMPTaskLoopDirective(D.getDirectiveKind())
                                 ? OMPD_taskloop
                                 : OMPD_task;
  const CapturedStmt &CS = *D.getCapturedStmt(Kind);
  CodeGenFunction::CGCapturedStmtInfo CapturesInfo(CS);
  LValue SrcBase;
  bool IsTargetTask =
      isOpenMPTargetDataManagementDirective(D.getDirectiveKind()) ||
      isOpenMPTargetExecutionDirective(D.getDirectiveKind());
  // For target-based directives skip 4 firstprivate arrays BasePointersArray,
  // PointersArray, SizesArray, and MappersArray. The original variables for
  // these arrays are not captured and we get their addresses explicitly.
  if ((!IsTargetTask && !Data.FirstprivateVars.empty() && ForDup) ||
      (IsTargetTask && KmpTaskSharedsPtr.isValid())) {
    SrcBase = CGF.MakeAddrLValue(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy)),
        SharedsTy);
  }
  // From here on FI walks the fields of the privates record in lock-step
  // with Privates.
  FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin();
  for (const PrivateDataTy &Pair : Privates) {
    // Do not initialize private locals.
    if (Pair.second.isLocalPrivate()) {
      ++FI;
      continue;
    }
    const VarDecl *VD = Pair.second.PrivateCopy;
    const Expr *Init = VD->getAnyInitializer();
    // Outside the dup function every initializer is emitted; inside it only
    // non-trivial constructor calls are.
    if (Init && (!ForDup || (isa<CXXConstructExpr>(Init) &&
                             !CGF.isTrivialInitializer(Init)))) {
      LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI);
      if (const VarDecl *Elem = Pair.second.PrivateElemInit) {
        const VarDecl *OriginalVD = Pair.second.Original;
        // Check if the variable is the target-based BasePointersArray,
        // PointersArray, SizesArray, or MappersArray.
        LValue SharedRefLValue;
        QualType Type = PrivateLValue.getType();
        const FieldDecl *SharedField = CapturesInfo.lookup(OriginalVD);
        if (IsTargetTask && !SharedField) {
          assert(isa<ImplicitParamDecl>(OriginalVD) &&
                 isa<CapturedDecl>(OriginalVD->getDeclContext()) &&
                 cast<CapturedDecl>(OriginalVD->getDeclContext())
                         ->getNumParams() == 0 &&
                 isa<TranslationUnitDecl>(
                     cast<CapturedDecl>(OriginalVD->getDeclContext())
                         ->getDeclContext()) &&
                 "Expected artificial target data variable.");
          SharedRefLValue =
              CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(OriginalVD), Type);
        } else if (ForDup) {
          // In the dup function the source value is read from the shareds
          // block, re-wrapped with the alignment of the original declaration.
          SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField);
          SharedRefLValue = CGF.MakeAddrLValue(
              Address(SharedRefLValue.getPointer(CGF),
                      C.getDeclAlign(OriginalVD)),
              SharedRefLValue.getType(), LValueBaseInfo(AlignmentSource::Decl),
              SharedRefLValue.getTBAAInfo());
        } else if (CGF.LambdaCaptureFields.count(
                       Pair.second.Original->getCanonicalDecl()) > 0 ||
                   isa_and_nonnull<BlockDecl>(CGF.CurCodeDecl)) {
          // Variable captured by an enclosing lambda, or code inside a block.
          SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
        } else {
          // Processing for implicitly captured variables.
          InlinedOpenMPRegionRAII Region(
              CGF, [](CodeGenFunction &, PrePostActionTy &) {}, OMPD_unknown,
              /*HasCancel=*/false, /*NoInheritance=*/true);
          SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
        }
        if (Type->isArrayType()) {
          // Initialize firstprivate array.
          if (!isa<CXXConstructExpr>(Init) || CGF.isTrivialInitializer(Init)) {
            // Perform simple memcpy.
            CGF.EmitAggregateAssign(PrivateLValue, SharedRefLValue, Type);
          } else {
            // Initialize firstprivate array using element-by-element
            // initialization.
            CGF.EmitOMPAggregateAssign(
                PrivateLValue.getAddress(CGF), SharedRefLValue.getAddress(CGF),
                Type,
                [&CGF, Elem, Init, &CapturesInfo](Address DestElement,
                                                  Address SrcElement) {
                  // Clean up any temporaries needed by the initialization.
                  CodeGenFunction::OMPPrivateScope InitScope(CGF);
                  InitScope.addPrivate(
                      Elem, [SrcElement]() -> Address { return SrcElement; });
                  (void)InitScope.Privatize();
                  // Emit initialization for single element.
                  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(
                      CGF, &CapturesInfo);
                  CGF.EmitAnyExprToMem(Init, DestElement,
                                       Init->getType().getQualifiers(),
                                       /*IsInitializer=*/false);
                });
          }
        } else {
          // Non-array: map the helper variable to the shared value's address
          // and emit the initializer into the private field.
          CodeGenFunction::OMPPrivateScope InitScope(CGF);
          InitScope.addPrivate(Elem, [SharedRefLValue, &CGF]() -> Address {
            return SharedRefLValue.getAddress(CGF);
          });
          (void)InitScope.Privatize();
          CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo);
          CGF.EmitExprAsInit(Init, VD, PrivateLValue,
                             /*capturedByInit=*/false);
        }
      } else {
        // No element-init helper: the initializer does not refer to the
        // original value, so emit it directly.
        CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false);
      }
    }
    ++FI;
  }
}

/// Check if duplication function is required for taskloops.
3973 static bool checkInitIsRequired(CodeGenFunction &CGF, 3974 ArrayRef<PrivateDataTy> Privates) { 3975 bool InitRequired = false; 3976 for (const PrivateDataTy &Pair : Privates) { 3977 if (Pair.second.isLocalPrivate()) 3978 continue; 3979 const VarDecl *VD = Pair.second.PrivateCopy; 3980 const Expr *Init = VD->getAnyInitializer(); 3981 InitRequired = InitRequired || (Init && isa<CXXConstructExpr>(Init) && 3982 !CGF.isTrivialInitializer(Init)); 3983 if (InitRequired) 3984 break; 3985 } 3986 return InitRequired; 3987 } 3988 3989 3990 /// Emit task_dup function (for initialization of 3991 /// private/firstprivate/lastprivate vars and last_iter flag) 3992 /// \code 3993 /// void __task_dup_entry(kmp_task_t *task_dst, const kmp_task_t *task_src, int 3994 /// lastpriv) { 3995 /// // setup lastprivate flag 3996 /// task_dst->last = lastpriv; 3997 /// // could be constructor calls here... 3998 /// } 3999 /// \endcode 4000 static llvm::Value * 4001 emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc, 4002 const OMPExecutableDirective &D, 4003 QualType KmpTaskTWithPrivatesPtrQTy, 4004 const RecordDecl *KmpTaskTWithPrivatesQTyRD, 4005 const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy, 4006 QualType SharedsPtrTy, const OMPTaskDataTy &Data, 4007 ArrayRef<PrivateDataTy> Privates, bool WithLastIter) { 4008 ASTContext &C = CGM.getContext(); 4009 FunctionArgList Args; 4010 ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 4011 KmpTaskTWithPrivatesPtrQTy, 4012 ImplicitParamDecl::Other); 4013 ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 4014 KmpTaskTWithPrivatesPtrQTy, 4015 ImplicitParamDecl::Other); 4016 ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy, 4017 ImplicitParamDecl::Other); 4018 Args.push_back(&DstArg); 4019 Args.push_back(&SrcArg); 4020 Args.push_back(&LastprivArg); 4021 const auto &TaskDupFnInfo = 4022 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 4023 llvm::FunctionType 
*TaskDupTy = CGM.getTypes().GetFunctionType(TaskDupFnInfo); 4024 std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_dup", ""}); 4025 auto *TaskDup = llvm::Function::Create( 4026 TaskDupTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule()); 4027 CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskDup, TaskDupFnInfo); 4028 TaskDup->setDoesNotRecurse(); 4029 CodeGenFunction CGF(CGM); 4030 CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskDup, TaskDupFnInfo, Args, Loc, 4031 Loc); 4032 4033 LValue TDBase = CGF.EmitLoadOfPointerLValue( 4034 CGF.GetAddrOfLocalVar(&DstArg), 4035 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>()); 4036 // task_dst->liter = lastpriv; 4037 if (WithLastIter) { 4038 auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter); 4039 LValue Base = CGF.EmitLValueForField( 4040 TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin()); 4041 LValue LILVal = CGF.EmitLValueForField(Base, *LIFI); 4042 llvm::Value *Lastpriv = CGF.EmitLoadOfScalar( 4043 CGF.GetAddrOfLocalVar(&LastprivArg), /*Volatile=*/false, C.IntTy, Loc); 4044 CGF.EmitStoreOfScalar(Lastpriv, LILVal); 4045 } 4046 4047 // Emit initial values for private copies (if any). 
4048 assert(!Privates.empty()); 4049 Address KmpTaskSharedsPtr = Address::invalid(); 4050 if (!Data.FirstprivateVars.empty()) { 4051 LValue TDBase = CGF.EmitLoadOfPointerLValue( 4052 CGF.GetAddrOfLocalVar(&SrcArg), 4053 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>()); 4054 LValue Base = CGF.EmitLValueForField( 4055 TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin()); 4056 KmpTaskSharedsPtr = Address( 4057 CGF.EmitLoadOfScalar(CGF.EmitLValueForField( 4058 Base, *std::next(KmpTaskTQTyRD->field_begin(), 4059 KmpTaskTShareds)), 4060 Loc), 4061 CGM.getNaturalTypeAlignment(SharedsTy)); 4062 } 4063 emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase, KmpTaskTWithPrivatesQTyRD, 4064 SharedsTy, SharedsPtrTy, Data, Privates, /*ForDup=*/true); 4065 CGF.FinishFunction(); 4066 return TaskDup; 4067 } 4068 4069 /// Checks if destructor function is required to be generated. 4070 /// \return true if cleanups are required, false otherwise. 4071 static bool 4072 checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD, 4073 ArrayRef<PrivateDataTy> Privates) { 4074 for (const PrivateDataTy &P : Privates) { 4075 if (P.second.isLocalPrivate()) 4076 continue; 4077 QualType Ty = P.second.Original->getType().getNonReferenceType(); 4078 if (Ty.isDestructedType()) 4079 return true; 4080 } 4081 return false; 4082 } 4083 4084 namespace { 4085 /// Loop generator for OpenMP iterator expression. 
/// Scope object that wraps code emitted during its lifetime into the loop
/// nest described by an iterator expression. The constructor privatizes the
/// iterator and counter variables and emits one loop header per iterator; the
/// destructor emits the counter increments, back branches and exit blocks in
/// reverse order. With a null iterator expression both are no-ops.
class OMPIteratorGeneratorScope final
    : public CodeGenFunction::OMPPrivateScope {
  CodeGenFunction &CGF;
  const OMPIteratorExpr *E = nullptr;
  // Per-iterator jump destinations, indexed like the iterators of E.
  SmallVector<CodeGenFunction::JumpDest, 4> ContDests;
  SmallVector<CodeGenFunction::JumpDest, 4> ExitDests;
  OMPIteratorGeneratorScope() = delete;
  OMPIteratorGeneratorScope(OMPIteratorGeneratorScope &) = delete;

public:
  OMPIteratorGeneratorScope(CodeGenFunction &CGF, const OMPIteratorExpr *E)
      : CodeGenFunction::OMPPrivateScope(CGF), CGF(CGF), E(E) {
    if (!E)
      return;
    // All upper bounds are emitted before any loop header is generated.
    SmallVector<llvm::Value *, 4> Uppers;
    for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
      Uppers.push_back(CGF.EmitScalarExpr(E->getHelper(I).Upper));
      const auto *VD = cast<VarDecl>(E->getIteratorDecl(I));
      addPrivate(VD, [&CGF, VD]() {
        return CGF.CreateMemTemp(VD->getType(), VD->getName());
      });
      const OMPIteratorHelperData &HelperData = E->getHelper(I);
      addPrivate(HelperData.CounterVD, [&CGF, &HelperData]() {
        return CGF.CreateMemTemp(HelperData.CounterVD->getType(),
                                 "counter.addr");
      });
    }
    Privatize();

    for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
      const OMPIteratorHelperData &HelperData = E->getHelper(I);
      LValue CLVal =
          CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(HelperData.CounterVD),
                             HelperData.CounterVD->getType());
      // Counter = 0;
      CGF.EmitStoreOfScalar(
          llvm::ConstantInt::get(CLVal.getAddress(CGF).getElementType(), 0),
          CLVal);
      CodeGenFunction::JumpDest &ContDest =
          ContDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.cont"));
      CodeGenFunction::JumpDest &ExitDest =
          ExitDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.exit"));
      // N = <number-of_iterations>;
      llvm::Value *N = Uppers[I];
      // cont:
      // if (Counter < N) goto body; else goto exit;
      CGF.EmitBlock(ContDest.getBlock());
      auto *CVal =
          CGF.EmitLoadOfScalar(CLVal, HelperData.CounterVD->getLocation());
      // The comparison signedness follows the counter's declared type.
      llvm::Value *Cmp =
          HelperData.CounterVD->getType()->isSignedIntegerOrEnumerationType()
              ? CGF.Builder.CreateICmpSLT(CVal, N)
              : CGF.Builder.CreateICmpULT(CVal, N);
      llvm::BasicBlock *BodyBB = CGF.createBasicBlock("iter.body");
      CGF.Builder.CreateCondBr(Cmp, BodyBB, ExitDest.getBlock());
      // body:
      CGF.EmitBlock(BodyBB);
      // Iteri = Begini + Counter * Stepi;
      CGF.EmitIgnoredExpr(HelperData.Update);
    }
  }
  ~OMPIteratorGeneratorScope() {
    if (!E)
      return;
    // Close the loops innermost-first, i.e. in reverse iterator order.
    for (unsigned I = E->numOfIterators(); I > 0; --I) {
      // Counter = Counter + 1;
      const OMPIteratorHelperData &HelperData = E->getHelper(I - 1);
      CGF.EmitIgnoredExpr(HelperData.CounterUpdate);
      // goto cont;
      CGF.EmitBranchThroughCleanup(ContDests[I - 1]);
      // exit:
      CGF.EmitBlock(ExitDests[I - 1].getBlock(), /*IsFinished=*/I == 1);
    }
  }
};
} // namespace

/// Emits the address and the size of the storage designated by \p E.
/// - For an array shaping expression the address is the value of its base and
///   the size is the size of the base's pointee type multiplied by every
///   dimension (converted to size_t).
/// - For an array section the size is the distance between the address one
///   element past the section's upper-bound element and the address of \p E.
/// - Otherwise the size is the size of the type of \p E.
/// \return Pair of (address, size).
static std::pair<llvm::Value *, llvm::Value *>
getPointerAndSize(CodeGenFunction &CGF, const Expr *E) {
  const auto *OASE = dyn_cast<OMPArrayShapingExpr>(E);
  llvm::Value *Addr;
  if (OASE) {
    const Expr *Base = OASE->getBase();
    Addr = CGF.EmitScalarExpr(Base);
  } else {
    Addr = CGF.EmitLValue(E).getPointer(CGF);
  }
  llvm::Value *SizeVal;
  QualType Ty = E->getType();
  if (OASE) {
    SizeVal = CGF.getTypeSize(OASE->getBase()->getType()->getPointeeType());
    for (const Expr *SE : OASE->getDimensions()) {
      llvm::Value *Sz = CGF.EmitScalarExpr(SE);
      Sz = CGF.EmitScalarConversion(
          Sz, SE->getType(), CGF.getContext().getSizeType(), SE->getExprLoc());
      SizeVal = CGF.Builder.CreateNUWMul(SizeVal, Sz);
    }
  } else if (const auto *ASE =
                 dyn_cast<OMPArraySectionExpr>(E->IgnoreParenImpCasts())) {
    LValue UpAddrLVal =
        CGF.EmitOMPArraySectionExpr(ASE, /*IsLowerBound=*/false);
    Address UpAddrAddress = UpAddrLVal.getAddress(CGF);
    // Step one element past the upper-bound element to get the end address.
    llvm::Value *UpAddr = CGF.Builder.CreateConstGEP1_32(
        UpAddrAddress.getElementType(), UpAddrAddress.getPointer(), /*Idx0=*/1);
    llvm::Value *LowIntPtr = CGF.Builder.CreatePtrToInt(Addr, CGF.SizeTy);
    llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGF.SizeTy);
    SizeVal = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr);
  } else {
    SizeVal = CGF.getTypeSize(Ty);
  }
  return std::make_pair(Addr, SizeVal);
}

/// Builds the kmp_task_affinity_info_t record type, if it is not built yet.
/// The record has three fields, in order: an intptr_t, a size_t and a 32-bit
/// unsigned flags field.
static void getKmpAffinityType(ASTContext &C, QualType &KmpTaskAffinityInfoTy) {
  QualType FlagsTy = C.getIntTypeForBitwidth(32, /*Signed=*/false);
  if (KmpTaskAffinityInfoTy.isNull()) {
    RecordDecl *KmpAffinityInfoRD =
        C.buildImplicitRecord("kmp_task_affinity_info_t");
    KmpAffinityInfoRD->startDefinition();
    addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getIntPtrType());
    addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getSizeType());
    addFieldToRecordDecl(C, KmpAffinityInfoRD, FlagsTy);
    KmpAffinityInfoRD->completeDefinition();
    KmpTaskAffinityInfoTy = C.getRecordType(KmpAffinityInfoRD);
  }
}

/// Emits allocation and initialization of the task descriptor for \p D.
/// In order, the code below:
///  - collects private, firstprivate, lastprivate and local private variables
///    and sorts them by decreasing alignment;
///  - builds kmp_task_t (cached separately for taskloops) and the per-task
///    record that appends the privates;
///  - emits the privates mapping function and the proxy task entry;
///  - calls __kmpc_omp_task_alloc, or __kmpc_omp_target_task_alloc when the
///    directive has a 'nowait' clause;
///  - processes 'detach' and 'affinity' clauses;
///  - copies the shareds, initializes the privates and, for taskloops that
///    need it, emits the task duplication function;
///  - stores the destructors thunk and the priority into the task record.
/// \return The allocated task, its entry function, the typed task pointer,
/// the kmp_task_t lvalue and record, and the dup function if one was emitted.
CGOpenMPRuntime::TaskResultTy
CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,
                              const OMPExecutableDirective &D,
                              llvm::Function *TaskFunction, QualType SharedsTy,
                              Address Shareds, const OMPTaskDataTy &Data) {
  ASTContext &C = CGM.getContext();
  llvm::SmallVector<PrivateDataTy, 4> Privates;
  // Aggregate privates and sort them by the alignment.
  const auto *I = Data.PrivateCopies.begin();
  for (const Expr *E : Data.PrivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
                         /*PrivateElemInit=*/nullptr));
    ++I;
  }
  I = Data.FirstprivateCopies.begin();
  const auto *IElemInitRef = Data.FirstprivateInits.begin();
  for (const Expr *E : Data.FirstprivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(
            E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
            cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl())));
    ++I;
    ++IElemInitRef;
  }
  I = Data.LastprivateCopies.begin();
  for (const Expr *E : Data.LastprivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
                         /*PrivateElemInit=*/nullptr));
    ++I;
  }
  // Allocatable local privates are recorded with pointer alignment instead of
  // the alignment of the declaration.
  for (const VarDecl *VD : Data.PrivateLocals) {
    if (isAllocatableDecl(VD))
      Privates.emplace_back(CGM.getPointerAlign(), PrivateHelpersTy(VD));
    else
      Privates.emplace_back(C.getDeclAlign(VD), PrivateHelpersTy(VD));
  }
  // Largest alignment first; entries with equal alignment keep their order.
  llvm::stable_sort(Privates,
                    [](const PrivateDataTy &L, const PrivateDataTy &R) {
                      return L.first > R.first;
                    });
  QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
  // Build type kmp_routine_entry_t (if not built yet).
  emitKmpRoutineEntryT(KmpInt32Ty);
  // Build type kmp_task_t (if not built yet).
  if (isOpenMPTaskLoopDirective(D.getDirectiveKind())) {
    if (SavedKmpTaskloopTQTy.isNull()) {
      SavedKmpTaskloopTQTy = C.getRecordType(createKmpTaskTRecordDecl(
          CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
    }
    KmpTaskTQTy = SavedKmpTaskloopTQTy;
  } else {
    assert((D.getDirectiveKind() == OMPD_task ||
            isOpenMPTargetExecutionDirective(D.getDirectiveKind()) ||
            isOpenMPTargetDataManagementDirective(D.getDirectiveKind())) &&
           "Expected taskloop, task or target directive");
    if (SavedKmpTaskTQTy.isNull()) {
      SavedKmpTaskTQTy = C.getRecordType(createKmpTaskTRecordDecl(
          CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
    }
    KmpTaskTQTy = SavedKmpTaskTQTy;
  }
  const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
  // Build particular struct kmp_task_t for the given task.
  const RecordDecl *KmpTaskTWithPrivatesQTyRD =
      createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates);
  QualType KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD);
  QualType KmpTaskTWithPrivatesPtrQTy =
      C.getPointerType(KmpTaskTWithPrivatesQTy);
  llvm::Type *KmpTaskTWithPrivatesTy = CGF.ConvertType(KmpTaskTWithPrivatesQTy);
  llvm::Type *KmpTaskTWithPrivatesPtrTy =
      KmpTaskTWithPrivatesTy->getPointerTo();
  llvm::Value *KmpTaskTWithPrivatesTySize =
      CGF.getTypeSize(KmpTaskTWithPrivatesQTy);
  QualType SharedsPtrTy = C.getPointerType(SharedsTy);

  // Emit initial values for private copies (if any).
  llvm::Value *TaskPrivatesMap = nullptr;
  // The privates map is passed with the type of the task function's fourth
  // parameter.
  llvm::Type *TaskPrivatesMapTy =
      std::next(TaskFunction->arg_begin(), 3)->getType();
  if (!Privates.empty()) {
    auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
    TaskPrivatesMap =
        emitTaskPrivateMappingFunction(CGM, Loc, Data, FI->getType(), Privates);
    TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        TaskPrivatesMap, TaskPrivatesMapTy);
  } else {
    TaskPrivatesMap = llvm::ConstantPointerNull::get(
        cast<llvm::PointerType>(TaskPrivatesMapTy));
  }
  // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid,
  // kmp_task_t *tt);
  llvm::Function *TaskEntry = emitProxyTaskFunction(
      CGM, Loc, D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
      KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction,
      TaskPrivatesMap);

  // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
  // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
  // kmp_routine_entry_t *task_entry);
  // Task flags. Format is taken from
  // https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h,
  // description of kmp_tasking_flags struct.
  enum {
    TiedFlag = 0x1,
    FinalFlag = 0x2,
    DestructorsFlag = 0x8,
    PriorityFlag = 0x20,
    DetachableFlag = 0x40,
  };
  unsigned Flags = Data.Tied ? TiedFlag : 0;
  bool NeedsCleanup = false;
  if (!Privates.empty()) {
    NeedsCleanup =
        checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD, Privates);
    if (NeedsCleanup)
      Flags = Flags | DestructorsFlag;
  }
  if (Data.Priority.getInt())
    Flags = Flags | PriorityFlag;
  if (D.hasClausesOfKind<OMPDetachClause>())
    Flags = Flags | DetachableFlag;
  // The final flag is selected at run time when Data.Final carries a value,
  // and is a constant otherwise; it is then OR'ed with the static flags.
  llvm::Value *TaskFlags =
      Data.Final.getPointer()
          ? CGF.Builder.CreateSelect(Data.Final.getPointer(),
                                     CGF.Builder.getInt32(FinalFlag),
                                     CGF.Builder.getInt32(/*C=*/0))
          : CGF.Builder.getInt32(Data.Final.getInt() ? FinalFlag : 0);
  TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags));
  llvm::Value *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy));
  SmallVector<llvm::Value *, 8> AllocArgs = {emitUpdateLocation(CGF, Loc),
      getThreadID(CGF, Loc), TaskFlags, KmpTaskTWithPrivatesTySize,
      SharedsSize, CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      TaskEntry, KmpRoutineEntryPtrTy)};
  llvm::Value *NewTask;
  if (D.hasClausesOfKind<OMPNowaitClause>()) {
    // Check if we have any device clause associated with the directive.
    const Expr *Device = nullptr;
    if (auto *C = D.getSingleClause<OMPDeviceClause>())
      Device = C->getDevice();
    // Emit device ID if any otherwise use default value.
    llvm::Value *DeviceID;
    if (Device)
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    else
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    // The target variant takes the device id as an extra trailing argument.
    AllocArgs.push_back(DeviceID);
    NewTask = CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_omp_target_task_alloc),
        AllocArgs);
  } else {
    NewTask =
        CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                                CGM.getModule(), OMPRTL___kmpc_omp_task_alloc),
                            AllocArgs);
  }
  // Emit detach clause initialization.
  // evt = (typeof(evt))__kmpc_task_allow_completion_event(loc, tid,
  // task_descriptor);
  if (const auto *DC = D.getSingleClause<OMPDetachClause>()) {
    const Expr *Evt = DC->getEventHandler()->IgnoreParenImpCasts();
    LValue EvtLVal = CGF.EmitLValue(Evt);

    // Build kmp_event_t *__kmpc_task_allow_completion_event(ident_t *loc_ref,
    // int gtid, kmp_task_t *task);
    llvm::Value *Loc = emitUpdateLocation(CGF, DC->getBeginLoc());
    llvm::Value *Tid = getThreadID(CGF, DC->getBeginLoc());
    Tid = CGF.Builder.CreateIntCast(Tid, CGF.IntTy, /*isSigned=*/false);
    llvm::Value *EvtVal = CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_task_allow_completion_event),
        {Loc, Tid, NewTask});
    EvtVal = CGF.EmitScalarConversion(EvtVal, C.VoidPtrTy, Evt->getType(),
                                      Evt->getExprLoc());
    CGF.EmitStoreOfScalar(EvtVal, EvtLVal);
  }
  // Process affinity clauses.
  if (D.hasClausesOfKind<OMPAffinityClause>()) {
    // Process list of affinity data.
    ASTContext &C = CGM.getContext();
    Address AffinitiesArray = Address::invalid();
    // Calculate number of elements to form the array of affinity data.
    // NumOfElements accumulates the product of the iterator upper bounds of
    // clauses with a modifier; NumAffinities counts the list items of clauses
    // without one.
    llvm::Value *NumOfElements = nullptr;
    unsigned NumAffinities = 0;
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      if (const Expr *Modifier = C->getModifier()) {
        const auto *IE = cast<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts());
        for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
          llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
          Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
          NumOfElements =
              NumOfElements ? CGF.Builder.CreateNUWMul(NumOfElements, Sz) : Sz;
        }
      } else {
        NumAffinities += C->varlist_size();
      }
    }
    getKmpAffinityType(CGM.getContext(), KmpTaskAffinityInfoTy);
    // Fields ids in kmp_task_affinity_info record.
    enum RTLAffinityInfoFieldsTy { BaseAddr, Len, Flags };

    QualType KmpTaskAffinityInfoArrayTy;
    if (NumOfElements) {
      // Some element count is only known at run time: emit the array as a
      // variable-length array.
      NumOfElements = CGF.Builder.CreateNUWAdd(
          llvm::ConstantInt::get(CGF.SizeTy, NumAffinities), NumOfElements);
      auto *OVE = new (C) OpaqueValueExpr(
          Loc,
          C.getIntTypeForBitwidth(C.getTypeSize(C.getSizeType()), /*Signed=*/0),
          VK_PRValue);
      CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, OVE,
                                                    RValue::get(NumOfElements));
      KmpTaskAffinityInfoArrayTy =
          C.getVariableArrayType(KmpTaskAffinityInfoTy, OVE, ArrayType::Normal,
                                 /*IndexTypeQuals=*/0, SourceRange(Loc, Loc));
      // Properly emit variable-sized array.
      auto *PD = ImplicitParamDecl::Create(C, KmpTaskAffinityInfoArrayTy,
                                           ImplicitParamDecl::Other);
      CGF.EmitVarDecl(*PD);
      AffinitiesArray = CGF.GetAddrOfLocalVar(PD);
      NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
                                                /*isSigned=*/false);
    } else {
      // The element count is a compile-time constant: use a fixed-size
      // temporary array.
      KmpTaskAffinityInfoArrayTy = C.getConstantArrayType(
          KmpTaskAffinityInfoTy,
          llvm::APInt(C.getTypeSize(C.getSizeType()), NumAffinities), nullptr,
          ArrayType::Normal, /*IndexTypeQuals=*/0);
      AffinitiesArray =
          CGF.CreateMemTemp(KmpTaskAffinityInfoArrayTy, ".affs.arr.addr");
      AffinitiesArray = CGF.Builder.CreateConstArrayGEP(AffinitiesArray, 0);
      NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumAffinities,
                                             /*isSigned=*/false);
    }

    const auto *KmpAffinityInfoRD = KmpTaskAffinityInfoTy->getAsRecordDecl();
    // Fill array by elements without iterators.
    unsigned Pos = 0;
    bool HasIterator = false;
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      if (C->getModifier()) {
        HasIterator = true;
        continue;
      }
      for (const Expr *E : C->varlists()) {
        llvm::Value *Addr;
        llvm::Value *Size;
        std::tie(Addr, Size) = getPointerAndSize(CGF, E);
        LValue Base =
            CGF.MakeAddrLValue(CGF.Builder.CreateConstGEP(AffinitiesArray, Pos),
                               KmpTaskAffinityInfoTy);
        // affs[i].base_addr = &<Affinities[i].second>;
        LValue BaseAddrLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
        CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
                              BaseAddrLVal);
        // affs[i].len = sizeof(<Affinities[i].second>);
        LValue LenLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
        CGF.EmitStoreOfScalar(Size, LenLVal);
        ++Pos;
      }
    }
    // Elements produced under iterators are indexed by a run-time counter
    // that starts right after the statically placed elements.
    LValue PosLVal;
    if (HasIterator) {
      PosLVal = CGF.MakeAddrLValue(
          CGF.CreateMemTemp(C.getSizeType(), "affs.counter.addr"),
          C.getSizeType());
      CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
    }
    // Process elements with iterators.
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      const Expr *Modifier = C->getModifier();
      if (!Modifier)
        continue;
      OMPIteratorGeneratorScope IteratorScope(
          CGF, cast_or_null<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts()));
      for (const Expr *E : C->varlists()) {
        llvm::Value *Addr;
        llvm::Value *Size;
        std::tie(Addr, Size) = getPointerAndSize(CGF, E);
        llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
        LValue Base = CGF.MakeAddrLValue(
            Address(CGF.Builder.CreateGEP(AffinitiesArray.getElementType(),
                                          AffinitiesArray.getPointer(), Idx),
                    AffinitiesArray.getAlignment()),
            KmpTaskAffinityInfoTy);
        // affs[i].base_addr = &<Affinities[i].second>;
        LValue BaseAddrLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
        CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
                              BaseAddrLVal);
        // affs[i].len = sizeof(<Affinities[i].second>);
        LValue LenLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
        CGF.EmitStoreOfScalar(Size, LenLVal);
        Idx = CGF.Builder.CreateNUWAdd(
            Idx, llvm::ConstantInt::get(Idx->getType(), 1));
        CGF.EmitStoreOfScalar(Idx, PosLVal);
      }
    }
    // Call to kmp_int32 __kmpc_omp_reg_task_with_affinity(ident_t *loc_ref,
    // kmp_int32 gtid, kmp_task_t *new_task, kmp_int32
    // naffins, kmp_task_affinity_info_t *affin_list);
    llvm::Value *LocRef = emitUpdateLocation(CGF, Loc);
    llvm::Value *GTid = getThreadID(CGF, Loc);
    llvm::Value *AffinListPtr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        AffinitiesArray.getPointer(), CGM.VoidPtrTy);
    // FIXME: Emit the function and ignore its result for now unless the
    // runtime function is properly implemented.
    (void)CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_omp_reg_task_with_affinity),
        {LocRef, GTid, NewTask, NumOfElements, AffinListPtr});
  }
  llvm::Value *NewTaskNewTaskTTy =
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          NewTask, KmpTaskTWithPrivatesPtrTy);
  LValue Base = CGF.MakeNaturalAlignAddrLValue(NewTaskNewTaskTTy,
                                               KmpTaskTWithPrivatesQTy);
  // TDBase is the kmp_task_t part, i.e. the first field of the task record.
  LValue TDBase =
      CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin());
  // Fill the data in the resulting kmp_task_t record.
  // Copy shareds if there are any.
  Address KmpTaskSharedsPtr = Address::invalid();
  if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) {
    KmpTaskSharedsPtr =
        Address(CGF.EmitLoadOfScalar(
                    CGF.EmitLValueForField(
                        TDBase, *std::next(KmpTaskTQTyRD->field_begin(),
                                           KmpTaskTShareds)),
                    Loc),
                CGM.getNaturalTypeAlignment(SharedsTy));
    LValue Dest = CGF.MakeAddrLValue(KmpTaskSharedsPtr, SharedsTy);
    LValue Src = CGF.MakeAddrLValue(Shareds, SharedsTy);
    CGF.EmitAggregateCopy(Dest, Src, SharedsTy, AggValueSlot::DoesNotOverlap);
  }
  // Emit initial values for private copies (if any).
  TaskResultTy Result;
  if (!Privates.empty()) {
    emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, Base, KmpTaskTWithPrivatesQTyRD,
                     SharedsTy, SharedsPtrTy, Data, Privates,
                     /*ForDup=*/false);
    // A dup function is only emitted for taskloops that have lastprivates or
    // privates with non-trivial constructor initializers.
    if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) &&
        (!Data.LastprivateVars.empty() || checkInitIsRequired(CGF, Privates))) {
      Result.TaskDupFn = emitTaskDupFunction(
          CGM, Loc, D, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTyRD,
          KmpTaskTQTyRD, SharedsTy, SharedsPtrTy, Data, Privates,
          /*WithLastIter=*/!Data.LastprivateVars.empty());
    }
  }
  // Fields of union "kmp_cmplrdata_t" for destructors and priority.
  enum { Priority = 0, Destructors = 1 };
  // Provide pointer to function with destructors for privates.
  auto FI = std::next(KmpTaskTQTyRD->field_begin(), Data1);
  const RecordDecl *KmpCmplrdataUD =
      (*FI)->getType()->getAsUnionType()->getDecl();
  if (NeedsCleanup) {
    llvm::Value *DestructorFn = emitDestructorsFunction(
        CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
        KmpTaskTWithPrivatesQTy);
    LValue Data1LV = CGF.EmitLValueForField(TDBase, *FI);
    LValue DestructorsLV = CGF.EmitLValueForField(
        Data1LV, *std::next(KmpCmplrdataUD->field_begin(), Destructors));
    CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                              DestructorFn, KmpRoutineEntryPtrTy),
                          DestructorsLV);
  }
  // Set priority.
  if (Data.Priority.getInt()) {
    LValue Data2LV = CGF.EmitLValueForField(
        TDBase, *std::next(KmpTaskTQTyRD->field_begin(), Data2));
    LValue PriorityLV = CGF.EmitLValueForField(
        Data2LV, *std::next(KmpCmplrdataUD->field_begin(), Priority));
    CGF.EmitStoreOfScalar(Data.Priority.getPointer(), PriorityLV);
  }
  Result.NewTask = NewTask;
  Result.TaskEntry = TaskEntry;
  Result.NewTaskNewTaskTTy = NewTaskNewTaskTTy;
  Result.TDBase = TDBase;
  Result.KmpTaskTQTyRD = KmpTaskTQTyRD;
  return Result;
}

namespace {
/// Dependence kind for RTL.
enum RTLDependenceKindTy {
  DepIn = 0x01,
  DepInOut = 0x3,
  DepMutexInOutSet = 0x4
};
/// Fields ids in kmp_depend_info record.
enum RTLDependInfoFieldsTy { BaseAddr, Len, Flags };
} // namespace

/// Translates internal dependency kind into the runtime kind.
/// 'in' maps to DepIn, 'out' and 'inout' both map to DepInOut, and
/// 'mutexinoutset' maps to DepMutexInOutSet. Any other kind is unreachable.
static RTLDependenceKindTy translateDependencyKind(OpenMPDependClauseKind K) {
  RTLDependenceKindTy DepKind;
  switch (K) {
  case OMPC_DEPEND_in:
    DepKind = DepIn;
    break;
  // Out and InOut dependencies must use the same code.
  case OMPC_DEPEND_out:
  case OMPC_DEPEND_inout:
    DepKind = DepInOut;
    break;
  case OMPC_DEPEND_mutexinoutset:
    DepKind = DepMutexInOutSet;
    break;
  case OMPC_DEPEND_source:
  case OMPC_DEPEND_sink:
  case OMPC_DEPEND_depobj:
  case OMPC_DEPEND_unknown:
    llvm_unreachable("Unknown task dependence type");
  }
  return DepKind;
}

/// Builds kmp_depend_info, if it is not built yet, and builds flags type.
/// The flags type is an unsigned integer with the bit width of bool; the
/// record fields are, in order: an intptr_t, a size_t and the flags.
static void getDependTypes(ASTContext &C, QualType &KmpDependInfoTy,
                           QualType &FlagsTy) {
  FlagsTy = C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false);
  if (KmpDependInfoTy.isNull()) {
    RecordDecl *KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info");
    KmpDependInfoRD->startDefinition();
    addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType());
    addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType());
    addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy);
    KmpDependInfoRD->completeDefinition();
    KmpDependInfoTy = C.getRecordType(KmpDependInfoRD);
  }
}

/// Loads the kmp_depend_info array referenced by the depobj \p DepobjLVal.
/// \return Pair of (number of dependencies, lvalue of the first array
/// element). The count is read from the base_addr field of the element
/// located immediately before the first array element.
std::pair<llvm::Value *, LValue>
CGOpenMPRuntime::getDepobjElements(CodeGenFunction &CGF, LValue DepobjLVal,
                                   SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  LValue Base = CGF.EmitLoadOfPointerLValue(
      DepobjLVal.getAddress(CGF),
      C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
  QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
  Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      Base.getAddress(CGF), CGF.ConvertTypeForMem(KmpDependInfoPtrTy));
  Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(),
                            Base.getTBAAInfo());
  // Step back one element to reach the header entry.
  llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
      Addr.getElementType(), Addr.getPointer(),
      llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
  LValue NumDepsBase = CGF.MakeAddrLValue(
      Address(DepObjAddr, Addr.getAlignment()), KmpDependInfoTy,
      Base.getBaseInfo(), Base.getTBAAInfo());
  // NumDeps = deps[-1].base_addr;
  LValue BaseAddrLVal = CGF.EmitLValueForField(
      NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
  llvm::Value *NumDeps = CGF.EmitLoadOfScalar(BaseAddrLVal, Loc);
  return std::make_pair(NumDeps, Base);
}

/// Emits the kmp_depend_info entries for every expression in \p Data into
/// \p DependenciesArray, wrapped in the iterator loops of \p Data if it has an
/// iterator expression.
/// \param Pos Index of the next free array element: either a compile-time
/// counter (unsigned *) or a run-time counter (LValue *). It is advanced by
/// one for each emitted entry.
static void emitDependData(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
                           llvm::PointerUnion<unsigned *, LValue *> Pos,
                           const OMPTaskDataTy::DependData &Data,
                           Address DependenciesArray) {
  CodeGenModule &CGM = CGF.CGM;
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);

  OMPIteratorGeneratorScope IteratorScope(
      CGF, cast_or_null<OMPIteratorExpr>(
               Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
                                 : nullptr));
  for (const Expr *E : Data.DepExprs) {
    llvm::Value *Addr;
    llvm::Value *Size;
    std::tie(Addr, Size) = getPointerAndSize(CGF, E);
    LValue Base;
    if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
      // Constant index into the array.
      Base = CGF.MakeAddrLValue(
          CGF.Builder.CreateConstGEP(DependenciesArray, *P), KmpDependInfoTy);
    } else {
      // Index loaded at run time from the position counter.
      LValue &PosLVal = *Pos.get<LValue *>();
      llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
      Base = CGF.MakeAddrLValue(
          Address(CGF.Builder.CreateGEP(DependenciesArray.getElementType(),
                                        DependenciesArray.getPointer(), Idx),
                  DependenciesArray.getAlignment()),
          KmpDependInfoTy);
    }
    // deps[i].base_addr = &<Dependencies[i].second>;
    LValue BaseAddrLVal = CGF.EmitLValueForField(
        Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
    CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
                          BaseAddrLVal);
    // deps[i].len = sizeof(<Dependencies[i].second>);
    LValue LenLVal = CGF.EmitLValueForField(
        Base, *std::next(KmpDependInfoRD->field_begin(), Len));
    CGF.EmitStoreOfScalar(Size, LenLVal);
    // deps[i].flags = <Dependencies[i].first>;
    RTLDependenceKindTy DepKind = translateDependencyKind(Data.DepKind);
    LValue FlagsLVal = CGF.EmitLValueForField(
        Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
    CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
                          FlagsLVal);
    // Advance the position counter (compile-time or run-time).
    if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
      ++(*P);
    } else {
      LValue &PosLVal = *Pos.get<LValue *>();
      llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
      Idx = CGF.Builder.CreateNUWAdd(Idx,
                                     llvm::ConstantInt::get(Idx->getType(), 1));
      CGF.EmitStoreOfScalar(Idx, PosLVal);
    }
  }
}

/// For a 'depobj' dependence, emits one size temporary per depobj expression
/// in \p Data and returns the values loaded from those temporaries. Each
/// temporary is set to zero and then increased by the dependency count read
/// from the header entry of the corresponding depobj array; the stores are
/// emitted inside the iterator loops of \p Data (if any), the loads after.
static SmallVector<llvm::Value *, 4>
emitDepobjElementsSizes(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
                        const OMPTaskDataTy::DependData &Data) {
  assert(Data.DepKind == OMPC_DEPEND_depobj &&
         "Expected depobj dependecy kind.");
  SmallVector<llvm::Value *, 4> Sizes;
  SmallVector<LValue, 4> SizeLVals;
  ASTContext &C = CGF.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
  llvm::Type *KmpDependInfoPtrT = CGF.ConvertTypeForMem(KmpDependInfoPtrTy);
  {
    OMPIteratorGeneratorScope IteratorScope(
        CGF, cast_or_null<OMPIteratorExpr>(
                 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
                                   : nullptr));
    for (const Expr *E : Data.DepExprs) {
      LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
      LValue Base = CGF.EmitLoadOfPointerLValue(
          DepobjLVal.getAddress(CGF),
          C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
      Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          Base.getAddress(CGF), KmpDependInfoPtrT);
      Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(),
                                Base.getTBAAInfo());
      // Step back one element to reach the header entry.
      llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
          Addr.getElementType(), Addr.getPointer(),
          llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
      LValue NumDepsBase = CGF.MakeAddrLValue(
          Address(DepObjAddr, Addr.getAlignment()), KmpDependInfoTy,
          Base.getBaseInfo(), Base.getTBAAInfo());
      // NumDeps = deps[-1].base_addr;
      LValue BaseAddrLVal = CGF.EmitLValueForField(
          NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
      llvm::Value *NumDeps =
          CGF.EmitLoadOfScalar(BaseAddrLVal, E->getExprLoc());
      LValue NumLVal = CGF.MakeAddrLValue(
          CGF.CreateMemTemp(C.getUIntPtrType(), "depobj.size.addr"),
          C.getUIntPtrType());
      CGF.Builder.CreateStore(llvm::ConstantInt::get(CGF.IntPtrTy, 0),
                              NumLVal.getAddress(CGF));
      llvm::Value *PrevVal = CGF.EmitLoadOfScalar(NumLVal, E->getExprLoc());
      llvm::Value *Add = CGF.Builder.CreateNUWAdd(PrevVal, NumDeps);
      CGF.EmitStoreOfScalar(Add, NumLVal);
      SizeLVals.push_back(NumLVal);
    }
  }
  // The sizes are loaded only after the iterator scope has been closed.
  for (unsigned I = 0, E = SizeLVals.size(); I < E; ++I) {
    llvm::Value *Size =
        CGF.EmitLoadOfScalar(SizeLVals[I], Data.DepExprs[I]->getExprLoc());
    Sizes.push_back(Size);
  }
  return Sizes;
}

static void emitDepobjElements(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
                               LValue PosLVal,
                               const OMPTaskDataTy::DependData &Data,
                               Address DependenciesArray) {
  assert(Data.DepKind == OMPC_DEPEND_depobj &&
         "Expected depobj dependecy kind.");
  ASTContext &C = CGF.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
  llvm::Type *KmpDependInfoPtrT = CGF.ConvertTypeForMem(KmpDependInfoPtrTy);
  llvm::Value *ElSize = CGF.getTypeSize(KmpDependInfoTy);
  {
    OMPIteratorGeneratorScope IteratorScope(
        CGF, cast_or_null<OMPIteratorExpr>(
                 Data.IteratorExpr ?
Data.IteratorExpr->IgnoreParenImpCasts() 4820 : nullptr)); 4821 for (unsigned I = 0, End = Data.DepExprs.size(); I < End; ++I) { 4822 const Expr *E = Data.DepExprs[I]; 4823 LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts()); 4824 LValue Base = CGF.EmitLoadOfPointerLValue( 4825 DepobjLVal.getAddress(CGF), 4826 C.getPointerType(C.VoidPtrTy).castAs<PointerType>()); 4827 Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4828 Base.getAddress(CGF), KmpDependInfoPtrT); 4829 Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(), 4830 Base.getTBAAInfo()); 4831 4832 // Get number of elements in a single depobj. 4833 llvm::Value *DepObjAddr = CGF.Builder.CreateGEP( 4834 Addr.getElementType(), Addr.getPointer(), 4835 llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true)); 4836 LValue NumDepsBase = CGF.MakeAddrLValue( 4837 Address(DepObjAddr, Addr.getAlignment()), KmpDependInfoTy, 4838 Base.getBaseInfo(), Base.getTBAAInfo()); 4839 // NumDeps = deps[i].base_addr; 4840 LValue BaseAddrLVal = CGF.EmitLValueForField( 4841 NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr)); 4842 llvm::Value *NumDeps = 4843 CGF.EmitLoadOfScalar(BaseAddrLVal, E->getExprLoc()); 4844 4845 // memcopy dependency data. 4846 llvm::Value *Size = CGF.Builder.CreateNUWMul( 4847 ElSize, 4848 CGF.Builder.CreateIntCast(NumDeps, CGF.SizeTy, /*isSigned=*/false)); 4849 llvm::Value *Pos = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc()); 4850 Address DepAddr = 4851 Address(CGF.Builder.CreateGEP(DependenciesArray.getElementType(), 4852 DependenciesArray.getPointer(), Pos), 4853 DependenciesArray.getAlignment()); 4854 CGF.Builder.CreateMemCpy(DepAddr, Base.getAddress(CGF), Size); 4855 4856 // Increase pos. 
4857 // pos += size; 4858 llvm::Value *Add = CGF.Builder.CreateNUWAdd(Pos, NumDeps); 4859 CGF.EmitStoreOfScalar(Add, PosLVal); 4860 } 4861 } 4862 } 4863 4864 std::pair<llvm::Value *, Address> CGOpenMPRuntime::emitDependClause( 4865 CodeGenFunction &CGF, ArrayRef<OMPTaskDataTy::DependData> Dependencies, 4866 SourceLocation Loc) { 4867 if (llvm::all_of(Dependencies, [](const OMPTaskDataTy::DependData &D) { 4868 return D.DepExprs.empty(); 4869 })) 4870 return std::make_pair(nullptr, Address::invalid()); 4871 // Process list of dependencies. 4872 ASTContext &C = CGM.getContext(); 4873 Address DependenciesArray = Address::invalid(); 4874 llvm::Value *NumOfElements = nullptr; 4875 unsigned NumDependencies = std::accumulate( 4876 Dependencies.begin(), Dependencies.end(), 0, 4877 [](unsigned V, const OMPTaskDataTy::DependData &D) { 4878 return D.DepKind == OMPC_DEPEND_depobj 4879 ? V 4880 : (V + (D.IteratorExpr ? 0 : D.DepExprs.size())); 4881 }); 4882 QualType FlagsTy; 4883 getDependTypes(C, KmpDependInfoTy, FlagsTy); 4884 bool HasDepobjDeps = false; 4885 bool HasRegularWithIterators = false; 4886 llvm::Value *NumOfDepobjElements = llvm::ConstantInt::get(CGF.IntPtrTy, 0); 4887 llvm::Value *NumOfRegularWithIterators = 4888 llvm::ConstantInt::get(CGF.IntPtrTy, 0); 4889 // Calculate number of depobj dependecies and regular deps with the iterators. 4890 for (const OMPTaskDataTy::DependData &D : Dependencies) { 4891 if (D.DepKind == OMPC_DEPEND_depobj) { 4892 SmallVector<llvm::Value *, 4> Sizes = 4893 emitDepobjElementsSizes(CGF, KmpDependInfoTy, D); 4894 for (llvm::Value *Size : Sizes) { 4895 NumOfDepobjElements = 4896 CGF.Builder.CreateNUWAdd(NumOfDepobjElements, Size); 4897 } 4898 HasDepobjDeps = true; 4899 continue; 4900 } 4901 // Include number of iterations, if any. 
4902 4903 if (const auto *IE = cast_or_null<OMPIteratorExpr>(D.IteratorExpr)) { 4904 for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) { 4905 llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper); 4906 Sz = CGF.Builder.CreateIntCast(Sz, CGF.IntPtrTy, /*isSigned=*/false); 4907 llvm::Value *NumClauseDeps = CGF.Builder.CreateNUWMul( 4908 Sz, llvm::ConstantInt::get(CGF.IntPtrTy, D.DepExprs.size())); 4909 NumOfRegularWithIterators = 4910 CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumClauseDeps); 4911 } 4912 HasRegularWithIterators = true; 4913 continue; 4914 } 4915 } 4916 4917 QualType KmpDependInfoArrayTy; 4918 if (HasDepobjDeps || HasRegularWithIterators) { 4919 NumOfElements = llvm::ConstantInt::get(CGM.IntPtrTy, NumDependencies, 4920 /*isSigned=*/false); 4921 if (HasDepobjDeps) { 4922 NumOfElements = 4923 CGF.Builder.CreateNUWAdd(NumOfDepobjElements, NumOfElements); 4924 } 4925 if (HasRegularWithIterators) { 4926 NumOfElements = 4927 CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumOfElements); 4928 } 4929 auto *OVE = new (C) OpaqueValueExpr( 4930 Loc, C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0), 4931 VK_PRValue); 4932 CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, OVE, 4933 RValue::get(NumOfElements)); 4934 KmpDependInfoArrayTy = 4935 C.getVariableArrayType(KmpDependInfoTy, OVE, ArrayType::Normal, 4936 /*IndexTypeQuals=*/0, SourceRange(Loc, Loc)); 4937 // CGF.EmitVariablyModifiedType(KmpDependInfoArrayTy); 4938 // Properly emit variable-sized array. 
4939 auto *PD = ImplicitParamDecl::Create(C, KmpDependInfoArrayTy, 4940 ImplicitParamDecl::Other); 4941 CGF.EmitVarDecl(*PD); 4942 DependenciesArray = CGF.GetAddrOfLocalVar(PD); 4943 NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty, 4944 /*isSigned=*/false); 4945 } else { 4946 KmpDependInfoArrayTy = C.getConstantArrayType( 4947 KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies), nullptr, 4948 ArrayType::Normal, /*IndexTypeQuals=*/0); 4949 DependenciesArray = 4950 CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr"); 4951 DependenciesArray = CGF.Builder.CreateConstArrayGEP(DependenciesArray, 0); 4952 NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumDependencies, 4953 /*isSigned=*/false); 4954 } 4955 unsigned Pos = 0; 4956 for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) { 4957 if (Dependencies[I].DepKind == OMPC_DEPEND_depobj || 4958 Dependencies[I].IteratorExpr) 4959 continue; 4960 emitDependData(CGF, KmpDependInfoTy, &Pos, Dependencies[I], 4961 DependenciesArray); 4962 } 4963 // Copy regular dependecies with iterators. 4964 LValue PosLVal = CGF.MakeAddrLValue( 4965 CGF.CreateMemTemp(C.getSizeType(), "dep.counter.addr"), C.getSizeType()); 4966 CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal); 4967 for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) { 4968 if (Dependencies[I].DepKind == OMPC_DEPEND_depobj || 4969 !Dependencies[I].IteratorExpr) 4970 continue; 4971 emitDependData(CGF, KmpDependInfoTy, &PosLVal, Dependencies[I], 4972 DependenciesArray); 4973 } 4974 // Copy final depobj arrays without iterators. 
4975 if (HasDepobjDeps) { 4976 for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) { 4977 if (Dependencies[I].DepKind != OMPC_DEPEND_depobj) 4978 continue; 4979 emitDepobjElements(CGF, KmpDependInfoTy, PosLVal, Dependencies[I], 4980 DependenciesArray); 4981 } 4982 } 4983 DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4984 DependenciesArray, CGF.VoidPtrTy); 4985 return std::make_pair(NumOfElements, DependenciesArray); 4986 } 4987 4988 Address CGOpenMPRuntime::emitDepobjDependClause( 4989 CodeGenFunction &CGF, const OMPTaskDataTy::DependData &Dependencies, 4990 SourceLocation Loc) { 4991 if (Dependencies.DepExprs.empty()) 4992 return Address::invalid(); 4993 // Process list of dependencies. 4994 ASTContext &C = CGM.getContext(); 4995 Address DependenciesArray = Address::invalid(); 4996 unsigned NumDependencies = Dependencies.DepExprs.size(); 4997 QualType FlagsTy; 4998 getDependTypes(C, KmpDependInfoTy, FlagsTy); 4999 RecordDecl *KmpDependInfoRD = 5000 cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl()); 5001 5002 llvm::Value *Size; 5003 // Define type kmp_depend_info[<Dependencies.size()>]; 5004 // For depobj reserve one extra element to store the number of elements. 5005 // It is required to handle depobj(x) update(in) construct. 
5006 // kmp_depend_info[<Dependencies.size()>] deps; 5007 llvm::Value *NumDepsVal; 5008 CharUnits Align = C.getTypeAlignInChars(KmpDependInfoTy); 5009 if (const auto *IE = 5010 cast_or_null<OMPIteratorExpr>(Dependencies.IteratorExpr)) { 5011 NumDepsVal = llvm::ConstantInt::get(CGF.SizeTy, 1); 5012 for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) { 5013 llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper); 5014 Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false); 5015 NumDepsVal = CGF.Builder.CreateNUWMul(NumDepsVal, Sz); 5016 } 5017 Size = CGF.Builder.CreateNUWAdd(llvm::ConstantInt::get(CGF.SizeTy, 1), 5018 NumDepsVal); 5019 CharUnits SizeInBytes = 5020 C.getTypeSizeInChars(KmpDependInfoTy).alignTo(Align); 5021 llvm::Value *RecSize = CGM.getSize(SizeInBytes); 5022 Size = CGF.Builder.CreateNUWMul(Size, RecSize); 5023 NumDepsVal = 5024 CGF.Builder.CreateIntCast(NumDepsVal, CGF.IntPtrTy, /*isSigned=*/false); 5025 } else { 5026 QualType KmpDependInfoArrayTy = C.getConstantArrayType( 5027 KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies + 1), 5028 nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0); 5029 CharUnits Sz = C.getTypeSizeInChars(KmpDependInfoArrayTy); 5030 Size = CGM.getSize(Sz.alignTo(Align)); 5031 NumDepsVal = llvm::ConstantInt::get(CGF.IntPtrTy, NumDependencies); 5032 } 5033 // Need to allocate on the dynamic memory. 5034 llvm::Value *ThreadID = getThreadID(CGF, Loc); 5035 // Use default allocator. 
5036 llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); 5037 llvm::Value *Args[] = {ThreadID, Size, Allocator}; 5038 5039 llvm::Value *Addr = 5040 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 5041 CGM.getModule(), OMPRTL___kmpc_alloc), 5042 Args, ".dep.arr.addr"); 5043 Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5044 Addr, CGF.ConvertTypeForMem(KmpDependInfoTy)->getPointerTo()); 5045 DependenciesArray = Address(Addr, Align); 5046 // Write number of elements in the first element of array for depobj. 5047 LValue Base = CGF.MakeAddrLValue(DependenciesArray, KmpDependInfoTy); 5048 // deps[i].base_addr = NumDependencies; 5049 LValue BaseAddrLVal = CGF.EmitLValueForField( 5050 Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr)); 5051 CGF.EmitStoreOfScalar(NumDepsVal, BaseAddrLVal); 5052 llvm::PointerUnion<unsigned *, LValue *> Pos; 5053 unsigned Idx = 1; 5054 LValue PosLVal; 5055 if (Dependencies.IteratorExpr) { 5056 PosLVal = CGF.MakeAddrLValue( 5057 CGF.CreateMemTemp(C.getSizeType(), "iterator.counter.addr"), 5058 C.getSizeType()); 5059 CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Idx), PosLVal, 5060 /*IsInit=*/true); 5061 Pos = &PosLVal; 5062 } else { 5063 Pos = &Idx; 5064 } 5065 emitDependData(CGF, KmpDependInfoTy, Pos, Dependencies, DependenciesArray); 5066 DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5067 CGF.Builder.CreateConstGEP(DependenciesArray, 1), CGF.VoidPtrTy); 5068 return DependenciesArray; 5069 } 5070 5071 void CGOpenMPRuntime::emitDestroyClause(CodeGenFunction &CGF, LValue DepobjLVal, 5072 SourceLocation Loc) { 5073 ASTContext &C = CGM.getContext(); 5074 QualType FlagsTy; 5075 getDependTypes(C, KmpDependInfoTy, FlagsTy); 5076 LValue Base = CGF.EmitLoadOfPointerLValue( 5077 DepobjLVal.getAddress(CGF), 5078 C.getPointerType(C.VoidPtrTy).castAs<PointerType>()); 5079 QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy); 5080 Address Addr = 
CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5081 Base.getAddress(CGF), CGF.ConvertTypeForMem(KmpDependInfoPtrTy)); 5082 llvm::Value *DepObjAddr = CGF.Builder.CreateGEP( 5083 Addr.getElementType(), Addr.getPointer(), 5084 llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true)); 5085 DepObjAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(DepObjAddr, 5086 CGF.VoidPtrTy); 5087 llvm::Value *ThreadID = getThreadID(CGF, Loc); 5088 // Use default allocator. 5089 llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); 5090 llvm::Value *Args[] = {ThreadID, DepObjAddr, Allocator}; 5091 5092 // _kmpc_free(gtid, addr, nullptr); 5093 (void)CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 5094 CGM.getModule(), OMPRTL___kmpc_free), 5095 Args); 5096 } 5097 5098 void CGOpenMPRuntime::emitUpdateClause(CodeGenFunction &CGF, LValue DepobjLVal, 5099 OpenMPDependClauseKind NewDepKind, 5100 SourceLocation Loc) { 5101 ASTContext &C = CGM.getContext(); 5102 QualType FlagsTy; 5103 getDependTypes(C, KmpDependInfoTy, FlagsTy); 5104 RecordDecl *KmpDependInfoRD = 5105 cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl()); 5106 llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy); 5107 llvm::Value *NumDeps; 5108 LValue Base; 5109 std::tie(NumDeps, Base) = getDepobjElements(CGF, DepobjLVal, Loc); 5110 5111 Address Begin = Base.getAddress(CGF); 5112 // Cast from pointer to array type to pointer to single element. 5113 llvm::Value *End = CGF.Builder.CreateGEP( 5114 Begin.getElementType(), Begin.getPointer(), NumDeps); 5115 // The basic structure here is a while-do loop. 
5116 llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.body"); 5117 llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.done"); 5118 llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock(); 5119 CGF.EmitBlock(BodyBB); 5120 llvm::PHINode *ElementPHI = 5121 CGF.Builder.CreatePHI(Begin.getType(), 2, "omp.elementPast"); 5122 ElementPHI->addIncoming(Begin.getPointer(), EntryBB); 5123 Begin = Address(ElementPHI, Begin.getAlignment()); 5124 Base = CGF.MakeAddrLValue(Begin, KmpDependInfoTy, Base.getBaseInfo(), 5125 Base.getTBAAInfo()); 5126 // deps[i].flags = NewDepKind; 5127 RTLDependenceKindTy DepKind = translateDependencyKind(NewDepKind); 5128 LValue FlagsLVal = CGF.EmitLValueForField( 5129 Base, *std::next(KmpDependInfoRD->field_begin(), Flags)); 5130 CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind), 5131 FlagsLVal); 5132 5133 // Shift the address forward by one element. 5134 Address ElementNext = 5135 CGF.Builder.CreateConstGEP(Begin, /*Index=*/1, "omp.elementNext"); 5136 ElementPHI->addIncoming(ElementNext.getPointer(), 5137 CGF.Builder.GetInsertBlock()); 5138 llvm::Value *IsEmpty = 5139 CGF.Builder.CreateICmpEQ(ElementNext.getPointer(), End, "omp.isempty"); 5140 CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB); 5141 // Done. 
5142 CGF.EmitBlock(DoneBB, /*IsFinished=*/true); 5143 } 5144 5145 void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc, 5146 const OMPExecutableDirective &D, 5147 llvm::Function *TaskFunction, 5148 QualType SharedsTy, Address Shareds, 5149 const Expr *IfCond, 5150 const OMPTaskDataTy &Data) { 5151 if (!CGF.HaveInsertPoint()) 5152 return; 5153 5154 TaskResultTy Result = 5155 emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data); 5156 llvm::Value *NewTask = Result.NewTask; 5157 llvm::Function *TaskEntry = Result.TaskEntry; 5158 llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy; 5159 LValue TDBase = Result.TDBase; 5160 const RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD; 5161 // Process list of dependences. 5162 Address DependenciesArray = Address::invalid(); 5163 llvm::Value *NumOfElements; 5164 std::tie(NumOfElements, DependenciesArray) = 5165 emitDependClause(CGF, Data.Dependences, Loc); 5166 5167 // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc() 5168 // libcall. 
5169 // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid, 5170 // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list, 5171 // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if dependence 5172 // list is not empty 5173 llvm::Value *ThreadID = getThreadID(CGF, Loc); 5174 llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc); 5175 llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask }; 5176 llvm::Value *DepTaskArgs[7]; 5177 if (!Data.Dependences.empty()) { 5178 DepTaskArgs[0] = UpLoc; 5179 DepTaskArgs[1] = ThreadID; 5180 DepTaskArgs[2] = NewTask; 5181 DepTaskArgs[3] = NumOfElements; 5182 DepTaskArgs[4] = DependenciesArray.getPointer(); 5183 DepTaskArgs[5] = CGF.Builder.getInt32(0); 5184 DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); 5185 } 5186 auto &&ThenCodeGen = [this, &Data, TDBase, KmpTaskTQTyRD, &TaskArgs, 5187 &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) { 5188 if (!Data.Tied) { 5189 auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId); 5190 LValue PartIdLVal = CGF.EmitLValueForField(TDBase, *PartIdFI); 5191 CGF.EmitStoreOfScalar(CGF.Builder.getInt32(0), PartIdLVal); 5192 } 5193 if (!Data.Dependences.empty()) { 5194 CGF.EmitRuntimeCall( 5195 OMPBuilder.getOrCreateRuntimeFunction( 5196 CGM.getModule(), OMPRTL___kmpc_omp_task_with_deps), 5197 DepTaskArgs); 5198 } else { 5199 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 5200 CGM.getModule(), OMPRTL___kmpc_omp_task), 5201 TaskArgs); 5202 } 5203 // Check if parent region is untied and build return for untied task; 5204 if (auto *Region = 5205 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) 5206 Region->emitUntiedSwitch(CGF); 5207 }; 5208 5209 llvm::Value *DepWaitTaskArgs[6]; 5210 if (!Data.Dependences.empty()) { 5211 DepWaitTaskArgs[0] = UpLoc; 5212 DepWaitTaskArgs[1] = ThreadID; 5213 DepWaitTaskArgs[2] = NumOfElements; 5214 DepWaitTaskArgs[3] = DependenciesArray.getPointer(); 5215 DepWaitTaskArgs[4] 
= CGF.Builder.getInt32(0); 5216 DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); 5217 } 5218 auto &M = CGM.getModule(); 5219 auto &&ElseCodeGen = [this, &M, &TaskArgs, ThreadID, NewTaskNewTaskTTy, 5220 TaskEntry, &Data, &DepWaitTaskArgs, 5221 Loc](CodeGenFunction &CGF, PrePostActionTy &) { 5222 CodeGenFunction::RunCleanupsScope LocalScope(CGF); 5223 // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid, 5224 // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 5225 // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info 5226 // is specified. 5227 if (!Data.Dependences.empty()) 5228 CGF.EmitRuntimeCall( 5229 OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_omp_wait_deps), 5230 DepWaitTaskArgs); 5231 // Call proxy_task_entry(gtid, new_task); 5232 auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy, 5233 Loc](CodeGenFunction &CGF, PrePostActionTy &Action) { 5234 Action.Enter(CGF); 5235 llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy}; 5236 CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskEntry, 5237 OutlinedFnArgs); 5238 }; 5239 5240 // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid, 5241 // kmp_task_t *new_task); 5242 // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid, 5243 // kmp_task_t *new_task); 5244 RegionCodeGenTy RCG(CodeGen); 5245 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction( 5246 M, OMPRTL___kmpc_omp_task_begin_if0), 5247 TaskArgs, 5248 OMPBuilder.getOrCreateRuntimeFunction( 5249 M, OMPRTL___kmpc_omp_task_complete_if0), 5250 TaskArgs); 5251 RCG.setAction(Action); 5252 RCG(CGF); 5253 }; 5254 5255 if (IfCond) { 5256 emitIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen); 5257 } else { 5258 RegionCodeGenTy ThenRCG(ThenCodeGen); 5259 ThenRCG(CGF); 5260 } 5261 } 5262 5263 void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc, 5264 const OMPLoopDirective &D, 5265 llvm::Function *TaskFunction, 5266 
QualType SharedsTy, Address Shareds, 5267 const Expr *IfCond, 5268 const OMPTaskDataTy &Data) { 5269 if (!CGF.HaveInsertPoint()) 5270 return; 5271 TaskResultTy Result = 5272 emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data); 5273 // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc() 5274 // libcall. 5275 // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int 5276 // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int 5277 // sched, kmp_uint64 grainsize, void *task_dup); 5278 llvm::Value *ThreadID = getThreadID(CGF, Loc); 5279 llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc); 5280 llvm::Value *IfVal; 5281 if (IfCond) { 5282 IfVal = CGF.Builder.CreateIntCast(CGF.EvaluateExprAsBool(IfCond), CGF.IntTy, 5283 /*isSigned=*/true); 5284 } else { 5285 IfVal = llvm::ConstantInt::getSigned(CGF.IntTy, /*V=*/1); 5286 } 5287 5288 LValue LBLVal = CGF.EmitLValueForField( 5289 Result.TDBase, 5290 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound)); 5291 const auto *LBVar = 5292 cast<VarDecl>(cast<DeclRefExpr>(D.getLowerBoundVariable())->getDecl()); 5293 CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(CGF), 5294 LBLVal.getQuals(), 5295 /*IsInitializer=*/true); 5296 LValue UBLVal = CGF.EmitLValueForField( 5297 Result.TDBase, 5298 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound)); 5299 const auto *UBVar = 5300 cast<VarDecl>(cast<DeclRefExpr>(D.getUpperBoundVariable())->getDecl()); 5301 CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(CGF), 5302 UBLVal.getQuals(), 5303 /*IsInitializer=*/true); 5304 LValue StLVal = CGF.EmitLValueForField( 5305 Result.TDBase, 5306 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTStride)); 5307 const auto *StVar = 5308 cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl()); 5309 CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(CGF), 5310 StLVal.getQuals(), 5311 /*IsInitializer=*/true); 5312 // 
Store reductions address. 5313 LValue RedLVal = CGF.EmitLValueForField( 5314 Result.TDBase, 5315 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTReductions)); 5316 if (Data.Reductions) { 5317 CGF.EmitStoreOfScalar(Data.Reductions, RedLVal); 5318 } else { 5319 CGF.EmitNullInitialization(RedLVal.getAddress(CGF), 5320 CGF.getContext().VoidPtrTy); 5321 } 5322 enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 }; 5323 llvm::Value *TaskArgs[] = { 5324 UpLoc, 5325 ThreadID, 5326 Result.NewTask, 5327 IfVal, 5328 LBLVal.getPointer(CGF), 5329 UBLVal.getPointer(CGF), 5330 CGF.EmitLoadOfScalar(StLVal, Loc), 5331 llvm::ConstantInt::getSigned( 5332 CGF.IntTy, 1), // Always 1 because taskgroup emitted by the compiler 5333 llvm::ConstantInt::getSigned( 5334 CGF.IntTy, Data.Schedule.getPointer() 5335 ? Data.Schedule.getInt() ? NumTasks : Grainsize 5336 : NoSchedule), 5337 Data.Schedule.getPointer() 5338 ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty, 5339 /*isSigned=*/false) 5340 : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0), 5341 Result.TaskDupFn ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5342 Result.TaskDupFn, CGF.VoidPtrTy) 5343 : llvm::ConstantPointerNull::get(CGF.VoidPtrTy)}; 5344 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 5345 CGM.getModule(), OMPRTL___kmpc_taskloop), 5346 TaskArgs); 5347 } 5348 5349 /// Emit reduction operation for each element of array (required for 5350 /// array sections) LHS op = RHS. 5351 /// \param Type Type of array. 5352 /// \param LHSVar Variable on the left side of the reduction operation 5353 /// (references element of array in original variable). 5354 /// \param RHSVar Variable on the right side of the reduction operation 5355 /// (references element of array in original variable). 5356 /// \param RedOpGen Generator of reduction operation with use of LHSVar and 5357 /// RHSVar. 
5358 static void EmitOMPAggregateReduction( 5359 CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar, 5360 const VarDecl *RHSVar, 5361 const llvm::function_ref<void(CodeGenFunction &CGF, const Expr *, 5362 const Expr *, const Expr *)> &RedOpGen, 5363 const Expr *XExpr = nullptr, const Expr *EExpr = nullptr, 5364 const Expr *UpExpr = nullptr) { 5365 // Perform element-by-element initialization. 5366 QualType ElementTy; 5367 Address LHSAddr = CGF.GetAddrOfLocalVar(LHSVar); 5368 Address RHSAddr = CGF.GetAddrOfLocalVar(RHSVar); 5369 5370 // Drill down to the base element type on both arrays. 5371 const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe(); 5372 llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, LHSAddr); 5373 5374 llvm::Value *RHSBegin = RHSAddr.getPointer(); 5375 llvm::Value *LHSBegin = LHSAddr.getPointer(); 5376 // Cast from pointer to array type to pointer to single element. 5377 llvm::Value *LHSEnd = 5378 CGF.Builder.CreateGEP(LHSAddr.getElementType(), LHSBegin, NumElements); 5379 // The basic structure here is a while-do loop. 5380 llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arraycpy.body"); 5381 llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arraycpy.done"); 5382 llvm::Value *IsEmpty = 5383 CGF.Builder.CreateICmpEQ(LHSBegin, LHSEnd, "omp.arraycpy.isempty"); 5384 CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB); 5385 5386 // Enter the loop body, making that address the current address. 
5387 llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock(); 5388 CGF.EmitBlock(BodyBB); 5389 5390 CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy); 5391 5392 llvm::PHINode *RHSElementPHI = CGF.Builder.CreatePHI( 5393 RHSBegin->getType(), 2, "omp.arraycpy.srcElementPast"); 5394 RHSElementPHI->addIncoming(RHSBegin, EntryBB); 5395 Address RHSElementCurrent = 5396 Address(RHSElementPHI, 5397 RHSAddr.getAlignment().alignmentOfArrayElement(ElementSize)); 5398 5399 llvm::PHINode *LHSElementPHI = CGF.Builder.CreatePHI( 5400 LHSBegin->getType(), 2, "omp.arraycpy.destElementPast"); 5401 LHSElementPHI->addIncoming(LHSBegin, EntryBB); 5402 Address LHSElementCurrent = 5403 Address(LHSElementPHI, 5404 LHSAddr.getAlignment().alignmentOfArrayElement(ElementSize)); 5405 5406 // Emit copy. 5407 CodeGenFunction::OMPPrivateScope Scope(CGF); 5408 Scope.addPrivate(LHSVar, [=]() { return LHSElementCurrent; }); 5409 Scope.addPrivate(RHSVar, [=]() { return RHSElementCurrent; }); 5410 Scope.Privatize(); 5411 RedOpGen(CGF, XExpr, EExpr, UpExpr); 5412 Scope.ForceCleanup(); 5413 5414 // Shift the address forward by one element. 5415 llvm::Value *LHSElementNext = CGF.Builder.CreateConstGEP1_32( 5416 LHSAddr.getElementType(), LHSElementPHI, /*Idx0=*/1, 5417 "omp.arraycpy.dest.element"); 5418 llvm::Value *RHSElementNext = CGF.Builder.CreateConstGEP1_32( 5419 RHSAddr.getElementType(), RHSElementPHI, /*Idx0=*/1, 5420 "omp.arraycpy.src.element"); 5421 // Check whether we've reached the end. 5422 llvm::Value *Done = 5423 CGF.Builder.CreateICmpEQ(LHSElementNext, LHSEnd, "omp.arraycpy.done"); 5424 CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB); 5425 LHSElementPHI->addIncoming(LHSElementNext, CGF.Builder.GetInsertBlock()); 5426 RHSElementPHI->addIncoming(RHSElementNext, CGF.Builder.GetInsertBlock()); 5427 5428 // Done. 5429 CGF.EmitBlock(DoneBB, /*IsFinished=*/true); 5430 } 5431 5432 /// Emit reduction combiner. 
If the combiner is a simple expression emit it as 5433 /// is, otherwise consider it as combiner of UDR decl and emit it as a call of 5434 /// UDR combiner function. 5435 static void emitReductionCombiner(CodeGenFunction &CGF, 5436 const Expr *ReductionOp) { 5437 if (const auto *CE = dyn_cast<CallExpr>(ReductionOp)) 5438 if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee())) 5439 if (const auto *DRE = 5440 dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts())) 5441 if (const auto *DRD = 5442 dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) { 5443 std::pair<llvm::Function *, llvm::Function *> Reduction = 5444 CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD); 5445 RValue Func = RValue::get(Reduction.first); 5446 CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func); 5447 CGF.EmitIgnoredExpr(ReductionOp); 5448 return; 5449 } 5450 CGF.EmitIgnoredExpr(ReductionOp); 5451 } 5452 5453 llvm::Function *CGOpenMPRuntime::emitReductionFunction( 5454 SourceLocation Loc, llvm::Type *ArgsType, ArrayRef<const Expr *> Privates, 5455 ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs, 5456 ArrayRef<const Expr *> ReductionOps) { 5457 ASTContext &C = CGM.getContext(); 5458 5459 // void reduction_func(void *LHSArg, void *RHSArg); 5460 FunctionArgList Args; 5461 ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 5462 ImplicitParamDecl::Other); 5463 ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 5464 ImplicitParamDecl::Other); 5465 Args.push_back(&LHSArg); 5466 Args.push_back(&RHSArg); 5467 const auto &CGFI = 5468 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 5469 std::string Name = getName({"omp", "reduction", "reduction_func"}); 5470 auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI), 5471 llvm::GlobalValue::InternalLinkage, Name, 5472 &CGM.getModule()); 5473 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI); 5474 Fn->setDoesNotRecurse(); 
5475 CodeGenFunction CGF(CGM); 5476 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc); 5477 5478 // Dst = (void*[n])(LHSArg); 5479 // Src = (void*[n])(RHSArg); 5480 Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5481 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)), 5482 ArgsType), CGF.getPointerAlign()); 5483 Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5484 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)), 5485 ArgsType), CGF.getPointerAlign()); 5486 5487 // ... 5488 // *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]); 5489 // ... 5490 CodeGenFunction::OMPPrivateScope Scope(CGF); 5491 auto IPriv = Privates.begin(); 5492 unsigned Idx = 0; 5493 for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) { 5494 const auto *RHSVar = 5495 cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl()); 5496 Scope.addPrivate(RHSVar, [&CGF, RHS, Idx, RHSVar]() { 5497 return emitAddrOfVarFromArray(CGF, RHS, Idx, RHSVar); 5498 }); 5499 const auto *LHSVar = 5500 cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl()); 5501 Scope.addPrivate(LHSVar, [&CGF, LHS, Idx, LHSVar]() { 5502 return emitAddrOfVarFromArray(CGF, LHS, Idx, LHSVar); 5503 }); 5504 QualType PrivTy = (*IPriv)->getType(); 5505 if (PrivTy->isVariablyModifiedType()) { 5506 // Get array size and emit VLA type. 
5507 ++Idx; 5508 Address Elem = CGF.Builder.CreateConstArrayGEP(LHS, Idx); 5509 llvm::Value *Ptr = CGF.Builder.CreateLoad(Elem); 5510 const VariableArrayType *VLA = 5511 CGF.getContext().getAsVariableArrayType(PrivTy); 5512 const auto *OVE = cast<OpaqueValueExpr>(VLA->getSizeExpr()); 5513 CodeGenFunction::OpaqueValueMapping OpaqueMap( 5514 CGF, OVE, RValue::get(CGF.Builder.CreatePtrToInt(Ptr, CGF.SizeTy))); 5515 CGF.EmitVariablyModifiedType(PrivTy); 5516 } 5517 } 5518 Scope.Privatize(); 5519 IPriv = Privates.begin(); 5520 auto ILHS = LHSExprs.begin(); 5521 auto IRHS = RHSExprs.begin(); 5522 for (const Expr *E : ReductionOps) { 5523 if ((*IPriv)->getType()->isArrayType()) { 5524 // Emit reduction for array section. 5525 const auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl()); 5526 const auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl()); 5527 EmitOMPAggregateReduction( 5528 CGF, (*IPriv)->getType(), LHSVar, RHSVar, 5529 [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) { 5530 emitReductionCombiner(CGF, E); 5531 }); 5532 } else { 5533 // Emit reduction for array subscript or single variable. 5534 emitReductionCombiner(CGF, E); 5535 } 5536 ++IPriv; 5537 ++ILHS; 5538 ++IRHS; 5539 } 5540 Scope.ForceCleanup(); 5541 CGF.FinishFunction(); 5542 return Fn; 5543 } 5544 5545 void CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF, 5546 const Expr *ReductionOp, 5547 const Expr *PrivateRef, 5548 const DeclRefExpr *LHS, 5549 const DeclRefExpr *RHS) { 5550 if (PrivateRef->getType()->isArrayType()) { 5551 // Emit reduction for array section. 
5552 const auto *LHSVar = cast<VarDecl>(LHS->getDecl()); 5553 const auto *RHSVar = cast<VarDecl>(RHS->getDecl()); 5554 EmitOMPAggregateReduction( 5555 CGF, PrivateRef->getType(), LHSVar, RHSVar, 5556 [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) { 5557 emitReductionCombiner(CGF, ReductionOp); 5558 }); 5559 } else { 5560 // Emit reduction for array subscript or single variable. 5561 emitReductionCombiner(CGF, ReductionOp); 5562 } 5563 } 5564 5565 void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc, 5566 ArrayRef<const Expr *> Privates, 5567 ArrayRef<const Expr *> LHSExprs, 5568 ArrayRef<const Expr *> RHSExprs, 5569 ArrayRef<const Expr *> ReductionOps, 5570 ReductionOptionsTy Options) { 5571 if (!CGF.HaveInsertPoint()) 5572 return; 5573 5574 bool WithNowait = Options.WithNowait; 5575 bool SimpleReduction = Options.SimpleReduction; 5576 5577 // Next code should be emitted for reduction: 5578 // 5579 // static kmp_critical_name lock = { 0 }; 5580 // 5581 // void reduce_func(void *lhs[<n>], void *rhs[<n>]) { 5582 // *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]); 5583 // ... 5584 // *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1], 5585 // *(Type<n>-1*)rhs[<n>-1]); 5586 // } 5587 // 5588 // ... 5589 // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]}; 5590 // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList), 5591 // RedList, reduce_func, &<lock>)) { 5592 // case 1: 5593 // ... 5594 // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]); 5595 // ... 5596 // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>); 5597 // break; 5598 // case 2: 5599 // ... 5600 // Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i])); 5601 // ... 5602 // [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);] 5603 // break; 5604 // default:; 5605 // } 5606 // 5607 // if SimpleReduction is true, only the next code is generated: 5608 // ... 
5609 // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]); 5610 // ... 5611 5612 ASTContext &C = CGM.getContext(); 5613 5614 if (SimpleReduction) { 5615 CodeGenFunction::RunCleanupsScope Scope(CGF); 5616 auto IPriv = Privates.begin(); 5617 auto ILHS = LHSExprs.begin(); 5618 auto IRHS = RHSExprs.begin(); 5619 for (const Expr *E : ReductionOps) { 5620 emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS), 5621 cast<DeclRefExpr>(*IRHS)); 5622 ++IPriv; 5623 ++ILHS; 5624 ++IRHS; 5625 } 5626 return; 5627 } 5628 5629 // 1. Build a list of reduction variables. 5630 // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]}; 5631 auto Size = RHSExprs.size(); 5632 for (const Expr *E : Privates) { 5633 if (E->getType()->isVariablyModifiedType()) 5634 // Reserve place for array size. 5635 ++Size; 5636 } 5637 llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size); 5638 QualType ReductionArrayTy = 5639 C.getConstantArrayType(C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal, 5640 /*IndexTypeQuals=*/0); 5641 Address ReductionList = 5642 CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list"); 5643 auto IPriv = Privates.begin(); 5644 unsigned Idx = 0; 5645 for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) { 5646 Address Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx); 5647 CGF.Builder.CreateStore( 5648 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5649 CGF.EmitLValue(RHSExprs[I]).getPointer(CGF), CGF.VoidPtrTy), 5650 Elem); 5651 if ((*IPriv)->getType()->isVariablyModifiedType()) { 5652 // Store array size. 5653 ++Idx; 5654 Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx); 5655 llvm::Value *Size = CGF.Builder.CreateIntCast( 5656 CGF.getVLASize( 5657 CGF.getContext().getAsVariableArrayType((*IPriv)->getType())) 5658 .NumElts, 5659 CGF.SizeTy, /*isSigned=*/false); 5660 CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy), 5661 Elem); 5662 } 5663 } 5664 5665 // 2. 
Emit reduce_func(). 5666 llvm::Function *ReductionFn = emitReductionFunction( 5667 Loc, CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo(), Privates, 5668 LHSExprs, RHSExprs, ReductionOps); 5669 5670 // 3. Create static kmp_critical_name lock = { 0 }; 5671 std::string Name = getName({"reduction"}); 5672 llvm::Value *Lock = getCriticalRegionLock(Name); 5673 5674 // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList), 5675 // RedList, reduce_func, &<lock>); 5676 llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE); 5677 llvm::Value *ThreadId = getThreadID(CGF, Loc); 5678 llvm::Value *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy); 5679 llvm::Value *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5680 ReductionList.getPointer(), CGF.VoidPtrTy); 5681 llvm::Value *Args[] = { 5682 IdentTLoc, // ident_t *<loc> 5683 ThreadId, // i32 <gtid> 5684 CGF.Builder.getInt32(RHSExprs.size()), // i32 <n> 5685 ReductionArrayTySize, // size_type sizeof(RedList) 5686 RL, // void *RedList 5687 ReductionFn, // void (*) (void *, void *) <reduce_func> 5688 Lock // kmp_critical_name *&<lock> 5689 }; 5690 llvm::Value *Res = CGF.EmitRuntimeCall( 5691 OMPBuilder.getOrCreateRuntimeFunction( 5692 CGM.getModule(), 5693 WithNowait ? OMPRTL___kmpc_reduce_nowait : OMPRTL___kmpc_reduce), 5694 Args); 5695 5696 // 5. Build switch(res) 5697 llvm::BasicBlock *DefaultBB = CGF.createBasicBlock(".omp.reduction.default"); 5698 llvm::SwitchInst *SwInst = 5699 CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2); 5700 5701 // 6. Build case 1: 5702 // ... 5703 // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]); 5704 // ... 
5705 // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>); 5706 // break; 5707 llvm::BasicBlock *Case1BB = CGF.createBasicBlock(".omp.reduction.case1"); 5708 SwInst->addCase(CGF.Builder.getInt32(1), Case1BB); 5709 CGF.EmitBlock(Case1BB); 5710 5711 // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>); 5712 llvm::Value *EndArgs[] = { 5713 IdentTLoc, // ident_t *<loc> 5714 ThreadId, // i32 <gtid> 5715 Lock // kmp_critical_name *&<lock> 5716 }; 5717 auto &&CodeGen = [Privates, LHSExprs, RHSExprs, ReductionOps]( 5718 CodeGenFunction &CGF, PrePostActionTy &Action) { 5719 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime(); 5720 auto IPriv = Privates.begin(); 5721 auto ILHS = LHSExprs.begin(); 5722 auto IRHS = RHSExprs.begin(); 5723 for (const Expr *E : ReductionOps) { 5724 RT.emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS), 5725 cast<DeclRefExpr>(*IRHS)); 5726 ++IPriv; 5727 ++ILHS; 5728 ++IRHS; 5729 } 5730 }; 5731 RegionCodeGenTy RCG(CodeGen); 5732 CommonActionTy Action( 5733 nullptr, llvm::None, 5734 OMPBuilder.getOrCreateRuntimeFunction( 5735 CGM.getModule(), WithNowait ? OMPRTL___kmpc_end_reduce_nowait 5736 : OMPRTL___kmpc_end_reduce), 5737 EndArgs); 5738 RCG.setAction(Action); 5739 RCG(CGF); 5740 5741 CGF.EmitBranch(DefaultBB); 5742 5743 // 7. Build case 2: 5744 // ... 5745 // Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i])); 5746 // ... 
5747 // break; 5748 llvm::BasicBlock *Case2BB = CGF.createBasicBlock(".omp.reduction.case2"); 5749 SwInst->addCase(CGF.Builder.getInt32(2), Case2BB); 5750 CGF.EmitBlock(Case2BB); 5751 5752 auto &&AtomicCodeGen = [Loc, Privates, LHSExprs, RHSExprs, ReductionOps]( 5753 CodeGenFunction &CGF, PrePostActionTy &Action) { 5754 auto ILHS = LHSExprs.begin(); 5755 auto IRHS = RHSExprs.begin(); 5756 auto IPriv = Privates.begin(); 5757 for (const Expr *E : ReductionOps) { 5758 const Expr *XExpr = nullptr; 5759 const Expr *EExpr = nullptr; 5760 const Expr *UpExpr = nullptr; 5761 BinaryOperatorKind BO = BO_Comma; 5762 if (const auto *BO = dyn_cast<BinaryOperator>(E)) { 5763 if (BO->getOpcode() == BO_Assign) { 5764 XExpr = BO->getLHS(); 5765 UpExpr = BO->getRHS(); 5766 } 5767 } 5768 // Try to emit update expression as a simple atomic. 5769 const Expr *RHSExpr = UpExpr; 5770 if (RHSExpr) { 5771 // Analyze RHS part of the whole expression. 5772 if (const auto *ACO = dyn_cast<AbstractConditionalOperator>( 5773 RHSExpr->IgnoreParenImpCasts())) { 5774 // If this is a conditional operator, analyze its condition for 5775 // min/max reduction operator. 
5776 RHSExpr = ACO->getCond(); 5777 } 5778 if (const auto *BORHS = 5779 dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) { 5780 EExpr = BORHS->getRHS(); 5781 BO = BORHS->getOpcode(); 5782 } 5783 } 5784 if (XExpr) { 5785 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl()); 5786 auto &&AtomicRedGen = [BO, VD, 5787 Loc](CodeGenFunction &CGF, const Expr *XExpr, 5788 const Expr *EExpr, const Expr *UpExpr) { 5789 LValue X = CGF.EmitLValue(XExpr); 5790 RValue E; 5791 if (EExpr) 5792 E = CGF.EmitAnyExpr(EExpr); 5793 CGF.EmitOMPAtomicSimpleUpdateExpr( 5794 X, E, BO, /*IsXLHSInRHSPart=*/true, 5795 llvm::AtomicOrdering::Monotonic, Loc, 5796 [&CGF, UpExpr, VD, Loc](RValue XRValue) { 5797 CodeGenFunction::OMPPrivateScope PrivateScope(CGF); 5798 PrivateScope.addPrivate( 5799 VD, [&CGF, VD, XRValue, Loc]() { 5800 Address LHSTemp = CGF.CreateMemTemp(VD->getType()); 5801 CGF.emitOMPSimpleStore( 5802 CGF.MakeAddrLValue(LHSTemp, VD->getType()), XRValue, 5803 VD->getType().getNonReferenceType(), Loc); 5804 return LHSTemp; 5805 }); 5806 (void)PrivateScope.Privatize(); 5807 return CGF.EmitAnyExpr(UpExpr); 5808 }); 5809 }; 5810 if ((*IPriv)->getType()->isArrayType()) { 5811 // Emit atomic reduction for array section. 5812 const auto *RHSVar = 5813 cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl()); 5814 EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar, 5815 AtomicRedGen, XExpr, EExpr, UpExpr); 5816 } else { 5817 // Emit atomic reduction for array subscript or single variable. 5818 AtomicRedGen(CGF, XExpr, EExpr, UpExpr); 5819 } 5820 } else { 5821 // Emit as a critical region. 
5822 auto &&CritRedGen = [E, Loc](CodeGenFunction &CGF, const Expr *, 5823 const Expr *, const Expr *) { 5824 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime(); 5825 std::string Name = RT.getName({"atomic_reduction"}); 5826 RT.emitCriticalRegion( 5827 CGF, Name, 5828 [=](CodeGenFunction &CGF, PrePostActionTy &Action) { 5829 Action.Enter(CGF); 5830 emitReductionCombiner(CGF, E); 5831 }, 5832 Loc); 5833 }; 5834 if ((*IPriv)->getType()->isArrayType()) { 5835 const auto *LHSVar = 5836 cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl()); 5837 const auto *RHSVar = 5838 cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl()); 5839 EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar, 5840 CritRedGen); 5841 } else { 5842 CritRedGen(CGF, nullptr, nullptr, nullptr); 5843 } 5844 } 5845 ++ILHS; 5846 ++IRHS; 5847 ++IPriv; 5848 } 5849 }; 5850 RegionCodeGenTy AtomicRCG(AtomicCodeGen); 5851 if (!WithNowait) { 5852 // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>); 5853 llvm::Value *EndArgs[] = { 5854 IdentTLoc, // ident_t *<loc> 5855 ThreadId, // i32 <gtid> 5856 Lock // kmp_critical_name *&<lock> 5857 }; 5858 CommonActionTy Action(nullptr, llvm::None, 5859 OMPBuilder.getOrCreateRuntimeFunction( 5860 CGM.getModule(), OMPRTL___kmpc_end_reduce), 5861 EndArgs); 5862 AtomicRCG.setAction(Action); 5863 AtomicRCG(CGF); 5864 } else { 5865 AtomicRCG(CGF); 5866 } 5867 5868 CGF.EmitBranch(DefaultBB); 5869 CGF.EmitBlock(DefaultBB, /*IsFinished=*/true); 5870 } 5871 5872 /// Generates unique name for artificial threadprivate variables. 5873 /// Format is: <Prefix> "." 
<Decl_mangled_name> "_" "<Decl_start_loc_raw_enc>" 5874 static std::string generateUniqueName(CodeGenModule &CGM, StringRef Prefix, 5875 const Expr *Ref) { 5876 SmallString<256> Buffer; 5877 llvm::raw_svector_ostream Out(Buffer); 5878 const clang::DeclRefExpr *DE; 5879 const VarDecl *D = ::getBaseDecl(Ref, DE); 5880 if (!D) 5881 D = cast<VarDecl>(cast<DeclRefExpr>(Ref)->getDecl()); 5882 D = D->getCanonicalDecl(); 5883 std::string Name = CGM.getOpenMPRuntime().getName( 5884 {D->isLocalVarDeclOrParm() ? D->getName() : CGM.getMangledName(D)}); 5885 Out << Prefix << Name << "_" 5886 << D->getCanonicalDecl()->getBeginLoc().getRawEncoding(); 5887 return std::string(Out.str()); 5888 } 5889 5890 /// Emits reduction initializer function: 5891 /// \code 5892 /// void @.red_init(void* %arg, void* %orig) { 5893 /// %0 = bitcast void* %arg to <type>* 5894 /// store <type> <init>, <type>* %0 5895 /// ret void 5896 /// } 5897 /// \endcode 5898 static llvm::Value *emitReduceInitFunction(CodeGenModule &CGM, 5899 SourceLocation Loc, 5900 ReductionCodeGen &RCG, unsigned N) { 5901 ASTContext &C = CGM.getContext(); 5902 QualType VoidPtrTy = C.VoidPtrTy; 5903 VoidPtrTy.addRestrict(); 5904 FunctionArgList Args; 5905 ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy, 5906 ImplicitParamDecl::Other); 5907 ImplicitParamDecl ParamOrig(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy, 5908 ImplicitParamDecl::Other); 5909 Args.emplace_back(&Param); 5910 Args.emplace_back(&ParamOrig); 5911 const auto &FnInfo = 5912 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 5913 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo); 5914 std::string Name = CGM.getOpenMPRuntime().getName({"red_init", ""}); 5915 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage, 5916 Name, &CGM.getModule()); 5917 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo); 5918 Fn->setDoesNotRecurse(); 5919 CodeGenFunction CGF(CGM); 5920 
CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc); 5921 Address PrivateAddr = CGF.EmitLoadOfPointer( 5922 CGF.GetAddrOfLocalVar(&Param), 5923 C.getPointerType(C.VoidPtrTy).castAs<PointerType>()); 5924 llvm::Value *Size = nullptr; 5925 // If the size of the reduction item is non-constant, load it from global 5926 // threadprivate variable. 5927 if (RCG.getSizes(N).second) { 5928 Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate( 5929 CGF, CGM.getContext().getSizeType(), 5930 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N))); 5931 Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false, 5932 CGM.getContext().getSizeType(), Loc); 5933 } 5934 RCG.emitAggregateType(CGF, N, Size); 5935 LValue OrigLVal; 5936 // If initializer uses initializer from declare reduction construct, emit a 5937 // pointer to the address of the original reduction item (reuired by reduction 5938 // initializer) 5939 if (RCG.usesReductionInitializer(N)) { 5940 Address SharedAddr = CGF.GetAddrOfLocalVar(&ParamOrig); 5941 SharedAddr = CGF.EmitLoadOfPointer( 5942 SharedAddr, 5943 CGM.getContext().VoidPtrTy.castAs<PointerType>()->getTypePtr()); 5944 OrigLVal = CGF.MakeAddrLValue(SharedAddr, CGM.getContext().VoidPtrTy); 5945 } else { 5946 OrigLVal = CGF.MakeNaturalAlignAddrLValue( 5947 llvm::ConstantPointerNull::get(CGM.VoidPtrTy), 5948 CGM.getContext().VoidTy); 5949 } 5950 // Emit the initializer: 5951 // %0 = bitcast void* %arg to <type>* 5952 // store <type> <init>, <type>* %0 5953 RCG.emitInitialization(CGF, N, PrivateAddr, OrigLVal, 5954 [](CodeGenFunction &) { return false; }); 5955 CGF.FinishFunction(); 5956 return Fn; 5957 } 5958 5959 /// Emits reduction combiner function: 5960 /// \code 5961 /// void @.red_comb(void* %arg0, void* %arg1) { 5962 /// %lhs = bitcast void* %arg0 to <type>* 5963 /// %rhs = bitcast void* %arg1 to <type>* 5964 /// %2 = <ReductionOp>(<type>* %lhs, <type>* %rhs) 5965 /// store <type> %2, <type>* %lhs 5966 /// ret 
void 5967 /// } 5968 /// \endcode 5969 static llvm::Value *emitReduceCombFunction(CodeGenModule &CGM, 5970 SourceLocation Loc, 5971 ReductionCodeGen &RCG, unsigned N, 5972 const Expr *ReductionOp, 5973 const Expr *LHS, const Expr *RHS, 5974 const Expr *PrivateRef) { 5975 ASTContext &C = CGM.getContext(); 5976 const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(LHS)->getDecl()); 5977 const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(RHS)->getDecl()); 5978 FunctionArgList Args; 5979 ImplicitParamDecl ParamInOut(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 5980 C.VoidPtrTy, ImplicitParamDecl::Other); 5981 ImplicitParamDecl ParamIn(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 5982 ImplicitParamDecl::Other); 5983 Args.emplace_back(&ParamInOut); 5984 Args.emplace_back(&ParamIn); 5985 const auto &FnInfo = 5986 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 5987 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo); 5988 std::string Name = CGM.getOpenMPRuntime().getName({"red_comb", ""}); 5989 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage, 5990 Name, &CGM.getModule()); 5991 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo); 5992 Fn->setDoesNotRecurse(); 5993 CodeGenFunction CGF(CGM); 5994 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc); 5995 llvm::Value *Size = nullptr; 5996 // If the size of the reduction item is non-constant, load it from global 5997 // threadprivate variable. 5998 if (RCG.getSizes(N).second) { 5999 Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate( 6000 CGF, CGM.getContext().getSizeType(), 6001 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N))); 6002 Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false, 6003 CGM.getContext().getSizeType(), Loc); 6004 } 6005 RCG.emitAggregateType(CGF, N, Size); 6006 // Remap lhs and rhs variables to the addresses of the function arguments. 
6007 // %lhs = bitcast void* %arg0 to <type>* 6008 // %rhs = bitcast void* %arg1 to <type>* 6009 CodeGenFunction::OMPPrivateScope PrivateScope(CGF); 6010 PrivateScope.addPrivate(LHSVD, [&C, &CGF, &ParamInOut, LHSVD]() { 6011 // Pull out the pointer to the variable. 6012 Address PtrAddr = CGF.EmitLoadOfPointer( 6013 CGF.GetAddrOfLocalVar(&ParamInOut), 6014 C.getPointerType(C.VoidPtrTy).castAs<PointerType>()); 6015 return CGF.Builder.CreateElementBitCast( 6016 PtrAddr, CGF.ConvertTypeForMem(LHSVD->getType())); 6017 }); 6018 PrivateScope.addPrivate(RHSVD, [&C, &CGF, &ParamIn, RHSVD]() { 6019 // Pull out the pointer to the variable. 6020 Address PtrAddr = CGF.EmitLoadOfPointer( 6021 CGF.GetAddrOfLocalVar(&ParamIn), 6022 C.getPointerType(C.VoidPtrTy).castAs<PointerType>()); 6023 return CGF.Builder.CreateElementBitCast( 6024 PtrAddr, CGF.ConvertTypeForMem(RHSVD->getType())); 6025 }); 6026 PrivateScope.Privatize(); 6027 // Emit the combiner body: 6028 // %2 = <ReductionOp>(<type> *%lhs, <type> *%rhs) 6029 // store <type> %2, <type>* %lhs 6030 CGM.getOpenMPRuntime().emitSingleReductionCombiner( 6031 CGF, ReductionOp, PrivateRef, cast<DeclRefExpr>(LHS), 6032 cast<DeclRefExpr>(RHS)); 6033 CGF.FinishFunction(); 6034 return Fn; 6035 } 6036 6037 /// Emits reduction finalizer function: 6038 /// \code 6039 /// void @.red_fini(void* %arg) { 6040 /// %0 = bitcast void* %arg to <type>* 6041 /// <destroy>(<type>* %0) 6042 /// ret void 6043 /// } 6044 /// \endcode 6045 static llvm::Value *emitReduceFiniFunction(CodeGenModule &CGM, 6046 SourceLocation Loc, 6047 ReductionCodeGen &RCG, unsigned N) { 6048 if (!RCG.needCleanups(N)) 6049 return nullptr; 6050 ASTContext &C = CGM.getContext(); 6051 FunctionArgList Args; 6052 ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 6053 ImplicitParamDecl::Other); 6054 Args.emplace_back(&Param); 6055 const auto &FnInfo = 6056 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 6057 llvm::FunctionType *FnTy = 
CGM.getTypes().GetFunctionType(FnInfo); 6058 std::string Name = CGM.getOpenMPRuntime().getName({"red_fini", ""}); 6059 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage, 6060 Name, &CGM.getModule()); 6061 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo); 6062 Fn->setDoesNotRecurse(); 6063 CodeGenFunction CGF(CGM); 6064 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc); 6065 Address PrivateAddr = CGF.EmitLoadOfPointer( 6066 CGF.GetAddrOfLocalVar(&Param), 6067 C.getPointerType(C.VoidPtrTy).castAs<PointerType>()); 6068 llvm::Value *Size = nullptr; 6069 // If the size of the reduction item is non-constant, load it from global 6070 // threadprivate variable. 6071 if (RCG.getSizes(N).second) { 6072 Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate( 6073 CGF, CGM.getContext().getSizeType(), 6074 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N))); 6075 Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false, 6076 CGM.getContext().getSizeType(), Loc); 6077 } 6078 RCG.emitAggregateType(CGF, N, Size); 6079 // Emit the finalizer body: 6080 // <destroy>(<type>* %0) 6081 RCG.emitCleanups(CGF, N, PrivateAddr); 6082 CGF.FinishFunction(Loc); 6083 return Fn; 6084 } 6085 6086 llvm::Value *CGOpenMPRuntime::emitTaskReductionInit( 6087 CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs, 6088 ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) { 6089 if (!CGF.HaveInsertPoint() || Data.ReductionVars.empty()) 6090 return nullptr; 6091 6092 // Build typedef struct: 6093 // kmp_taskred_input { 6094 // void *reduce_shar; // shared reduction item 6095 // void *reduce_orig; // original reduction item used for initialization 6096 // size_t reduce_size; // size of data item 6097 // void *reduce_init; // data initialization routine 6098 // void *reduce_fini; // data finalization routine 6099 // void *reduce_comb; // data combiner routine 6100 // kmp_task_red_flags_t flags; // 
flags for additional info from compiler 6101 // } kmp_taskred_input_t; 6102 ASTContext &C = CGM.getContext(); 6103 RecordDecl *RD = C.buildImplicitRecord("kmp_taskred_input_t"); 6104 RD->startDefinition(); 6105 const FieldDecl *SharedFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy); 6106 const FieldDecl *OrigFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy); 6107 const FieldDecl *SizeFD = addFieldToRecordDecl(C, RD, C.getSizeType()); 6108 const FieldDecl *InitFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy); 6109 const FieldDecl *FiniFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy); 6110 const FieldDecl *CombFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy); 6111 const FieldDecl *FlagsFD = addFieldToRecordDecl( 6112 C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/false)); 6113 RD->completeDefinition(); 6114 QualType RDType = C.getRecordType(RD); 6115 unsigned Size = Data.ReductionVars.size(); 6116 llvm::APInt ArraySize(/*numBits=*/64, Size); 6117 QualType ArrayRDType = C.getConstantArrayType( 6118 RDType, ArraySize, nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0); 6119 // kmp_task_red_input_t .rd_input.[Size]; 6120 Address TaskRedInput = CGF.CreateMemTemp(ArrayRDType, ".rd_input."); 6121 ReductionCodeGen RCG(Data.ReductionVars, Data.ReductionOrigs, 6122 Data.ReductionCopies, Data.ReductionOps); 6123 for (unsigned Cnt = 0; Cnt < Size; ++Cnt) { 6124 // kmp_task_red_input_t &ElemLVal = .rd_input.[Cnt]; 6125 llvm::Value *Idxs[] = {llvm::ConstantInt::get(CGM.SizeTy, /*V=*/0), 6126 llvm::ConstantInt::get(CGM.SizeTy, Cnt)}; 6127 llvm::Value *GEP = CGF.EmitCheckedInBoundsGEP( 6128 TaskRedInput.getElementType(), TaskRedInput.getPointer(), Idxs, 6129 /*SignedIndices=*/false, /*IsSubtraction=*/false, Loc, 6130 ".rd_input.gep."); 6131 LValue ElemLVal = CGF.MakeNaturalAlignAddrLValue(GEP, RDType); 6132 // ElemLVal.reduce_shar = &Shareds[Cnt]; 6133 LValue SharedLVal = CGF.EmitLValueForField(ElemLVal, SharedFD); 6134 RCG.emitSharedOrigLValue(CGF, Cnt); 6135 llvm::Value *CastedShared 
= 6136 CGF.EmitCastToVoidPtr(RCG.getSharedLValue(Cnt).getPointer(CGF)); 6137 CGF.EmitStoreOfScalar(CastedShared, SharedLVal); 6138 // ElemLVal.reduce_orig = &Origs[Cnt]; 6139 LValue OrigLVal = CGF.EmitLValueForField(ElemLVal, OrigFD); 6140 llvm::Value *CastedOrig = 6141 CGF.EmitCastToVoidPtr(RCG.getOrigLValue(Cnt).getPointer(CGF)); 6142 CGF.EmitStoreOfScalar(CastedOrig, OrigLVal); 6143 RCG.emitAggregateType(CGF, Cnt); 6144 llvm::Value *SizeValInChars; 6145 llvm::Value *SizeVal; 6146 std::tie(SizeValInChars, SizeVal) = RCG.getSizes(Cnt); 6147 // We use delayed creation/initialization for VLAs and array sections. It is 6148 // required because runtime does not provide the way to pass the sizes of 6149 // VLAs/array sections to initializer/combiner/finalizer functions. Instead 6150 // threadprivate global variables are used to store these values and use 6151 // them in the functions. 6152 bool DelayedCreation = !!SizeVal; 6153 SizeValInChars = CGF.Builder.CreateIntCast(SizeValInChars, CGM.SizeTy, 6154 /*isSigned=*/false); 6155 LValue SizeLVal = CGF.EmitLValueForField(ElemLVal, SizeFD); 6156 CGF.EmitStoreOfScalar(SizeValInChars, SizeLVal); 6157 // ElemLVal.reduce_init = init; 6158 LValue InitLVal = CGF.EmitLValueForField(ElemLVal, InitFD); 6159 llvm::Value *InitAddr = 6160 CGF.EmitCastToVoidPtr(emitReduceInitFunction(CGM, Loc, RCG, Cnt)); 6161 CGF.EmitStoreOfScalar(InitAddr, InitLVal); 6162 // ElemLVal.reduce_fini = fini; 6163 LValue FiniLVal = CGF.EmitLValueForField(ElemLVal, FiniFD); 6164 llvm::Value *Fini = emitReduceFiniFunction(CGM, Loc, RCG, Cnt); 6165 llvm::Value *FiniAddr = Fini 6166 ? 
CGF.EmitCastToVoidPtr(Fini) 6167 : llvm::ConstantPointerNull::get(CGM.VoidPtrTy); 6168 CGF.EmitStoreOfScalar(FiniAddr, FiniLVal); 6169 // ElemLVal.reduce_comb = comb; 6170 LValue CombLVal = CGF.EmitLValueForField(ElemLVal, CombFD); 6171 llvm::Value *CombAddr = CGF.EmitCastToVoidPtr(emitReduceCombFunction( 6172 CGM, Loc, RCG, Cnt, Data.ReductionOps[Cnt], LHSExprs[Cnt], 6173 RHSExprs[Cnt], Data.ReductionCopies[Cnt])); 6174 CGF.EmitStoreOfScalar(CombAddr, CombLVal); 6175 // ElemLVal.flags = 0; 6176 LValue FlagsLVal = CGF.EmitLValueForField(ElemLVal, FlagsFD); 6177 if (DelayedCreation) { 6178 CGF.EmitStoreOfScalar( 6179 llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/1, /*isSigned=*/true), 6180 FlagsLVal); 6181 } else 6182 CGF.EmitNullInitialization(FlagsLVal.getAddress(CGF), 6183 FlagsLVal.getType()); 6184 } 6185 if (Data.IsReductionWithTaskMod) { 6186 // Build call void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int 6187 // is_ws, int num, void *data); 6188 llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc); 6189 llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), 6190 CGM.IntTy, /*isSigned=*/true); 6191 llvm::Value *Args[] = { 6192 IdentTLoc, GTid, 6193 llvm::ConstantInt::get(CGM.IntTy, Data.IsWorksharingReduction ? 
1 : 0, 6194 /*isSigned=*/true), 6195 llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true), 6196 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 6197 TaskRedInput.getPointer(), CGM.VoidPtrTy)}; 6198 return CGF.EmitRuntimeCall( 6199 OMPBuilder.getOrCreateRuntimeFunction( 6200 CGM.getModule(), OMPRTL___kmpc_taskred_modifier_init), 6201 Args); 6202 } 6203 // Build call void *__kmpc_taskred_init(int gtid, int num_data, void *data); 6204 llvm::Value *Args[] = { 6205 CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy, 6206 /*isSigned=*/true), 6207 llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true), 6208 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TaskRedInput.getPointer(), 6209 CGM.VoidPtrTy)}; 6210 return CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 6211 CGM.getModule(), OMPRTL___kmpc_taskred_init), 6212 Args); 6213 } 6214 6215 void CGOpenMPRuntime::emitTaskReductionFini(CodeGenFunction &CGF, 6216 SourceLocation Loc, 6217 bool IsWorksharingReduction) { 6218 // Build call void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int 6219 // is_ws, int num, void *data); 6220 llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc); 6221 llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), 6222 CGM.IntTy, /*isSigned=*/true); 6223 llvm::Value *Args[] = {IdentTLoc, GTid, 6224 llvm::ConstantInt::get(CGM.IntTy, 6225 IsWorksharingReduction ? 1 : 0, 6226 /*isSigned=*/true)}; 6227 (void)CGF.EmitRuntimeCall( 6228 OMPBuilder.getOrCreateRuntimeFunction( 6229 CGM.getModule(), OMPRTL___kmpc_task_reduction_modifier_fini), 6230 Args); 6231 } 6232 6233 void CGOpenMPRuntime::emitTaskReductionFixups(CodeGenFunction &CGF, 6234 SourceLocation Loc, 6235 ReductionCodeGen &RCG, 6236 unsigned N) { 6237 auto Sizes = RCG.getSizes(N); 6238 // Emit threadprivate global variable if the type is non-constant 6239 // (Sizes.second = nullptr). 
6240 if (Sizes.second) { 6241 llvm::Value *SizeVal = CGF.Builder.CreateIntCast(Sizes.second, CGM.SizeTy, 6242 /*isSigned=*/false); 6243 Address SizeAddr = getAddrOfArtificialThreadPrivate( 6244 CGF, CGM.getContext().getSizeType(), 6245 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N))); 6246 CGF.Builder.CreateStore(SizeVal, SizeAddr, /*IsVolatile=*/false); 6247 } 6248 } 6249 6250 Address CGOpenMPRuntime::getTaskReductionItem(CodeGenFunction &CGF, 6251 SourceLocation Loc, 6252 llvm::Value *ReductionsPtr, 6253 LValue SharedLVal) { 6254 // Build call void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void 6255 // *d); 6256 llvm::Value *Args[] = {CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), 6257 CGM.IntTy, 6258 /*isSigned=*/true), 6259 ReductionsPtr, 6260 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 6261 SharedLVal.getPointer(CGF), CGM.VoidPtrTy)}; 6262 return Address( 6263 CGF.EmitRuntimeCall( 6264 OMPBuilder.getOrCreateRuntimeFunction( 6265 CGM.getModule(), OMPRTL___kmpc_task_reduction_get_th_data), 6266 Args), 6267 SharedLVal.getAlignment()); 6268 } 6269 6270 void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF, SourceLocation Loc, 6271 const OMPTaskDataTy &Data) { 6272 if (!CGF.HaveInsertPoint()) 6273 return; 6274 6275 if (CGF.CGM.getLangOpts().OpenMPIRBuilder && Data.Dependences.empty()) { 6276 // TODO: Need to support taskwait with dependences in the OpenMPIRBuilder. 
6277 OMPBuilder.createTaskwait(CGF.Builder); 6278 } else { 6279 llvm::Value *ThreadID = getThreadID(CGF, Loc); 6280 llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc); 6281 auto &M = CGM.getModule(); 6282 Address DependenciesArray = Address::invalid(); 6283 llvm::Value *NumOfElements; 6284 std::tie(NumOfElements, DependenciesArray) = 6285 emitDependClause(CGF, Data.Dependences, Loc); 6286 llvm::Value *DepWaitTaskArgs[6]; 6287 if (!Data.Dependences.empty()) { 6288 DepWaitTaskArgs[0] = UpLoc; 6289 DepWaitTaskArgs[1] = ThreadID; 6290 DepWaitTaskArgs[2] = NumOfElements; 6291 DepWaitTaskArgs[3] = DependenciesArray.getPointer(); 6292 DepWaitTaskArgs[4] = CGF.Builder.getInt32(0); 6293 DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); 6294 6295 CodeGenFunction::RunCleanupsScope LocalScope(CGF); 6296 6297 // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid, 6298 // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 6299 // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info 6300 // is specified. 6301 CGF.EmitRuntimeCall( 6302 OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_omp_wait_deps), 6303 DepWaitTaskArgs); 6304 6305 } else { 6306 6307 // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32 6308 // global_tid); 6309 llvm::Value *Args[] = {UpLoc, ThreadID}; 6310 // Ignore return result until untied tasks are supported. 
6311 CGF.EmitRuntimeCall( 6312 OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_omp_taskwait), 6313 Args); 6314 } 6315 } 6316 6317 if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) 6318 Region->emitUntiedSwitch(CGF); 6319 } 6320 6321 void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF, 6322 OpenMPDirectiveKind InnerKind, 6323 const RegionCodeGenTy &CodeGen, 6324 bool HasCancel) { 6325 if (!CGF.HaveInsertPoint()) 6326 return; 6327 InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel, 6328 InnerKind != OMPD_critical && 6329 InnerKind != OMPD_master && 6330 InnerKind != OMPD_masked); 6331 CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr); 6332 } 6333 6334 namespace { 6335 enum RTCancelKind { 6336 CancelNoreq = 0, 6337 CancelParallel = 1, 6338 CancelLoop = 2, 6339 CancelSections = 3, 6340 CancelTaskgroup = 4 6341 }; 6342 } // anonymous namespace 6343 6344 static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) { 6345 RTCancelKind CancelKind = CancelNoreq; 6346 if (CancelRegion == OMPD_parallel) 6347 CancelKind = CancelParallel; 6348 else if (CancelRegion == OMPD_for) 6349 CancelKind = CancelLoop; 6350 else if (CancelRegion == OMPD_sections) 6351 CancelKind = CancelSections; 6352 else { 6353 assert(CancelRegion == OMPD_taskgroup); 6354 CancelKind = CancelTaskgroup; 6355 } 6356 return CancelKind; 6357 } 6358 6359 void CGOpenMPRuntime::emitCancellationPointCall( 6360 CodeGenFunction &CGF, SourceLocation Loc, 6361 OpenMPDirectiveKind CancelRegion) { 6362 if (!CGF.HaveInsertPoint()) 6363 return; 6364 // Build call kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32 6365 // global_tid, kmp_int32 cncl_kind); 6366 if (auto *OMPRegionInfo = 6367 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) { 6368 // For 'cancellation point taskgroup', the task region info may not have a 6369 // cancel. This may instead happen in another adjacent task. 
6370 if (CancelRegion == OMPD_taskgroup || OMPRegionInfo->hasCancel()) { 6371 llvm::Value *Args[] = { 6372 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 6373 CGF.Builder.getInt32(getCancellationKind(CancelRegion))}; 6374 // Ignore return result until untied tasks are supported. 6375 llvm::Value *Result = CGF.EmitRuntimeCall( 6376 OMPBuilder.getOrCreateRuntimeFunction( 6377 CGM.getModule(), OMPRTL___kmpc_cancellationpoint), 6378 Args); 6379 // if (__kmpc_cancellationpoint()) { 6380 // call i32 @__kmpc_cancel_barrier( // for parallel cancellation only 6381 // exit from construct; 6382 // } 6383 llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit"); 6384 llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue"); 6385 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result); 6386 CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB); 6387 CGF.EmitBlock(ExitBB); 6388 if (CancelRegion == OMPD_parallel) 6389 emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false); 6390 // exit from construct; 6391 CodeGenFunction::JumpDest CancelDest = 6392 CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind()); 6393 CGF.EmitBranchThroughCleanup(CancelDest); 6394 CGF.EmitBlock(ContBB, /*IsFinished=*/true); 6395 } 6396 } 6397 } 6398 6399 void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc, 6400 const Expr *IfCond, 6401 OpenMPDirectiveKind CancelRegion) { 6402 if (!CGF.HaveInsertPoint()) 6403 return; 6404 // Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid, 6405 // kmp_int32 cncl_kind); 6406 auto &M = CGM.getModule(); 6407 if (auto *OMPRegionInfo = 6408 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) { 6409 auto &&ThenGen = [this, &M, Loc, CancelRegion, 6410 OMPRegionInfo](CodeGenFunction &CGF, PrePostActionTy &) { 6411 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime(); 6412 llvm::Value *Args[] = { 6413 RT.emitUpdateLocation(CGF, Loc), RT.getThreadID(CGF, Loc), 6414 
CGF.Builder.getInt32(getCancellationKind(CancelRegion))}; 6415 // Ignore return result until untied tasks are supported. 6416 llvm::Value *Result = CGF.EmitRuntimeCall( 6417 OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_cancel), Args); 6418 // if (__kmpc_cancel()) { 6419 // call i32 @__kmpc_cancel_barrier( // for parallel cancellation only 6420 // exit from construct; 6421 // } 6422 llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit"); 6423 llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue"); 6424 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result); 6425 CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB); 6426 CGF.EmitBlock(ExitBB); 6427 if (CancelRegion == OMPD_parallel) 6428 RT.emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false); 6429 // exit from construct; 6430 CodeGenFunction::JumpDest CancelDest = 6431 CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind()); 6432 CGF.EmitBranchThroughCleanup(CancelDest); 6433 CGF.EmitBlock(ContBB, /*IsFinished=*/true); 6434 }; 6435 if (IfCond) { 6436 emitIfClause(CGF, IfCond, ThenGen, 6437 [](CodeGenFunction &, PrePostActionTy &) {}); 6438 } else { 6439 RegionCodeGenTy ThenRCG(ThenGen); 6440 ThenRCG(CGF); 6441 } 6442 } 6443 } 6444 6445 namespace { 6446 /// Cleanup action for uses_allocators support. 
6447 class OMPUsesAllocatorsActionTy final : public PrePostActionTy { 6448 ArrayRef<std::pair<const Expr *, const Expr *>> Allocators; 6449 6450 public: 6451 OMPUsesAllocatorsActionTy( 6452 ArrayRef<std::pair<const Expr *, const Expr *>> Allocators) 6453 : Allocators(Allocators) {} 6454 void Enter(CodeGenFunction &CGF) override { 6455 if (!CGF.HaveInsertPoint()) 6456 return; 6457 for (const auto &AllocatorData : Allocators) { 6458 CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsInit( 6459 CGF, AllocatorData.first, AllocatorData.second); 6460 } 6461 } 6462 void Exit(CodeGenFunction &CGF) override { 6463 if (!CGF.HaveInsertPoint()) 6464 return; 6465 for (const auto &AllocatorData : Allocators) { 6466 CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsFini(CGF, 6467 AllocatorData.first); 6468 } 6469 } 6470 }; 6471 } // namespace 6472 6473 void CGOpenMPRuntime::emitTargetOutlinedFunction( 6474 const OMPExecutableDirective &D, StringRef ParentName, 6475 llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, 6476 bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) { 6477 assert(!ParentName.empty() && "Invalid target region parent name!"); 6478 HasEmittedTargetRegion = true; 6479 SmallVector<std::pair<const Expr *, const Expr *>, 4> Allocators; 6480 for (const auto *C : D.getClausesOfKind<OMPUsesAllocatorsClause>()) { 6481 for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) { 6482 const OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I); 6483 if (!D.AllocatorTraits) 6484 continue; 6485 Allocators.emplace_back(D.Allocator, D.AllocatorTraits); 6486 } 6487 } 6488 OMPUsesAllocatorsActionTy UsesAllocatorAction(Allocators); 6489 CodeGen.setAction(UsesAllocatorAction); 6490 emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID, 6491 IsOffloadEntry, CodeGen); 6492 } 6493 6494 void CGOpenMPRuntime::emitUsesAllocatorsInit(CodeGenFunction &CGF, 6495 const Expr *Allocator, 6496 const Expr *AllocatorTraits) { 6497 llvm::Value *ThreadId = 
getThreadID(CGF, Allocator->getExprLoc()); 6498 ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true); 6499 // Use default memspace handle. 6500 llvm::Value *MemSpaceHandle = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); 6501 llvm::Value *NumTraits = llvm::ConstantInt::get( 6502 CGF.IntTy, cast<ConstantArrayType>( 6503 AllocatorTraits->getType()->getAsArrayTypeUnsafe()) 6504 ->getSize() 6505 .getLimitedValue()); 6506 LValue AllocatorTraitsLVal = CGF.EmitLValue(AllocatorTraits); 6507 Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 6508 AllocatorTraitsLVal.getAddress(CGF), CGF.VoidPtrPtrTy); 6509 AllocatorTraitsLVal = CGF.MakeAddrLValue(Addr, CGF.getContext().VoidPtrTy, 6510 AllocatorTraitsLVal.getBaseInfo(), 6511 AllocatorTraitsLVal.getTBAAInfo()); 6512 llvm::Value *Traits = 6513 CGF.EmitLoadOfScalar(AllocatorTraitsLVal, AllocatorTraits->getExprLoc()); 6514 6515 llvm::Value *AllocatorVal = 6516 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 6517 CGM.getModule(), OMPRTL___kmpc_init_allocator), 6518 {ThreadId, MemSpaceHandle, NumTraits, Traits}); 6519 // Store to allocator. 
6520 CGF.EmitVarDecl(*cast<VarDecl>( 6521 cast<DeclRefExpr>(Allocator->IgnoreParenImpCasts())->getDecl())); 6522 LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts()); 6523 AllocatorVal = 6524 CGF.EmitScalarConversion(AllocatorVal, CGF.getContext().VoidPtrTy, 6525 Allocator->getType(), Allocator->getExprLoc()); 6526 CGF.EmitStoreOfScalar(AllocatorVal, AllocatorLVal); 6527 } 6528 6529 void CGOpenMPRuntime::emitUsesAllocatorsFini(CodeGenFunction &CGF, 6530 const Expr *Allocator) { 6531 llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc()); 6532 ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true); 6533 LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts()); 6534 llvm::Value *AllocatorVal = 6535 CGF.EmitLoadOfScalar(AllocatorLVal, Allocator->getExprLoc()); 6536 AllocatorVal = CGF.EmitScalarConversion(AllocatorVal, Allocator->getType(), 6537 CGF.getContext().VoidPtrTy, 6538 Allocator->getExprLoc()); 6539 (void)CGF.EmitRuntimeCall( 6540 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), 6541 OMPRTL___kmpc_destroy_allocator), 6542 {ThreadId, AllocatorVal}); 6543 } 6544 6545 void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper( 6546 const OMPExecutableDirective &D, StringRef ParentName, 6547 llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, 6548 bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) { 6549 // Create a unique name for the entry function using the source location 6550 // information of the current target region. The name will be something like: 6551 // 6552 // __omp_offloading_DD_FFFF_PP_lBB 6553 // 6554 // where DD_FFFF is an ID unique to the file (device and file IDs), PP is the 6555 // mangled name of the function that encloses the target region and BB is the 6556 // line number of the target region. 
6557 6558 unsigned DeviceID; 6559 unsigned FileID; 6560 unsigned Line; 6561 getTargetEntryUniqueInfo(CGM.getContext(), D.getBeginLoc(), DeviceID, FileID, 6562 Line); 6563 SmallString<64> EntryFnName; 6564 { 6565 llvm::raw_svector_ostream OS(EntryFnName); 6566 OS << "__omp_offloading" << llvm::format("_%x", DeviceID) 6567 << llvm::format("_%x_", FileID) << ParentName << "_l" << Line; 6568 } 6569 6570 const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target); 6571 6572 CodeGenFunction CGF(CGM, true); 6573 CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName); 6574 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 6575 6576 OutlinedFn = CGF.GenerateOpenMPCapturedStmtFunction(CS, D.getBeginLoc()); 6577 6578 // If this target outline function is not an offload entry, we don't need to 6579 // register it. 6580 if (!IsOffloadEntry) 6581 return; 6582 6583 // The target region ID is used by the runtime library to identify the current 6584 // target region, so it only has to be unique and not necessarily point to 6585 // anything. It could be the pointer to the outlined function that implements 6586 // the target region, but we aren't using that so that the compiler doesn't 6587 // need to keep that, and could therefore inline the host function if proven 6588 // worthwhile during optimization. In the other hand, if emitting code for the 6589 // device, the ID has to be the function address so that it can retrieved from 6590 // the offloading entry and launched by the runtime library. We also mark the 6591 // outlined function to have external linkage in case we are emitting code for 6592 // the device, because these functions will be entry points to the device. 
6593 6594 if (CGM.getLangOpts().OpenMPIsDevice) { 6595 OutlinedFnID = llvm::ConstantExpr::getBitCast(OutlinedFn, CGM.Int8PtrTy); 6596 OutlinedFn->setLinkage(llvm::GlobalValue::WeakAnyLinkage); 6597 OutlinedFn->setDSOLocal(false); 6598 if (CGM.getTriple().isAMDGCN()) 6599 OutlinedFn->setCallingConv(llvm::CallingConv::AMDGPU_KERNEL); 6600 } else { 6601 std::string Name = getName({EntryFnName, "region_id"}); 6602 OutlinedFnID = new llvm::GlobalVariable( 6603 CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true, 6604 llvm::GlobalValue::WeakAnyLinkage, 6605 llvm::Constant::getNullValue(CGM.Int8Ty), Name); 6606 } 6607 6608 // Register the information for the entry associated with this target region. 6609 OffloadEntriesInfoManager.registerTargetRegionEntryInfo( 6610 DeviceID, FileID, ParentName, Line, OutlinedFn, OutlinedFnID, 6611 OffloadEntriesInfoManagerTy::OMPTargetRegionEntryTargetRegion); 6612 6613 // Add NumTeams and ThreadLimit attributes to the outlined GPU function 6614 int32_t DefaultValTeams = -1; 6615 getNumTeamsExprForTargetDirective(CGF, D, DefaultValTeams); 6616 if (DefaultValTeams > 0) { 6617 OutlinedFn->addFnAttr("omp_target_num_teams", 6618 std::to_string(DefaultValTeams)); 6619 } 6620 int32_t DefaultValThreads = -1; 6621 getNumThreadsExprForTargetDirective(CGF, D, DefaultValThreads); 6622 if (DefaultValThreads > 0) { 6623 OutlinedFn->addFnAttr("omp_target_thread_limit", 6624 std::to_string(DefaultValThreads)); 6625 } 6626 6627 CGM.getTargetCodeGenInfo().setTargetAttributes(nullptr, OutlinedFn, CGM); 6628 } 6629 6630 /// Checks if the expression is constant or does not have non-trivial function 6631 /// calls. 6632 static bool isTrivial(ASTContext &Ctx, const Expr * E) { 6633 // We can skip constant expressions. 6634 // We can skip expressions with trivial calls or simple expressions. 
6635 return (E->isEvaluatable(Ctx, Expr::SE_AllowUndefinedBehavior) || 6636 !E->hasNonTrivialCall(Ctx)) && 6637 !E->HasSideEffects(Ctx, /*IncludePossibleEffects=*/true); 6638 } 6639 6640 const Stmt *CGOpenMPRuntime::getSingleCompoundChild(ASTContext &Ctx, 6641 const Stmt *Body) { 6642 const Stmt *Child = Body->IgnoreContainers(); 6643 while (const auto *C = dyn_cast_or_null<CompoundStmt>(Child)) { 6644 Child = nullptr; 6645 for (const Stmt *S : C->body()) { 6646 if (const auto *E = dyn_cast<Expr>(S)) { 6647 if (isTrivial(Ctx, E)) 6648 continue; 6649 } 6650 // Some of the statements can be ignored. 6651 if (isa<AsmStmt>(S) || isa<NullStmt>(S) || isa<OMPFlushDirective>(S) || 6652 isa<OMPBarrierDirective>(S) || isa<OMPTaskyieldDirective>(S)) 6653 continue; 6654 // Analyze declarations. 6655 if (const auto *DS = dyn_cast<DeclStmt>(S)) { 6656 if (llvm::all_of(DS->decls(), [](const Decl *D) { 6657 if (isa<EmptyDecl>(D) || isa<DeclContext>(D) || 6658 isa<TypeDecl>(D) || isa<PragmaCommentDecl>(D) || 6659 isa<PragmaDetectMismatchDecl>(D) || isa<UsingDecl>(D) || 6660 isa<UsingDirectiveDecl>(D) || 6661 isa<OMPDeclareReductionDecl>(D) || 6662 isa<OMPThreadPrivateDecl>(D) || isa<OMPAllocateDecl>(D)) 6663 return true; 6664 const auto *VD = dyn_cast<VarDecl>(D); 6665 if (!VD) 6666 return false; 6667 return VD->hasGlobalStorage() || !VD->isUsed(); 6668 })) 6669 continue; 6670 } 6671 // Found multiple children - cannot get the one child only. 
6672 if (Child) 6673 return nullptr; 6674 Child = S; 6675 } 6676 if (Child) 6677 Child = Child->IgnoreContainers(); 6678 } 6679 return Child; 6680 } 6681 6682 const Expr *CGOpenMPRuntime::getNumTeamsExprForTargetDirective( 6683 CodeGenFunction &CGF, const OMPExecutableDirective &D, 6684 int32_t &DefaultVal) { 6685 6686 OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind(); 6687 assert(isOpenMPTargetExecutionDirective(DirectiveKind) && 6688 "Expected target-based executable directive."); 6689 switch (DirectiveKind) { 6690 case OMPD_target: { 6691 const auto *CS = D.getInnermostCapturedStmt(); 6692 const auto *Body = 6693 CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true); 6694 const Stmt *ChildStmt = 6695 CGOpenMPRuntime::getSingleCompoundChild(CGF.getContext(), Body); 6696 if (const auto *NestedDir = 6697 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) { 6698 if (isOpenMPTeamsDirective(NestedDir->getDirectiveKind())) { 6699 if (NestedDir->hasClausesOfKind<OMPNumTeamsClause>()) { 6700 const Expr *NumTeams = 6701 NestedDir->getSingleClause<OMPNumTeamsClause>()->getNumTeams(); 6702 if (NumTeams->isIntegerConstantExpr(CGF.getContext())) 6703 if (auto Constant = 6704 NumTeams->getIntegerConstantExpr(CGF.getContext())) 6705 DefaultVal = Constant->getExtValue(); 6706 return NumTeams; 6707 } 6708 DefaultVal = 0; 6709 return nullptr; 6710 } 6711 if (isOpenMPParallelDirective(NestedDir->getDirectiveKind()) || 6712 isOpenMPSimdDirective(NestedDir->getDirectiveKind())) { 6713 DefaultVal = 1; 6714 return nullptr; 6715 } 6716 DefaultVal = 1; 6717 return nullptr; 6718 } 6719 // A value of -1 is used to check if we need to emit no teams region 6720 DefaultVal = -1; 6721 return nullptr; 6722 } 6723 case OMPD_target_teams: 6724 case OMPD_target_teams_distribute: 6725 case OMPD_target_teams_distribute_simd: 6726 case OMPD_target_teams_distribute_parallel_for: 6727 case OMPD_target_teams_distribute_parallel_for_simd: { 6728 if 
(D.hasClausesOfKind<OMPNumTeamsClause>()) { 6729 const Expr *NumTeams = 6730 D.getSingleClause<OMPNumTeamsClause>()->getNumTeams(); 6731 if (NumTeams->isIntegerConstantExpr(CGF.getContext())) 6732 if (auto Constant = NumTeams->getIntegerConstantExpr(CGF.getContext())) 6733 DefaultVal = Constant->getExtValue(); 6734 return NumTeams; 6735 } 6736 DefaultVal = 0; 6737 return nullptr; 6738 } 6739 case OMPD_target_parallel: 6740 case OMPD_target_parallel_for: 6741 case OMPD_target_parallel_for_simd: 6742 case OMPD_target_simd: 6743 DefaultVal = 1; 6744 return nullptr; 6745 case OMPD_parallel: 6746 case OMPD_for: 6747 case OMPD_parallel_for: 6748 case OMPD_parallel_master: 6749 case OMPD_parallel_sections: 6750 case OMPD_for_simd: 6751 case OMPD_parallel_for_simd: 6752 case OMPD_cancel: 6753 case OMPD_cancellation_point: 6754 case OMPD_ordered: 6755 case OMPD_threadprivate: 6756 case OMPD_allocate: 6757 case OMPD_task: 6758 case OMPD_simd: 6759 case OMPD_tile: 6760 case OMPD_unroll: 6761 case OMPD_sections: 6762 case OMPD_section: 6763 case OMPD_single: 6764 case OMPD_master: 6765 case OMPD_critical: 6766 case OMPD_taskyield: 6767 case OMPD_barrier: 6768 case OMPD_taskwait: 6769 case OMPD_taskgroup: 6770 case OMPD_atomic: 6771 case OMPD_flush: 6772 case OMPD_depobj: 6773 case OMPD_scan: 6774 case OMPD_teams: 6775 case OMPD_target_data: 6776 case OMPD_target_exit_data: 6777 case OMPD_target_enter_data: 6778 case OMPD_distribute: 6779 case OMPD_distribute_simd: 6780 case OMPD_distribute_parallel_for: 6781 case OMPD_distribute_parallel_for_simd: 6782 case OMPD_teams_distribute: 6783 case OMPD_teams_distribute_simd: 6784 case OMPD_teams_distribute_parallel_for: 6785 case OMPD_teams_distribute_parallel_for_simd: 6786 case OMPD_target_update: 6787 case OMPD_declare_simd: 6788 case OMPD_declare_variant: 6789 case OMPD_begin_declare_variant: 6790 case OMPD_end_declare_variant: 6791 case OMPD_declare_target: 6792 case OMPD_end_declare_target: 6793 case OMPD_declare_reduction: 6794 
case OMPD_declare_mapper: 6795 case OMPD_taskloop: 6796 case OMPD_taskloop_simd: 6797 case OMPD_master_taskloop: 6798 case OMPD_master_taskloop_simd: 6799 case OMPD_parallel_master_taskloop: 6800 case OMPD_parallel_master_taskloop_simd: 6801 case OMPD_requires: 6802 case OMPD_metadirective: 6803 case OMPD_unknown: 6804 break; 6805 default: 6806 break; 6807 } 6808 llvm_unreachable("Unexpected directive kind."); 6809 } 6810 6811 llvm::Value *CGOpenMPRuntime::emitNumTeamsForTargetDirective( 6812 CodeGenFunction &CGF, const OMPExecutableDirective &D) { 6813 assert(!CGF.getLangOpts().OpenMPIsDevice && 6814 "Clauses associated with the teams directive expected to be emitted " 6815 "only for the host!"); 6816 CGBuilderTy &Bld = CGF.Builder; 6817 int32_t DefaultNT = -1; 6818 const Expr *NumTeams = getNumTeamsExprForTargetDirective(CGF, D, DefaultNT); 6819 if (NumTeams != nullptr) { 6820 OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind(); 6821 6822 switch (DirectiveKind) { 6823 case OMPD_target: { 6824 const auto *CS = D.getInnermostCapturedStmt(); 6825 CGOpenMPInnerExprInfo CGInfo(CGF, *CS); 6826 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 6827 llvm::Value *NumTeamsVal = CGF.EmitScalarExpr(NumTeams, 6828 /*IgnoreResultAssign*/ true); 6829 return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty, 6830 /*isSigned=*/true); 6831 } 6832 case OMPD_target_teams: 6833 case OMPD_target_teams_distribute: 6834 case OMPD_target_teams_distribute_simd: 6835 case OMPD_target_teams_distribute_parallel_for: 6836 case OMPD_target_teams_distribute_parallel_for_simd: { 6837 CodeGenFunction::RunCleanupsScope NumTeamsScope(CGF); 6838 llvm::Value *NumTeamsVal = CGF.EmitScalarExpr(NumTeams, 6839 /*IgnoreResultAssign*/ true); 6840 return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty, 6841 /*isSigned=*/true); 6842 } 6843 default: 6844 break; 6845 } 6846 } else if (DefaultNT == -1) { 6847 return nullptr; 6848 } 6849 6850 return Bld.getInt32(DefaultNT); 6851 } 6852 6853 static 
llvm::Value *getNumThreads(CodeGenFunction &CGF, const CapturedStmt *CS, 6854 llvm::Value *DefaultThreadLimitVal) { 6855 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild( 6856 CGF.getContext(), CS->getCapturedStmt()); 6857 if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) { 6858 if (isOpenMPParallelDirective(Dir->getDirectiveKind())) { 6859 llvm::Value *NumThreads = nullptr; 6860 llvm::Value *CondVal = nullptr; 6861 // Handle if clause. If if clause present, the number of threads is 6862 // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1. 6863 if (Dir->hasClausesOfKind<OMPIfClause>()) { 6864 CGOpenMPInnerExprInfo CGInfo(CGF, *CS); 6865 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 6866 const OMPIfClause *IfClause = nullptr; 6867 for (const auto *C : Dir->getClausesOfKind<OMPIfClause>()) { 6868 if (C->getNameModifier() == OMPD_unknown || 6869 C->getNameModifier() == OMPD_parallel) { 6870 IfClause = C; 6871 break; 6872 } 6873 } 6874 if (IfClause) { 6875 const Expr *Cond = IfClause->getCondition(); 6876 bool Result; 6877 if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) { 6878 if (!Result) 6879 return CGF.Builder.getInt32(1); 6880 } else { 6881 CodeGenFunction::LexicalScope Scope(CGF, Cond->getSourceRange()); 6882 if (const auto *PreInit = 6883 cast_or_null<DeclStmt>(IfClause->getPreInitStmt())) { 6884 for (const auto *I : PreInit->decls()) { 6885 if (!I->hasAttr<OMPCaptureNoInitAttr>()) { 6886 CGF.EmitVarDecl(cast<VarDecl>(*I)); 6887 } else { 6888 CodeGenFunction::AutoVarEmission Emission = 6889 CGF.EmitAutoVarAlloca(cast<VarDecl>(*I)); 6890 CGF.EmitAutoVarCleanups(Emission); 6891 } 6892 } 6893 } 6894 CondVal = CGF.EvaluateExprAsBool(Cond); 6895 } 6896 } 6897 } 6898 // Check the value of num_threads clause iff if clause was not specified 6899 // or is not evaluated to false. 
6900 if (Dir->hasClausesOfKind<OMPNumThreadsClause>()) { 6901 CGOpenMPInnerExprInfo CGInfo(CGF, *CS); 6902 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 6903 const auto *NumThreadsClause = 6904 Dir->getSingleClause<OMPNumThreadsClause>(); 6905 CodeGenFunction::LexicalScope Scope( 6906 CGF, NumThreadsClause->getNumThreads()->getSourceRange()); 6907 if (const auto *PreInit = 6908 cast_or_null<DeclStmt>(NumThreadsClause->getPreInitStmt())) { 6909 for (const auto *I : PreInit->decls()) { 6910 if (!I->hasAttr<OMPCaptureNoInitAttr>()) { 6911 CGF.EmitVarDecl(cast<VarDecl>(*I)); 6912 } else { 6913 CodeGenFunction::AutoVarEmission Emission = 6914 CGF.EmitAutoVarAlloca(cast<VarDecl>(*I)); 6915 CGF.EmitAutoVarCleanups(Emission); 6916 } 6917 } 6918 } 6919 NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads()); 6920 NumThreads = CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, 6921 /*isSigned=*/false); 6922 if (DefaultThreadLimitVal) 6923 NumThreads = CGF.Builder.CreateSelect( 6924 CGF.Builder.CreateICmpULT(DefaultThreadLimitVal, NumThreads), 6925 DefaultThreadLimitVal, NumThreads); 6926 } else { 6927 NumThreads = DefaultThreadLimitVal ? DefaultThreadLimitVal 6928 : CGF.Builder.getInt32(0); 6929 } 6930 // Process condition of the if clause. 6931 if (CondVal) { 6932 NumThreads = CGF.Builder.CreateSelect(CondVal, NumThreads, 6933 CGF.Builder.getInt32(1)); 6934 } 6935 return NumThreads; 6936 } 6937 if (isOpenMPSimdDirective(Dir->getDirectiveKind())) 6938 return CGF.Builder.getInt32(1); 6939 return DefaultThreadLimitVal; 6940 } 6941 return DefaultThreadLimitVal ? 
DefaultThreadLimitVal 6942 : CGF.Builder.getInt32(0); 6943 } 6944 6945 const Expr *CGOpenMPRuntime::getNumThreadsExprForTargetDirective( 6946 CodeGenFunction &CGF, const OMPExecutableDirective &D, 6947 int32_t &DefaultVal) { 6948 OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind(); 6949 assert(isOpenMPTargetExecutionDirective(DirectiveKind) && 6950 "Expected target-based executable directive."); 6951 6952 switch (DirectiveKind) { 6953 case OMPD_target: 6954 // Teams have no clause thread_limit 6955 return nullptr; 6956 case OMPD_target_teams: 6957 case OMPD_target_teams_distribute: 6958 if (D.hasClausesOfKind<OMPThreadLimitClause>()) { 6959 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>(); 6960 const Expr *ThreadLimit = ThreadLimitClause->getThreadLimit(); 6961 if (ThreadLimit->isIntegerConstantExpr(CGF.getContext())) 6962 if (auto Constant = 6963 ThreadLimit->getIntegerConstantExpr(CGF.getContext())) 6964 DefaultVal = Constant->getExtValue(); 6965 return ThreadLimit; 6966 } 6967 return nullptr; 6968 case OMPD_target_parallel: 6969 case OMPD_target_parallel_for: 6970 case OMPD_target_parallel_for_simd: 6971 case OMPD_target_teams_distribute_parallel_for: 6972 case OMPD_target_teams_distribute_parallel_for_simd: { 6973 Expr *ThreadLimit = nullptr; 6974 Expr *NumThreads = nullptr; 6975 if (D.hasClausesOfKind<OMPThreadLimitClause>()) { 6976 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>(); 6977 ThreadLimit = ThreadLimitClause->getThreadLimit(); 6978 if (ThreadLimit->isIntegerConstantExpr(CGF.getContext())) 6979 if (auto Constant = 6980 ThreadLimit->getIntegerConstantExpr(CGF.getContext())) 6981 DefaultVal = Constant->getExtValue(); 6982 } 6983 if (D.hasClausesOfKind<OMPNumThreadsClause>()) { 6984 const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>(); 6985 NumThreads = NumThreadsClause->getNumThreads(); 6986 if (NumThreads->isIntegerConstantExpr(CGF.getContext())) { 6987 if (auto Constant = 6988 
NumThreads->getIntegerConstantExpr(CGF.getContext())) { 6989 if (Constant->getExtValue() < DefaultVal) { 6990 DefaultVal = Constant->getExtValue(); 6991 ThreadLimit = NumThreads; 6992 } 6993 } 6994 } 6995 } 6996 return ThreadLimit; 6997 } 6998 case OMPD_target_teams_distribute_simd: 6999 case OMPD_target_simd: 7000 DefaultVal = 1; 7001 return nullptr; 7002 case OMPD_parallel: 7003 case OMPD_for: 7004 case OMPD_parallel_for: 7005 case OMPD_parallel_master: 7006 case OMPD_parallel_sections: 7007 case OMPD_for_simd: 7008 case OMPD_parallel_for_simd: 7009 case OMPD_cancel: 7010 case OMPD_cancellation_point: 7011 case OMPD_ordered: 7012 case OMPD_threadprivate: 7013 case OMPD_allocate: 7014 case OMPD_task: 7015 case OMPD_simd: 7016 case OMPD_tile: 7017 case OMPD_unroll: 7018 case OMPD_sections: 7019 case OMPD_section: 7020 case OMPD_single: 7021 case OMPD_master: 7022 case OMPD_critical: 7023 case OMPD_taskyield: 7024 case OMPD_barrier: 7025 case OMPD_taskwait: 7026 case OMPD_taskgroup: 7027 case OMPD_atomic: 7028 case OMPD_flush: 7029 case OMPD_depobj: 7030 case OMPD_scan: 7031 case OMPD_teams: 7032 case OMPD_target_data: 7033 case OMPD_target_exit_data: 7034 case OMPD_target_enter_data: 7035 case OMPD_distribute: 7036 case OMPD_distribute_simd: 7037 case OMPD_distribute_parallel_for: 7038 case OMPD_distribute_parallel_for_simd: 7039 case OMPD_teams_distribute: 7040 case OMPD_teams_distribute_simd: 7041 case OMPD_teams_distribute_parallel_for: 7042 case OMPD_teams_distribute_parallel_for_simd: 7043 case OMPD_target_update: 7044 case OMPD_declare_simd: 7045 case OMPD_declare_variant: 7046 case OMPD_begin_declare_variant: 7047 case OMPD_end_declare_variant: 7048 case OMPD_declare_target: 7049 case OMPD_end_declare_target: 7050 case OMPD_declare_reduction: 7051 case OMPD_declare_mapper: 7052 case OMPD_taskloop: 7053 case OMPD_taskloop_simd: 7054 case OMPD_master_taskloop: 7055 case OMPD_master_taskloop_simd: 7056 case OMPD_parallel_master_taskloop: 7057 case 
OMPD_parallel_master_taskloop_simd: 7058 case OMPD_requires: 7059 case OMPD_unknown: 7060 break; 7061 default: 7062 break; 7063 } 7064 llvm_unreachable("Unsupported directive kind."); 7065 } 7066 7067 llvm::Value *CGOpenMPRuntime::emitNumThreadsForTargetDirective( 7068 CodeGenFunction &CGF, const OMPExecutableDirective &D) { 7069 assert(!CGF.getLangOpts().OpenMPIsDevice && 7070 "Clauses associated with the teams directive expected to be emitted " 7071 "only for the host!"); 7072 OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind(); 7073 assert(isOpenMPTargetExecutionDirective(DirectiveKind) && 7074 "Expected target-based executable directive."); 7075 CGBuilderTy &Bld = CGF.Builder; 7076 llvm::Value *ThreadLimitVal = nullptr; 7077 llvm::Value *NumThreadsVal = nullptr; 7078 switch (DirectiveKind) { 7079 case OMPD_target: { 7080 const CapturedStmt *CS = D.getInnermostCapturedStmt(); 7081 if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal)) 7082 return NumThreads; 7083 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild( 7084 CGF.getContext(), CS->getCapturedStmt()); 7085 if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) { 7086 if (Dir->hasClausesOfKind<OMPThreadLimitClause>()) { 7087 CGOpenMPInnerExprInfo CGInfo(CGF, *CS); 7088 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 7089 const auto *ThreadLimitClause = 7090 Dir->getSingleClause<OMPThreadLimitClause>(); 7091 CodeGenFunction::LexicalScope Scope( 7092 CGF, ThreadLimitClause->getThreadLimit()->getSourceRange()); 7093 if (const auto *PreInit = 7094 cast_or_null<DeclStmt>(ThreadLimitClause->getPreInitStmt())) { 7095 for (const auto *I : PreInit->decls()) { 7096 if (!I->hasAttr<OMPCaptureNoInitAttr>()) { 7097 CGF.EmitVarDecl(cast<VarDecl>(*I)); 7098 } else { 7099 CodeGenFunction::AutoVarEmission Emission = 7100 CGF.EmitAutoVarAlloca(cast<VarDecl>(*I)); 7101 CGF.EmitAutoVarCleanups(Emission); 7102 } 7103 } 7104 } 7105 llvm::Value *ThreadLimit = 
CGF.EmitScalarExpr( 7106 ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true); 7107 ThreadLimitVal = 7108 Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false); 7109 } 7110 if (isOpenMPTeamsDirective(Dir->getDirectiveKind()) && 7111 !isOpenMPDistributeDirective(Dir->getDirectiveKind())) { 7112 CS = Dir->getInnermostCapturedStmt(); 7113 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild( 7114 CGF.getContext(), CS->getCapturedStmt()); 7115 Dir = dyn_cast_or_null<OMPExecutableDirective>(Child); 7116 } 7117 if (Dir && isOpenMPDistributeDirective(Dir->getDirectiveKind()) && 7118 !isOpenMPSimdDirective(Dir->getDirectiveKind())) { 7119 CS = Dir->getInnermostCapturedStmt(); 7120 if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal)) 7121 return NumThreads; 7122 } 7123 if (Dir && isOpenMPSimdDirective(Dir->getDirectiveKind())) 7124 return Bld.getInt32(1); 7125 } 7126 return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0); 7127 } 7128 case OMPD_target_teams: { 7129 if (D.hasClausesOfKind<OMPThreadLimitClause>()) { 7130 CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF); 7131 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>(); 7132 llvm::Value *ThreadLimit = CGF.EmitScalarExpr( 7133 ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true); 7134 ThreadLimitVal = 7135 Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false); 7136 } 7137 const CapturedStmt *CS = D.getInnermostCapturedStmt(); 7138 if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal)) 7139 return NumThreads; 7140 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild( 7141 CGF.getContext(), CS->getCapturedStmt()); 7142 if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) { 7143 if (Dir->getDirectiveKind() == OMPD_distribute) { 7144 CS = Dir->getInnermostCapturedStmt(); 7145 if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal)) 7146 return NumThreads; 7147 } 7148 } 7149 
return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0); 7150 } 7151 case OMPD_target_teams_distribute: 7152 if (D.hasClausesOfKind<OMPThreadLimitClause>()) { 7153 CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF); 7154 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>(); 7155 llvm::Value *ThreadLimit = CGF.EmitScalarExpr( 7156 ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true); 7157 ThreadLimitVal = 7158 Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false); 7159 } 7160 return getNumThreads(CGF, D.getInnermostCapturedStmt(), ThreadLimitVal); 7161 case OMPD_target_parallel: 7162 case OMPD_target_parallel_for: 7163 case OMPD_target_parallel_for_simd: 7164 case OMPD_target_teams_distribute_parallel_for: 7165 case OMPD_target_teams_distribute_parallel_for_simd: { 7166 llvm::Value *CondVal = nullptr; 7167 // Handle if clause. If if clause present, the number of threads is 7168 // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1. 
7169 if (D.hasClausesOfKind<OMPIfClause>()) { 7170 const OMPIfClause *IfClause = nullptr; 7171 for (const auto *C : D.getClausesOfKind<OMPIfClause>()) { 7172 if (C->getNameModifier() == OMPD_unknown || 7173 C->getNameModifier() == OMPD_parallel) { 7174 IfClause = C; 7175 break; 7176 } 7177 } 7178 if (IfClause) { 7179 const Expr *Cond = IfClause->getCondition(); 7180 bool Result; 7181 if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) { 7182 if (!Result) 7183 return Bld.getInt32(1); 7184 } else { 7185 CodeGenFunction::RunCleanupsScope Scope(CGF); 7186 CondVal = CGF.EvaluateExprAsBool(Cond); 7187 } 7188 } 7189 } 7190 if (D.hasClausesOfKind<OMPThreadLimitClause>()) { 7191 CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF); 7192 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>(); 7193 llvm::Value *ThreadLimit = CGF.EmitScalarExpr( 7194 ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true); 7195 ThreadLimitVal = 7196 Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false); 7197 } 7198 if (D.hasClausesOfKind<OMPNumThreadsClause>()) { 7199 CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF); 7200 const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>(); 7201 llvm::Value *NumThreads = CGF.EmitScalarExpr( 7202 NumThreadsClause->getNumThreads(), /*IgnoreResultAssign=*/true); 7203 NumThreadsVal = 7204 Bld.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned=*/false); 7205 ThreadLimitVal = ThreadLimitVal 7206 ? 
Bld.CreateSelect(Bld.CreateICmpULT(NumThreadsVal, 7207 ThreadLimitVal), 7208 NumThreadsVal, ThreadLimitVal) 7209 : NumThreadsVal; 7210 } 7211 if (!ThreadLimitVal) 7212 ThreadLimitVal = Bld.getInt32(0); 7213 if (CondVal) 7214 return Bld.CreateSelect(CondVal, ThreadLimitVal, Bld.getInt32(1)); 7215 return ThreadLimitVal; 7216 } 7217 case OMPD_target_teams_distribute_simd: 7218 case OMPD_target_simd: 7219 return Bld.getInt32(1); 7220 case OMPD_parallel: 7221 case OMPD_for: 7222 case OMPD_parallel_for: 7223 case OMPD_parallel_master: 7224 case OMPD_parallel_sections: 7225 case OMPD_for_simd: 7226 case OMPD_parallel_for_simd: 7227 case OMPD_cancel: 7228 case OMPD_cancellation_point: 7229 case OMPD_ordered: 7230 case OMPD_threadprivate: 7231 case OMPD_allocate: 7232 case OMPD_task: 7233 case OMPD_simd: 7234 case OMPD_tile: 7235 case OMPD_unroll: 7236 case OMPD_sections: 7237 case OMPD_section: 7238 case OMPD_single: 7239 case OMPD_master: 7240 case OMPD_critical: 7241 case OMPD_taskyield: 7242 case OMPD_barrier: 7243 case OMPD_taskwait: 7244 case OMPD_taskgroup: 7245 case OMPD_atomic: 7246 case OMPD_flush: 7247 case OMPD_depobj: 7248 case OMPD_scan: 7249 case OMPD_teams: 7250 case OMPD_target_data: 7251 case OMPD_target_exit_data: 7252 case OMPD_target_enter_data: 7253 case OMPD_distribute: 7254 case OMPD_distribute_simd: 7255 case OMPD_distribute_parallel_for: 7256 case OMPD_distribute_parallel_for_simd: 7257 case OMPD_teams_distribute: 7258 case OMPD_teams_distribute_simd: 7259 case OMPD_teams_distribute_parallel_for: 7260 case OMPD_teams_distribute_parallel_for_simd: 7261 case OMPD_target_update: 7262 case OMPD_declare_simd: 7263 case OMPD_declare_variant: 7264 case OMPD_begin_declare_variant: 7265 case OMPD_end_declare_variant: 7266 case OMPD_declare_target: 7267 case OMPD_end_declare_target: 7268 case OMPD_declare_reduction: 7269 case OMPD_declare_mapper: 7270 case OMPD_taskloop: 7271 case OMPD_taskloop_simd: 7272 case OMPD_master_taskloop: 7273 case 
OMPD_master_taskloop_simd: 7274 case OMPD_parallel_master_taskloop: 7275 case OMPD_parallel_master_taskloop_simd: 7276 case OMPD_requires: 7277 case OMPD_metadirective: 7278 case OMPD_unknown: 7279 break; 7280 default: 7281 break; 7282 } 7283 llvm_unreachable("Unsupported directive kind."); 7284 } 7285 7286 namespace { 7287 LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE(); 7288 7289 // Utility to handle information from clauses associated with a given 7290 // construct that use mappable expressions (e.g. 'map' clause, 'to' clause). 7291 // It provides a convenient interface to obtain the information and generate 7292 // code for that information. 7293 class MappableExprsHandler { 7294 public: 7295 /// Values for bit flags used to specify the mapping type for 7296 /// offloading. 7297 enum OpenMPOffloadMappingFlags : uint64_t { 7298 /// No flags 7299 OMP_MAP_NONE = 0x0, 7300 /// Allocate memory on the device and move data from host to device. 7301 OMP_MAP_TO = 0x01, 7302 /// Allocate memory on the device and move data from device to host. 7303 OMP_MAP_FROM = 0x02, 7304 /// Always perform the requested mapping action on the element, even 7305 /// if it was already mapped before. 7306 OMP_MAP_ALWAYS = 0x04, 7307 /// Delete the element from the device environment, ignoring the 7308 /// current reference count associated with the element. 7309 OMP_MAP_DELETE = 0x08, 7310 /// The element being mapped is a pointer-pointee pair; both the 7311 /// pointer and the pointee should be mapped. 7312 OMP_MAP_PTR_AND_OBJ = 0x10, 7313 /// This flags signals that the base address of an entry should be 7314 /// passed to the target kernel as an argument. 7315 OMP_MAP_TARGET_PARAM = 0x20, 7316 /// Signal that the runtime library has to return the device pointer 7317 /// in the current position for the data being mapped. Used when we have the 7318 /// use_device_ptr or use_device_addr clause. 
7319 OMP_MAP_RETURN_PARAM = 0x40, 7320 /// This flag signals that the reference being passed is a pointer to 7321 /// private data. 7322 OMP_MAP_PRIVATE = 0x80, 7323 /// Pass the element to the device by value. 7324 OMP_MAP_LITERAL = 0x100, 7325 /// Implicit map 7326 OMP_MAP_IMPLICIT = 0x200, 7327 /// Close is a hint to the runtime to allocate memory close to 7328 /// the target device. 7329 OMP_MAP_CLOSE = 0x400, 7330 /// 0x800 is reserved for compatibility with XLC. 7331 /// Produce a runtime error if the data is not already allocated. 7332 OMP_MAP_PRESENT = 0x1000, 7333 // Increment and decrement a separate reference counter so that the data 7334 // cannot be unmapped within the associated region. Thus, this flag is 7335 // intended to be used on 'target' and 'target data' directives because they 7336 // are inherently structured. It is not intended to be used on 'target 7337 // enter data' and 'target exit data' directives because they are inherently 7338 // dynamic. 7339 // This is an OpenMP extension for the sake of OpenACC support. 7340 OMP_MAP_OMPX_HOLD = 0x2000, 7341 /// Signal that the runtime library should use args as an array of 7342 /// descriptor_dim pointers and use args_size as dims. Used when we have 7343 /// non-contiguous list items in target update directive 7344 OMP_MAP_NON_CONTIG = 0x100000000000, 7345 /// The 16 MSBs of the flags indicate whether the entry is member of some 7346 /// struct/class. 7347 OMP_MAP_MEMBER_OF = 0xffff000000000000, 7348 LLVM_MARK_AS_BITMASK_ENUM(/* LargestFlag = */ OMP_MAP_MEMBER_OF), 7349 }; 7350 7351 /// Get the offset of the OMP_MAP_MEMBER_OF field. 7352 static unsigned getFlagMemberOffset() { 7353 unsigned Offset = 0; 7354 for (uint64_t Remain = OMP_MAP_MEMBER_OF; !(Remain & 1); 7355 Remain = Remain >> 1) 7356 Offset++; 7357 return Offset; 7358 } 7359 7360 /// Class that holds debugging information for a data mapping to be passed to 7361 /// the runtime library. 
7362 class MappingExprInfo { 7363 /// The variable declaration used for the data mapping. 7364 const ValueDecl *MapDecl = nullptr; 7365 /// The original expression used in the map clause, or null if there is 7366 /// none. 7367 const Expr *MapExpr = nullptr; 7368 7369 public: 7370 MappingExprInfo(const ValueDecl *MapDecl, const Expr *MapExpr = nullptr) 7371 : MapDecl(MapDecl), MapExpr(MapExpr) {} 7372 7373 const ValueDecl *getMapDecl() const { return MapDecl; } 7374 const Expr *getMapExpr() const { return MapExpr; } 7375 }; 7376 7377 /// Class that associates information with a base pointer to be passed to the 7378 /// runtime library. 7379 class BasePointerInfo { 7380 /// The base pointer. 7381 llvm::Value *Ptr = nullptr; 7382 /// The base declaration that refers to this device pointer, or null if 7383 /// there is none. 7384 const ValueDecl *DevPtrDecl = nullptr; 7385 7386 public: 7387 BasePointerInfo(llvm::Value *Ptr, const ValueDecl *DevPtrDecl = nullptr) 7388 : Ptr(Ptr), DevPtrDecl(DevPtrDecl) {} 7389 llvm::Value *operator*() const { return Ptr; } 7390 const ValueDecl *getDevicePtrDecl() const { return DevPtrDecl; } 7391 void setDevicePtrDecl(const ValueDecl *D) { DevPtrDecl = D; } 7392 }; 7393 7394 using MapExprsArrayTy = SmallVector<MappingExprInfo, 4>; 7395 using MapBaseValuesArrayTy = SmallVector<BasePointerInfo, 4>; 7396 using MapValuesArrayTy = SmallVector<llvm::Value *, 4>; 7397 using MapFlagsArrayTy = SmallVector<OpenMPOffloadMappingFlags, 4>; 7398 using MapMappersArrayTy = SmallVector<const ValueDecl *, 4>; 7399 using MapDimArrayTy = SmallVector<uint64_t, 4>; 7400 using MapNonContiguousArrayTy = SmallVector<MapValuesArrayTy, 4>; 7401 7402 /// This structure contains combined information generated for mappable 7403 /// clauses, including base pointers, pointers, sizes, map types, user-defined 7404 /// mappers, and non-contiguous information. 
7405 struct MapCombinedInfoTy { 7406 struct StructNonContiguousInfo { 7407 bool IsNonContiguous = false; 7408 MapDimArrayTy Dims; 7409 MapNonContiguousArrayTy Offsets; 7410 MapNonContiguousArrayTy Counts; 7411 MapNonContiguousArrayTy Strides; 7412 }; 7413 MapExprsArrayTy Exprs; 7414 MapBaseValuesArrayTy BasePointers; 7415 MapValuesArrayTy Pointers; 7416 MapValuesArrayTy Sizes; 7417 MapFlagsArrayTy Types; 7418 MapMappersArrayTy Mappers; 7419 StructNonContiguousInfo NonContigInfo; 7420 7421 /// Append arrays in \a CurInfo. 7422 void append(MapCombinedInfoTy &CurInfo) { 7423 Exprs.append(CurInfo.Exprs.begin(), CurInfo.Exprs.end()); 7424 BasePointers.append(CurInfo.BasePointers.begin(), 7425 CurInfo.BasePointers.end()); 7426 Pointers.append(CurInfo.Pointers.begin(), CurInfo.Pointers.end()); 7427 Sizes.append(CurInfo.Sizes.begin(), CurInfo.Sizes.end()); 7428 Types.append(CurInfo.Types.begin(), CurInfo.Types.end()); 7429 Mappers.append(CurInfo.Mappers.begin(), CurInfo.Mappers.end()); 7430 NonContigInfo.Dims.append(CurInfo.NonContigInfo.Dims.begin(), 7431 CurInfo.NonContigInfo.Dims.end()); 7432 NonContigInfo.Offsets.append(CurInfo.NonContigInfo.Offsets.begin(), 7433 CurInfo.NonContigInfo.Offsets.end()); 7434 NonContigInfo.Counts.append(CurInfo.NonContigInfo.Counts.begin(), 7435 CurInfo.NonContigInfo.Counts.end()); 7436 NonContigInfo.Strides.append(CurInfo.NonContigInfo.Strides.begin(), 7437 CurInfo.NonContigInfo.Strides.end()); 7438 } 7439 }; 7440 7441 /// Map between a struct and the its lowest & highest elements which have been 7442 /// mapped. 
7443 /// [ValueDecl *] --> {LE(FieldIndex, Pointer), 7444 /// HE(FieldIndex, Pointer)} 7445 struct StructRangeInfoTy { 7446 MapCombinedInfoTy PreliminaryMapData; 7447 std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> LowestElem = { 7448 0, Address::invalid()}; 7449 std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> HighestElem = { 7450 0, Address::invalid()}; 7451 Address Base = Address::invalid(); 7452 Address LB = Address::invalid(); 7453 bool IsArraySection = false; 7454 bool HasCompleteRecord = false; 7455 }; 7456 7457 private: 7458 /// Kind that defines how a device pointer has to be returned. 7459 struct MapInfo { 7460 OMPClauseMappableExprCommon::MappableExprComponentListRef Components; 7461 OpenMPMapClauseKind MapType = OMPC_MAP_unknown; 7462 ArrayRef<OpenMPMapModifierKind> MapModifiers; 7463 ArrayRef<OpenMPMotionModifierKind> MotionModifiers; 7464 bool ReturnDevicePointer = false; 7465 bool IsImplicit = false; 7466 const ValueDecl *Mapper = nullptr; 7467 const Expr *VarRef = nullptr; 7468 bool ForDeviceAddr = false; 7469 7470 MapInfo() = default; 7471 MapInfo( 7472 OMPClauseMappableExprCommon::MappableExprComponentListRef Components, 7473 OpenMPMapClauseKind MapType, 7474 ArrayRef<OpenMPMapModifierKind> MapModifiers, 7475 ArrayRef<OpenMPMotionModifierKind> MotionModifiers, 7476 bool ReturnDevicePointer, bool IsImplicit, 7477 const ValueDecl *Mapper = nullptr, const Expr *VarRef = nullptr, 7478 bool ForDeviceAddr = false) 7479 : Components(Components), MapType(MapType), MapModifiers(MapModifiers), 7480 MotionModifiers(MotionModifiers), 7481 ReturnDevicePointer(ReturnDevicePointer), IsImplicit(IsImplicit), 7482 Mapper(Mapper), VarRef(VarRef), ForDeviceAddr(ForDeviceAddr) {} 7483 }; 7484 7485 /// If use_device_ptr or use_device_addr is used on a decl which is a struct 7486 /// member and there is no map information about it, then emission of that 7487 /// entry is deferred until the whole struct has been processed. 
7488 struct DeferredDevicePtrEntryTy { 7489 const Expr *IE = nullptr; 7490 const ValueDecl *VD = nullptr; 7491 bool ForDeviceAddr = false; 7492 7493 DeferredDevicePtrEntryTy(const Expr *IE, const ValueDecl *VD, 7494 bool ForDeviceAddr) 7495 : IE(IE), VD(VD), ForDeviceAddr(ForDeviceAddr) {} 7496 }; 7497 7498 /// The target directive from where the mappable clauses were extracted. It 7499 /// is either a executable directive or a user-defined mapper directive. 7500 llvm::PointerUnion<const OMPExecutableDirective *, 7501 const OMPDeclareMapperDecl *> 7502 CurDir; 7503 7504 /// Function the directive is being generated for. 7505 CodeGenFunction &CGF; 7506 7507 /// Set of all first private variables in the current directive. 7508 /// bool data is set to true if the variable is implicitly marked as 7509 /// firstprivate, false otherwise. 7510 llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, bool> FirstPrivateDecls; 7511 7512 /// Map between device pointer declarations and their expression components. 7513 /// The key value for declarations in 'this' is null. 7514 llvm::DenseMap< 7515 const ValueDecl *, 7516 SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>> 7517 DevPointersMap; 7518 7519 /// Map between lambda declarations and their map type. 7520 llvm::DenseMap<const ValueDecl *, const OMPMapClause *> LambdasMap; 7521 7522 llvm::Value *getExprTypeSize(const Expr *E) const { 7523 QualType ExprTy = E->getType().getCanonicalType(); 7524 7525 // Calculate the size for array shaping expression. 
7526 if (const auto *OAE = dyn_cast<OMPArrayShapingExpr>(E)) { 7527 llvm::Value *Size = 7528 CGF.getTypeSize(OAE->getBase()->getType()->getPointeeType()); 7529 for (const Expr *SE : OAE->getDimensions()) { 7530 llvm::Value *Sz = CGF.EmitScalarExpr(SE); 7531 Sz = CGF.EmitScalarConversion(Sz, SE->getType(), 7532 CGF.getContext().getSizeType(), 7533 SE->getExprLoc()); 7534 Size = CGF.Builder.CreateNUWMul(Size, Sz); 7535 } 7536 return Size; 7537 } 7538 7539 // Reference types are ignored for mapping purposes. 7540 if (const auto *RefTy = ExprTy->getAs<ReferenceType>()) 7541 ExprTy = RefTy->getPointeeType().getCanonicalType(); 7542 7543 // Given that an array section is considered a built-in type, we need to 7544 // do the calculation based on the length of the section instead of relying 7545 // on CGF.getTypeSize(E->getType()). 7546 if (const auto *OAE = dyn_cast<OMPArraySectionExpr>(E)) { 7547 QualType BaseTy = OMPArraySectionExpr::getBaseOriginalType( 7548 OAE->getBase()->IgnoreParenImpCasts()) 7549 .getCanonicalType(); 7550 7551 // If there is no length associated with the expression and lower bound is 7552 // not specified too, that means we are using the whole length of the 7553 // base. 7554 if (!OAE->getLength() && OAE->getColonLocFirst().isValid() && 7555 !OAE->getLowerBound()) 7556 return CGF.getTypeSize(BaseTy); 7557 7558 llvm::Value *ElemSize; 7559 if (const auto *PTy = BaseTy->getAs<PointerType>()) { 7560 ElemSize = CGF.getTypeSize(PTy->getPointeeType().getCanonicalType()); 7561 } else { 7562 const auto *ATy = cast<ArrayType>(BaseTy.getTypePtr()); 7563 assert(ATy && "Expecting array type if not a pointer type."); 7564 ElemSize = CGF.getTypeSize(ATy->getElementType().getCanonicalType()); 7565 } 7566 7567 // If we don't have a length at this point, that is because we have an 7568 // array section with a single element. 
7569 if (!OAE->getLength() && OAE->getColonLocFirst().isInvalid()) 7570 return ElemSize; 7571 7572 if (const Expr *LenExpr = OAE->getLength()) { 7573 llvm::Value *LengthVal = CGF.EmitScalarExpr(LenExpr); 7574 LengthVal = CGF.EmitScalarConversion(LengthVal, LenExpr->getType(), 7575 CGF.getContext().getSizeType(), 7576 LenExpr->getExprLoc()); 7577 return CGF.Builder.CreateNUWMul(LengthVal, ElemSize); 7578 } 7579 assert(!OAE->getLength() && OAE->getColonLocFirst().isValid() && 7580 OAE->getLowerBound() && "expected array_section[lb:]."); 7581 // Size = sizetype - lb * elemtype; 7582 llvm::Value *LengthVal = CGF.getTypeSize(BaseTy); 7583 llvm::Value *LBVal = CGF.EmitScalarExpr(OAE->getLowerBound()); 7584 LBVal = CGF.EmitScalarConversion(LBVal, OAE->getLowerBound()->getType(), 7585 CGF.getContext().getSizeType(), 7586 OAE->getLowerBound()->getExprLoc()); 7587 LBVal = CGF.Builder.CreateNUWMul(LBVal, ElemSize); 7588 llvm::Value *Cmp = CGF.Builder.CreateICmpUGT(LengthVal, LBVal); 7589 llvm::Value *TrueVal = CGF.Builder.CreateNUWSub(LengthVal, LBVal); 7590 LengthVal = CGF.Builder.CreateSelect( 7591 Cmp, TrueVal, llvm::ConstantInt::get(CGF.SizeTy, 0)); 7592 return LengthVal; 7593 } 7594 return CGF.getTypeSize(ExprTy); 7595 } 7596 7597 /// Return the corresponding bits for a given map clause modifier. Add 7598 /// a flag marking the map as a pointer if requested. Add a flag marking the 7599 /// map as the first one of a series of maps that relate to the same map 7600 /// expression. 7601 OpenMPOffloadMappingFlags getMapTypeBits( 7602 OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers, 7603 ArrayRef<OpenMPMotionModifierKind> MotionModifiers, bool IsImplicit, 7604 bool AddPtrFlag, bool AddIsTargetParamFlag, bool IsNonContiguous) const { 7605 OpenMPOffloadMappingFlags Bits = 7606 IsImplicit ? 
OMP_MAP_IMPLICIT : OMP_MAP_NONE; 7607 switch (MapType) { 7608 case OMPC_MAP_alloc: 7609 case OMPC_MAP_release: 7610 // alloc and release is the default behavior in the runtime library, i.e. 7611 // if we don't pass any bits alloc/release that is what the runtime is 7612 // going to do. Therefore, we don't need to signal anything for these two 7613 // type modifiers. 7614 break; 7615 case OMPC_MAP_to: 7616 Bits |= OMP_MAP_TO; 7617 break; 7618 case OMPC_MAP_from: 7619 Bits |= OMP_MAP_FROM; 7620 break; 7621 case OMPC_MAP_tofrom: 7622 Bits |= OMP_MAP_TO | OMP_MAP_FROM; 7623 break; 7624 case OMPC_MAP_delete: 7625 Bits |= OMP_MAP_DELETE; 7626 break; 7627 case OMPC_MAP_unknown: 7628 llvm_unreachable("Unexpected map type!"); 7629 } 7630 if (AddPtrFlag) 7631 Bits |= OMP_MAP_PTR_AND_OBJ; 7632 if (AddIsTargetParamFlag) 7633 Bits |= OMP_MAP_TARGET_PARAM; 7634 if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_always)) 7635 Bits |= OMP_MAP_ALWAYS; 7636 if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_close)) 7637 Bits |= OMP_MAP_CLOSE; 7638 if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_present) || 7639 llvm::is_contained(MotionModifiers, OMPC_MOTION_MODIFIER_present)) 7640 Bits |= OMP_MAP_PRESENT; 7641 if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_ompx_hold)) 7642 Bits |= OMP_MAP_OMPX_HOLD; 7643 if (IsNonContiguous) 7644 Bits |= OMP_MAP_NON_CONTIG; 7645 return Bits; 7646 } 7647 7648 /// Return true if the provided expression is a final array section. A 7649 /// final array section, is one whose length can't be proved to be one. 7650 bool isFinalArraySectionExpression(const Expr *E) const { 7651 const auto *OASE = dyn_cast<OMPArraySectionExpr>(E); 7652 7653 // It is not an array section and therefore not a unity-size one. 7654 if (!OASE) 7655 return false; 7656 7657 // An array section with no colon always refer to a single element. 
7658 if (OASE->getColonLocFirst().isInvalid()) 7659 return false; 7660 7661 const Expr *Length = OASE->getLength(); 7662 7663 // If we don't have a length we have to check if the array has size 1 7664 // for this dimension. Also, we should always expect a length if the 7665 // base type is pointer. 7666 if (!Length) { 7667 QualType BaseQTy = OMPArraySectionExpr::getBaseOriginalType( 7668 OASE->getBase()->IgnoreParenImpCasts()) 7669 .getCanonicalType(); 7670 if (const auto *ATy = dyn_cast<ConstantArrayType>(BaseQTy.getTypePtr())) 7671 return ATy->getSize().getSExtValue() != 1; 7672 // If we don't have a constant dimension length, we have to consider 7673 // the current section as having any size, so it is not necessarily 7674 // unitary. If it happen to be unity size, that's user fault. 7675 return true; 7676 } 7677 7678 // Check if the length evaluates to 1. 7679 Expr::EvalResult Result; 7680 if (!Length->EvaluateAsInt(Result, CGF.getContext())) 7681 return true; // Can have more that size 1. 7682 7683 llvm::APSInt ConstLength = Result.Val.getInt(); 7684 return ConstLength.getSExtValue() != 1; 7685 } 7686 7687 /// Generate the base pointers, section pointers, sizes, map type bits, and 7688 /// user-defined mappers (all included in \a CombinedInfo) for the provided 7689 /// map type, map or motion modifiers, and expression components. 7690 /// \a IsFirstComponent should be set to true if the provided set of 7691 /// components is the first associated with a capture. 
7692 void generateInfoForComponentList( 7693 OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers, 7694 ArrayRef<OpenMPMotionModifierKind> MotionModifiers, 7695 OMPClauseMappableExprCommon::MappableExprComponentListRef Components, 7696 MapCombinedInfoTy &CombinedInfo, StructRangeInfoTy &PartialStruct, 7697 bool IsFirstComponentList, bool IsImplicit, 7698 const ValueDecl *Mapper = nullptr, bool ForDeviceAddr = false, 7699 const ValueDecl *BaseDecl = nullptr, const Expr *MapExpr = nullptr, 7700 ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef> 7701 OverlappedElements = llvm::None) const { 7702 // The following summarizes what has to be generated for each map and the 7703 // types below. The generated information is expressed in this order: 7704 // base pointer, section pointer, size, flags 7705 // (to add to the ones that come from the map type and modifier). 7706 // 7707 // double d; 7708 // int i[100]; 7709 // float *p; 7710 // 7711 // struct S1 { 7712 // int i; 7713 // float f[50]; 7714 // } 7715 // struct S2 { 7716 // int i; 7717 // float f[50]; 7718 // S1 s; 7719 // double *p; 7720 // struct S2 *ps; 7721 // int &ref; 7722 // } 7723 // S2 s; 7724 // S2 *ps; 7725 // 7726 // map(d) 7727 // &d, &d, sizeof(double), TARGET_PARAM | TO | FROM 7728 // 7729 // map(i) 7730 // &i, &i, 100*sizeof(int), TARGET_PARAM | TO | FROM 7731 // 7732 // map(i[1:23]) 7733 // &i(=&i[0]), &i[1], 23*sizeof(int), TARGET_PARAM | TO | FROM 7734 // 7735 // map(p) 7736 // &p, &p, sizeof(float*), TARGET_PARAM | TO | FROM 7737 // 7738 // map(p[1:24]) 7739 // &p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM | PTR_AND_OBJ 7740 // in unified shared memory mode or for local pointers 7741 // p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM 7742 // 7743 // map(s) 7744 // &s, &s, sizeof(S2), TARGET_PARAM | TO | FROM 7745 // 7746 // map(s.i) 7747 // &s, &(s.i), sizeof(int), TARGET_PARAM | TO | FROM 7748 // 7749 // map(s.s.f) 7750 // &s, &(s.s.f[0]), 
50*sizeof(float), TARGET_PARAM | TO | FROM 7751 // 7752 // map(s.p) 7753 // &s, &(s.p), sizeof(double*), TARGET_PARAM | TO | FROM 7754 // 7755 // map(to: s.p[:22]) 7756 // &s, &(s.p), sizeof(double*), TARGET_PARAM (*) 7757 // &s, &(s.p), sizeof(double*), MEMBER_OF(1) (**) 7758 // &(s.p), &(s.p[0]), 22*sizeof(double), 7759 // MEMBER_OF(1) | PTR_AND_OBJ | TO (***) 7760 // (*) alloc space for struct members, only this is a target parameter 7761 // (**) map the pointer (nothing to be mapped in this example) (the compiler 7762 // optimizes this entry out, same in the examples below) 7763 // (***) map the pointee (map: to) 7764 // 7765 // map(to: s.ref) 7766 // &s, &(s.ref), sizeof(int*), TARGET_PARAM (*) 7767 // &s, &(s.ref), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | TO (***) 7768 // (*) alloc space for struct members, only this is a target parameter 7769 // (**) map the pointer (nothing to be mapped in this example) (the compiler 7770 // optimizes this entry out, same in the examples below) 7771 // (***) map the pointee (map: to) 7772 // 7773 // map(s.ps) 7774 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM | TO | FROM 7775 // 7776 // map(from: s.ps->s.i) 7777 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM 7778 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1) 7779 // &(s.ps), &(s.ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM 7780 // 7781 // map(to: s.ps->ps) 7782 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM 7783 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1) 7784 // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | TO 7785 // 7786 // map(s.ps->ps->ps) 7787 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM 7788 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1) 7789 // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ 7790 // &(s.ps->ps), &(s.ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM 7791 // 7792 // map(to: s.ps->ps->s.f[:22]) 7793 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM 7794 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1) 7795 // &(s.ps), &(s.ps->ps), sizeof(S2*), 
MEMBER_OF(1) | PTR_AND_OBJ 7796 // &(s.ps->ps), &(s.ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO 7797 // 7798 // map(ps) 7799 // &ps, &ps, sizeof(S2*), TARGET_PARAM | TO | FROM 7800 // 7801 // map(ps->i) 7802 // ps, &(ps->i), sizeof(int), TARGET_PARAM | TO | FROM 7803 // 7804 // map(ps->s.f) 7805 // ps, &(ps->s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM 7806 // 7807 // map(from: ps->p) 7808 // ps, &(ps->p), sizeof(double*), TARGET_PARAM | FROM 7809 // 7810 // map(to: ps->p[:22]) 7811 // ps, &(ps->p), sizeof(double*), TARGET_PARAM 7812 // ps, &(ps->p), sizeof(double*), MEMBER_OF(1) 7813 // &(ps->p), &(ps->p[0]), 22*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | TO 7814 // 7815 // map(ps->ps) 7816 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM | TO | FROM 7817 // 7818 // map(from: ps->ps->s.i) 7819 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM 7820 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1) 7821 // &(ps->ps), &(ps->ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM 7822 // 7823 // map(from: ps->ps->ps) 7824 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM 7825 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1) 7826 // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | FROM 7827 // 7828 // map(ps->ps->ps->ps) 7829 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM 7830 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1) 7831 // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ 7832 // &(ps->ps->ps), &(ps->ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM 7833 // 7834 // map(to: ps->ps->ps->s.f[:22]) 7835 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM 7836 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1) 7837 // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ 7838 // &(ps->ps->ps), &(ps->ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO 7839 // 7840 // map(to: s.f[:22]) map(from: s.p[:33]) 7841 // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1) + 7842 // sizeof(double*) (**), TARGET_PARAM 7843 // &s, &(s.f[0]), 22*sizeof(float), 
MEMBER_OF(1) | TO 7844 // &s, &(s.p), sizeof(double*), MEMBER_OF(1) 7845 // &(s.p), &(s.p[0]), 33*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | FROM 7846 // (*) allocate contiguous space needed to fit all mapped members even if 7847 // we allocate space for members not mapped (in this example, 7848 // s.f[22..49] and s.s are not mapped, yet we must allocate space for 7849 // them as well because they fall between &s.f[0] and &s.p) 7850 // 7851 // map(from: s.f[:22]) map(to: ps->p[:33]) 7852 // &s, &(s.f[0]), 22*sizeof(float), TARGET_PARAM | FROM 7853 // ps, &(ps->p), sizeof(S2*), TARGET_PARAM 7854 // ps, &(ps->p), sizeof(double*), MEMBER_OF(2) (*) 7855 // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(2) | PTR_AND_OBJ | TO 7856 // (*) the struct this entry pertains to is the 2nd element in the list of 7857 // arguments, hence MEMBER_OF(2) 7858 // 7859 // map(from: s.f[:22], s.s) map(to: ps->p[:33]) 7860 // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1), TARGET_PARAM 7861 // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | FROM 7862 // &s, &(s.s), sizeof(struct S1), MEMBER_OF(1) | FROM 7863 // ps, &(ps->p), sizeof(S2*), TARGET_PARAM 7864 // ps, &(ps->p), sizeof(double*), MEMBER_OF(4) (*) 7865 // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(4) | PTR_AND_OBJ | TO 7866 // (*) the struct this entry pertains to is the 4th element in the list 7867 // of arguments, hence MEMBER_OF(4) 7868 7869 // Track if the map information being generated is the first for a capture. 7870 bool IsCaptureFirstInfo = IsFirstComponentList; 7871 // When the variable is on a declare target link or in a to clause with 7872 // unified memory, a reference is needed to hold the host/device address 7873 // of the variable. 7874 bool RequiresReference = false; 7875 7876 // Scan the components from the base to the complete expression. 
7877 auto CI = Components.rbegin(); 7878 auto CE = Components.rend(); 7879 auto I = CI; 7880 7881 // Track if the map information being generated is the first for a list of 7882 // components. 7883 bool IsExpressionFirstInfo = true; 7884 bool FirstPointerInComplexData = false; 7885 Address BP = Address::invalid(); 7886 const Expr *AssocExpr = I->getAssociatedExpression(); 7887 const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr); 7888 const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr); 7889 const auto *OAShE = dyn_cast<OMPArrayShapingExpr>(AssocExpr); 7890 7891 if (isa<MemberExpr>(AssocExpr)) { 7892 // The base is the 'this' pointer. The content of the pointer is going 7893 // to be the base of the field being mapped. 7894 BP = CGF.LoadCXXThisAddress(); 7895 } else if ((AE && isa<CXXThisExpr>(AE->getBase()->IgnoreParenImpCasts())) || 7896 (OASE && 7897 isa<CXXThisExpr>(OASE->getBase()->IgnoreParenImpCasts()))) { 7898 BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF); 7899 } else if (OAShE && 7900 isa<CXXThisExpr>(OAShE->getBase()->IgnoreParenCasts())) { 7901 BP = Address( 7902 CGF.EmitScalarExpr(OAShE->getBase()), 7903 CGF.getContext().getTypeAlignInChars(OAShE->getBase()->getType())); 7904 } else { 7905 // The base is the reference to the variable. 7906 // BP = &Var. 7907 BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF); 7908 if (const auto *VD = 7909 dyn_cast_or_null<VarDecl>(I->getAssociatedDeclaration())) { 7910 if (llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 7911 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD)) { 7912 if ((*Res == OMPDeclareTargetDeclAttr::MT_Link) || 7913 (*Res == OMPDeclareTargetDeclAttr::MT_To && 7914 CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())) { 7915 RequiresReference = true; 7916 BP = CGF.CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD); 7917 } 7918 } 7919 } 7920 7921 // If the variable is a pointer and is being dereferenced (i.e. 
is not 7922 // the last component), the base has to be the pointer itself, not its 7923 // reference. References are ignored for mapping purposes. 7924 QualType Ty = 7925 I->getAssociatedDeclaration()->getType().getNonReferenceType(); 7926 if (Ty->isAnyPointerType() && std::next(I) != CE) { 7927 // No need to generate individual map information for the pointer, it 7928 // can be associated with the combined storage if shared memory mode is 7929 // active or the base declaration is not global variable. 7930 const auto *VD = dyn_cast<VarDecl>(I->getAssociatedDeclaration()); 7931 if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() || 7932 !VD || VD->hasLocalStorage()) 7933 BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>()); 7934 else 7935 FirstPointerInComplexData = true; 7936 ++I; 7937 } 7938 } 7939 7940 // Track whether a component of the list should be marked as MEMBER_OF some 7941 // combined entry (for partial structs). Only the first PTR_AND_OBJ entry 7942 // in a component list should be marked as MEMBER_OF, all subsequent entries 7943 // do not belong to the base struct. E.g. 7944 // struct S2 s; 7945 // s.ps->ps->ps->f[:] 7946 // (1) (2) (3) (4) 7947 // ps(1) is a member pointer, ps(2) is a pointee of ps(1), so it is a 7948 // PTR_AND_OBJ entry; the PTR is ps(1), so MEMBER_OF the base struct. ps(3) 7949 // is the pointee of ps(2) which is not member of struct s, so it should not 7950 // be marked as such (it is still PTR_AND_OBJ). 7951 // The variable is initialized to false so that PTR_AND_OBJ entries which 7952 // are not struct members are not considered (e.g. array of pointers to 7953 // data). 7954 bool ShouldBeMemberOf = false; 7955 7956 // Variable keeping track of whether or not we have encountered a component 7957 // in the component list which is a member expression. 
Useful when we have a 7958 // pointer or a final array section, in which case it is the previous 7959 // component in the list which tells us whether we have a member expression. 7960 // E.g. X.f[:] 7961 // While processing the final array section "[:]" it is "f" which tells us 7962 // whether we are dealing with a member of a declared struct. 7963 const MemberExpr *EncounteredME = nullptr; 7964 7965 // Track for the total number of dimension. Start from one for the dummy 7966 // dimension. 7967 uint64_t DimSize = 1; 7968 7969 bool IsNonContiguous = CombinedInfo.NonContigInfo.IsNonContiguous; 7970 bool IsPrevMemberReference = false; 7971 7972 for (; I != CE; ++I) { 7973 // If the current component is member of a struct (parent struct) mark it. 7974 if (!EncounteredME) { 7975 EncounteredME = dyn_cast<MemberExpr>(I->getAssociatedExpression()); 7976 // If we encounter a PTR_AND_OBJ entry from now on it should be marked 7977 // as MEMBER_OF the parent struct. 7978 if (EncounteredME) { 7979 ShouldBeMemberOf = true; 7980 // Do not emit as complex pointer if this is actually not array-like 7981 // expression. 7982 if (FirstPointerInComplexData) { 7983 QualType Ty = std::prev(I) 7984 ->getAssociatedDeclaration() 7985 ->getType() 7986 .getNonReferenceType(); 7987 BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>()); 7988 FirstPointerInComplexData = false; 7989 } 7990 } 7991 } 7992 7993 auto Next = std::next(I); 7994 7995 // We need to generate the addresses and sizes if this is the last 7996 // component, if the component is a pointer or if it is an array section 7997 // whose length can't be proved to be one. If this is a pointer, it 7998 // becomes the base address for the following components. 7999 8000 // A final array section, is one whose length can't be proved to be one. 8001 // If the map item is non-contiguous then we don't treat any array section 8002 // as final array section. 
8003 bool IsFinalArraySection = 8004 !IsNonContiguous && 8005 isFinalArraySectionExpression(I->getAssociatedExpression()); 8006 8007 // If we have a declaration for the mapping use that, otherwise use 8008 // the base declaration of the map clause. 8009 const ValueDecl *MapDecl = (I->getAssociatedDeclaration()) 8010 ? I->getAssociatedDeclaration() 8011 : BaseDecl; 8012 MapExpr = (I->getAssociatedExpression()) ? I->getAssociatedExpression() 8013 : MapExpr; 8014 8015 // Get information on whether the element is a pointer. Have to do a 8016 // special treatment for array sections given that they are built-in 8017 // types. 8018 const auto *OASE = 8019 dyn_cast<OMPArraySectionExpr>(I->getAssociatedExpression()); 8020 const auto *OAShE = 8021 dyn_cast<OMPArrayShapingExpr>(I->getAssociatedExpression()); 8022 const auto *UO = dyn_cast<UnaryOperator>(I->getAssociatedExpression()); 8023 const auto *BO = dyn_cast<BinaryOperator>(I->getAssociatedExpression()); 8024 bool IsPointer = 8025 OAShE || 8026 (OASE && OMPArraySectionExpr::getBaseOriginalType(OASE) 8027 .getCanonicalType() 8028 ->isAnyPointerType()) || 8029 I->getAssociatedExpression()->getType()->isAnyPointerType(); 8030 bool IsMemberReference = isa<MemberExpr>(I->getAssociatedExpression()) && 8031 MapDecl && 8032 MapDecl->getType()->isLValueReferenceType(); 8033 bool IsNonDerefPointer = IsPointer && !UO && !BO && !IsNonContiguous; 8034 8035 if (OASE) 8036 ++DimSize; 8037 8038 if (Next == CE || IsMemberReference || IsNonDerefPointer || 8039 IsFinalArraySection) { 8040 // If this is not the last component, we expect the pointer to be 8041 // associated with an array expression or member expression. 
8042 assert((Next == CE || 8043 isa<MemberExpr>(Next->getAssociatedExpression()) || 8044 isa<ArraySubscriptExpr>(Next->getAssociatedExpression()) || 8045 isa<OMPArraySectionExpr>(Next->getAssociatedExpression()) || 8046 isa<OMPArrayShapingExpr>(Next->getAssociatedExpression()) || 8047 isa<UnaryOperator>(Next->getAssociatedExpression()) || 8048 isa<BinaryOperator>(Next->getAssociatedExpression())) && 8049 "Unexpected expression"); 8050 8051 Address LB = Address::invalid(); 8052 Address LowestElem = Address::invalid(); 8053 auto &&EmitMemberExprBase = [](CodeGenFunction &CGF, 8054 const MemberExpr *E) { 8055 const Expr *BaseExpr = E->getBase(); 8056 // If this is s.x, emit s as an lvalue. If it is s->x, emit s as a 8057 // scalar. 8058 LValue BaseLV; 8059 if (E->isArrow()) { 8060 LValueBaseInfo BaseInfo; 8061 TBAAAccessInfo TBAAInfo; 8062 Address Addr = 8063 CGF.EmitPointerWithAlignment(BaseExpr, &BaseInfo, &TBAAInfo); 8064 QualType PtrTy = BaseExpr->getType()->getPointeeType(); 8065 BaseLV = CGF.MakeAddrLValue(Addr, PtrTy, BaseInfo, TBAAInfo); 8066 } else { 8067 BaseLV = CGF.EmitOMPSharedLValue(BaseExpr); 8068 } 8069 return BaseLV; 8070 }; 8071 if (OAShE) { 8072 LowestElem = LB = Address(CGF.EmitScalarExpr(OAShE->getBase()), 8073 CGF.getContext().getTypeAlignInChars( 8074 OAShE->getBase()->getType())); 8075 } else if (IsMemberReference) { 8076 const auto *ME = cast<MemberExpr>(I->getAssociatedExpression()); 8077 LValue BaseLVal = EmitMemberExprBase(CGF, ME); 8078 LowestElem = CGF.EmitLValueForFieldInitialization( 8079 BaseLVal, cast<FieldDecl>(MapDecl)) 8080 .getAddress(CGF); 8081 LB = CGF.EmitLoadOfReferenceLValue(LowestElem, MapDecl->getType()) 8082 .getAddress(CGF); 8083 } else { 8084 LowestElem = LB = 8085 CGF.EmitOMPSharedLValue(I->getAssociatedExpression()) 8086 .getAddress(CGF); 8087 } 8088 8089 // If this component is a pointer inside the base struct then we don't 8090 // need to create any entry for it - it will be combined with the object 8091 // it is 
pointing to into a single PTR_AND_OBJ entry. 8092 bool IsMemberPointerOrAddr = 8093 EncounteredME && 8094 (((IsPointer || ForDeviceAddr) && 8095 I->getAssociatedExpression() == EncounteredME) || 8096 (IsPrevMemberReference && !IsPointer) || 8097 (IsMemberReference && Next != CE && 8098 !Next->getAssociatedExpression()->getType()->isPointerType())); 8099 if (!OverlappedElements.empty() && Next == CE) { 8100 // Handle base element with the info for overlapped elements. 8101 assert(!PartialStruct.Base.isValid() && "The base element is set."); 8102 assert(!IsPointer && 8103 "Unexpected base element with the pointer type."); 8104 // Mark the whole struct as the struct that requires allocation on the 8105 // device. 8106 PartialStruct.LowestElem = {0, LowestElem}; 8107 CharUnits TypeSize = CGF.getContext().getTypeSizeInChars( 8108 I->getAssociatedExpression()->getType()); 8109 Address HB = CGF.Builder.CreateConstGEP( 8110 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(LowestElem, 8111 CGF.VoidPtrTy), 8112 TypeSize.getQuantity() - 1); 8113 PartialStruct.HighestElem = { 8114 std::numeric_limits<decltype( 8115 PartialStruct.HighestElem.first)>::max(), 8116 HB}; 8117 PartialStruct.Base = BP; 8118 PartialStruct.LB = LB; 8119 assert( 8120 PartialStruct.PreliminaryMapData.BasePointers.empty() && 8121 "Overlapped elements must be used only once for the variable."); 8122 std::swap(PartialStruct.PreliminaryMapData, CombinedInfo); 8123 // Emit data for non-overlapped data. 8124 OpenMPOffloadMappingFlags Flags = 8125 OMP_MAP_MEMBER_OF | 8126 getMapTypeBits(MapType, MapModifiers, MotionModifiers, IsImplicit, 8127 /*AddPtrFlag=*/false, 8128 /*AddIsTargetParamFlag=*/false, IsNonContiguous); 8129 llvm::Value *Size = nullptr; 8130 // Do bitcopy of all non-overlapped structure elements. 
8131 for (OMPClauseMappableExprCommon::MappableExprComponentListRef 8132 Component : OverlappedElements) { 8133 Address ComponentLB = Address::invalid(); 8134 for (const OMPClauseMappableExprCommon::MappableComponent &MC : 8135 Component) { 8136 if (const ValueDecl *VD = MC.getAssociatedDeclaration()) { 8137 const auto *FD = dyn_cast<FieldDecl>(VD); 8138 if (FD && FD->getType()->isLValueReferenceType()) { 8139 const auto *ME = 8140 cast<MemberExpr>(MC.getAssociatedExpression()); 8141 LValue BaseLVal = EmitMemberExprBase(CGF, ME); 8142 ComponentLB = 8143 CGF.EmitLValueForFieldInitialization(BaseLVal, FD) 8144 .getAddress(CGF); 8145 } else { 8146 ComponentLB = 8147 CGF.EmitOMPSharedLValue(MC.getAssociatedExpression()) 8148 .getAddress(CGF); 8149 } 8150 Size = CGF.Builder.CreatePtrDiff( 8151 CGF.EmitCastToVoidPtr(ComponentLB.getPointer()), 8152 CGF.EmitCastToVoidPtr(LB.getPointer())); 8153 break; 8154 } 8155 } 8156 assert(Size && "Failed to determine structure size"); 8157 CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr); 8158 CombinedInfo.BasePointers.push_back(BP.getPointer()); 8159 CombinedInfo.Pointers.push_back(LB.getPointer()); 8160 CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast( 8161 Size, CGF.Int64Ty, /*isSigned=*/true)); 8162 CombinedInfo.Types.push_back(Flags); 8163 CombinedInfo.Mappers.push_back(nullptr); 8164 CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? 
DimSize 8165 : 1); 8166 LB = CGF.Builder.CreateConstGEP(ComponentLB, 1); 8167 } 8168 CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr); 8169 CombinedInfo.BasePointers.push_back(BP.getPointer()); 8170 CombinedInfo.Pointers.push_back(LB.getPointer()); 8171 Size = CGF.Builder.CreatePtrDiff( 8172 CGF.Builder.CreateConstGEP(HB, 1).getPointer(), 8173 CGF.EmitCastToVoidPtr(LB.getPointer())); 8174 CombinedInfo.Sizes.push_back( 8175 CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true)); 8176 CombinedInfo.Types.push_back(Flags); 8177 CombinedInfo.Mappers.push_back(nullptr); 8178 CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize 8179 : 1); 8180 break; 8181 } 8182 llvm::Value *Size = getExprTypeSize(I->getAssociatedExpression()); 8183 if (!IsMemberPointerOrAddr || 8184 (Next == CE && MapType != OMPC_MAP_unknown)) { 8185 CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr); 8186 CombinedInfo.BasePointers.push_back(BP.getPointer()); 8187 CombinedInfo.Pointers.push_back(LB.getPointer()); 8188 CombinedInfo.Sizes.push_back( 8189 CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true)); 8190 CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize 8191 : 1); 8192 8193 // If Mapper is valid, the last component inherits the mapper. 8194 bool HasMapper = Mapper && Next == CE; 8195 CombinedInfo.Mappers.push_back(HasMapper ? Mapper : nullptr); 8196 8197 // We need to add a pointer flag for each map that comes from the 8198 // same expression except for the first one. We also need to signal 8199 // this map is the first one that relates with the current capture 8200 // (there is a set of entries for each capture). 
8201 OpenMPOffloadMappingFlags Flags = getMapTypeBits( 8202 MapType, MapModifiers, MotionModifiers, IsImplicit, 8203 !IsExpressionFirstInfo || RequiresReference || 8204 FirstPointerInComplexData || IsMemberReference, 8205 IsCaptureFirstInfo && !RequiresReference, IsNonContiguous); 8206 8207 if (!IsExpressionFirstInfo || IsMemberReference) { 8208 // If we have a PTR_AND_OBJ pair where the OBJ is a pointer as well, 8209 // then we reset the TO/FROM/ALWAYS/DELETE/CLOSE flags. 8210 if (IsPointer || (IsMemberReference && Next != CE)) 8211 Flags &= ~(OMP_MAP_TO | OMP_MAP_FROM | OMP_MAP_ALWAYS | 8212 OMP_MAP_DELETE | OMP_MAP_CLOSE); 8213 8214 if (ShouldBeMemberOf) { 8215 // Set placeholder value MEMBER_OF=FFFF to indicate that the flag 8216 // should be later updated with the correct value of MEMBER_OF. 8217 Flags |= OMP_MAP_MEMBER_OF; 8218 // From now on, all subsequent PTR_AND_OBJ entries should not be 8219 // marked as MEMBER_OF. 8220 ShouldBeMemberOf = false; 8221 } 8222 } 8223 8224 CombinedInfo.Types.push_back(Flags); 8225 } 8226 8227 // If we have encountered a member expression so far, keep track of the 8228 // mapped member. If the parent is "*this", then the value declaration 8229 // is nullptr. 
8230 if (EncounteredME) { 8231 const auto *FD = cast<FieldDecl>(EncounteredME->getMemberDecl()); 8232 unsigned FieldIndex = FD->getFieldIndex(); 8233 8234 // Update info about the lowest and highest elements for this struct 8235 if (!PartialStruct.Base.isValid()) { 8236 PartialStruct.LowestElem = {FieldIndex, LowestElem}; 8237 if (IsFinalArraySection) { 8238 Address HB = 8239 CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false) 8240 .getAddress(CGF); 8241 PartialStruct.HighestElem = {FieldIndex, HB}; 8242 } else { 8243 PartialStruct.HighestElem = {FieldIndex, LowestElem}; 8244 } 8245 PartialStruct.Base = BP; 8246 PartialStruct.LB = BP; 8247 } else if (FieldIndex < PartialStruct.LowestElem.first) { 8248 PartialStruct.LowestElem = {FieldIndex, LowestElem}; 8249 } else if (FieldIndex > PartialStruct.HighestElem.first) { 8250 PartialStruct.HighestElem = {FieldIndex, LowestElem}; 8251 } 8252 } 8253 8254 // Need to emit combined struct for array sections. 8255 if (IsFinalArraySection || IsNonContiguous) 8256 PartialStruct.IsArraySection = true; 8257 8258 // If we have a final array section, we are done with this expression. 8259 if (IsFinalArraySection) 8260 break; 8261 8262 // The pointer becomes the base for the next element. 8263 if (Next != CE) 8264 BP = IsMemberReference ? LowestElem : LB; 8265 8266 IsExpressionFirstInfo = false; 8267 IsCaptureFirstInfo = false; 8268 FirstPointerInComplexData = false; 8269 IsPrevMemberReference = IsMemberReference; 8270 } else if (FirstPointerInComplexData) { 8271 QualType Ty = Components.rbegin() 8272 ->getAssociatedDeclaration() 8273 ->getType() 8274 .getNonReferenceType(); 8275 BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>()); 8276 FirstPointerInComplexData = false; 8277 } 8278 } 8279 // If ran into the whole component - allocate the space for the whole 8280 // record. 
8281 if (!EncounteredME) 8282 PartialStruct.HasCompleteRecord = true; 8283 8284 if (!IsNonContiguous) 8285 return; 8286 8287 const ASTContext &Context = CGF.getContext(); 8288 8289 // For supporting stride in array section, we need to initialize the first 8290 // dimension size as 1, first offset as 0, and first count as 1 8291 MapValuesArrayTy CurOffsets = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 0)}; 8292 MapValuesArrayTy CurCounts = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)}; 8293 MapValuesArrayTy CurStrides; 8294 MapValuesArrayTy DimSizes{llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)}; 8295 uint64_t ElementTypeSize; 8296 8297 // Collect Size information for each dimension and get the element size as 8298 // the first Stride. For example, for `int arr[10][10]`, the DimSizes 8299 // should be [10, 10] and the first stride is 4 btyes. 8300 for (const OMPClauseMappableExprCommon::MappableComponent &Component : 8301 Components) { 8302 const Expr *AssocExpr = Component.getAssociatedExpression(); 8303 const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr); 8304 8305 if (!OASE) 8306 continue; 8307 8308 QualType Ty = OMPArraySectionExpr::getBaseOriginalType(OASE->getBase()); 8309 auto *CAT = Context.getAsConstantArrayType(Ty); 8310 auto *VAT = Context.getAsVariableArrayType(Ty); 8311 8312 // We need all the dimension size except for the last dimension. 8313 assert((VAT || CAT || &Component == &*Components.begin()) && 8314 "Should be either ConstantArray or VariableArray if not the " 8315 "first Component"); 8316 8317 // Get element size if CurStrides is empty. 
8318 if (CurStrides.empty()) { 8319 const Type *ElementType = nullptr; 8320 if (CAT) 8321 ElementType = CAT->getElementType().getTypePtr(); 8322 else if (VAT) 8323 ElementType = VAT->getElementType().getTypePtr(); 8324 else 8325 assert(&Component == &*Components.begin() && 8326 "Only expect pointer (non CAT or VAT) when this is the " 8327 "first Component"); 8328 // If ElementType is null, then it means the base is a pointer 8329 // (neither CAT nor VAT) and we'll attempt to get ElementType again 8330 // for next iteration. 8331 if (ElementType) { 8332 // For the case that having pointer as base, we need to remove one 8333 // level of indirection. 8334 if (&Component != &*Components.begin()) 8335 ElementType = ElementType->getPointeeOrArrayElementType(); 8336 ElementTypeSize = 8337 Context.getTypeSizeInChars(ElementType).getQuantity(); 8338 CurStrides.push_back( 8339 llvm::ConstantInt::get(CGF.Int64Ty, ElementTypeSize)); 8340 } 8341 } 8342 // Get dimension value except for the last dimension since we don't need 8343 // it. 8344 if (DimSizes.size() < Components.size() - 1) { 8345 if (CAT) 8346 DimSizes.push_back(llvm::ConstantInt::get( 8347 CGF.Int64Ty, CAT->getSize().getZExtValue())); 8348 else if (VAT) 8349 DimSizes.push_back(CGF.Builder.CreateIntCast( 8350 CGF.EmitScalarExpr(VAT->getSizeExpr()), CGF.Int64Ty, 8351 /*IsSigned=*/false)); 8352 } 8353 } 8354 8355 // Skip the dummy dimension since we have already have its information. 8356 auto DI = DimSizes.begin() + 1; 8357 // Product of dimension. 8358 llvm::Value *DimProd = 8359 llvm::ConstantInt::get(CGF.CGM.Int64Ty, ElementTypeSize); 8360 8361 // Collect info for non-contiguous. Notice that offset, count, and stride 8362 // are only meaningful for array-section, so we insert a null for anything 8363 // other than array-section. 8364 // Also, the size of offset, count, and stride are not the same as 8365 // pointers, base_pointers, sizes, or dims. 
Instead, the size of offset, 8366 // count, and stride are the same as the number of non-contiguous 8367 // declaration in target update to/from clause. 8368 for (const OMPClauseMappableExprCommon::MappableComponent &Component : 8369 Components) { 8370 const Expr *AssocExpr = Component.getAssociatedExpression(); 8371 8372 if (const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr)) { 8373 llvm::Value *Offset = CGF.Builder.CreateIntCast( 8374 CGF.EmitScalarExpr(AE->getIdx()), CGF.Int64Ty, 8375 /*isSigned=*/false); 8376 CurOffsets.push_back(Offset); 8377 CurCounts.push_back(llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/1)); 8378 CurStrides.push_back(CurStrides.back()); 8379 continue; 8380 } 8381 8382 const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr); 8383 8384 if (!OASE) 8385 continue; 8386 8387 // Offset 8388 const Expr *OffsetExpr = OASE->getLowerBound(); 8389 llvm::Value *Offset = nullptr; 8390 if (!OffsetExpr) { 8391 // If offset is absent, then we just set it to zero. 8392 Offset = llvm::ConstantInt::get(CGF.Int64Ty, 0); 8393 } else { 8394 Offset = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(OffsetExpr), 8395 CGF.Int64Ty, 8396 /*isSigned=*/false); 8397 } 8398 CurOffsets.push_back(Offset); 8399 8400 // Count 8401 const Expr *CountExpr = OASE->getLength(); 8402 llvm::Value *Count = nullptr; 8403 if (!CountExpr) { 8404 // In Clang, once a high dimension is an array section, we construct all 8405 // the lower dimension as array section, however, for case like 8406 // arr[0:2][2], Clang construct the inner dimension as an array section 8407 // but it actually is not in an array section form according to spec. 8408 if (!OASE->getColonLocFirst().isValid() && 8409 !OASE->getColonLocSecond().isValid()) { 8410 Count = llvm::ConstantInt::get(CGF.Int64Ty, 1); 8411 } else { 8412 // OpenMP 5.0, 2.1.5 Array Sections, Description. 
8413 // When the length is absent it defaults to ⌈(size − 8414 // lower-bound)/stride⌉, where size is the size of the array 8415 // dimension. 8416 const Expr *StrideExpr = OASE->getStride(); 8417 llvm::Value *Stride = 8418 StrideExpr 8419 ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr), 8420 CGF.Int64Ty, /*isSigned=*/false) 8421 : nullptr; 8422 if (Stride) 8423 Count = CGF.Builder.CreateUDiv( 8424 CGF.Builder.CreateNUWSub(*DI, Offset), Stride); 8425 else 8426 Count = CGF.Builder.CreateNUWSub(*DI, Offset); 8427 } 8428 } else { 8429 Count = CGF.EmitScalarExpr(CountExpr); 8430 } 8431 Count = CGF.Builder.CreateIntCast(Count, CGF.Int64Ty, /*isSigned=*/false); 8432 CurCounts.push_back(Count); 8433 8434 // Stride_n' = Stride_n * (D_0 * D_1 ... * D_n-1) * Unit size 8435 // Take `int arr[5][5][5]` and `arr[0:2:2][1:2:1][0:2:2]` as an example: 8436 // Offset Count Stride 8437 // D0 0 1 4 (int) <- dummy dimension 8438 // D1 0 2 8 (2 * (1) * 4) 8439 // D2 1 2 20 (1 * (1 * 5) * 4) 8440 // D3 0 2 200 (2 * (1 * 5 * 4) * 4) 8441 const Expr *StrideExpr = OASE->getStride(); 8442 llvm::Value *Stride = 8443 StrideExpr 8444 ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr), 8445 CGF.Int64Ty, /*isSigned=*/false) 8446 : nullptr; 8447 DimProd = CGF.Builder.CreateNUWMul(DimProd, *(DI - 1)); 8448 if (Stride) 8449 CurStrides.push_back(CGF.Builder.CreateNUWMul(DimProd, Stride)); 8450 else 8451 CurStrides.push_back(DimProd); 8452 if (DI != DimSizes.end()) 8453 ++DI; 8454 } 8455 8456 CombinedInfo.NonContigInfo.Offsets.push_back(CurOffsets); 8457 CombinedInfo.NonContigInfo.Counts.push_back(CurCounts); 8458 CombinedInfo.NonContigInfo.Strides.push_back(CurStrides); 8459 } 8460 8461 /// Return the adjusted map modifiers if the declaration a capture refers to 8462 /// appears in a first-private clause. This is expected to be used only with 8463 /// directives that start with 'target'. 
8464 MappableExprsHandler::OpenMPOffloadMappingFlags 8465 getMapModifiersForPrivateClauses(const CapturedStmt::Capture &Cap) const { 8466 assert(Cap.capturesVariable() && "Expected capture by reference only!"); 8467 8468 // A first private variable captured by reference will use only the 8469 // 'private ptr' and 'map to' flag. Return the right flags if the captured 8470 // declaration is known as first-private in this handler. 8471 if (FirstPrivateDecls.count(Cap.getCapturedVar())) { 8472 if (Cap.getCapturedVar()->getType()->isAnyPointerType()) 8473 return MappableExprsHandler::OMP_MAP_TO | 8474 MappableExprsHandler::OMP_MAP_PTR_AND_OBJ; 8475 return MappableExprsHandler::OMP_MAP_PRIVATE | 8476 MappableExprsHandler::OMP_MAP_TO; 8477 } 8478 auto I = LambdasMap.find(Cap.getCapturedVar()->getCanonicalDecl()); 8479 if (I != LambdasMap.end()) 8480 // for map(to: lambda): using user specified map type. 8481 return getMapTypeBits( 8482 I->getSecond()->getMapType(), I->getSecond()->getMapTypeModifiers(), 8483 /*MotionModifiers=*/llvm::None, I->getSecond()->isImplicit(), 8484 /*AddPtrFlag=*/false, 8485 /*AddIsTargetParamFlag=*/false, 8486 /*isNonContiguous=*/false); 8487 return MappableExprsHandler::OMP_MAP_TO | 8488 MappableExprsHandler::OMP_MAP_FROM; 8489 } 8490 8491 static OpenMPOffloadMappingFlags getMemberOfFlag(unsigned Position) { 8492 // Rotate by getFlagMemberOffset() bits. 8493 return static_cast<OpenMPOffloadMappingFlags>(((uint64_t)Position + 1) 8494 << getFlagMemberOffset()); 8495 } 8496 8497 static void setCorrectMemberOfFlag(OpenMPOffloadMappingFlags &Flags, 8498 OpenMPOffloadMappingFlags MemberOfFlag) { 8499 // If the entry is PTR_AND_OBJ but has not been marked with the special 8500 // placeholder value 0xFFFF in the MEMBER_OF field, then it should not be 8501 // marked as MEMBER_OF. 
8502 if ((Flags & OMP_MAP_PTR_AND_OBJ) && 8503 ((Flags & OMP_MAP_MEMBER_OF) != OMP_MAP_MEMBER_OF)) 8504 return; 8505 8506 // Reset the placeholder value to prepare the flag for the assignment of the 8507 // proper MEMBER_OF value. 8508 Flags &= ~OMP_MAP_MEMBER_OF; 8509 Flags |= MemberOfFlag; 8510 } 8511 8512 void getPlainLayout(const CXXRecordDecl *RD, 8513 llvm::SmallVectorImpl<const FieldDecl *> &Layout, 8514 bool AsBase) const { 8515 const CGRecordLayout &RL = CGF.getTypes().getCGRecordLayout(RD); 8516 8517 llvm::StructType *St = 8518 AsBase ? RL.getBaseSubobjectLLVMType() : RL.getLLVMType(); 8519 8520 unsigned NumElements = St->getNumElements(); 8521 llvm::SmallVector< 8522 llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>, 4> 8523 RecordLayout(NumElements); 8524 8525 // Fill bases. 8526 for (const auto &I : RD->bases()) { 8527 if (I.isVirtual()) 8528 continue; 8529 const auto *Base = I.getType()->getAsCXXRecordDecl(); 8530 // Ignore empty bases. 8531 if (Base->isEmpty() || CGF.getContext() 8532 .getASTRecordLayout(Base) 8533 .getNonVirtualSize() 8534 .isZero()) 8535 continue; 8536 8537 unsigned FieldIndex = RL.getNonVirtualBaseLLVMFieldNo(Base); 8538 RecordLayout[FieldIndex] = Base; 8539 } 8540 // Fill in virtual bases. 8541 for (const auto &I : RD->vbases()) { 8542 const auto *Base = I.getType()->getAsCXXRecordDecl(); 8543 // Ignore empty bases. 8544 if (Base->isEmpty()) 8545 continue; 8546 unsigned FieldIndex = RL.getVirtualBaseIndex(Base); 8547 if (RecordLayout[FieldIndex]) 8548 continue; 8549 RecordLayout[FieldIndex] = Base; 8550 } 8551 // Fill in all the fields. 8552 assert(!RD->isUnion() && "Unexpected union."); 8553 for (const auto *Field : RD->fields()) { 8554 // Fill in non-bitfields. (Bitfields always use a zero pattern, which we 8555 // will fill in later.) 
8556 if (!Field->isBitField() && !Field->isZeroSize(CGF.getContext())) { 8557 unsigned FieldIndex = RL.getLLVMFieldNo(Field); 8558 RecordLayout[FieldIndex] = Field; 8559 } 8560 } 8561 for (const llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *> 8562 &Data : RecordLayout) { 8563 if (Data.isNull()) 8564 continue; 8565 if (const auto *Base = Data.dyn_cast<const CXXRecordDecl *>()) 8566 getPlainLayout(Base, Layout, /*AsBase=*/true); 8567 else 8568 Layout.push_back(Data.get<const FieldDecl *>()); 8569 } 8570 } 8571 8572 /// Generate all the base pointers, section pointers, sizes, map types, and 8573 /// mappers for the extracted mappable expressions (all included in \a 8574 /// CombinedInfo). Also, for each item that relates with a device pointer, a 8575 /// pair of the relevant declaration and index where it occurs is appended to 8576 /// the device pointers info array. 8577 void generateAllInfoForClauses( 8578 ArrayRef<const OMPClause *> Clauses, MapCombinedInfoTy &CombinedInfo, 8579 const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet = 8580 llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const { 8581 // We have to process the component lists that relate with the same 8582 // declaration in a single chunk so that we can generate the map flags 8583 // correctly. Therefore, we organize all lists in a map. 8584 enum MapKind { Present, Allocs, Other, Total }; 8585 llvm::MapVector<CanonicalDeclPtr<const Decl>, 8586 SmallVector<SmallVector<MapInfo, 8>, 4>> 8587 Info; 8588 8589 // Helper function to fill the information map for the different supported 8590 // clauses. 
8591 auto &&InfoGen = 8592 [&Info, &SkipVarSet]( 8593 const ValueDecl *D, MapKind Kind, 8594 OMPClauseMappableExprCommon::MappableExprComponentListRef L, 8595 OpenMPMapClauseKind MapType, 8596 ArrayRef<OpenMPMapModifierKind> MapModifiers, 8597 ArrayRef<OpenMPMotionModifierKind> MotionModifiers, 8598 bool ReturnDevicePointer, bool IsImplicit, const ValueDecl *Mapper, 8599 const Expr *VarRef = nullptr, bool ForDeviceAddr = false) { 8600 if (SkipVarSet.contains(D)) 8601 return; 8602 auto It = Info.find(D); 8603 if (It == Info.end()) 8604 It = Info 8605 .insert(std::make_pair( 8606 D, SmallVector<SmallVector<MapInfo, 8>, 4>(Total))) 8607 .first; 8608 It->second[Kind].emplace_back( 8609 L, MapType, MapModifiers, MotionModifiers, ReturnDevicePointer, 8610 IsImplicit, Mapper, VarRef, ForDeviceAddr); 8611 }; 8612 8613 for (const auto *Cl : Clauses) { 8614 const auto *C = dyn_cast<OMPMapClause>(Cl); 8615 if (!C) 8616 continue; 8617 MapKind Kind = Other; 8618 if (llvm::is_contained(C->getMapTypeModifiers(), 8619 OMPC_MAP_MODIFIER_present)) 8620 Kind = Present; 8621 else if (C->getMapType() == OMPC_MAP_alloc) 8622 Kind = Allocs; 8623 const auto *EI = C->getVarRefs().begin(); 8624 for (const auto L : C->component_lists()) { 8625 const Expr *E = (C->getMapLoc().isValid()) ? 
*EI : nullptr; 8626 InfoGen(std::get<0>(L), Kind, std::get<1>(L), C->getMapType(), 8627 C->getMapTypeModifiers(), llvm::None, 8628 /*ReturnDevicePointer=*/false, C->isImplicit(), std::get<2>(L), 8629 E); 8630 ++EI; 8631 } 8632 } 8633 for (const auto *Cl : Clauses) { 8634 const auto *C = dyn_cast<OMPToClause>(Cl); 8635 if (!C) 8636 continue; 8637 MapKind Kind = Other; 8638 if (llvm::is_contained(C->getMotionModifiers(), 8639 OMPC_MOTION_MODIFIER_present)) 8640 Kind = Present; 8641 const auto *EI = C->getVarRefs().begin(); 8642 for (const auto L : C->component_lists()) { 8643 InfoGen(std::get<0>(L), Kind, std::get<1>(L), OMPC_MAP_to, llvm::None, 8644 C->getMotionModifiers(), /*ReturnDevicePointer=*/false, 8645 C->isImplicit(), std::get<2>(L), *EI); 8646 ++EI; 8647 } 8648 } 8649 for (const auto *Cl : Clauses) { 8650 const auto *C = dyn_cast<OMPFromClause>(Cl); 8651 if (!C) 8652 continue; 8653 MapKind Kind = Other; 8654 if (llvm::is_contained(C->getMotionModifiers(), 8655 OMPC_MOTION_MODIFIER_present)) 8656 Kind = Present; 8657 const auto *EI = C->getVarRefs().begin(); 8658 for (const auto L : C->component_lists()) { 8659 InfoGen(std::get<0>(L), Kind, std::get<1>(L), OMPC_MAP_from, llvm::None, 8660 C->getMotionModifiers(), /*ReturnDevicePointer=*/false, 8661 C->isImplicit(), std::get<2>(L), *EI); 8662 ++EI; 8663 } 8664 } 8665 8666 // Look at the use_device_ptr clause information and mark the existing map 8667 // entries as such. If there is no map information for an entry in the 8668 // use_device_ptr list, we create one with map type 'alloc' and zero size 8669 // section. It is the user fault if that was not mapped before. If there is 8670 // no map information and the pointer is a struct member, then we defer the 8671 // emission of that entry until the whole struct has been processed. 
8672 llvm::MapVector<CanonicalDeclPtr<const Decl>, 8673 SmallVector<DeferredDevicePtrEntryTy, 4>> 8674 DeferredInfo; 8675 MapCombinedInfoTy UseDevicePtrCombinedInfo; 8676 8677 for (const auto *Cl : Clauses) { 8678 const auto *C = dyn_cast<OMPUseDevicePtrClause>(Cl); 8679 if (!C) 8680 continue; 8681 for (const auto L : C->component_lists()) { 8682 OMPClauseMappableExprCommon::MappableExprComponentListRef Components = 8683 std::get<1>(L); 8684 assert(!Components.empty() && 8685 "Not expecting empty list of components!"); 8686 const ValueDecl *VD = Components.back().getAssociatedDeclaration(); 8687 VD = cast<ValueDecl>(VD->getCanonicalDecl()); 8688 const Expr *IE = Components.back().getAssociatedExpression(); 8689 // If the first component is a member expression, we have to look into 8690 // 'this', which maps to null in the map of map information. Otherwise 8691 // look directly for the information. 8692 auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD); 8693 8694 // We potentially have map information for this declaration already. 8695 // Look for the first set of components that refer to it. 8696 if (It != Info.end()) { 8697 bool Found = false; 8698 for (auto &Data : It->second) { 8699 auto *CI = llvm::find_if(Data, [VD](const MapInfo &MI) { 8700 return MI.Components.back().getAssociatedDeclaration() == VD; 8701 }); 8702 // If we found a map entry, signal that the pointer has to be 8703 // returned and move on to the next declaration. Exclude cases where 8704 // the base pointer is mapped as array subscript, array section or 8705 // array shaping. The base address is passed as a pointer to base in 8706 // this case and cannot be used as a base for use_device_ptr list 8707 // item. 
8708 if (CI != Data.end()) { 8709 auto PrevCI = std::next(CI->Components.rbegin()); 8710 const auto *VarD = dyn_cast<VarDecl>(VD); 8711 if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() || 8712 isa<MemberExpr>(IE) || 8713 !VD->getType().getNonReferenceType()->isPointerType() || 8714 PrevCI == CI->Components.rend() || 8715 isa<MemberExpr>(PrevCI->getAssociatedExpression()) || !VarD || 8716 VarD->hasLocalStorage()) { 8717 CI->ReturnDevicePointer = true; 8718 Found = true; 8719 break; 8720 } 8721 } 8722 } 8723 if (Found) 8724 continue; 8725 } 8726 8727 // We didn't find any match in our map information - generate a zero 8728 // size array section - if the pointer is a struct member we defer this 8729 // action until the whole struct has been processed. 8730 if (isa<MemberExpr>(IE)) { 8731 // Insert the pointer into Info to be processed by 8732 // generateInfoForComponentList. Because it is a member pointer 8733 // without a pointee, no entry will be generated for it, therefore 8734 // we need to generate one after the whole struct has been processed. 8735 // Nonetheless, generateInfoForComponentList must be called to take 8736 // the pointer into account for the calculation of the range of the 8737 // partial struct. 
8738 InfoGen(nullptr, Other, Components, OMPC_MAP_unknown, llvm::None, 8739 llvm::None, /*ReturnDevicePointer=*/false, C->isImplicit(), 8740 nullptr); 8741 DeferredInfo[nullptr].emplace_back(IE, VD, /*ForDeviceAddr=*/false); 8742 } else { 8743 llvm::Value *Ptr = 8744 CGF.EmitLoadOfScalar(CGF.EmitLValue(IE), IE->getExprLoc()); 8745 UseDevicePtrCombinedInfo.Exprs.push_back(VD); 8746 UseDevicePtrCombinedInfo.BasePointers.emplace_back(Ptr, VD); 8747 UseDevicePtrCombinedInfo.Pointers.push_back(Ptr); 8748 UseDevicePtrCombinedInfo.Sizes.push_back( 8749 llvm::Constant::getNullValue(CGF.Int64Ty)); 8750 UseDevicePtrCombinedInfo.Types.push_back(OMP_MAP_RETURN_PARAM); 8751 UseDevicePtrCombinedInfo.Mappers.push_back(nullptr); 8752 } 8753 } 8754 } 8755 8756 // Look at the use_device_addr clause information and mark the existing map 8757 // entries as such. If there is no map information for an entry in the 8758 // use_device_addr list, we create one with map type 'alloc' and zero size 8759 // section. It is the user fault if that was not mapped before. If there is 8760 // no map information and the pointer is a struct member, then we defer the 8761 // emission of that entry until the whole struct has been processed. 8762 llvm::SmallDenseSet<CanonicalDeclPtr<const Decl>, 4> Processed; 8763 for (const auto *Cl : Clauses) { 8764 const auto *C = dyn_cast<OMPUseDeviceAddrClause>(Cl); 8765 if (!C) 8766 continue; 8767 for (const auto L : C->component_lists()) { 8768 assert(!std::get<1>(L).empty() && 8769 "Not expecting empty list of components!"); 8770 const ValueDecl *VD = std::get<1>(L).back().getAssociatedDeclaration(); 8771 if (!Processed.insert(VD).second) 8772 continue; 8773 VD = cast<ValueDecl>(VD->getCanonicalDecl()); 8774 const Expr *IE = std::get<1>(L).back().getAssociatedExpression(); 8775 // If the first component is a member expression, we have to look into 8776 // 'this', which maps to null in the map of map information. 
Otherwise 8777 // look directly for the information. 8778 auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD); 8779 8780 // We potentially have map information for this declaration already. 8781 // Look for the first set of components that refer to it. 8782 if (It != Info.end()) { 8783 bool Found = false; 8784 for (auto &Data : It->second) { 8785 auto *CI = llvm::find_if(Data, [VD](const MapInfo &MI) { 8786 return MI.Components.back().getAssociatedDeclaration() == VD; 8787 }); 8788 // If we found a map entry, signal that the pointer has to be 8789 // returned and move on to the next declaration. 8790 if (CI != Data.end()) { 8791 CI->ReturnDevicePointer = true; 8792 Found = true; 8793 break; 8794 } 8795 } 8796 if (Found) 8797 continue; 8798 } 8799 8800 // We didn't find any match in our map information - generate a zero 8801 // size array section - if the pointer is a struct member we defer this 8802 // action until the whole struct has been processed. 8803 if (isa<MemberExpr>(IE)) { 8804 // Insert the pointer into Info to be processed by 8805 // generateInfoForComponentList. Because it is a member pointer 8806 // without a pointee, no entry will be generated for it, therefore 8807 // we need to generate one after the whole struct has been processed. 8808 // Nonetheless, generateInfoForComponentList must be called to take 8809 // the pointer into account for the calculation of the range of the 8810 // partial struct. 
8811 InfoGen(nullptr, Other, std::get<1>(L), OMPC_MAP_unknown, llvm::None, 8812 llvm::None, /*ReturnDevicePointer=*/false, C->isImplicit(), 8813 nullptr, nullptr, /*ForDeviceAddr=*/true); 8814 DeferredInfo[nullptr].emplace_back(IE, VD, /*ForDeviceAddr=*/true); 8815 } else { 8816 llvm::Value *Ptr; 8817 if (IE->isGLValue()) 8818 Ptr = CGF.EmitLValue(IE).getPointer(CGF); 8819 else 8820 Ptr = CGF.EmitScalarExpr(IE); 8821 CombinedInfo.Exprs.push_back(VD); 8822 CombinedInfo.BasePointers.emplace_back(Ptr, VD); 8823 CombinedInfo.Pointers.push_back(Ptr); 8824 CombinedInfo.Sizes.push_back( 8825 llvm::Constant::getNullValue(CGF.Int64Ty)); 8826 CombinedInfo.Types.push_back(OMP_MAP_RETURN_PARAM); 8827 CombinedInfo.Mappers.push_back(nullptr); 8828 } 8829 } 8830 } 8831 8832 for (const auto &Data : Info) { 8833 StructRangeInfoTy PartialStruct; 8834 // Temporary generated information. 8835 MapCombinedInfoTy CurInfo; 8836 const Decl *D = Data.first; 8837 const ValueDecl *VD = cast_or_null<ValueDecl>(D); 8838 for (const auto &M : Data.second) { 8839 for (const MapInfo &L : M) { 8840 assert(!L.Components.empty() && 8841 "Not expecting declaration with no component lists."); 8842 8843 // Remember the current base pointer index. 8844 unsigned CurrentBasePointersIdx = CurInfo.BasePointers.size(); 8845 CurInfo.NonContigInfo.IsNonContiguous = 8846 L.Components.back().isNonContiguous(); 8847 generateInfoForComponentList( 8848 L.MapType, L.MapModifiers, L.MotionModifiers, L.Components, 8849 CurInfo, PartialStruct, /*IsFirstComponentList=*/false, 8850 L.IsImplicit, L.Mapper, L.ForDeviceAddr, VD, L.VarRef); 8851 8852 // If this entry relates with a device pointer, set the relevant 8853 // declaration and add the 'return pointer' flag. 
8854 if (L.ReturnDevicePointer) { 8855 assert(CurInfo.BasePointers.size() > CurrentBasePointersIdx && 8856 "Unexpected number of mapped base pointers."); 8857 8858 const ValueDecl *RelevantVD = 8859 L.Components.back().getAssociatedDeclaration(); 8860 assert(RelevantVD && 8861 "No relevant declaration related with device pointer??"); 8862 8863 CurInfo.BasePointers[CurrentBasePointersIdx].setDevicePtrDecl( 8864 RelevantVD); 8865 CurInfo.Types[CurrentBasePointersIdx] |= OMP_MAP_RETURN_PARAM; 8866 } 8867 } 8868 } 8869 8870 // Append any pending zero-length pointers which are struct members and 8871 // used with use_device_ptr or use_device_addr. 8872 auto CI = DeferredInfo.find(Data.first); 8873 if (CI != DeferredInfo.end()) { 8874 for (const DeferredDevicePtrEntryTy &L : CI->second) { 8875 llvm::Value *BasePtr; 8876 llvm::Value *Ptr; 8877 if (L.ForDeviceAddr) { 8878 if (L.IE->isGLValue()) 8879 Ptr = this->CGF.EmitLValue(L.IE).getPointer(CGF); 8880 else 8881 Ptr = this->CGF.EmitScalarExpr(L.IE); 8882 BasePtr = Ptr; 8883 // Entry is RETURN_PARAM. Also, set the placeholder value 8884 // MEMBER_OF=FFFF so that the entry is later updated with the 8885 // correct value of MEMBER_OF. 8886 CurInfo.Types.push_back(OMP_MAP_RETURN_PARAM | OMP_MAP_MEMBER_OF); 8887 } else { 8888 BasePtr = this->CGF.EmitLValue(L.IE).getPointer(CGF); 8889 Ptr = this->CGF.EmitLoadOfScalar(this->CGF.EmitLValue(L.IE), 8890 L.IE->getExprLoc()); 8891 // Entry is PTR_AND_OBJ and RETURN_PARAM. Also, set the 8892 // placeholder value MEMBER_OF=FFFF so that the entry is later 8893 // updated with the correct value of MEMBER_OF. 
8894 CurInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_RETURN_PARAM | 8895 OMP_MAP_MEMBER_OF); 8896 } 8897 CurInfo.Exprs.push_back(L.VD); 8898 CurInfo.BasePointers.emplace_back(BasePtr, L.VD); 8899 CurInfo.Pointers.push_back(Ptr); 8900 CurInfo.Sizes.push_back( 8901 llvm::Constant::getNullValue(this->CGF.Int64Ty)); 8902 CurInfo.Mappers.push_back(nullptr); 8903 } 8904 } 8905 // If there is an entry in PartialStruct it means we have a struct with 8906 // individual members mapped. Emit an extra combined entry. 8907 if (PartialStruct.Base.isValid()) { 8908 CurInfo.NonContigInfo.Dims.push_back(0); 8909 emitCombinedEntry(CombinedInfo, CurInfo.Types, PartialStruct, VD); 8910 } 8911 8912 // We need to append the results of this capture to what we already 8913 // have. 8914 CombinedInfo.append(CurInfo); 8915 } 8916 // Append data for use_device_ptr clauses. 8917 CombinedInfo.append(UseDevicePtrCombinedInfo); 8918 } 8919 8920 public: 8921 MappableExprsHandler(const OMPExecutableDirective &Dir, CodeGenFunction &CGF) 8922 : CurDir(&Dir), CGF(CGF) { 8923 // Extract firstprivate clause information. 8924 for (const auto *C : Dir.getClausesOfKind<OMPFirstprivateClause>()) 8925 for (const auto *D : C->varlists()) 8926 FirstPrivateDecls.try_emplace( 8927 cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl()), C->isImplicit()); 8928 // Extract implicit firstprivates from uses_allocators clauses. 
8929 for (const auto *C : Dir.getClausesOfKind<OMPUsesAllocatorsClause>()) { 8930 for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) { 8931 OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I); 8932 if (const auto *DRE = dyn_cast_or_null<DeclRefExpr>(D.AllocatorTraits)) 8933 FirstPrivateDecls.try_emplace(cast<VarDecl>(DRE->getDecl()), 8934 /*Implicit=*/true); 8935 else if (const auto *VD = dyn_cast<VarDecl>( 8936 cast<DeclRefExpr>(D.Allocator->IgnoreParenImpCasts()) 8937 ->getDecl())) 8938 FirstPrivateDecls.try_emplace(VD, /*Implicit=*/true); 8939 } 8940 } 8941 // Extract device pointer clause information. 8942 for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>()) 8943 for (auto L : C->component_lists()) 8944 DevPointersMap[std::get<0>(L)].push_back(std::get<1>(L)); 8945 // Extract map information. 8946 for (const auto *C : Dir.getClausesOfKind<OMPMapClause>()) { 8947 if (C->getMapType() != OMPC_MAP_to) 8948 continue; 8949 for (auto L : C->component_lists()) { 8950 const ValueDecl *VD = std::get<0>(L); 8951 const auto *RD = VD ? VD->getType() 8952 .getCanonicalType() 8953 .getNonReferenceType() 8954 ->getAsCXXRecordDecl() 8955 : nullptr; 8956 if (RD && RD->isLambda()) 8957 LambdasMap.try_emplace(std::get<0>(L), C); 8958 } 8959 } 8960 } 8961 8962 /// Constructor for the declare mapper directive. 8963 MappableExprsHandler(const OMPDeclareMapperDecl &Dir, CodeGenFunction &CGF) 8964 : CurDir(&Dir), CGF(CGF) {} 8965 8966 /// Generate code for the combined entry if we have a partially mapped struct 8967 /// and take care of the mapping flags of the arguments corresponding to 8968 /// individual struct members. 
8969 void emitCombinedEntry(MapCombinedInfoTy &CombinedInfo, 8970 MapFlagsArrayTy &CurTypes, 8971 const StructRangeInfoTy &PartialStruct, 8972 const ValueDecl *VD = nullptr, 8973 bool NotTargetParams = true) const { 8974 if (CurTypes.size() == 1 && 8975 ((CurTypes.back() & OMP_MAP_MEMBER_OF) != OMP_MAP_MEMBER_OF) && 8976 !PartialStruct.IsArraySection) 8977 return; 8978 Address LBAddr = PartialStruct.LowestElem.second; 8979 Address HBAddr = PartialStruct.HighestElem.second; 8980 if (PartialStruct.HasCompleteRecord) { 8981 LBAddr = PartialStruct.LB; 8982 HBAddr = PartialStruct.LB; 8983 } 8984 CombinedInfo.Exprs.push_back(VD); 8985 // Base is the base of the struct 8986 CombinedInfo.BasePointers.push_back(PartialStruct.Base.getPointer()); 8987 // Pointer is the address of the lowest element 8988 llvm::Value *LB = LBAddr.getPointer(); 8989 CombinedInfo.Pointers.push_back(LB); 8990 // There should not be a mapper for a combined entry. 8991 CombinedInfo.Mappers.push_back(nullptr); 8992 // Size is (addr of {highest+1} element) - (addr of lowest element) 8993 llvm::Value *HB = HBAddr.getPointer(); 8994 llvm::Value *HAddr = 8995 CGF.Builder.CreateConstGEP1_32(HBAddr.getElementType(), HB, /*Idx0=*/1); 8996 llvm::Value *CLAddr = CGF.Builder.CreatePointerCast(LB, CGF.VoidPtrTy); 8997 llvm::Value *CHAddr = CGF.Builder.CreatePointerCast(HAddr, CGF.VoidPtrTy); 8998 llvm::Value *Diff = CGF.Builder.CreatePtrDiff(CHAddr, CLAddr); 8999 llvm::Value *Size = CGF.Builder.CreateIntCast(Diff, CGF.Int64Ty, 9000 /*isSigned=*/false); 9001 CombinedInfo.Sizes.push_back(Size); 9002 // Map type is always TARGET_PARAM, if generate info for captures. 9003 CombinedInfo.Types.push_back(NotTargetParams ? OMP_MAP_NONE 9004 : OMP_MAP_TARGET_PARAM); 9005 // If any element has the present modifier, then make sure the runtime 9006 // doesn't attempt to allocate the struct. 
9007 if (CurTypes.end() != 9008 llvm::find_if(CurTypes, [](OpenMPOffloadMappingFlags Type) { 9009 return Type & OMP_MAP_PRESENT; 9010 })) 9011 CombinedInfo.Types.back() |= OMP_MAP_PRESENT; 9012 // Remove TARGET_PARAM flag from the first element 9013 (*CurTypes.begin()) &= ~OMP_MAP_TARGET_PARAM; 9014 // If any element has the ompx_hold modifier, then make sure the runtime 9015 // uses the hold reference count for the struct as a whole so that it won't 9016 // be unmapped by an extra dynamic reference count decrement. Add it to all 9017 // elements as well so the runtime knows which reference count to check 9018 // when determining whether it's time for device-to-host transfers of 9019 // individual elements. 9020 if (CurTypes.end() != 9021 llvm::find_if(CurTypes, [](OpenMPOffloadMappingFlags Type) { 9022 return Type & OMP_MAP_OMPX_HOLD; 9023 })) { 9024 CombinedInfo.Types.back() |= OMP_MAP_OMPX_HOLD; 9025 for (auto &M : CurTypes) 9026 M |= OMP_MAP_OMPX_HOLD; 9027 } 9028 9029 // All other current entries will be MEMBER_OF the combined entry 9030 // (except for PTR_AND_OBJ entries which do not have a placeholder value 9031 // 0xFFFF in the MEMBER_OF field). 9032 OpenMPOffloadMappingFlags MemberOfFlag = 9033 getMemberOfFlag(CombinedInfo.BasePointers.size() - 1); 9034 for (auto &M : CurTypes) 9035 setCorrectMemberOfFlag(M, MemberOfFlag); 9036 } 9037 9038 /// Generate all the base pointers, section pointers, sizes, map types, and 9039 /// mappers for the extracted mappable expressions (all included in \a 9040 /// CombinedInfo). Also, for each item that relates with a device pointer, a 9041 /// pair of the relevant declaration and index where it occurs is appended to 9042 /// the device pointers info array. 
9043 void generateAllInfo( 9044 MapCombinedInfoTy &CombinedInfo, 9045 const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet = 9046 llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const { 9047 assert(CurDir.is<const OMPExecutableDirective *>() && 9048 "Expect a executable directive"); 9049 const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>(); 9050 generateAllInfoForClauses(CurExecDir->clauses(), CombinedInfo, SkipVarSet); 9051 } 9052 9053 /// Generate all the base pointers, section pointers, sizes, map types, and 9054 /// mappers for the extracted map clauses of user-defined mapper (all included 9055 /// in \a CombinedInfo). 9056 void generateAllInfoForMapper(MapCombinedInfoTy &CombinedInfo) const { 9057 assert(CurDir.is<const OMPDeclareMapperDecl *>() && 9058 "Expect a declare mapper directive"); 9059 const auto *CurMapperDir = CurDir.get<const OMPDeclareMapperDecl *>(); 9060 generateAllInfoForClauses(CurMapperDir->clauses(), CombinedInfo); 9061 } 9062 9063 /// Emit capture info for lambdas for variables captured by reference. 
  ///
  /// \param VD declaration whose type is inspected; nothing is emitted unless
  ///        its (non-reference, canonical) type is a lambda class.
  /// \param Arg address used, together with the declared alignment of \p VD,
  ///        to form the lvalue of the lambda object.
  /// \param CombinedInfo receives one entry per captured 'this', per by-ref
  ///        capture and per by-copy capture of pointer type.
  /// \param LambdaPointers filled with (capture field address -> lambda object
  ///        address) pairs; consumed by adjustMemberOfForLambdaCaptures.
  void generateInfoForLambdaCaptures(
      const ValueDecl *VD, llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo,
      llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers) const {
    const auto *RD = VD->getType()
                         .getCanonicalType()
                         .getNonReferenceType()
                         ->getAsCXXRecordDecl();
    // Nothing to do for anything that is not a lambda object.
    if (!RD || !RD->isLambda())
      return;
    Address VDAddr = Address(Arg, CGF.getContext().getDeclAlign(VD));
    LValue VDLVal = CGF.MakeAddrLValue(
        VDAddr, VD->getType().getCanonicalType().getNonReferenceType());
    llvm::DenseMap<const VarDecl *, FieldDecl *> Captures;
    FieldDecl *ThisCapture = nullptr;
    RD->getCaptureFields(Captures, ThisCapture);
    // Captured 'this': base pointer is the capture field, size is that of a
    // void pointer.
    if (ThisCapture) {
      LValue ThisLVal =
          CGF.EmitLValueForFieldInitialization(VDLVal, ThisCapture);
      LValue ThisLValVal = CGF.EmitLValueForField(VDLVal, ThisCapture);
      LambdaPointers.try_emplace(ThisLVal.getPointer(CGF),
                                 VDLVal.getPointer(CGF));
      CombinedInfo.Exprs.push_back(VD);
      CombinedInfo.BasePointers.push_back(ThisLVal.getPointer(CGF));
      CombinedInfo.Pointers.push_back(ThisLValVal.getPointer(CGF));
      CombinedInfo.Sizes.push_back(
          CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy),
                                    CGF.Int64Ty, /*isSigned=*/true));
      CombinedInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
                                   OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
      CombinedInfo.Mappers.push_back(nullptr);
    }
    for (const LambdaCapture &LC : RD->captures()) {
      if (!LC.capturesVariable())
        continue;
      // Note: this VD is the captured variable and shadows the parameter VD
      // for the remainder of the loop body.
      const VarDecl *VD = LC.getCapturedVar();
      // Only by-reference captures and by-copy captures of pointers are
      // handled here.
      if (LC.getCaptureKind() != LCK_ByRef && !VD->getType()->isPointerType())
        continue;
      auto It = Captures.find(VD);
      assert(It != Captures.end() && "Found lambda capture without field.");
      LValue VarLVal = CGF.EmitLValueForFieldInitialization(VDLVal, It->second);
      if (LC.getCaptureKind() == LCK_ByRef) {
        // By-reference capture: the size is the size of the captured
        // variable's non-reference type.
        LValue VarLValVal = CGF.EmitLValueForField(VDLVal, It->second);
        LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
                                   VDLVal.getPointer(CGF));
        CombinedInfo.Exprs.push_back(VD);
        CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF));
        CombinedInfo.Pointers.push_back(VarLValVal.getPointer(CGF));
        CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
            CGF.getTypeSize(
                VD->getType().getCanonicalType().getNonReferenceType()),
            CGF.Int64Ty, /*isSigned=*/true));
      } else {
        // By-copy capture of a pointer: the pointer entry is the loaded
        // pointer value and the size is zero.
        RValue VarRVal = CGF.EmitLoadOfLValue(VarLVal, RD->getLocation());
        LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
                                   VDLVal.getPointer(CGF));
        CombinedInfo.Exprs.push_back(VD);
        CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF));
        CombinedInfo.Pointers.push_back(VarRVal.getScalarVal());
        CombinedInfo.Sizes.push_back(llvm::ConstantInt::get(CGF.Int64Ty, 0));
      }
      CombinedInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
                                   OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
      CombinedInfo.Mappers.push_back(nullptr);
    }
  }

  /// Set correct indices for lambdas captures.
  ///
  /// For every entry whose flags are exactly PTR_AND_OBJ | LITERAL |
  /// MEMBER_OF | IMPLICIT, look up the owning lambda address in
  /// \p LambdaPointers, search the preceding entries of \p Pointers for that
  /// address, and update the entry's MEMBER_OF field with the index found.
  void adjustMemberOfForLambdaCaptures(
      const llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers,
      MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
      MapFlagsArrayTy &Types) const {
    for (unsigned I = 0, E = Types.size(); I < E; ++I) {
      // Set correct member_of idx for all implicit lambda captures.
      if (Types[I] != (OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
                       OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT))
        continue;
      llvm::Value *BasePtr = LambdaPointers.lookup(*BasePointers[I]);
      assert(BasePtr && "Unable to find base lambda address.");
      int TgtIdx = -1;
      // Walk backwards over entries [0, I) and stop at the closest one whose
      // pointer is the lambda object itself.
      for (unsigned J = I; J > 0; --J) {
        unsigned Idx = J - 1;
        if (Pointers[Idx] != BasePtr)
          continue;
        TgtIdx = Idx;
        break;
      }
      assert(TgtIdx != -1 && "Unable to find parent lambda.");
      // All other current entries will be MEMBER_OF the combined entry
      // (except for PTR_AND_OBJ entries which do not have a placeholder value
      // 0xFFFF in the MEMBER_OF field).
      OpenMPOffloadMappingFlags MemberOfFlag = getMemberOfFlag(TgtIdx);
      setCorrectMemberOfFlag(Types[I], MemberOfFlag);
    }
  }

  /// Generate the base pointers, section pointers, sizes, map types, and
  /// mappers associated to a given capture (all included in \a CombinedInfo).
  ///
  /// \param Cap the capture being processed; must not capture a variable array
  ///        type.
  /// \param Arg value passed for the capture; used directly for declarations
  ///        listed in an is_device_ptr clause.
  /// \param CombinedInfo receives the generated entries.
  /// \param PartialStruct forwarded to generateInfoForComponentList for every
  ///        component list of the captured declaration.
  void generateInfoForCapture(const CapturedStmt::Capture *Cap,
                              llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo,
                              StructRangeInfoTy &PartialStruct) const {
    assert(!Cap->capturesVariableArrayType() &&
           "Not expecting to generate map info for a variable array type!");

    // We need to know when we are generating information for the first
    // component. A captured 'this' is represented by a null declaration.
    const ValueDecl *VD = Cap->capturesThis()
                              ? nullptr
                              : Cap->getCapturedVar()->getCanonicalDecl();

    // for map(to: lambda): skip here, processing it in
    // generateDefaultMapInfo
    if (LambdasMap.count(VD))
      return;

    // If this declaration appears in a is_device_ptr clause we just have to
    // pass the pointer by value. If it is a reference to a declaration, we just
    // pass its value.
    if (DevPointersMap.count(VD)) {
      CombinedInfo.Exprs.push_back(VD);
      CombinedInfo.BasePointers.emplace_back(Arg, VD);
      CombinedInfo.Pointers.push_back(Arg);
      CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
          CGF.getTypeSize(CGF.getContext().VoidPtrTy), CGF.Int64Ty,
          /*isSigned=*/true));
      CombinedInfo.Types.push_back(
          (Cap->capturesVariable() ? OMP_MAP_TO : OMP_MAP_LITERAL) |
          OMP_MAP_TARGET_PARAM);
      CombinedInfo.Mappers.push_back(nullptr);
      return;
    }

    // Tuple layout: <components, map type, map modifiers, is-implicit,
    // mapper, variable reference expression>.
    using MapData =
        std::tuple<OMPClauseMappableExprCommon::MappableExprComponentListRef,
                   OpenMPMapClauseKind, ArrayRef<OpenMPMapModifierKind>, bool,
                   const ValueDecl *, const Expr *>;
    SmallVector<MapData, 4> DeclComponentLists;
    assert(CurDir.is<const OMPExecutableDirective *>() &&
           "Expect a executable directive");
    const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
    // Collect every component list that the map clauses of the current
    // directive associate with VD.
    for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) {
      // EI walks the clause's variable references in step with the component
      // lists returned for VD.
      const auto *EI = C->getVarRefs().begin();
      for (const auto L : C->decl_component_lists(VD)) {
        const ValueDecl *VDecl, *Mapper;
        // The Expression is not correct if the mapping is implicit
        const Expr *E = (C->getMapLoc().isValid()) ? *EI : nullptr;
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
        std::tie(VDecl, Components, Mapper) = L;
        assert(VDecl == VD && "We got information for the wrong declaration??");
        assert(!Components.empty() &&
               "Not expecting declaration with no component lists.");
        DeclComponentLists.emplace_back(Components, C->getMapType(),
                                        C->getMapTypeModifiers(),
                                        C->isImplicit(), Mapper, E);
        ++EI;
      }
    }
    // Reorder the lists by 'present' modifier and 'alloc' map type.
    // NOTE(review): the 'present' test reads the modifiers of LHS first and of
    // RHS second, whereas the 'alloc' test reads the map type of RHS first and
    // of LHS second. As written, the predicate returns true when LHS has
    // 'present' and RHS does not, or when RHS is 'alloc' and LHS is not.
    // Confirm that the crossed operands are intentional and that the predicate
    // is a valid ordering for stable_sort.
    llvm::stable_sort(DeclComponentLists, [](const MapData &LHS,
                                             const MapData &RHS) {
      ArrayRef<OpenMPMapModifierKind> MapModifiers = std::get<2>(LHS);
      OpenMPMapClauseKind MapType = std::get<1>(RHS);
      bool HasPresent =
          llvm::is_contained(MapModifiers, clang::OMPC_MAP_MODIFIER_present);
      bool HasAllocs = MapType == OMPC_MAP_alloc;
      MapModifiers = std::get<2>(RHS);
      MapType = std::get<1>(LHS);
      bool HasPresentR =
          llvm::is_contained(MapModifiers, clang::OMPC_MAP_MODIFIER_present);
      bool HasAllocsR = MapType == OMPC_MAP_alloc;
      return (HasPresent && !HasPresentR) || (HasAllocs && !HasAllocsR);
    });

    // Find overlapping elements (including the offset from the base element).
    // The map is keyed by the address of an element of DeclComponentLists.
    llvm::SmallDenseMap<
        const MapData *,
        llvm::SmallVector<
            OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>,
        4>
        OverlappedData;
    size_t Count = 0;
    for (const MapData &L : DeclComponentLists) {
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      const ValueDecl *Mapper;
      const Expr *VarRef;
      std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
          L;
      ++Count;
      // Compare L against every list that follows it in the sorted order.
      for (const MapData &L1 : makeArrayRef(DeclComponentLists).slice(Count)) {
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components1;
        std::tie(Components1, MapType, MapModifiers, IsImplicit, Mapper,
                 VarRef) = L1;
        // Walk both component lists from the back in lock step until they
        // diverge or one of them is exhausted.
        auto CI = Components.rbegin();
        auto CE = Components.rend();
        auto SI = Components1.rbegin();
        auto SE = Components1.rend();
        for (; CI != CE && SI != SE; ++CI, ++SI) {
          if (CI->getAssociatedExpression()->getStmtClass() !=
              SI->getAssociatedExpression()->getStmtClass())
            break;
          // Are we dealing with different variables/fields?
          if (CI->getAssociatedDeclaration() != SI->getAssociatedDeclaration())
            break;
        }
        // Found overlapping if, at least for one component, reached the head
        // of the components list.
        if (CI == CE || SI == SE) {
          // Ignore it if it is the same component.
          if (CI == CE && SI == SE)
            continue;
          // It points at the first extra component of the longer list.
          const auto It = (SI == SE) ? CI : SI;
          // If one component is a pointer and another one is a kind of
          // dereference of this pointer (array subscript, section, dereference,
          // etc.), it is not an overlapping.
          // Same, if one component is a base and another component is a
          // dereferenced pointer memberexpr with the same base.
          if (!isa<MemberExpr>(It->getAssociatedExpression()) ||
              (std::prev(It)->getAssociatedDeclaration() &&
               std::prev(It)
                   ->getAssociatedDeclaration()
                   ->getType()
                   ->isPointerType()) ||
              (It->getAssociatedDeclaration() &&
               It->getAssociatedDeclaration()->getType()->isPointerType() &&
               std::next(It) != CE && std::next(It) != SE))
            continue;
          // The exhausted (shorter) list is the base; the longer one is
          // recorded as overlapping it.
          const MapData &BaseData = CI == CE ? L : L1;
          OMPClauseMappableExprCommon::MappableExprComponentListRef SubData =
              SI == SE ? Components : Components1;
          auto &OverlappedElements = OverlappedData.FindAndConstruct(&BaseData);
          OverlappedElements.getSecond().push_back(SubData);
        }
      }
    }
    // Sort the overlapped elements for each item.
    llvm::SmallVector<const FieldDecl *, 4> Layout;
    if (!OverlappedData.empty()) {
      // Strip pointer/array levels until the type no longer changes.
      const Type *BaseType = VD->getType().getCanonicalType().getTypePtr();
      const Type *OrigType = BaseType->getPointeeOrArrayElementType();
      while (BaseType != OrigType) {
        BaseType = OrigType->getCanonicalTypeInternal().getTypePtr();
        OrigType = BaseType->getPointeeOrArrayElementType();
      }

      if (const auto *CRD = BaseType->getAsCXXRecordDecl())
        getPlainLayout(CRD, Layout, /*AsBase=*/false);
      else {
        // NOTE(review): RD is dereferenced without a null check; presumably
        // the stripped type is always a record here — confirm.
        const auto *RD = BaseType->getAsRecordDecl();
        Layout.append(RD->field_begin(), RD->field_end());
      }
    }
    for (auto &Pair : OverlappedData) {
      llvm::stable_sort(
          Pair.getSecond(),
          [&Layout](
              OMPClauseMappableExprCommon::MappableExprComponentListRef First,
              OMPClauseMappableExprCommon::MappableExprComponentListRef
                  Second) {
            auto CI = First.rbegin();
            auto CE = First.rend();
            auto SI = Second.rbegin();
            auto SE = Second.rend();
            for (; CI != CE && SI != SE; ++CI, ++SI) {
              if (CI->getAssociatedExpression()->getStmtClass() !=
                  SI->getAssociatedExpression()->getStmtClass())
                break;
              // Are we dealing with different variables/fields?
              if (CI->getAssociatedDeclaration() !=
                  SI->getAssociatedDeclaration())
                break;
            }

            // Lists contain the same elements.
            if (CI == CE && SI == SE)
              return false;

            // List with less elements is less than list with more elements.
            if (CI == CE || SI == SE)
              return CI == CE;

            // The lists diverge at two fields: order by field index when they
            // share a parent, otherwise by whichever appears first in Layout.
            const auto *FD1 = cast<FieldDecl>(CI->getAssociatedDeclaration());
            const auto *FD2 = cast<FieldDecl>(SI->getAssociatedDeclaration());
            if (FD1->getParent() == FD2->getParent())
              return FD1->getFieldIndex() < FD2->getFieldIndex();
            const auto *It =
                llvm::find_if(Layout, [FD1, FD2](const FieldDecl *FD) {
                  return FD == FD1 || FD == FD2;
                });
            return *It == FD1;
          });
    }

    // Associated with a capture, because the mapping flags depend on it.
    // Go through all of the elements with the overlapped elements.
    bool IsFirstComponentList = true;
    for (const auto &Pair : OverlappedData) {
      const MapData &L = *Pair.getFirst();
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      const ValueDecl *Mapper;
      const Expr *VarRef;
      std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
          L;
      ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
          OverlappedComponents = Pair.getSecond();
      generateInfoForComponentList(
          MapType, MapModifiers, llvm::None, Components, CombinedInfo,
          PartialStruct, IsFirstComponentList, IsImplicit, Mapper,
          /*ForDeviceAddr=*/false, VD, VarRef, OverlappedComponents);
      IsFirstComponentList = false;
    }
    // Go through other elements without overlapped elements.
    for (const MapData &L : DeclComponentLists) {
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      const ValueDecl *Mapper;
      const Expr *VarRef;
      std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
          L;
      auto It = OverlappedData.find(&L);
      if (It == OverlappedData.end())
        generateInfoForComponentList(MapType, MapModifiers, llvm::None,
                                     Components, CombinedInfo, PartialStruct,
                                     IsFirstComponentList, IsImplicit, Mapper,
                                     /*ForDeviceAddr=*/false, VD, VarRef);
      // Cleared after every list, including those skipped above because they
      // were already handled as overlapped bases.
      IsFirstComponentList = false;
    }
  }

  /// Generate the default map information for a given capture \a CI,
  /// record field declaration \a RI and captured value \a CV.
  ///
  /// Exactly one entry is appended to \a CombinedInfo; it is always flagged
  /// TARGET_PARAM and has no mapper.
  void generateDefaultMapInfo(const CapturedStmt::Capture &CI,
                              const FieldDecl &RI, llvm::Value *CV,
                              MapCombinedInfoTy &CombinedInfo) const {
    bool IsImplicit = true;
    // Do the default mapping.
    if (CI.capturesThis()) {
      // 'this': map the pointee object, tofrom.
      CombinedInfo.Exprs.push_back(nullptr);
      CombinedInfo.BasePointers.push_back(CV);
      CombinedInfo.Pointers.push_back(CV);
      const auto *PtrTy = cast<PointerType>(RI.getType().getTypePtr());
      CombinedInfo.Sizes.push_back(
          CGF.Builder.CreateIntCast(CGF.getTypeSize(PtrTy->getPointeeType()),
                                    CGF.Int64Ty, /*isSigned=*/true));
      // Default map type.
      CombinedInfo.Types.push_back(OMP_MAP_TO | OMP_MAP_FROM);
    } else if (CI.capturesVariableByCopy()) {
      const VarDecl *VD = CI.getCapturedVar();
      CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
      CombinedInfo.BasePointers.push_back(CV);
      CombinedInfo.Pointers.push_back(CV);
      if (!RI.getType()->isAnyPointerType()) {
        // We have to signal to the runtime captures passed by value that are
        // not pointers.
        CombinedInfo.Types.push_back(OMP_MAP_LITERAL);
        CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
            CGF.getTypeSize(RI.getType()), CGF.Int64Ty, /*isSigned=*/true));
      } else {
        // Pointers are implicitly mapped with a zero size and no flags
        // (other than first map that is added for all implicit maps).
        CombinedInfo.Types.push_back(OMP_MAP_NONE);
        CombinedInfo.Sizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty));
      }
      // A firstprivate declaration carries its own implicit/explicit state.
      auto I = FirstPrivateDecls.find(VD);
      if (I != FirstPrivateDecls.end())
        IsImplicit = I->getSecond();
    } else {
      assert(CI.capturesVariable() && "Expected captured reference.");
      const auto *PtrTy = cast<ReferenceType>(RI.getType().getTypePtr());
      QualType ElementType = PtrTy->getPointeeType();
      CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
          CGF.getTypeSize(ElementType), CGF.Int64Ty, /*isSigned=*/true));
      // The default map type for a scalar/complex type is 'to' because by
      // default the value doesn't have to be retrieved. For an aggregate
      // type, the default is 'tofrom'.
      CombinedInfo.Types.push_back(getMapModifiersForPrivateClauses(CI));
      const VarDecl *VD = CI.getCapturedVar();
      auto I = FirstPrivateDecls.find(VD);
      CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
      CombinedInfo.BasePointers.push_back(CV);
      // For a firstprivate whose referenced type is a pointer, the pointer
      // entry is the result of loading through the reference rather than CV.
      if (I != FirstPrivateDecls.end() && ElementType->isAnyPointerType()) {
        Address PtrAddr = CGF.EmitLoadOfReference(CGF.MakeAddrLValue(
            CV, ElementType, CGF.getContext().getDeclAlign(VD),
            AlignmentSource::Decl));
        CombinedInfo.Pointers.push_back(PtrAddr.getPointer());
      } else {
        CombinedInfo.Pointers.push_back(CV);
      }
      if (I != FirstPrivateDecls.end())
        IsImplicit = I->getSecond();
    }
    // Every default map produces a single argument which is a target parameter.
    CombinedInfo.Types.back() |= OMP_MAP_TARGET_PARAM;

    // Add flag stating this is an implicit map.
    if (IsImplicit)
      CombinedInfo.Types.back() |= OMP_MAP_IMPLICIT;

    // No user-defined mapper for default mapping.
    CombinedInfo.Mappers.push_back(nullptr);
  }
};
} // anonymous namespace

/// Emit the dimension descriptors for the non-contiguous entries of
/// \p CombinedInfo and store the address of each descriptor array into the
/// matching slot of \p Info.PointersArray.
///
/// Entries whose recorded dimension count is 1 are skipped.
static void emitNonContiguousDescriptor(
    CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo,
    CGOpenMPRuntime::TargetDataInfo &Info) {
  CodeGenModule &CGM = CGF.CGM;
  MappableExprsHandler::MapCombinedInfoTy::StructNonContiguousInfo
      &NonContigInfo = CombinedInfo.NonContigInfo;

  // Build an array of struct descriptor_dim and then assign it to
  // offload_args.
  //
  // struct descriptor_dim {
  //  uint64_t offset;
  //  uint64_t count;
  //  uint64_t stride;
  // };
  ASTContext &C = CGF.getContext();
  // 64-bit integer type requested with Signed=0.
  QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
  RecordDecl *RD;
  RD = C.buildImplicitRecord("descriptor_dim");
  RD->startDefinition();
  addFieldToRecordDecl(C, RD, Int64Ty);
  addFieldToRecordDecl(C, RD, Int64Ty);
  addFieldToRecordDecl(C, RD, Int64Ty);
  RD->completeDefinition();
  QualType DimTy = C.getRecordType(RD);

  // Field positions inside descriptor_dim, in declaration order.
  enum { OffsetFD = 0, CountFD, StrideFD };
  // We need two index variables here since the size of "Dims" is the same as
  // the size of Components, however, the size of offset, count, and stride is
  // equal to the size of base declaration that is non-contiguous.
  // I indexes Dims (and the pointers array); L indexes Offsets/Counts/Strides
  // and only advances for entries that are actually emitted.
  for (unsigned I = 0, L = 0, E = NonContigInfo.Dims.size(); I < E; ++I) {
    // Skip emitting ir if dimension size is 1 since it cannot be
    // non-contiguous.
    if (NonContigInfo.Dims[I] == 1)
      continue;
    llvm::APInt Size(/*numBits=*/32, NonContigInfo.Dims[I]);
    QualType ArrayTy =
        C.getConstantArrayType(DimTy, Size, nullptr, ArrayType::Normal, 0);
    Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims");
    for (unsigned II = 0, EE = NonContigInfo.Dims[I]; II < EE; ++II) {
      // Element II of the descriptor array is filled from the recorded
      // values in reverse order.
      unsigned RevIdx = EE - II - 1;
      LValue DimsLVal = CGF.MakeAddrLValue(
          CGF.Builder.CreateConstArrayGEP(DimsAddr, II), DimTy);
      // Offset
      LValue OffsetLVal = CGF.EmitLValueForField(
          DimsLVal, *std::next(RD->field_begin(), OffsetFD));
      CGF.EmitStoreOfScalar(NonContigInfo.Offsets[L][RevIdx], OffsetLVal);
      // Count
      LValue CountLVal = CGF.EmitLValueForField(
          DimsLVal, *std::next(RD->field_begin(), CountFD));
      CGF.EmitStoreOfScalar(NonContigInfo.Counts[L][RevIdx], CountLVal);
      // Stride
      LValue StrideLVal = CGF.EmitLValueForField(
          DimsLVal, *std::next(RD->field_begin(), StrideFD));
      CGF.EmitStoreOfScalar(NonContigInfo.Strides[L][RevIdx], StrideLVal);
    }
    // args[I] = &dims
    Address DAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        DimsAddr, CGM.Int8PtrTy);
    llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32(
        llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
        Info.PointersArray, 0, I);
    Address PAddr(P, CGF.getPointerAlign());
    CGF.Builder.CreateStore(DAddr.getPointer(), PAddr);
    ++L;
  }
}

// Try to extract the base declaration from a `this->x` expression if possible.
9541 static ValueDecl *getDeclFromThisExpr(const Expr *E) { 9542 if (!E) 9543 return nullptr; 9544 9545 if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E->IgnoreParenCasts())) 9546 if (const MemberExpr *ME = 9547 dyn_cast<MemberExpr>(OASE->getBase()->IgnoreParenImpCasts())) 9548 return ME->getMemberDecl(); 9549 return nullptr; 9550 } 9551 9552 /// Emit a string constant containing the names of the values mapped to the 9553 /// offloading runtime library. 9554 llvm::Constant * 9555 emitMappingInformation(CodeGenFunction &CGF, llvm::OpenMPIRBuilder &OMPBuilder, 9556 MappableExprsHandler::MappingExprInfo &MapExprs) { 9557 9558 if (!MapExprs.getMapDecl() && !MapExprs.getMapExpr()) 9559 return OMPBuilder.getOrCreateDefaultSrcLocStr(); 9560 9561 SourceLocation Loc; 9562 if (!MapExprs.getMapDecl() && MapExprs.getMapExpr()) { 9563 if (const ValueDecl *VD = getDeclFromThisExpr(MapExprs.getMapExpr())) 9564 Loc = VD->getLocation(); 9565 else 9566 Loc = MapExprs.getMapExpr()->getExprLoc(); 9567 } else { 9568 Loc = MapExprs.getMapDecl()->getLocation(); 9569 } 9570 9571 std::string ExprName = ""; 9572 if (MapExprs.getMapExpr()) { 9573 PrintingPolicy P(CGF.getContext().getLangOpts()); 9574 llvm::raw_string_ostream OS(ExprName); 9575 MapExprs.getMapExpr()->printPretty(OS, nullptr, P); 9576 OS.flush(); 9577 } else { 9578 ExprName = MapExprs.getMapDecl()->getNameAsString(); 9579 } 9580 9581 PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc); 9582 return OMPBuilder.getOrCreateSrcLocStr(PLoc.getFilename(), ExprName.c_str(), 9583 PLoc.getLine(), PLoc.getColumn()); 9584 } 9585 9586 /// Emit the arrays used to pass the captures and map information to the 9587 /// offloading runtime library. If there is no map or capture information, 9588 /// return nullptr by reference. 
9589 static void emitOffloadingArrays( 9590 CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo, 9591 CGOpenMPRuntime::TargetDataInfo &Info, llvm::OpenMPIRBuilder &OMPBuilder, 9592 bool IsNonContiguous = false) { 9593 CodeGenModule &CGM = CGF.CGM; 9594 ASTContext &Ctx = CGF.getContext(); 9595 9596 // Reset the array information. 9597 Info.clearArrayInfo(); 9598 Info.NumberOfPtrs = CombinedInfo.BasePointers.size(); 9599 9600 if (Info.NumberOfPtrs) { 9601 // Detect if we have any capture size requiring runtime evaluation of the 9602 // size so that a constant array could be eventually used. 9603 bool hasRuntimeEvaluationCaptureSize = false; 9604 for (llvm::Value *S : CombinedInfo.Sizes) 9605 if (!isa<llvm::Constant>(S)) { 9606 hasRuntimeEvaluationCaptureSize = true; 9607 break; 9608 } 9609 9610 llvm::APInt PointerNumAP(32, Info.NumberOfPtrs, /*isSigned=*/true); 9611 QualType PointerArrayType = Ctx.getConstantArrayType( 9612 Ctx.VoidPtrTy, PointerNumAP, nullptr, ArrayType::Normal, 9613 /*IndexTypeQuals=*/0); 9614 9615 Info.BasePointersArray = 9616 CGF.CreateMemTemp(PointerArrayType, ".offload_baseptrs").getPointer(); 9617 Info.PointersArray = 9618 CGF.CreateMemTemp(PointerArrayType, ".offload_ptrs").getPointer(); 9619 Address MappersArray = 9620 CGF.CreateMemTemp(PointerArrayType, ".offload_mappers"); 9621 Info.MappersArray = MappersArray.getPointer(); 9622 9623 // If we don't have any VLA types or other types that require runtime 9624 // evaluation, we can use a constant array for the map sizes, otherwise we 9625 // need to fill up the arrays as we do for the pointers. 
9626 QualType Int64Ty = 9627 Ctx.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1); 9628 if (hasRuntimeEvaluationCaptureSize) { 9629 QualType SizeArrayType = Ctx.getConstantArrayType( 9630 Int64Ty, PointerNumAP, nullptr, ArrayType::Normal, 9631 /*IndexTypeQuals=*/0); 9632 Info.SizesArray = 9633 CGF.CreateMemTemp(SizeArrayType, ".offload_sizes").getPointer(); 9634 } else { 9635 // We expect all the sizes to be constant, so we collect them to create 9636 // a constant array. 9637 SmallVector<llvm::Constant *, 16> ConstSizes; 9638 for (unsigned I = 0, E = CombinedInfo.Sizes.size(); I < E; ++I) { 9639 if (IsNonContiguous && 9640 (CombinedInfo.Types[I] & MappableExprsHandler::OMP_MAP_NON_CONTIG)) { 9641 ConstSizes.push_back(llvm::ConstantInt::get( 9642 CGF.Int64Ty, CombinedInfo.NonContigInfo.Dims[I])); 9643 } else { 9644 ConstSizes.push_back(cast<llvm::Constant>(CombinedInfo.Sizes[I])); 9645 } 9646 } 9647 9648 auto *SizesArrayInit = llvm::ConstantArray::get( 9649 llvm::ArrayType::get(CGM.Int64Ty, ConstSizes.size()), ConstSizes); 9650 std::string Name = CGM.getOpenMPRuntime().getName({"offload_sizes"}); 9651 auto *SizesArrayGbl = new llvm::GlobalVariable( 9652 CGM.getModule(), SizesArrayInit->getType(), 9653 /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage, 9654 SizesArrayInit, Name); 9655 SizesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); 9656 Info.SizesArray = SizesArrayGbl; 9657 } 9658 9659 // The map types are always constant so we don't need to generate code to 9660 // fill arrays. Instead, we create an array constant. 
9661 SmallVector<uint64_t, 4> Mapping(CombinedInfo.Types.size(), 0); 9662 llvm::copy(CombinedInfo.Types, Mapping.begin()); 9663 std::string MaptypesName = 9664 CGM.getOpenMPRuntime().getName({"offload_maptypes"}); 9665 auto *MapTypesArrayGbl = 9666 OMPBuilder.createOffloadMaptypes(Mapping, MaptypesName); 9667 Info.MapTypesArray = MapTypesArrayGbl; 9668 9669 // The information types are only built if there is debug information 9670 // requested. 9671 if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo) { 9672 Info.MapNamesArray = llvm::Constant::getNullValue( 9673 llvm::Type::getInt8Ty(CGF.Builder.getContext())->getPointerTo()); 9674 } else { 9675 auto fillInfoMap = [&](MappableExprsHandler::MappingExprInfo &MapExpr) { 9676 return emitMappingInformation(CGF, OMPBuilder, MapExpr); 9677 }; 9678 SmallVector<llvm::Constant *, 4> InfoMap(CombinedInfo.Exprs.size()); 9679 llvm::transform(CombinedInfo.Exprs, InfoMap.begin(), fillInfoMap); 9680 std::string MapnamesName = 9681 CGM.getOpenMPRuntime().getName({"offload_mapnames"}); 9682 auto *MapNamesArrayGbl = 9683 OMPBuilder.createOffloadMapnames(InfoMap, MapnamesName); 9684 Info.MapNamesArray = MapNamesArrayGbl; 9685 } 9686 9687 // If there's a present map type modifier, it must not be applied to the end 9688 // of a region, so generate a separate map type array in that case. 
9689 if (Info.separateBeginEndCalls()) { 9690 bool EndMapTypesDiffer = false; 9691 for (uint64_t &Type : Mapping) { 9692 if (Type & MappableExprsHandler::OMP_MAP_PRESENT) { 9693 Type &= ~MappableExprsHandler::OMP_MAP_PRESENT; 9694 EndMapTypesDiffer = true; 9695 } 9696 } 9697 if (EndMapTypesDiffer) { 9698 MapTypesArrayGbl = 9699 OMPBuilder.createOffloadMaptypes(Mapping, MaptypesName); 9700 Info.MapTypesArrayEnd = MapTypesArrayGbl; 9701 } 9702 } 9703 9704 for (unsigned I = 0; I < Info.NumberOfPtrs; ++I) { 9705 llvm::Value *BPVal = *CombinedInfo.BasePointers[I]; 9706 llvm::Value *BP = CGF.Builder.CreateConstInBoundsGEP2_32( 9707 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs), 9708 Info.BasePointersArray, 0, I); 9709 BP = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 9710 BP, BPVal->getType()->getPointerTo(/*AddrSpace=*/0)); 9711 Address BPAddr(BP, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy)); 9712 CGF.Builder.CreateStore(BPVal, BPAddr); 9713 9714 if (Info.requiresDevicePointerInfo()) 9715 if (const ValueDecl *DevVD = 9716 CombinedInfo.BasePointers[I].getDevicePtrDecl()) 9717 Info.CaptureDeviceAddrMap.try_emplace(DevVD, BPAddr); 9718 9719 llvm::Value *PVal = CombinedInfo.Pointers[I]; 9720 llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32( 9721 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs), 9722 Info.PointersArray, 0, I); 9723 P = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 9724 P, PVal->getType()->getPointerTo(/*AddrSpace=*/0)); 9725 Address PAddr(P, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy)); 9726 CGF.Builder.CreateStore(PVal, PAddr); 9727 9728 if (hasRuntimeEvaluationCaptureSize) { 9729 llvm::Value *S = CGF.Builder.CreateConstInBoundsGEP2_32( 9730 llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs), 9731 Info.SizesArray, 9732 /*Idx0=*/0, 9733 /*Idx1=*/I); 9734 Address SAddr(S, Ctx.getTypeAlignInChars(Int64Ty)); 9735 CGF.Builder.CreateStore(CGF.Builder.CreateIntCast(CombinedInfo.Sizes[I], 9736 CGM.Int64Ty, 9737 /*isSigned=*/true), 9738 SAddr); 
9739 } 9740 9741 // Fill up the mapper array. 9742 llvm::Value *MFunc = llvm::ConstantPointerNull::get(CGM.VoidPtrTy); 9743 if (CombinedInfo.Mappers[I]) { 9744 MFunc = CGM.getOpenMPRuntime().getOrCreateUserDefinedMapperFunc( 9745 cast<OMPDeclareMapperDecl>(CombinedInfo.Mappers[I])); 9746 MFunc = CGF.Builder.CreatePointerCast(MFunc, CGM.VoidPtrTy); 9747 Info.HasMapper = true; 9748 } 9749 Address MAddr = CGF.Builder.CreateConstArrayGEP(MappersArray, I); 9750 CGF.Builder.CreateStore(MFunc, MAddr); 9751 } 9752 } 9753 9754 if (!IsNonContiguous || CombinedInfo.NonContigInfo.Offsets.empty() || 9755 Info.NumberOfPtrs == 0) 9756 return; 9757 9758 emitNonContiguousDescriptor(CGF, CombinedInfo, Info); 9759 } 9760 9761 namespace { 9762 /// Additional arguments for emitOffloadingArraysArgument function. 9763 struct ArgumentsOptions { 9764 bool ForEndCall = false; 9765 ArgumentsOptions() = default; 9766 ArgumentsOptions(bool ForEndCall) : ForEndCall(ForEndCall) {} 9767 }; 9768 } // namespace 9769 9770 /// Emit the arguments to be passed to the runtime library based on the 9771 /// arrays of base pointers, pointers, sizes, map types, and mappers. If 9772 /// ForEndCall, emit map types to be passed for the end of the region instead of 9773 /// the beginning. 
9774 static void emitOffloadingArraysArgument( 9775 CodeGenFunction &CGF, llvm::Value *&BasePointersArrayArg, 9776 llvm::Value *&PointersArrayArg, llvm::Value *&SizesArrayArg, 9777 llvm::Value *&MapTypesArrayArg, llvm::Value *&MapNamesArrayArg, 9778 llvm::Value *&MappersArrayArg, CGOpenMPRuntime::TargetDataInfo &Info, 9779 const ArgumentsOptions &Options = ArgumentsOptions()) { 9780 assert((!Options.ForEndCall || Info.separateBeginEndCalls()) && 9781 "expected region end call to runtime only when end call is separate"); 9782 CodeGenModule &CGM = CGF.CGM; 9783 if (Info.NumberOfPtrs) { 9784 BasePointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( 9785 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs), 9786 Info.BasePointersArray, 9787 /*Idx0=*/0, /*Idx1=*/0); 9788 PointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( 9789 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs), 9790 Info.PointersArray, 9791 /*Idx0=*/0, 9792 /*Idx1=*/0); 9793 SizesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( 9794 llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs), Info.SizesArray, 9795 /*Idx0=*/0, /*Idx1=*/0); 9796 MapTypesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( 9797 llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs), 9798 Options.ForEndCall && Info.MapTypesArrayEnd ? Info.MapTypesArrayEnd 9799 : Info.MapTypesArray, 9800 /*Idx0=*/0, 9801 /*Idx1=*/0); 9802 9803 // Only emit the mapper information arrays if debug information is 9804 // requested. 
9805 if (CGF.CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo) 9806 MapNamesArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy); 9807 else 9808 MapNamesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( 9809 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs), 9810 Info.MapNamesArray, 9811 /*Idx0=*/0, 9812 /*Idx1=*/0); 9813 // If there is no user-defined mapper, set the mapper array to nullptr to 9814 // avoid an unnecessary data privatization 9815 if (!Info.HasMapper) 9816 MappersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy); 9817 else 9818 MappersArrayArg = 9819 CGF.Builder.CreatePointerCast(Info.MappersArray, CGM.VoidPtrPtrTy); 9820 } else { 9821 BasePointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy); 9822 PointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy); 9823 SizesArrayArg = llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo()); 9824 MapTypesArrayArg = 9825 llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo()); 9826 MapNamesArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy); 9827 MappersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy); 9828 } 9829 } 9830 9831 /// Check for inner distribute directive. 
9832 static const OMPExecutableDirective * 9833 getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) { 9834 const auto *CS = D.getInnermostCapturedStmt(); 9835 const auto *Body = 9836 CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true); 9837 const Stmt *ChildStmt = 9838 CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body); 9839 9840 if (const auto *NestedDir = 9841 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) { 9842 OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind(); 9843 switch (D.getDirectiveKind()) { 9844 case OMPD_target: 9845 if (isOpenMPDistributeDirective(DKind)) 9846 return NestedDir; 9847 if (DKind == OMPD_teams) { 9848 Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers( 9849 /*IgnoreCaptured=*/true); 9850 if (!Body) 9851 return nullptr; 9852 ChildStmt = CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body); 9853 if (const auto *NND = 9854 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) { 9855 DKind = NND->getDirectiveKind(); 9856 if (isOpenMPDistributeDirective(DKind)) 9857 return NND; 9858 } 9859 } 9860 return nullptr; 9861 case OMPD_target_teams: 9862 if (isOpenMPDistributeDirective(DKind)) 9863 return NestedDir; 9864 return nullptr; 9865 case OMPD_target_parallel: 9866 case OMPD_target_simd: 9867 case OMPD_target_parallel_for: 9868 case OMPD_target_parallel_for_simd: 9869 return nullptr; 9870 case OMPD_target_teams_distribute: 9871 case OMPD_target_teams_distribute_simd: 9872 case OMPD_target_teams_distribute_parallel_for: 9873 case OMPD_target_teams_distribute_parallel_for_simd: 9874 case OMPD_parallel: 9875 case OMPD_for: 9876 case OMPD_parallel_for: 9877 case OMPD_parallel_master: 9878 case OMPD_parallel_sections: 9879 case OMPD_for_simd: 9880 case OMPD_parallel_for_simd: 9881 case OMPD_cancel: 9882 case OMPD_cancellation_point: 9883 case OMPD_ordered: 9884 case OMPD_threadprivate: 9885 case OMPD_allocate: 9886 case OMPD_task: 9887 case OMPD_simd: 9888 case OMPD_tile: 9889 
case OMPD_unroll: 9890 case OMPD_sections: 9891 case OMPD_section: 9892 case OMPD_single: 9893 case OMPD_master: 9894 case OMPD_critical: 9895 case OMPD_taskyield: 9896 case OMPD_barrier: 9897 case OMPD_taskwait: 9898 case OMPD_taskgroup: 9899 case OMPD_atomic: 9900 case OMPD_flush: 9901 case OMPD_depobj: 9902 case OMPD_scan: 9903 case OMPD_teams: 9904 case OMPD_target_data: 9905 case OMPD_target_exit_data: 9906 case OMPD_target_enter_data: 9907 case OMPD_distribute: 9908 case OMPD_distribute_simd: 9909 case OMPD_distribute_parallel_for: 9910 case OMPD_distribute_parallel_for_simd: 9911 case OMPD_teams_distribute: 9912 case OMPD_teams_distribute_simd: 9913 case OMPD_teams_distribute_parallel_for: 9914 case OMPD_teams_distribute_parallel_for_simd: 9915 case OMPD_target_update: 9916 case OMPD_declare_simd: 9917 case OMPD_declare_variant: 9918 case OMPD_begin_declare_variant: 9919 case OMPD_end_declare_variant: 9920 case OMPD_declare_target: 9921 case OMPD_end_declare_target: 9922 case OMPD_declare_reduction: 9923 case OMPD_declare_mapper: 9924 case OMPD_taskloop: 9925 case OMPD_taskloop_simd: 9926 case OMPD_master_taskloop: 9927 case OMPD_master_taskloop_simd: 9928 case OMPD_parallel_master_taskloop: 9929 case OMPD_parallel_master_taskloop_simd: 9930 case OMPD_requires: 9931 case OMPD_metadirective: 9932 case OMPD_unknown: 9933 default: 9934 llvm_unreachable("Unexpected directive."); 9935 } 9936 } 9937 9938 return nullptr; 9939 } 9940 9941 /// Emit the user-defined mapper function. The code generation follows the 9942 /// pattern in the example below. 9943 /// \code 9944 /// void .omp_mapper.<type_name>.<mapper_id>.(void *rt_mapper_handle, 9945 /// void *base, void *begin, 9946 /// int64_t size, int64_t type, 9947 /// void *name = nullptr) { 9948 /// // Allocate space for an array section first or add a base/begin for 9949 /// // pointer dereference. 
///   if ((size > 1 || (base != begin && maptype.IsPtrAndObj)) &&
///       !maptype.IsDelete)
///     __tgt_push_mapper_component(rt_mapper_handle, base, begin,
///                                 size*sizeof(Ty), clearToFromMember(type));
///   // Map members.
///   for (unsigned i = 0; i < size; i++) {
///     // For each component specified by this mapper:
///     for (auto c : begin[i]->all_components) {
///       if (c.hasMapper())
///         (*c.Mapper())(rt_mapper_handle, c.arg_base, c.arg_begin, c.arg_size,
///                       c.arg_type, c.arg_name);
///       else
///         __tgt_push_mapper_component(rt_mapper_handle, c.arg_base,
///                                     c.arg_begin, c.arg_size, c.arg_type,
///                                     c.arg_name);
///     }
///   }
///   // Delete the array section.
///   if (size > 1 && maptype.IsDelete)
///     __tgt_push_mapper_component(rt_mapper_handle, base, begin,
///                                 size*sizeof(Ty), clearToFromMember(type));
/// }
/// \endcode
///
/// \param D The 'declare mapper' declaration to emit a function for. If a
/// function was already recorded for \p D in UDMMap, nothing is emitted.
/// \param CGF If non-null, \p D is additionally recorded in FunctionUDMMap
/// under \c CGF->CurFn.
void CGOpenMPRuntime::emitUserDefinedMapper(const OMPDeclareMapperDecl *D,
                                            CodeGenFunction *CGF) {
  // Each mapper declaration is emitted at most once.
  if (UDMMap.count(D) > 0)
    return;
  ASTContext &C = CGM.getContext();
  QualType Ty = D->getType();
  QualType PtrTy = C.getPointerType(Ty).withRestrict();
  QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
  // The variable declared by the mapper; it is privatized below so that it
  // refers to the array element currently being mapped.
  auto *MapperVarDecl =
      cast<VarDecl>(cast<DeclRefExpr>(D->getMapperVarRef())->getDecl());
  SourceLocation Loc = D->getLocation();
  CharUnits ElementSize = C.getTypeSizeInChars(Ty);

  // Prepare mapper function arguments and attributes.
  // The six parameters are, in order: handle, base, begin, size, type, name.
  ImplicitParamDecl HandleArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                              C.VoidPtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl BaseArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl BeginArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                             C.VoidPtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl SizeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl TypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl NameArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                            ImplicitParamDecl::Other);
  FunctionArgList Args;
  Args.push_back(&HandleArg);
  Args.push_back(&BaseArg);
  Args.push_back(&BeginArg);
  Args.push_back(&SizeArg);
  Args.push_back(&TypeArg);
  Args.push_back(&NameArg);
  const CGFunctionInfo &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  // The function name is built from "omp_mapper", the mangled name of the
  // mapped type and the name of the mapper declaration.
  SmallString<64> TyStr;
  llvm::raw_svector_ostream Out(TyStr);
  CGM.getCXXABI().getMangleContext().mangleTypeName(Ty, Out);
  std::string Name = getName({"omp_mapper", TyStr, D->getName()});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
  // Start the mapper function code generation.
  // A dedicated CodeGenFunction is used; the caller's CGF (if any) is only
  // consulted at the very end for bookkeeping.
  CodeGenFunction MapperCGF(CGM);
  MapperCGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  // Compute the starting and end addresses of array elements.
  llvm::Value *Size = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&SizeArg), /*Volatile=*/false,
      C.getPointerType(Int64Ty), Loc);
  // Prepare common arguments for array initiation and deletion.
  llvm::Value *Handle = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&HandleArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
  llvm::Value *BaseIn = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&BaseArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
  llvm::Value *BeginIn = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&BeginArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
  // Convert the size in bytes into the number of array elements.
  Size = MapperCGF.Builder.CreateExactUDiv(
      Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity()));
  // [PtrBegin, PtrEnd) is the range of elements the loop below walks over.
  llvm::Value *PtrBegin = MapperCGF.Builder.CreateBitCast(
      BeginIn, CGM.getTypes().ConvertTypeForMem(PtrTy));
  llvm::Value *PtrEnd = MapperCGF.Builder.CreateGEP(
      PtrBegin->getType()->getPointerElementType(), PtrBegin, Size);
  llvm::Value *MapType = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&TypeArg), /*Volatile=*/false,
      C.getPointerType(Int64Ty), Loc);
  llvm::Value *MapName = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&NameArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);

  // Emit array initiation if this is an array section and \p MapType indicates
  // that memory allocation is required.
  // HeadBB is passed as the exit block, so control reaches the loop header
  // whether or not the initialization code runs.
  llvm::BasicBlock *HeadBB = MapperCGF.createBasicBlock("omp.arraymap.head");
  emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
                             MapName, ElementSize, HeadBB, /*IsInit=*/true);

  // Emit a for loop to iterate through SizeArg of elements and map all of them.

  // Emit the loop header block.
  MapperCGF.EmitBlock(HeadBB);
  llvm::BasicBlock *BodyBB = MapperCGF.createBasicBlock("omp.arraymap.body");
  llvm::BasicBlock *DoneBB = MapperCGF.createBasicBlock("omp.done");
  // Evaluate whether the initial condition is satisfied.
  llvm::Value *IsEmpty =
      MapperCGF.Builder.CreateICmpEQ(PtrBegin, PtrEnd, "omp.arraymap.isempty");
  MapperCGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
  // Remember the block the loop is entered from; it is the first incoming
  // edge of the element-pointer PHI.
  llvm::BasicBlock *EntryBB = MapperCGF.Builder.GetInsertBlock();

  // Emit the loop body block.
  MapperCGF.EmitBlock(BodyBB);
  // LastBB tracks the block the back edge originates from. It stays BodyBB if
  // the mapper has no components and becomes the last "omp.type.end" block
  // otherwise.
  llvm::BasicBlock *LastBB = BodyBB;
  llvm::PHINode *PtrPHI = MapperCGF.Builder.CreatePHI(
      PtrBegin->getType(), 2, "omp.arraymap.ptrcurrent");
  PtrPHI->addIncoming(PtrBegin, EntryBB);
  Address PtrCurrent =
      Address(PtrPHI, MapperCGF.GetAddrOfLocalVar(&BeginArg)
                          .getAlignment()
                          .alignmentOfArrayElement(ElementSize));
  // Privatize the declared variable of mapper to be the current array element.
  CodeGenFunction::OMPPrivateScope Scope(MapperCGF);
  Scope.addPrivate(MapperVarDecl, [PtrCurrent]() { return PtrCurrent; });
  (void)Scope.Privatize();

  // Get map clause information. Fill up the arrays with all mapped variables.
  MappableExprsHandler::MapCombinedInfoTy Info;
  MappableExprsHandler MEHandler(*D, MapperCGF);
  MEHandler.generateAllInfoForMapper(Info);

  // Call the runtime API __tgt_mapper_num_components to get the number of
  // pre-existing components.
  llvm::Value *OffloadingArgs[] = {Handle};
  llvm::Value *PreviousSize = MapperCGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___tgt_mapper_num_components),
      OffloadingArgs);
  // Shift the component count to the position given by getFlagMemberOffset()
  // so it can be added to each component's map type below.
  llvm::Value *ShiftedPreviousSize = MapperCGF.Builder.CreateShl(
      PreviousSize,
      MapperCGF.Builder.getInt64(MappableExprsHandler::getFlagMemberOffset()));

  // Fill up the runtime mapper handle for all components.
  for (unsigned I = 0; I < Info.BasePointers.size(); ++I) {
    llvm::Value *CurBaseArg = MapperCGF.Builder.CreateBitCast(
        *Info.BasePointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
    llvm::Value *CurBeginArg = MapperCGF.Builder.CreateBitCast(
        Info.Pointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
    llvm::Value *CurSizeArg = Info.Sizes[I];
    // The component name is only emitted when debug information is requested;
    // otherwise a null pointer is passed.
    llvm::Value *CurNameArg =
        (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo)
            ? llvm::ConstantPointerNull::get(CGM.VoidPtrTy)
            : emitMappingInformation(MapperCGF, OMPBuilder, Info.Exprs[I]);

    // Extract the MEMBER_OF field from the map type.
    llvm::Value *OriMapType = MapperCGF.Builder.getInt64(Info.Types[I]);
    llvm::Value *MemberMapType =
        MapperCGF.Builder.CreateNUWAdd(OriMapType, ShiftedPreviousSize);

    // Combine the map type inherited from user-defined mapper with that
    // specified in the program. According to the OMP_MAP_TO and OMP_MAP_FROM
    // bits of the \a MapType, which is the input argument of the mapper
    // function, the following code will set the OMP_MAP_TO and OMP_MAP_FROM
    // bits of MemberMapType.
    // [OpenMP 5.0], 1.2.6. map-type decay.
    //        | alloc |  to   | from  | tofrom | release | delete
    // ----------------------------------------------------------
    // alloc  | alloc | alloc | alloc | alloc  | release | delete
    // to     | alloc |  to   | alloc |   to   | release | delete
    // from   | alloc | alloc | from  |  from  | release | delete
    // tofrom | alloc |  to   | from  | tofrom | release | delete
    llvm::Value *LeftToFrom = MapperCGF.Builder.CreateAnd(
        MapType,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO |
                                   MappableExprsHandler::OMP_MAP_FROM));
    llvm::BasicBlock *AllocBB = MapperCGF.createBasicBlock("omp.type.alloc");
    llvm::BasicBlock *AllocElseBB =
        MapperCGF.createBasicBlock("omp.type.alloc.else");
    llvm::BasicBlock *ToBB = MapperCGF.createBasicBlock("omp.type.to");
    llvm::BasicBlock *ToElseBB = MapperCGF.createBasicBlock("omp.type.to.else");
    llvm::BasicBlock *FromBB = MapperCGF.createBasicBlock("omp.type.from");
    llvm::BasicBlock *EndBB = MapperCGF.createBasicBlock("omp.type.end");
    llvm::Value *IsAlloc = MapperCGF.Builder.CreateIsNull(LeftToFrom);
    MapperCGF.Builder.CreateCondBr(IsAlloc, AllocBB, AllocElseBB);
    // In case of alloc, clear OMP_MAP_TO and OMP_MAP_FROM.
    MapperCGF.EmitBlock(AllocBB);
    llvm::Value *AllocMapType = MapperCGF.Builder.CreateAnd(
        MemberMapType,
        MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO |
                                     MappableExprsHandler::OMP_MAP_FROM)));
    MapperCGF.Builder.CreateBr(EndBB);
    MapperCGF.EmitBlock(AllocElseBB);
    llvm::Value *IsTo = MapperCGF.Builder.CreateICmpEQ(
        LeftToFrom,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO));
    MapperCGF.Builder.CreateCondBr(IsTo, ToBB, ToElseBB);
    // In case of to, clear OMP_MAP_FROM.
    MapperCGF.EmitBlock(ToBB);
    llvm::Value *ToMapType = MapperCGF.Builder.CreateAnd(
        MemberMapType,
        MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_FROM));
    MapperCGF.Builder.CreateBr(EndBB);
    MapperCGF.EmitBlock(ToElseBB);
    llvm::Value *IsFrom = MapperCGF.Builder.CreateICmpEQ(
        LeftToFrom,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_FROM));
    MapperCGF.Builder.CreateCondBr(IsFrom, FromBB, EndBB);
    // In case of from, clear OMP_MAP_TO.
    MapperCGF.EmitBlock(FromBB);
    llvm::Value *FromMapType = MapperCGF.Builder.CreateAnd(
        MemberMapType,
        MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_TO));
    // NOTE(review): unlike AllocBB and ToBB, no explicit branch to EndBB is
    // created here; presumably EmitBlock(EndBB) supplies the fall-through
    // branch -- confirm against CodeGenFunction::EmitBlock.
    // In case of tofrom, do nothing.
    MapperCGF.EmitBlock(EndBB);
    LastBB = EndBB;
    // Merge the four possibilities. The edge from ToElseBB is the one taken
    // when the type is neither alloc, to, nor from, so it carries the
    // unmodified MemberMapType.
    llvm::PHINode *CurMapType =
        MapperCGF.Builder.CreatePHI(CGM.Int64Ty, 4, "omp.maptype");
    CurMapType->addIncoming(AllocMapType, AllocBB);
    CurMapType->addIncoming(ToMapType, ToBB);
    CurMapType->addIncoming(FromMapType, FromBB);
    CurMapType->addIncoming(MemberMapType, ToElseBB);

    // Note: this array intentionally shadows the single-element
    // OffloadingArgs declared before the loop.
    llvm::Value *OffloadingArgs[] = {Handle,     CurBaseArg, CurBeginArg,
                                     CurSizeArg, CurMapType, CurNameArg};
    if (Info.Mappers[I]) {
      // Call the corresponding mapper function.
      llvm::Function *MapperFunc = getOrCreateUserDefinedMapperFunc(
          cast<OMPDeclareMapperDecl>(Info.Mappers[I]));
      assert(MapperFunc && "Expect a valid mapper function is available.");
      MapperCGF.EmitNounwindRuntimeCall(MapperFunc, OffloadingArgs);
    } else {
      // Call the runtime API __tgt_push_mapper_component to fill up the runtime
      // data structure.
      MapperCGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___tgt_push_mapper_component),
          OffloadingArgs);
    }
  }

  // Update the pointer to point to the next element that needs to be mapped,
  // and check whether we have mapped all elements.
  llvm::Type *ElemTy = PtrPHI->getType()->getPointerElementType();
  llvm::Value *PtrNext = MapperCGF.Builder.CreateConstGEP1_32(
      ElemTy, PtrPHI, /*Idx0=*/1, "omp.arraymap.next");
  // Second incoming edge of the element-pointer PHI: the loop back edge.
  PtrPHI->addIncoming(PtrNext, LastBB);
  llvm::Value *IsDone =
      MapperCGF.Builder.CreateICmpEQ(PtrNext, PtrEnd, "omp.arraymap.isdone");
  llvm::BasicBlock *ExitBB = MapperCGF.createBasicBlock("omp.arraymap.exit");
  MapperCGF.Builder.CreateCondBr(IsDone, ExitBB, BodyBB);

  MapperCGF.EmitBlock(ExitBB);
  // Emit array deletion if this is an array section and \p MapType indicates
  // that deletion is required.
  emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
                             MapName, ElementSize, DoneBB, /*IsInit=*/false);

  // Emit the function exit block.
  MapperCGF.EmitBlock(DoneBB, /*IsFinished=*/true);
  MapperCGF.FinishFunction();
  // Record the emitted function so later requests for D reuse it.
  UDMMap.try_emplace(D, Fn);
  if (CGF) {
    auto &Decls = FunctionUDMMap.FindAndConstruct(CGF->CurFn);
    Decls.second.push_back(D);
  }
}

/// Emit the array initialization or deletion portion for user-defined mapper
/// code generation. First, it evaluates whether an array section is mapped and
/// whether the \a MapType instructs to delete this section. If \a IsInit is
/// true, and \a MapType indicates to not delete this array, array
/// initialization code is generated. If \a IsInit is false, and \a MapType
/// indicates to delete this array, array deletion code is generated.
10228 void CGOpenMPRuntime::emitUDMapperArrayInitOrDel( 10229 CodeGenFunction &MapperCGF, llvm::Value *Handle, llvm::Value *Base, 10230 llvm::Value *Begin, llvm::Value *Size, llvm::Value *MapType, 10231 llvm::Value *MapName, CharUnits ElementSize, llvm::BasicBlock *ExitBB, 10232 bool IsInit) { 10233 StringRef Prefix = IsInit ? ".init" : ".del"; 10234 10235 // Evaluate if this is an array section. 10236 llvm::BasicBlock *BodyBB = 10237 MapperCGF.createBasicBlock(getName({"omp.array", Prefix})); 10238 llvm::Value *IsArray = MapperCGF.Builder.CreateICmpSGT( 10239 Size, MapperCGF.Builder.getInt64(1), "omp.arrayinit.isarray"); 10240 llvm::Value *DeleteBit = MapperCGF.Builder.CreateAnd( 10241 MapType, 10242 MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_DELETE)); 10243 llvm::Value *DeleteCond; 10244 llvm::Value *Cond; 10245 if (IsInit) { 10246 // base != begin? 10247 llvm::Value *BaseIsBegin = MapperCGF.Builder.CreateIsNotNull( 10248 MapperCGF.Builder.CreatePtrDiff(Base, Begin)); 10249 // IsPtrAndObj? 10250 llvm::Value *PtrAndObjBit = MapperCGF.Builder.CreateAnd( 10251 MapType, 10252 MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_PTR_AND_OBJ)); 10253 PtrAndObjBit = MapperCGF.Builder.CreateIsNotNull(PtrAndObjBit); 10254 BaseIsBegin = MapperCGF.Builder.CreateAnd(BaseIsBegin, PtrAndObjBit); 10255 Cond = MapperCGF.Builder.CreateOr(IsArray, BaseIsBegin); 10256 DeleteCond = MapperCGF.Builder.CreateIsNull( 10257 DeleteBit, getName({"omp.array", Prefix, ".delete"})); 10258 } else { 10259 Cond = IsArray; 10260 DeleteCond = MapperCGF.Builder.CreateIsNotNull( 10261 DeleteBit, getName({"omp.array", Prefix, ".delete"})); 10262 } 10263 Cond = MapperCGF.Builder.CreateAnd(Cond, DeleteCond); 10264 MapperCGF.Builder.CreateCondBr(Cond, BodyBB, ExitBB); 10265 10266 MapperCGF.EmitBlock(BodyBB); 10267 // Get the array size by multiplying element size and element number (i.e., \p 10268 // Size). 
10269 llvm::Value *ArraySize = MapperCGF.Builder.CreateNUWMul( 10270 Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity())); 10271 // Remove OMP_MAP_TO and OMP_MAP_FROM from the map type, so that it achieves 10272 // memory allocation/deletion purpose only. 10273 llvm::Value *MapTypeArg = MapperCGF.Builder.CreateAnd( 10274 MapType, 10275 MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO | 10276 MappableExprsHandler::OMP_MAP_FROM))); 10277 MapTypeArg = MapperCGF.Builder.CreateOr( 10278 MapTypeArg, 10279 MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_IMPLICIT)); 10280 10281 // Call the runtime API __tgt_push_mapper_component to fill up the runtime 10282 // data structure. 10283 llvm::Value *OffloadingArgs[] = {Handle, Base, Begin, 10284 ArraySize, MapTypeArg, MapName}; 10285 MapperCGF.EmitRuntimeCall( 10286 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), 10287 OMPRTL___tgt_push_mapper_component), 10288 OffloadingArgs); 10289 } 10290 10291 llvm::Function *CGOpenMPRuntime::getOrCreateUserDefinedMapperFunc( 10292 const OMPDeclareMapperDecl *D) { 10293 auto I = UDMMap.find(D); 10294 if (I != UDMMap.end()) 10295 return I->second; 10296 emitUserDefinedMapper(D); 10297 return UDMMap.lookup(D); 10298 } 10299 10300 void CGOpenMPRuntime::emitTargetNumIterationsCall( 10301 CodeGenFunction &CGF, const OMPExecutableDirective &D, 10302 llvm::Value *DeviceID, 10303 llvm::function_ref<llvm::Value *(CodeGenFunction &CGF, 10304 const OMPLoopDirective &D)> 10305 SizeEmitter) { 10306 OpenMPDirectiveKind Kind = D.getDirectiveKind(); 10307 const OMPExecutableDirective *TD = &D; 10308 // Get nested teams distribute kind directive, if any. 
10309 if (!isOpenMPDistributeDirective(Kind) || !isOpenMPTeamsDirective(Kind)) 10310 TD = getNestedDistributeDirective(CGM.getContext(), D); 10311 if (!TD) 10312 return; 10313 const auto *LD = cast<OMPLoopDirective>(TD); 10314 auto &&CodeGen = [LD, DeviceID, SizeEmitter, &D, this](CodeGenFunction &CGF, 10315 PrePostActionTy &) { 10316 if (llvm::Value *NumIterations = SizeEmitter(CGF, *LD)) { 10317 llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc()); 10318 llvm::Value *Args[] = {RTLoc, DeviceID, NumIterations}; 10319 CGF.EmitRuntimeCall( 10320 OMPBuilder.getOrCreateRuntimeFunction( 10321 CGM.getModule(), OMPRTL___kmpc_push_target_tripcount_mapper), 10322 Args); 10323 } 10324 }; 10325 emitInlinedDirective(CGF, OMPD_unknown, CodeGen); 10326 } 10327 10328 void CGOpenMPRuntime::emitTargetCall( 10329 CodeGenFunction &CGF, const OMPExecutableDirective &D, 10330 llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond, 10331 llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device, 10332 llvm::function_ref<llvm::Value *(CodeGenFunction &CGF, 10333 const OMPLoopDirective &D)> 10334 SizeEmitter) { 10335 if (!CGF.HaveInsertPoint()) 10336 return; 10337 10338 assert(OutlinedFn && "Invalid outlined function!"); 10339 10340 const bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>() || 10341 D.hasClausesOfKind<OMPNowaitClause>(); 10342 llvm::SmallVector<llvm::Value *, 16> CapturedVars; 10343 const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target); 10344 auto &&ArgsCodegen = [&CS, &CapturedVars](CodeGenFunction &CGF, 10345 PrePostActionTy &) { 10346 CGF.GenerateOpenMPCapturedVars(CS, CapturedVars); 10347 }; 10348 emitInlinedDirective(CGF, OMPD_unknown, ArgsCodegen); 10349 10350 CodeGenFunction::OMPTargetDataInfo InputInfo; 10351 llvm::Value *MapTypesArray = nullptr; 10352 llvm::Value *MapNamesArray = nullptr; 10353 // Fill up the pointer arrays and transfer execution to the device. 
10354 auto &&ThenGen = [this, Device, OutlinedFn, OutlinedFnID, &D, &InputInfo, 10355 &MapTypesArray, &MapNamesArray, &CS, RequiresOuterTask, 10356 &CapturedVars, 10357 SizeEmitter](CodeGenFunction &CGF, PrePostActionTy &) { 10358 if (Device.getInt() == OMPC_DEVICE_ancestor) { 10359 // Reverse offloading is not supported, so just execute on the host. 10360 if (RequiresOuterTask) { 10361 CapturedVars.clear(); 10362 CGF.GenerateOpenMPCapturedVars(CS, CapturedVars); 10363 } 10364 emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars); 10365 return; 10366 } 10367 10368 // On top of the arrays that were filled up, the target offloading call 10369 // takes as arguments the device id as well as the host pointer. The host 10370 // pointer is used by the runtime library to identify the current target 10371 // region, so it only has to be unique and not necessarily point to 10372 // anything. It could be the pointer to the outlined function that 10373 // implements the target region, but we aren't using that so that the 10374 // compiler doesn't need to keep that, and could therefore inline the host 10375 // function if proven worthwhile during optimization. 10376 10377 // From this point on, we need to have an ID of the target region defined. 10378 assert(OutlinedFnID && "Invalid outlined function ID!"); 10379 10380 // Emit device ID if any. 10381 llvm::Value *DeviceID; 10382 if (Device.getPointer()) { 10383 assert((Device.getInt() == OMPC_DEVICE_unknown || 10384 Device.getInt() == OMPC_DEVICE_device_num) && 10385 "Expected device_num modifier."); 10386 llvm::Value *DevVal = CGF.EmitScalarExpr(Device.getPointer()); 10387 DeviceID = 10388 CGF.Builder.CreateIntCast(DevVal, CGF.Int64Ty, /*isSigned=*/true); 10389 } else { 10390 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF); 10391 } 10392 10393 // Emit the number of elements in the offloading arrays. 
10394 llvm::Value *PointerNum = 10395 CGF.Builder.getInt32(InputInfo.NumberOfTargetItems); 10396 10397 // Return value of the runtime offloading call. 10398 llvm::Value *Return; 10399 10400 llvm::Value *NumTeams = emitNumTeamsForTargetDirective(CGF, D); 10401 llvm::Value *NumThreads = emitNumThreadsForTargetDirective(CGF, D); 10402 10403 // Source location for the ident struct 10404 llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc()); 10405 10406 // Emit tripcount for the target loop-based directive. 10407 emitTargetNumIterationsCall(CGF, D, DeviceID, SizeEmitter); 10408 10409 bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>(); 10410 // The target region is an outlined function launched by the runtime 10411 // via calls __tgt_target() or __tgt_target_teams(). 10412 // 10413 // __tgt_target() launches a target region with one team and one thread, 10414 // executing a serial region. This master thread may in turn launch 10415 // more threads within its team upon encountering a parallel region, 10416 // however, no additional teams can be launched on the device. 10417 // 10418 // __tgt_target_teams() launches a target region with one or more teams, 10419 // each with one or more threads. This call is required for target 10420 // constructs such as: 10421 // 'target teams' 10422 // 'target' / 'teams' 10423 // 'target teams distribute parallel for' 10424 // 'target parallel' 10425 // and so on. 10426 // 10427 // Note that on the host and CPU targets, the runtime implementation of 10428 // these calls simply call the outlined function without forking threads. 10429 // The outlined functions themselves have runtime calls to 10430 // __kmpc_fork_teams() and __kmpc_fork() for this purpose, codegen'd by 10431 // the compiler in emitTeamsCall() and emitParallelCall(). 
10432 // 10433 // In contrast, on the NVPTX target, the implementation of 10434 // __tgt_target_teams() launches a GPU kernel with the requested number 10435 // of teams and threads so no additional calls to the runtime are required. 10436 if (NumTeams) { 10437 // If we have NumTeams defined this means that we have an enclosed teams 10438 // region. Therefore we also expect to have NumThreads defined. These two 10439 // values should be defined in the presence of a teams directive, 10440 // regardless of having any clauses associated. If the user is using teams 10441 // but no clauses, these two values will be the default that should be 10442 // passed to the runtime library - a 32-bit integer with the value zero. 10443 assert(NumThreads && "Thread limit expression should be available along " 10444 "with number of teams."); 10445 SmallVector<llvm::Value *> OffloadingArgs = { 10446 RTLoc, 10447 DeviceID, 10448 OutlinedFnID, 10449 PointerNum, 10450 InputInfo.BasePointersArray.getPointer(), 10451 InputInfo.PointersArray.getPointer(), 10452 InputInfo.SizesArray.getPointer(), 10453 MapTypesArray, 10454 MapNamesArray, 10455 InputInfo.MappersArray.getPointer(), 10456 NumTeams, 10457 NumThreads}; 10458 if (HasNowait) { 10459 // Add int32_t depNum = 0, void *depList = nullptr, int32_t 10460 // noAliasDepNum = 0, void *noAliasDepList = nullptr. 10461 OffloadingArgs.push_back(CGF.Builder.getInt32(0)); 10462 OffloadingArgs.push_back(llvm::ConstantPointerNull::get(CGM.VoidPtrTy)); 10463 OffloadingArgs.push_back(CGF.Builder.getInt32(0)); 10464 OffloadingArgs.push_back(llvm::ConstantPointerNull::get(CGM.VoidPtrTy)); 10465 } 10466 Return = CGF.EmitRuntimeCall( 10467 OMPBuilder.getOrCreateRuntimeFunction( 10468 CGM.getModule(), HasNowait 10469 ? 
OMPRTL___tgt_target_teams_nowait_mapper 10470 : OMPRTL___tgt_target_teams_mapper), 10471 OffloadingArgs); 10472 } else { 10473 SmallVector<llvm::Value *> OffloadingArgs = { 10474 RTLoc, 10475 DeviceID, 10476 OutlinedFnID, 10477 PointerNum, 10478 InputInfo.BasePointersArray.getPointer(), 10479 InputInfo.PointersArray.getPointer(), 10480 InputInfo.SizesArray.getPointer(), 10481 MapTypesArray, 10482 MapNamesArray, 10483 InputInfo.MappersArray.getPointer()}; 10484 if (HasNowait) { 10485 // Add int32_t depNum = 0, void *depList = nullptr, int32_t 10486 // noAliasDepNum = 0, void *noAliasDepList = nullptr. 10487 OffloadingArgs.push_back(CGF.Builder.getInt32(0)); 10488 OffloadingArgs.push_back(llvm::ConstantPointerNull::get(CGM.VoidPtrTy)); 10489 OffloadingArgs.push_back(CGF.Builder.getInt32(0)); 10490 OffloadingArgs.push_back(llvm::ConstantPointerNull::get(CGM.VoidPtrTy)); 10491 } 10492 Return = CGF.EmitRuntimeCall( 10493 OMPBuilder.getOrCreateRuntimeFunction( 10494 CGM.getModule(), HasNowait ? OMPRTL___tgt_target_nowait_mapper 10495 : OMPRTL___tgt_target_mapper), 10496 OffloadingArgs); 10497 } 10498 10499 // Check the error code and execute the host version if required. 10500 llvm::BasicBlock *OffloadFailedBlock = 10501 CGF.createBasicBlock("omp_offload.failed"); 10502 llvm::BasicBlock *OffloadContBlock = 10503 CGF.createBasicBlock("omp_offload.cont"); 10504 llvm::Value *Failed = CGF.Builder.CreateIsNotNull(Return); 10505 CGF.Builder.CreateCondBr(Failed, OffloadFailedBlock, OffloadContBlock); 10506 10507 CGF.EmitBlock(OffloadFailedBlock); 10508 if (RequiresOuterTask) { 10509 CapturedVars.clear(); 10510 CGF.GenerateOpenMPCapturedVars(CS, CapturedVars); 10511 } 10512 emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars); 10513 CGF.EmitBranch(OffloadContBlock); 10514 10515 CGF.EmitBlock(OffloadContBlock, /*IsFinished=*/true); 10516 }; 10517 10518 // Notify that the host version must be executed. 
10519 auto &&ElseGen = [this, &D, OutlinedFn, &CS, &CapturedVars, 10520 RequiresOuterTask](CodeGenFunction &CGF, 10521 PrePostActionTy &) { 10522 if (RequiresOuterTask) { 10523 CapturedVars.clear(); 10524 CGF.GenerateOpenMPCapturedVars(CS, CapturedVars); 10525 } 10526 emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars); 10527 }; 10528 10529 auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray, 10530 &MapNamesArray, &CapturedVars, RequiresOuterTask, 10531 &CS](CodeGenFunction &CGF, PrePostActionTy &) { 10532 // Fill up the arrays with all the captured variables. 10533 MappableExprsHandler::MapCombinedInfoTy CombinedInfo; 10534 10535 // Get mappable expression information. 10536 MappableExprsHandler MEHandler(D, CGF); 10537 llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers; 10538 llvm::DenseSet<CanonicalDeclPtr<const Decl>> MappedVarSet; 10539 10540 auto RI = CS.getCapturedRecordDecl()->field_begin(); 10541 auto *CV = CapturedVars.begin(); 10542 for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(), 10543 CE = CS.capture_end(); 10544 CI != CE; ++CI, ++RI, ++CV) { 10545 MappableExprsHandler::MapCombinedInfoTy CurInfo; 10546 MappableExprsHandler::StructRangeInfoTy PartialStruct; 10547 10548 // VLA sizes are passed to the outlined region by copy and do not have map 10549 // information associated. 10550 if (CI->capturesVariableArrayType()) { 10551 CurInfo.Exprs.push_back(nullptr); 10552 CurInfo.BasePointers.push_back(*CV); 10553 CurInfo.Pointers.push_back(*CV); 10554 CurInfo.Sizes.push_back(CGF.Builder.CreateIntCast( 10555 CGF.getTypeSize(RI->getType()), CGF.Int64Ty, /*isSigned=*/true)); 10556 // Copy to the device as an argument. No need to retrieve it. 
10557 CurInfo.Types.push_back(MappableExprsHandler::OMP_MAP_LITERAL | 10558 MappableExprsHandler::OMP_MAP_TARGET_PARAM | 10559 MappableExprsHandler::OMP_MAP_IMPLICIT); 10560 CurInfo.Mappers.push_back(nullptr); 10561 } else { 10562 // If we have any information in the map clause, we use it, otherwise we 10563 // just do a default mapping. 10564 MEHandler.generateInfoForCapture(CI, *CV, CurInfo, PartialStruct); 10565 if (!CI->capturesThis()) 10566 MappedVarSet.insert(CI->getCapturedVar()); 10567 else 10568 MappedVarSet.insert(nullptr); 10569 if (CurInfo.BasePointers.empty() && !PartialStruct.Base.isValid()) 10570 MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurInfo); 10571 // Generate correct mapping for variables captured by reference in 10572 // lambdas. 10573 if (CI->capturesVariable()) 10574 MEHandler.generateInfoForLambdaCaptures(CI->getCapturedVar(), *CV, 10575 CurInfo, LambdaPointers); 10576 } 10577 // We expect to have at least an element of information for this capture. 10578 assert((!CurInfo.BasePointers.empty() || PartialStruct.Base.isValid()) && 10579 "Non-existing map pointer for capture!"); 10580 assert(CurInfo.BasePointers.size() == CurInfo.Pointers.size() && 10581 CurInfo.BasePointers.size() == CurInfo.Sizes.size() && 10582 CurInfo.BasePointers.size() == CurInfo.Types.size() && 10583 CurInfo.BasePointers.size() == CurInfo.Mappers.size() && 10584 "Inconsistent map information sizes!"); 10585 10586 // If there is an entry in PartialStruct it means we have a struct with 10587 // individual members mapped. Emit an extra combined entry. 10588 if (PartialStruct.Base.isValid()) { 10589 CombinedInfo.append(PartialStruct.PreliminaryMapData); 10590 MEHandler.emitCombinedEntry( 10591 CombinedInfo, CurInfo.Types, PartialStruct, nullptr, 10592 !PartialStruct.PreliminaryMapData.BasePointers.empty()); 10593 } 10594 10595 // We need to append the results of this capture to what we already have. 
10596 CombinedInfo.append(CurInfo); 10597 } 10598 // Adjust MEMBER_OF flags for the lambdas captures. 10599 MEHandler.adjustMemberOfForLambdaCaptures( 10600 LambdaPointers, CombinedInfo.BasePointers, CombinedInfo.Pointers, 10601 CombinedInfo.Types); 10602 // Map any list items in a map clause that were not captures because they 10603 // weren't referenced within the construct. 10604 MEHandler.generateAllInfo(CombinedInfo, MappedVarSet); 10605 10606 TargetDataInfo Info; 10607 // Fill up the arrays and create the arguments. 10608 emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder); 10609 emitOffloadingArraysArgument( 10610 CGF, Info.BasePointersArray, Info.PointersArray, Info.SizesArray, 10611 Info.MapTypesArray, Info.MapNamesArray, Info.MappersArray, Info, 10612 {/*ForEndTask=*/false}); 10613 10614 InputInfo.NumberOfTargetItems = Info.NumberOfPtrs; 10615 InputInfo.BasePointersArray = 10616 Address(Info.BasePointersArray, CGM.getPointerAlign()); 10617 InputInfo.PointersArray = 10618 Address(Info.PointersArray, CGM.getPointerAlign()); 10619 InputInfo.SizesArray = Address(Info.SizesArray, CGM.getPointerAlign()); 10620 InputInfo.MappersArray = Address(Info.MappersArray, CGM.getPointerAlign()); 10621 MapTypesArray = Info.MapTypesArray; 10622 MapNamesArray = Info.MapNamesArray; 10623 if (RequiresOuterTask) 10624 CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo); 10625 else 10626 emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen); 10627 }; 10628 10629 auto &&TargetElseGen = [this, &ElseGen, &D, RequiresOuterTask]( 10630 CodeGenFunction &CGF, PrePostActionTy &) { 10631 if (RequiresOuterTask) { 10632 CodeGenFunction::OMPTargetDataInfo InputInfo; 10633 CGF.EmitOMPTargetTaskBasedDirective(D, ElseGen, InputInfo); 10634 } else { 10635 emitInlinedDirective(CGF, D.getDirectiveKind(), ElseGen); 10636 } 10637 }; 10638 10639 // If we have a target function ID it means that we need to support 10640 // offloading, otherwise, just execute on the host. 
We need to execute on host 10641 // regardless of the conditional in the if clause if, e.g., the user do not 10642 // specify target triples. 10643 if (OutlinedFnID) { 10644 if (IfCond) { 10645 emitIfClause(CGF, IfCond, TargetThenGen, TargetElseGen); 10646 } else { 10647 RegionCodeGenTy ThenRCG(TargetThenGen); 10648 ThenRCG(CGF); 10649 } 10650 } else { 10651 RegionCodeGenTy ElseRCG(TargetElseGen); 10652 ElseRCG(CGF); 10653 } 10654 } 10655 10656 void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S, 10657 StringRef ParentName) { 10658 if (!S) 10659 return; 10660 10661 // Codegen OMP target directives that offload compute to the device. 10662 bool RequiresDeviceCodegen = 10663 isa<OMPExecutableDirective>(S) && 10664 isOpenMPTargetExecutionDirective( 10665 cast<OMPExecutableDirective>(S)->getDirectiveKind()); 10666 10667 if (RequiresDeviceCodegen) { 10668 const auto &E = *cast<OMPExecutableDirective>(S); 10669 unsigned DeviceID; 10670 unsigned FileID; 10671 unsigned Line; 10672 getTargetEntryUniqueInfo(CGM.getContext(), E.getBeginLoc(), DeviceID, 10673 FileID, Line); 10674 10675 // Is this a target region that should not be emitted as an entry point? If 10676 // so just signal we are done with this target region. 
10677 if (!OffloadEntriesInfoManager.hasTargetRegionEntryInfo(DeviceID, FileID, 10678 ParentName, Line)) 10679 return; 10680 10681 switch (E.getDirectiveKind()) { 10682 case OMPD_target: 10683 CodeGenFunction::EmitOMPTargetDeviceFunction(CGM, ParentName, 10684 cast<OMPTargetDirective>(E)); 10685 break; 10686 case OMPD_target_parallel: 10687 CodeGenFunction::EmitOMPTargetParallelDeviceFunction( 10688 CGM, ParentName, cast<OMPTargetParallelDirective>(E)); 10689 break; 10690 case OMPD_target_teams: 10691 CodeGenFunction::EmitOMPTargetTeamsDeviceFunction( 10692 CGM, ParentName, cast<OMPTargetTeamsDirective>(E)); 10693 break; 10694 case OMPD_target_teams_distribute: 10695 CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction( 10696 CGM, ParentName, cast<OMPTargetTeamsDistributeDirective>(E)); 10697 break; 10698 case OMPD_target_teams_distribute_simd: 10699 CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction( 10700 CGM, ParentName, cast<OMPTargetTeamsDistributeSimdDirective>(E)); 10701 break; 10702 case OMPD_target_parallel_for: 10703 CodeGenFunction::EmitOMPTargetParallelForDeviceFunction( 10704 CGM, ParentName, cast<OMPTargetParallelForDirective>(E)); 10705 break; 10706 case OMPD_target_parallel_for_simd: 10707 CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction( 10708 CGM, ParentName, cast<OMPTargetParallelForSimdDirective>(E)); 10709 break; 10710 case OMPD_target_simd: 10711 CodeGenFunction::EmitOMPTargetSimdDeviceFunction( 10712 CGM, ParentName, cast<OMPTargetSimdDirective>(E)); 10713 break; 10714 case OMPD_target_teams_distribute_parallel_for: 10715 CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction( 10716 CGM, ParentName, 10717 cast<OMPTargetTeamsDistributeParallelForDirective>(E)); 10718 break; 10719 case OMPD_target_teams_distribute_parallel_for_simd: 10720 CodeGenFunction:: 10721 EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction( 10722 CGM, ParentName, 10723 
cast<OMPTargetTeamsDistributeParallelForSimdDirective>(E)); 10724 break; 10725 case OMPD_parallel: 10726 case OMPD_for: 10727 case OMPD_parallel_for: 10728 case OMPD_parallel_master: 10729 case OMPD_parallel_sections: 10730 case OMPD_for_simd: 10731 case OMPD_parallel_for_simd: 10732 case OMPD_cancel: 10733 case OMPD_cancellation_point: 10734 case OMPD_ordered: 10735 case OMPD_threadprivate: 10736 case OMPD_allocate: 10737 case OMPD_task: 10738 case OMPD_simd: 10739 case OMPD_tile: 10740 case OMPD_unroll: 10741 case OMPD_sections: 10742 case OMPD_section: 10743 case OMPD_single: 10744 case OMPD_master: 10745 case OMPD_critical: 10746 case OMPD_taskyield: 10747 case OMPD_barrier: 10748 case OMPD_taskwait: 10749 case OMPD_taskgroup: 10750 case OMPD_atomic: 10751 case OMPD_flush: 10752 case OMPD_depobj: 10753 case OMPD_scan: 10754 case OMPD_teams: 10755 case OMPD_target_data: 10756 case OMPD_target_exit_data: 10757 case OMPD_target_enter_data: 10758 case OMPD_distribute: 10759 case OMPD_distribute_simd: 10760 case OMPD_distribute_parallel_for: 10761 case OMPD_distribute_parallel_for_simd: 10762 case OMPD_teams_distribute: 10763 case OMPD_teams_distribute_simd: 10764 case OMPD_teams_distribute_parallel_for: 10765 case OMPD_teams_distribute_parallel_for_simd: 10766 case OMPD_target_update: 10767 case OMPD_declare_simd: 10768 case OMPD_declare_variant: 10769 case OMPD_begin_declare_variant: 10770 case OMPD_end_declare_variant: 10771 case OMPD_declare_target: 10772 case OMPD_end_declare_target: 10773 case OMPD_declare_reduction: 10774 case OMPD_declare_mapper: 10775 case OMPD_taskloop: 10776 case OMPD_taskloop_simd: 10777 case OMPD_master_taskloop: 10778 case OMPD_master_taskloop_simd: 10779 case OMPD_parallel_master_taskloop: 10780 case OMPD_parallel_master_taskloop_simd: 10781 case OMPD_requires: 10782 case OMPD_metadirective: 10783 case OMPD_unknown: 10784 default: 10785 llvm_unreachable("Unknown target directive for OpenMP device codegen."); 10786 } 10787 return; 
10788 } 10789 10790 if (const auto *E = dyn_cast<OMPExecutableDirective>(S)) { 10791 if (!E->hasAssociatedStmt() || !E->getAssociatedStmt()) 10792 return; 10793 10794 scanForTargetRegionsFunctions(E->getRawStmt(), ParentName); 10795 return; 10796 } 10797 10798 // If this is a lambda function, look into its body. 10799 if (const auto *L = dyn_cast<LambdaExpr>(S)) 10800 S = L->getBody(); 10801 10802 // Keep looking for target regions recursively. 10803 for (const Stmt *II : S->children()) 10804 scanForTargetRegionsFunctions(II, ParentName); 10805 } 10806 10807 static bool isAssumedToBeNotEmitted(const ValueDecl *VD, bool IsDevice) { 10808 Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy = 10809 OMPDeclareTargetDeclAttr::getDeviceType(VD); 10810 if (!DevTy) 10811 return false; 10812 // Do not emit device_type(nohost) functions for the host. 10813 if (!IsDevice && DevTy == OMPDeclareTargetDeclAttr::DT_NoHost) 10814 return true; 10815 // Do not emit device_type(host) functions for the device. 10816 if (IsDevice && DevTy == OMPDeclareTargetDeclAttr::DT_Host) 10817 return true; 10818 return false; 10819 } 10820 10821 bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) { 10822 // If emitting code for the host, we do not process FD here. Instead we do 10823 // the normal code generation. 10824 if (!CGM.getLangOpts().OpenMPIsDevice) { 10825 if (const auto *FD = dyn_cast<FunctionDecl>(GD.getDecl())) 10826 if (isAssumedToBeNotEmitted(cast<ValueDecl>(FD), 10827 CGM.getLangOpts().OpenMPIsDevice)) 10828 return true; 10829 return false; 10830 } 10831 10832 const ValueDecl *VD = cast<ValueDecl>(GD.getDecl()); 10833 // Try to detect target regions in the function. 
10834 if (const auto *FD = dyn_cast<FunctionDecl>(VD)) { 10835 StringRef Name = CGM.getMangledName(GD); 10836 scanForTargetRegionsFunctions(FD->getBody(), Name); 10837 if (isAssumedToBeNotEmitted(cast<ValueDecl>(FD), 10838 CGM.getLangOpts().OpenMPIsDevice)) 10839 return true; 10840 } 10841 10842 // Do not to emit function if it is not marked as declare target. 10843 return !OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD) && 10844 AlreadyEmittedTargetDecls.count(VD) == 0; 10845 } 10846 10847 bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) { 10848 if (isAssumedToBeNotEmitted(cast<ValueDecl>(GD.getDecl()), 10849 CGM.getLangOpts().OpenMPIsDevice)) 10850 return true; 10851 10852 if (!CGM.getLangOpts().OpenMPIsDevice) 10853 return false; 10854 10855 // Check if there are Ctors/Dtors in this declaration and look for target 10856 // regions in it. We use the complete variant to produce the kernel name 10857 // mangling. 10858 QualType RDTy = cast<VarDecl>(GD.getDecl())->getType(); 10859 if (const auto *RD = RDTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl()) { 10860 for (const CXXConstructorDecl *Ctor : RD->ctors()) { 10861 StringRef ParentName = 10862 CGM.getMangledName(GlobalDecl(Ctor, Ctor_Complete)); 10863 scanForTargetRegionsFunctions(Ctor->getBody(), ParentName); 10864 } 10865 if (const CXXDestructorDecl *Dtor = RD->getDestructor()) { 10866 StringRef ParentName = 10867 CGM.getMangledName(GlobalDecl(Dtor, Dtor_Complete)); 10868 scanForTargetRegionsFunctions(Dtor->getBody(), ParentName); 10869 } 10870 } 10871 10872 // Do not to emit variable if it is not marked as declare target. 
10873 llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 10874 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration( 10875 cast<VarDecl>(GD.getDecl())); 10876 if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link || 10877 (*Res == OMPDeclareTargetDeclAttr::MT_To && 10878 HasRequiresUnifiedSharedMemory)) { 10879 DeferredGlobalVariables.insert(cast<VarDecl>(GD.getDecl())); 10880 return true; 10881 } 10882 return false; 10883 } 10884 10885 void CGOpenMPRuntime::registerTargetGlobalVariable(const VarDecl *VD, 10886 llvm::Constant *Addr) { 10887 if (CGM.getLangOpts().OMPTargetTriples.empty() && 10888 !CGM.getLangOpts().OpenMPIsDevice) 10889 return; 10890 10891 // If we have host/nohost variables, they do not need to be registered. 10892 Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy = 10893 OMPDeclareTargetDeclAttr::getDeviceType(VD); 10894 if (DevTy && DevTy.getValue() != OMPDeclareTargetDeclAttr::DT_Any) 10895 return; 10896 10897 llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 10898 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD); 10899 if (!Res) { 10900 if (CGM.getLangOpts().OpenMPIsDevice) { 10901 // Register non-target variables being emitted in device code (debug info 10902 // may cause this). 10903 StringRef VarName = CGM.getMangledName(VD); 10904 EmittedNonTargetVariables.try_emplace(VarName, Addr); 10905 } 10906 return; 10907 } 10908 // Register declare target variables. 
10909 OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags; 10910 StringRef VarName; 10911 CharUnits VarSize; 10912 llvm::GlobalValue::LinkageTypes Linkage; 10913 10914 if (*Res == OMPDeclareTargetDeclAttr::MT_To && 10915 !HasRequiresUnifiedSharedMemory) { 10916 Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo; 10917 VarName = CGM.getMangledName(VD); 10918 if (VD->hasDefinition(CGM.getContext()) != VarDecl::DeclarationOnly) { 10919 VarSize = CGM.getContext().getTypeSizeInChars(VD->getType()); 10920 assert(!VarSize.isZero() && "Expected non-zero size of the variable"); 10921 } else { 10922 VarSize = CharUnits::Zero(); 10923 } 10924 Linkage = CGM.getLLVMLinkageVarDefinition(VD, /*IsConstant=*/false); 10925 // Temp solution to prevent optimizations of the internal variables. 10926 if (CGM.getLangOpts().OpenMPIsDevice && !VD->isExternallyVisible()) { 10927 // Do not create a "ref-variable" if the original is not also available 10928 // on the host. 10929 if (!OffloadEntriesInfoManager.hasDeviceGlobalVarEntryInfo(VarName)) 10930 return; 10931 std::string RefName = getName({VarName, "ref"}); 10932 if (!CGM.GetGlobalValue(RefName)) { 10933 llvm::Constant *AddrRef = 10934 getOrCreateInternalVariable(Addr->getType(), RefName); 10935 auto *GVAddrRef = cast<llvm::GlobalVariable>(AddrRef); 10936 GVAddrRef->setConstant(/*Val=*/true); 10937 GVAddrRef->setLinkage(llvm::GlobalValue::InternalLinkage); 10938 GVAddrRef->setInitializer(Addr); 10939 CGM.addCompilerUsedGlobal(GVAddrRef); 10940 } 10941 } 10942 } else { 10943 assert(((*Res == OMPDeclareTargetDeclAttr::MT_Link) || 10944 (*Res == OMPDeclareTargetDeclAttr::MT_To && 10945 HasRequiresUnifiedSharedMemory)) && 10946 "Declare target attribute must link or to with unified memory."); 10947 if (*Res == OMPDeclareTargetDeclAttr::MT_Link) 10948 Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink; 10949 else 10950 Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo; 10951 10952 if 
(CGM.getLangOpts().OpenMPIsDevice) { 10953 VarName = Addr->getName(); 10954 Addr = nullptr; 10955 } else { 10956 VarName = getAddrOfDeclareTargetVar(VD).getName(); 10957 Addr = cast<llvm::Constant>(getAddrOfDeclareTargetVar(VD).getPointer()); 10958 } 10959 VarSize = CGM.getPointerSize(); 10960 Linkage = llvm::GlobalValue::WeakAnyLinkage; 10961 } 10962 10963 OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo( 10964 VarName, Addr, VarSize, Flags, Linkage); 10965 } 10966 10967 bool CGOpenMPRuntime::emitTargetGlobal(GlobalDecl GD) { 10968 if (isa<FunctionDecl>(GD.getDecl()) || 10969 isa<OMPDeclareReductionDecl>(GD.getDecl())) 10970 return emitTargetFunctions(GD); 10971 10972 return emitTargetGlobalVariable(GD); 10973 } 10974 10975 void CGOpenMPRuntime::emitDeferredTargetDecls() const { 10976 for (const VarDecl *VD : DeferredGlobalVariables) { 10977 llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 10978 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD); 10979 if (!Res) 10980 continue; 10981 if (*Res == OMPDeclareTargetDeclAttr::MT_To && 10982 !HasRequiresUnifiedSharedMemory) { 10983 CGM.EmitGlobal(VD); 10984 } else { 10985 assert((*Res == OMPDeclareTargetDeclAttr::MT_Link || 10986 (*Res == OMPDeclareTargetDeclAttr::MT_To && 10987 HasRequiresUnifiedSharedMemory)) && 10988 "Expected link clause or to clause with unified memory."); 10989 (void)CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD); 10990 } 10991 } 10992 } 10993 10994 void CGOpenMPRuntime::adjustTargetSpecificDataForLambdas( 10995 CodeGenFunction &CGF, const OMPExecutableDirective &D) const { 10996 assert(isOpenMPTargetExecutionDirective(D.getDirectiveKind()) && 10997 " Expected target-based directive."); 10998 } 10999 11000 void CGOpenMPRuntime::processRequiresDirective(const OMPRequiresDecl *D) { 11001 for (const OMPClause *Clause : D->clauselists()) { 11002 if (Clause->getClauseKind() == OMPC_unified_shared_memory) { 11003 HasRequiresUnifiedSharedMemory = true; 11004 } else if 
(const auto *AC = 11005 dyn_cast<OMPAtomicDefaultMemOrderClause>(Clause)) { 11006 switch (AC->getAtomicDefaultMemOrderKind()) { 11007 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_acq_rel: 11008 RequiresAtomicOrdering = llvm::AtomicOrdering::AcquireRelease; 11009 break; 11010 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_seq_cst: 11011 RequiresAtomicOrdering = llvm::AtomicOrdering::SequentiallyConsistent; 11012 break; 11013 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_relaxed: 11014 RequiresAtomicOrdering = llvm::AtomicOrdering::Monotonic; 11015 break; 11016 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_unknown: 11017 break; 11018 } 11019 } 11020 } 11021 } 11022 11023 llvm::AtomicOrdering CGOpenMPRuntime::getDefaultMemoryOrdering() const { 11024 return RequiresAtomicOrdering; 11025 } 11026 11027 bool CGOpenMPRuntime::hasAllocateAttributeForGlobalVar(const VarDecl *VD, 11028 LangAS &AS) { 11029 if (!VD || !VD->hasAttr<OMPAllocateDeclAttr>()) 11030 return false; 11031 const auto *A = VD->getAttr<OMPAllocateDeclAttr>(); 11032 switch(A->getAllocatorType()) { 11033 case OMPAllocateDeclAttr::OMPNullMemAlloc: 11034 case OMPAllocateDeclAttr::OMPDefaultMemAlloc: 11035 // Not supported, fallback to the default mem space. 
11036 case OMPAllocateDeclAttr::OMPLargeCapMemAlloc: 11037 case OMPAllocateDeclAttr::OMPCGroupMemAlloc: 11038 case OMPAllocateDeclAttr::OMPHighBWMemAlloc: 11039 case OMPAllocateDeclAttr::OMPLowLatMemAlloc: 11040 case OMPAllocateDeclAttr::OMPThreadMemAlloc: 11041 case OMPAllocateDeclAttr::OMPConstMemAlloc: 11042 case OMPAllocateDeclAttr::OMPPTeamMemAlloc: 11043 AS = LangAS::Default; 11044 return true; 11045 case OMPAllocateDeclAttr::OMPUserDefinedMemAlloc: 11046 llvm_unreachable("Expected predefined allocator for the variables with the " 11047 "static storage."); 11048 } 11049 return false; 11050 } 11051 11052 bool CGOpenMPRuntime::hasRequiresUnifiedSharedMemory() const { 11053 return HasRequiresUnifiedSharedMemory; 11054 } 11055 11056 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::DisableAutoDeclareTargetRAII( 11057 CodeGenModule &CGM) 11058 : CGM(CGM) { 11059 if (CGM.getLangOpts().OpenMPIsDevice) { 11060 SavedShouldMarkAsGlobal = CGM.getOpenMPRuntime().ShouldMarkAsGlobal; 11061 CGM.getOpenMPRuntime().ShouldMarkAsGlobal = false; 11062 } 11063 } 11064 11065 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::~DisableAutoDeclareTargetRAII() { 11066 if (CGM.getLangOpts().OpenMPIsDevice) 11067 CGM.getOpenMPRuntime().ShouldMarkAsGlobal = SavedShouldMarkAsGlobal; 11068 } 11069 11070 bool CGOpenMPRuntime::markAsGlobalTarget(GlobalDecl GD) { 11071 if (!CGM.getLangOpts().OpenMPIsDevice || !ShouldMarkAsGlobal) 11072 return true; 11073 11074 const auto *D = cast<FunctionDecl>(GD.getDecl()); 11075 // Do not to emit function if it is marked as declare target as it was already 11076 // emitted. 
11077 if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(D)) { 11078 if (D->hasBody() && AlreadyEmittedTargetDecls.count(D) == 0) { 11079 if (auto *F = dyn_cast_or_null<llvm::Function>( 11080 CGM.GetGlobalValue(CGM.getMangledName(GD)))) 11081 return !F->isDeclaration(); 11082 return false; 11083 } 11084 return true; 11085 } 11086 11087 return !AlreadyEmittedTargetDecls.insert(D).second; 11088 } 11089 11090 llvm::Function *CGOpenMPRuntime::emitRequiresDirectiveRegFun() { 11091 // If we don't have entries or if we are emitting code for the device, we 11092 // don't need to do anything. 11093 if (CGM.getLangOpts().OMPTargetTriples.empty() || 11094 CGM.getLangOpts().OpenMPSimd || CGM.getLangOpts().OpenMPIsDevice || 11095 (OffloadEntriesInfoManager.empty() && 11096 !HasEmittedDeclareTargetRegion && 11097 !HasEmittedTargetRegion)) 11098 return nullptr; 11099 11100 // Create and register the function that handles the requires directives. 11101 ASTContext &C = CGM.getContext(); 11102 11103 llvm::Function *RequiresRegFn; 11104 { 11105 CodeGenFunction CGF(CGM); 11106 const auto &FI = CGM.getTypes().arrangeNullaryFunction(); 11107 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); 11108 std::string ReqName = getName({"omp_offloading", "requires_reg"}); 11109 RequiresRegFn = CGM.CreateGlobalInitOrCleanUpFunction(FTy, ReqName, FI); 11110 CGF.StartFunction(GlobalDecl(), C.VoidTy, RequiresRegFn, FI, {}); 11111 OpenMPOffloadingRequiresDirFlags Flags = OMP_REQ_NONE; 11112 // TODO: check for other requires clauses. 11113 // The requires directive takes effect only when a target region is 11114 // present in the compilation unit. Otherwise it is ignored and not 11115 // passed to the runtime. This avoids the runtime from throwing an error 11116 // for mismatching requires clauses across compilation units that don't 11117 // contain at least 1 target region. 
11118 assert((HasEmittedTargetRegion || 11119 HasEmittedDeclareTargetRegion || 11120 !OffloadEntriesInfoManager.empty()) && 11121 "Target or declare target region expected."); 11122 if (HasRequiresUnifiedSharedMemory) 11123 Flags = OMP_REQ_UNIFIED_SHARED_MEMORY; 11124 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 11125 CGM.getModule(), OMPRTL___tgt_register_requires), 11126 llvm::ConstantInt::get(CGM.Int64Ty, Flags)); 11127 CGF.FinishFunction(); 11128 } 11129 return RequiresRegFn; 11130 } 11131 11132 void CGOpenMPRuntime::emitTeamsCall(CodeGenFunction &CGF, 11133 const OMPExecutableDirective &D, 11134 SourceLocation Loc, 11135 llvm::Function *OutlinedFn, 11136 ArrayRef<llvm::Value *> CapturedVars) { 11137 if (!CGF.HaveInsertPoint()) 11138 return; 11139 11140 llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc); 11141 CodeGenFunction::RunCleanupsScope Scope(CGF); 11142 11143 // Build call __kmpc_fork_teams(loc, n, microtask, var1, .., varn); 11144 llvm::Value *Args[] = { 11145 RTLoc, 11146 CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars 11147 CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy())}; 11148 llvm::SmallVector<llvm::Value *, 16> RealArgs; 11149 RealArgs.append(std::begin(Args), std::end(Args)); 11150 RealArgs.append(CapturedVars.begin(), CapturedVars.end()); 11151 11152 llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction( 11153 CGM.getModule(), OMPRTL___kmpc_fork_teams); 11154 CGF.EmitRuntimeCall(RTLFn, RealArgs); 11155 } 11156 11157 void CGOpenMPRuntime::emitNumTeamsClause(CodeGenFunction &CGF, 11158 const Expr *NumTeams, 11159 const Expr *ThreadLimit, 11160 SourceLocation Loc) { 11161 if (!CGF.HaveInsertPoint()) 11162 return; 11163 11164 llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc); 11165 11166 llvm::Value *NumTeamsVal = 11167 NumTeams 11168 ? 
CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(NumTeams), 11169 CGF.CGM.Int32Ty, /* isSigned = */ true) 11170 : CGF.Builder.getInt32(0); 11171 11172 llvm::Value *ThreadLimitVal = 11173 ThreadLimit 11174 ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit), 11175 CGF.CGM.Int32Ty, /* isSigned = */ true) 11176 : CGF.Builder.getInt32(0); 11177 11178 // Build call __kmpc_push_num_teamss(&loc, global_tid, num_teams, thread_limit) 11179 llvm::Value *PushNumTeamsArgs[] = {RTLoc, getThreadID(CGF, Loc), NumTeamsVal, 11180 ThreadLimitVal}; 11181 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 11182 CGM.getModule(), OMPRTL___kmpc_push_num_teams), 11183 PushNumTeamsArgs); 11184 } 11185 11186 void CGOpenMPRuntime::emitTargetDataCalls( 11187 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, 11188 const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) { 11189 if (!CGF.HaveInsertPoint()) 11190 return; 11191 11192 // Action used to replace the default codegen action and turn privatization 11193 // off. 11194 PrePostActionTy NoPrivAction; 11195 11196 // Generate the code for the opening of the data environment. Capture all the 11197 // arguments of the runtime call by reference because they are used in the 11198 // closing of the region. 11199 auto &&BeginThenGen = [this, &D, Device, &Info, 11200 &CodeGen](CodeGenFunction &CGF, PrePostActionTy &) { 11201 // Fill up the arrays with all the mapped variables. 11202 MappableExprsHandler::MapCombinedInfoTy CombinedInfo; 11203 11204 // Get map clause information. 11205 MappableExprsHandler MEHandler(D, CGF); 11206 MEHandler.generateAllInfo(CombinedInfo); 11207 11208 // Fill up the arrays and create the arguments. 
11209 emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder, 11210 /*IsNonContiguous=*/true); 11211 11212 llvm::Value *BasePointersArrayArg = nullptr; 11213 llvm::Value *PointersArrayArg = nullptr; 11214 llvm::Value *SizesArrayArg = nullptr; 11215 llvm::Value *MapTypesArrayArg = nullptr; 11216 llvm::Value *MapNamesArrayArg = nullptr; 11217 llvm::Value *MappersArrayArg = nullptr; 11218 emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg, 11219 SizesArrayArg, MapTypesArrayArg, 11220 MapNamesArrayArg, MappersArrayArg, Info); 11221 11222 // Emit device ID if any. 11223 llvm::Value *DeviceID = nullptr; 11224 if (Device) { 11225 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device), 11226 CGF.Int64Ty, /*isSigned=*/true); 11227 } else { 11228 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF); 11229 } 11230 11231 // Emit the number of elements in the offloading arrays. 11232 llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs); 11233 // 11234 // Source location for the ident struct 11235 llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc()); 11236 11237 llvm::Value *OffloadingArgs[] = {RTLoc, 11238 DeviceID, 11239 PointerNum, 11240 BasePointersArrayArg, 11241 PointersArrayArg, 11242 SizesArrayArg, 11243 MapTypesArrayArg, 11244 MapNamesArrayArg, 11245 MappersArrayArg}; 11246 CGF.EmitRuntimeCall( 11247 OMPBuilder.getOrCreateRuntimeFunction( 11248 CGM.getModule(), OMPRTL___tgt_target_data_begin_mapper), 11249 OffloadingArgs); 11250 11251 // If device pointer privatization is required, emit the body of the region 11252 // here. It will have to be duplicated: with and without privatization. 11253 if (!Info.CaptureDeviceAddrMap.empty()) 11254 CodeGen(CGF); 11255 }; 11256 11257 // Generate code for the closing of the data region. 
11258 auto &&EndThenGen = [this, Device, &Info, &D](CodeGenFunction &CGF, 11259 PrePostActionTy &) { 11260 assert(Info.isValid() && "Invalid data environment closing arguments."); 11261 11262 llvm::Value *BasePointersArrayArg = nullptr; 11263 llvm::Value *PointersArrayArg = nullptr; 11264 llvm::Value *SizesArrayArg = nullptr; 11265 llvm::Value *MapTypesArrayArg = nullptr; 11266 llvm::Value *MapNamesArrayArg = nullptr; 11267 llvm::Value *MappersArrayArg = nullptr; 11268 emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg, 11269 SizesArrayArg, MapTypesArrayArg, 11270 MapNamesArrayArg, MappersArrayArg, Info, 11271 {/*ForEndCall=*/true}); 11272 11273 // Emit device ID if any. 11274 llvm::Value *DeviceID = nullptr; 11275 if (Device) { 11276 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device), 11277 CGF.Int64Ty, /*isSigned=*/true); 11278 } else { 11279 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF); 11280 } 11281 11282 // Emit the number of elements in the offloading arrays. 11283 llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs); 11284 11285 // Source location for the ident struct 11286 llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc()); 11287 11288 llvm::Value *OffloadingArgs[] = {RTLoc, 11289 DeviceID, 11290 PointerNum, 11291 BasePointersArrayArg, 11292 PointersArrayArg, 11293 SizesArrayArg, 11294 MapTypesArrayArg, 11295 MapNamesArrayArg, 11296 MappersArrayArg}; 11297 CGF.EmitRuntimeCall( 11298 OMPBuilder.getOrCreateRuntimeFunction( 11299 CGM.getModule(), OMPRTL___tgt_target_data_end_mapper), 11300 OffloadingArgs); 11301 }; 11302 11303 // If we need device pointer privatization, we need to emit the body of the 11304 // region with no privatization in the 'else' branch of the conditional. 11305 // Otherwise, we don't have to do anything. 
11306 auto &&BeginElseGen = [&Info, &CodeGen, &NoPrivAction](CodeGenFunction &CGF, 11307 PrePostActionTy &) { 11308 if (!Info.CaptureDeviceAddrMap.empty()) { 11309 CodeGen.setAction(NoPrivAction); 11310 CodeGen(CGF); 11311 } 11312 }; 11313 11314 // We don't have to do anything to close the region if the if clause evaluates 11315 // to false. 11316 auto &&EndElseGen = [](CodeGenFunction &CGF, PrePostActionTy &) {}; 11317 11318 if (IfCond) { 11319 emitIfClause(CGF, IfCond, BeginThenGen, BeginElseGen); 11320 } else { 11321 RegionCodeGenTy RCG(BeginThenGen); 11322 RCG(CGF); 11323 } 11324 11325 // If we don't require privatization of device pointers, we emit the body in 11326 // between the runtime calls. This avoids duplicating the body code. 11327 if (Info.CaptureDeviceAddrMap.empty()) { 11328 CodeGen.setAction(NoPrivAction); 11329 CodeGen(CGF); 11330 } 11331 11332 if (IfCond) { 11333 emitIfClause(CGF, IfCond, EndThenGen, EndElseGen); 11334 } else { 11335 RegionCodeGenTy RCG(EndThenGen); 11336 RCG(CGF); 11337 } 11338 } 11339 11340 void CGOpenMPRuntime::emitTargetDataStandAloneCall( 11341 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, 11342 const Expr *Device) { 11343 if (!CGF.HaveInsertPoint()) 11344 return; 11345 11346 assert((isa<OMPTargetEnterDataDirective>(D) || 11347 isa<OMPTargetExitDataDirective>(D) || 11348 isa<OMPTargetUpdateDirective>(D)) && 11349 "Expecting either target enter, exit data, or update directives."); 11350 11351 CodeGenFunction::OMPTargetDataInfo InputInfo; 11352 llvm::Value *MapTypesArray = nullptr; 11353 llvm::Value *MapNamesArray = nullptr; 11354 // Generate the code for the opening of the data environment. 11355 auto &&ThenGen = [this, &D, Device, &InputInfo, &MapTypesArray, 11356 &MapNamesArray](CodeGenFunction &CGF, PrePostActionTy &) { 11357 // Emit device ID if any. 
11358 llvm::Value *DeviceID = nullptr; 11359 if (Device) { 11360 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device), 11361 CGF.Int64Ty, /*isSigned=*/true); 11362 } else { 11363 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF); 11364 } 11365 11366 // Emit the number of elements in the offloading arrays. 11367 llvm::Constant *PointerNum = 11368 CGF.Builder.getInt32(InputInfo.NumberOfTargetItems); 11369 11370 // Source location for the ident struct 11371 llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc()); 11372 11373 llvm::Value *OffloadingArgs[] = {RTLoc, 11374 DeviceID, 11375 PointerNum, 11376 InputInfo.BasePointersArray.getPointer(), 11377 InputInfo.PointersArray.getPointer(), 11378 InputInfo.SizesArray.getPointer(), 11379 MapTypesArray, 11380 MapNamesArray, 11381 InputInfo.MappersArray.getPointer()}; 11382 11383 // Select the right runtime function call for each standalone 11384 // directive. 11385 const bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>(); 11386 RuntimeFunction RTLFn; 11387 switch (D.getDirectiveKind()) { 11388 case OMPD_target_enter_data: 11389 RTLFn = HasNowait ? OMPRTL___tgt_target_data_begin_nowait_mapper 11390 : OMPRTL___tgt_target_data_begin_mapper; 11391 break; 11392 case OMPD_target_exit_data: 11393 RTLFn = HasNowait ? OMPRTL___tgt_target_data_end_nowait_mapper 11394 : OMPRTL___tgt_target_data_end_mapper; 11395 break; 11396 case OMPD_target_update: 11397 RTLFn = HasNowait ? 
OMPRTL___tgt_target_data_update_nowait_mapper 11398 : OMPRTL___tgt_target_data_update_mapper; 11399 break; 11400 case OMPD_parallel: 11401 case OMPD_for: 11402 case OMPD_parallel_for: 11403 case OMPD_parallel_master: 11404 case OMPD_parallel_sections: 11405 case OMPD_for_simd: 11406 case OMPD_parallel_for_simd: 11407 case OMPD_cancel: 11408 case OMPD_cancellation_point: 11409 case OMPD_ordered: 11410 case OMPD_threadprivate: 11411 case OMPD_allocate: 11412 case OMPD_task: 11413 case OMPD_simd: 11414 case OMPD_tile: 11415 case OMPD_unroll: 11416 case OMPD_sections: 11417 case OMPD_section: 11418 case OMPD_single: 11419 case OMPD_master: 11420 case OMPD_critical: 11421 case OMPD_taskyield: 11422 case OMPD_barrier: 11423 case OMPD_taskwait: 11424 case OMPD_taskgroup: 11425 case OMPD_atomic: 11426 case OMPD_flush: 11427 case OMPD_depobj: 11428 case OMPD_scan: 11429 case OMPD_teams: 11430 case OMPD_target_data: 11431 case OMPD_distribute: 11432 case OMPD_distribute_simd: 11433 case OMPD_distribute_parallel_for: 11434 case OMPD_distribute_parallel_for_simd: 11435 case OMPD_teams_distribute: 11436 case OMPD_teams_distribute_simd: 11437 case OMPD_teams_distribute_parallel_for: 11438 case OMPD_teams_distribute_parallel_for_simd: 11439 case OMPD_declare_simd: 11440 case OMPD_declare_variant: 11441 case OMPD_begin_declare_variant: 11442 case OMPD_end_declare_variant: 11443 case OMPD_declare_target: 11444 case OMPD_end_declare_target: 11445 case OMPD_declare_reduction: 11446 case OMPD_declare_mapper: 11447 case OMPD_taskloop: 11448 case OMPD_taskloop_simd: 11449 case OMPD_master_taskloop: 11450 case OMPD_master_taskloop_simd: 11451 case OMPD_parallel_master_taskloop: 11452 case OMPD_parallel_master_taskloop_simd: 11453 case OMPD_target: 11454 case OMPD_target_simd: 11455 case OMPD_target_teams_distribute: 11456 case OMPD_target_teams_distribute_simd: 11457 case OMPD_target_teams_distribute_parallel_for: 11458 case OMPD_target_teams_distribute_parallel_for_simd: 11459 case 
OMPD_target_teams: 11460 case OMPD_target_parallel: 11461 case OMPD_target_parallel_for: 11462 case OMPD_target_parallel_for_simd: 11463 case OMPD_requires: 11464 case OMPD_metadirective: 11465 case OMPD_unknown: 11466 default: 11467 llvm_unreachable("Unexpected standalone target data directive."); 11468 break; 11469 } 11470 CGF.EmitRuntimeCall( 11471 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), RTLFn), 11472 OffloadingArgs); 11473 }; 11474 11475 auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray, 11476 &MapNamesArray](CodeGenFunction &CGF, 11477 PrePostActionTy &) { 11478 // Fill up the arrays with all the mapped variables. 11479 MappableExprsHandler::MapCombinedInfoTy CombinedInfo; 11480 11481 // Get map clause information. 11482 MappableExprsHandler MEHandler(D, CGF); 11483 MEHandler.generateAllInfo(CombinedInfo); 11484 11485 TargetDataInfo Info; 11486 // Fill up the arrays and create the arguments. 11487 emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder, 11488 /*IsNonContiguous=*/true); 11489 bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>() || 11490 D.hasClausesOfKind<OMPNowaitClause>(); 11491 emitOffloadingArraysArgument( 11492 CGF, Info.BasePointersArray, Info.PointersArray, Info.SizesArray, 11493 Info.MapTypesArray, Info.MapNamesArray, Info.MappersArray, Info, 11494 {/*ForEndTask=*/false}); 11495 InputInfo.NumberOfTargetItems = Info.NumberOfPtrs; 11496 InputInfo.BasePointersArray = 11497 Address(Info.BasePointersArray, CGM.getPointerAlign()); 11498 InputInfo.PointersArray = 11499 Address(Info.PointersArray, CGM.getPointerAlign()); 11500 InputInfo.SizesArray = 11501 Address(Info.SizesArray, CGM.getPointerAlign()); 11502 InputInfo.MappersArray = Address(Info.MappersArray, CGM.getPointerAlign()); 11503 MapTypesArray = Info.MapTypesArray; 11504 MapNamesArray = Info.MapNamesArray; 11505 if (RequiresOuterTask) 11506 CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo); 11507 else 11508 
emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen); 11509 }; 11510 11511 if (IfCond) { 11512 emitIfClause(CGF, IfCond, TargetThenGen, 11513 [](CodeGenFunction &CGF, PrePostActionTy &) {}); 11514 } else { 11515 RegionCodeGenTy ThenRCG(TargetThenGen); 11516 ThenRCG(CGF); 11517 } 11518 } 11519 11520 namespace { 11521 /// Kind of parameter in a function with 'declare simd' directive. 11522 enum ParamKindTy { LinearWithVarStride, Linear, Uniform, Vector }; 11523 /// Attribute set of the parameter. 11524 struct ParamAttrTy { 11525 ParamKindTy Kind = Vector; 11526 llvm::APSInt StrideOrArg; 11527 llvm::APSInt Alignment; 11528 }; 11529 } // namespace 11530 11531 static unsigned evaluateCDTSize(const FunctionDecl *FD, 11532 ArrayRef<ParamAttrTy> ParamAttrs) { 11533 // Every vector variant of a SIMD-enabled function has a vector length (VLEN). 11534 // If OpenMP clause "simdlen" is used, the VLEN is the value of the argument 11535 // of that clause. The VLEN value must be power of 2. 11536 // In other case the notion of the function`s "characteristic data type" (CDT) 11537 // is used to compute the vector length. 11538 // CDT is defined in the following order: 11539 // a) For non-void function, the CDT is the return type. 11540 // b) If the function has any non-uniform, non-linear parameters, then the 11541 // CDT is the type of the first such parameter. 11542 // c) If the CDT determined by a) or b) above is struct, union, or class 11543 // type which is pass-by-value (except for the type that maps to the 11544 // built-in complex data type), the characteristic data type is int. 11545 // d) If none of the above three cases is applicable, the CDT is int. 11546 // The VLEN is then determined based on the CDT and the size of vector 11547 // register of that ISA for which current vector version is generated. 
The 11548 // VLEN is computed using the formula below: 11549 // VLEN = sizeof(vector_register) / sizeof(CDT), 11550 // where vector register size specified in section 3.2.1 Registers and the 11551 // Stack Frame of original AMD64 ABI document. 11552 QualType RetType = FD->getReturnType(); 11553 if (RetType.isNull()) 11554 return 0; 11555 ASTContext &C = FD->getASTContext(); 11556 QualType CDT; 11557 if (!RetType.isNull() && !RetType->isVoidType()) { 11558 CDT = RetType; 11559 } else { 11560 unsigned Offset = 0; 11561 if (const auto *MD = dyn_cast<CXXMethodDecl>(FD)) { 11562 if (ParamAttrs[Offset].Kind == Vector) 11563 CDT = C.getPointerType(C.getRecordType(MD->getParent())); 11564 ++Offset; 11565 } 11566 if (CDT.isNull()) { 11567 for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) { 11568 if (ParamAttrs[I + Offset].Kind == Vector) { 11569 CDT = FD->getParamDecl(I)->getType(); 11570 break; 11571 } 11572 } 11573 } 11574 } 11575 if (CDT.isNull()) 11576 CDT = C.IntTy; 11577 CDT = CDT->getCanonicalTypeUnqualified(); 11578 if (CDT->isRecordType() || CDT->isUnionType()) 11579 CDT = C.IntTy; 11580 return C.getTypeSize(CDT); 11581 } 11582 11583 static void 11584 emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn, 11585 const llvm::APSInt &VLENVal, 11586 ArrayRef<ParamAttrTy> ParamAttrs, 11587 OMPDeclareSimdDeclAttr::BranchStateTy State) { 11588 struct ISADataTy { 11589 char ISA; 11590 unsigned VecRegSize; 11591 }; 11592 ISADataTy ISAData[] = { 11593 { 11594 'b', 128 11595 }, // SSE 11596 { 11597 'c', 256 11598 }, // AVX 11599 { 11600 'd', 256 11601 }, // AVX2 11602 { 11603 'e', 512 11604 }, // AVX512 11605 }; 11606 llvm::SmallVector<char, 2> Masked; 11607 switch (State) { 11608 case OMPDeclareSimdDeclAttr::BS_Undefined: 11609 Masked.push_back('N'); 11610 Masked.push_back('M'); 11611 break; 11612 case OMPDeclareSimdDeclAttr::BS_Notinbranch: 11613 Masked.push_back('N'); 11614 break; 11615 case OMPDeclareSimdDeclAttr::BS_Inbranch: 11616 
Masked.push_back('M'); 11617 break; 11618 } 11619 for (char Mask : Masked) { 11620 for (const ISADataTy &Data : ISAData) { 11621 SmallString<256> Buffer; 11622 llvm::raw_svector_ostream Out(Buffer); 11623 Out << "_ZGV" << Data.ISA << Mask; 11624 if (!VLENVal) { 11625 unsigned NumElts = evaluateCDTSize(FD, ParamAttrs); 11626 assert(NumElts && "Non-zero simdlen/cdtsize expected"); 11627 Out << llvm::APSInt::getUnsigned(Data.VecRegSize / NumElts); 11628 } else { 11629 Out << VLENVal; 11630 } 11631 for (const ParamAttrTy &ParamAttr : ParamAttrs) { 11632 switch (ParamAttr.Kind){ 11633 case LinearWithVarStride: 11634 Out << 's' << ParamAttr.StrideOrArg; 11635 break; 11636 case Linear: 11637 Out << 'l'; 11638 if (ParamAttr.StrideOrArg != 1) 11639 Out << ParamAttr.StrideOrArg; 11640 break; 11641 case Uniform: 11642 Out << 'u'; 11643 break; 11644 case Vector: 11645 Out << 'v'; 11646 break; 11647 } 11648 if (!!ParamAttr.Alignment) 11649 Out << 'a' << ParamAttr.Alignment; 11650 } 11651 Out << '_' << Fn->getName(); 11652 Fn->addFnAttr(Out.str()); 11653 } 11654 } 11655 } 11656 11657 // This are the Functions that are needed to mangle the name of the 11658 // vector functions generated by the compiler, according to the rules 11659 // defined in the "Vector Function ABI specifications for AArch64", 11660 // available at 11661 // https://developer.arm.com/products/software-development-tools/hpc/arm-compiler-for-hpc/vector-function-abi. 11662 11663 /// Maps To Vector (MTV), as defined in 3.1.1 of the AAVFABI. 11664 /// 11665 /// TODO: Need to implement the behavior for reference marked with a 11666 /// var or no linear modifiers (1.b in the section). For this, we 11667 /// need to extend ParamKindTy to support the linear modifiers. 
11668 static bool getAArch64MTV(QualType QT, ParamKindTy Kind) { 11669 QT = QT.getCanonicalType(); 11670 11671 if (QT->isVoidType()) 11672 return false; 11673 11674 if (Kind == ParamKindTy::Uniform) 11675 return false; 11676 11677 if (Kind == ParamKindTy::Linear) 11678 return false; 11679 11680 // TODO: Handle linear references with modifiers 11681 11682 if (Kind == ParamKindTy::LinearWithVarStride) 11683 return false; 11684 11685 return true; 11686 } 11687 11688 /// Pass By Value (PBV), as defined in 3.1.2 of the AAVFABI. 11689 static bool getAArch64PBV(QualType QT, ASTContext &C) { 11690 QT = QT.getCanonicalType(); 11691 unsigned Size = C.getTypeSize(QT); 11692 11693 // Only scalars and complex within 16 bytes wide set PVB to true. 11694 if (Size != 8 && Size != 16 && Size != 32 && Size != 64 && Size != 128) 11695 return false; 11696 11697 if (QT->isFloatingType()) 11698 return true; 11699 11700 if (QT->isIntegerType()) 11701 return true; 11702 11703 if (QT->isPointerType()) 11704 return true; 11705 11706 // TODO: Add support for complex types (section 3.1.2, item 2). 11707 11708 return false; 11709 } 11710 11711 /// Computes the lane size (LS) of a return type or of an input parameter, 11712 /// as defined by `LS(P)` in 3.2.1 of the AAVFABI. 11713 /// TODO: Add support for references, section 3.2.1, item 1. 11714 static unsigned getAArch64LS(QualType QT, ParamKindTy Kind, ASTContext &C) { 11715 if (!getAArch64MTV(QT, Kind) && QT.getCanonicalType()->isPointerType()) { 11716 QualType PTy = QT.getCanonicalType()->getPointeeType(); 11717 if (getAArch64PBV(PTy, C)) 11718 return C.getTypeSize(PTy); 11719 } 11720 if (getAArch64PBV(QT, C)) 11721 return C.getTypeSize(QT); 11722 11723 return C.getTypeSize(C.getUIntPtrType()); 11724 } 11725 11726 // Get Narrowest Data Size (NDS) and Widest Data Size (WDS) from the 11727 // signature of the scalar function, as defined in 3.2.2 of the 11728 // AAVFABI. 
11729 static std::tuple<unsigned, unsigned, bool> 11730 getNDSWDS(const FunctionDecl *FD, ArrayRef<ParamAttrTy> ParamAttrs) { 11731 QualType RetType = FD->getReturnType().getCanonicalType(); 11732 11733 ASTContext &C = FD->getASTContext(); 11734 11735 bool OutputBecomesInput = false; 11736 11737 llvm::SmallVector<unsigned, 8> Sizes; 11738 if (!RetType->isVoidType()) { 11739 Sizes.push_back(getAArch64LS(RetType, ParamKindTy::Vector, C)); 11740 if (!getAArch64PBV(RetType, C) && getAArch64MTV(RetType, {})) 11741 OutputBecomesInput = true; 11742 } 11743 for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) { 11744 QualType QT = FD->getParamDecl(I)->getType().getCanonicalType(); 11745 Sizes.push_back(getAArch64LS(QT, ParamAttrs[I].Kind, C)); 11746 } 11747 11748 assert(!Sizes.empty() && "Unable to determine NDS and WDS."); 11749 // The LS of a function parameter / return value can only be a power 11750 // of 2, starting from 8 bits, up to 128. 11751 assert(llvm::all_of(Sizes, 11752 [](unsigned Size) { 11753 return Size == 8 || Size == 16 || Size == 32 || 11754 Size == 64 || Size == 128; 11755 }) && 11756 "Invalid size"); 11757 11758 return std::make_tuple(*std::min_element(std::begin(Sizes), std::end(Sizes)), 11759 *std::max_element(std::begin(Sizes), std::end(Sizes)), 11760 OutputBecomesInput); 11761 } 11762 11763 /// Mangle the parameter part of the vector function name according to 11764 /// their OpenMP classification. The mangling function is defined in 11765 /// section 3.5 of the AAVFABI. 11766 static std::string mangleVectorParameters(ArrayRef<ParamAttrTy> ParamAttrs) { 11767 SmallString<256> Buffer; 11768 llvm::raw_svector_ostream Out(Buffer); 11769 for (const auto &ParamAttr : ParamAttrs) { 11770 switch (ParamAttr.Kind) { 11771 case LinearWithVarStride: 11772 Out << "ls" << ParamAttr.StrideOrArg; 11773 break; 11774 case Linear: 11775 Out << 'l'; 11776 // Don't print the step value if it is not present or if it is 11777 // equal to 1. 
11778 if (ParamAttr.StrideOrArg != 1) 11779 Out << ParamAttr.StrideOrArg; 11780 break; 11781 case Uniform: 11782 Out << 'u'; 11783 break; 11784 case Vector: 11785 Out << 'v'; 11786 break; 11787 } 11788 11789 if (!!ParamAttr.Alignment) 11790 Out << 'a' << ParamAttr.Alignment; 11791 } 11792 11793 return std::string(Out.str()); 11794 } 11795 11796 // Function used to add the attribute. The parameter `VLEN` is 11797 // templated to allow the use of "x" when targeting scalable functions 11798 // for SVE. 11799 template <typename T> 11800 static void addAArch64VectorName(T VLEN, StringRef LMask, StringRef Prefix, 11801 char ISA, StringRef ParSeq, 11802 StringRef MangledName, bool OutputBecomesInput, 11803 llvm::Function *Fn) { 11804 SmallString<256> Buffer; 11805 llvm::raw_svector_ostream Out(Buffer); 11806 Out << Prefix << ISA << LMask << VLEN; 11807 if (OutputBecomesInput) 11808 Out << "v"; 11809 Out << ParSeq << "_" << MangledName; 11810 Fn->addFnAttr(Out.str()); 11811 } 11812 11813 // Helper function to generate the Advanced SIMD names depending on 11814 // the value of the NDS when simdlen is not present. 
11815 static void addAArch64AdvSIMDNDSNames(unsigned NDS, StringRef Mask, 11816 StringRef Prefix, char ISA, 11817 StringRef ParSeq, StringRef MangledName, 11818 bool OutputBecomesInput, 11819 llvm::Function *Fn) { 11820 switch (NDS) { 11821 case 8: 11822 addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName, 11823 OutputBecomesInput, Fn); 11824 addAArch64VectorName(16, Mask, Prefix, ISA, ParSeq, MangledName, 11825 OutputBecomesInput, Fn); 11826 break; 11827 case 16: 11828 addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName, 11829 OutputBecomesInput, Fn); 11830 addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName, 11831 OutputBecomesInput, Fn); 11832 break; 11833 case 32: 11834 addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName, 11835 OutputBecomesInput, Fn); 11836 addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName, 11837 OutputBecomesInput, Fn); 11838 break; 11839 case 64: 11840 case 128: 11841 addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName, 11842 OutputBecomesInput, Fn); 11843 break; 11844 default: 11845 llvm_unreachable("Scalar type is too wide."); 11846 } 11847 } 11848 11849 /// Emit vector function attributes for AArch64, as defined in the AAVFABI. 11850 static void emitAArch64DeclareSimdFunction( 11851 CodeGenModule &CGM, const FunctionDecl *FD, unsigned UserVLEN, 11852 ArrayRef<ParamAttrTy> ParamAttrs, 11853 OMPDeclareSimdDeclAttr::BranchStateTy State, StringRef MangledName, 11854 char ISA, unsigned VecRegSize, llvm::Function *Fn, SourceLocation SLoc) { 11855 11856 // Get basic data for building the vector signature. 11857 const auto Data = getNDSWDS(FD, ParamAttrs); 11858 const unsigned NDS = std::get<0>(Data); 11859 const unsigned WDS = std::get<1>(Data); 11860 const bool OutputBecomesInput = std::get<2>(Data); 11861 11862 // Check the values provided via `simdlen` by the user. 11863 // 1. 
A `simdlen(1)` doesn't produce vector signatures, 11864 if (UserVLEN == 1) { 11865 unsigned DiagID = CGM.getDiags().getCustomDiagID( 11866 DiagnosticsEngine::Warning, 11867 "The clause simdlen(1) has no effect when targeting aarch64."); 11868 CGM.getDiags().Report(SLoc, DiagID); 11869 return; 11870 } 11871 11872 // 2. Section 3.3.1, item 1: user input must be a power of 2 for 11873 // Advanced SIMD output. 11874 if (ISA == 'n' && UserVLEN && !llvm::isPowerOf2_32(UserVLEN)) { 11875 unsigned DiagID = CGM.getDiags().getCustomDiagID( 11876 DiagnosticsEngine::Warning, "The value specified in simdlen must be a " 11877 "power of 2 when targeting Advanced SIMD."); 11878 CGM.getDiags().Report(SLoc, DiagID); 11879 return; 11880 } 11881 11882 // 3. Section 3.4.1. SVE fixed lengh must obey the architectural 11883 // limits. 11884 if (ISA == 's' && UserVLEN != 0) { 11885 if ((UserVLEN * WDS > 2048) || (UserVLEN * WDS % 128 != 0)) { 11886 unsigned DiagID = CGM.getDiags().getCustomDiagID( 11887 DiagnosticsEngine::Warning, "The clause simdlen must fit the %0-bit " 11888 "lanes in the architectural constraints " 11889 "for SVE (min is 128-bit, max is " 11890 "2048-bit, by steps of 128-bit)"); 11891 CGM.getDiags().Report(SLoc, DiagID) << WDS; 11892 return; 11893 } 11894 } 11895 11896 // Sort out parameter sequence. 11897 const std::string ParSeq = mangleVectorParameters(ParamAttrs); 11898 StringRef Prefix = "_ZGV"; 11899 // Generate simdlen from user input (if any). 11900 if (UserVLEN) { 11901 if (ISA == 's') { 11902 // SVE generates only a masked function. 11903 addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName, 11904 OutputBecomesInput, Fn); 11905 } else { 11906 assert(ISA == 'n' && "Expected ISA either 's' or 'n'."); 11907 // Advanced SIMD generates one or two functions, depending on 11908 // the `[not]inbranch` clause. 
11909 switch (State) { 11910 case OMPDeclareSimdDeclAttr::BS_Undefined: 11911 addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName, 11912 OutputBecomesInput, Fn); 11913 addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName, 11914 OutputBecomesInput, Fn); 11915 break; 11916 case OMPDeclareSimdDeclAttr::BS_Notinbranch: 11917 addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName, 11918 OutputBecomesInput, Fn); 11919 break; 11920 case OMPDeclareSimdDeclAttr::BS_Inbranch: 11921 addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName, 11922 OutputBecomesInput, Fn); 11923 break; 11924 } 11925 } 11926 } else { 11927 // If no user simdlen is provided, follow the AAVFABI rules for 11928 // generating the vector length. 11929 if (ISA == 's') { 11930 // SVE, section 3.4.1, item 1. 11931 addAArch64VectorName("x", "M", Prefix, ISA, ParSeq, MangledName, 11932 OutputBecomesInput, Fn); 11933 } else { 11934 assert(ISA == 'n' && "Expected ISA either 's' or 'n'."); 11935 // Advanced SIMD, Section 3.3.1 of the AAVFABI, generates one or 11936 // two vector names depending on the use of the clause 11937 // `[not]inbranch`. 
11938 switch (State) { 11939 case OMPDeclareSimdDeclAttr::BS_Undefined: 11940 addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName, 11941 OutputBecomesInput, Fn); 11942 addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName, 11943 OutputBecomesInput, Fn); 11944 break; 11945 case OMPDeclareSimdDeclAttr::BS_Notinbranch: 11946 addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName, 11947 OutputBecomesInput, Fn); 11948 break; 11949 case OMPDeclareSimdDeclAttr::BS_Inbranch: 11950 addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName, 11951 OutputBecomesInput, Fn); 11952 break; 11953 } 11954 } 11955 } 11956 } 11957 11958 void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD, 11959 llvm::Function *Fn) { 11960 ASTContext &C = CGM.getContext(); 11961 FD = FD->getMostRecentDecl(); 11962 // Map params to their positions in function decl. 11963 llvm::DenseMap<const Decl *, unsigned> ParamPositions; 11964 if (isa<CXXMethodDecl>(FD)) 11965 ParamPositions.try_emplace(FD, 0); 11966 unsigned ParamPos = ParamPositions.size(); 11967 for (const ParmVarDecl *P : FD->parameters()) { 11968 ParamPositions.try_emplace(P->getCanonicalDecl(), ParamPos); 11969 ++ParamPos; 11970 } 11971 while (FD) { 11972 for (const auto *Attr : FD->specific_attrs<OMPDeclareSimdDeclAttr>()) { 11973 llvm::SmallVector<ParamAttrTy, 8> ParamAttrs(ParamPositions.size()); 11974 // Mark uniform parameters. 11975 for (const Expr *E : Attr->uniforms()) { 11976 E = E->IgnoreParenImpCasts(); 11977 unsigned Pos; 11978 if (isa<CXXThisExpr>(E)) { 11979 Pos = ParamPositions[FD]; 11980 } else { 11981 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl()) 11982 ->getCanonicalDecl(); 11983 Pos = ParamPositions[PVD]; 11984 } 11985 ParamAttrs[Pos].Kind = Uniform; 11986 } 11987 // Get alignment info. 
11988 auto NI = Attr->alignments_begin(); 11989 for (const Expr *E : Attr->aligneds()) { 11990 E = E->IgnoreParenImpCasts(); 11991 unsigned Pos; 11992 QualType ParmTy; 11993 if (isa<CXXThisExpr>(E)) { 11994 Pos = ParamPositions[FD]; 11995 ParmTy = E->getType(); 11996 } else { 11997 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl()) 11998 ->getCanonicalDecl(); 11999 Pos = ParamPositions[PVD]; 12000 ParmTy = PVD->getType(); 12001 } 12002 ParamAttrs[Pos].Alignment = 12003 (*NI) 12004 ? (*NI)->EvaluateKnownConstInt(C) 12005 : llvm::APSInt::getUnsigned( 12006 C.toCharUnitsFromBits(C.getOpenMPDefaultSimdAlign(ParmTy)) 12007 .getQuantity()); 12008 ++NI; 12009 } 12010 // Mark linear parameters. 12011 auto SI = Attr->steps_begin(); 12012 auto MI = Attr->modifiers_begin(); 12013 for (const Expr *E : Attr->linears()) { 12014 E = E->IgnoreParenImpCasts(); 12015 unsigned Pos; 12016 // Rescaling factor needed to compute the linear parameter 12017 // value in the mangled name. 12018 unsigned PtrRescalingFactor = 1; 12019 if (isa<CXXThisExpr>(E)) { 12020 Pos = ParamPositions[FD]; 12021 } else { 12022 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl()) 12023 ->getCanonicalDecl(); 12024 Pos = ParamPositions[PVD]; 12025 if (auto *P = dyn_cast<PointerType>(PVD->getType())) 12026 PtrRescalingFactor = CGM.getContext() 12027 .getTypeSizeInChars(P->getPointeeType()) 12028 .getQuantity(); 12029 } 12030 ParamAttrTy &ParamAttr = ParamAttrs[Pos]; 12031 ParamAttr.Kind = Linear; 12032 // Assuming a stride of 1, for `linear` without modifiers. 
12033 ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(1); 12034 if (*SI) { 12035 Expr::EvalResult Result; 12036 if (!(*SI)->EvaluateAsInt(Result, C, Expr::SE_AllowSideEffects)) { 12037 if (const auto *DRE = 12038 cast<DeclRefExpr>((*SI)->IgnoreParenImpCasts())) { 12039 if (const auto *StridePVD = cast<ParmVarDecl>(DRE->getDecl())) { 12040 ParamAttr.Kind = LinearWithVarStride; 12041 ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned( 12042 ParamPositions[StridePVD->getCanonicalDecl()]); 12043 } 12044 } 12045 } else { 12046 ParamAttr.StrideOrArg = Result.Val.getInt(); 12047 } 12048 } 12049 // If we are using a linear clause on a pointer, we need to 12050 // rescale the value of linear_step with the byte size of the 12051 // pointee type. 12052 if (Linear == ParamAttr.Kind) 12053 ParamAttr.StrideOrArg = ParamAttr.StrideOrArg * PtrRescalingFactor; 12054 ++SI; 12055 ++MI; 12056 } 12057 llvm::APSInt VLENVal; 12058 SourceLocation ExprLoc; 12059 const Expr *VLENExpr = Attr->getSimdlen(); 12060 if (VLENExpr) { 12061 VLENVal = VLENExpr->EvaluateKnownConstInt(C); 12062 ExprLoc = VLENExpr->getExprLoc(); 12063 } 12064 OMPDeclareSimdDeclAttr::BranchStateTy State = Attr->getBranchState(); 12065 if (CGM.getTriple().isX86()) { 12066 emitX86DeclareSimdFunction(FD, Fn, VLENVal, ParamAttrs, State); 12067 } else if (CGM.getTriple().getArch() == llvm::Triple::aarch64) { 12068 unsigned VLEN = VLENVal.getExtValue(); 12069 StringRef MangledName = Fn->getName(); 12070 if (CGM.getTarget().hasFeature("sve")) 12071 emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State, 12072 MangledName, 's', 128, Fn, ExprLoc); 12073 if (CGM.getTarget().hasFeature("neon")) 12074 emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State, 12075 MangledName, 'n', 128, Fn, ExprLoc); 12076 } 12077 } 12078 FD = FD->getPreviousDecl(); 12079 } 12080 } 12081 12082 namespace { 12083 /// Cleanup action for doacross support. 
12084 class DoacrossCleanupTy final : public EHScopeStack::Cleanup { 12085 public: 12086 static const int DoacrossFinArgs = 2; 12087 12088 private: 12089 llvm::FunctionCallee RTLFn; 12090 llvm::Value *Args[DoacrossFinArgs]; 12091 12092 public: 12093 DoacrossCleanupTy(llvm::FunctionCallee RTLFn, 12094 ArrayRef<llvm::Value *> CallArgs) 12095 : RTLFn(RTLFn) { 12096 assert(CallArgs.size() == DoacrossFinArgs); 12097 std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args)); 12098 } 12099 void Emit(CodeGenFunction &CGF, Flags /*flags*/) override { 12100 if (!CGF.HaveInsertPoint()) 12101 return; 12102 CGF.EmitRuntimeCall(RTLFn, Args); 12103 } 12104 }; 12105 } // namespace 12106 12107 void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF, 12108 const OMPLoopDirective &D, 12109 ArrayRef<Expr *> NumIterations) { 12110 if (!CGF.HaveInsertPoint()) 12111 return; 12112 12113 ASTContext &C = CGM.getContext(); 12114 QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true); 12115 RecordDecl *RD; 12116 if (KmpDimTy.isNull()) { 12117 // Build struct kmp_dim { // loop bounds info casted to kmp_int64 12118 // kmp_int64 lo; // lower 12119 // kmp_int64 up; // upper 12120 // kmp_int64 st; // stride 12121 // }; 12122 RD = C.buildImplicitRecord("kmp_dim"); 12123 RD->startDefinition(); 12124 addFieldToRecordDecl(C, RD, Int64Ty); 12125 addFieldToRecordDecl(C, RD, Int64Ty); 12126 addFieldToRecordDecl(C, RD, Int64Ty); 12127 RD->completeDefinition(); 12128 KmpDimTy = C.getRecordType(RD); 12129 } else { 12130 RD = cast<RecordDecl>(KmpDimTy->getAsTagDecl()); 12131 } 12132 llvm::APInt Size(/*numBits=*/32, NumIterations.size()); 12133 QualType ArrayTy = 12134 C.getConstantArrayType(KmpDimTy, Size, nullptr, ArrayType::Normal, 0); 12135 12136 Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims"); 12137 CGF.EmitNullInitialization(DimsAddr, ArrayTy); 12138 enum { LowerFD = 0, UpperFD, StrideFD }; 12139 // Fill dims with data. 
12140 for (unsigned I = 0, E = NumIterations.size(); I < E; ++I) { 12141 LValue DimsLVal = CGF.MakeAddrLValue( 12142 CGF.Builder.CreateConstArrayGEP(DimsAddr, I), KmpDimTy); 12143 // dims.upper = num_iterations; 12144 LValue UpperLVal = CGF.EmitLValueForField( 12145 DimsLVal, *std::next(RD->field_begin(), UpperFD)); 12146 llvm::Value *NumIterVal = CGF.EmitScalarConversion( 12147 CGF.EmitScalarExpr(NumIterations[I]), NumIterations[I]->getType(), 12148 Int64Ty, NumIterations[I]->getExprLoc()); 12149 CGF.EmitStoreOfScalar(NumIterVal, UpperLVal); 12150 // dims.stride = 1; 12151 LValue StrideLVal = CGF.EmitLValueForField( 12152 DimsLVal, *std::next(RD->field_begin(), StrideFD)); 12153 CGF.EmitStoreOfScalar(llvm::ConstantInt::getSigned(CGM.Int64Ty, /*V=*/1), 12154 StrideLVal); 12155 } 12156 12157 // Build call void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid, 12158 // kmp_int32 num_dims, struct kmp_dim * dims); 12159 llvm::Value *Args[] = { 12160 emitUpdateLocation(CGF, D.getBeginLoc()), 12161 getThreadID(CGF, D.getBeginLoc()), 12162 llvm::ConstantInt::getSigned(CGM.Int32Ty, NumIterations.size()), 12163 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 12164 CGF.Builder.CreateConstArrayGEP(DimsAddr, 0).getPointer(), 12165 CGM.VoidPtrTy)}; 12166 12167 llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction( 12168 CGM.getModule(), OMPRTL___kmpc_doacross_init); 12169 CGF.EmitRuntimeCall(RTLFn, Args); 12170 llvm::Value *FiniArgs[DoacrossCleanupTy::DoacrossFinArgs] = { 12171 emitUpdateLocation(CGF, D.getEndLoc()), getThreadID(CGF, D.getEndLoc())}; 12172 llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction( 12173 CGM.getModule(), OMPRTL___kmpc_doacross_fini); 12174 CGF.EHStack.pushCleanup<DoacrossCleanupTy>(NormalAndEHCleanup, FiniRTLFn, 12175 llvm::makeArrayRef(FiniArgs)); 12176 } 12177 12178 void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF, 12179 const OMPDependClause *C) { 12180 QualType Int64Ty = 12181 
CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1); 12182 llvm::APInt Size(/*numBits=*/32, C->getNumLoops()); 12183 QualType ArrayTy = CGM.getContext().getConstantArrayType( 12184 Int64Ty, Size, nullptr, ArrayType::Normal, 0); 12185 Address CntAddr = CGF.CreateMemTemp(ArrayTy, ".cnt.addr"); 12186 for (unsigned I = 0, E = C->getNumLoops(); I < E; ++I) { 12187 const Expr *CounterVal = C->getLoopData(I); 12188 assert(CounterVal); 12189 llvm::Value *CntVal = CGF.EmitScalarConversion( 12190 CGF.EmitScalarExpr(CounterVal), CounterVal->getType(), Int64Ty, 12191 CounterVal->getExprLoc()); 12192 CGF.EmitStoreOfScalar(CntVal, CGF.Builder.CreateConstArrayGEP(CntAddr, I), 12193 /*Volatile=*/false, Int64Ty); 12194 } 12195 llvm::Value *Args[] = { 12196 emitUpdateLocation(CGF, C->getBeginLoc()), 12197 getThreadID(CGF, C->getBeginLoc()), 12198 CGF.Builder.CreateConstArrayGEP(CntAddr, 0).getPointer()}; 12199 llvm::FunctionCallee RTLFn; 12200 if (C->getDependencyKind() == OMPC_DEPEND_source) { 12201 RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), 12202 OMPRTL___kmpc_doacross_post); 12203 } else { 12204 assert(C->getDependencyKind() == OMPC_DEPEND_sink); 12205 RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), 12206 OMPRTL___kmpc_doacross_wait); 12207 } 12208 CGF.EmitRuntimeCall(RTLFn, Args); 12209 } 12210 12211 void CGOpenMPRuntime::emitCall(CodeGenFunction &CGF, SourceLocation Loc, 12212 llvm::FunctionCallee Callee, 12213 ArrayRef<llvm::Value *> Args) const { 12214 assert(Loc.isValid() && "Outlined function call location must be valid."); 12215 auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc); 12216 12217 if (auto *Fn = dyn_cast<llvm::Function>(Callee.getCallee())) { 12218 if (Fn->doesNotThrow()) { 12219 CGF.EmitNounwindRuntimeCall(Fn, Args); 12220 return; 12221 } 12222 } 12223 CGF.EmitRuntimeCall(Callee, Args); 12224 } 12225 12226 void CGOpenMPRuntime::emitOutlinedFunctionCall( 12227 CodeGenFunction &CGF, SourceLocation 
Loc, llvm::FunctionCallee OutlinedFn, 12228 ArrayRef<llvm::Value *> Args) const { 12229 emitCall(CGF, Loc, OutlinedFn, Args); 12230 } 12231 12232 void CGOpenMPRuntime::emitFunctionProlog(CodeGenFunction &CGF, const Decl *D) { 12233 if (const auto *FD = dyn_cast<FunctionDecl>(D)) 12234 if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(FD)) 12235 HasEmittedDeclareTargetRegion = true; 12236 } 12237 12238 Address CGOpenMPRuntime::getParameterAddress(CodeGenFunction &CGF, 12239 const VarDecl *NativeParam, 12240 const VarDecl *TargetParam) const { 12241 return CGF.GetAddrOfLocalVar(NativeParam); 12242 } 12243 12244 Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF, 12245 const VarDecl *VD) { 12246 if (!VD) 12247 return Address::invalid(); 12248 Address UntiedAddr = Address::invalid(); 12249 Address UntiedRealAddr = Address::invalid(); 12250 auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn); 12251 if (It != FunctionToUntiedTaskStackMap.end()) { 12252 const UntiedLocalVarsAddressesMap &UntiedData = 12253 UntiedLocalVarsStack[It->second]; 12254 auto I = UntiedData.find(VD); 12255 if (I != UntiedData.end()) { 12256 UntiedAddr = I->second.first; 12257 UntiedRealAddr = I->second.second; 12258 } 12259 } 12260 const VarDecl *CVD = VD->getCanonicalDecl(); 12261 if (CVD->hasAttr<OMPAllocateDeclAttr>()) { 12262 // Use the default allocation. 
/// Returns the address to use for the local variable \p VD.
///
/// - A null \p VD yields an invalid address.
/// - If the current function has untied-task data registered and \p VD is
///   found there, the recorded address pair is picked up first.
/// - If the canonical declaration carries an OMPAllocateDeclAttr and the
///   variable is allocatable, storage is obtained with a '__kmpc_alloc' call
///   and a cleanup calling '__kmpc_free' is pushed; the resulting address is
///   returned.
/// - Otherwise the (possibly invalid) untied address is returned.
Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF,
                                                   const VarDecl *VD) {
  if (!VD)
    return Address::invalid();
  // Both stay invalid unless VD is registered for the current function.
  Address UntiedAddr = Address::invalid();
  Address UntiedRealAddr = Address::invalid();
  auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn);
  if (It != FunctionToUntiedTaskStackMap.end()) {
    // The map stores an index into UntiedLocalVarsStack.
    const UntiedLocalVarsAddressesMap &UntiedData =
        UntiedLocalVarsStack[It->second];
    auto I = UntiedData.find(VD);
    if (I != UntiedData.end()) {
      UntiedAddr = I->second.first;
      UntiedRealAddr = I->second.second;
    }
  }
  const VarDecl *CVD = VD->getCanonicalDecl();
  if (CVD->hasAttr<OMPAllocateDeclAttr>()) {
    // Use the default allocation.
    if (!isAllocatableDecl(VD))
      return UntiedAddr;
    // Size in bytes passed to the allocator, rounded up to the declared
    // alignment of the variable.
    llvm::Value *Size;
    CharUnits Align = CGM.getContext().getDeclAlign(CVD);
    if (CVD->getType()->isVariablyModifiedType()) {
      // The size is only known at run time, so the rounding is emitted as IR.
      Size = CGF.getTypeSize(CVD->getType());
      // Align the size: ((size + align - 1) / align) * align
      Size = CGF.Builder.CreateNUWAdd(
          Size, CGM.getSize(Align - CharUnits::fromQuantity(1)));
      Size = CGF.Builder.CreateUDiv(Size, CGM.getSize(Align));
      Size = CGF.Builder.CreateNUWMul(Size, CGM.getSize(Align));
    } else {
      // Constant size: the rounding is done at compile time.
      CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType());
      Size = CGM.getSize(Sz.alignTo(Align));
    }
    llvm::Value *ThreadID = getThreadID(CGF, CVD->getBeginLoc());
    const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
    assert(AA->getAllocator() &&
           "Expected allocator expression for non-default allocator.");
    llvm::Value *Allocator = CGF.EmitScalarExpr(AA->getAllocator());
    // According to the standard, the original allocator type is an enum
    // (integer). Convert to pointer type, if required.
    Allocator = CGF.EmitScalarConversion(
        Allocator, AA->getAllocator()->getType(), CGF.getContext().VoidPtrTy,
        AA->getAllocator()->getExprLoc());
    // Arguments of the __kmpc_alloc call: thread id, size, allocator.
    llvm::Value *Args[] = {ThreadID, Size, Allocator};

    llvm::Value *Addr =
        CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                                CGM.getModule(), OMPRTL___kmpc_alloc),
                            Args, getName({CVD->getName(), ".void.addr"}));
    llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
        CGM.getModule(), OMPRTL___kmpc_free);
    // Cast the returned pointer to a pointer to the variable's type.
    QualType Ty = CGM.getContext().getPointerType(CVD->getType());
    Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        Addr, CGF.ConvertTypeForMem(Ty), getName({CVD->getName(), ".addr"}));
    // For a variable registered in untied-task data, the allocated pointer is
    // additionally stored through the recorded address.
    if (UntiedAddr.isValid())
      CGF.EmitStoreOfScalar(Addr, UntiedAddr, /*Volatile=*/false, Ty);

    // Cleanup action for allocate support.
    class OMPAllocateCleanupTy final : public EHScopeStack::Cleanup {
      llvm::FunctionCallee RTLFn;
      // Raw encoding of the source location used to obtain the thread id
      // when the cleanup is emitted.
      SourceLocation::UIntTy LocEncoding;
      Address Addr;
      const Expr *Allocator;

    public:
      OMPAllocateCleanupTy(llvm::FunctionCallee RTLFn,
                           SourceLocation::UIntTy LocEncoding, Address Addr,
                           const Expr *Allocator)
          : RTLFn(RTLFn), LocEncoding(LocEncoding), Addr(Addr),
            Allocator(Allocator) {}
      // Emits RTLFn(thread id, address as void*, allocator as void*), unless
      // there is no insertion point left.
      void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
        if (!CGF.HaveInsertPoint())
          return;
        llvm::Value *Args[3];
        Args[0] = CGF.CGM.getOpenMPRuntime().getThreadID(
            CGF, SourceLocation::getFromRawEncoding(LocEncoding));
        Args[1] = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            Addr.getPointer(), CGF.VoidPtrTy);
        // The allocator expression is evaluated again at the cleanup point.
        llvm::Value *AllocVal = CGF.EmitScalarExpr(Allocator);
        // According to the standard, the original allocator type is an enum
        // (integer). Convert to pointer type, if required.
        AllocVal = CGF.EmitScalarConversion(AllocVal, Allocator->getType(),
                                            CGF.getContext().VoidPtrTy,
                                            Allocator->getExprLoc());
        Args[2] = AllocVal;

        CGF.EmitRuntimeCall(RTLFn, Args);
      }
    };
    // The recorded "real" untied address takes precedence over the freshly
    // allocated one, both as the result and as the address given to the
    // cleanup.
    Address VDAddr =
        UntiedRealAddr.isValid() ? UntiedRealAddr : Address(Addr, Align);
    CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(
        NormalAndEHCleanup, FiniRTLFn, CVD->getLocation().getRawEncoding(),
        VDAddr, AA->getAllocator());
    // Inside an OpenMP region, an untied-task switch is emitted after the
    // allocation of such a variable.
    if (UntiedRealAddr.isValid())
      if (auto *Region =
              dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
        Region->emitUntiedSwitch(CGF);
    return VDAddr;
  }
  return UntiedAddr;
}

/// Returns true if \p VD is present in the untied-task local variable data
/// registered for the function currently being emitted by \p CGF.
bool CGOpenMPRuntime::isLocalVarInUntiedTask(CodeGenFunction &CGF,
                                             const VarDecl *VD) const {
  auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn);
  if (It == FunctionToUntiedTaskStackMap.end())
    return false;
  return UntiedLocalVarsStack[It->second].count(VD) > 0;
}
ME->getMemberDecl(); 12376 } 12377 DS.insert(VD); 12378 } 12379 } 12380 } 12381 12382 CGOpenMPRuntime::NontemporalDeclsRAII::~NontemporalDeclsRAII() { 12383 if (!NeedToPush) 12384 return; 12385 CGM.getOpenMPRuntime().NontemporalDeclsStack.pop_back(); 12386 } 12387 12388 CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::UntiedTaskLocalDeclsRAII( 12389 CodeGenFunction &CGF, 12390 const llvm::MapVector<CanonicalDeclPtr<const VarDecl>, 12391 std::pair<Address, Address>> &LocalVars) 12392 : CGM(CGF.CGM), NeedToPush(!LocalVars.empty()) { 12393 if (!NeedToPush) 12394 return; 12395 CGM.getOpenMPRuntime().FunctionToUntiedTaskStackMap.try_emplace( 12396 CGF.CurFn, CGM.getOpenMPRuntime().UntiedLocalVarsStack.size()); 12397 CGM.getOpenMPRuntime().UntiedLocalVarsStack.push_back(LocalVars); 12398 } 12399 12400 CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::~UntiedTaskLocalDeclsRAII() { 12401 if (!NeedToPush) 12402 return; 12403 CGM.getOpenMPRuntime().UntiedLocalVarsStack.pop_back(); 12404 } 12405 12406 bool CGOpenMPRuntime::isNontemporalDecl(const ValueDecl *VD) const { 12407 assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode."); 12408 12409 return llvm::any_of( 12410 CGM.getOpenMPRuntime().NontemporalDeclsStack, 12411 [VD](const NontemporalDeclsSet &Set) { return Set.contains(VD); }); 12412 } 12413 12414 void CGOpenMPRuntime::LastprivateConditionalRAII::tryToDisableInnerAnalysis( 12415 const OMPExecutableDirective &S, 12416 llvm::DenseSet<CanonicalDeclPtr<const Decl>> &NeedToAddForLPCsAsDisabled) 12417 const { 12418 llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToCheckForLPCs; 12419 // Vars in target/task regions must be excluded completely. 
12420 if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()) || 12421 isOpenMPTaskingDirective(S.getDirectiveKind())) { 12422 SmallVector<OpenMPDirectiveKind, 4> CaptureRegions; 12423 getOpenMPCaptureRegions(CaptureRegions, S.getDirectiveKind()); 12424 const CapturedStmt *CS = S.getCapturedStmt(CaptureRegions.front()); 12425 for (const CapturedStmt::Capture &Cap : CS->captures()) { 12426 if (Cap.capturesVariable() || Cap.capturesVariableByCopy()) 12427 NeedToCheckForLPCs.insert(Cap.getCapturedVar()); 12428 } 12429 } 12430 // Exclude vars in private clauses. 12431 for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) { 12432 for (const Expr *Ref : C->varlists()) { 12433 if (!Ref->getType()->isScalarType()) 12434 continue; 12435 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts()); 12436 if (!DRE) 12437 continue; 12438 NeedToCheckForLPCs.insert(DRE->getDecl()); 12439 } 12440 } 12441 for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) { 12442 for (const Expr *Ref : C->varlists()) { 12443 if (!Ref->getType()->isScalarType()) 12444 continue; 12445 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts()); 12446 if (!DRE) 12447 continue; 12448 NeedToCheckForLPCs.insert(DRE->getDecl()); 12449 } 12450 } 12451 for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) { 12452 for (const Expr *Ref : C->varlists()) { 12453 if (!Ref->getType()->isScalarType()) 12454 continue; 12455 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts()); 12456 if (!DRE) 12457 continue; 12458 NeedToCheckForLPCs.insert(DRE->getDecl()); 12459 } 12460 } 12461 for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) { 12462 for (const Expr *Ref : C->varlists()) { 12463 if (!Ref->getType()->isScalarType()) 12464 continue; 12465 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts()); 12466 if (!DRE) 12467 continue; 12468 NeedToCheckForLPCs.insert(DRE->getDecl()); 12469 } 12470 } 12471 for (const auto *C : 
S.getClausesOfKind<OMPLinearClause>()) { 12472 for (const Expr *Ref : C->varlists()) { 12473 if (!Ref->getType()->isScalarType()) 12474 continue; 12475 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts()); 12476 if (!DRE) 12477 continue; 12478 NeedToCheckForLPCs.insert(DRE->getDecl()); 12479 } 12480 } 12481 for (const Decl *VD : NeedToCheckForLPCs) { 12482 for (const LastprivateConditionalData &Data : 12483 llvm::reverse(CGM.getOpenMPRuntime().LastprivateConditionalStack)) { 12484 if (Data.DeclToUniqueName.count(VD) > 0) { 12485 if (!Data.Disabled) 12486 NeedToAddForLPCsAsDisabled.insert(VD); 12487 break; 12488 } 12489 } 12490 } 12491 } 12492 12493 CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII( 12494 CodeGenFunction &CGF, const OMPExecutableDirective &S, LValue IVLVal) 12495 : CGM(CGF.CGM), 12496 Action((CGM.getLangOpts().OpenMP >= 50 && 12497 llvm::any_of(S.getClausesOfKind<OMPLastprivateClause>(), 12498 [](const OMPLastprivateClause *C) { 12499 return C->getKind() == 12500 OMPC_LASTPRIVATE_conditional; 12501 })) 12502 ? 
ActionToDo::PushAsLastprivateConditional 12503 : ActionToDo::DoNotPush) { 12504 assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode."); 12505 if (CGM.getLangOpts().OpenMP < 50 || Action == ActionToDo::DoNotPush) 12506 return; 12507 assert(Action == ActionToDo::PushAsLastprivateConditional && 12508 "Expected a push action."); 12509 LastprivateConditionalData &Data = 12510 CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back(); 12511 for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) { 12512 if (C->getKind() != OMPC_LASTPRIVATE_conditional) 12513 continue; 12514 12515 for (const Expr *Ref : C->varlists()) { 12516 Data.DeclToUniqueName.insert(std::make_pair( 12517 cast<DeclRefExpr>(Ref->IgnoreParenImpCasts())->getDecl(), 12518 SmallString<16>(generateUniqueName(CGM, "pl_cond", Ref)))); 12519 } 12520 } 12521 Data.IVLVal = IVLVal; 12522 Data.Fn = CGF.CurFn; 12523 } 12524 12525 CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII( 12526 CodeGenFunction &CGF, const OMPExecutableDirective &S) 12527 : CGM(CGF.CGM), Action(ActionToDo::DoNotPush) { 12528 assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode."); 12529 if (CGM.getLangOpts().OpenMP < 50) 12530 return; 12531 llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToAddForLPCsAsDisabled; 12532 tryToDisableInnerAnalysis(S, NeedToAddForLPCsAsDisabled); 12533 if (!NeedToAddForLPCsAsDisabled.empty()) { 12534 Action = ActionToDo::DisableLastprivateConditional; 12535 LastprivateConditionalData &Data = 12536 CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back(); 12537 for (const Decl *VD : NeedToAddForLPCsAsDisabled) 12538 Data.DeclToUniqueName.insert(std::make_pair(VD, SmallString<16>())); 12539 Data.Fn = CGF.CurFn; 12540 Data.Disabled = true; 12541 } 12542 } 12543 12544 CGOpenMPRuntime::LastprivateConditionalRAII 12545 CGOpenMPRuntime::LastprivateConditionalRAII::disable( 12546 CodeGenFunction &CGF, const OMPExecutableDirective &S) { 12547 return 
LastprivateConditionalRAII(CGF, S); 12548 } 12549 12550 CGOpenMPRuntime::LastprivateConditionalRAII::~LastprivateConditionalRAII() { 12551 if (CGM.getLangOpts().OpenMP < 50) 12552 return; 12553 if (Action == ActionToDo::DisableLastprivateConditional) { 12554 assert(CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled && 12555 "Expected list of disabled private vars."); 12556 CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back(); 12557 } 12558 if (Action == ActionToDo::PushAsLastprivateConditional) { 12559 assert( 12560 !CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled && 12561 "Expected list of lastprivate conditional vars."); 12562 CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back(); 12563 } 12564 } 12565 12566 Address CGOpenMPRuntime::emitLastprivateConditionalInit(CodeGenFunction &CGF, 12567 const VarDecl *VD) { 12568 ASTContext &C = CGM.getContext(); 12569 auto I = LastprivateConditionalToTypes.find(CGF.CurFn); 12570 if (I == LastprivateConditionalToTypes.end()) 12571 I = LastprivateConditionalToTypes.try_emplace(CGF.CurFn).first; 12572 QualType NewType; 12573 const FieldDecl *VDField; 12574 const FieldDecl *FiredField; 12575 LValue BaseLVal; 12576 auto VI = I->getSecond().find(VD); 12577 if (VI == I->getSecond().end()) { 12578 RecordDecl *RD = C.buildImplicitRecord("lasprivate.conditional"); 12579 RD->startDefinition(); 12580 VDField = addFieldToRecordDecl(C, RD, VD->getType().getNonReferenceType()); 12581 FiredField = addFieldToRecordDecl(C, RD, C.CharTy); 12582 RD->completeDefinition(); 12583 NewType = C.getRecordType(RD); 12584 Address Addr = CGF.CreateMemTemp(NewType, C.getDeclAlign(VD), VD->getName()); 12585 BaseLVal = CGF.MakeAddrLValue(Addr, NewType, AlignmentSource::Decl); 12586 I->getSecond().try_emplace(VD, NewType, VDField, FiredField, BaseLVal); 12587 } else { 12588 NewType = std::get<0>(VI->getSecond()); 12589 VDField = std::get<1>(VI->getSecond()); 12590 FiredField = std::get<2>(VI->getSecond()); 
12591 BaseLVal = std::get<3>(VI->getSecond()); 12592 } 12593 LValue FiredLVal = 12594 CGF.EmitLValueForField(BaseLVal, FiredField); 12595 CGF.EmitStoreOfScalar( 12596 llvm::ConstantInt::getNullValue(CGF.ConvertTypeForMem(C.CharTy)), 12597 FiredLVal); 12598 return CGF.EmitLValueForField(BaseLVal, VDField).getAddress(CGF); 12599 } 12600 12601 namespace { 12602 /// Checks if the lastprivate conditional variable is referenced in LHS. 12603 class LastprivateConditionalRefChecker final 12604 : public ConstStmtVisitor<LastprivateConditionalRefChecker, bool> { 12605 ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM; 12606 const Expr *FoundE = nullptr; 12607 const Decl *FoundD = nullptr; 12608 StringRef UniqueDeclName; 12609 LValue IVLVal; 12610 llvm::Function *FoundFn = nullptr; 12611 SourceLocation Loc; 12612 12613 public: 12614 bool VisitDeclRefExpr(const DeclRefExpr *E) { 12615 for (const CGOpenMPRuntime::LastprivateConditionalData &D : 12616 llvm::reverse(LPM)) { 12617 auto It = D.DeclToUniqueName.find(E->getDecl()); 12618 if (It == D.DeclToUniqueName.end()) 12619 continue; 12620 if (D.Disabled) 12621 return false; 12622 FoundE = E; 12623 FoundD = E->getDecl()->getCanonicalDecl(); 12624 UniqueDeclName = It->second; 12625 IVLVal = D.IVLVal; 12626 FoundFn = D.Fn; 12627 break; 12628 } 12629 return FoundE == E; 12630 } 12631 bool VisitMemberExpr(const MemberExpr *E) { 12632 if (!CodeGenFunction::IsWrappedCXXThis(E->getBase())) 12633 return false; 12634 for (const CGOpenMPRuntime::LastprivateConditionalData &D : 12635 llvm::reverse(LPM)) { 12636 auto It = D.DeclToUniqueName.find(E->getMemberDecl()); 12637 if (It == D.DeclToUniqueName.end()) 12638 continue; 12639 if (D.Disabled) 12640 return false; 12641 FoundE = E; 12642 FoundD = E->getMemberDecl()->getCanonicalDecl(); 12643 UniqueDeclName = It->second; 12644 IVLVal = D.IVLVal; 12645 FoundFn = D.Fn; 12646 break; 12647 } 12648 return FoundE == E; 12649 } 12650 bool VisitStmt(const Stmt *S) { 12651 for (const Stmt 
*Child : S->children()) { 12652 if (!Child) 12653 continue; 12654 if (const auto *E = dyn_cast<Expr>(Child)) 12655 if (!E->isGLValue()) 12656 continue; 12657 if (Visit(Child)) 12658 return true; 12659 } 12660 return false; 12661 } 12662 explicit LastprivateConditionalRefChecker( 12663 ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM) 12664 : LPM(LPM) {} 12665 std::tuple<const Expr *, const Decl *, StringRef, LValue, llvm::Function *> 12666 getFoundData() const { 12667 return std::make_tuple(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn); 12668 } 12669 }; 12670 } // namespace 12671 12672 void CGOpenMPRuntime::emitLastprivateConditionalUpdate(CodeGenFunction &CGF, 12673 LValue IVLVal, 12674 StringRef UniqueDeclName, 12675 LValue LVal, 12676 SourceLocation Loc) { 12677 // Last updated loop counter for the lastprivate conditional var. 12678 // int<xx> last_iv = 0; 12679 llvm::Type *LLIVTy = CGF.ConvertTypeForMem(IVLVal.getType()); 12680 llvm::Constant *LastIV = 12681 getOrCreateInternalVariable(LLIVTy, getName({UniqueDeclName, "iv"})); 12682 cast<llvm::GlobalVariable>(LastIV)->setAlignment( 12683 IVLVal.getAlignment().getAsAlign()); 12684 LValue LastIVLVal = CGF.MakeNaturalAlignAddrLValue(LastIV, IVLVal.getType()); 12685 12686 // Last value of the lastprivate conditional. 12687 // decltype(priv_a) last_a; 12688 llvm::GlobalVariable *Last = getOrCreateInternalVariable( 12689 CGF.ConvertTypeForMem(LVal.getType()), UniqueDeclName); 12690 Last->setAlignment(LVal.getAlignment().getAsAlign()); 12691 LValue LastLVal = CGF.MakeAddrLValue( 12692 Address(Last, Last->getValueType(), LVal.getAlignment()), LVal.getType()); 12693 12694 // Global loop counter. Required to handle inner parallel-for regions. 
/// Emits the update of the global "last value" copy of a lastprivate
/// conditional variable.
///
/// Two internal variables are used: one named from \p UniqueDeclName plus
/// "iv" holding the last iteration value that performed an update, and one
/// named \p UniqueDeclName holding the last stored value. The private value
/// \p LVal is copied to the latter when the stored iteration value is less
/// than or equal to the current one loaded from \p IVLVal. The update is
/// wrapped in a critical region unless compiling in OpenMP simd-only mode.
///
/// \param IVLVal         LValue of the loop iteration variable.
/// \param UniqueDeclName Base name of the internal variables; also used as
///                       the name of the critical region.
/// \param LVal           LValue of the private copy of the variable.
/// \param Loc            Location used for the emitted loads and the region.
void CGOpenMPRuntime::emitLastprivateConditionalUpdate(CodeGenFunction &CGF,
                                                       LValue IVLVal,
                                                       StringRef UniqueDeclName,
                                                       LValue LVal,
                                                       SourceLocation Loc) {
  // Last updated loop counter for the lastprivate conditional var.
  // int<xx> last_iv = 0;
  llvm::Type *LLIVTy = CGF.ConvertTypeForMem(IVLVal.getType());
  llvm::Constant *LastIV =
      getOrCreateInternalVariable(LLIVTy, getName({UniqueDeclName, "iv"}));
  cast<llvm::GlobalVariable>(LastIV)->setAlignment(
      IVLVal.getAlignment().getAsAlign());
  LValue LastIVLVal = CGF.MakeNaturalAlignAddrLValue(LastIV, IVLVal.getType());

  // Last value of the lastprivate conditional.
  // decltype(priv_a) last_a;
  llvm::GlobalVariable *Last = getOrCreateInternalVariable(
      CGF.ConvertTypeForMem(LVal.getType()), UniqueDeclName);
  Last->setAlignment(LVal.getAlignment().getAsAlign());
  LValue LastLVal = CGF.MakeAddrLValue(
      Address(Last, Last->getValueType(), LVal.getAlignment()), LVal.getType());

  // Global loop counter. Required to handle inner parallel-for regions.
  // iv
  // The load is emitted here, outside of the region body below, and the
  // loaded value is captured by copy.
  llvm::Value *IVVal = CGF.EmitLoadOfScalar(IVLVal, Loc);

  // #pragma omp critical(a)
  // if (last_iv <= iv) {
  //   last_iv = iv;
  //   last_a = priv_a;
  // }
  auto &&CodeGen = [&LastIVLVal, &IVLVal, IVVal, &LVal, &LastLVal,
                    Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    llvm::Value *LastIVVal = CGF.EmitLoadOfScalar(LastIVLVal, Loc);
    // (last_iv <= iv) ? Check if the variable is updated and store new
    // value in global var.
    // The comparison follows the signedness of the iteration variable type.
    llvm::Value *CmpRes;
    if (IVLVal.getType()->isSignedIntegerType()) {
      CmpRes = CGF.Builder.CreateICmpSLE(LastIVVal, IVVal);
    } else {
      assert(IVLVal.getType()->isUnsignedIntegerType() &&
             "Loop iteration variable must be integer.");
      CmpRes = CGF.Builder.CreateICmpULE(LastIVVal, IVVal);
    }
    llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lp_cond_then");
    llvm::BasicBlock *ExitBB = CGF.createBasicBlock("lp_cond_exit");
    CGF.Builder.CreateCondBr(CmpRes, ThenBB, ExitBB);
    // {
    CGF.EmitBlock(ThenBB);

    //   last_iv = iv;
    CGF.EmitStoreOfScalar(IVVal, LastIVLVal);

    //   last_a = priv_a;
    // Only scalar and complex values can be copied here.
    switch (CGF.getEvaluationKind(LVal.getType())) {
    case TEK_Scalar: {
      llvm::Value *PrivVal = CGF.EmitLoadOfScalar(LVal, Loc);
      CGF.EmitStoreOfScalar(PrivVal, LastLVal);
      break;
    }
    case TEK_Complex: {
      CodeGenFunction::ComplexPairTy PrivVal = CGF.EmitLoadOfComplex(LVal, Loc);
      CGF.EmitStoreOfComplex(PrivVal, LastLVal, /*isInit=*/false);
      break;
    }
    case TEK_Aggregate:
      llvm_unreachable(
          "Aggregates are not supported in lastprivate conditional.");
    }
    // }
    CGF.EmitBranch(ExitBB);
    // There is no need to emit line number for unconditional branch.
    // NOTE(review): the object created here is a temporary discarded at the
    // end of the statement and the branch has already been emitted above -
    // confirm this has the intended effect.
    (void)ApplyDebugLocation::CreateEmpty(CGF);
    CGF.EmitBlock(ExitBB, /*IsFinished=*/true);
  };

  if (CGM.getLangOpts().OpenMPSimd) {
    // Do not emit as a critical region as no parallel region could be emitted.
    RegionCodeGenTy ThenRCG(CodeGen);
    ThenRCG(CGF);
  } else {
    emitCriticalRegion(CGF, UniqueDeclName, CodeGen, Loc);
  }
}
12774 // ((struct.lastprivate.conditional*)&priv_a)->Fired = 1; 12775 auto It = LastprivateConditionalToTypes[FoundFn].find(FoundD); 12776 assert(It != LastprivateConditionalToTypes[FoundFn].end() && 12777 "Lastprivate conditional is not found in outer region."); 12778 QualType StructTy = std::get<0>(It->getSecond()); 12779 const FieldDecl* FiredDecl = std::get<2>(It->getSecond()); 12780 LValue PrivLVal = CGF.EmitLValue(FoundE); 12781 Address StructAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 12782 PrivLVal.getAddress(CGF), 12783 CGF.ConvertTypeForMem(CGF.getContext().getPointerType(StructTy))); 12784 LValue BaseLVal = 12785 CGF.MakeAddrLValue(StructAddr, StructTy, AlignmentSource::Decl); 12786 LValue FiredLVal = CGF.EmitLValueForField(BaseLVal, FiredDecl); 12787 CGF.EmitAtomicStore(RValue::get(llvm::ConstantInt::get( 12788 CGF.ConvertTypeForMem(FiredDecl->getType()), 1)), 12789 FiredLVal, llvm::AtomicOrdering::Unordered, 12790 /*IsVolatile=*/true, /*isInit=*/false); 12791 return; 12792 } 12793 12794 // Private address of the lastprivate conditional in the current context. 
12795 // priv_a 12796 LValue LVal = CGF.EmitLValue(FoundE); 12797 emitLastprivateConditionalUpdate(CGF, IVLVal, UniqueDeclName, LVal, 12798 FoundE->getExprLoc()); 12799 } 12800 12801 void CGOpenMPRuntime::checkAndEmitSharedLastprivateConditional( 12802 CodeGenFunction &CGF, const OMPExecutableDirective &D, 12803 const llvm::DenseSet<CanonicalDeclPtr<const VarDecl>> &IgnoredDecls) { 12804 if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty()) 12805 return; 12806 auto Range = llvm::reverse(LastprivateConditionalStack); 12807 auto It = llvm::find_if( 12808 Range, [](const LastprivateConditionalData &D) { return !D.Disabled; }); 12809 if (It == Range.end() || It->Fn != CGF.CurFn) 12810 return; 12811 auto LPCI = LastprivateConditionalToTypes.find(It->Fn); 12812 assert(LPCI != LastprivateConditionalToTypes.end() && 12813 "Lastprivates must be registered already."); 12814 SmallVector<OpenMPDirectiveKind, 4> CaptureRegions; 12815 getOpenMPCaptureRegions(CaptureRegions, D.getDirectiveKind()); 12816 const CapturedStmt *CS = D.getCapturedStmt(CaptureRegions.back()); 12817 for (const auto &Pair : It->DeclToUniqueName) { 12818 const auto *VD = cast<VarDecl>(Pair.first->getCanonicalDecl()); 12819 if (!CS->capturesVariable(VD) || IgnoredDecls.count(VD) > 0) 12820 continue; 12821 auto I = LPCI->getSecond().find(Pair.first); 12822 assert(I != LPCI->getSecond().end() && 12823 "Lastprivate must be rehistered already."); 12824 // bool Cmp = priv_a.Fired != 0; 12825 LValue BaseLVal = std::get<3>(I->getSecond()); 12826 LValue FiredLVal = 12827 CGF.EmitLValueForField(BaseLVal, std::get<2>(I->getSecond())); 12828 llvm::Value *Res = CGF.EmitLoadOfScalar(FiredLVal, D.getBeginLoc()); 12829 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Res); 12830 llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lpc.then"); 12831 llvm::BasicBlock *DoneBB = CGF.createBasicBlock("lpc.done"); 12832 // if (Cmp) { 12833 CGF.Builder.CreateCondBr(Cmp, ThenBB, DoneBB); 12834 CGF.EmitBlock(ThenBB); 
12835 Address Addr = CGF.GetAddrOfLocalVar(VD); 12836 LValue LVal; 12837 if (VD->getType()->isReferenceType()) 12838 LVal = CGF.EmitLoadOfReferenceLValue(Addr, VD->getType(), 12839 AlignmentSource::Decl); 12840 else 12841 LVal = CGF.MakeAddrLValue(Addr, VD->getType().getNonReferenceType(), 12842 AlignmentSource::Decl); 12843 emitLastprivateConditionalUpdate(CGF, It->IVLVal, Pair.second, LVal, 12844 D.getBeginLoc()); 12845 auto AL = ApplyDebugLocation::CreateArtificial(CGF); 12846 CGF.EmitBlock(DoneBB, /*IsFinal=*/true); 12847 // } 12848 } 12849 } 12850 12851 void CGOpenMPRuntime::emitLastprivateConditionalFinalUpdate( 12852 CodeGenFunction &CGF, LValue PrivLVal, const VarDecl *VD, 12853 SourceLocation Loc) { 12854 if (CGF.getLangOpts().OpenMP < 50) 12855 return; 12856 auto It = LastprivateConditionalStack.back().DeclToUniqueName.find(VD); 12857 assert(It != LastprivateConditionalStack.back().DeclToUniqueName.end() && 12858 "Unknown lastprivate conditional variable."); 12859 StringRef UniqueName = It->second; 12860 llvm::GlobalVariable *GV = CGM.getModule().getNamedGlobal(UniqueName); 12861 // The variable was not updated in the region - exit. 
12862 if (!GV) 12863 return; 12864 LValue LPLVal = CGF.MakeAddrLValue( 12865 Address(GV, GV->getValueType(), PrivLVal.getAlignment()), 12866 PrivLVal.getType().getNonReferenceType()); 12867 llvm::Value *Res = CGF.EmitLoadOfScalar(LPLVal, Loc); 12868 CGF.EmitStoreOfScalar(Res, PrivLVal); 12869 } 12870 12871 llvm::Function *CGOpenMPSIMDRuntime::emitParallelOutlinedFunction( 12872 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, 12873 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { 12874 llvm_unreachable("Not supported in SIMD-only mode"); 12875 } 12876 12877 llvm::Function *CGOpenMPSIMDRuntime::emitTeamsOutlinedFunction( 12878 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, 12879 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { 12880 llvm_unreachable("Not supported in SIMD-only mode"); 12881 } 12882 12883 llvm::Function *CGOpenMPSIMDRuntime::emitTaskOutlinedFunction( 12884 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, 12885 const VarDecl *PartIDVar, const VarDecl *TaskTVar, 12886 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen, 12887 bool Tied, unsigned &NumberOfParts) { 12888 llvm_unreachable("Not supported in SIMD-only mode"); 12889 } 12890 12891 void CGOpenMPSIMDRuntime::emitParallelCall(CodeGenFunction &CGF, 12892 SourceLocation Loc, 12893 llvm::Function *OutlinedFn, 12894 ArrayRef<llvm::Value *> CapturedVars, 12895 const Expr *IfCond, 12896 llvm::Value *NumThreads) { 12897 llvm_unreachable("Not supported in SIMD-only mode"); 12898 } 12899 12900 void CGOpenMPSIMDRuntime::emitCriticalRegion( 12901 CodeGenFunction &CGF, StringRef CriticalName, 12902 const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc, 12903 const Expr *Hint) { 12904 llvm_unreachable("Not supported in SIMD-only mode"); 12905 } 12906 12907 void CGOpenMPSIMDRuntime::emitMasterRegion(CodeGenFunction &CGF, 12908 const RegionCodeGenTy &MasterOpGen, 12909 SourceLocation Loc) { 12910 
llvm_unreachable("Not supported in SIMD-only mode"); 12911 } 12912 12913 void CGOpenMPSIMDRuntime::emitMaskedRegion(CodeGenFunction &CGF, 12914 const RegionCodeGenTy &MasterOpGen, 12915 SourceLocation Loc, 12916 const Expr *Filter) { 12917 llvm_unreachable("Not supported in SIMD-only mode"); 12918 } 12919 12920 void CGOpenMPSIMDRuntime::emitTaskyieldCall(CodeGenFunction &CGF, 12921 SourceLocation Loc) { 12922 llvm_unreachable("Not supported in SIMD-only mode"); 12923 } 12924 12925 void CGOpenMPSIMDRuntime::emitTaskgroupRegion( 12926 CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen, 12927 SourceLocation Loc) { 12928 llvm_unreachable("Not supported in SIMD-only mode"); 12929 } 12930 12931 void CGOpenMPSIMDRuntime::emitSingleRegion( 12932 CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen, 12933 SourceLocation Loc, ArrayRef<const Expr *> CopyprivateVars, 12934 ArrayRef<const Expr *> DestExprs, ArrayRef<const Expr *> SrcExprs, 12935 ArrayRef<const Expr *> AssignmentOps) { 12936 llvm_unreachable("Not supported in SIMD-only mode"); 12937 } 12938 12939 void CGOpenMPSIMDRuntime::emitOrderedRegion(CodeGenFunction &CGF, 12940 const RegionCodeGenTy &OrderedOpGen, 12941 SourceLocation Loc, 12942 bool IsThreads) { 12943 llvm_unreachable("Not supported in SIMD-only mode"); 12944 } 12945 12946 void CGOpenMPSIMDRuntime::emitBarrierCall(CodeGenFunction &CGF, 12947 SourceLocation Loc, 12948 OpenMPDirectiveKind Kind, 12949 bool EmitChecks, 12950 bool ForceSimpleCall) { 12951 llvm_unreachable("Not supported in SIMD-only mode"); 12952 } 12953 12954 void CGOpenMPSIMDRuntime::emitForDispatchInit( 12955 CodeGenFunction &CGF, SourceLocation Loc, 12956 const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned, 12957 bool Ordered, const DispatchRTInput &DispatchValues) { 12958 llvm_unreachable("Not supported in SIMD-only mode"); 12959 } 12960 12961 void CGOpenMPSIMDRuntime::emitForStaticInit( 12962 CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind 
DKind, 12963 const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values) { 12964 llvm_unreachable("Not supported in SIMD-only mode"); 12965 } 12966 12967 void CGOpenMPSIMDRuntime::emitDistributeStaticInit( 12968 CodeGenFunction &CGF, SourceLocation Loc, 12969 OpenMPDistScheduleClauseKind SchedKind, const StaticRTInput &Values) { 12970 llvm_unreachable("Not supported in SIMD-only mode"); 12971 } 12972 12973 void CGOpenMPSIMDRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF, 12974 SourceLocation Loc, 12975 unsigned IVSize, 12976 bool IVSigned) { 12977 llvm_unreachable("Not supported in SIMD-only mode"); 12978 } 12979 12980 void CGOpenMPSIMDRuntime::emitForStaticFinish(CodeGenFunction &CGF, 12981 SourceLocation Loc, 12982 OpenMPDirectiveKind DKind) { 12983 llvm_unreachable("Not supported in SIMD-only mode"); 12984 } 12985 12986 llvm::Value *CGOpenMPSIMDRuntime::emitForNext(CodeGenFunction &CGF, 12987 SourceLocation Loc, 12988 unsigned IVSize, bool IVSigned, 12989 Address IL, Address LB, 12990 Address UB, Address ST) { 12991 llvm_unreachable("Not supported in SIMD-only mode"); 12992 } 12993 12994 void CGOpenMPSIMDRuntime::emitNumThreadsClause(CodeGenFunction &CGF, 12995 llvm::Value *NumThreads, 12996 SourceLocation Loc) { 12997 llvm_unreachable("Not supported in SIMD-only mode"); 12998 } 12999 13000 void CGOpenMPSIMDRuntime::emitProcBindClause(CodeGenFunction &CGF, 13001 ProcBindKind ProcBind, 13002 SourceLocation Loc) { 13003 llvm_unreachable("Not supported in SIMD-only mode"); 13004 } 13005 13006 Address CGOpenMPSIMDRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF, 13007 const VarDecl *VD, 13008 Address VDAddr, 13009 SourceLocation Loc) { 13010 llvm_unreachable("Not supported in SIMD-only mode"); 13011 } 13012 13013 llvm::Function *CGOpenMPSIMDRuntime::emitThreadPrivateVarDefinition( 13014 const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit, 13015 CodeGenFunction *CGF) { 13016 llvm_unreachable("Not supported in SIMD-only mode"); 
13017 } 13018 13019 Address CGOpenMPSIMDRuntime::getAddrOfArtificialThreadPrivate( 13020 CodeGenFunction &CGF, QualType VarType, StringRef Name) { 13021 llvm_unreachable("Not supported in SIMD-only mode"); 13022 } 13023 13024 void CGOpenMPSIMDRuntime::emitFlush(CodeGenFunction &CGF, 13025 ArrayRef<const Expr *> Vars, 13026 SourceLocation Loc, 13027 llvm::AtomicOrdering AO) { 13028 llvm_unreachable("Not supported in SIMD-only mode"); 13029 } 13030 13031 void CGOpenMPSIMDRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc, 13032 const OMPExecutableDirective &D, 13033 llvm::Function *TaskFunction, 13034 QualType SharedsTy, Address Shareds, 13035 const Expr *IfCond, 13036 const OMPTaskDataTy &Data) { 13037 llvm_unreachable("Not supported in SIMD-only mode"); 13038 } 13039 13040 void CGOpenMPSIMDRuntime::emitTaskLoopCall( 13041 CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D, 13042 llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds, 13043 const Expr *IfCond, const OMPTaskDataTy &Data) { 13044 llvm_unreachable("Not supported in SIMD-only mode"); 13045 } 13046 13047 void CGOpenMPSIMDRuntime::emitReduction( 13048 CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> Privates, 13049 ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs, 13050 ArrayRef<const Expr *> ReductionOps, ReductionOptionsTy Options) { 13051 assert(Options.SimpleReduction && "Only simple reduction is expected."); 13052 CGOpenMPRuntime::emitReduction(CGF, Loc, Privates, LHSExprs, RHSExprs, 13053 ReductionOps, Options); 13054 } 13055 13056 llvm::Value *CGOpenMPSIMDRuntime::emitTaskReductionInit( 13057 CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs, 13058 ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) { 13059 llvm_unreachable("Not supported in SIMD-only mode"); 13060 } 13061 13062 void CGOpenMPSIMDRuntime::emitTaskReductionFini(CodeGenFunction &CGF, 13063 SourceLocation Loc, 13064 bool 
IsWorksharingReduction) { 13065 llvm_unreachable("Not supported in SIMD-only mode"); 13066 } 13067 13068 void CGOpenMPSIMDRuntime::emitTaskReductionFixups(CodeGenFunction &CGF, 13069 SourceLocation Loc, 13070 ReductionCodeGen &RCG, 13071 unsigned N) { 13072 llvm_unreachable("Not supported in SIMD-only mode"); 13073 } 13074 13075 Address CGOpenMPSIMDRuntime::getTaskReductionItem(CodeGenFunction &CGF, 13076 SourceLocation Loc, 13077 llvm::Value *ReductionsPtr, 13078 LValue SharedLVal) { 13079 llvm_unreachable("Not supported in SIMD-only mode"); 13080 } 13081 13082 void CGOpenMPSIMDRuntime::emitTaskwaitCall(CodeGenFunction &CGF, 13083 SourceLocation Loc, 13084 const OMPTaskDataTy &Data) { 13085 llvm_unreachable("Not supported in SIMD-only mode"); 13086 } 13087 13088 void CGOpenMPSIMDRuntime::emitCancellationPointCall( 13089 CodeGenFunction &CGF, SourceLocation Loc, 13090 OpenMPDirectiveKind CancelRegion) { 13091 llvm_unreachable("Not supported in SIMD-only mode"); 13092 } 13093 13094 void CGOpenMPSIMDRuntime::emitCancelCall(CodeGenFunction &CGF, 13095 SourceLocation Loc, const Expr *IfCond, 13096 OpenMPDirectiveKind CancelRegion) { 13097 llvm_unreachable("Not supported in SIMD-only mode"); 13098 } 13099 13100 void CGOpenMPSIMDRuntime::emitTargetOutlinedFunction( 13101 const OMPExecutableDirective &D, StringRef ParentName, 13102 llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, 13103 bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) { 13104 llvm_unreachable("Not supported in SIMD-only mode"); 13105 } 13106 13107 void CGOpenMPSIMDRuntime::emitTargetCall( 13108 CodeGenFunction &CGF, const OMPExecutableDirective &D, 13109 llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond, 13110 llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device, 13111 llvm::function_ref<llvm::Value *(CodeGenFunction &CGF, 13112 const OMPLoopDirective &D)> 13113 SizeEmitter) { 13114 llvm_unreachable("Not supported in SIMD-only mode"); 13115 } 13116 
13117 bool CGOpenMPSIMDRuntime::emitTargetFunctions(GlobalDecl GD) { 13118 llvm_unreachable("Not supported in SIMD-only mode"); 13119 } 13120 13121 bool CGOpenMPSIMDRuntime::emitTargetGlobalVariable(GlobalDecl GD) { 13122 llvm_unreachable("Not supported in SIMD-only mode"); 13123 } 13124 13125 bool CGOpenMPSIMDRuntime::emitTargetGlobal(GlobalDecl GD) { 13126 return false; 13127 } 13128 13129 void CGOpenMPSIMDRuntime::emitTeamsCall(CodeGenFunction &CGF, 13130 const OMPExecutableDirective &D, 13131 SourceLocation Loc, 13132 llvm::Function *OutlinedFn, 13133 ArrayRef<llvm::Value *> CapturedVars) { 13134 llvm_unreachable("Not supported in SIMD-only mode"); 13135 } 13136 13137 void CGOpenMPSIMDRuntime::emitNumTeamsClause(CodeGenFunction &CGF, 13138 const Expr *NumTeams, 13139 const Expr *ThreadLimit, 13140 SourceLocation Loc) { 13141 llvm_unreachable("Not supported in SIMD-only mode"); 13142 } 13143 13144 void CGOpenMPSIMDRuntime::emitTargetDataCalls( 13145 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, 13146 const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) { 13147 llvm_unreachable("Not supported in SIMD-only mode"); 13148 } 13149 13150 void CGOpenMPSIMDRuntime::emitTargetDataStandAloneCall( 13151 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, 13152 const Expr *Device) { 13153 llvm_unreachable("Not supported in SIMD-only mode"); 13154 } 13155 13156 void CGOpenMPSIMDRuntime::emitDoacrossInit(CodeGenFunction &CGF, 13157 const OMPLoopDirective &D, 13158 ArrayRef<Expr *> NumIterations) { 13159 llvm_unreachable("Not supported in SIMD-only mode"); 13160 } 13161 13162 void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF, 13163 const OMPDependClause *C) { 13164 llvm_unreachable("Not supported in SIMD-only mode"); 13165 } 13166 13167 const VarDecl * 13168 CGOpenMPSIMDRuntime::translateParameter(const FieldDecl *FD, 13169 const VarDecl *NativeParam) const { 13170 llvm_unreachable("Not 
supported in SIMD-only mode"); 13171 } 13172 13173 Address 13174 CGOpenMPSIMDRuntime::getParameterAddress(CodeGenFunction &CGF, 13175 const VarDecl *NativeParam, 13176 const VarDecl *TargetParam) const { 13177 llvm_unreachable("Not supported in SIMD-only mode"); 13178 } 13179