1 //===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This provides a class for OpenMP runtime code generation. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "CGOpenMPRuntime.h" 14 #include "CGCXXABI.h" 15 #include "CGCleanup.h" 16 #include "CGRecordLayout.h" 17 #include "CodeGenFunction.h" 18 #include "TargetInfo.h" 19 #include "clang/AST/APValue.h" 20 #include "clang/AST/Attr.h" 21 #include "clang/AST/Decl.h" 22 #include "clang/AST/OpenMPClause.h" 23 #include "clang/AST/StmtOpenMP.h" 24 #include "clang/AST/StmtVisitor.h" 25 #include "clang/Basic/BitmaskEnum.h" 26 #include "clang/Basic/FileManager.h" 27 #include "clang/Basic/OpenMPKinds.h" 28 #include "clang/Basic/SourceManager.h" 29 #include "clang/CodeGen/ConstantInitBuilder.h" 30 #include "llvm/ADT/ArrayRef.h" 31 #include "llvm/ADT/SetOperations.h" 32 #include "llvm/ADT/SmallBitVector.h" 33 #include "llvm/ADT/StringExtras.h" 34 #include "llvm/Bitcode/BitcodeReader.h" 35 #include "llvm/IR/Constants.h" 36 #include "llvm/IR/DerivedTypes.h" 37 #include "llvm/IR/GlobalValue.h" 38 #include "llvm/IR/Value.h" 39 #include "llvm/Support/AtomicOrdering.h" 40 #include "llvm/Support/Format.h" 41 #include "llvm/Support/raw_ostream.h" 42 #include <cassert> 43 #include <numeric> 44 45 using namespace clang; 46 using namespace CodeGen; 47 using namespace llvm::omp; 48 49 namespace { 50 /// Base class for handling code generation inside OpenMP regions. 51 class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo { 52 public: 53 /// Kinds of OpenMP regions used in codegen. 
54 enum CGOpenMPRegionKind { 55 /// Region with outlined function for standalone 'parallel' 56 /// directive. 57 ParallelOutlinedRegion, 58 /// Region with outlined function for standalone 'task' directive. 59 TaskOutlinedRegion, 60 /// Region for constructs that do not require function outlining, 61 /// like 'for', 'sections', 'atomic' etc. directives. 62 InlinedRegion, 63 /// Region with outlined function for standalone 'target' directive. 64 TargetRegion, 65 }; 66 67 CGOpenMPRegionInfo(const CapturedStmt &CS, 68 const CGOpenMPRegionKind RegionKind, 69 const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind, 70 bool HasCancel) 71 : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind), 72 CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {} 73 74 CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind, 75 const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind, 76 bool HasCancel) 77 : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen), 78 Kind(Kind), HasCancel(HasCancel) {} 79 80 /// Get a variable or parameter for storing global thread id 81 /// inside OpenMP construct. 82 virtual const VarDecl *getThreadIDVariable() const = 0; 83 84 /// Emit the captured statement body. 85 void EmitBody(CodeGenFunction &CGF, const Stmt *S) override; 86 87 /// Get an LValue for the current ThreadID variable. 88 /// \return LValue for thread id variable. This LValue always has type int32*. 
89 virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF); 90 91 virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {} 92 93 CGOpenMPRegionKind getRegionKind() const { return RegionKind; } 94 95 OpenMPDirectiveKind getDirectiveKind() const { return Kind; } 96 97 bool hasCancel() const { return HasCancel; } 98 99 static bool classof(const CGCapturedStmtInfo *Info) { 100 return Info->getKind() == CR_OpenMP; 101 } 102 103 ~CGOpenMPRegionInfo() override = default; 104 105 protected: 106 CGOpenMPRegionKind RegionKind; 107 RegionCodeGenTy CodeGen; 108 OpenMPDirectiveKind Kind; 109 bool HasCancel; 110 }; 111 112 /// API for captured statement code generation in OpenMP constructs. 113 class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo { 114 public: 115 CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar, 116 const RegionCodeGenTy &CodeGen, 117 OpenMPDirectiveKind Kind, bool HasCancel, 118 StringRef HelperName) 119 : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind, 120 HasCancel), 121 ThreadIDVar(ThreadIDVar), HelperName(HelperName) { 122 assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region."); 123 } 124 125 /// Get a variable or parameter for storing global thread id 126 /// inside OpenMP construct. 127 const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; } 128 129 /// Get the name of the capture helper. 130 StringRef getHelperName() const override { return HelperName; } 131 132 static bool classof(const CGCapturedStmtInfo *Info) { 133 return CGOpenMPRegionInfo::classof(Info) && 134 cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == 135 ParallelOutlinedRegion; 136 } 137 138 private: 139 /// A variable or parameter storing global thread id for OpenMP 140 /// constructs. 141 const VarDecl *ThreadIDVar; 142 StringRef HelperName; 143 }; 144 145 /// API for captured statement code generation in OpenMP constructs. 
146 class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo { 147 public: 148 class UntiedTaskActionTy final : public PrePostActionTy { 149 bool Untied; 150 const VarDecl *PartIDVar; 151 const RegionCodeGenTy UntiedCodeGen; 152 llvm::SwitchInst *UntiedSwitch = nullptr; 153 154 public: 155 UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar, 156 const RegionCodeGenTy &UntiedCodeGen) 157 : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {} 158 void Enter(CodeGenFunction &CGF) override { 159 if (Untied) { 160 // Emit task switching point. 161 LValue PartIdLVal = CGF.EmitLoadOfPointerLValue( 162 CGF.GetAddrOfLocalVar(PartIDVar), 163 PartIDVar->getType()->castAs<PointerType>()); 164 llvm::Value *Res = 165 CGF.EmitLoadOfScalar(PartIdLVal, PartIDVar->getLocation()); 166 llvm::BasicBlock *DoneBB = CGF.createBasicBlock(".untied.done."); 167 UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB); 168 CGF.EmitBlock(DoneBB); 169 CGF.EmitBranchThroughCleanup(CGF.ReturnBlock); 170 CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp.")); 171 UntiedSwitch->addCase(CGF.Builder.getInt32(0), 172 CGF.Builder.GetInsertBlock()); 173 emitUntiedSwitch(CGF); 174 } 175 } 176 void emitUntiedSwitch(CodeGenFunction &CGF) const { 177 if (Untied) { 178 LValue PartIdLVal = CGF.EmitLoadOfPointerLValue( 179 CGF.GetAddrOfLocalVar(PartIDVar), 180 PartIDVar->getType()->castAs<PointerType>()); 181 CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()), 182 PartIdLVal); 183 UntiedCodeGen(CGF); 184 CodeGenFunction::JumpDest CurPoint = 185 CGF.getJumpDestInCurrentScope(".untied.next."); 186 CGF.EmitBranch(CGF.ReturnBlock.getBlock()); 187 CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp.")); 188 UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()), 189 CGF.Builder.GetInsertBlock()); 190 CGF.EmitBranchThroughCleanup(CurPoint); 191 CGF.EmitBlock(CurPoint.getBlock()); 192 } 193 } 194 unsigned getNumberOfParts() const { return 
UntiedSwitch->getNumCases(); } 195 }; 196 CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS, 197 const VarDecl *ThreadIDVar, 198 const RegionCodeGenTy &CodeGen, 199 OpenMPDirectiveKind Kind, bool HasCancel, 200 const UntiedTaskActionTy &Action) 201 : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel), 202 ThreadIDVar(ThreadIDVar), Action(Action) { 203 assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region."); 204 } 205 206 /// Get a variable or parameter for storing global thread id 207 /// inside OpenMP construct. 208 const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; } 209 210 /// Get an LValue for the current ThreadID variable. 211 LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override; 212 213 /// Get the name of the capture helper. 214 StringRef getHelperName() const override { return ".omp_outlined."; } 215 216 void emitUntiedSwitch(CodeGenFunction &CGF) override { 217 Action.emitUntiedSwitch(CGF); 218 } 219 220 static bool classof(const CGCapturedStmtInfo *Info) { 221 return CGOpenMPRegionInfo::classof(Info) && 222 cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == 223 TaskOutlinedRegion; 224 } 225 226 private: 227 /// A variable or parameter storing global thread id for OpenMP 228 /// constructs. 229 const VarDecl *ThreadIDVar; 230 /// Action for emitting code for untied tasks. 231 const UntiedTaskActionTy &Action; 232 }; 233 234 /// API for inlined captured statement code generation in OpenMP 235 /// constructs. 236 class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo { 237 public: 238 CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI, 239 const RegionCodeGenTy &CodeGen, 240 OpenMPDirectiveKind Kind, bool HasCancel) 241 : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel), 242 OldCSI(OldCSI), 243 OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {} 244 245 // Retrieve the value of the context parameter. 
246 llvm::Value *getContextValue() const override { 247 if (OuterRegionInfo) 248 return OuterRegionInfo->getContextValue(); 249 llvm_unreachable("No context value for inlined OpenMP region"); 250 } 251 252 void setContextValue(llvm::Value *V) override { 253 if (OuterRegionInfo) { 254 OuterRegionInfo->setContextValue(V); 255 return; 256 } 257 llvm_unreachable("No context value for inlined OpenMP region"); 258 } 259 260 /// Lookup the captured field decl for a variable. 261 const FieldDecl *lookup(const VarDecl *VD) const override { 262 if (OuterRegionInfo) 263 return OuterRegionInfo->lookup(VD); 264 // If there is no outer outlined region,no need to lookup in a list of 265 // captured variables, we can use the original one. 266 return nullptr; 267 } 268 269 FieldDecl *getThisFieldDecl() const override { 270 if (OuterRegionInfo) 271 return OuterRegionInfo->getThisFieldDecl(); 272 return nullptr; 273 } 274 275 /// Get a variable or parameter for storing global thread id 276 /// inside OpenMP construct. 277 const VarDecl *getThreadIDVariable() const override { 278 if (OuterRegionInfo) 279 return OuterRegionInfo->getThreadIDVariable(); 280 return nullptr; 281 } 282 283 /// Get an LValue for the current ThreadID variable. 284 LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override { 285 if (OuterRegionInfo) 286 return OuterRegionInfo->getThreadIDVariableLValue(CGF); 287 llvm_unreachable("No LValue for inlined OpenMP construct"); 288 } 289 290 /// Get the name of the capture helper. 
291 StringRef getHelperName() const override { 292 if (auto *OuterRegionInfo = getOldCSI()) 293 return OuterRegionInfo->getHelperName(); 294 llvm_unreachable("No helper name for inlined OpenMP construct"); 295 } 296 297 void emitUntiedSwitch(CodeGenFunction &CGF) override { 298 if (OuterRegionInfo) 299 OuterRegionInfo->emitUntiedSwitch(CGF); 300 } 301 302 CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; } 303 304 static bool classof(const CGCapturedStmtInfo *Info) { 305 return CGOpenMPRegionInfo::classof(Info) && 306 cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion; 307 } 308 309 ~CGOpenMPInlinedRegionInfo() override = default; 310 311 private: 312 /// CodeGen info about outer OpenMP region. 313 CodeGenFunction::CGCapturedStmtInfo *OldCSI; 314 CGOpenMPRegionInfo *OuterRegionInfo; 315 }; 316 317 /// API for captured statement code generation in OpenMP target 318 /// constructs. For this captures, implicit parameters are used instead of the 319 /// captured fields. The name of the target region has to be unique in a given 320 /// application so it is provided by the client, because only the client has 321 /// the information to generate that. 322 class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo { 323 public: 324 CGOpenMPTargetRegionInfo(const CapturedStmt &CS, 325 const RegionCodeGenTy &CodeGen, StringRef HelperName) 326 : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target, 327 /*HasCancel=*/false), 328 HelperName(HelperName) {} 329 330 /// This is unused for target regions because each starts executing 331 /// with a single thread. 332 const VarDecl *getThreadIDVariable() const override { return nullptr; } 333 334 /// Get the name of the capture helper. 
335 StringRef getHelperName() const override { return HelperName; } 336 337 static bool classof(const CGCapturedStmtInfo *Info) { 338 return CGOpenMPRegionInfo::classof(Info) && 339 cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion; 340 } 341 342 private: 343 StringRef HelperName; 344 }; 345 346 static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) { 347 llvm_unreachable("No codegen for expressions"); 348 } 349 /// API for generation of expressions captured in a innermost OpenMP 350 /// region. 351 class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo { 352 public: 353 CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS) 354 : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen, 355 OMPD_unknown, 356 /*HasCancel=*/false), 357 PrivScope(CGF) { 358 // Make sure the globals captured in the provided statement are local by 359 // using the privatization logic. We assume the same variable is not 360 // captured more than once. 361 for (const auto &C : CS.captures()) { 362 if (!C.capturesVariable() && !C.capturesVariableByCopy()) 363 continue; 364 365 const VarDecl *VD = C.getCapturedVar(); 366 if (VD->isLocalVarDeclOrParm()) 367 continue; 368 369 DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD), 370 /*RefersToEnclosingVariableOrCapture=*/false, 371 VD->getType().getNonReferenceType(), VK_LValue, 372 C.getLocation()); 373 PrivScope.addPrivate( 374 VD, [&CGF, &DRE]() { return CGF.EmitLValue(&DRE).getAddress(CGF); }); 375 } 376 (void)PrivScope.Privatize(); 377 } 378 379 /// Lookup the captured field decl for a variable. 380 const FieldDecl *lookup(const VarDecl *VD) const override { 381 if (const FieldDecl *FD = CGOpenMPInlinedRegionInfo::lookup(VD)) 382 return FD; 383 return nullptr; 384 } 385 386 /// Emit the captured statement body. 
387 void EmitBody(CodeGenFunction &CGF, const Stmt *S) override { 388 llvm_unreachable("No body for expressions"); 389 } 390 391 /// Get a variable or parameter for storing global thread id 392 /// inside OpenMP construct. 393 const VarDecl *getThreadIDVariable() const override { 394 llvm_unreachable("No thread id for expressions"); 395 } 396 397 /// Get the name of the capture helper. 398 StringRef getHelperName() const override { 399 llvm_unreachable("No helper name for expressions"); 400 } 401 402 static bool classof(const CGCapturedStmtInfo *Info) { return false; } 403 404 private: 405 /// Private scope to capture global variables. 406 CodeGenFunction::OMPPrivateScope PrivScope; 407 }; 408 409 /// RAII for emitting code of OpenMP constructs. 410 class InlinedOpenMPRegionRAII { 411 CodeGenFunction &CGF; 412 llvm::DenseMap<const VarDecl *, FieldDecl *> LambdaCaptureFields; 413 FieldDecl *LambdaThisCaptureField = nullptr; 414 const CodeGen::CGBlockInfo *BlockInfo = nullptr; 415 bool NoInheritance = false; 416 417 public: 418 /// Constructs region for combined constructs. 419 /// \param CodeGen Code generation sequence for combined directives. Includes 420 /// a list of functions used for code generation of implicitly inlined 421 /// regions. 422 InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen, 423 OpenMPDirectiveKind Kind, bool HasCancel, 424 bool NoInheritance = true) 425 : CGF(CGF), NoInheritance(NoInheritance) { 426 // Start emission for the construct. 427 CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo( 428 CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel); 429 if (NoInheritance) { 430 std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields); 431 LambdaThisCaptureField = CGF.LambdaThisCaptureField; 432 CGF.LambdaThisCaptureField = nullptr; 433 BlockInfo = CGF.BlockInfo; 434 CGF.BlockInfo = nullptr; 435 } 436 } 437 438 ~InlinedOpenMPRegionRAII() { 439 // Restore original CapturedStmtInfo only if we're done with code emission. 
440 auto *OldCSI = 441 cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI(); 442 delete CGF.CapturedStmtInfo; 443 CGF.CapturedStmtInfo = OldCSI; 444 if (NoInheritance) { 445 std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields); 446 CGF.LambdaThisCaptureField = LambdaThisCaptureField; 447 CGF.BlockInfo = BlockInfo; 448 } 449 } 450 }; 451 452 /// Values for bit flags used in the ident_t to describe the fields. 453 /// All enumeric elements are named and described in accordance with the code 454 /// from https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h 455 enum OpenMPLocationFlags : unsigned { 456 /// Use trampoline for internal microtask. 457 OMP_IDENT_IMD = 0x01, 458 /// Use c-style ident structure. 459 OMP_IDENT_KMPC = 0x02, 460 /// Atomic reduction option for kmpc_reduce. 461 OMP_ATOMIC_REDUCE = 0x10, 462 /// Explicit 'barrier' directive. 463 OMP_IDENT_BARRIER_EXPL = 0x20, 464 /// Implicit barrier in code. 465 OMP_IDENT_BARRIER_IMPL = 0x40, 466 /// Implicit barrier in 'for' directive. 467 OMP_IDENT_BARRIER_IMPL_FOR = 0x40, 468 /// Implicit barrier in 'sections' directive. 469 OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0, 470 /// Implicit barrier in 'single' directive. 471 OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140, 472 /// Call of __kmp_for_static_init for static loop. 473 OMP_IDENT_WORK_LOOP = 0x200, 474 /// Call of __kmp_for_static_init for sections. 475 OMP_IDENT_WORK_SECTIONS = 0x400, 476 /// Call of __kmp_for_static_init for distribute. 477 OMP_IDENT_WORK_DISTRIBUTE = 0x800, 478 LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE) 479 }; 480 481 namespace { 482 LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE(); 483 /// Values for bit flags for marking which requires clauses have been used. 484 enum OpenMPOffloadingRequiresDirFlags : int64_t { 485 /// flag undefined. 486 OMP_REQ_UNDEFINED = 0x000, 487 /// no requires clause present. 488 OMP_REQ_NONE = 0x001, 489 /// reverse_offload clause. 
490 OMP_REQ_REVERSE_OFFLOAD = 0x002, 491 /// unified_address clause. 492 OMP_REQ_UNIFIED_ADDRESS = 0x004, 493 /// unified_shared_memory clause. 494 OMP_REQ_UNIFIED_SHARED_MEMORY = 0x008, 495 /// dynamic_allocators clause. 496 OMP_REQ_DYNAMIC_ALLOCATORS = 0x010, 497 LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_REQ_DYNAMIC_ALLOCATORS) 498 }; 499 500 enum OpenMPOffloadingReservedDeviceIDs { 501 /// Device ID if the device was not defined, runtime should get it 502 /// from environment variables in the spec. 503 OMP_DEVICEID_UNDEF = -1, 504 }; 505 } // anonymous namespace 506 507 /// Describes ident structure that describes a source location. 508 /// All descriptions are taken from 509 /// https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h 510 /// Original structure: 511 /// typedef struct ident { 512 /// kmp_int32 reserved_1; /**< might be used in Fortran; 513 /// see above */ 514 /// kmp_int32 flags; /**< also f.flags; KMP_IDENT_xxx flags; 515 /// KMP_IDENT_KMPC identifies this union 516 /// member */ 517 /// kmp_int32 reserved_2; /**< not really used in Fortran any more; 518 /// see above */ 519 ///#if USE_ITT_BUILD 520 /// /* but currently used for storing 521 /// region-specific ITT */ 522 /// /* contextual information. */ 523 ///#endif /* USE_ITT_BUILD */ 524 /// kmp_int32 reserved_3; /**< source[4] in Fortran, do not use for 525 /// C++ */ 526 /// char const *psource; /**< String describing the source location. 527 /// The string is composed of semi-colon separated 528 // fields which describe the source file, 529 /// the function and a pair of line numbers that 530 /// delimit the construct. 531 /// */ 532 /// } ident_t; 533 enum IdentFieldIndex { 534 /// might be used in Fortran 535 IdentField_Reserved_1, 536 /// OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member. 
537 IdentField_Flags, 538 /// Not really used in Fortran any more 539 IdentField_Reserved_2, 540 /// Source[4] in Fortran, do not use for C++ 541 IdentField_Reserved_3, 542 /// String describing the source location. The string is composed of 543 /// semi-colon separated fields which describe the source file, the function 544 /// and a pair of line numbers that delimit the construct. 545 IdentField_PSource 546 }; 547 548 /// Schedule types for 'omp for' loops (these enumerators are taken from 549 /// the enum sched_type in kmp.h). 550 enum OpenMPSchedType { 551 /// Lower bound for default (unordered) versions. 552 OMP_sch_lower = 32, 553 OMP_sch_static_chunked = 33, 554 OMP_sch_static = 34, 555 OMP_sch_dynamic_chunked = 35, 556 OMP_sch_guided_chunked = 36, 557 OMP_sch_runtime = 37, 558 OMP_sch_auto = 38, 559 /// static with chunk adjustment (e.g., simd) 560 OMP_sch_static_balanced_chunked = 45, 561 /// Lower bound for 'ordered' versions. 562 OMP_ord_lower = 64, 563 OMP_ord_static_chunked = 65, 564 OMP_ord_static = 66, 565 OMP_ord_dynamic_chunked = 67, 566 OMP_ord_guided_chunked = 68, 567 OMP_ord_runtime = 69, 568 OMP_ord_auto = 70, 569 OMP_sch_default = OMP_sch_static, 570 /// dist_schedule types 571 OMP_dist_sch_static_chunked = 91, 572 OMP_dist_sch_static = 92, 573 /// Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers. 574 /// Set if the monotonic schedule modifier was present. 575 OMP_sch_modifier_monotonic = (1 << 29), 576 /// Set if the nonmonotonic schedule modifier was present. 577 OMP_sch_modifier_nonmonotonic = (1 << 30), 578 }; 579 580 /// A basic class for pre|post-action for advanced codegen sequence for OpenMP 581 /// region. 
582 class CleanupTy final : public EHScopeStack::Cleanup { 583 PrePostActionTy *Action; 584 585 public: 586 explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {} 587 void Emit(CodeGenFunction &CGF, Flags /*flags*/) override { 588 if (!CGF.HaveInsertPoint()) 589 return; 590 Action->Exit(CGF); 591 } 592 }; 593 594 } // anonymous namespace 595 596 void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const { 597 CodeGenFunction::RunCleanupsScope Scope(CGF); 598 if (PrePostAction) { 599 CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction); 600 Callback(CodeGen, CGF, *PrePostAction); 601 } else { 602 PrePostActionTy Action; 603 Callback(CodeGen, CGF, Action); 604 } 605 } 606 607 /// Check if the combiner is a call to UDR combiner and if it is so return the 608 /// UDR decl used for reduction. 609 static const OMPDeclareReductionDecl * 610 getReductionInit(const Expr *ReductionOp) { 611 if (const auto *CE = dyn_cast<CallExpr>(ReductionOp)) 612 if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee())) 613 if (const auto *DRE = 614 dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts())) 615 if (const auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) 616 return DRD; 617 return nullptr; 618 } 619 620 static void emitInitWithReductionInitializer(CodeGenFunction &CGF, 621 const OMPDeclareReductionDecl *DRD, 622 const Expr *InitOp, 623 Address Private, Address Original, 624 QualType Ty) { 625 if (DRD->getInitializer()) { 626 std::pair<llvm::Function *, llvm::Function *> Reduction = 627 CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD); 628 const auto *CE = cast<CallExpr>(InitOp); 629 const auto *OVE = cast<OpaqueValueExpr>(CE->getCallee()); 630 const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts(); 631 const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts(); 632 const auto *LHSDRE = 633 cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr()); 634 const auto *RHSDRE = 635 
cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr()); 636 CodeGenFunction::OMPPrivateScope PrivateScope(CGF); 637 PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()), 638 [=]() { return Private; }); 639 PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()), 640 [=]() { return Original; }); 641 (void)PrivateScope.Privatize(); 642 RValue Func = RValue::get(Reduction.second); 643 CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func); 644 CGF.EmitIgnoredExpr(InitOp); 645 } else { 646 llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty); 647 std::string Name = CGF.CGM.getOpenMPRuntime().getName({"init"}); 648 auto *GV = new llvm::GlobalVariable( 649 CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true, 650 llvm::GlobalValue::PrivateLinkage, Init, Name); 651 LValue LV = CGF.MakeNaturalAlignAddrLValue(GV, Ty); 652 RValue InitRVal; 653 switch (CGF.getEvaluationKind(Ty)) { 654 case TEK_Scalar: 655 InitRVal = CGF.EmitLoadOfLValue(LV, DRD->getLocation()); 656 break; 657 case TEK_Complex: 658 InitRVal = 659 RValue::getComplex(CGF.EmitLoadOfComplex(LV, DRD->getLocation())); 660 break; 661 case TEK_Aggregate: { 662 OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_LValue); 663 CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, LV); 664 CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(), 665 /*IsInitializer=*/false); 666 return; 667 } 668 } 669 OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_PRValue); 670 CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal); 671 CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(), 672 /*IsInitializer=*/false); 673 } 674 } 675 676 /// Emit initialization of arrays of complex types. 677 /// \param DestAddr Address of the array. 678 /// \param Type Type of array. 679 /// \param Init Initial expression of array. 680 /// \param SrcAddr Address of the original array. 
681 static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr, 682 QualType Type, bool EmitDeclareReductionInit, 683 const Expr *Init, 684 const OMPDeclareReductionDecl *DRD, 685 Address SrcAddr = Address::invalid()) { 686 // Perform element-by-element initialization. 687 QualType ElementTy; 688 689 // Drill down to the base element type on both arrays. 690 const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe(); 691 llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr); 692 if (DRD) 693 SrcAddr = 694 CGF.Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType()); 695 696 llvm::Value *SrcBegin = nullptr; 697 if (DRD) 698 SrcBegin = SrcAddr.getPointer(); 699 llvm::Value *DestBegin = DestAddr.getPointer(); 700 // Cast from pointer to array type to pointer to single element. 701 llvm::Value *DestEnd = 702 CGF.Builder.CreateGEP(DestAddr.getElementType(), DestBegin, NumElements); 703 // The basic structure here is a while-do loop. 704 llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arrayinit.body"); 705 llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arrayinit.done"); 706 llvm::Value *IsEmpty = 707 CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty"); 708 CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB); 709 710 // Enter the loop body, making that address the current address. 
711 llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock(); 712 CGF.EmitBlock(BodyBB); 713 714 CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy); 715 716 llvm::PHINode *SrcElementPHI = nullptr; 717 Address SrcElementCurrent = Address::invalid(); 718 if (DRD) { 719 SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2, 720 "omp.arraycpy.srcElementPast"); 721 SrcElementPHI->addIncoming(SrcBegin, EntryBB); 722 SrcElementCurrent = 723 Address(SrcElementPHI, SrcAddr.getElementType(), 724 SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize)); 725 } 726 llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI( 727 DestBegin->getType(), 2, "omp.arraycpy.destElementPast"); 728 DestElementPHI->addIncoming(DestBegin, EntryBB); 729 Address DestElementCurrent = 730 Address(DestElementPHI, DestAddr.getElementType(), 731 DestAddr.getAlignment().alignmentOfArrayElement(ElementSize)); 732 733 // Emit copy. 734 { 735 CodeGenFunction::RunCleanupsScope InitScope(CGF); 736 if (EmitDeclareReductionInit) { 737 emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent, 738 SrcElementCurrent, ElementTy); 739 } else 740 CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(), 741 /*IsInitializer=*/false); 742 } 743 744 if (DRD) { 745 // Shift the address forward by one element. 746 llvm::Value *SrcElementNext = CGF.Builder.CreateConstGEP1_32( 747 SrcAddr.getElementType(), SrcElementPHI, /*Idx0=*/1, 748 "omp.arraycpy.dest.element"); 749 SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock()); 750 } 751 752 // Shift the address forward by one element. 753 llvm::Value *DestElementNext = CGF.Builder.CreateConstGEP1_32( 754 DestAddr.getElementType(), DestElementPHI, /*Idx0=*/1, 755 "omp.arraycpy.dest.element"); 756 // Check whether we've reached the end. 
757 llvm::Value *Done = 758 CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done"); 759 CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB); 760 DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock()); 761 762 // Done. 763 CGF.EmitBlock(DoneBB, /*IsFinished=*/true); 764 } 765 766 LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) { 767 return CGF.EmitOMPSharedLValue(E); 768 } 769 770 LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF, 771 const Expr *E) { 772 if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E)) 773 return CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false); 774 return LValue(); 775 } 776 777 void ReductionCodeGen::emitAggregateInitialization( 778 CodeGenFunction &CGF, unsigned N, Address PrivateAddr, Address SharedAddr, 779 const OMPDeclareReductionDecl *DRD) { 780 // Emit VarDecl with copy init for arrays. 781 // Get the address of the original variable captured in current 782 // captured region. 783 const auto *PrivateVD = 784 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl()); 785 bool EmitDeclareReductionInit = 786 DRD && (DRD->getInitializer() || !PrivateVD->hasInit()); 787 EmitOMPAggregateInit(CGF, PrivateAddr, PrivateVD->getType(), 788 EmitDeclareReductionInit, 789 EmitDeclareReductionInit ? 
ClausesData[N].ReductionOp 790 : PrivateVD->getInit(), 791 DRD, SharedAddr); 792 } 793 794 ReductionCodeGen::ReductionCodeGen(ArrayRef<const Expr *> Shareds, 795 ArrayRef<const Expr *> Origs, 796 ArrayRef<const Expr *> Privates, 797 ArrayRef<const Expr *> ReductionOps) { 798 ClausesData.reserve(Shareds.size()); 799 SharedAddresses.reserve(Shareds.size()); 800 Sizes.reserve(Shareds.size()); 801 BaseDecls.reserve(Shareds.size()); 802 const auto *IOrig = Origs.begin(); 803 const auto *IPriv = Privates.begin(); 804 const auto *IRed = ReductionOps.begin(); 805 for (const Expr *Ref : Shareds) { 806 ClausesData.emplace_back(Ref, *IOrig, *IPriv, *IRed); 807 std::advance(IOrig, 1); 808 std::advance(IPriv, 1); 809 std::advance(IRed, 1); 810 } 811 } 812 813 void ReductionCodeGen::emitSharedOrigLValue(CodeGenFunction &CGF, unsigned N) { 814 assert(SharedAddresses.size() == N && OrigAddresses.size() == N && 815 "Number of generated lvalues must be exactly N."); 816 LValue First = emitSharedLValue(CGF, ClausesData[N].Shared); 817 LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Shared); 818 SharedAddresses.emplace_back(First, Second); 819 if (ClausesData[N].Shared == ClausesData[N].Ref) { 820 OrigAddresses.emplace_back(First, Second); 821 } else { 822 LValue First = emitSharedLValue(CGF, ClausesData[N].Ref); 823 LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Ref); 824 OrigAddresses.emplace_back(First, Second); 825 } 826 } 827 828 void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) { 829 const auto *PrivateVD = 830 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl()); 831 QualType PrivateType = PrivateVD->getType(); 832 bool AsArraySection = isa<OMPArraySectionExpr>(ClausesData[N].Ref); 833 if (!PrivateType->isVariablyModifiedType()) { 834 Sizes.emplace_back( 835 CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType()), 836 nullptr); 837 return; 838 } 839 llvm::Value *Size; 840 llvm::Value *SizeInChars; 841 auto 
*ElemType = OrigAddresses[N].first.getAddress(CGF).getElementType(); 842 auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType); 843 if (AsArraySection) { 844 Size = CGF.Builder.CreatePtrDiff(ElemType, 845 OrigAddresses[N].second.getPointer(CGF), 846 OrigAddresses[N].first.getPointer(CGF)); 847 Size = CGF.Builder.CreateNUWAdd( 848 Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1)); 849 SizeInChars = CGF.Builder.CreateNUWMul(Size, ElemSizeOf); 850 } else { 851 SizeInChars = 852 CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType()); 853 Size = CGF.Builder.CreateExactUDiv(SizeInChars, ElemSizeOf); 854 } 855 Sizes.emplace_back(SizeInChars, Size); 856 CodeGenFunction::OpaqueValueMapping OpaqueMap( 857 CGF, 858 cast<OpaqueValueExpr>( 859 CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()), 860 RValue::get(Size)); 861 CGF.EmitVariablyModifiedType(PrivateType); 862 } 863 864 void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N, 865 llvm::Value *Size) { 866 const auto *PrivateVD = 867 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl()); 868 QualType PrivateType = PrivateVD->getType(); 869 if (!PrivateType->isVariablyModifiedType()) { 870 assert(!Size && !Sizes[N].second && 871 "Size should be nullptr for non-variably modified reduction " 872 "items."); 873 return; 874 } 875 CodeGenFunction::OpaqueValueMapping OpaqueMap( 876 CGF, 877 cast<OpaqueValueExpr>( 878 CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()), 879 RValue::get(Size)); 880 CGF.EmitVariablyModifiedType(PrivateType); 881 } 882 883 void ReductionCodeGen::emitInitialization( 884 CodeGenFunction &CGF, unsigned N, Address PrivateAddr, Address SharedAddr, 885 llvm::function_ref<bool(CodeGenFunction &)> DefaultInit) { 886 assert(SharedAddresses.size() > N && "No variable was generated"); 887 const auto *PrivateVD = 888 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl()); 889 const OMPDeclareReductionDecl 
*DRD = 890 getReductionInit(ClausesData[N].ReductionOp); 891 QualType PrivateType = PrivateVD->getType(); 892 PrivateAddr = CGF.Builder.CreateElementBitCast( 893 PrivateAddr, CGF.ConvertTypeForMem(PrivateType)); 894 if (CGF.getContext().getAsArrayType(PrivateVD->getType())) { 895 if (DRD && DRD->getInitializer()) 896 (void)DefaultInit(CGF); 897 emitAggregateInitialization(CGF, N, PrivateAddr, SharedAddr, DRD); 898 } else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) { 899 (void)DefaultInit(CGF); 900 QualType SharedType = SharedAddresses[N].first.getType(); 901 emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp, 902 PrivateAddr, SharedAddr, SharedType); 903 } else if (!DefaultInit(CGF) && PrivateVD->hasInit() && 904 !CGF.isTrivialInitializer(PrivateVD->getInit())) { 905 CGF.EmitAnyExprToMem(PrivateVD->getInit(), PrivateAddr, 906 PrivateVD->getType().getQualifiers(), 907 /*IsInitializer=*/false); 908 } 909 } 910 911 bool ReductionCodeGen::needCleanups(unsigned N) { 912 const auto *PrivateVD = 913 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl()); 914 QualType PrivateType = PrivateVD->getType(); 915 QualType::DestructionKind DTorKind = PrivateType.isDestructedType(); 916 return DTorKind != QualType::DK_none; 917 } 918 919 void ReductionCodeGen::emitCleanups(CodeGenFunction &CGF, unsigned N, 920 Address PrivateAddr) { 921 const auto *PrivateVD = 922 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl()); 923 QualType PrivateType = PrivateVD->getType(); 924 QualType::DestructionKind DTorKind = PrivateType.isDestructedType(); 925 if (needCleanups(N)) { 926 PrivateAddr = CGF.Builder.CreateElementBitCast( 927 PrivateAddr, CGF.ConvertTypeForMem(PrivateType)); 928 CGF.pushDestroy(DTorKind, PrivateAddr, PrivateType); 929 } 930 } 931 932 static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy, 933 LValue BaseLV) { 934 BaseTy = BaseTy.getNonReferenceType(); 935 while 
((BaseTy->isPointerType() || BaseTy->isReferenceType()) && 936 !CGF.getContext().hasSameType(BaseTy, ElTy)) { 937 if (const auto *PtrTy = BaseTy->getAs<PointerType>()) { 938 BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(CGF), PtrTy); 939 } else { 940 LValue RefLVal = CGF.MakeAddrLValue(BaseLV.getAddress(CGF), BaseTy); 941 BaseLV = CGF.EmitLoadOfReferenceLValue(RefLVal); 942 } 943 BaseTy = BaseTy->getPointeeType(); 944 } 945 return CGF.MakeAddrLValue( 946 CGF.Builder.CreateElementBitCast(BaseLV.getAddress(CGF), 947 CGF.ConvertTypeForMem(ElTy)), 948 BaseLV.getType(), BaseLV.getBaseInfo(), 949 CGF.CGM.getTBAAInfoForSubobject(BaseLV, BaseLV.getType())); 950 } 951 952 static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy, 953 llvm::Type *BaseLVType, CharUnits BaseLVAlignment, 954 llvm::Value *Addr) { 955 Address Tmp = Address::invalid(); 956 Address TopTmp = Address::invalid(); 957 Address MostTopTmp = Address::invalid(); 958 BaseTy = BaseTy.getNonReferenceType(); 959 while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) && 960 !CGF.getContext().hasSameType(BaseTy, ElTy)) { 961 Tmp = CGF.CreateMemTemp(BaseTy); 962 if (TopTmp.isValid()) 963 CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp); 964 else 965 MostTopTmp = Tmp; 966 TopTmp = Tmp; 967 BaseTy = BaseTy->getPointeeType(); 968 } 969 llvm::Type *Ty = BaseLVType; 970 if (Tmp.isValid()) 971 Ty = Tmp.getElementType(); 972 Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Addr, Ty); 973 if (Tmp.isValid()) { 974 CGF.Builder.CreateStore(Addr, Tmp); 975 return MostTopTmp; 976 } 977 return Address::deprecated(Addr, BaseLVAlignment); 978 } 979 980 static const VarDecl *getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE) { 981 const VarDecl *OrigVD = nullptr; 982 if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(Ref)) { 983 const Expr *Base = OASE->getBase()->IgnoreParenImpCasts(); 984 while (const auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base)) 985 Base = 
TempOASE->getBase()->IgnoreParenImpCasts(); 986 while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base)) 987 Base = TempASE->getBase()->IgnoreParenImpCasts(); 988 DE = cast<DeclRefExpr>(Base); 989 OrigVD = cast<VarDecl>(DE->getDecl()); 990 } else if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Ref)) { 991 const Expr *Base = ASE->getBase()->IgnoreParenImpCasts(); 992 while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base)) 993 Base = TempASE->getBase()->IgnoreParenImpCasts(); 994 DE = cast<DeclRefExpr>(Base); 995 OrigVD = cast<VarDecl>(DE->getDecl()); 996 } 997 return OrigVD; 998 } 999 1000 Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N, 1001 Address PrivateAddr) { 1002 const DeclRefExpr *DE; 1003 if (const VarDecl *OrigVD = ::getBaseDecl(ClausesData[N].Ref, DE)) { 1004 BaseDecls.emplace_back(OrigVD); 1005 LValue OriginalBaseLValue = CGF.EmitLValue(DE); 1006 LValue BaseLValue = 1007 loadToBegin(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(), 1008 OriginalBaseLValue); 1009 Address SharedAddr = SharedAddresses[N].first.getAddress(CGF); 1010 llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff( 1011 SharedAddr.getElementType(), BaseLValue.getPointer(CGF), 1012 SharedAddr.getPointer()); 1013 llvm::Value *PrivatePointer = 1014 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 1015 PrivateAddr.getPointer(), SharedAddr.getType()); 1016 llvm::Value *Ptr = CGF.Builder.CreateGEP( 1017 SharedAddr.getElementType(), PrivatePointer, Adjustment); 1018 return castToBase(CGF, OrigVD->getType(), 1019 SharedAddresses[N].first.getType(), 1020 OriginalBaseLValue.getAddress(CGF).getType(), 1021 OriginalBaseLValue.getAlignment(), Ptr); 1022 } 1023 BaseDecls.emplace_back( 1024 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Ref)->getDecl())); 1025 return PrivateAddr; 1026 } 1027 1028 bool ReductionCodeGen::usesReductionInitializer(unsigned N) const { 1029 const OMPDeclareReductionDecl *DRD = 1030 
getReductionInit(ClausesData[N].ReductionOp); 1031 return DRD && DRD->getInitializer(); 1032 } 1033 1034 LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) { 1035 return CGF.EmitLoadOfPointerLValue( 1036 CGF.GetAddrOfLocalVar(getThreadIDVariable()), 1037 getThreadIDVariable()->getType()->castAs<PointerType>()); 1038 } 1039 1040 void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt *S) { 1041 if (!CGF.HaveInsertPoint()) 1042 return; 1043 // 1.2.2 OpenMP Language Terminology 1044 // Structured block - An executable statement with a single entry at the 1045 // top and a single exit at the bottom. 1046 // The point of exit cannot be a branch out of the structured block. 1047 // longjmp() and throw() must not violate the entry/exit criteria. 1048 CGF.EHStack.pushTerminate(); 1049 if (S) 1050 CGF.incrementProfileCounter(S); 1051 CodeGen(CGF); 1052 CGF.EHStack.popTerminate(); 1053 } 1054 1055 LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue( 1056 CodeGenFunction &CGF) { 1057 return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()), 1058 getThreadIDVariable()->getType(), 1059 AlignmentSource::Decl); 1060 } 1061 1062 static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC, 1063 QualType FieldTy) { 1064 auto *Field = FieldDecl::Create( 1065 C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy, 1066 C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()), 1067 /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit); 1068 Field->setAccess(AS_public); 1069 DC->addDecl(Field); 1070 return Field; 1071 } 1072 1073 CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM, StringRef FirstSeparator, 1074 StringRef Separator) 1075 : CGM(CGM), FirstSeparator(FirstSeparator), Separator(Separator), 1076 OMPBuilder(CGM.getModule()), OffloadEntriesInfoManager(CGM) { 1077 KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8); 1078 1079 // Initialize Types used in OpenMPIRBuilder 
from OMPKinds.def 1080 OMPBuilder.initialize(); 1081 loadOffloadInfoMetadata(); 1082 } 1083 1084 void CGOpenMPRuntime::clear() { 1085 InternalVars.clear(); 1086 // Clean non-target variable declarations possibly used only in debug info. 1087 for (const auto &Data : EmittedNonTargetVariables) { 1088 if (!Data.getValue().pointsToAliveValue()) 1089 continue; 1090 auto *GV = dyn_cast<llvm::GlobalVariable>(Data.getValue()); 1091 if (!GV) 1092 continue; 1093 if (!GV->isDeclaration() || GV->getNumUses() > 0) 1094 continue; 1095 GV->eraseFromParent(); 1096 } 1097 } 1098 1099 std::string CGOpenMPRuntime::getName(ArrayRef<StringRef> Parts) const { 1100 SmallString<128> Buffer; 1101 llvm::raw_svector_ostream OS(Buffer); 1102 StringRef Sep = FirstSeparator; 1103 for (StringRef Part : Parts) { 1104 OS << Sep << Part; 1105 Sep = Separator; 1106 } 1107 return std::string(OS.str()); 1108 } 1109 1110 static llvm::Function * 1111 emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty, 1112 const Expr *CombinerInitializer, const VarDecl *In, 1113 const VarDecl *Out, bool IsCombiner) { 1114 // void .omp_combiner.(Ty *in, Ty *out); 1115 ASTContext &C = CGM.getContext(); 1116 QualType PtrTy = C.getPointerType(Ty).withRestrict(); 1117 FunctionArgList Args; 1118 ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(), 1119 /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other); 1120 ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(), 1121 /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other); 1122 Args.push_back(&OmpOutParm); 1123 Args.push_back(&OmpInParm); 1124 const CGFunctionInfo &FnInfo = 1125 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 1126 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo); 1127 std::string Name = CGM.getOpenMPRuntime().getName( 1128 {IsCombiner ? 
"omp_combiner" : "omp_initializer", ""}); 1129 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage, 1130 Name, &CGM.getModule()); 1131 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo); 1132 if (CGM.getLangOpts().Optimize) { 1133 Fn->removeFnAttr(llvm::Attribute::NoInline); 1134 Fn->removeFnAttr(llvm::Attribute::OptimizeNone); 1135 Fn->addFnAttr(llvm::Attribute::AlwaysInline); 1136 } 1137 CodeGenFunction CGF(CGM); 1138 // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions. 1139 // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions. 1140 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, In->getLocation(), 1141 Out->getLocation()); 1142 CodeGenFunction::OMPPrivateScope Scope(CGF); 1143 Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm); 1144 Scope.addPrivate(In, [&CGF, AddrIn, PtrTy]() { 1145 return CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>()) 1146 .getAddress(CGF); 1147 }); 1148 Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm); 1149 Scope.addPrivate(Out, [&CGF, AddrOut, PtrTy]() { 1150 return CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>()) 1151 .getAddress(CGF); 1152 }); 1153 (void)Scope.Privatize(); 1154 if (!IsCombiner && Out->hasInit() && 1155 !CGF.isTrivialInitializer(Out->getInit())) { 1156 CGF.EmitAnyExprToMem(Out->getInit(), CGF.GetAddrOfLocalVar(Out), 1157 Out->getType().getQualifiers(), 1158 /*IsInitializer=*/true); 1159 } 1160 if (CombinerInitializer) 1161 CGF.EmitIgnoredExpr(CombinerInitializer); 1162 Scope.ForceCleanup(); 1163 CGF.FinishFunction(); 1164 return Fn; 1165 } 1166 1167 void CGOpenMPRuntime::emitUserDefinedReduction( 1168 CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) { 1169 if (UDRMap.count(D) > 0) 1170 return; 1171 llvm::Function *Combiner = emitCombinerOrInitializer( 1172 CGM, D->getType(), D->getCombiner(), 1173 cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerIn())->getDecl()), 1174 
cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerOut())->getDecl()), 1175 /*IsCombiner=*/true); 1176 llvm::Function *Initializer = nullptr; 1177 if (const Expr *Init = D->getInitializer()) { 1178 Initializer = emitCombinerOrInitializer( 1179 CGM, D->getType(), 1180 D->getInitializerKind() == OMPDeclareReductionDecl::CallInit ? Init 1181 : nullptr, 1182 cast<VarDecl>(cast<DeclRefExpr>(D->getInitOrig())->getDecl()), 1183 cast<VarDecl>(cast<DeclRefExpr>(D->getInitPriv())->getDecl()), 1184 /*IsCombiner=*/false); 1185 } 1186 UDRMap.try_emplace(D, Combiner, Initializer); 1187 if (CGF) { 1188 auto &Decls = FunctionUDRMap.FindAndConstruct(CGF->CurFn); 1189 Decls.second.push_back(D); 1190 } 1191 } 1192 1193 std::pair<llvm::Function *, llvm::Function *> 1194 CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) { 1195 auto I = UDRMap.find(D); 1196 if (I != UDRMap.end()) 1197 return I->second; 1198 emitUserDefinedReduction(/*CGF=*/nullptr, D); 1199 return UDRMap.lookup(D); 1200 } 1201 1202 namespace { 1203 // Temporary RAII solution to perform a push/pop stack event on the OpenMP IR 1204 // Builder if one is present. 1205 struct PushAndPopStackRAII { 1206 PushAndPopStackRAII(llvm::OpenMPIRBuilder *OMPBuilder, CodeGenFunction &CGF, 1207 bool HasCancel, llvm::omp::Directive Kind) 1208 : OMPBuilder(OMPBuilder) { 1209 if (!OMPBuilder) 1210 return; 1211 1212 // The following callback is the crucial part of clangs cleanup process. 1213 // 1214 // NOTE: 1215 // Once the OpenMPIRBuilder is used to create parallel regions (and 1216 // similar), the cancellation destination (Dest below) is determined via 1217 // IP. That means if we have variables to finalize we split the block at IP, 1218 // use the new block (=BB) as destination to build a JumpDest (via 1219 // getJumpDestInCurrentScope(BB)) which then is fed to 1220 // EmitBranchThroughCleanup. Furthermore, there will not be the need 1221 // to push & pop an FinalizationInfo object. 
1222 // The FiniCB will still be needed but at the point where the 1223 // OpenMPIRBuilder is asked to construct a parallel (or similar) construct. 1224 auto FiniCB = [&CGF](llvm::OpenMPIRBuilder::InsertPointTy IP) { 1225 assert(IP.getBlock()->end() == IP.getPoint() && 1226 "Clang CG should cause non-terminated block!"); 1227 CGBuilderTy::InsertPointGuard IPG(CGF.Builder); 1228 CGF.Builder.restoreIP(IP); 1229 CodeGenFunction::JumpDest Dest = 1230 CGF.getOMPCancelDestination(OMPD_parallel); 1231 CGF.EmitBranchThroughCleanup(Dest); 1232 }; 1233 1234 // TODO: Remove this once we emit parallel regions through the 1235 // OpenMPIRBuilder as it can do this setup internally. 1236 llvm::OpenMPIRBuilder::FinalizationInfo FI({FiniCB, Kind, HasCancel}); 1237 OMPBuilder->pushFinalizationCB(std::move(FI)); 1238 } 1239 ~PushAndPopStackRAII() { 1240 if (OMPBuilder) 1241 OMPBuilder->popFinalizationCB(); 1242 } 1243 llvm::OpenMPIRBuilder *OMPBuilder; 1244 }; 1245 } // namespace 1246 1247 static llvm::Function *emitParallelOrTeamsOutlinedFunction( 1248 CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS, 1249 const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, 1250 const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen) { 1251 assert(ThreadIDVar->getType()->isPointerType() && 1252 "thread id variable must be of type kmp_int32 *"); 1253 CodeGenFunction CGF(CGM, true); 1254 bool HasCancel = false; 1255 if (const auto *OPD = dyn_cast<OMPParallelDirective>(&D)) 1256 HasCancel = OPD->hasCancel(); 1257 else if (const auto *OPD = dyn_cast<OMPTargetParallelDirective>(&D)) 1258 HasCancel = OPD->hasCancel(); 1259 else if (const auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D)) 1260 HasCancel = OPSD->hasCancel(); 1261 else if (const auto *OPFD = dyn_cast<OMPParallelForDirective>(&D)) 1262 HasCancel = OPFD->hasCancel(); 1263 else if (const auto *OPFD = dyn_cast<OMPTargetParallelForDirective>(&D)) 1264 HasCancel = OPFD->hasCancel(); 1265 else 
if (const auto *OPFD = dyn_cast<OMPDistributeParallelForDirective>(&D)) 1266 HasCancel = OPFD->hasCancel(); 1267 else if (const auto *OPFD = 1268 dyn_cast<OMPTeamsDistributeParallelForDirective>(&D)) 1269 HasCancel = OPFD->hasCancel(); 1270 else if (const auto *OPFD = 1271 dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&D)) 1272 HasCancel = OPFD->hasCancel(); 1273 1274 // TODO: Temporarily inform the OpenMPIRBuilder, if any, about the new 1275 // parallel region to make cancellation barriers work properly. 1276 llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder(); 1277 PushAndPopStackRAII PSR(&OMPBuilder, CGF, HasCancel, InnermostKind); 1278 CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind, 1279 HasCancel, OutlinedHelperName); 1280 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 1281 return CGF.GenerateOpenMPCapturedStmtFunction(*CS, D.getBeginLoc()); 1282 } 1283 1284 llvm::Function *CGOpenMPRuntime::emitParallelOutlinedFunction( 1285 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, 1286 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { 1287 const CapturedStmt *CS = D.getCapturedStmt(OMPD_parallel); 1288 return emitParallelOrTeamsOutlinedFunction( 1289 CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen); 1290 } 1291 1292 llvm::Function *CGOpenMPRuntime::emitTeamsOutlinedFunction( 1293 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, 1294 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { 1295 const CapturedStmt *CS = D.getCapturedStmt(OMPD_teams); 1296 return emitParallelOrTeamsOutlinedFunction( 1297 CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen); 1298 } 1299 1300 llvm::Function *CGOpenMPRuntime::emitTaskOutlinedFunction( 1301 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, 1302 const VarDecl *PartIDVar, const VarDecl *TaskTVar, 1303 OpenMPDirectiveKind InnermostKind, const 
RegionCodeGenTy &CodeGen, 1304 bool Tied, unsigned &NumberOfParts) { 1305 auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF, 1306 PrePostActionTy &) { 1307 llvm::Value *ThreadID = getThreadID(CGF, D.getBeginLoc()); 1308 llvm::Value *UpLoc = emitUpdateLocation(CGF, D.getBeginLoc()); 1309 llvm::Value *TaskArgs[] = { 1310 UpLoc, ThreadID, 1311 CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar), 1312 TaskTVar->getType()->castAs<PointerType>()) 1313 .getPointer(CGF)}; 1314 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 1315 CGM.getModule(), OMPRTL___kmpc_omp_task), 1316 TaskArgs); 1317 }; 1318 CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar, 1319 UntiedCodeGen); 1320 CodeGen.setAction(Action); 1321 assert(!ThreadIDVar->getType()->isPointerType() && 1322 "thread id variable must be of type kmp_int32 for tasks"); 1323 const OpenMPDirectiveKind Region = 1324 isOpenMPTaskLoopDirective(D.getDirectiveKind()) ? OMPD_taskloop 1325 : OMPD_task; 1326 const CapturedStmt *CS = D.getCapturedStmt(Region); 1327 bool HasCancel = false; 1328 if (const auto *TD = dyn_cast<OMPTaskDirective>(&D)) 1329 HasCancel = TD->hasCancel(); 1330 else if (const auto *TD = dyn_cast<OMPTaskLoopDirective>(&D)) 1331 HasCancel = TD->hasCancel(); 1332 else if (const auto *TD = dyn_cast<OMPMasterTaskLoopDirective>(&D)) 1333 HasCancel = TD->hasCancel(); 1334 else if (const auto *TD = dyn_cast<OMPParallelMasterTaskLoopDirective>(&D)) 1335 HasCancel = TD->hasCancel(); 1336 1337 CodeGenFunction CGF(CGM, true); 1338 CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, 1339 InnermostKind, HasCancel, Action); 1340 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 1341 llvm::Function *Res = CGF.GenerateCapturedStmtFunction(*CS); 1342 if (!Tied) 1343 NumberOfParts = Action.getNumberOfParts(); 1344 return Res; 1345 } 1346 1347 static void buildStructValue(ConstantStructBuilder &Fields, CodeGenModule &CGM, 1348 const RecordDecl *RD, 
const CGRecordLayout &RL, 1349 ArrayRef<llvm::Constant *> Data) { 1350 llvm::StructType *StructTy = RL.getLLVMType(); 1351 unsigned PrevIdx = 0; 1352 ConstantInitBuilder CIBuilder(CGM); 1353 const auto *DI = Data.begin(); 1354 for (const FieldDecl *FD : RD->fields()) { 1355 unsigned Idx = RL.getLLVMFieldNo(FD); 1356 // Fill the alignment. 1357 for (unsigned I = PrevIdx; I < Idx; ++I) 1358 Fields.add(llvm::Constant::getNullValue(StructTy->getElementType(I))); 1359 PrevIdx = Idx + 1; 1360 Fields.add(*DI); 1361 ++DI; 1362 } 1363 } 1364 1365 template <class... As> 1366 static llvm::GlobalVariable * 1367 createGlobalStruct(CodeGenModule &CGM, QualType Ty, bool IsConstant, 1368 ArrayRef<llvm::Constant *> Data, const Twine &Name, 1369 As &&... Args) { 1370 const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl()); 1371 const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD); 1372 ConstantInitBuilder CIBuilder(CGM); 1373 ConstantStructBuilder Fields = CIBuilder.beginStruct(RL.getLLVMType()); 1374 buildStructValue(Fields, CGM, RD, RL, Data); 1375 return Fields.finishAndCreateGlobal( 1376 Name, CGM.getContext().getAlignOfGlobalVarInChars(Ty), IsConstant, 1377 std::forward<As>(Args)...); 1378 } 1379 1380 template <typename T> 1381 static void 1382 createConstantGlobalStructAndAddToParent(CodeGenModule &CGM, QualType Ty, 1383 ArrayRef<llvm::Constant *> Data, 1384 T &Parent) { 1385 const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl()); 1386 const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD); 1387 ConstantStructBuilder Fields = Parent.beginStruct(RL.getLLVMType()); 1388 buildStructValue(Fields, CGM, RD, RL, Data); 1389 Fields.finishAndAddTo(Parent); 1390 } 1391 1392 void CGOpenMPRuntime::setLocThreadIdInsertPt(CodeGenFunction &CGF, 1393 bool AtCurrentPoint) { 1394 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); 1395 assert(!Elem.second.ServiceInsertPt && "Insert point is set already."); 1396 1397 llvm::Value *Undef = 
llvm::UndefValue::get(CGF.Int32Ty); 1398 if (AtCurrentPoint) { 1399 Elem.second.ServiceInsertPt = new llvm::BitCastInst( 1400 Undef, CGF.Int32Ty, "svcpt", CGF.Builder.GetInsertBlock()); 1401 } else { 1402 Elem.second.ServiceInsertPt = 1403 new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt"); 1404 Elem.second.ServiceInsertPt->insertAfter(CGF.AllocaInsertPt); 1405 } 1406 } 1407 1408 void CGOpenMPRuntime::clearLocThreadIdInsertPt(CodeGenFunction &CGF) { 1409 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); 1410 if (Elem.second.ServiceInsertPt) { 1411 llvm::Instruction *Ptr = Elem.second.ServiceInsertPt; 1412 Elem.second.ServiceInsertPt = nullptr; 1413 Ptr->eraseFromParent(); 1414 } 1415 } 1416 1417 static StringRef getIdentStringFromSourceLocation(CodeGenFunction &CGF, 1418 SourceLocation Loc, 1419 SmallString<128> &Buffer) { 1420 llvm::raw_svector_ostream OS(Buffer); 1421 // Build debug location 1422 PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc); 1423 OS << ";" << PLoc.getFilename() << ";"; 1424 if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl)) 1425 OS << FD->getQualifiedNameAsString(); 1426 OS << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;"; 1427 return OS.str(); 1428 } 1429 1430 llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF, 1431 SourceLocation Loc, 1432 unsigned Flags) { 1433 uint32_t SrcLocStrSize; 1434 llvm::Constant *SrcLocStr; 1435 if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo || 1436 Loc.isInvalid()) { 1437 SrcLocStr = OMPBuilder.getOrCreateDefaultSrcLocStr(SrcLocStrSize); 1438 } else { 1439 std::string FunctionName; 1440 if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl)) 1441 FunctionName = FD->getQualifiedNameAsString(); 1442 PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc); 1443 const char *FileName = PLoc.getFilename(); 1444 unsigned Line = PLoc.getLine(); 1445 unsigned Column = 
PLoc.getColumn(); 1446 SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(FunctionName, FileName, Line, 1447 Column, SrcLocStrSize); 1448 } 1449 unsigned Reserved2Flags = getDefaultLocationReserved2Flags(); 1450 return OMPBuilder.getOrCreateIdent( 1451 SrcLocStr, SrcLocStrSize, llvm::omp::IdentFlag(Flags), Reserved2Flags); 1452 } 1453 1454 llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF, 1455 SourceLocation Loc) { 1456 assert(CGF.CurFn && "No function in current CodeGenFunction."); 1457 // If the OpenMPIRBuilder is used we need to use it for all thread id calls as 1458 // the clang invariants used below might be broken. 1459 if (CGM.getLangOpts().OpenMPIRBuilder) { 1460 SmallString<128> Buffer; 1461 OMPBuilder.updateToLocation(CGF.Builder.saveIP()); 1462 uint32_t SrcLocStrSize; 1463 auto *SrcLocStr = OMPBuilder.getOrCreateSrcLocStr( 1464 getIdentStringFromSourceLocation(CGF, Loc, Buffer), SrcLocStrSize); 1465 return OMPBuilder.getOrCreateThreadID( 1466 OMPBuilder.getOrCreateIdent(SrcLocStr, SrcLocStrSize)); 1467 } 1468 1469 llvm::Value *ThreadID = nullptr; 1470 // Check whether we've already cached a load of the thread id in this 1471 // function. 1472 auto I = OpenMPLocThreadIDMap.find(CGF.CurFn); 1473 if (I != OpenMPLocThreadIDMap.end()) { 1474 ThreadID = I->second.ThreadID; 1475 if (ThreadID != nullptr) 1476 return ThreadID; 1477 } 1478 // If exceptions are enabled, do not use parameter to avoid possible crash. 1479 if (auto *OMPRegionInfo = 1480 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) { 1481 if (OMPRegionInfo->getThreadIDVariable()) { 1482 // Check if this an outlined function with thread id passed as argument. 
1483 LValue LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF); 1484 llvm::BasicBlock *TopBlock = CGF.AllocaInsertPt->getParent(); 1485 if (!CGF.EHStack.requiresLandingPad() || !CGF.getLangOpts().Exceptions || 1486 !CGF.getLangOpts().CXXExceptions || 1487 CGF.Builder.GetInsertBlock() == TopBlock || 1488 !isa<llvm::Instruction>(LVal.getPointer(CGF)) || 1489 cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() == 1490 TopBlock || 1491 cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() == 1492 CGF.Builder.GetInsertBlock()) { 1493 ThreadID = CGF.EmitLoadOfScalar(LVal, Loc); 1494 // If value loaded in entry block, cache it and use it everywhere in 1495 // function. 1496 if (CGF.Builder.GetInsertBlock() == TopBlock) { 1497 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); 1498 Elem.second.ThreadID = ThreadID; 1499 } 1500 return ThreadID; 1501 } 1502 } 1503 } 1504 1505 // This is not an outlined function region - need to call __kmpc_int32 1506 // kmpc_global_thread_num(ident_t *loc). 1507 // Generate thread id value and cache this value for use across the 1508 // function. 
1509 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); 1510 if (!Elem.second.ServiceInsertPt) 1511 setLocThreadIdInsertPt(CGF); 1512 CGBuilderTy::InsertPointGuard IPG(CGF.Builder); 1513 CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt); 1514 llvm::CallInst *Call = CGF.Builder.CreateCall( 1515 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), 1516 OMPRTL___kmpc_global_thread_num), 1517 emitUpdateLocation(CGF, Loc)); 1518 Call->setCallingConv(CGF.getRuntimeCC()); 1519 Elem.second.ThreadID = Call; 1520 return Call; 1521 } 1522 1523 void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) { 1524 assert(CGF.CurFn && "No function in current CodeGenFunction."); 1525 if (OpenMPLocThreadIDMap.count(CGF.CurFn)) { 1526 clearLocThreadIdInsertPt(CGF); 1527 OpenMPLocThreadIDMap.erase(CGF.CurFn); 1528 } 1529 if (FunctionUDRMap.count(CGF.CurFn) > 0) { 1530 for(const auto *D : FunctionUDRMap[CGF.CurFn]) 1531 UDRMap.erase(D); 1532 FunctionUDRMap.erase(CGF.CurFn); 1533 } 1534 auto I = FunctionUDMMap.find(CGF.CurFn); 1535 if (I != FunctionUDMMap.end()) { 1536 for(const auto *D : I->second) 1537 UDMMap.erase(D); 1538 FunctionUDMMap.erase(I); 1539 } 1540 LastprivateConditionalToTypes.erase(CGF.CurFn); 1541 FunctionToUntiedTaskStackMap.erase(CGF.CurFn); 1542 } 1543 1544 llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() { 1545 return OMPBuilder.IdentPtr; 1546 } 1547 1548 llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() { 1549 if (!Kmpc_MicroTy) { 1550 // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...) 
1551 llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty), 1552 llvm::PointerType::getUnqual(CGM.Int32Ty)}; 1553 Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true); 1554 } 1555 return llvm::PointerType::getUnqual(Kmpc_MicroTy); 1556 } 1557 1558 llvm::FunctionCallee 1559 CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize, bool IVSigned, 1560 bool IsGPUDistribute) { 1561 assert((IVSize == 32 || IVSize == 64) && 1562 "IV size is not compatible with the omp runtime"); 1563 StringRef Name; 1564 if (IsGPUDistribute) 1565 Name = IVSize == 32 ? (IVSigned ? "__kmpc_distribute_static_init_4" 1566 : "__kmpc_distribute_static_init_4u") 1567 : (IVSigned ? "__kmpc_distribute_static_init_8" 1568 : "__kmpc_distribute_static_init_8u"); 1569 else 1570 Name = IVSize == 32 ? (IVSigned ? "__kmpc_for_static_init_4" 1571 : "__kmpc_for_static_init_4u") 1572 : (IVSigned ? "__kmpc_for_static_init_8" 1573 : "__kmpc_for_static_init_8u"); 1574 1575 llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty; 1576 auto *PtrTy = llvm::PointerType::getUnqual(ITy); 1577 llvm::Type *TypeParams[] = { 1578 getIdentTyPointerTy(), // loc 1579 CGM.Int32Ty, // tid 1580 CGM.Int32Ty, // schedtype 1581 llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter 1582 PtrTy, // p_lower 1583 PtrTy, // p_upper 1584 PtrTy, // p_stride 1585 ITy, // incr 1586 ITy // chunk 1587 }; 1588 auto *FnTy = 1589 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1590 return CGM.CreateRuntimeFunction(FnTy, Name); 1591 } 1592 1593 llvm::FunctionCallee 1594 CGOpenMPRuntime::createDispatchInitFunction(unsigned IVSize, bool IVSigned) { 1595 assert((IVSize == 32 || IVSize == 64) && 1596 "IV size is not compatible with the omp runtime"); 1597 StringRef Name = 1598 IVSize == 32 1599 ? (IVSigned ? "__kmpc_dispatch_init_4" : "__kmpc_dispatch_init_4u") 1600 : (IVSigned ? "__kmpc_dispatch_init_8" : "__kmpc_dispatch_init_8u"); 1601 llvm::Type *ITy = IVSize == 32 ? 
CGM.Int32Ty : CGM.Int64Ty; 1602 llvm::Type *TypeParams[] = { getIdentTyPointerTy(), // loc 1603 CGM.Int32Ty, // tid 1604 CGM.Int32Ty, // schedtype 1605 ITy, // lower 1606 ITy, // upper 1607 ITy, // stride 1608 ITy // chunk 1609 }; 1610 auto *FnTy = 1611 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1612 return CGM.CreateRuntimeFunction(FnTy, Name); 1613 } 1614 1615 llvm::FunctionCallee 1616 CGOpenMPRuntime::createDispatchFiniFunction(unsigned IVSize, bool IVSigned) { 1617 assert((IVSize == 32 || IVSize == 64) && 1618 "IV size is not compatible with the omp runtime"); 1619 StringRef Name = 1620 IVSize == 32 1621 ? (IVSigned ? "__kmpc_dispatch_fini_4" : "__kmpc_dispatch_fini_4u") 1622 : (IVSigned ? "__kmpc_dispatch_fini_8" : "__kmpc_dispatch_fini_8u"); 1623 llvm::Type *TypeParams[] = { 1624 getIdentTyPointerTy(), // loc 1625 CGM.Int32Ty, // tid 1626 }; 1627 auto *FnTy = 1628 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 1629 return CGM.CreateRuntimeFunction(FnTy, Name); 1630 } 1631 1632 llvm::FunctionCallee 1633 CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize, bool IVSigned) { 1634 assert((IVSize == 32 || IVSize == 64) && 1635 "IV size is not compatible with the omp runtime"); 1636 StringRef Name = 1637 IVSize == 32 1638 ? (IVSigned ? "__kmpc_dispatch_next_4" : "__kmpc_dispatch_next_4u") 1639 : (IVSigned ? "__kmpc_dispatch_next_8" : "__kmpc_dispatch_next_8u"); 1640 llvm::Type *ITy = IVSize == 32 ? 
CGM.Int32Ty : CGM.Int64Ty; 1641 auto *PtrTy = llvm::PointerType::getUnqual(ITy); 1642 llvm::Type *TypeParams[] = { 1643 getIdentTyPointerTy(), // loc 1644 CGM.Int32Ty, // tid 1645 llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter 1646 PtrTy, // p_lower 1647 PtrTy, // p_upper 1648 PtrTy // p_stride 1649 }; 1650 auto *FnTy = 1651 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 1652 return CGM.CreateRuntimeFunction(FnTy, Name); 1653 } 1654 1655 /// Obtain information that uniquely identifies a target entry. This 1656 /// consists of the file and device IDs as well as line number associated with 1657 /// the relevant entry source location. 1658 static void getTargetEntryUniqueInfo(ASTContext &C, SourceLocation Loc, 1659 unsigned &DeviceID, unsigned &FileID, 1660 unsigned &LineNum) { 1661 SourceManager &SM = C.getSourceManager(); 1662 1663 // The loc should be always valid and have a file ID (the user cannot use 1664 // #pragma directives in macros) 1665 1666 assert(Loc.isValid() && "Source location is expected to be always valid."); 1667 1668 PresumedLoc PLoc = SM.getPresumedLoc(Loc); 1669 assert(PLoc.isValid() && "Source location is expected to be always valid."); 1670 1671 llvm::sys::fs::UniqueID ID; 1672 if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID)) { 1673 PLoc = SM.getPresumedLoc(Loc, /*UseLineDirectives=*/false); 1674 assert(PLoc.isValid() && "Source location is expected to be always valid."); 1675 if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID)) 1676 SM.getDiagnostics().Report(diag::err_cannot_open_file) 1677 << PLoc.getFilename() << EC.message(); 1678 } 1679 1680 DeviceID = ID.getDevice(); 1681 FileID = ID.getFile(); 1682 LineNum = PLoc.getLine(); 1683 } 1684 1685 Address CGOpenMPRuntime::getAddrOfDeclareTargetVar(const VarDecl *VD) { 1686 if (CGM.getLangOpts().OpenMPSimd) 1687 return Address::invalid(); 1688 llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 1689 
OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD); 1690 if (Res && (*Res == OMPDeclareTargetDeclAttr::MT_Link || 1691 (*Res == OMPDeclareTargetDeclAttr::MT_To && 1692 HasRequiresUnifiedSharedMemory))) { 1693 SmallString<64> PtrName; 1694 { 1695 llvm::raw_svector_ostream OS(PtrName); 1696 OS << CGM.getMangledName(GlobalDecl(VD)); 1697 if (!VD->isExternallyVisible()) { 1698 unsigned DeviceID, FileID, Line; 1699 getTargetEntryUniqueInfo(CGM.getContext(), 1700 VD->getCanonicalDecl()->getBeginLoc(), 1701 DeviceID, FileID, Line); 1702 OS << llvm::format("_%x", FileID); 1703 } 1704 OS << "_decl_tgt_ref_ptr"; 1705 } 1706 llvm::Value *Ptr = CGM.getModule().getNamedValue(PtrName); 1707 if (!Ptr) { 1708 QualType PtrTy = CGM.getContext().getPointerType(VD->getType()); 1709 Ptr = getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(PtrTy), 1710 PtrName); 1711 1712 auto *GV = cast<llvm::GlobalVariable>(Ptr); 1713 GV->setLinkage(llvm::GlobalValue::WeakAnyLinkage); 1714 1715 if (!CGM.getLangOpts().OpenMPIsDevice) 1716 GV->setInitializer(CGM.GetAddrOfGlobal(VD)); 1717 registerTargetGlobalVariable(VD, cast<llvm::Constant>(Ptr)); 1718 } 1719 return Address::deprecated(Ptr, CGM.getContext().getDeclAlign(VD)); 1720 } 1721 return Address::invalid(); 1722 } 1723 1724 llvm::Constant * 1725 CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) { 1726 assert(!CGM.getLangOpts().OpenMPUseTLS || 1727 !CGM.getContext().getTargetInfo().isTLSSupported()); 1728 // Lookup the entry, lazily creating it if necessary. 
1729 std::string Suffix = getName({"cache", ""}); 1730 return getOrCreateInternalVariable( 1731 CGM.Int8PtrPtrTy, Twine(CGM.getMangledName(VD)).concat(Suffix)); 1732 } 1733 1734 Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF, 1735 const VarDecl *VD, 1736 Address VDAddr, 1737 SourceLocation Loc) { 1738 if (CGM.getLangOpts().OpenMPUseTLS && 1739 CGM.getContext().getTargetInfo().isTLSSupported()) 1740 return VDAddr; 1741 1742 llvm::Type *VarTy = VDAddr.getElementType(); 1743 llvm::Value *Args[] = { 1744 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 1745 CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.Int8PtrTy), 1746 CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)), 1747 getOrCreateThreadPrivateCache(VD)}; 1748 return Address::deprecated( 1749 CGF.EmitRuntimeCall( 1750 OMPBuilder.getOrCreateRuntimeFunction( 1751 CGM.getModule(), OMPRTL___kmpc_threadprivate_cached), 1752 Args), 1753 VDAddr.getAlignment()); 1754 } 1755 1756 void CGOpenMPRuntime::emitThreadPrivateVarInit( 1757 CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor, 1758 llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) { 1759 // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime 1760 // library. 1761 llvm::Value *OMPLoc = emitUpdateLocation(CGF, Loc); 1762 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 1763 CGM.getModule(), OMPRTL___kmpc_global_thread_num), 1764 OMPLoc); 1765 // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor) 1766 // to register constructor/destructor for variable. 
1767 llvm::Value *Args[] = { 1768 OMPLoc, CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.VoidPtrTy), 1769 Ctor, CopyCtor, Dtor}; 1770 CGF.EmitRuntimeCall( 1771 OMPBuilder.getOrCreateRuntimeFunction( 1772 CGM.getModule(), OMPRTL___kmpc_threadprivate_register), 1773 Args); 1774 } 1775 1776 llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition( 1777 const VarDecl *VD, Address VDAddr, SourceLocation Loc, 1778 bool PerformInit, CodeGenFunction *CGF) { 1779 if (CGM.getLangOpts().OpenMPUseTLS && 1780 CGM.getContext().getTargetInfo().isTLSSupported()) 1781 return nullptr; 1782 1783 VD = VD->getDefinition(CGM.getContext()); 1784 if (VD && ThreadPrivateWithDefinition.insert(CGM.getMangledName(VD)).second) { 1785 QualType ASTTy = VD->getType(); 1786 1787 llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr; 1788 const Expr *Init = VD->getAnyInitializer(); 1789 if (CGM.getLangOpts().CPlusPlus && PerformInit) { 1790 // Generate function that re-emits the declaration's initializer into the 1791 // threadprivate copy of the variable VD 1792 CodeGenFunction CtorCGF(CGM); 1793 FunctionArgList Args; 1794 ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc, 1795 /*Id=*/nullptr, CGM.getContext().VoidPtrTy, 1796 ImplicitParamDecl::Other); 1797 Args.push_back(&Dst); 1798 1799 const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration( 1800 CGM.getContext().VoidPtrTy, Args); 1801 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); 1802 std::string Name = getName({"__kmpc_global_ctor_", ""}); 1803 llvm::Function *Fn = 1804 CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc); 1805 CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI, 1806 Args, Loc, Loc); 1807 llvm::Value *ArgVal = CtorCGF.EmitLoadOfScalar( 1808 CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false, 1809 CGM.getContext().VoidPtrTy, Dst.getLocation()); 1810 Address Arg = Address::deprecated(ArgVal, VDAddr.getAlignment()); 1811 Arg = 
CtorCGF.Builder.CreateElementBitCast( 1812 Arg, CtorCGF.ConvertTypeForMem(ASTTy)); 1813 CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(), 1814 /*IsInitializer=*/true); 1815 ArgVal = CtorCGF.EmitLoadOfScalar( 1816 CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false, 1817 CGM.getContext().VoidPtrTy, Dst.getLocation()); 1818 CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue); 1819 CtorCGF.FinishFunction(); 1820 Ctor = Fn; 1821 } 1822 if (VD->getType().isDestructedType() != QualType::DK_none) { 1823 // Generate function that emits destructor call for the threadprivate copy 1824 // of the variable VD 1825 CodeGenFunction DtorCGF(CGM); 1826 FunctionArgList Args; 1827 ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc, 1828 /*Id=*/nullptr, CGM.getContext().VoidPtrTy, 1829 ImplicitParamDecl::Other); 1830 Args.push_back(&Dst); 1831 1832 const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration( 1833 CGM.getContext().VoidTy, Args); 1834 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); 1835 std::string Name = getName({"__kmpc_global_dtor_", ""}); 1836 llvm::Function *Fn = 1837 CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc); 1838 auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF); 1839 DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args, 1840 Loc, Loc); 1841 // Create a scope with an artificial location for the body of this function. 1842 auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF); 1843 llvm::Value *ArgVal = DtorCGF.EmitLoadOfScalar( 1844 DtorCGF.GetAddrOfLocalVar(&Dst), 1845 /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation()); 1846 DtorCGF.emitDestroy(Address::deprecated(ArgVal, VDAddr.getAlignment()), 1847 ASTTy, DtorCGF.getDestroyer(ASTTy.isDestructedType()), 1848 DtorCGF.needsEHCleanup(ASTTy.isDestructedType())); 1849 DtorCGF.FinishFunction(); 1850 Dtor = Fn; 1851 } 1852 // Do not emit init function if it is not required. 
1853 if (!Ctor && !Dtor) 1854 return nullptr; 1855 1856 llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy}; 1857 auto *CopyCtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs, 1858 /*isVarArg=*/false) 1859 ->getPointerTo(); 1860 // Copying constructor for the threadprivate variable. 1861 // Must be NULL - reserved by runtime, but currently it requires that this 1862 // parameter is always NULL. Otherwise it fires assertion. 1863 CopyCtor = llvm::Constant::getNullValue(CopyCtorTy); 1864 if (Ctor == nullptr) { 1865 auto *CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy, 1866 /*isVarArg=*/false) 1867 ->getPointerTo(); 1868 Ctor = llvm::Constant::getNullValue(CtorTy); 1869 } 1870 if (Dtor == nullptr) { 1871 auto *DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy, 1872 /*isVarArg=*/false) 1873 ->getPointerTo(); 1874 Dtor = llvm::Constant::getNullValue(DtorTy); 1875 } 1876 if (!CGF) { 1877 auto *InitFunctionTy = 1878 llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false); 1879 std::string Name = getName({"__omp_threadprivate_init_", ""}); 1880 llvm::Function *InitFunction = CGM.CreateGlobalInitOrCleanUpFunction( 1881 InitFunctionTy, Name, CGM.getTypes().arrangeNullaryFunction()); 1882 CodeGenFunction InitCGF(CGM); 1883 FunctionArgList ArgList; 1884 InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction, 1885 CGM.getTypes().arrangeNullaryFunction(), ArgList, 1886 Loc, Loc); 1887 emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc); 1888 InitCGF.FinishFunction(); 1889 return InitFunction; 1890 } 1891 emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc); 1892 } 1893 return nullptr; 1894 } 1895 1896 bool CGOpenMPRuntime::emitDeclareTargetVarDefinition(const VarDecl *VD, 1897 llvm::GlobalVariable *Addr, 1898 bool PerformInit) { 1899 if (CGM.getLangOpts().OMPTargetTriples.empty() && 1900 !CGM.getLangOpts().OpenMPIsDevice) 1901 return false; 1902 
Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 1903 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD); 1904 if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link || 1905 (*Res == OMPDeclareTargetDeclAttr::MT_To && 1906 HasRequiresUnifiedSharedMemory)) 1907 return CGM.getLangOpts().OpenMPIsDevice; 1908 VD = VD->getDefinition(CGM.getContext()); 1909 assert(VD && "Unknown VarDecl"); 1910 1911 if (!DeclareTargetWithDefinition.insert(CGM.getMangledName(VD)).second) 1912 return CGM.getLangOpts().OpenMPIsDevice; 1913 1914 QualType ASTTy = VD->getType(); 1915 SourceLocation Loc = VD->getCanonicalDecl()->getBeginLoc(); 1916 1917 // Produce the unique prefix to identify the new target regions. We use 1918 // the source location of the variable declaration which we know to not 1919 // conflict with any target region. 1920 unsigned DeviceID; 1921 unsigned FileID; 1922 unsigned Line; 1923 getTargetEntryUniqueInfo(CGM.getContext(), Loc, DeviceID, FileID, Line); 1924 SmallString<128> Buffer, Out; 1925 { 1926 llvm::raw_svector_ostream OS(Buffer); 1927 OS << "__omp_offloading_" << llvm::format("_%x", DeviceID) 1928 << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line; 1929 } 1930 1931 const Expr *Init = VD->getAnyInitializer(); 1932 if (CGM.getLangOpts().CPlusPlus && PerformInit) { 1933 llvm::Constant *Ctor; 1934 llvm::Constant *ID; 1935 if (CGM.getLangOpts().OpenMPIsDevice) { 1936 // Generate function that re-emits the declaration's initializer into 1937 // the threadprivate copy of the variable VD 1938 CodeGenFunction CtorCGF(CGM); 1939 1940 const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction(); 1941 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); 1942 llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction( 1943 FTy, Twine(Buffer, "_ctor"), FI, Loc); 1944 auto NL = ApplyDebugLocation::CreateEmpty(CtorCGF); 1945 CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, 1946 FunctionArgList(), Loc, Loc); 1947 auto AL 
= ApplyDebugLocation::CreateArtificial(CtorCGF); 1948 CtorCGF.EmitAnyExprToMem( 1949 Init, Address::deprecated(Addr, CGM.getContext().getDeclAlign(VD)), 1950 Init->getType().getQualifiers(), 1951 /*IsInitializer=*/true); 1952 CtorCGF.FinishFunction(); 1953 Ctor = Fn; 1954 ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy); 1955 CGM.addUsedGlobal(cast<llvm::GlobalValue>(Ctor)); 1956 } else { 1957 Ctor = new llvm::GlobalVariable( 1958 CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true, 1959 llvm::GlobalValue::PrivateLinkage, 1960 llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_ctor")); 1961 ID = Ctor; 1962 } 1963 1964 // Register the information for the entry associated with the constructor. 1965 Out.clear(); 1966 OffloadEntriesInfoManager.registerTargetRegionEntryInfo( 1967 DeviceID, FileID, Twine(Buffer, "_ctor").toStringRef(Out), Line, Ctor, 1968 ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryCtor); 1969 } 1970 if (VD->getType().isDestructedType() != QualType::DK_none) { 1971 llvm::Constant *Dtor; 1972 llvm::Constant *ID; 1973 if (CGM.getLangOpts().OpenMPIsDevice) { 1974 // Generate function that emits destructor call for the threadprivate 1975 // copy of the variable VD 1976 CodeGenFunction DtorCGF(CGM); 1977 1978 const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction(); 1979 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); 1980 llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction( 1981 FTy, Twine(Buffer, "_dtor"), FI, Loc); 1982 auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF); 1983 DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, 1984 FunctionArgList(), Loc, Loc); 1985 // Create a scope with an artificial location for the body of this 1986 // function. 
1987 auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF); 1988 DtorCGF.emitDestroy( 1989 Address::deprecated(Addr, CGM.getContext().getDeclAlign(VD)), ASTTy, 1990 DtorCGF.getDestroyer(ASTTy.isDestructedType()), 1991 DtorCGF.needsEHCleanup(ASTTy.isDestructedType())); 1992 DtorCGF.FinishFunction(); 1993 Dtor = Fn; 1994 ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy); 1995 CGM.addUsedGlobal(cast<llvm::GlobalValue>(Dtor)); 1996 } else { 1997 Dtor = new llvm::GlobalVariable( 1998 CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true, 1999 llvm::GlobalValue::PrivateLinkage, 2000 llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_dtor")); 2001 ID = Dtor; 2002 } 2003 // Register the information for the entry associated with the destructor. 2004 Out.clear(); 2005 OffloadEntriesInfoManager.registerTargetRegionEntryInfo( 2006 DeviceID, FileID, Twine(Buffer, "_dtor").toStringRef(Out), Line, Dtor, 2007 ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryDtor); 2008 } 2009 return CGM.getLangOpts().OpenMPIsDevice; 2010 } 2011 2012 Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF, 2013 QualType VarType, 2014 StringRef Name) { 2015 std::string Suffix = getName({"artificial", ""}); 2016 llvm::Type *VarLVType = CGF.ConvertTypeForMem(VarType); 2017 llvm::GlobalVariable *GAddr = 2018 getOrCreateInternalVariable(VarLVType, Twine(Name).concat(Suffix)); 2019 if (CGM.getLangOpts().OpenMP && CGM.getLangOpts().OpenMPUseTLS && 2020 CGM.getTarget().isTLSSupported()) { 2021 GAddr->setThreadLocal(/*Val=*/true); 2022 return Address(GAddr, GAddr->getValueType(), 2023 CGM.getContext().getTypeAlignInChars(VarType)); 2024 } 2025 std::string CacheSuffix = getName({"cache", ""}); 2026 llvm::Value *Args[] = { 2027 emitUpdateLocation(CGF, SourceLocation()), 2028 getThreadID(CGF, SourceLocation()), 2029 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(GAddr, CGM.VoidPtrTy), 2030 CGF.Builder.CreateIntCast(CGF.getTypeSize(VarType), CGM.SizeTy, 2031 
/*isSigned=*/false), 2032 getOrCreateInternalVariable( 2033 CGM.VoidPtrPtrTy, Twine(Name).concat(Suffix).concat(CacheSuffix))}; 2034 return Address( 2035 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 2036 CGF.EmitRuntimeCall( 2037 OMPBuilder.getOrCreateRuntimeFunction( 2038 CGM.getModule(), OMPRTL___kmpc_threadprivate_cached), 2039 Args), 2040 VarLVType->getPointerTo(/*AddrSpace=*/0)), 2041 VarLVType, CGM.getContext().getTypeAlignInChars(VarType)); 2042 } 2043 2044 void CGOpenMPRuntime::emitIfClause(CodeGenFunction &CGF, const Expr *Cond, 2045 const RegionCodeGenTy &ThenGen, 2046 const RegionCodeGenTy &ElseGen) { 2047 CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange()); 2048 2049 // If the condition constant folds and can be elided, try to avoid emitting 2050 // the condition and the dead arm of the if/else. 2051 bool CondConstant; 2052 if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) { 2053 if (CondConstant) 2054 ThenGen(CGF); 2055 else 2056 ElseGen(CGF); 2057 return; 2058 } 2059 2060 // Otherwise, the condition did not fold, or we couldn't elide it. Just 2061 // emit the conditional branch. 2062 llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("omp_if.then"); 2063 llvm::BasicBlock *ElseBlock = CGF.createBasicBlock("omp_if.else"); 2064 llvm::BasicBlock *ContBlock = CGF.createBasicBlock("omp_if.end"); 2065 CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0); 2066 2067 // Emit the 'then' code. 2068 CGF.EmitBlock(ThenBlock); 2069 ThenGen(CGF); 2070 CGF.EmitBranch(ContBlock); 2071 // Emit the 'else' code if present. 2072 // There is no need to emit line number for unconditional branch. 2073 (void)ApplyDebugLocation::CreateEmpty(CGF); 2074 CGF.EmitBlock(ElseBlock); 2075 ElseGen(CGF); 2076 // There is no need to emit line number for unconditional branch. 2077 (void)ApplyDebugLocation::CreateEmpty(CGF); 2078 CGF.EmitBranch(ContBlock); 2079 // Emit the continuation block for code after the if. 
2080 CGF.EmitBlock(ContBlock, /*IsFinished=*/true); 2081 } 2082 2083 void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc, 2084 llvm::Function *OutlinedFn, 2085 ArrayRef<llvm::Value *> CapturedVars, 2086 const Expr *IfCond, 2087 llvm::Value *NumThreads) { 2088 if (!CGF.HaveInsertPoint()) 2089 return; 2090 llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc); 2091 auto &M = CGM.getModule(); 2092 auto &&ThenGen = [&M, OutlinedFn, CapturedVars, RTLoc, 2093 this](CodeGenFunction &CGF, PrePostActionTy &) { 2094 // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn); 2095 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime(); 2096 llvm::Value *Args[] = { 2097 RTLoc, 2098 CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars 2099 CGF.Builder.CreateBitCast(OutlinedFn, RT.getKmpc_MicroPointerTy())}; 2100 llvm::SmallVector<llvm::Value *, 16> RealArgs; 2101 RealArgs.append(std::begin(Args), std::end(Args)); 2102 RealArgs.append(CapturedVars.begin(), CapturedVars.end()); 2103 2104 llvm::FunctionCallee RTLFn = 2105 OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_fork_call); 2106 CGF.EmitRuntimeCall(RTLFn, RealArgs); 2107 }; 2108 auto &&ElseGen = [&M, OutlinedFn, CapturedVars, RTLoc, Loc, 2109 this](CodeGenFunction &CGF, PrePostActionTy &) { 2110 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime(); 2111 llvm::Value *ThreadID = RT.getThreadID(CGF, Loc); 2112 // Build calls: 2113 // __kmpc_serialized_parallel(&Loc, GTid); 2114 llvm::Value *Args[] = {RTLoc, ThreadID}; 2115 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 2116 M, OMPRTL___kmpc_serialized_parallel), 2117 Args); 2118 2119 // OutlinedFn(>id, &zero_bound, CapturedStruct); 2120 Address ThreadIDAddr = RT.emitThreadIDAddress(CGF, Loc); 2121 Address ZeroAddrBound = 2122 CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty, 2123 /*Name=*/".bound.zero.addr"); 2124 CGF.Builder.CreateStore(CGF.Builder.getInt32(/*C*/ 0), ZeroAddrBound); 2125 llvm::SmallVector<llvm::Value *, 16> 
OutlinedFnArgs; 2126 // ThreadId for serialized parallels is 0. 2127 OutlinedFnArgs.push_back(ThreadIDAddr.getPointer()); 2128 OutlinedFnArgs.push_back(ZeroAddrBound.getPointer()); 2129 OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end()); 2130 2131 // Ensure we do not inline the function. This is trivially true for the ones 2132 // passed to __kmpc_fork_call but the ones called in serialized regions 2133 // could be inlined. This is not a perfect but it is closer to the invariant 2134 // we want, namely, every data environment starts with a new function. 2135 // TODO: We should pass the if condition to the runtime function and do the 2136 // handling there. Much cleaner code. 2137 OutlinedFn->removeFnAttr(llvm::Attribute::AlwaysInline); 2138 OutlinedFn->addFnAttr(llvm::Attribute::NoInline); 2139 RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs); 2140 2141 // __kmpc_end_serialized_parallel(&Loc, GTid); 2142 llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID}; 2143 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 2144 M, OMPRTL___kmpc_end_serialized_parallel), 2145 EndArgs); 2146 }; 2147 if (IfCond) { 2148 emitIfClause(CGF, IfCond, ThenGen, ElseGen); 2149 } else { 2150 RegionCodeGenTy ThenRCG(ThenGen); 2151 ThenRCG(CGF); 2152 } 2153 } 2154 2155 // If we're inside an (outlined) parallel region, use the region info's 2156 // thread-ID variable (it is passed in a first argument of the outlined function 2157 // as "kmp_int32 *gtid"). Otherwise, if we're not inside parallel region, but in 2158 // regular serial code region, get thread ID by calling kmp_int32 2159 // kmpc_global_thread_num(ident_t *loc), stash this thread ID in a temporary and 2160 // return the address of that temp. 
2161 Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF, 2162 SourceLocation Loc) { 2163 if (auto *OMPRegionInfo = 2164 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) 2165 if (OMPRegionInfo->getThreadIDVariable()) 2166 return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress(CGF); 2167 2168 llvm::Value *ThreadID = getThreadID(CGF, Loc); 2169 QualType Int32Ty = 2170 CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true); 2171 Address ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp."); 2172 CGF.EmitStoreOfScalar(ThreadID, 2173 CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty)); 2174 2175 return ThreadIDTemp; 2176 } 2177 2178 llvm::GlobalVariable *CGOpenMPRuntime::getOrCreateInternalVariable( 2179 llvm::Type *Ty, const llvm::Twine &Name, unsigned AddressSpace) { 2180 SmallString<256> Buffer; 2181 llvm::raw_svector_ostream Out(Buffer); 2182 Out << Name; 2183 StringRef RuntimeName = Out.str(); 2184 auto &Elem = *InternalVars.try_emplace(RuntimeName, nullptr).first; 2185 if (Elem.second) { 2186 assert(Elem.second->getType()->isOpaqueOrPointeeTypeMatches(Ty) && 2187 "OMP internal variable has different type than requested"); 2188 return &*Elem.second; 2189 } 2190 2191 return Elem.second = new llvm::GlobalVariable( 2192 CGM.getModule(), Ty, /*IsConstant*/ false, 2193 llvm::GlobalValue::CommonLinkage, llvm::Constant::getNullValue(Ty), 2194 Elem.first(), /*InsertBefore=*/nullptr, 2195 llvm::GlobalValue::NotThreadLocal, AddressSpace); 2196 } 2197 2198 llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) { 2199 std::string Prefix = Twine("gomp_critical_user_", CriticalName).str(); 2200 std::string Name = getName({Prefix, "var"}); 2201 return getOrCreateInternalVariable(KmpCriticalNameTy, Name); 2202 } 2203 2204 namespace { 2205 /// Common pre(post)-action for different OpenMP constructs. 
2206 class CommonActionTy final : public PrePostActionTy { 2207 llvm::FunctionCallee EnterCallee; 2208 ArrayRef<llvm::Value *> EnterArgs; 2209 llvm::FunctionCallee ExitCallee; 2210 ArrayRef<llvm::Value *> ExitArgs; 2211 bool Conditional; 2212 llvm::BasicBlock *ContBlock = nullptr; 2213 2214 public: 2215 CommonActionTy(llvm::FunctionCallee EnterCallee, 2216 ArrayRef<llvm::Value *> EnterArgs, 2217 llvm::FunctionCallee ExitCallee, 2218 ArrayRef<llvm::Value *> ExitArgs, bool Conditional = false) 2219 : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee), 2220 ExitArgs(ExitArgs), Conditional(Conditional) {} 2221 void Enter(CodeGenFunction &CGF) override { 2222 llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs); 2223 if (Conditional) { 2224 llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes); 2225 auto *ThenBlock = CGF.createBasicBlock("omp_if.then"); 2226 ContBlock = CGF.createBasicBlock("omp_if.end"); 2227 // Generate the branch (If-stmt) 2228 CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock); 2229 CGF.EmitBlock(ThenBlock); 2230 } 2231 } 2232 void Done(CodeGenFunction &CGF) { 2233 // Emit the rest of blocks/branches 2234 CGF.EmitBranch(ContBlock); 2235 CGF.EmitBlock(ContBlock, true); 2236 } 2237 void Exit(CodeGenFunction &CGF) override { 2238 CGF.EmitRuntimeCall(ExitCallee, ExitArgs); 2239 } 2240 }; 2241 } // anonymous namespace 2242 2243 void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF, 2244 StringRef CriticalName, 2245 const RegionCodeGenTy &CriticalOpGen, 2246 SourceLocation Loc, const Expr *Hint) { 2247 // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]); 2248 // CriticalOpGen(); 2249 // __kmpc_end_critical(ident_t *, gtid, Lock); 2250 // Prepare arguments and build a call to __kmpc_critical 2251 if (!CGF.HaveInsertPoint()) 2252 return; 2253 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 2254 getCriticalRegionLock(CriticalName)}; 2255 
llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args), 2256 std::end(Args)); 2257 if (Hint) { 2258 EnterArgs.push_back(CGF.Builder.CreateIntCast( 2259 CGF.EmitScalarExpr(Hint), CGM.Int32Ty, /*isSigned=*/false)); 2260 } 2261 CommonActionTy Action( 2262 OMPBuilder.getOrCreateRuntimeFunction( 2263 CGM.getModule(), 2264 Hint ? OMPRTL___kmpc_critical_with_hint : OMPRTL___kmpc_critical), 2265 EnterArgs, 2266 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), 2267 OMPRTL___kmpc_end_critical), 2268 Args); 2269 CriticalOpGen.setAction(Action); 2270 emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen); 2271 } 2272 2273 void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF, 2274 const RegionCodeGenTy &MasterOpGen, 2275 SourceLocation Loc) { 2276 if (!CGF.HaveInsertPoint()) 2277 return; 2278 // if(__kmpc_master(ident_t *, gtid)) { 2279 // MasterOpGen(); 2280 // __kmpc_end_master(ident_t *, gtid); 2281 // } 2282 // Prepare arguments and build a call to __kmpc_master 2283 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 2284 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction( 2285 CGM.getModule(), OMPRTL___kmpc_master), 2286 Args, 2287 OMPBuilder.getOrCreateRuntimeFunction( 2288 CGM.getModule(), OMPRTL___kmpc_end_master), 2289 Args, 2290 /*Conditional=*/true); 2291 MasterOpGen.setAction(Action); 2292 emitInlinedDirective(CGF, OMPD_master, MasterOpGen); 2293 Action.Done(CGF); 2294 } 2295 2296 void CGOpenMPRuntime::emitMaskedRegion(CodeGenFunction &CGF, 2297 const RegionCodeGenTy &MaskedOpGen, 2298 SourceLocation Loc, const Expr *Filter) { 2299 if (!CGF.HaveInsertPoint()) 2300 return; 2301 // if(__kmpc_masked(ident_t *, gtid, filter)) { 2302 // MaskedOpGen(); 2303 // __kmpc_end_masked(iden_t *, gtid); 2304 // } 2305 // Prepare arguments and build a call to __kmpc_masked 2306 llvm::Value *FilterVal = Filter 2307 ? 
CGF.EmitScalarExpr(Filter, CGF.Int32Ty) 2308 : llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/0); 2309 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 2310 FilterVal}; 2311 llvm::Value *ArgsEnd[] = {emitUpdateLocation(CGF, Loc), 2312 getThreadID(CGF, Loc)}; 2313 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction( 2314 CGM.getModule(), OMPRTL___kmpc_masked), 2315 Args, 2316 OMPBuilder.getOrCreateRuntimeFunction( 2317 CGM.getModule(), OMPRTL___kmpc_end_masked), 2318 ArgsEnd, 2319 /*Conditional=*/true); 2320 MaskedOpGen.setAction(Action); 2321 emitInlinedDirective(CGF, OMPD_masked, MaskedOpGen); 2322 Action.Done(CGF); 2323 } 2324 2325 void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF, 2326 SourceLocation Loc) { 2327 if (!CGF.HaveInsertPoint()) 2328 return; 2329 if (CGF.CGM.getLangOpts().OpenMPIRBuilder) { 2330 OMPBuilder.createTaskyield(CGF.Builder); 2331 } else { 2332 // Build call __kmpc_omp_taskyield(loc, thread_id, 0); 2333 llvm::Value *Args[] = { 2334 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 2335 llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)}; 2336 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 2337 CGM.getModule(), OMPRTL___kmpc_omp_taskyield), 2338 Args); 2339 } 2340 2341 if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) 2342 Region->emitUntiedSwitch(CGF); 2343 } 2344 2345 void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF, 2346 const RegionCodeGenTy &TaskgroupOpGen, 2347 SourceLocation Loc) { 2348 if (!CGF.HaveInsertPoint()) 2349 return; 2350 // __kmpc_taskgroup(ident_t *, gtid); 2351 // TaskgroupOpGen(); 2352 // __kmpc_end_taskgroup(ident_t *, gtid); 2353 // Prepare arguments and build a call to __kmpc_taskgroup 2354 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 2355 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction( 2356 CGM.getModule(), OMPRTL___kmpc_taskgroup), 2357 Args, 2358 
OMPBuilder.getOrCreateRuntimeFunction( 2359 CGM.getModule(), OMPRTL___kmpc_end_taskgroup), 2360 Args); 2361 TaskgroupOpGen.setAction(Action); 2362 emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen); 2363 } 2364 2365 /// Given an array of pointers to variables, project the address of a 2366 /// given variable. 2367 static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array, 2368 unsigned Index, const VarDecl *Var) { 2369 // Pull out the pointer to the variable. 2370 Address PtrAddr = CGF.Builder.CreateConstArrayGEP(Array, Index); 2371 llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr); 2372 2373 Address Addr = Address::deprecated(Ptr, CGF.getContext().getDeclAlign(Var)); 2374 Addr = CGF.Builder.CreateElementBitCast( 2375 Addr, CGF.ConvertTypeForMem(Var->getType())); 2376 return Addr; 2377 } 2378 2379 static llvm::Value *emitCopyprivateCopyFunction( 2380 CodeGenModule &CGM, llvm::Type *ArgsType, 2381 ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs, 2382 ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps, 2383 SourceLocation Loc) { 2384 ASTContext &C = CGM.getContext(); 2385 // void copy_func(void *LHSArg, void *RHSArg); 2386 FunctionArgList Args; 2387 ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 2388 ImplicitParamDecl::Other); 2389 ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 2390 ImplicitParamDecl::Other); 2391 Args.push_back(&LHSArg); 2392 Args.push_back(&RHSArg); 2393 const auto &CGFI = 2394 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 2395 std::string Name = 2396 CGM.getOpenMPRuntime().getName({"omp", "copyprivate", "copy_func"}); 2397 auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI), 2398 llvm::GlobalValue::InternalLinkage, Name, 2399 &CGM.getModule()); 2400 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI); 2401 Fn->setDoesNotRecurse(); 2402 CodeGenFunction CGF(CGM); 2403 
CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc); 2404 // Dest = (void*[n])(LHSArg); 2405 // Src = (void*[n])(RHSArg); 2406 Address LHS = Address::deprecated( 2407 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 2408 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)), ArgsType), 2409 CGF.getPointerAlign()); 2410 Address RHS = Address::deprecated( 2411 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 2412 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)), ArgsType), 2413 CGF.getPointerAlign()); 2414 // *(Type0*)Dst[0] = *(Type0*)Src[0]; 2415 // *(Type1*)Dst[1] = *(Type1*)Src[1]; 2416 // ... 2417 // *(Typen*)Dst[n] = *(Typen*)Src[n]; 2418 for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) { 2419 const auto *DestVar = 2420 cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl()); 2421 Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar); 2422 2423 const auto *SrcVar = 2424 cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl()); 2425 Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar); 2426 2427 const auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl(); 2428 QualType Type = VD->getType(); 2429 CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]); 2430 } 2431 CGF.FinishFunction(); 2432 return Fn; 2433 } 2434 2435 void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF, 2436 const RegionCodeGenTy &SingleOpGen, 2437 SourceLocation Loc, 2438 ArrayRef<const Expr *> CopyprivateVars, 2439 ArrayRef<const Expr *> SrcExprs, 2440 ArrayRef<const Expr *> DstExprs, 2441 ArrayRef<const Expr *> AssignmentOps) { 2442 if (!CGF.HaveInsertPoint()) 2443 return; 2444 assert(CopyprivateVars.size() == SrcExprs.size() && 2445 CopyprivateVars.size() == DstExprs.size() && 2446 CopyprivateVars.size() == AssignmentOps.size()); 2447 ASTContext &C = CGM.getContext(); 2448 // int32 did_it = 0; 2449 // if(__kmpc_single(ident_t *, gtid)) { 2450 // SingleOpGen(); 2451 // __kmpc_end_single(ident_t *, gtid); 
2452 // did_it = 1; 2453 // } 2454 // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>, 2455 // <copy_func>, did_it); 2456 2457 Address DidIt = Address::invalid(); 2458 if (!CopyprivateVars.empty()) { 2459 // int32 did_it = 0; 2460 QualType KmpInt32Ty = 2461 C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1); 2462 DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it"); 2463 CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt); 2464 } 2465 // Prepare arguments and build a call to __kmpc_single 2466 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 2467 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction( 2468 CGM.getModule(), OMPRTL___kmpc_single), 2469 Args, 2470 OMPBuilder.getOrCreateRuntimeFunction( 2471 CGM.getModule(), OMPRTL___kmpc_end_single), 2472 Args, 2473 /*Conditional=*/true); 2474 SingleOpGen.setAction(Action); 2475 emitInlinedDirective(CGF, OMPD_single, SingleOpGen); 2476 if (DidIt.isValid()) { 2477 // did_it = 1; 2478 CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt); 2479 } 2480 Action.Done(CGF); 2481 // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>, 2482 // <copy_func>, did_it); 2483 if (DidIt.isValid()) { 2484 llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size()); 2485 QualType CopyprivateArrayTy = C.getConstantArrayType( 2486 C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal, 2487 /*IndexTypeQuals=*/0); 2488 // Create a list of all private variables for copyprivate. 
2489 Address CopyprivateList = 2490 CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list"); 2491 for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) { 2492 Address Elem = CGF.Builder.CreateConstArrayGEP(CopyprivateList, I); 2493 CGF.Builder.CreateStore( 2494 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 2495 CGF.EmitLValue(CopyprivateVars[I]).getPointer(CGF), 2496 CGF.VoidPtrTy), 2497 Elem); 2498 } 2499 // Build function that copies private values from single region to all other 2500 // threads in the corresponding parallel region. 2501 llvm::Value *CpyFn = emitCopyprivateCopyFunction( 2502 CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy)->getPointerTo(), 2503 CopyprivateVars, SrcExprs, DstExprs, AssignmentOps, Loc); 2504 llvm::Value *BufSize = CGF.getTypeSize(CopyprivateArrayTy); 2505 Address CL = 2506 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(CopyprivateList, 2507 CGF.VoidPtrTy); 2508 llvm::Value *DidItVal = CGF.Builder.CreateLoad(DidIt); 2509 llvm::Value *Args[] = { 2510 emitUpdateLocation(CGF, Loc), // ident_t *<loc> 2511 getThreadID(CGF, Loc), // i32 <gtid> 2512 BufSize, // size_t <buf_size> 2513 CL.getPointer(), // void *<copyprivate list> 2514 CpyFn, // void (*) (void *, void *) <copy_func> 2515 DidItVal // i32 did_it 2516 }; 2517 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 2518 CGM.getModule(), OMPRTL___kmpc_copyprivate), 2519 Args); 2520 } 2521 } 2522 2523 void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF, 2524 const RegionCodeGenTy &OrderedOpGen, 2525 SourceLocation Loc, bool IsThreads) { 2526 if (!CGF.HaveInsertPoint()) 2527 return; 2528 // __kmpc_ordered(ident_t *, gtid); 2529 // OrderedOpGen(); 2530 // __kmpc_end_ordered(ident_t *, gtid); 2531 // Prepare arguments and build a call to __kmpc_ordered 2532 if (IsThreads) { 2533 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 2534 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction( 2535 CGM.getModule(), 
OMPRTL___kmpc_ordered), 2536 Args, 2537 OMPBuilder.getOrCreateRuntimeFunction( 2538 CGM.getModule(), OMPRTL___kmpc_end_ordered), 2539 Args); 2540 OrderedOpGen.setAction(Action); 2541 emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen); 2542 return; 2543 } 2544 emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen); 2545 } 2546 2547 unsigned CGOpenMPRuntime::getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind) { 2548 unsigned Flags; 2549 if (Kind == OMPD_for) 2550 Flags = OMP_IDENT_BARRIER_IMPL_FOR; 2551 else if (Kind == OMPD_sections) 2552 Flags = OMP_IDENT_BARRIER_IMPL_SECTIONS; 2553 else if (Kind == OMPD_single) 2554 Flags = OMP_IDENT_BARRIER_IMPL_SINGLE; 2555 else if (Kind == OMPD_barrier) 2556 Flags = OMP_IDENT_BARRIER_EXPL; 2557 else 2558 Flags = OMP_IDENT_BARRIER_IMPL; 2559 return Flags; 2560 } 2561 2562 void CGOpenMPRuntime::getDefaultScheduleAndChunk( 2563 CodeGenFunction &CGF, const OMPLoopDirective &S, 2564 OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const { 2565 // Check if the loop directive is actually a doacross loop directive. In this 2566 // case choose static, 1 schedule. 2567 if (llvm::any_of( 2568 S.getClausesOfKind<OMPOrderedClause>(), 2569 [](const OMPOrderedClause *C) { return C->getNumForLoops(); })) { 2570 ScheduleKind = OMPC_SCHEDULE_static; 2571 // Chunk size is 1 in this case. 
2572 llvm::APInt ChunkSize(32, 1); 2573 ChunkExpr = IntegerLiteral::Create( 2574 CGF.getContext(), ChunkSize, 2575 CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/0), 2576 SourceLocation()); 2577 } 2578 } 2579 2580 void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc, 2581 OpenMPDirectiveKind Kind, bool EmitChecks, 2582 bool ForceSimpleCall) { 2583 // Check if we should use the OMPBuilder 2584 auto *OMPRegionInfo = 2585 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo); 2586 if (CGF.CGM.getLangOpts().OpenMPIRBuilder) { 2587 CGF.Builder.restoreIP(OMPBuilder.createBarrier( 2588 CGF.Builder, Kind, ForceSimpleCall, EmitChecks)); 2589 return; 2590 } 2591 2592 if (!CGF.HaveInsertPoint()) 2593 return; 2594 // Build call __kmpc_cancel_barrier(loc, thread_id); 2595 // Build call __kmpc_barrier(loc, thread_id); 2596 unsigned Flags = getDefaultFlagsForBarriers(Kind); 2597 // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc, 2598 // thread_id); 2599 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags), 2600 getThreadID(CGF, Loc)}; 2601 if (OMPRegionInfo) { 2602 if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) { 2603 llvm::Value *Result = CGF.EmitRuntimeCall( 2604 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), 2605 OMPRTL___kmpc_cancel_barrier), 2606 Args); 2607 if (EmitChecks) { 2608 // if (__kmpc_cancel_barrier()) { 2609 // exit from construct; 2610 // } 2611 llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit"); 2612 llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue"); 2613 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result); 2614 CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB); 2615 CGF.EmitBlock(ExitBB); 2616 // exit from construct; 2617 CodeGenFunction::JumpDest CancelDestination = 2618 CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind()); 2619 CGF.EmitBranchThroughCleanup(CancelDestination); 2620 CGF.EmitBlock(ContBB, /*IsFinished=*/true); 2621 } 2622 
return; 2623 } 2624 } 2625 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 2626 CGM.getModule(), OMPRTL___kmpc_barrier), 2627 Args); 2628 } 2629 2630 /// Map the OpenMP loop schedule to the runtime enumeration. 2631 static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind, 2632 bool Chunked, bool Ordered) { 2633 switch (ScheduleKind) { 2634 case OMPC_SCHEDULE_static: 2635 return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked) 2636 : (Ordered ? OMP_ord_static : OMP_sch_static); 2637 case OMPC_SCHEDULE_dynamic: 2638 return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked; 2639 case OMPC_SCHEDULE_guided: 2640 return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked; 2641 case OMPC_SCHEDULE_runtime: 2642 return Ordered ? OMP_ord_runtime : OMP_sch_runtime; 2643 case OMPC_SCHEDULE_auto: 2644 return Ordered ? OMP_ord_auto : OMP_sch_auto; 2645 case OMPC_SCHEDULE_unknown: 2646 assert(!Chunked && "chunk was specified but schedule kind not known"); 2647 return Ordered ? OMP_ord_static : OMP_sch_static; 2648 } 2649 llvm_unreachable("Unexpected runtime schedule"); 2650 } 2651 2652 /// Map the OpenMP distribute schedule to the runtime enumeration. 2653 static OpenMPSchedType 2654 getRuntimeSchedule(OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) { 2655 // only static is allowed for dist_schedule 2656 return Chunked ? 
OMP_dist_sch_static_chunked : OMP_dist_sch_static; 2657 } 2658 2659 bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind, 2660 bool Chunked) const { 2661 OpenMPSchedType Schedule = 2662 getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false); 2663 return Schedule == OMP_sch_static; 2664 } 2665 2666 bool CGOpenMPRuntime::isStaticNonchunked( 2667 OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const { 2668 OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked); 2669 return Schedule == OMP_dist_sch_static; 2670 } 2671 2672 bool CGOpenMPRuntime::isStaticChunked(OpenMPScheduleClauseKind ScheduleKind, 2673 bool Chunked) const { 2674 OpenMPSchedType Schedule = 2675 getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false); 2676 return Schedule == OMP_sch_static_chunked; 2677 } 2678 2679 bool CGOpenMPRuntime::isStaticChunked( 2680 OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const { 2681 OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked); 2682 return Schedule == OMP_dist_sch_static_chunked; 2683 } 2684 2685 bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const { 2686 OpenMPSchedType Schedule = 2687 getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false); 2688 assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here"); 2689 return Schedule != OMP_sch_static; 2690 } 2691 2692 static int addMonoNonMonoModifier(CodeGenModule &CGM, OpenMPSchedType Schedule, 2693 OpenMPScheduleClauseModifier M1, 2694 OpenMPScheduleClauseModifier M2) { 2695 int Modifier = 0; 2696 switch (M1) { 2697 case OMPC_SCHEDULE_MODIFIER_monotonic: 2698 Modifier = OMP_sch_modifier_monotonic; 2699 break; 2700 case OMPC_SCHEDULE_MODIFIER_nonmonotonic: 2701 Modifier = OMP_sch_modifier_nonmonotonic; 2702 break; 2703 case OMPC_SCHEDULE_MODIFIER_simd: 2704 if (Schedule == OMP_sch_static_chunked) 2705 Schedule = OMP_sch_static_balanced_chunked; 2706 break; 2707 case 
OMPC_SCHEDULE_MODIFIER_last: 2708 case OMPC_SCHEDULE_MODIFIER_unknown: 2709 break; 2710 } 2711 switch (M2) { 2712 case OMPC_SCHEDULE_MODIFIER_monotonic: 2713 Modifier = OMP_sch_modifier_monotonic; 2714 break; 2715 case OMPC_SCHEDULE_MODIFIER_nonmonotonic: 2716 Modifier = OMP_sch_modifier_nonmonotonic; 2717 break; 2718 case OMPC_SCHEDULE_MODIFIER_simd: 2719 if (Schedule == OMP_sch_static_chunked) 2720 Schedule = OMP_sch_static_balanced_chunked; 2721 break; 2722 case OMPC_SCHEDULE_MODIFIER_last: 2723 case OMPC_SCHEDULE_MODIFIER_unknown: 2724 break; 2725 } 2726 // OpenMP 5.0, 2.9.2 Worksharing-Loop Construct, Desription. 2727 // If the static schedule kind is specified or if the ordered clause is 2728 // specified, and if the nonmonotonic modifier is not specified, the effect is 2729 // as if the monotonic modifier is specified. Otherwise, unless the monotonic 2730 // modifier is specified, the effect is as if the nonmonotonic modifier is 2731 // specified. 2732 if (CGM.getLangOpts().OpenMP >= 50 && Modifier == 0) { 2733 if (!(Schedule == OMP_sch_static_chunked || Schedule == OMP_sch_static || 2734 Schedule == OMP_sch_static_balanced_chunked || 2735 Schedule == OMP_ord_static_chunked || Schedule == OMP_ord_static || 2736 Schedule == OMP_dist_sch_static_chunked || 2737 Schedule == OMP_dist_sch_static)) 2738 Modifier = OMP_sch_modifier_nonmonotonic; 2739 } 2740 return Schedule | Modifier; 2741 } 2742 2743 void CGOpenMPRuntime::emitForDispatchInit( 2744 CodeGenFunction &CGF, SourceLocation Loc, 2745 const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned, 2746 bool Ordered, const DispatchRTInput &DispatchValues) { 2747 if (!CGF.HaveInsertPoint()) 2748 return; 2749 OpenMPSchedType Schedule = getRuntimeSchedule( 2750 ScheduleKind.Schedule, DispatchValues.Chunk != nullptr, Ordered); 2751 assert(Ordered || 2752 (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked && 2753 Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked && 2754 
Schedule != OMP_sch_static_balanced_chunked)); 2755 // Call __kmpc_dispatch_init( 2756 // ident_t *loc, kmp_int32 tid, kmp_int32 schedule, 2757 // kmp_int[32|64] lower, kmp_int[32|64] upper, 2758 // kmp_int[32|64] stride, kmp_int[32|64] chunk); 2759 2760 // If the Chunk was not specified in the clause - use default value 1. 2761 llvm::Value *Chunk = DispatchValues.Chunk ? DispatchValues.Chunk 2762 : CGF.Builder.getIntN(IVSize, 1); 2763 llvm::Value *Args[] = { 2764 emitUpdateLocation(CGF, Loc), 2765 getThreadID(CGF, Loc), 2766 CGF.Builder.getInt32(addMonoNonMonoModifier( 2767 CGM, Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type 2768 DispatchValues.LB, // Lower 2769 DispatchValues.UB, // Upper 2770 CGF.Builder.getIntN(IVSize, 1), // Stride 2771 Chunk // Chunk 2772 }; 2773 CGF.EmitRuntimeCall(createDispatchInitFunction(IVSize, IVSigned), Args); 2774 } 2775 2776 static void emitForStaticInitCall( 2777 CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId, 2778 llvm::FunctionCallee ForStaticInitFunction, OpenMPSchedType Schedule, 2779 OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2, 2780 const CGOpenMPRuntime::StaticRTInput &Values) { 2781 if (!CGF.HaveInsertPoint()) 2782 return; 2783 2784 assert(!Values.Ordered); 2785 assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked || 2786 Schedule == OMP_sch_static_balanced_chunked || 2787 Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked || 2788 Schedule == OMP_dist_sch_static || 2789 Schedule == OMP_dist_sch_static_chunked); 2790 2791 // Call __kmpc_for_static_init( 2792 // ident_t *loc, kmp_int32 tid, kmp_int32 schedtype, 2793 // kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower, 2794 // kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride, 2795 // kmp_int[32|64] incr, kmp_int[32|64] chunk); 2796 llvm::Value *Chunk = Values.Chunk; 2797 if (Chunk == nullptr) { 2798 assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static || 2799 Schedule == 
OMP_dist_sch_static) && 2800 "expected static non-chunked schedule"); 2801 // If the Chunk was not specified in the clause - use default value 1. 2802 Chunk = CGF.Builder.getIntN(Values.IVSize, 1); 2803 } else { 2804 assert((Schedule == OMP_sch_static_chunked || 2805 Schedule == OMP_sch_static_balanced_chunked || 2806 Schedule == OMP_ord_static_chunked || 2807 Schedule == OMP_dist_sch_static_chunked) && 2808 "expected static chunked schedule"); 2809 } 2810 llvm::Value *Args[] = { 2811 UpdateLocation, 2812 ThreadId, 2813 CGF.Builder.getInt32(addMonoNonMonoModifier(CGF.CGM, Schedule, M1, 2814 M2)), // Schedule type 2815 Values.IL.getPointer(), // &isLastIter 2816 Values.LB.getPointer(), // &LB 2817 Values.UB.getPointer(), // &UB 2818 Values.ST.getPointer(), // &Stride 2819 CGF.Builder.getIntN(Values.IVSize, 1), // Incr 2820 Chunk // Chunk 2821 }; 2822 CGF.EmitRuntimeCall(ForStaticInitFunction, Args); 2823 } 2824 2825 void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF, 2826 SourceLocation Loc, 2827 OpenMPDirectiveKind DKind, 2828 const OpenMPScheduleTy &ScheduleKind, 2829 const StaticRTInput &Values) { 2830 OpenMPSchedType ScheduleNum = getRuntimeSchedule( 2831 ScheduleKind.Schedule, Values.Chunk != nullptr, Values.Ordered); 2832 assert(isOpenMPWorksharingDirective(DKind) && 2833 "Expected loop-based or sections-based directive."); 2834 llvm::Value *UpdatedLocation = emitUpdateLocation(CGF, Loc, 2835 isOpenMPLoopDirective(DKind) 2836 ? 
OMP_IDENT_WORK_LOOP 2837 : OMP_IDENT_WORK_SECTIONS); 2838 llvm::Value *ThreadId = getThreadID(CGF, Loc); 2839 llvm::FunctionCallee StaticInitFunction = 2840 createForStaticInitFunction(Values.IVSize, Values.IVSigned, false); 2841 auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc); 2842 emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction, 2843 ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, Values); 2844 } 2845 2846 void CGOpenMPRuntime::emitDistributeStaticInit( 2847 CodeGenFunction &CGF, SourceLocation Loc, 2848 OpenMPDistScheduleClauseKind SchedKind, 2849 const CGOpenMPRuntime::StaticRTInput &Values) { 2850 OpenMPSchedType ScheduleNum = 2851 getRuntimeSchedule(SchedKind, Values.Chunk != nullptr); 2852 llvm::Value *UpdatedLocation = 2853 emitUpdateLocation(CGF, Loc, OMP_IDENT_WORK_DISTRIBUTE); 2854 llvm::Value *ThreadId = getThreadID(CGF, Loc); 2855 llvm::FunctionCallee StaticInitFunction; 2856 bool isGPUDistribute = 2857 CGM.getLangOpts().OpenMPIsDevice && 2858 (CGM.getTriple().isAMDGCN() || CGM.getTriple().isNVPTX()); 2859 StaticInitFunction = createForStaticInitFunction( 2860 Values.IVSize, Values.IVSigned, isGPUDistribute); 2861 2862 emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction, 2863 ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown, 2864 OMPC_SCHEDULE_MODIFIER_unknown, Values); 2865 } 2866 2867 void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF, 2868 SourceLocation Loc, 2869 OpenMPDirectiveKind DKind) { 2870 if (!CGF.HaveInsertPoint()) 2871 return; 2872 // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid); 2873 llvm::Value *Args[] = { 2874 emitUpdateLocation(CGF, Loc, 2875 isOpenMPDistributeDirective(DKind) 2876 ? OMP_IDENT_WORK_DISTRIBUTE 2877 : isOpenMPLoopDirective(DKind) 2878 ? 
OMP_IDENT_WORK_LOOP 2879 : OMP_IDENT_WORK_SECTIONS), 2880 getThreadID(CGF, Loc)}; 2881 auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc); 2882 if (isOpenMPDistributeDirective(DKind) && CGM.getLangOpts().OpenMPIsDevice && 2883 (CGM.getTriple().isAMDGCN() || CGM.getTriple().isNVPTX())) 2884 CGF.EmitRuntimeCall( 2885 OMPBuilder.getOrCreateRuntimeFunction( 2886 CGM.getModule(), OMPRTL___kmpc_distribute_static_fini), 2887 Args); 2888 else 2889 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 2890 CGM.getModule(), OMPRTL___kmpc_for_static_fini), 2891 Args); 2892 } 2893 2894 void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF, 2895 SourceLocation Loc, 2896 unsigned IVSize, 2897 bool IVSigned) { 2898 if (!CGF.HaveInsertPoint()) 2899 return; 2900 // Call __kmpc_for_dynamic_fini_(4|8)[u](ident_t *loc, kmp_int32 tid); 2901 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 2902 CGF.EmitRuntimeCall(createDispatchFiniFunction(IVSize, IVSigned), Args); 2903 } 2904 2905 llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF, 2906 SourceLocation Loc, unsigned IVSize, 2907 bool IVSigned, Address IL, 2908 Address LB, Address UB, 2909 Address ST) { 2910 // Call __kmpc_dispatch_next( 2911 // ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter, 2912 // kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper, 2913 // kmp_int[32|64] *p_stride); 2914 llvm::Value *Args[] = { 2915 emitUpdateLocation(CGF, Loc), 2916 getThreadID(CGF, Loc), 2917 IL.getPointer(), // &isLastIter 2918 LB.getPointer(), // &Lower 2919 UB.getPointer(), // &Upper 2920 ST.getPointer() // &Stride 2921 }; 2922 llvm::Value *Call = 2923 CGF.EmitRuntimeCall(createDispatchNextFunction(IVSize, IVSigned), Args); 2924 return CGF.EmitScalarConversion( 2925 Call, CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/1), 2926 CGF.getContext().BoolTy, Loc); 2927 } 2928 2929 void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF, 2930 llvm::Value 
*NumThreads, 2931 SourceLocation Loc) { 2932 if (!CGF.HaveInsertPoint()) 2933 return; 2934 // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads) 2935 llvm::Value *Args[] = { 2936 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 2937 CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)}; 2938 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 2939 CGM.getModule(), OMPRTL___kmpc_push_num_threads), 2940 Args); 2941 } 2942 2943 void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF, 2944 ProcBindKind ProcBind, 2945 SourceLocation Loc) { 2946 if (!CGF.HaveInsertPoint()) 2947 return; 2948 assert(ProcBind != OMP_PROC_BIND_unknown && "Unsupported proc_bind value."); 2949 // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind) 2950 llvm::Value *Args[] = { 2951 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 2952 llvm::ConstantInt::get(CGM.IntTy, unsigned(ProcBind), /*isSigned=*/true)}; 2953 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 2954 CGM.getModule(), OMPRTL___kmpc_push_proc_bind), 2955 Args); 2956 } 2957 2958 void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>, 2959 SourceLocation Loc, llvm::AtomicOrdering AO) { 2960 if (CGF.CGM.getLangOpts().OpenMPIRBuilder) { 2961 OMPBuilder.createFlush(CGF.Builder); 2962 } else { 2963 if (!CGF.HaveInsertPoint()) 2964 return; 2965 // Build call void __kmpc_flush(ident_t *loc) 2966 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 2967 CGM.getModule(), OMPRTL___kmpc_flush), 2968 emitUpdateLocation(CGF, Loc)); 2969 } 2970 } 2971 2972 namespace { 2973 /// Indexes of fields for type kmp_task_t. 2974 enum KmpTaskTFields { 2975 /// List of shared variables. 2976 KmpTaskTShareds, 2977 /// Task routine. 2978 KmpTaskTRoutine, 2979 /// Partition id for the untied tasks. 2980 KmpTaskTPartId, 2981 /// Function with call of destructors for private variables. 2982 Data1, 2983 /// Task priority. 
2984 Data2, 2985 /// (Taskloops only) Lower bound. 2986 KmpTaskTLowerBound, 2987 /// (Taskloops only) Upper bound. 2988 KmpTaskTUpperBound, 2989 /// (Taskloops only) Stride. 2990 KmpTaskTStride, 2991 /// (Taskloops only) Is last iteration flag. 2992 KmpTaskTLastIter, 2993 /// (Taskloops only) Reduction data. 2994 KmpTaskTReductions, 2995 }; 2996 } // anonymous namespace 2997 2998 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::empty() const { 2999 return OffloadEntriesTargetRegion.empty() && 3000 OffloadEntriesDeviceGlobalVar.empty(); 3001 } 3002 3003 /// Initialize target region entry. 3004 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: 3005 initializeTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID, 3006 StringRef ParentName, unsigned LineNum, 3007 unsigned Order) { 3008 assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is " 3009 "only required for the device " 3010 "code generation."); 3011 OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = 3012 OffloadEntryInfoTargetRegion(Order, /*Addr=*/nullptr, /*ID=*/nullptr, 3013 OMPTargetRegionEntryTargetRegion); 3014 ++OffloadingEntriesNum; 3015 } 3016 3017 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: 3018 registerTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID, 3019 StringRef ParentName, unsigned LineNum, 3020 llvm::Constant *Addr, llvm::Constant *ID, 3021 OMPTargetRegionEntryKind Flags) { 3022 // If we are emitting code for a target, the entry is already initialized, 3023 // only has to be registered. 3024 if (CGM.getLangOpts().OpenMPIsDevice) { 3025 // This could happen if the device compilation is invoked standalone. 
3026 if (!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum)) 3027 return; 3028 auto &Entry = 3029 OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum]; 3030 Entry.setAddress(Addr); 3031 Entry.setID(ID); 3032 Entry.setFlags(Flags); 3033 } else { 3034 if (Flags == 3035 OffloadEntriesInfoManagerTy::OMPTargetRegionEntryTargetRegion && 3036 hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum, 3037 /*IgnoreAddressId*/ true)) 3038 return; 3039 assert(!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum) && 3040 "Target region entry already registered!"); 3041 OffloadEntryInfoTargetRegion Entry(OffloadingEntriesNum, Addr, ID, Flags); 3042 OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = Entry; 3043 ++OffloadingEntriesNum; 3044 } 3045 } 3046 3047 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::hasTargetRegionEntryInfo( 3048 unsigned DeviceID, unsigned FileID, StringRef ParentName, unsigned LineNum, 3049 bool IgnoreAddressId) const { 3050 auto PerDevice = OffloadEntriesTargetRegion.find(DeviceID); 3051 if (PerDevice == OffloadEntriesTargetRegion.end()) 3052 return false; 3053 auto PerFile = PerDevice->second.find(FileID); 3054 if (PerFile == PerDevice->second.end()) 3055 return false; 3056 auto PerParentName = PerFile->second.find(ParentName); 3057 if (PerParentName == PerFile->second.end()) 3058 return false; 3059 auto PerLine = PerParentName->second.find(LineNum); 3060 if (PerLine == PerParentName->second.end()) 3061 return false; 3062 // Fail if this entry is already registered. 3063 if (!IgnoreAddressId && 3064 (PerLine->second.getAddress() || PerLine->second.getID())) 3065 return false; 3066 return true; 3067 } 3068 3069 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::actOnTargetRegionEntriesInfo( 3070 const OffloadTargetRegionEntryInfoActTy &Action) { 3071 // Scan all target region entries and perform the provided action. 
3072 for (const auto &D : OffloadEntriesTargetRegion) 3073 for (const auto &F : D.second) 3074 for (const auto &P : F.second) 3075 for (const auto &L : P.second) 3076 Action(D.first, F.first, P.first(), L.first, L.second); 3077 } 3078 3079 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: 3080 initializeDeviceGlobalVarEntryInfo(StringRef Name, 3081 OMPTargetGlobalVarEntryKind Flags, 3082 unsigned Order) { 3083 assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is " 3084 "only required for the device " 3085 "code generation."); 3086 OffloadEntriesDeviceGlobalVar.try_emplace(Name, Order, Flags); 3087 ++OffloadingEntriesNum; 3088 } 3089 3090 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: 3091 registerDeviceGlobalVarEntryInfo(StringRef VarName, llvm::Constant *Addr, 3092 CharUnits VarSize, 3093 OMPTargetGlobalVarEntryKind Flags, 3094 llvm::GlobalValue::LinkageTypes Linkage) { 3095 if (CGM.getLangOpts().OpenMPIsDevice) { 3096 // This could happen if the device compilation is invoked standalone. 
3097 if (!hasDeviceGlobalVarEntryInfo(VarName)) 3098 return; 3099 auto &Entry = OffloadEntriesDeviceGlobalVar[VarName]; 3100 if (Entry.getAddress() && hasDeviceGlobalVarEntryInfo(VarName)) { 3101 if (Entry.getVarSize().isZero()) { 3102 Entry.setVarSize(VarSize); 3103 Entry.setLinkage(Linkage); 3104 } 3105 return; 3106 } 3107 Entry.setVarSize(VarSize); 3108 Entry.setLinkage(Linkage); 3109 Entry.setAddress(Addr); 3110 } else { 3111 if (hasDeviceGlobalVarEntryInfo(VarName)) { 3112 auto &Entry = OffloadEntriesDeviceGlobalVar[VarName]; 3113 assert(Entry.isValid() && Entry.getFlags() == Flags && 3114 "Entry not initialized!"); 3115 if (Entry.getVarSize().isZero()) { 3116 Entry.setVarSize(VarSize); 3117 Entry.setLinkage(Linkage); 3118 } 3119 return; 3120 } 3121 OffloadEntriesDeviceGlobalVar.try_emplace( 3122 VarName, OffloadingEntriesNum, Addr, VarSize, Flags, Linkage); 3123 ++OffloadingEntriesNum; 3124 } 3125 } 3126 3127 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: 3128 actOnDeviceGlobalVarEntriesInfo( 3129 const OffloadDeviceGlobalVarEntryInfoActTy &Action) { 3130 // Scan all target region entries and perform the provided action. 3131 for (const auto &E : OffloadEntriesDeviceGlobalVar) 3132 Action(E.getKey(), E.getValue()); 3133 } 3134 3135 void CGOpenMPRuntime::createOffloadEntry( 3136 llvm::Constant *ID, llvm::Constant *Addr, uint64_t Size, int32_t Flags, 3137 llvm::GlobalValue::LinkageTypes Linkage) { 3138 StringRef Name = Addr->getName(); 3139 llvm::Module &M = CGM.getModule(); 3140 llvm::LLVMContext &C = M.getContext(); 3141 3142 // Create constant string with the name. 
3143 llvm::Constant *StrPtrInit = llvm::ConstantDataArray::getString(C, Name); 3144 3145 std::string StringName = getName({"omp_offloading", "entry_name"}); 3146 auto *Str = new llvm::GlobalVariable( 3147 M, StrPtrInit->getType(), /*isConstant=*/true, 3148 llvm::GlobalValue::InternalLinkage, StrPtrInit, StringName); 3149 Str->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); 3150 3151 llvm::Constant *Data[] = { 3152 llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast(ID, CGM.VoidPtrTy), 3153 llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast(Str, CGM.Int8PtrTy), 3154 llvm::ConstantInt::get(CGM.SizeTy, Size), 3155 llvm::ConstantInt::get(CGM.Int32Ty, Flags), 3156 llvm::ConstantInt::get(CGM.Int32Ty, 0)}; 3157 std::string EntryName = getName({"omp_offloading", "entry", ""}); 3158 llvm::GlobalVariable *Entry = createGlobalStruct( 3159 CGM, getTgtOffloadEntryQTy(), /*IsConstant=*/true, Data, 3160 Twine(EntryName).concat(Name), llvm::GlobalValue::WeakAnyLinkage); 3161 3162 // The entry has to be created in the section the linker expects it to be. 3163 Entry->setSection("omp_offloading_entries"); 3164 } 3165 3166 void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() { 3167 // Emit the offloading entries and metadata so that the device codegen side 3168 // can easily figure out what to emit. The produced metadata looks like 3169 // this: 3170 // 3171 // !omp_offload.info = !{!1, ...} 3172 // 3173 // Right now we only generate metadata for function that contain target 3174 // regions. 3175 3176 // If we are in simd mode or there are no entries, we don't need to do 3177 // anything. 
3178 if (CGM.getLangOpts().OpenMPSimd || OffloadEntriesInfoManager.empty()) 3179 return; 3180 3181 llvm::Module &M = CGM.getModule(); 3182 llvm::LLVMContext &C = M.getContext(); 3183 SmallVector<std::tuple<const OffloadEntriesInfoManagerTy::OffloadEntryInfo *, 3184 SourceLocation, StringRef>, 3185 16> 3186 OrderedEntries(OffloadEntriesInfoManager.size()); 3187 llvm::SmallVector<StringRef, 16> ParentFunctions( 3188 OffloadEntriesInfoManager.size()); 3189 3190 // Auxiliary methods to create metadata values and strings. 3191 auto &&GetMDInt = [this](unsigned V) { 3192 return llvm::ConstantAsMetadata::get( 3193 llvm::ConstantInt::get(CGM.Int32Ty, V)); 3194 }; 3195 3196 auto &&GetMDString = [&C](StringRef V) { return llvm::MDString::get(C, V); }; 3197 3198 // Create the offloading info metadata node. 3199 llvm::NamedMDNode *MD = M.getOrInsertNamedMetadata("omp_offload.info"); 3200 3201 // Create function that emits metadata for each target region entry; 3202 auto &&TargetRegionMetadataEmitter = 3203 [this, &C, MD, &OrderedEntries, &ParentFunctions, &GetMDInt, 3204 &GetMDString]( 3205 unsigned DeviceID, unsigned FileID, StringRef ParentName, 3206 unsigned Line, 3207 const OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion &E) { 3208 // Generate metadata for target regions. Each entry of this metadata 3209 // contains: 3210 // - Entry 0 -> Kind of this type of metadata (0). 3211 // - Entry 1 -> Device ID of the file where the entry was identified. 3212 // - Entry 2 -> File ID of the file where the entry was identified. 3213 // - Entry 3 -> Mangled name of the function where the entry was 3214 // identified. 3215 // - Entry 4 -> Line in the file where the entry was identified. 3216 // - Entry 5 -> Order the entry was created. 3217 // The first element of the metadata node is the kind. 
3218 llvm::Metadata *Ops[] = {GetMDInt(E.getKind()), GetMDInt(DeviceID), 3219 GetMDInt(FileID), GetMDString(ParentName), 3220 GetMDInt(Line), GetMDInt(E.getOrder())}; 3221 3222 SourceLocation Loc; 3223 for (auto I = CGM.getContext().getSourceManager().fileinfo_begin(), 3224 E = CGM.getContext().getSourceManager().fileinfo_end(); 3225 I != E; ++I) { 3226 if (I->getFirst()->getUniqueID().getDevice() == DeviceID && 3227 I->getFirst()->getUniqueID().getFile() == FileID) { 3228 Loc = CGM.getContext().getSourceManager().translateFileLineCol( 3229 I->getFirst(), Line, 1); 3230 break; 3231 } 3232 } 3233 // Save this entry in the right position of the ordered entries array. 3234 OrderedEntries[E.getOrder()] = std::make_tuple(&E, Loc, ParentName); 3235 ParentFunctions[E.getOrder()] = ParentName; 3236 3237 // Add metadata to the named metadata node. 3238 MD->addOperand(llvm::MDNode::get(C, Ops)); 3239 }; 3240 3241 OffloadEntriesInfoManager.actOnTargetRegionEntriesInfo( 3242 TargetRegionMetadataEmitter); 3243 3244 // Create function that emits metadata for each device global variable entry; 3245 auto &&DeviceGlobalVarMetadataEmitter = 3246 [&C, &OrderedEntries, &GetMDInt, &GetMDString, 3247 MD](StringRef MangledName, 3248 const OffloadEntriesInfoManagerTy::OffloadEntryInfoDeviceGlobalVar 3249 &E) { 3250 // Generate metadata for global variables. Each entry of this metadata 3251 // contains: 3252 // - Entry 0 -> Kind of this type of metadata (1). 3253 // - Entry 1 -> Mangled name of the variable. 3254 // - Entry 2 -> Declare target kind. 3255 // - Entry 3 -> Order the entry was created. 3256 // The first element of the metadata node is the kind. 3257 llvm::Metadata *Ops[] = { 3258 GetMDInt(E.getKind()), GetMDString(MangledName), 3259 GetMDInt(E.getFlags()), GetMDInt(E.getOrder())}; 3260 3261 // Save this entry in the right position of the ordered entries array. 
3262 OrderedEntries[E.getOrder()] = 3263 std::make_tuple(&E, SourceLocation(), MangledName); 3264 3265 // Add metadata to the named metadata node. 3266 MD->addOperand(llvm::MDNode::get(C, Ops)); 3267 }; 3268 3269 OffloadEntriesInfoManager.actOnDeviceGlobalVarEntriesInfo( 3270 DeviceGlobalVarMetadataEmitter); 3271 3272 for (const auto &E : OrderedEntries) { 3273 assert(std::get<0>(E) && "All ordered entries must exist!"); 3274 if (const auto *CE = 3275 dyn_cast<OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion>( 3276 std::get<0>(E))) { 3277 if (!CE->getID() || !CE->getAddress()) { 3278 // Do not blame the entry if the parent funtion is not emitted. 3279 StringRef FnName = ParentFunctions[CE->getOrder()]; 3280 if (!CGM.GetGlobalValue(FnName)) 3281 continue; 3282 unsigned DiagID = CGM.getDiags().getCustomDiagID( 3283 DiagnosticsEngine::Error, 3284 "Offloading entry for target region in %0 is incorrect: either the " 3285 "address or the ID is invalid."); 3286 CGM.getDiags().Report(std::get<1>(E), DiagID) << FnName; 3287 continue; 3288 } 3289 createOffloadEntry(CE->getID(), CE->getAddress(), /*Size=*/0, 3290 CE->getFlags(), llvm::GlobalValue::WeakAnyLinkage); 3291 } else if (const auto *CE = dyn_cast<OffloadEntriesInfoManagerTy:: 3292 OffloadEntryInfoDeviceGlobalVar>( 3293 std::get<0>(E))) { 3294 OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags = 3295 static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>( 3296 CE->getFlags()); 3297 switch (Flags) { 3298 case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo: { 3299 if (CGM.getLangOpts().OpenMPIsDevice && 3300 CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory()) 3301 continue; 3302 if (!CE->getAddress()) { 3303 unsigned DiagID = CGM.getDiags().getCustomDiagID( 3304 DiagnosticsEngine::Error, "Offloading entry for declare target " 3305 "variable %0 is incorrect: the " 3306 "address is invalid."); 3307 CGM.getDiags().Report(std::get<1>(E), DiagID) << std::get<2>(E); 3308 
continue; 3309 } 3310 // The vaiable has no definition - no need to add the entry. 3311 if (CE->getVarSize().isZero()) 3312 continue; 3313 break; 3314 } 3315 case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink: 3316 assert(((CGM.getLangOpts().OpenMPIsDevice && !CE->getAddress()) || 3317 (!CGM.getLangOpts().OpenMPIsDevice && CE->getAddress())) && 3318 "Declaret target link address is set."); 3319 if (CGM.getLangOpts().OpenMPIsDevice) 3320 continue; 3321 if (!CE->getAddress()) { 3322 unsigned DiagID = CGM.getDiags().getCustomDiagID( 3323 DiagnosticsEngine::Error, 3324 "Offloading entry for declare target variable is incorrect: the " 3325 "address is invalid."); 3326 CGM.getDiags().Report(DiagID); 3327 continue; 3328 } 3329 break; 3330 } 3331 createOffloadEntry(CE->getAddress(), CE->getAddress(), 3332 CE->getVarSize().getQuantity(), Flags, 3333 CE->getLinkage()); 3334 } else { 3335 llvm_unreachable("Unsupported entry kind."); 3336 } 3337 } 3338 } 3339 3340 /// Loads all the offload entries information from the host IR 3341 /// metadata. 3342 void CGOpenMPRuntime::loadOffloadInfoMetadata() { 3343 // If we are in target mode, load the metadata from the host IR. This code has 3344 // to match the metadaata creation in createOffloadEntriesAndInfoMetadata(). 
3345 3346 if (!CGM.getLangOpts().OpenMPIsDevice) 3347 return; 3348 3349 if (CGM.getLangOpts().OMPHostIRFile.empty()) 3350 return; 3351 3352 auto Buf = llvm::MemoryBuffer::getFile(CGM.getLangOpts().OMPHostIRFile); 3353 if (auto EC = Buf.getError()) { 3354 CGM.getDiags().Report(diag::err_cannot_open_file) 3355 << CGM.getLangOpts().OMPHostIRFile << EC.message(); 3356 return; 3357 } 3358 3359 llvm::LLVMContext C; 3360 auto ME = expectedToErrorOrAndEmitErrors( 3361 C, llvm::parseBitcodeFile(Buf.get()->getMemBufferRef(), C)); 3362 3363 if (auto EC = ME.getError()) { 3364 unsigned DiagID = CGM.getDiags().getCustomDiagID( 3365 DiagnosticsEngine::Error, "Unable to parse host IR file '%0':'%1'"); 3366 CGM.getDiags().Report(DiagID) 3367 << CGM.getLangOpts().OMPHostIRFile << EC.message(); 3368 return; 3369 } 3370 3371 llvm::NamedMDNode *MD = ME.get()->getNamedMetadata("omp_offload.info"); 3372 if (!MD) 3373 return; 3374 3375 for (llvm::MDNode *MN : MD->operands()) { 3376 auto &&GetMDInt = [MN](unsigned Idx) { 3377 auto *V = cast<llvm::ConstantAsMetadata>(MN->getOperand(Idx)); 3378 return cast<llvm::ConstantInt>(V->getValue())->getZExtValue(); 3379 }; 3380 3381 auto &&GetMDString = [MN](unsigned Idx) { 3382 auto *V = cast<llvm::MDString>(MN->getOperand(Idx)); 3383 return V->getString(); 3384 }; 3385 3386 switch (GetMDInt(0)) { 3387 default: 3388 llvm_unreachable("Unexpected metadata!"); 3389 break; 3390 case OffloadEntriesInfoManagerTy::OffloadEntryInfo:: 3391 OffloadingEntryInfoTargetRegion: 3392 OffloadEntriesInfoManager.initializeTargetRegionEntryInfo( 3393 /*DeviceID=*/GetMDInt(1), /*FileID=*/GetMDInt(2), 3394 /*ParentName=*/GetMDString(3), /*Line=*/GetMDInt(4), 3395 /*Order=*/GetMDInt(5)); 3396 break; 3397 case OffloadEntriesInfoManagerTy::OffloadEntryInfo:: 3398 OffloadingEntryInfoDeviceGlobalVar: 3399 OffloadEntriesInfoManager.initializeDeviceGlobalVarEntryInfo( 3400 /*MangledName=*/GetMDString(1), 3401 
static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>( 3402 /*Flags=*/GetMDInt(2)), 3403 /*Order=*/GetMDInt(3)); 3404 break; 3405 } 3406 } 3407 } 3408 3409 void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) { 3410 if (!KmpRoutineEntryPtrTy) { 3411 // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type. 3412 ASTContext &C = CGM.getContext(); 3413 QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy}; 3414 FunctionProtoType::ExtProtoInfo EPI; 3415 KmpRoutineEntryPtrQTy = C.getPointerType( 3416 C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI)); 3417 KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy); 3418 } 3419 } 3420 3421 QualType CGOpenMPRuntime::getTgtOffloadEntryQTy() { 3422 // Make sure the type of the entry is already created. This is the type we 3423 // have to create: 3424 // struct __tgt_offload_entry{ 3425 // void *addr; // Pointer to the offload entry info. 3426 // // (function or global) 3427 // char *name; // Name of the function or global. 3428 // size_t size; // Size of the entry info (0 if it a function). 3429 // int32_t flags; // Flags associated with the entry, e.g. 'link'. 3430 // int32_t reserved; // Reserved, to use by the runtime library. 
3431 // }; 3432 if (TgtOffloadEntryQTy.isNull()) { 3433 ASTContext &C = CGM.getContext(); 3434 RecordDecl *RD = C.buildImplicitRecord("__tgt_offload_entry"); 3435 RD->startDefinition(); 3436 addFieldToRecordDecl(C, RD, C.VoidPtrTy); 3437 addFieldToRecordDecl(C, RD, C.getPointerType(C.CharTy)); 3438 addFieldToRecordDecl(C, RD, C.getSizeType()); 3439 addFieldToRecordDecl( 3440 C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true)); 3441 addFieldToRecordDecl( 3442 C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true)); 3443 RD->completeDefinition(); 3444 RD->addAttr(PackedAttr::CreateImplicit(C)); 3445 TgtOffloadEntryQTy = C.getRecordType(RD); 3446 } 3447 return TgtOffloadEntryQTy; 3448 } 3449 3450 namespace { 3451 struct PrivateHelpersTy { 3452 PrivateHelpersTy(const Expr *OriginalRef, const VarDecl *Original, 3453 const VarDecl *PrivateCopy, const VarDecl *PrivateElemInit) 3454 : OriginalRef(OriginalRef), Original(Original), PrivateCopy(PrivateCopy), 3455 PrivateElemInit(PrivateElemInit) {} 3456 PrivateHelpersTy(const VarDecl *Original) : Original(Original) {} 3457 const Expr *OriginalRef = nullptr; 3458 const VarDecl *Original = nullptr; 3459 const VarDecl *PrivateCopy = nullptr; 3460 const VarDecl *PrivateElemInit = nullptr; 3461 bool isLocalPrivate() const { 3462 return !OriginalRef && !PrivateCopy && !PrivateElemInit; 3463 } 3464 }; 3465 typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy; 3466 } // anonymous namespace 3467 3468 static bool isAllocatableDecl(const VarDecl *VD) { 3469 const VarDecl *CVD = VD->getCanonicalDecl(); 3470 if (!CVD->hasAttr<OMPAllocateDeclAttr>()) 3471 return false; 3472 const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>(); 3473 // Use the default allocation. 
3474 return !(AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc && 3475 !AA->getAllocator()); 3476 } 3477 3478 static RecordDecl * 3479 createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) { 3480 if (!Privates.empty()) { 3481 ASTContext &C = CGM.getContext(); 3482 // Build struct .kmp_privates_t. { 3483 // /* private vars */ 3484 // }; 3485 RecordDecl *RD = C.buildImplicitRecord(".kmp_privates.t"); 3486 RD->startDefinition(); 3487 for (const auto &Pair : Privates) { 3488 const VarDecl *VD = Pair.second.Original; 3489 QualType Type = VD->getType().getNonReferenceType(); 3490 // If the private variable is a local variable with lvalue ref type, 3491 // allocate the pointer instead of the pointee type. 3492 if (Pair.second.isLocalPrivate()) { 3493 if (VD->getType()->isLValueReferenceType()) 3494 Type = C.getPointerType(Type); 3495 if (isAllocatableDecl(VD)) 3496 Type = C.getPointerType(Type); 3497 } 3498 FieldDecl *FD = addFieldToRecordDecl(C, RD, Type); 3499 if (VD->hasAttrs()) { 3500 for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()), 3501 E(VD->getAttrs().end()); 3502 I != E; ++I) 3503 FD->addAttr(*I); 3504 } 3505 } 3506 RD->completeDefinition(); 3507 return RD; 3508 } 3509 return nullptr; 3510 } 3511 3512 static RecordDecl * 3513 createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind, 3514 QualType KmpInt32Ty, 3515 QualType KmpRoutineEntryPointerQTy) { 3516 ASTContext &C = CGM.getContext(); 3517 // Build struct kmp_task_t { 3518 // void * shareds; 3519 // kmp_routine_entry_t routine; 3520 // kmp_int32 part_id; 3521 // kmp_cmplrdata_t data1; 3522 // kmp_cmplrdata_t data2; 3523 // For taskloops additional fields: 3524 // kmp_uint64 lb; 3525 // kmp_uint64 ub; 3526 // kmp_int64 st; 3527 // kmp_int32 liter; 3528 // void * reductions; 3529 // }; 3530 RecordDecl *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TTK_Union); 3531 UD->startDefinition(); 3532 addFieldToRecordDecl(C, UD, KmpInt32Ty); 3533 
addFieldToRecordDecl(C, UD, KmpRoutineEntryPointerQTy); 3534 UD->completeDefinition(); 3535 QualType KmpCmplrdataTy = C.getRecordType(UD); 3536 RecordDecl *RD = C.buildImplicitRecord("kmp_task_t"); 3537 RD->startDefinition(); 3538 addFieldToRecordDecl(C, RD, C.VoidPtrTy); 3539 addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy); 3540 addFieldToRecordDecl(C, RD, KmpInt32Ty); 3541 addFieldToRecordDecl(C, RD, KmpCmplrdataTy); 3542 addFieldToRecordDecl(C, RD, KmpCmplrdataTy); 3543 if (isOpenMPTaskLoopDirective(Kind)) { 3544 QualType KmpUInt64Ty = 3545 CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0); 3546 QualType KmpInt64Ty = 3547 CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1); 3548 addFieldToRecordDecl(C, RD, KmpUInt64Ty); 3549 addFieldToRecordDecl(C, RD, KmpUInt64Ty); 3550 addFieldToRecordDecl(C, RD, KmpInt64Ty); 3551 addFieldToRecordDecl(C, RD, KmpInt32Ty); 3552 addFieldToRecordDecl(C, RD, C.VoidPtrTy); 3553 } 3554 RD->completeDefinition(); 3555 return RD; 3556 } 3557 3558 static RecordDecl * 3559 createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy, 3560 ArrayRef<PrivateDataTy> Privates) { 3561 ASTContext &C = CGM.getContext(); 3562 // Build struct kmp_task_t_with_privates { 3563 // kmp_task_t task_data; 3564 // .kmp_privates_t. privates; 3565 // }; 3566 RecordDecl *RD = C.buildImplicitRecord("kmp_task_t_with_privates"); 3567 RD->startDefinition(); 3568 addFieldToRecordDecl(C, RD, KmpTaskTQTy); 3569 if (const RecordDecl *PrivateRD = createPrivatesRecordDecl(CGM, Privates)) 3570 addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD)); 3571 RD->completeDefinition(); 3572 return RD; 3573 } 3574 3575 /// Emit a proxy function which accepts kmp_task_t as the second 3576 /// argument. 
3577 /// \code 3578 /// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) { 3579 /// TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, tt, 3580 /// For taskloops: 3581 /// tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter, 3582 /// tt->reductions, tt->shareds); 3583 /// return 0; 3584 /// } 3585 /// \endcode 3586 static llvm::Function * 3587 emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc, 3588 OpenMPDirectiveKind Kind, QualType KmpInt32Ty, 3589 QualType KmpTaskTWithPrivatesPtrQTy, 3590 QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy, 3591 QualType SharedsPtrTy, llvm::Function *TaskFunction, 3592 llvm::Value *TaskPrivatesMap) { 3593 ASTContext &C = CGM.getContext(); 3594 FunctionArgList Args; 3595 ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty, 3596 ImplicitParamDecl::Other); 3597 ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3598 KmpTaskTWithPrivatesPtrQTy.withRestrict(), 3599 ImplicitParamDecl::Other); 3600 Args.push_back(&GtidArg); 3601 Args.push_back(&TaskTypeArg); 3602 const auto &TaskEntryFnInfo = 3603 CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args); 3604 llvm::FunctionType *TaskEntryTy = 3605 CGM.getTypes().GetFunctionType(TaskEntryFnInfo); 3606 std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_entry", ""}); 3607 auto *TaskEntry = llvm::Function::Create( 3608 TaskEntryTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule()); 3609 CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskEntry, TaskEntryFnInfo); 3610 TaskEntry->setDoesNotRecurse(); 3611 CodeGenFunction CGF(CGM); 3612 CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args, 3613 Loc, Loc); 3614 3615 // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map, 3616 // tt, 3617 // For taskloops: 3618 // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter, 3619 // 
tt->task_data.shareds); 3620 llvm::Value *GtidParam = CGF.EmitLoadOfScalar( 3621 CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc); 3622 LValue TDBase = CGF.EmitLoadOfPointerLValue( 3623 CGF.GetAddrOfLocalVar(&TaskTypeArg), 3624 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>()); 3625 const auto *KmpTaskTWithPrivatesQTyRD = 3626 cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl()); 3627 LValue Base = 3628 CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin()); 3629 const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl()); 3630 auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId); 3631 LValue PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI); 3632 llvm::Value *PartidParam = PartIdLVal.getPointer(CGF); 3633 3634 auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds); 3635 LValue SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI); 3636 llvm::Value *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 3637 CGF.EmitLoadOfScalar(SharedsLVal, Loc), 3638 CGF.ConvertTypeForMem(SharedsPtrTy)); 3639 3640 auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1); 3641 llvm::Value *PrivatesParam; 3642 if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) { 3643 LValue PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI); 3644 PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 3645 PrivatesLVal.getPointer(CGF), CGF.VoidPtrTy); 3646 } else { 3647 PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); 3648 } 3649 3650 llvm::Value *CommonArgs[] = {GtidParam, PartidParam, PrivatesParam, 3651 TaskPrivatesMap, 3652 CGF.Builder 3653 .CreatePointerBitCastOrAddrSpaceCast( 3654 TDBase.getAddress(CGF), CGF.VoidPtrTy) 3655 .getPointer()}; 3656 SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs), 3657 std::end(CommonArgs)); 3658 if (isOpenMPTaskLoopDirective(Kind)) { 3659 auto LBFI = 
std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound); 3660 LValue LBLVal = CGF.EmitLValueForField(Base, *LBFI); 3661 llvm::Value *LBParam = CGF.EmitLoadOfScalar(LBLVal, Loc); 3662 auto UBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound); 3663 LValue UBLVal = CGF.EmitLValueForField(Base, *UBFI); 3664 llvm::Value *UBParam = CGF.EmitLoadOfScalar(UBLVal, Loc); 3665 auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride); 3666 LValue StLVal = CGF.EmitLValueForField(Base, *StFI); 3667 llvm::Value *StParam = CGF.EmitLoadOfScalar(StLVal, Loc); 3668 auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter); 3669 LValue LILVal = CGF.EmitLValueForField(Base, *LIFI); 3670 llvm::Value *LIParam = CGF.EmitLoadOfScalar(LILVal, Loc); 3671 auto RFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTReductions); 3672 LValue RLVal = CGF.EmitLValueForField(Base, *RFI); 3673 llvm::Value *RParam = CGF.EmitLoadOfScalar(RLVal, Loc); 3674 CallArgs.push_back(LBParam); 3675 CallArgs.push_back(UBParam); 3676 CallArgs.push_back(StParam); 3677 CallArgs.push_back(LIParam); 3678 CallArgs.push_back(RParam); 3679 } 3680 CallArgs.push_back(SharedsParam); 3681 3682 CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskFunction, 3683 CallArgs); 3684 CGF.EmitStoreThroughLValue(RValue::get(CGF.Builder.getInt32(/*C=*/0)), 3685 CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty)); 3686 CGF.FinishFunction(); 3687 return TaskEntry; 3688 } 3689 3690 static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM, 3691 SourceLocation Loc, 3692 QualType KmpInt32Ty, 3693 QualType KmpTaskTWithPrivatesPtrQTy, 3694 QualType KmpTaskTWithPrivatesQTy) { 3695 ASTContext &C = CGM.getContext(); 3696 FunctionArgList Args; 3697 ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty, 3698 ImplicitParamDecl::Other); 3699 ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3700 KmpTaskTWithPrivatesPtrQTy.withRestrict(), 3701 
ImplicitParamDecl::Other); 3702 Args.push_back(&GtidArg); 3703 Args.push_back(&TaskTypeArg); 3704 const auto &DestructorFnInfo = 3705 CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args); 3706 llvm::FunctionType *DestructorFnTy = 3707 CGM.getTypes().GetFunctionType(DestructorFnInfo); 3708 std::string Name = 3709 CGM.getOpenMPRuntime().getName({"omp_task_destructor", ""}); 3710 auto *DestructorFn = 3711 llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage, 3712 Name, &CGM.getModule()); 3713 CGM.SetInternalFunctionAttributes(GlobalDecl(), DestructorFn, 3714 DestructorFnInfo); 3715 DestructorFn->setDoesNotRecurse(); 3716 CodeGenFunction CGF(CGM); 3717 CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo, 3718 Args, Loc, Loc); 3719 3720 LValue Base = CGF.EmitLoadOfPointerLValue( 3721 CGF.GetAddrOfLocalVar(&TaskTypeArg), 3722 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>()); 3723 const auto *KmpTaskTWithPrivatesQTyRD = 3724 cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl()); 3725 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin()); 3726 Base = CGF.EmitLValueForField(Base, *FI); 3727 for (const auto *Field : 3728 cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) { 3729 if (QualType::DestructionKind DtorKind = 3730 Field->getType().isDestructedType()) { 3731 LValue FieldLValue = CGF.EmitLValueForField(Base, Field); 3732 CGF.pushDestroy(DtorKind, FieldLValue.getAddress(CGF), Field->getType()); 3733 } 3734 } 3735 CGF.FinishFunction(); 3736 return DestructorFn; 3737 } 3738 3739 /// Emit a privates mapping function for correct handling of private and 3740 /// firstprivate variables. 3741 /// \code 3742 /// void .omp_task_privates_map.(const .privates. 
*noalias privs, <ty1> 3743 /// **noalias priv1,..., <tyn> **noalias privn) { 3744 /// *priv1 = &.privates.priv1; 3745 /// ...; 3746 /// *privn = &.privates.privn; 3747 /// } 3748 /// \endcode 3749 static llvm::Value * 3750 emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc, 3751 const OMPTaskDataTy &Data, QualType PrivatesQTy, 3752 ArrayRef<PrivateDataTy> Privates) { 3753 ASTContext &C = CGM.getContext(); 3754 FunctionArgList Args; 3755 ImplicitParamDecl TaskPrivatesArg( 3756 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3757 C.getPointerType(PrivatesQTy).withConst().withRestrict(), 3758 ImplicitParamDecl::Other); 3759 Args.push_back(&TaskPrivatesArg); 3760 llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, unsigned> PrivateVarsPos; 3761 unsigned Counter = 1; 3762 for (const Expr *E : Data.PrivateVars) { 3763 Args.push_back(ImplicitParamDecl::Create( 3764 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3765 C.getPointerType(C.getPointerType(E->getType())) 3766 .withConst() 3767 .withRestrict(), 3768 ImplicitParamDecl::Other)); 3769 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 3770 PrivateVarsPos[VD] = Counter; 3771 ++Counter; 3772 } 3773 for (const Expr *E : Data.FirstprivateVars) { 3774 Args.push_back(ImplicitParamDecl::Create( 3775 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3776 C.getPointerType(C.getPointerType(E->getType())) 3777 .withConst() 3778 .withRestrict(), 3779 ImplicitParamDecl::Other)); 3780 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 3781 PrivateVarsPos[VD] = Counter; 3782 ++Counter; 3783 } 3784 for (const Expr *E : Data.LastprivateVars) { 3785 Args.push_back(ImplicitParamDecl::Create( 3786 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3787 C.getPointerType(C.getPointerType(E->getType())) 3788 .withConst() 3789 .withRestrict(), 3790 ImplicitParamDecl::Other)); 3791 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 3792 PrivateVarsPos[VD] = Counter; 3793 ++Counter; 3794 } 3795 for (const VarDecl *VD : 
Data.PrivateLocals) { 3796 QualType Ty = VD->getType().getNonReferenceType(); 3797 if (VD->getType()->isLValueReferenceType()) 3798 Ty = C.getPointerType(Ty); 3799 if (isAllocatableDecl(VD)) 3800 Ty = C.getPointerType(Ty); 3801 Args.push_back(ImplicitParamDecl::Create( 3802 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3803 C.getPointerType(C.getPointerType(Ty)).withConst().withRestrict(), 3804 ImplicitParamDecl::Other)); 3805 PrivateVarsPos[VD] = Counter; 3806 ++Counter; 3807 } 3808 const auto &TaskPrivatesMapFnInfo = 3809 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 3810 llvm::FunctionType *TaskPrivatesMapTy = 3811 CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo); 3812 std::string Name = 3813 CGM.getOpenMPRuntime().getName({"omp_task_privates_map", ""}); 3814 auto *TaskPrivatesMap = llvm::Function::Create( 3815 TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage, Name, 3816 &CGM.getModule()); 3817 CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskPrivatesMap, 3818 TaskPrivatesMapFnInfo); 3819 if (CGM.getLangOpts().Optimize) { 3820 TaskPrivatesMap->removeFnAttr(llvm::Attribute::NoInline); 3821 TaskPrivatesMap->removeFnAttr(llvm::Attribute::OptimizeNone); 3822 TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline); 3823 } 3824 CodeGenFunction CGF(CGM); 3825 CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap, 3826 TaskPrivatesMapFnInfo, Args, Loc, Loc); 3827 3828 // *privi = &.privates.privi; 3829 LValue Base = CGF.EmitLoadOfPointerLValue( 3830 CGF.GetAddrOfLocalVar(&TaskPrivatesArg), 3831 TaskPrivatesArg.getType()->castAs<PointerType>()); 3832 const auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl()); 3833 Counter = 0; 3834 for (const FieldDecl *Field : PrivatesQTyRD->fields()) { 3835 LValue FieldLVal = CGF.EmitLValueForField(Base, Field); 3836 const VarDecl *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]]; 3837 LValue RefLVal = 3838 CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType()); 3839 
LValue RefLoadLVal = CGF.EmitLoadOfPointerLValue( 3840 RefLVal.getAddress(CGF), RefLVal.getType()->castAs<PointerType>()); 3841 CGF.EmitStoreOfScalar(FieldLVal.getPointer(CGF), RefLoadLVal); 3842 ++Counter; 3843 } 3844 CGF.FinishFunction(); 3845 return TaskPrivatesMap; 3846 } 3847 3848 /// Emit initialization for private variables in task-based directives. 3849 static void emitPrivatesInit(CodeGenFunction &CGF, 3850 const OMPExecutableDirective &D, 3851 Address KmpTaskSharedsPtr, LValue TDBase, 3852 const RecordDecl *KmpTaskTWithPrivatesQTyRD, 3853 QualType SharedsTy, QualType SharedsPtrTy, 3854 const OMPTaskDataTy &Data, 3855 ArrayRef<PrivateDataTy> Privates, bool ForDup) { 3856 ASTContext &C = CGF.getContext(); 3857 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin()); 3858 LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI); 3859 OpenMPDirectiveKind Kind = isOpenMPTaskLoopDirective(D.getDirectiveKind()) 3860 ? OMPD_taskloop 3861 : OMPD_task; 3862 const CapturedStmt &CS = *D.getCapturedStmt(Kind); 3863 CodeGenFunction::CGCapturedStmtInfo CapturesInfo(CS); 3864 LValue SrcBase; 3865 bool IsTargetTask = 3866 isOpenMPTargetDataManagementDirective(D.getDirectiveKind()) || 3867 isOpenMPTargetExecutionDirective(D.getDirectiveKind()); 3868 // For target-based directives skip 4 firstprivate arrays BasePointersArray, 3869 // PointersArray, SizesArray, and MappersArray. The original variables for 3870 // these arrays are not captured and we get their addresses explicitly. 3871 if ((!IsTargetTask && !Data.FirstprivateVars.empty() && ForDup) || 3872 (IsTargetTask && KmpTaskSharedsPtr.isValid())) { 3873 SrcBase = CGF.MakeAddrLValue( 3874 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 3875 KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy)), 3876 SharedsTy); 3877 } 3878 FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin(); 3879 for (const PrivateDataTy &Pair : Privates) { 3880 // Do not initialize private locals. 
3881 if (Pair.second.isLocalPrivate()) { 3882 ++FI; 3883 continue; 3884 } 3885 const VarDecl *VD = Pair.second.PrivateCopy; 3886 const Expr *Init = VD->getAnyInitializer(); 3887 if (Init && (!ForDup || (isa<CXXConstructExpr>(Init) && 3888 !CGF.isTrivialInitializer(Init)))) { 3889 LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI); 3890 if (const VarDecl *Elem = Pair.second.PrivateElemInit) { 3891 const VarDecl *OriginalVD = Pair.second.Original; 3892 // Check if the variable is the target-based BasePointersArray, 3893 // PointersArray, SizesArray, or MappersArray. 3894 LValue SharedRefLValue; 3895 QualType Type = PrivateLValue.getType(); 3896 const FieldDecl *SharedField = CapturesInfo.lookup(OriginalVD); 3897 if (IsTargetTask && !SharedField) { 3898 assert(isa<ImplicitParamDecl>(OriginalVD) && 3899 isa<CapturedDecl>(OriginalVD->getDeclContext()) && 3900 cast<CapturedDecl>(OriginalVD->getDeclContext()) 3901 ->getNumParams() == 0 && 3902 isa<TranslationUnitDecl>( 3903 cast<CapturedDecl>(OriginalVD->getDeclContext()) 3904 ->getDeclContext()) && 3905 "Expected artificial target data variable."); 3906 SharedRefLValue = 3907 CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(OriginalVD), Type); 3908 } else if (ForDup) { 3909 SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField); 3910 SharedRefLValue = CGF.MakeAddrLValue( 3911 SharedRefLValue.getAddress(CGF).withAlignment( 3912 C.getDeclAlign(OriginalVD)), 3913 SharedRefLValue.getType(), LValueBaseInfo(AlignmentSource::Decl), 3914 SharedRefLValue.getTBAAInfo()); 3915 } else if (CGF.LambdaCaptureFields.count( 3916 Pair.second.Original->getCanonicalDecl()) > 0 || 3917 isa_and_nonnull<BlockDecl>(CGF.CurCodeDecl)) { 3918 SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef); 3919 } else { 3920 // Processing for implicitly captured variables. 
3921 InlinedOpenMPRegionRAII Region( 3922 CGF, [](CodeGenFunction &, PrePostActionTy &) {}, OMPD_unknown, 3923 /*HasCancel=*/false, /*NoInheritance=*/true); 3924 SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef); 3925 } 3926 if (Type->isArrayType()) { 3927 // Initialize firstprivate array. 3928 if (!isa<CXXConstructExpr>(Init) || CGF.isTrivialInitializer(Init)) { 3929 // Perform simple memcpy. 3930 CGF.EmitAggregateAssign(PrivateLValue, SharedRefLValue, Type); 3931 } else { 3932 // Initialize firstprivate array using element-by-element 3933 // initialization. 3934 CGF.EmitOMPAggregateAssign( 3935 PrivateLValue.getAddress(CGF), SharedRefLValue.getAddress(CGF), 3936 Type, 3937 [&CGF, Elem, Init, &CapturesInfo](Address DestElement, 3938 Address SrcElement) { 3939 // Clean up any temporaries needed by the initialization. 3940 CodeGenFunction::OMPPrivateScope InitScope(CGF); 3941 InitScope.addPrivate( 3942 Elem, [SrcElement]() -> Address { return SrcElement; }); 3943 (void)InitScope.Privatize(); 3944 // Emit initialization for single element. 3945 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII( 3946 CGF, &CapturesInfo); 3947 CGF.EmitAnyExprToMem(Init, DestElement, 3948 Init->getType().getQualifiers(), 3949 /*IsInitializer=*/false); 3950 }); 3951 } 3952 } else { 3953 CodeGenFunction::OMPPrivateScope InitScope(CGF); 3954 InitScope.addPrivate(Elem, [SharedRefLValue, &CGF]() -> Address { 3955 return SharedRefLValue.getAddress(CGF); 3956 }); 3957 (void)InitScope.Privatize(); 3958 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo); 3959 CGF.EmitExprAsInit(Init, VD, PrivateLValue, 3960 /*capturedByInit=*/false); 3961 } 3962 } else { 3963 CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false); 3964 } 3965 } 3966 ++FI; 3967 } 3968 } 3969 3970 /// Check if duplication function is required for taskloops. 
3971 static bool checkInitIsRequired(CodeGenFunction &CGF, 3972 ArrayRef<PrivateDataTy> Privates) { 3973 bool InitRequired = false; 3974 for (const PrivateDataTy &Pair : Privates) { 3975 if (Pair.second.isLocalPrivate()) 3976 continue; 3977 const VarDecl *VD = Pair.second.PrivateCopy; 3978 const Expr *Init = VD->getAnyInitializer(); 3979 InitRequired = InitRequired || (isa_and_nonnull<CXXConstructExpr>(Init) && 3980 !CGF.isTrivialInitializer(Init)); 3981 if (InitRequired) 3982 break; 3983 } 3984 return InitRequired; 3985 } 3986 3987 3988 /// Emit task_dup function (for initialization of 3989 /// private/firstprivate/lastprivate vars and last_iter flag) 3990 /// \code 3991 /// void __task_dup_entry(kmp_task_t *task_dst, const kmp_task_t *task_src, int 3992 /// lastpriv) { 3993 /// // setup lastprivate flag 3994 /// task_dst->last = lastpriv; 3995 /// // could be constructor calls here... 3996 /// } 3997 /// \endcode 3998 static llvm::Value * 3999 emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc, 4000 const OMPExecutableDirective &D, 4001 QualType KmpTaskTWithPrivatesPtrQTy, 4002 const RecordDecl *KmpTaskTWithPrivatesQTyRD, 4003 const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy, 4004 QualType SharedsPtrTy, const OMPTaskDataTy &Data, 4005 ArrayRef<PrivateDataTy> Privates, bool WithLastIter) { 4006 ASTContext &C = CGM.getContext(); 4007 FunctionArgList Args; 4008 ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 4009 KmpTaskTWithPrivatesPtrQTy, 4010 ImplicitParamDecl::Other); 4011 ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 4012 KmpTaskTWithPrivatesPtrQTy, 4013 ImplicitParamDecl::Other); 4014 ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy, 4015 ImplicitParamDecl::Other); 4016 Args.push_back(&DstArg); 4017 Args.push_back(&SrcArg); 4018 Args.push_back(&LastprivArg); 4019 const auto &TaskDupFnInfo = 4020 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 4021 llvm::FunctionType 
*TaskDupTy = CGM.getTypes().GetFunctionType(TaskDupFnInfo); 4022 std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_dup", ""}); 4023 auto *TaskDup = llvm::Function::Create( 4024 TaskDupTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule()); 4025 CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskDup, TaskDupFnInfo); 4026 TaskDup->setDoesNotRecurse(); 4027 CodeGenFunction CGF(CGM); 4028 CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskDup, TaskDupFnInfo, Args, Loc, 4029 Loc); 4030 4031 LValue TDBase = CGF.EmitLoadOfPointerLValue( 4032 CGF.GetAddrOfLocalVar(&DstArg), 4033 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>()); 4034 // task_dst->liter = lastpriv; 4035 if (WithLastIter) { 4036 auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter); 4037 LValue Base = CGF.EmitLValueForField( 4038 TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin()); 4039 LValue LILVal = CGF.EmitLValueForField(Base, *LIFI); 4040 llvm::Value *Lastpriv = CGF.EmitLoadOfScalar( 4041 CGF.GetAddrOfLocalVar(&LastprivArg), /*Volatile=*/false, C.IntTy, Loc); 4042 CGF.EmitStoreOfScalar(Lastpriv, LILVal); 4043 } 4044 4045 // Emit initial values for private copies (if any). 
4046 assert(!Privates.empty()); 4047 Address KmpTaskSharedsPtr = Address::invalid(); 4048 if (!Data.FirstprivateVars.empty()) { 4049 LValue TDBase = CGF.EmitLoadOfPointerLValue( 4050 CGF.GetAddrOfLocalVar(&SrcArg), 4051 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>()); 4052 LValue Base = CGF.EmitLValueForField( 4053 TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin()); 4054 KmpTaskSharedsPtr = Address::deprecated( 4055 CGF.EmitLoadOfScalar(CGF.EmitLValueForField( 4056 Base, *std::next(KmpTaskTQTyRD->field_begin(), 4057 KmpTaskTShareds)), 4058 Loc), 4059 CGM.getNaturalTypeAlignment(SharedsTy)); 4060 } 4061 emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase, KmpTaskTWithPrivatesQTyRD, 4062 SharedsTy, SharedsPtrTy, Data, Privates, /*ForDup=*/true); 4063 CGF.FinishFunction(); 4064 return TaskDup; 4065 } 4066 4067 /// Checks if destructor function is required to be generated. 4068 /// \return true if cleanups are required, false otherwise. 4069 static bool 4070 checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD, 4071 ArrayRef<PrivateDataTy> Privates) { 4072 for (const PrivateDataTy &P : Privates) { 4073 if (P.second.isLocalPrivate()) 4074 continue; 4075 QualType Ty = P.second.Original->getType().getNonReferenceType(); 4076 if (Ty.isDestructedType()) 4077 return true; 4078 } 4079 return false; 4080 } 4081 4082 namespace { 4083 /// Loop generator for OpenMP iterator expression. 
4084 class OMPIteratorGeneratorScope final 4085 : public CodeGenFunction::OMPPrivateScope { 4086 CodeGenFunction &CGF; 4087 const OMPIteratorExpr *E = nullptr; 4088 SmallVector<CodeGenFunction::JumpDest, 4> ContDests; 4089 SmallVector<CodeGenFunction::JumpDest, 4> ExitDests; 4090 OMPIteratorGeneratorScope() = delete; 4091 OMPIteratorGeneratorScope(OMPIteratorGeneratorScope &) = delete; 4092 4093 public: 4094 OMPIteratorGeneratorScope(CodeGenFunction &CGF, const OMPIteratorExpr *E) 4095 : CodeGenFunction::OMPPrivateScope(CGF), CGF(CGF), E(E) { 4096 if (!E) 4097 return; 4098 SmallVector<llvm::Value *, 4> Uppers; 4099 for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) { 4100 Uppers.push_back(CGF.EmitScalarExpr(E->getHelper(I).Upper)); 4101 const auto *VD = cast<VarDecl>(E->getIteratorDecl(I)); 4102 addPrivate(VD, [&CGF, VD]() { 4103 return CGF.CreateMemTemp(VD->getType(), VD->getName()); 4104 }); 4105 const OMPIteratorHelperData &HelperData = E->getHelper(I); 4106 addPrivate(HelperData.CounterVD, [&CGF, &HelperData]() { 4107 return CGF.CreateMemTemp(HelperData.CounterVD->getType(), 4108 "counter.addr"); 4109 }); 4110 } 4111 Privatize(); 4112 4113 for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) { 4114 const OMPIteratorHelperData &HelperData = E->getHelper(I); 4115 LValue CLVal = 4116 CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(HelperData.CounterVD), 4117 HelperData.CounterVD->getType()); 4118 // Counter = 0; 4119 CGF.EmitStoreOfScalar( 4120 llvm::ConstantInt::get(CLVal.getAddress(CGF).getElementType(), 0), 4121 CLVal); 4122 CodeGenFunction::JumpDest &ContDest = 4123 ContDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.cont")); 4124 CodeGenFunction::JumpDest &ExitDest = 4125 ExitDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.exit")); 4126 // N = <number-of_iterations>; 4127 llvm::Value *N = Uppers[I]; 4128 // cont: 4129 // if (Counter < N) goto body; else goto exit; 4130 CGF.EmitBlock(ContDest.getBlock()); 4131 auto *CVal = 4132 
CGF.EmitLoadOfScalar(CLVal, HelperData.CounterVD->getLocation()); 4133 llvm::Value *Cmp = 4134 HelperData.CounterVD->getType()->isSignedIntegerOrEnumerationType() 4135 ? CGF.Builder.CreateICmpSLT(CVal, N) 4136 : CGF.Builder.CreateICmpULT(CVal, N); 4137 llvm::BasicBlock *BodyBB = CGF.createBasicBlock("iter.body"); 4138 CGF.Builder.CreateCondBr(Cmp, BodyBB, ExitDest.getBlock()); 4139 // body: 4140 CGF.EmitBlock(BodyBB); 4141 // Iteri = Begini + Counter * Stepi; 4142 CGF.EmitIgnoredExpr(HelperData.Update); 4143 } 4144 } 4145 ~OMPIteratorGeneratorScope() { 4146 if (!E) 4147 return; 4148 for (unsigned I = E->numOfIterators(); I > 0; --I) { 4149 // Counter = Counter + 1; 4150 const OMPIteratorHelperData &HelperData = E->getHelper(I - 1); 4151 CGF.EmitIgnoredExpr(HelperData.CounterUpdate); 4152 // goto cont; 4153 CGF.EmitBranchThroughCleanup(ContDests[I - 1]); 4154 // exit: 4155 CGF.EmitBlock(ExitDests[I - 1].getBlock(), /*IsFinished=*/I == 1); 4156 } 4157 } 4158 }; 4159 } // namespace 4160 4161 static std::pair<llvm::Value *, llvm::Value *> 4162 getPointerAndSize(CodeGenFunction &CGF, const Expr *E) { 4163 const auto *OASE = dyn_cast<OMPArrayShapingExpr>(E); 4164 llvm::Value *Addr; 4165 if (OASE) { 4166 const Expr *Base = OASE->getBase(); 4167 Addr = CGF.EmitScalarExpr(Base); 4168 } else { 4169 Addr = CGF.EmitLValue(E).getPointer(CGF); 4170 } 4171 llvm::Value *SizeVal; 4172 QualType Ty = E->getType(); 4173 if (OASE) { 4174 SizeVal = CGF.getTypeSize(OASE->getBase()->getType()->getPointeeType()); 4175 for (const Expr *SE : OASE->getDimensions()) { 4176 llvm::Value *Sz = CGF.EmitScalarExpr(SE); 4177 Sz = CGF.EmitScalarConversion( 4178 Sz, SE->getType(), CGF.getContext().getSizeType(), SE->getExprLoc()); 4179 SizeVal = CGF.Builder.CreateNUWMul(SizeVal, Sz); 4180 } 4181 } else if (const auto *ASE = 4182 dyn_cast<OMPArraySectionExpr>(E->IgnoreParenImpCasts())) { 4183 LValue UpAddrLVal = 4184 CGF.EmitOMPArraySectionExpr(ASE, /*IsLowerBound=*/false); 4185 Address UpAddrAddress = 
UpAddrLVal.getAddress(CGF); 4186 llvm::Value *UpAddr = CGF.Builder.CreateConstGEP1_32( 4187 UpAddrAddress.getElementType(), UpAddrAddress.getPointer(), /*Idx0=*/1); 4188 llvm::Value *LowIntPtr = CGF.Builder.CreatePtrToInt(Addr, CGF.SizeTy); 4189 llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGF.SizeTy); 4190 SizeVal = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr); 4191 } else { 4192 SizeVal = CGF.getTypeSize(Ty); 4193 } 4194 return std::make_pair(Addr, SizeVal); 4195 } 4196 4197 /// Builds kmp_depend_info, if it is not built yet, and builds flags type. 4198 static void getKmpAffinityType(ASTContext &C, QualType &KmpTaskAffinityInfoTy) { 4199 QualType FlagsTy = C.getIntTypeForBitwidth(32, /*Signed=*/false); 4200 if (KmpTaskAffinityInfoTy.isNull()) { 4201 RecordDecl *KmpAffinityInfoRD = 4202 C.buildImplicitRecord("kmp_task_affinity_info_t"); 4203 KmpAffinityInfoRD->startDefinition(); 4204 addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getIntPtrType()); 4205 addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getSizeType()); 4206 addFieldToRecordDecl(C, KmpAffinityInfoRD, FlagsTy); 4207 KmpAffinityInfoRD->completeDefinition(); 4208 KmpTaskAffinityInfoTy = C.getRecordType(KmpAffinityInfoRD); 4209 } 4210 } 4211 4212 CGOpenMPRuntime::TaskResultTy 4213 CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc, 4214 const OMPExecutableDirective &D, 4215 llvm::Function *TaskFunction, QualType SharedsTy, 4216 Address Shareds, const OMPTaskDataTy &Data) { 4217 ASTContext &C = CGM.getContext(); 4218 llvm::SmallVector<PrivateDataTy, 4> Privates; 4219 // Aggregate privates and sort them by the alignment. 
4220 const auto *I = Data.PrivateCopies.begin(); 4221 for (const Expr *E : Data.PrivateVars) { 4222 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 4223 Privates.emplace_back( 4224 C.getDeclAlign(VD), 4225 PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()), 4226 /*PrivateElemInit=*/nullptr)); 4227 ++I; 4228 } 4229 I = Data.FirstprivateCopies.begin(); 4230 const auto *IElemInitRef = Data.FirstprivateInits.begin(); 4231 for (const Expr *E : Data.FirstprivateVars) { 4232 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 4233 Privates.emplace_back( 4234 C.getDeclAlign(VD), 4235 PrivateHelpersTy( 4236 E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()), 4237 cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl()))); 4238 ++I; 4239 ++IElemInitRef; 4240 } 4241 I = Data.LastprivateCopies.begin(); 4242 for (const Expr *E : Data.LastprivateVars) { 4243 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 4244 Privates.emplace_back( 4245 C.getDeclAlign(VD), 4246 PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()), 4247 /*PrivateElemInit=*/nullptr)); 4248 ++I; 4249 } 4250 for (const VarDecl *VD : Data.PrivateLocals) { 4251 if (isAllocatableDecl(VD)) 4252 Privates.emplace_back(CGM.getPointerAlign(), PrivateHelpersTy(VD)); 4253 else 4254 Privates.emplace_back(C.getDeclAlign(VD), PrivateHelpersTy(VD)); 4255 } 4256 llvm::stable_sort(Privates, 4257 [](const PrivateDataTy &L, const PrivateDataTy &R) { 4258 return L.first > R.first; 4259 }); 4260 QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1); 4261 // Build type kmp_routine_entry_t (if not built yet). 4262 emitKmpRoutineEntryT(KmpInt32Ty); 4263 // Build type kmp_task_t (if not built yet). 
4264 if (isOpenMPTaskLoopDirective(D.getDirectiveKind())) { 4265 if (SavedKmpTaskloopTQTy.isNull()) { 4266 SavedKmpTaskloopTQTy = C.getRecordType(createKmpTaskTRecordDecl( 4267 CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy)); 4268 } 4269 KmpTaskTQTy = SavedKmpTaskloopTQTy; 4270 } else { 4271 assert((D.getDirectiveKind() == OMPD_task || 4272 isOpenMPTargetExecutionDirective(D.getDirectiveKind()) || 4273 isOpenMPTargetDataManagementDirective(D.getDirectiveKind())) && 4274 "Expected taskloop, task or target directive"); 4275 if (SavedKmpTaskTQTy.isNull()) { 4276 SavedKmpTaskTQTy = C.getRecordType(createKmpTaskTRecordDecl( 4277 CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy)); 4278 } 4279 KmpTaskTQTy = SavedKmpTaskTQTy; 4280 } 4281 const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl()); 4282 // Build particular struct kmp_task_t for the given task. 4283 const RecordDecl *KmpTaskTWithPrivatesQTyRD = 4284 createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates); 4285 QualType KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD); 4286 QualType KmpTaskTWithPrivatesPtrQTy = 4287 C.getPointerType(KmpTaskTWithPrivatesQTy); 4288 llvm::Type *KmpTaskTWithPrivatesTy = CGF.ConvertType(KmpTaskTWithPrivatesQTy); 4289 llvm::Type *KmpTaskTWithPrivatesPtrTy = 4290 KmpTaskTWithPrivatesTy->getPointerTo(); 4291 llvm::Value *KmpTaskTWithPrivatesTySize = 4292 CGF.getTypeSize(KmpTaskTWithPrivatesQTy); 4293 QualType SharedsPtrTy = C.getPointerType(SharedsTy); 4294 4295 // Emit initial values for private copies (if any). 
4296 llvm::Value *TaskPrivatesMap = nullptr; 4297 llvm::Type *TaskPrivatesMapTy = 4298 std::next(TaskFunction->arg_begin(), 3)->getType(); 4299 if (!Privates.empty()) { 4300 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin()); 4301 TaskPrivatesMap = 4302 emitTaskPrivateMappingFunction(CGM, Loc, Data, FI->getType(), Privates); 4303 TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4304 TaskPrivatesMap, TaskPrivatesMapTy); 4305 } else { 4306 TaskPrivatesMap = llvm::ConstantPointerNull::get( 4307 cast<llvm::PointerType>(TaskPrivatesMapTy)); 4308 } 4309 // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid, 4310 // kmp_task_t *tt); 4311 llvm::Function *TaskEntry = emitProxyTaskFunction( 4312 CGM, Loc, D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy, 4313 KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction, 4314 TaskPrivatesMap); 4315 4316 // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid, 4317 // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, 4318 // kmp_routine_entry_t *task_entry); 4319 // Task flags. Format is taken from 4320 // https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h, 4321 // description of kmp_tasking_flags struct. 4322 enum { 4323 TiedFlag = 0x1, 4324 FinalFlag = 0x2, 4325 DestructorsFlag = 0x8, 4326 PriorityFlag = 0x20, 4327 DetachableFlag = 0x40, 4328 }; 4329 unsigned Flags = Data.Tied ? TiedFlag : 0; 4330 bool NeedsCleanup = false; 4331 if (!Privates.empty()) { 4332 NeedsCleanup = 4333 checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD, Privates); 4334 if (NeedsCleanup) 4335 Flags = Flags | DestructorsFlag; 4336 } 4337 if (Data.Priority.getInt()) 4338 Flags = Flags | PriorityFlag; 4339 if (D.hasClausesOfKind<OMPDetachClause>()) 4340 Flags = Flags | DetachableFlag; 4341 llvm::Value *TaskFlags = 4342 Data.Final.getPointer() 4343 ? 
CGF.Builder.CreateSelect(Data.Final.getPointer(), 4344 CGF.Builder.getInt32(FinalFlag), 4345 CGF.Builder.getInt32(/*C=*/0)) 4346 : CGF.Builder.getInt32(Data.Final.getInt() ? FinalFlag : 0); 4347 TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags)); 4348 llvm::Value *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy)); 4349 SmallVector<llvm::Value *, 8> AllocArgs = {emitUpdateLocation(CGF, Loc), 4350 getThreadID(CGF, Loc), TaskFlags, KmpTaskTWithPrivatesTySize, 4351 SharedsSize, CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4352 TaskEntry, KmpRoutineEntryPtrTy)}; 4353 llvm::Value *NewTask; 4354 if (D.hasClausesOfKind<OMPNowaitClause>()) { 4355 // Check if we have any device clause associated with the directive. 4356 const Expr *Device = nullptr; 4357 if (auto *C = D.getSingleClause<OMPDeviceClause>()) 4358 Device = C->getDevice(); 4359 // Emit device ID if any otherwise use default value. 4360 llvm::Value *DeviceID; 4361 if (Device) 4362 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device), 4363 CGF.Int64Ty, /*isSigned=*/true); 4364 else 4365 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF); 4366 AllocArgs.push_back(DeviceID); 4367 NewTask = CGF.EmitRuntimeCall( 4368 OMPBuilder.getOrCreateRuntimeFunction( 4369 CGM.getModule(), OMPRTL___kmpc_omp_target_task_alloc), 4370 AllocArgs); 4371 } else { 4372 NewTask = 4373 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 4374 CGM.getModule(), OMPRTL___kmpc_omp_task_alloc), 4375 AllocArgs); 4376 } 4377 // Emit detach clause initialization. 
4378 // evt = (typeof(evt))__kmpc_task_allow_completion_event(loc, tid, 4379 // task_descriptor); 4380 if (const auto *DC = D.getSingleClause<OMPDetachClause>()) { 4381 const Expr *Evt = DC->getEventHandler()->IgnoreParenImpCasts(); 4382 LValue EvtLVal = CGF.EmitLValue(Evt); 4383 4384 // Build kmp_event_t *__kmpc_task_allow_completion_event(ident_t *loc_ref, 4385 // int gtid, kmp_task_t *task); 4386 llvm::Value *Loc = emitUpdateLocation(CGF, DC->getBeginLoc()); 4387 llvm::Value *Tid = getThreadID(CGF, DC->getBeginLoc()); 4388 Tid = CGF.Builder.CreateIntCast(Tid, CGF.IntTy, /*isSigned=*/false); 4389 llvm::Value *EvtVal = CGF.EmitRuntimeCall( 4390 OMPBuilder.getOrCreateRuntimeFunction( 4391 CGM.getModule(), OMPRTL___kmpc_task_allow_completion_event), 4392 {Loc, Tid, NewTask}); 4393 EvtVal = CGF.EmitScalarConversion(EvtVal, C.VoidPtrTy, Evt->getType(), 4394 Evt->getExprLoc()); 4395 CGF.EmitStoreOfScalar(EvtVal, EvtLVal); 4396 } 4397 // Process affinity clauses. 4398 if (D.hasClausesOfKind<OMPAffinityClause>()) { 4399 // Process list of affinity data. 4400 ASTContext &C = CGM.getContext(); 4401 Address AffinitiesArray = Address::invalid(); 4402 // Calculate number of elements to form the array of affinity data. 4403 llvm::Value *NumOfElements = nullptr; 4404 unsigned NumAffinities = 0; 4405 for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) { 4406 if (const Expr *Modifier = C->getModifier()) { 4407 const auto *IE = cast<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts()); 4408 for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) { 4409 llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper); 4410 Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false); 4411 NumOfElements = 4412 NumOfElements ? CGF.Builder.CreateNUWMul(NumOfElements, Sz) : Sz; 4413 } 4414 } else { 4415 NumAffinities += C->varlist_size(); 4416 } 4417 } 4418 getKmpAffinityType(CGM.getContext(), KmpTaskAffinityInfoTy); 4419 // Fields ids in kmp_task_affinity_info record. 
4420 enum RTLAffinityInfoFieldsTy { BaseAddr, Len, Flags }; 4421 4422 QualType KmpTaskAffinityInfoArrayTy; 4423 if (NumOfElements) { 4424 NumOfElements = CGF.Builder.CreateNUWAdd( 4425 llvm::ConstantInt::get(CGF.SizeTy, NumAffinities), NumOfElements); 4426 auto *OVE = new (C) OpaqueValueExpr( 4427 Loc, 4428 C.getIntTypeForBitwidth(C.getTypeSize(C.getSizeType()), /*Signed=*/0), 4429 VK_PRValue); 4430 CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, OVE, 4431 RValue::get(NumOfElements)); 4432 KmpTaskAffinityInfoArrayTy = 4433 C.getVariableArrayType(KmpTaskAffinityInfoTy, OVE, ArrayType::Normal, 4434 /*IndexTypeQuals=*/0, SourceRange(Loc, Loc)); 4435 // Properly emit variable-sized array. 4436 auto *PD = ImplicitParamDecl::Create(C, KmpTaskAffinityInfoArrayTy, 4437 ImplicitParamDecl::Other); 4438 CGF.EmitVarDecl(*PD); 4439 AffinitiesArray = CGF.GetAddrOfLocalVar(PD); 4440 NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty, 4441 /*isSigned=*/false); 4442 } else { 4443 KmpTaskAffinityInfoArrayTy = C.getConstantArrayType( 4444 KmpTaskAffinityInfoTy, 4445 llvm::APInt(C.getTypeSize(C.getSizeType()), NumAffinities), nullptr, 4446 ArrayType::Normal, /*IndexTypeQuals=*/0); 4447 AffinitiesArray = 4448 CGF.CreateMemTemp(KmpTaskAffinityInfoArrayTy, ".affs.arr.addr"); 4449 AffinitiesArray = CGF.Builder.CreateConstArrayGEP(AffinitiesArray, 0); 4450 NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumAffinities, 4451 /*isSigned=*/false); 4452 } 4453 4454 const auto *KmpAffinityInfoRD = KmpTaskAffinityInfoTy->getAsRecordDecl(); 4455 // Fill array by elements without iterators. 
4456 unsigned Pos = 0; 4457 bool HasIterator = false; 4458 for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) { 4459 if (C->getModifier()) { 4460 HasIterator = true; 4461 continue; 4462 } 4463 for (const Expr *E : C->varlists()) { 4464 llvm::Value *Addr; 4465 llvm::Value *Size; 4466 std::tie(Addr, Size) = getPointerAndSize(CGF, E); 4467 LValue Base = 4468 CGF.MakeAddrLValue(CGF.Builder.CreateConstGEP(AffinitiesArray, Pos), 4469 KmpTaskAffinityInfoTy); 4470 // affs[i].base_addr = &<Affinities[i].second>; 4471 LValue BaseAddrLVal = CGF.EmitLValueForField( 4472 Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr)); 4473 CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy), 4474 BaseAddrLVal); 4475 // affs[i].len = sizeof(<Affinities[i].second>); 4476 LValue LenLVal = CGF.EmitLValueForField( 4477 Base, *std::next(KmpAffinityInfoRD->field_begin(), Len)); 4478 CGF.EmitStoreOfScalar(Size, LenLVal); 4479 ++Pos; 4480 } 4481 } 4482 LValue PosLVal; 4483 if (HasIterator) { 4484 PosLVal = CGF.MakeAddrLValue( 4485 CGF.CreateMemTemp(C.getSizeType(), "affs.counter.addr"), 4486 C.getSizeType()); 4487 CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal); 4488 } 4489 // Process elements with iterators. 
4490 for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) { 4491 const Expr *Modifier = C->getModifier(); 4492 if (!Modifier) 4493 continue; 4494 OMPIteratorGeneratorScope IteratorScope( 4495 CGF, cast_or_null<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts())); 4496 for (const Expr *E : C->varlists()) { 4497 llvm::Value *Addr; 4498 llvm::Value *Size; 4499 std::tie(Addr, Size) = getPointerAndSize(CGF, E); 4500 llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc()); 4501 LValue Base = CGF.MakeAddrLValue( 4502 CGF.Builder.CreateGEP(AffinitiesArray, Idx), KmpTaskAffinityInfoTy); 4503 // affs[i].base_addr = &<Affinities[i].second>; 4504 LValue BaseAddrLVal = CGF.EmitLValueForField( 4505 Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr)); 4506 CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy), 4507 BaseAddrLVal); 4508 // affs[i].len = sizeof(<Affinities[i].second>); 4509 LValue LenLVal = CGF.EmitLValueForField( 4510 Base, *std::next(KmpAffinityInfoRD->field_begin(), Len)); 4511 CGF.EmitStoreOfScalar(Size, LenLVal); 4512 Idx = CGF.Builder.CreateNUWAdd( 4513 Idx, llvm::ConstantInt::get(Idx->getType(), 1)); 4514 CGF.EmitStoreOfScalar(Idx, PosLVal); 4515 } 4516 } 4517 // Call to kmp_int32 __kmpc_omp_reg_task_with_affinity(ident_t *loc_ref, 4518 // kmp_int32 gtid, kmp_task_t *new_task, kmp_int32 4519 // naffins, kmp_task_affinity_info_t *affin_list); 4520 llvm::Value *LocRef = emitUpdateLocation(CGF, Loc); 4521 llvm::Value *GTid = getThreadID(CGF, Loc); 4522 llvm::Value *AffinListPtr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4523 AffinitiesArray.getPointer(), CGM.VoidPtrTy); 4524 // FIXME: Emit the function and ignore its result for now unless the 4525 // runtime function is properly implemented. 
4526 (void)CGF.EmitRuntimeCall( 4527 OMPBuilder.getOrCreateRuntimeFunction( 4528 CGM.getModule(), OMPRTL___kmpc_omp_reg_task_with_affinity), 4529 {LocRef, GTid, NewTask, NumOfElements, AffinListPtr}); 4530 } 4531 llvm::Value *NewTaskNewTaskTTy = 4532 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4533 NewTask, KmpTaskTWithPrivatesPtrTy); 4534 LValue Base = CGF.MakeNaturalAlignAddrLValue(NewTaskNewTaskTTy, 4535 KmpTaskTWithPrivatesQTy); 4536 LValue TDBase = 4537 CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin()); 4538 // Fill the data in the resulting kmp_task_t record. 4539 // Copy shareds if there are any. 4540 Address KmpTaskSharedsPtr = Address::invalid(); 4541 if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) { 4542 KmpTaskSharedsPtr = Address::deprecated( 4543 CGF.EmitLoadOfScalar( 4544 CGF.EmitLValueForField( 4545 TDBase, 4546 *std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds)), 4547 Loc), 4548 CGM.getNaturalTypeAlignment(SharedsTy)); 4549 LValue Dest = CGF.MakeAddrLValue(KmpTaskSharedsPtr, SharedsTy); 4550 LValue Src = CGF.MakeAddrLValue(Shareds, SharedsTy); 4551 CGF.EmitAggregateCopy(Dest, Src, SharedsTy, AggValueSlot::DoesNotOverlap); 4552 } 4553 // Emit initial values for private copies (if any). 4554 TaskResultTy Result; 4555 if (!Privates.empty()) { 4556 emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, Base, KmpTaskTWithPrivatesQTyRD, 4557 SharedsTy, SharedsPtrTy, Data, Privates, 4558 /*ForDup=*/false); 4559 if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) && 4560 (!Data.LastprivateVars.empty() || checkInitIsRequired(CGF, Privates))) { 4561 Result.TaskDupFn = emitTaskDupFunction( 4562 CGM, Loc, D, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTyRD, 4563 KmpTaskTQTyRD, SharedsTy, SharedsPtrTy, Data, Privates, 4564 /*WithLastIter=*/!Data.LastprivateVars.empty()); 4565 } 4566 } 4567 // Fields of union "kmp_cmplrdata_t" for destructors and priority. 
namespace {
/// Dependence kinds in the encoding used by the RTL.
enum RTLDependenceKindTy {
  DepIn = 0x1,
  DepInOut = 0x3,
  DepMutexInOutSet = 0x4,
  DepInOutSet = 0x8,
};
/// Indices of the fields in the kmp_depend_info record.
enum RTLDependInfoFieldsTy {
  BaseAddr = 0,
  Len = 1,
  Flags = 2,
};
} // namespace
4620 case OMPC_DEPEND_out: 4621 case OMPC_DEPEND_inout: 4622 DepKind = DepInOut; 4623 break; 4624 case OMPC_DEPEND_mutexinoutset: 4625 DepKind = DepMutexInOutSet; 4626 break; 4627 case OMPC_DEPEND_inoutset: 4628 DepKind = DepInOutSet; 4629 break; 4630 case OMPC_DEPEND_source: 4631 case OMPC_DEPEND_sink: 4632 case OMPC_DEPEND_depobj: 4633 case OMPC_DEPEND_unknown: 4634 llvm_unreachable("Unknown task dependence type"); 4635 } 4636 return DepKind; 4637 } 4638 4639 /// Builds kmp_depend_info, if it is not built yet, and builds flags type. 4640 static void getDependTypes(ASTContext &C, QualType &KmpDependInfoTy, 4641 QualType &FlagsTy) { 4642 FlagsTy = C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false); 4643 if (KmpDependInfoTy.isNull()) { 4644 RecordDecl *KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info"); 4645 KmpDependInfoRD->startDefinition(); 4646 addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType()); 4647 addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType()); 4648 addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy); 4649 KmpDependInfoRD->completeDefinition(); 4650 KmpDependInfoTy = C.getRecordType(KmpDependInfoRD); 4651 } 4652 } 4653 4654 std::pair<llvm::Value *, LValue> 4655 CGOpenMPRuntime::getDepobjElements(CodeGenFunction &CGF, LValue DepobjLVal, 4656 SourceLocation Loc) { 4657 ASTContext &C = CGM.getContext(); 4658 QualType FlagsTy; 4659 getDependTypes(C, KmpDependInfoTy, FlagsTy); 4660 RecordDecl *KmpDependInfoRD = 4661 cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl()); 4662 LValue Base = CGF.EmitLoadOfPointerLValue( 4663 DepobjLVal.getAddress(CGF), 4664 C.getPointerType(C.VoidPtrTy).castAs<PointerType>()); 4665 QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy); 4666 Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4667 Base.getAddress(CGF), CGF.ConvertTypeForMem(KmpDependInfoPtrTy)); 4668 Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(), 4669 Base.getTBAAInfo()); 4670 Address 
DepObjAddr = CGF.Builder.CreateGEP( 4671 Addr, llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true)); 4672 LValue NumDepsBase = CGF.MakeAddrLValue( 4673 DepObjAddr, KmpDependInfoTy, Base.getBaseInfo(), Base.getTBAAInfo()); 4674 // NumDeps = deps[i].base_addr; 4675 LValue BaseAddrLVal = CGF.EmitLValueForField( 4676 NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr)); 4677 llvm::Value *NumDeps = CGF.EmitLoadOfScalar(BaseAddrLVal, Loc); 4678 return std::make_pair(NumDeps, Base); 4679 } 4680 4681 static void emitDependData(CodeGenFunction &CGF, QualType &KmpDependInfoTy, 4682 llvm::PointerUnion<unsigned *, LValue *> Pos, 4683 const OMPTaskDataTy::DependData &Data, 4684 Address DependenciesArray) { 4685 CodeGenModule &CGM = CGF.CGM; 4686 ASTContext &C = CGM.getContext(); 4687 QualType FlagsTy; 4688 getDependTypes(C, KmpDependInfoTy, FlagsTy); 4689 RecordDecl *KmpDependInfoRD = 4690 cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl()); 4691 llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy); 4692 4693 OMPIteratorGeneratorScope IteratorScope( 4694 CGF, cast_or_null<OMPIteratorExpr>( 4695 Data.IteratorExpr ? 
Data.IteratorExpr->IgnoreParenImpCasts() 4696 : nullptr)); 4697 for (const Expr *E : Data.DepExprs) { 4698 llvm::Value *Addr; 4699 llvm::Value *Size; 4700 std::tie(Addr, Size) = getPointerAndSize(CGF, E); 4701 LValue Base; 4702 if (unsigned *P = Pos.dyn_cast<unsigned *>()) { 4703 Base = CGF.MakeAddrLValue( 4704 CGF.Builder.CreateConstGEP(DependenciesArray, *P), KmpDependInfoTy); 4705 } else { 4706 LValue &PosLVal = *Pos.get<LValue *>(); 4707 llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc()); 4708 Base = CGF.MakeAddrLValue( 4709 CGF.Builder.CreateGEP(DependenciesArray, Idx), KmpDependInfoTy); 4710 } 4711 // deps[i].base_addr = &<Dependencies[i].second>; 4712 LValue BaseAddrLVal = CGF.EmitLValueForField( 4713 Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr)); 4714 CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy), 4715 BaseAddrLVal); 4716 // deps[i].len = sizeof(<Dependencies[i].second>); 4717 LValue LenLVal = CGF.EmitLValueForField( 4718 Base, *std::next(KmpDependInfoRD->field_begin(), Len)); 4719 CGF.EmitStoreOfScalar(Size, LenLVal); 4720 // deps[i].flags = <Dependencies[i].first>; 4721 RTLDependenceKindTy DepKind = translateDependencyKind(Data.DepKind); 4722 LValue FlagsLVal = CGF.EmitLValueForField( 4723 Base, *std::next(KmpDependInfoRD->field_begin(), Flags)); 4724 CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind), 4725 FlagsLVal); 4726 if (unsigned *P = Pos.dyn_cast<unsigned *>()) { 4727 ++(*P); 4728 } else { 4729 LValue &PosLVal = *Pos.get<LValue *>(); 4730 llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc()); 4731 Idx = CGF.Builder.CreateNUWAdd(Idx, 4732 llvm::ConstantInt::get(Idx->getType(), 1)); 4733 CGF.EmitStoreOfScalar(Idx, PosLVal); 4734 } 4735 } 4736 } 4737 4738 static SmallVector<llvm::Value *, 4> 4739 emitDepobjElementsSizes(CodeGenFunction &CGF, QualType &KmpDependInfoTy, 4740 const OMPTaskDataTy::DependData &Data) { 4741 assert(Data.DepKind == OMPC_DEPEND_depobj && 4742 
"Expected depobj dependecy kind."); 4743 SmallVector<llvm::Value *, 4> Sizes; 4744 SmallVector<LValue, 4> SizeLVals; 4745 ASTContext &C = CGF.getContext(); 4746 QualType FlagsTy; 4747 getDependTypes(C, KmpDependInfoTy, FlagsTy); 4748 RecordDecl *KmpDependInfoRD = 4749 cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl()); 4750 QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy); 4751 llvm::Type *KmpDependInfoPtrT = CGF.ConvertTypeForMem(KmpDependInfoPtrTy); 4752 { 4753 OMPIteratorGeneratorScope IteratorScope( 4754 CGF, cast_or_null<OMPIteratorExpr>( 4755 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts() 4756 : nullptr)); 4757 for (const Expr *E : Data.DepExprs) { 4758 LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts()); 4759 LValue Base = CGF.EmitLoadOfPointerLValue( 4760 DepobjLVal.getAddress(CGF), 4761 C.getPointerType(C.VoidPtrTy).castAs<PointerType>()); 4762 Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4763 Base.getAddress(CGF), KmpDependInfoPtrT); 4764 Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(), 4765 Base.getTBAAInfo()); 4766 Address DepObjAddr = CGF.Builder.CreateGEP( 4767 Addr, llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true)); 4768 LValue NumDepsBase = CGF.MakeAddrLValue( 4769 DepObjAddr, KmpDependInfoTy, Base.getBaseInfo(), Base.getTBAAInfo()); 4770 // NumDeps = deps[i].base_addr; 4771 LValue BaseAddrLVal = CGF.EmitLValueForField( 4772 NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr)); 4773 llvm::Value *NumDeps = 4774 CGF.EmitLoadOfScalar(BaseAddrLVal, E->getExprLoc()); 4775 LValue NumLVal = CGF.MakeAddrLValue( 4776 CGF.CreateMemTemp(C.getUIntPtrType(), "depobj.size.addr"), 4777 C.getUIntPtrType()); 4778 CGF.Builder.CreateStore(llvm::ConstantInt::get(CGF.IntPtrTy, 0), 4779 NumLVal.getAddress(CGF)); 4780 llvm::Value *PrevVal = CGF.EmitLoadOfScalar(NumLVal, E->getExprLoc()); 4781 llvm::Value *Add = CGF.Builder.CreateNUWAdd(PrevVal, NumDeps); 4782 
CGF.EmitStoreOfScalar(Add, NumLVal); 4783 SizeLVals.push_back(NumLVal); 4784 } 4785 } 4786 for (unsigned I = 0, E = SizeLVals.size(); I < E; ++I) { 4787 llvm::Value *Size = 4788 CGF.EmitLoadOfScalar(SizeLVals[I], Data.DepExprs[I]->getExprLoc()); 4789 Sizes.push_back(Size); 4790 } 4791 return Sizes; 4792 } 4793 4794 static void emitDepobjElements(CodeGenFunction &CGF, QualType &KmpDependInfoTy, 4795 LValue PosLVal, 4796 const OMPTaskDataTy::DependData &Data, 4797 Address DependenciesArray) { 4798 assert(Data.DepKind == OMPC_DEPEND_depobj && 4799 "Expected depobj dependecy kind."); 4800 ASTContext &C = CGF.getContext(); 4801 QualType FlagsTy; 4802 getDependTypes(C, KmpDependInfoTy, FlagsTy); 4803 RecordDecl *KmpDependInfoRD = 4804 cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl()); 4805 QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy); 4806 llvm::Type *KmpDependInfoPtrT = CGF.ConvertTypeForMem(KmpDependInfoPtrTy); 4807 llvm::Value *ElSize = CGF.getTypeSize(KmpDependInfoTy); 4808 { 4809 OMPIteratorGeneratorScope IteratorScope( 4810 CGF, cast_or_null<OMPIteratorExpr>( 4811 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts() 4812 : nullptr)); 4813 for (unsigned I = 0, End = Data.DepExprs.size(); I < End; ++I) { 4814 const Expr *E = Data.DepExprs[I]; 4815 LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts()); 4816 LValue Base = CGF.EmitLoadOfPointerLValue( 4817 DepobjLVal.getAddress(CGF), 4818 C.getPointerType(C.VoidPtrTy).castAs<PointerType>()); 4819 Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4820 Base.getAddress(CGF), KmpDependInfoPtrT); 4821 Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(), 4822 Base.getTBAAInfo()); 4823 4824 // Get number of elements in a single depobj. 
4825 Address DepObjAddr = CGF.Builder.CreateGEP( 4826 Addr, llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true)); 4827 LValue NumDepsBase = CGF.MakeAddrLValue( 4828 DepObjAddr, KmpDependInfoTy, Base.getBaseInfo(), Base.getTBAAInfo()); 4829 // NumDeps = deps[i].base_addr; 4830 LValue BaseAddrLVal = CGF.EmitLValueForField( 4831 NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr)); 4832 llvm::Value *NumDeps = 4833 CGF.EmitLoadOfScalar(BaseAddrLVal, E->getExprLoc()); 4834 4835 // memcopy dependency data. 4836 llvm::Value *Size = CGF.Builder.CreateNUWMul( 4837 ElSize, 4838 CGF.Builder.CreateIntCast(NumDeps, CGF.SizeTy, /*isSigned=*/false)); 4839 llvm::Value *Pos = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc()); 4840 Address DepAddr = CGF.Builder.CreateGEP(DependenciesArray, Pos); 4841 CGF.Builder.CreateMemCpy(DepAddr, Base.getAddress(CGF), Size); 4842 4843 // Increase pos. 4844 // pos += size; 4845 llvm::Value *Add = CGF.Builder.CreateNUWAdd(Pos, NumDeps); 4846 CGF.EmitStoreOfScalar(Add, PosLVal); 4847 } 4848 } 4849 } 4850 4851 std::pair<llvm::Value *, Address> CGOpenMPRuntime::emitDependClause( 4852 CodeGenFunction &CGF, ArrayRef<OMPTaskDataTy::DependData> Dependencies, 4853 SourceLocation Loc) { 4854 if (llvm::all_of(Dependencies, [](const OMPTaskDataTy::DependData &D) { 4855 return D.DepExprs.empty(); 4856 })) 4857 return std::make_pair(nullptr, Address::invalid()); 4858 // Process list of dependencies. 4859 ASTContext &C = CGM.getContext(); 4860 Address DependenciesArray = Address::invalid(); 4861 llvm::Value *NumOfElements = nullptr; 4862 unsigned NumDependencies = std::accumulate( 4863 Dependencies.begin(), Dependencies.end(), 0, 4864 [](unsigned V, const OMPTaskDataTy::DependData &D) { 4865 return D.DepKind == OMPC_DEPEND_depobj 4866 ? V 4867 : (V + (D.IteratorExpr ? 
0 : D.DepExprs.size())); 4868 }); 4869 QualType FlagsTy; 4870 getDependTypes(C, KmpDependInfoTy, FlagsTy); 4871 bool HasDepobjDeps = false; 4872 bool HasRegularWithIterators = false; 4873 llvm::Value *NumOfDepobjElements = llvm::ConstantInt::get(CGF.IntPtrTy, 0); 4874 llvm::Value *NumOfRegularWithIterators = 4875 llvm::ConstantInt::get(CGF.IntPtrTy, 0); 4876 // Calculate number of depobj dependecies and regular deps with the iterators. 4877 for (const OMPTaskDataTy::DependData &D : Dependencies) { 4878 if (D.DepKind == OMPC_DEPEND_depobj) { 4879 SmallVector<llvm::Value *, 4> Sizes = 4880 emitDepobjElementsSizes(CGF, KmpDependInfoTy, D); 4881 for (llvm::Value *Size : Sizes) { 4882 NumOfDepobjElements = 4883 CGF.Builder.CreateNUWAdd(NumOfDepobjElements, Size); 4884 } 4885 HasDepobjDeps = true; 4886 continue; 4887 } 4888 // Include number of iterations, if any. 4889 4890 if (const auto *IE = cast_or_null<OMPIteratorExpr>(D.IteratorExpr)) { 4891 for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) { 4892 llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper); 4893 Sz = CGF.Builder.CreateIntCast(Sz, CGF.IntPtrTy, /*isSigned=*/false); 4894 llvm::Value *NumClauseDeps = CGF.Builder.CreateNUWMul( 4895 Sz, llvm::ConstantInt::get(CGF.IntPtrTy, D.DepExprs.size())); 4896 NumOfRegularWithIterators = 4897 CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumClauseDeps); 4898 } 4899 HasRegularWithIterators = true; 4900 continue; 4901 } 4902 } 4903 4904 QualType KmpDependInfoArrayTy; 4905 if (HasDepobjDeps || HasRegularWithIterators) { 4906 NumOfElements = llvm::ConstantInt::get(CGM.IntPtrTy, NumDependencies, 4907 /*isSigned=*/false); 4908 if (HasDepobjDeps) { 4909 NumOfElements = 4910 CGF.Builder.CreateNUWAdd(NumOfDepobjElements, NumOfElements); 4911 } 4912 if (HasRegularWithIterators) { 4913 NumOfElements = 4914 CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumOfElements); 4915 } 4916 auto *OVE = new (C) OpaqueValueExpr( 4917 Loc, 
C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0), 4918 VK_PRValue); 4919 CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, OVE, 4920 RValue::get(NumOfElements)); 4921 KmpDependInfoArrayTy = 4922 C.getVariableArrayType(KmpDependInfoTy, OVE, ArrayType::Normal, 4923 /*IndexTypeQuals=*/0, SourceRange(Loc, Loc)); 4924 // CGF.EmitVariablyModifiedType(KmpDependInfoArrayTy); 4925 // Properly emit variable-sized array. 4926 auto *PD = ImplicitParamDecl::Create(C, KmpDependInfoArrayTy, 4927 ImplicitParamDecl::Other); 4928 CGF.EmitVarDecl(*PD); 4929 DependenciesArray = CGF.GetAddrOfLocalVar(PD); 4930 NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty, 4931 /*isSigned=*/false); 4932 } else { 4933 KmpDependInfoArrayTy = C.getConstantArrayType( 4934 KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies), nullptr, 4935 ArrayType::Normal, /*IndexTypeQuals=*/0); 4936 DependenciesArray = 4937 CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr"); 4938 DependenciesArray = CGF.Builder.CreateConstArrayGEP(DependenciesArray, 0); 4939 NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumDependencies, 4940 /*isSigned=*/false); 4941 } 4942 unsigned Pos = 0; 4943 for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) { 4944 if (Dependencies[I].DepKind == OMPC_DEPEND_depobj || 4945 Dependencies[I].IteratorExpr) 4946 continue; 4947 emitDependData(CGF, KmpDependInfoTy, &Pos, Dependencies[I], 4948 DependenciesArray); 4949 } 4950 // Copy regular dependecies with iterators. 
4951 LValue PosLVal = CGF.MakeAddrLValue( 4952 CGF.CreateMemTemp(C.getSizeType(), "dep.counter.addr"), C.getSizeType()); 4953 CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal); 4954 for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) { 4955 if (Dependencies[I].DepKind == OMPC_DEPEND_depobj || 4956 !Dependencies[I].IteratorExpr) 4957 continue; 4958 emitDependData(CGF, KmpDependInfoTy, &PosLVal, Dependencies[I], 4959 DependenciesArray); 4960 } 4961 // Copy final depobj arrays without iterators. 4962 if (HasDepobjDeps) { 4963 for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) { 4964 if (Dependencies[I].DepKind != OMPC_DEPEND_depobj) 4965 continue; 4966 emitDepobjElements(CGF, KmpDependInfoTy, PosLVal, Dependencies[I], 4967 DependenciesArray); 4968 } 4969 } 4970 DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4971 DependenciesArray, CGF.VoidPtrTy); 4972 return std::make_pair(NumOfElements, DependenciesArray); 4973 } 4974 4975 Address CGOpenMPRuntime::emitDepobjDependClause( 4976 CodeGenFunction &CGF, const OMPTaskDataTy::DependData &Dependencies, 4977 SourceLocation Loc) { 4978 if (Dependencies.DepExprs.empty()) 4979 return Address::invalid(); 4980 // Process list of dependencies. 4981 ASTContext &C = CGM.getContext(); 4982 Address DependenciesArray = Address::invalid(); 4983 unsigned NumDependencies = Dependencies.DepExprs.size(); 4984 QualType FlagsTy; 4985 getDependTypes(C, KmpDependInfoTy, FlagsTy); 4986 RecordDecl *KmpDependInfoRD = 4987 cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl()); 4988 4989 llvm::Value *Size; 4990 // Define type kmp_depend_info[<Dependencies.size()>]; 4991 // For depobj reserve one extra element to store the number of elements. 4992 // It is required to handle depobj(x) update(in) construct. 
4993 // kmp_depend_info[<Dependencies.size()>] deps; 4994 llvm::Value *NumDepsVal; 4995 CharUnits Align = C.getTypeAlignInChars(KmpDependInfoTy); 4996 if (const auto *IE = 4997 cast_or_null<OMPIteratorExpr>(Dependencies.IteratorExpr)) { 4998 NumDepsVal = llvm::ConstantInt::get(CGF.SizeTy, 1); 4999 for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) { 5000 llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper); 5001 Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false); 5002 NumDepsVal = CGF.Builder.CreateNUWMul(NumDepsVal, Sz); 5003 } 5004 Size = CGF.Builder.CreateNUWAdd(llvm::ConstantInt::get(CGF.SizeTy, 1), 5005 NumDepsVal); 5006 CharUnits SizeInBytes = 5007 C.getTypeSizeInChars(KmpDependInfoTy).alignTo(Align); 5008 llvm::Value *RecSize = CGM.getSize(SizeInBytes); 5009 Size = CGF.Builder.CreateNUWMul(Size, RecSize); 5010 NumDepsVal = 5011 CGF.Builder.CreateIntCast(NumDepsVal, CGF.IntPtrTy, /*isSigned=*/false); 5012 } else { 5013 QualType KmpDependInfoArrayTy = C.getConstantArrayType( 5014 KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies + 1), 5015 nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0); 5016 CharUnits Sz = C.getTypeSizeInChars(KmpDependInfoArrayTy); 5017 Size = CGM.getSize(Sz.alignTo(Align)); 5018 NumDepsVal = llvm::ConstantInt::get(CGF.IntPtrTy, NumDependencies); 5019 } 5020 // Need to allocate on the dynamic memory. 5021 llvm::Value *ThreadID = getThreadID(CGF, Loc); 5022 // Use default allocator. 
5023 llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); 5024 llvm::Value *Args[] = {ThreadID, Size, Allocator}; 5025 5026 llvm::Value *Addr = 5027 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 5028 CGM.getModule(), OMPRTL___kmpc_alloc), 5029 Args, ".dep.arr.addr"); 5030 Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5031 Addr, CGF.ConvertTypeForMem(KmpDependInfoTy)->getPointerTo()); 5032 DependenciesArray = Address::deprecated(Addr, Align); 5033 // Write number of elements in the first element of array for depobj. 5034 LValue Base = CGF.MakeAddrLValue(DependenciesArray, KmpDependInfoTy); 5035 // deps[i].base_addr = NumDependencies; 5036 LValue BaseAddrLVal = CGF.EmitLValueForField( 5037 Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr)); 5038 CGF.EmitStoreOfScalar(NumDepsVal, BaseAddrLVal); 5039 llvm::PointerUnion<unsigned *, LValue *> Pos; 5040 unsigned Idx = 1; 5041 LValue PosLVal; 5042 if (Dependencies.IteratorExpr) { 5043 PosLVal = CGF.MakeAddrLValue( 5044 CGF.CreateMemTemp(C.getSizeType(), "iterator.counter.addr"), 5045 C.getSizeType()); 5046 CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Idx), PosLVal, 5047 /*IsInit=*/true); 5048 Pos = &PosLVal; 5049 } else { 5050 Pos = &Idx; 5051 } 5052 emitDependData(CGF, KmpDependInfoTy, Pos, Dependencies, DependenciesArray); 5053 DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5054 CGF.Builder.CreateConstGEP(DependenciesArray, 1), CGF.VoidPtrTy); 5055 return DependenciesArray; 5056 } 5057 5058 void CGOpenMPRuntime::emitDestroyClause(CodeGenFunction &CGF, LValue DepobjLVal, 5059 SourceLocation Loc) { 5060 ASTContext &C = CGM.getContext(); 5061 QualType FlagsTy; 5062 getDependTypes(C, KmpDependInfoTy, FlagsTy); 5063 LValue Base = CGF.EmitLoadOfPointerLValue( 5064 DepobjLVal.getAddress(CGF), 5065 C.getPointerType(C.VoidPtrTy).castAs<PointerType>()); 5066 QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy); 5067 Address Addr = 
CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5068 Base.getAddress(CGF), CGF.ConvertTypeForMem(KmpDependInfoPtrTy)); 5069 llvm::Value *DepObjAddr = CGF.Builder.CreateGEP( 5070 Addr.getElementType(), Addr.getPointer(), 5071 llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true)); 5072 DepObjAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(DepObjAddr, 5073 CGF.VoidPtrTy); 5074 llvm::Value *ThreadID = getThreadID(CGF, Loc); 5075 // Use default allocator. 5076 llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); 5077 llvm::Value *Args[] = {ThreadID, DepObjAddr, Allocator}; 5078 5079 // _kmpc_free(gtid, addr, nullptr); 5080 (void)CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 5081 CGM.getModule(), OMPRTL___kmpc_free), 5082 Args); 5083 } 5084 5085 void CGOpenMPRuntime::emitUpdateClause(CodeGenFunction &CGF, LValue DepobjLVal, 5086 OpenMPDependClauseKind NewDepKind, 5087 SourceLocation Loc) { 5088 ASTContext &C = CGM.getContext(); 5089 QualType FlagsTy; 5090 getDependTypes(C, KmpDependInfoTy, FlagsTy); 5091 RecordDecl *KmpDependInfoRD = 5092 cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl()); 5093 llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy); 5094 llvm::Value *NumDeps; 5095 LValue Base; 5096 std::tie(NumDeps, Base) = getDepobjElements(CGF, DepobjLVal, Loc); 5097 5098 Address Begin = Base.getAddress(CGF); 5099 // Cast from pointer to array type to pointer to single element. 5100 llvm::Value *End = CGF.Builder.CreateGEP( 5101 Begin.getElementType(), Begin.getPointer(), NumDeps); 5102 // The basic structure here is a while-do loop. 
5103 llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.body"); 5104 llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.done"); 5105 llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock(); 5106 CGF.EmitBlock(BodyBB); 5107 llvm::PHINode *ElementPHI = 5108 CGF.Builder.CreatePHI(Begin.getType(), 2, "omp.elementPast"); 5109 ElementPHI->addIncoming(Begin.getPointer(), EntryBB); 5110 Begin = Begin.withPointer(ElementPHI); 5111 Base = CGF.MakeAddrLValue(Begin, KmpDependInfoTy, Base.getBaseInfo(), 5112 Base.getTBAAInfo()); 5113 // deps[i].flags = NewDepKind; 5114 RTLDependenceKindTy DepKind = translateDependencyKind(NewDepKind); 5115 LValue FlagsLVal = CGF.EmitLValueForField( 5116 Base, *std::next(KmpDependInfoRD->field_begin(), Flags)); 5117 CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind), 5118 FlagsLVal); 5119 5120 // Shift the address forward by one element. 5121 Address ElementNext = 5122 CGF.Builder.CreateConstGEP(Begin, /*Index=*/1, "omp.elementNext"); 5123 ElementPHI->addIncoming(ElementNext.getPointer(), 5124 CGF.Builder.GetInsertBlock()); 5125 llvm::Value *IsEmpty = 5126 CGF.Builder.CreateICmpEQ(ElementNext.getPointer(), End, "omp.isempty"); 5127 CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB); 5128 // Done. 
5129 CGF.EmitBlock(DoneBB, /*IsFinished=*/true); 5130 } 5131 5132 void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc, 5133 const OMPExecutableDirective &D, 5134 llvm::Function *TaskFunction, 5135 QualType SharedsTy, Address Shareds, 5136 const Expr *IfCond, 5137 const OMPTaskDataTy &Data) { 5138 if (!CGF.HaveInsertPoint()) 5139 return; 5140 5141 TaskResultTy Result = 5142 emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data); 5143 llvm::Value *NewTask = Result.NewTask; 5144 llvm::Function *TaskEntry = Result.TaskEntry; 5145 llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy; 5146 LValue TDBase = Result.TDBase; 5147 const RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD; 5148 // Process list of dependences. 5149 Address DependenciesArray = Address::invalid(); 5150 llvm::Value *NumOfElements; 5151 std::tie(NumOfElements, DependenciesArray) = 5152 emitDependClause(CGF, Data.Dependences, Loc); 5153 5154 // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc() 5155 // libcall. 
5156 // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid, 5157 // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list, 5158 // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if dependence 5159 // list is not empty 5160 llvm::Value *ThreadID = getThreadID(CGF, Loc); 5161 llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc); 5162 llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask }; 5163 llvm::Value *DepTaskArgs[7]; 5164 if (!Data.Dependences.empty()) { 5165 DepTaskArgs[0] = UpLoc; 5166 DepTaskArgs[1] = ThreadID; 5167 DepTaskArgs[2] = NewTask; 5168 DepTaskArgs[3] = NumOfElements; 5169 DepTaskArgs[4] = DependenciesArray.getPointer(); 5170 DepTaskArgs[5] = CGF.Builder.getInt32(0); 5171 DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); 5172 } 5173 auto &&ThenCodeGen = [this, &Data, TDBase, KmpTaskTQTyRD, &TaskArgs, 5174 &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) { 5175 if (!Data.Tied) { 5176 auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId); 5177 LValue PartIdLVal = CGF.EmitLValueForField(TDBase, *PartIdFI); 5178 CGF.EmitStoreOfScalar(CGF.Builder.getInt32(0), PartIdLVal); 5179 } 5180 if (!Data.Dependences.empty()) { 5181 CGF.EmitRuntimeCall( 5182 OMPBuilder.getOrCreateRuntimeFunction( 5183 CGM.getModule(), OMPRTL___kmpc_omp_task_with_deps), 5184 DepTaskArgs); 5185 } else { 5186 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 5187 CGM.getModule(), OMPRTL___kmpc_omp_task), 5188 TaskArgs); 5189 } 5190 // Check if parent region is untied and build return for untied task; 5191 if (auto *Region = 5192 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) 5193 Region->emitUntiedSwitch(CGF); 5194 }; 5195 5196 llvm::Value *DepWaitTaskArgs[6]; 5197 if (!Data.Dependences.empty()) { 5198 DepWaitTaskArgs[0] = UpLoc; 5199 DepWaitTaskArgs[1] = ThreadID; 5200 DepWaitTaskArgs[2] = NumOfElements; 5201 DepWaitTaskArgs[3] = DependenciesArray.getPointer(); 5202 DepWaitTaskArgs[4] 
= CGF.Builder.getInt32(0); 5203 DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); 5204 } 5205 auto &M = CGM.getModule(); 5206 auto &&ElseCodeGen = [this, &M, &TaskArgs, ThreadID, NewTaskNewTaskTTy, 5207 TaskEntry, &Data, &DepWaitTaskArgs, 5208 Loc](CodeGenFunction &CGF, PrePostActionTy &) { 5209 CodeGenFunction::RunCleanupsScope LocalScope(CGF); 5210 // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid, 5211 // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 5212 // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info 5213 // is specified. 5214 if (!Data.Dependences.empty()) 5215 CGF.EmitRuntimeCall( 5216 OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_omp_wait_deps), 5217 DepWaitTaskArgs); 5218 // Call proxy_task_entry(gtid, new_task); 5219 auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy, 5220 Loc](CodeGenFunction &CGF, PrePostActionTy &Action) { 5221 Action.Enter(CGF); 5222 llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy}; 5223 CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskEntry, 5224 OutlinedFnArgs); 5225 }; 5226 5227 // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid, 5228 // kmp_task_t *new_task); 5229 // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid, 5230 // kmp_task_t *new_task); 5231 RegionCodeGenTy RCG(CodeGen); 5232 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction( 5233 M, OMPRTL___kmpc_omp_task_begin_if0), 5234 TaskArgs, 5235 OMPBuilder.getOrCreateRuntimeFunction( 5236 M, OMPRTL___kmpc_omp_task_complete_if0), 5237 TaskArgs); 5238 RCG.setAction(Action); 5239 RCG(CGF); 5240 }; 5241 5242 if (IfCond) { 5243 emitIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen); 5244 } else { 5245 RegionCodeGenTy ThenRCG(ThenCodeGen); 5246 ThenRCG(CGF); 5247 } 5248 } 5249 5250 void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc, 5251 const OMPLoopDirective &D, 5252 llvm::Function *TaskFunction, 5253 
QualType SharedsTy, Address Shareds, 5254 const Expr *IfCond, 5255 const OMPTaskDataTy &Data) { 5256 if (!CGF.HaveInsertPoint()) 5257 return; 5258 TaskResultTy Result = 5259 emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data); 5260 // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc() 5261 // libcall. 5262 // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int 5263 // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int 5264 // sched, kmp_uint64 grainsize, void *task_dup); 5265 llvm::Value *ThreadID = getThreadID(CGF, Loc); 5266 llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc); 5267 llvm::Value *IfVal; 5268 if (IfCond) { 5269 IfVal = CGF.Builder.CreateIntCast(CGF.EvaluateExprAsBool(IfCond), CGF.IntTy, 5270 /*isSigned=*/true); 5271 } else { 5272 IfVal = llvm::ConstantInt::getSigned(CGF.IntTy, /*V=*/1); 5273 } 5274 5275 LValue LBLVal = CGF.EmitLValueForField( 5276 Result.TDBase, 5277 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound)); 5278 const auto *LBVar = 5279 cast<VarDecl>(cast<DeclRefExpr>(D.getLowerBoundVariable())->getDecl()); 5280 CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(CGF), 5281 LBLVal.getQuals(), 5282 /*IsInitializer=*/true); 5283 LValue UBLVal = CGF.EmitLValueForField( 5284 Result.TDBase, 5285 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound)); 5286 const auto *UBVar = 5287 cast<VarDecl>(cast<DeclRefExpr>(D.getUpperBoundVariable())->getDecl()); 5288 CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(CGF), 5289 UBLVal.getQuals(), 5290 /*IsInitializer=*/true); 5291 LValue StLVal = CGF.EmitLValueForField( 5292 Result.TDBase, 5293 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTStride)); 5294 const auto *StVar = 5295 cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl()); 5296 CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(CGF), 5297 StLVal.getQuals(), 5298 /*IsInitializer=*/true); 5299 // 
Store reductions address. 5300 LValue RedLVal = CGF.EmitLValueForField( 5301 Result.TDBase, 5302 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTReductions)); 5303 if (Data.Reductions) { 5304 CGF.EmitStoreOfScalar(Data.Reductions, RedLVal); 5305 } else { 5306 CGF.EmitNullInitialization(RedLVal.getAddress(CGF), 5307 CGF.getContext().VoidPtrTy); 5308 } 5309 enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 }; 5310 llvm::Value *TaskArgs[] = { 5311 UpLoc, 5312 ThreadID, 5313 Result.NewTask, 5314 IfVal, 5315 LBLVal.getPointer(CGF), 5316 UBLVal.getPointer(CGF), 5317 CGF.EmitLoadOfScalar(StLVal, Loc), 5318 llvm::ConstantInt::getSigned( 5319 CGF.IntTy, 1), // Always 1 because taskgroup emitted by the compiler 5320 llvm::ConstantInt::getSigned( 5321 CGF.IntTy, Data.Schedule.getPointer() 5322 ? Data.Schedule.getInt() ? NumTasks : Grainsize 5323 : NoSchedule), 5324 Data.Schedule.getPointer() 5325 ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty, 5326 /*isSigned=*/false) 5327 : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0), 5328 Result.TaskDupFn ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5329 Result.TaskDupFn, CGF.VoidPtrTy) 5330 : llvm::ConstantPointerNull::get(CGF.VoidPtrTy)}; 5331 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 5332 CGM.getModule(), OMPRTL___kmpc_taskloop), 5333 TaskArgs); 5334 } 5335 5336 /// Emit reduction operation for each element of array (required for 5337 /// array sections) LHS op = RHS. 5338 /// \param Type Type of array. 5339 /// \param LHSVar Variable on the left side of the reduction operation 5340 /// (references element of array in original variable). 5341 /// \param RHSVar Variable on the right side of the reduction operation 5342 /// (references element of array in original variable). 5343 /// \param RedOpGen Generator of reduction operation with use of LHSVar and 5344 /// RHSVar. 
5345 static void EmitOMPAggregateReduction( 5346 CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar, 5347 const VarDecl *RHSVar, 5348 const llvm::function_ref<void(CodeGenFunction &CGF, const Expr *, 5349 const Expr *, const Expr *)> &RedOpGen, 5350 const Expr *XExpr = nullptr, const Expr *EExpr = nullptr, 5351 const Expr *UpExpr = nullptr) { 5352 // Perform element-by-element initialization. 5353 QualType ElementTy; 5354 Address LHSAddr = CGF.GetAddrOfLocalVar(LHSVar); 5355 Address RHSAddr = CGF.GetAddrOfLocalVar(RHSVar); 5356 5357 // Drill down to the base element type on both arrays. 5358 const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe(); 5359 llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, LHSAddr); 5360 5361 llvm::Value *RHSBegin = RHSAddr.getPointer(); 5362 llvm::Value *LHSBegin = LHSAddr.getPointer(); 5363 // Cast from pointer to array type to pointer to single element. 5364 llvm::Value *LHSEnd = 5365 CGF.Builder.CreateGEP(LHSAddr.getElementType(), LHSBegin, NumElements); 5366 // The basic structure here is a while-do loop. 5367 llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arraycpy.body"); 5368 llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arraycpy.done"); 5369 llvm::Value *IsEmpty = 5370 CGF.Builder.CreateICmpEQ(LHSBegin, LHSEnd, "omp.arraycpy.isempty"); 5371 CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB); 5372 5373 // Enter the loop body, making that address the current address. 
5374 llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock(); 5375 CGF.EmitBlock(BodyBB); 5376 5377 CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy); 5378 5379 llvm::PHINode *RHSElementPHI = CGF.Builder.CreatePHI( 5380 RHSBegin->getType(), 2, "omp.arraycpy.srcElementPast"); 5381 RHSElementPHI->addIncoming(RHSBegin, EntryBB); 5382 Address RHSElementCurrent = Address::deprecated( 5383 RHSElementPHI, 5384 RHSAddr.getAlignment().alignmentOfArrayElement(ElementSize)); 5385 5386 llvm::PHINode *LHSElementPHI = CGF.Builder.CreatePHI( 5387 LHSBegin->getType(), 2, "omp.arraycpy.destElementPast"); 5388 LHSElementPHI->addIncoming(LHSBegin, EntryBB); 5389 Address LHSElementCurrent = Address::deprecated( 5390 LHSElementPHI, 5391 LHSAddr.getAlignment().alignmentOfArrayElement(ElementSize)); 5392 5393 // Emit copy. 5394 CodeGenFunction::OMPPrivateScope Scope(CGF); 5395 Scope.addPrivate(LHSVar, [=]() { return LHSElementCurrent; }); 5396 Scope.addPrivate(RHSVar, [=]() { return RHSElementCurrent; }); 5397 Scope.Privatize(); 5398 RedOpGen(CGF, XExpr, EExpr, UpExpr); 5399 Scope.ForceCleanup(); 5400 5401 // Shift the address forward by one element. 5402 llvm::Value *LHSElementNext = CGF.Builder.CreateConstGEP1_32( 5403 LHSAddr.getElementType(), LHSElementPHI, /*Idx0=*/1, 5404 "omp.arraycpy.dest.element"); 5405 llvm::Value *RHSElementNext = CGF.Builder.CreateConstGEP1_32( 5406 RHSAddr.getElementType(), RHSElementPHI, /*Idx0=*/1, 5407 "omp.arraycpy.src.element"); 5408 // Check whether we've reached the end. 5409 llvm::Value *Done = 5410 CGF.Builder.CreateICmpEQ(LHSElementNext, LHSEnd, "omp.arraycpy.done"); 5411 CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB); 5412 LHSElementPHI->addIncoming(LHSElementNext, CGF.Builder.GetInsertBlock()); 5413 RHSElementPHI->addIncoming(RHSElementNext, CGF.Builder.GetInsertBlock()); 5414 5415 // Done. 5416 CGF.EmitBlock(DoneBB, /*IsFinished=*/true); 5417 } 5418 5419 /// Emit reduction combiner. 
If the combiner is a simple expression emit it as 5420 /// is, otherwise consider it as combiner of UDR decl and emit it as a call of 5421 /// UDR combiner function. 5422 static void emitReductionCombiner(CodeGenFunction &CGF, 5423 const Expr *ReductionOp) { 5424 if (const auto *CE = dyn_cast<CallExpr>(ReductionOp)) 5425 if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee())) 5426 if (const auto *DRE = 5427 dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts())) 5428 if (const auto *DRD = 5429 dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) { 5430 std::pair<llvm::Function *, llvm::Function *> Reduction = 5431 CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD); 5432 RValue Func = RValue::get(Reduction.first); 5433 CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func); 5434 CGF.EmitIgnoredExpr(ReductionOp); 5435 return; 5436 } 5437 CGF.EmitIgnoredExpr(ReductionOp); 5438 } 5439 5440 llvm::Function *CGOpenMPRuntime::emitReductionFunction( 5441 SourceLocation Loc, llvm::Type *ArgsType, ArrayRef<const Expr *> Privates, 5442 ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs, 5443 ArrayRef<const Expr *> ReductionOps) { 5444 ASTContext &C = CGM.getContext(); 5445 5446 // void reduction_func(void *LHSArg, void *RHSArg); 5447 FunctionArgList Args; 5448 ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 5449 ImplicitParamDecl::Other); 5450 ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 5451 ImplicitParamDecl::Other); 5452 Args.push_back(&LHSArg); 5453 Args.push_back(&RHSArg); 5454 const auto &CGFI = 5455 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 5456 std::string Name = getName({"omp", "reduction", "reduction_func"}); 5457 auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI), 5458 llvm::GlobalValue::InternalLinkage, Name, 5459 &CGM.getModule()); 5460 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI); 5461 Fn->setDoesNotRecurse(); 
5462 CodeGenFunction CGF(CGM); 5463 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc); 5464 5465 // Dst = (void*[n])(LHSArg); 5466 // Src = (void*[n])(RHSArg); 5467 Address LHS = Address::deprecated( 5468 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5469 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)), ArgsType), 5470 CGF.getPointerAlign()); 5471 Address RHS = Address::deprecated( 5472 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5473 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)), ArgsType), 5474 CGF.getPointerAlign()); 5475 5476 // ... 5477 // *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]); 5478 // ... 5479 CodeGenFunction::OMPPrivateScope Scope(CGF); 5480 const auto *IPriv = Privates.begin(); 5481 unsigned Idx = 0; 5482 for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) { 5483 const auto *RHSVar = 5484 cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl()); 5485 Scope.addPrivate(RHSVar, [&CGF, RHS, Idx, RHSVar]() { 5486 return emitAddrOfVarFromArray(CGF, RHS, Idx, RHSVar); 5487 }); 5488 const auto *LHSVar = 5489 cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl()); 5490 Scope.addPrivate(LHSVar, [&CGF, LHS, Idx, LHSVar]() { 5491 return emitAddrOfVarFromArray(CGF, LHS, Idx, LHSVar); 5492 }); 5493 QualType PrivTy = (*IPriv)->getType(); 5494 if (PrivTy->isVariablyModifiedType()) { 5495 // Get array size and emit VLA type. 
// The element count of a variably-modified private is read back from the slot
// that follows the item's own slot in the argument array.
      ++Idx;
      Address Elem = CGF.Builder.CreateConstArrayGEP(LHS, Idx);
      llvm::Value *Ptr = CGF.Builder.CreateLoad(Elem);
      const VariableArrayType *VLA =
          CGF.getContext().getAsVariableArrayType(PrivTy);
      const auto *OVE = cast<OpaqueValueExpr>(VLA->getSizeExpr());
      CodeGenFunction::OpaqueValueMapping OpaqueMap(
          CGF, OVE, RValue::get(CGF.Builder.CreatePtrToInt(Ptr, CGF.SizeTy)));
      CGF.EmitVariablyModifiedType(PrivTy);
    }
  }
  Scope.Privatize();
  IPriv = Privates.begin();
  const auto *ILHS = LHSExprs.begin();
  const auto *IRHS = RHSExprs.begin();
  for (const Expr *E : ReductionOps) {
    if ((*IPriv)->getType()->isArrayType()) {
      // Emit reduction for array section.
      const auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
      const auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
      EmitOMPAggregateReduction(
          CGF, (*IPriv)->getType(), LHSVar, RHSVar,
          [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
            emitReductionCombiner(CGF, E);
          });
    } else {
      // Emit reduction for array subscript or single variable.
      emitReductionCombiner(CGF, E);
    }
    ++IPriv;
    ++ILHS;
    ++IRHS;
  }
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}

/// Emits the combiner \p ReductionOp for a single reduction item.
///
/// When \p PrivateRef has array type the combiner is emitted through
/// EmitOMPAggregateReduction over the variables referenced by \p LHS and
/// \p RHS; otherwise it is emitted once through emitReductionCombiner.
void CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF,
                                                  const Expr *ReductionOp,
                                                  const Expr *PrivateRef,
                                                  const DeclRefExpr *LHS,
                                                  const DeclRefExpr *RHS) {
  if (PrivateRef->getType()->isArrayType()) {
    // Emit reduction for array section.
    const auto *LHSVar = cast<VarDecl>(LHS->getDecl());
    const auto *RHSVar = cast<VarDecl>(RHS->getDecl());
    EmitOMPAggregateReduction(
        CGF, PrivateRef->getType(), LHSVar, RHSVar,
        [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
          emitReductionCombiner(CGF, ReductionOp);
        });
  } else {
    // Emit reduction for array subscript or single variable.
    emitReductionCombiner(CGF, ReductionOp);
  }
}

/// Emits the reduction epilogue for the items described by \p Privates,
/// \p LHSExprs, \p RHSExprs and \p ReductionOps. The four lists are walked in
/// lockstep, one entry per reduction item.
///
/// Nothing is emitted when \p CGF has no insert point. With
/// \p Options.SimpleReduction only the inline combiners are emitted. Otherwise
/// \p Options.WithNowait selects between the __kmpc_reduce/__kmpc_end_reduce
/// and the __kmpc_reduce_nowait/__kmpc_end_reduce_nowait runtime entries.
void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
                                    ArrayRef<const Expr *> Privates,
                                    ArrayRef<const Expr *> LHSExprs,
                                    ArrayRef<const Expr *> RHSExprs,
                                    ArrayRef<const Expr *> ReductionOps,
                                    ReductionOptionsTy Options) {
  if (!CGF.HaveInsertPoint())
    return;

  bool WithNowait = Options.WithNowait;
  bool SimpleReduction = Options.SimpleReduction;

  // Next code should be emitted for reduction:
  //
  // static kmp_critical_name lock = { 0 };
  //
  // void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
  //  *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]);
  //  ...
  //  *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1],
  //  *(Type<n>-1*)rhs[<n>-1]);
  // }
  //
  // ...
  // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]};
  // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
  // RedList, reduce_func, &<lock>)) {
  // case 1:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...
  // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  // break;
  // case 2:
  //  ...
  //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
  //  ...
  // [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);]
  // break;
  // default:;
  // }
  //
  // if SimpleReduction is true, only the next code is generated:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...

  ASTContext &C = CGM.getContext();

  if (SimpleReduction) {
    // No runtime call, no switch: just the combiners, inside their own
    // cleanups scope.
    CodeGenFunction::RunCleanupsScope Scope(CGF);
    const auto *IPriv = Privates.begin();
    const auto *ILHS = LHSExprs.begin();
    const auto *IRHS = RHSExprs.begin();
    for (const Expr *E : ReductionOps) {
      emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
                                  cast<DeclRefExpr>(*IRHS));
      ++IPriv;
      ++ILHS;
      ++IRHS;
    }
    return;
  }

  // 1. Build a list of reduction variables.
  // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]};
  // Every variably-modified private takes one extra slot that carries its
  // element count, so the list may be longer than RHSExprs.
  auto Size = RHSExprs.size();
  for (const Expr *E : Privates) {
    if (E->getType()->isVariablyModifiedType())
      // Reserve place for array size.
      ++Size;
  }
  llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size);
  QualType ReductionArrayTy =
      C.getConstantArrayType(C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal,
                             /*IndexTypeQuals=*/0);
  Address ReductionList =
      CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list");
  const auto *IPriv = Privates.begin();
  // Idx is the slot index in RedList; it runs ahead of I whenever a size slot
  // has been inserted.
  unsigned Idx = 0;
  for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) {
    Address Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
    CGF.Builder.CreateStore(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            CGF.EmitLValue(RHSExprs[I]).getPointer(CGF), CGF.VoidPtrTy),
        Elem);
    if ((*IPriv)->getType()->isVariablyModifiedType()) {
      // Store array size.
      ++Idx;
      Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
      // Note: this local deliberately-or-not shadows the slot counter `Size`
      // declared above; inside this scope it is the VLA element count, which
      // is stored in the slot disguised as a pointer (inttoptr).
      llvm::Value *Size = CGF.Builder.CreateIntCast(
          CGF.getVLASize(
                 CGF.getContext().getAsVariableArrayType((*IPriv)->getType()))
              .NumElts,
          CGF.SizeTy, /*isSigned=*/false);
      CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy),
                              Elem);
    }
  }

  // 2. Emit reduce_func().
  llvm::Function *ReductionFn = emitReductionFunction(
      Loc, CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo(), Privates,
      LHSExprs, RHSExprs, ReductionOps);

  // 3. Create static kmp_critical_name lock = { 0 };
  std::string Name = getName({"reduction"});
  llvm::Value *Lock = getCriticalRegionLock(Name);

  // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
  // RedList, reduce_func, &<lock>);
  llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE);
  llvm::Value *ThreadId = getThreadID(CGF, Loc);
  llvm::Value *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy);
  llvm::Value *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      ReductionList.getPointer(), CGF.VoidPtrTy);
  // <n> is the number of reduction items (RHSExprs.size()), not the number of
  // RedList slots; the extra VLA-size slots are only reflected in
  // sizeof(RedList).
  llvm::Value *Args[] = {
      IdentTLoc,                             // ident_t *<loc>
      ThreadId,                              // i32 <gtid>
      CGF.Builder.getInt32(RHSExprs.size()), // i32 <n>
      ReductionArrayTySize,                  // size_type sizeof(RedList)
      RL,                                    // void *RedList
      ReductionFn, // void (*) (void *, void *) <reduce_func>
      Lock         // kmp_critical_name *&<lock>
  };
  llvm::Value *Res = CGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(),
          WithNowait ? OMPRTL___kmpc_reduce_nowait : OMPRTL___kmpc_reduce),
      Args);

  // 5. Build switch(res)
  llvm::BasicBlock *DefaultBB = CGF.createBasicBlock(".omp.reduction.default");
  llvm::SwitchInst *SwInst =
      CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2);

  // 6. Build case 1:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...
  // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  // break;
  llvm::BasicBlock *Case1BB = CGF.createBasicBlock(".omp.reduction.case1");
  SwInst->addCase(CGF.Builder.getInt32(1), Case1BB);
  CGF.EmitBlock(Case1BB);

  // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  llvm::Value *EndArgs[] = {
      IdentTLoc, // ident_t *<loc>
      ThreadId,  // i32 <gtid>
      Lock       // kmp_critical_name *&<lock>
  };
  // Body of case 1: the plain (non-atomic) combiners for every item.
  auto &&CodeGen = [Privates, LHSExprs, RHSExprs, ReductionOps](
                       CodeGenFunction &CGF, PrePostActionTy &Action) {
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    const auto *IPriv = Privates.begin();
    const auto *ILHS = LHSExprs.begin();
    const auto *IRHS = RHSExprs.begin();
    for (const Expr *E : ReductionOps) {
      RT.emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
                                     cast<DeclRefExpr>(*IRHS));
      ++IPriv;
      ++ILHS;
      ++IRHS;
    }
  };
  RegionCodeGenTy RCG(CodeGen);
  // Only the second callee/argument pair of the action is populated (the
  // first pair is nullptr/None); it carries the matching end_reduce call.
  CommonActionTy Action(
      nullptr, llvm::None,
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(), WithNowait ? OMPRTL___kmpc_end_reduce_nowait
                                      : OMPRTL___kmpc_end_reduce),
      EndArgs);
  RCG.setAction(Action);
  RCG(CGF);

  CGF.EmitBranch(DefaultBB);

  // 7. Build case 2:
  //  ...
  //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
  //  ...
  // break;
  llvm::BasicBlock *Case2BB = CGF.createBasicBlock(".omp.reduction.case2");
  SwInst->addCase(CGF.Builder.getInt32(2), Case2BB);
  CGF.EmitBlock(Case2BB);

  auto &&AtomicCodeGen = [Loc, Privates, LHSExprs, RHSExprs, ReductionOps](
                             CodeGenFunction &CGF, PrePostActionTy &Action) {
    const auto *ILHS = LHSExprs.begin();
    const auto *IRHS = RHSExprs.begin();
    const auto *IPriv = Privates.begin();
    for (const Expr *E : ReductionOps) {
      // XExpr  - the updated lvalue (LHS of `x = ...`),
      // UpExpr - the whole update expression (RHS of the assignment),
      // EExpr  - the right operand of the binary operator found in UpExpr.
      // All stay null unless E is an assignment of the expected shape.
      const Expr *XExpr = nullptr;
      const Expr *EExpr = nullptr;
      const Expr *UpExpr = nullptr;
      BinaryOperatorKind BO = BO_Comma;
      // Note: the pointer `BO` declared in the condition shadows the
      // BinaryOperatorKind `BO` above for the extent of this `if` only.
      if (const auto *BO = dyn_cast<BinaryOperator>(E)) {
        if (BO->getOpcode() == BO_Assign) {
          XExpr = BO->getLHS();
          UpExpr = BO->getRHS();
        }
      }
      // Try to emit update expression as a simple atomic.
      const Expr *RHSExpr = UpExpr;
      if (RHSExpr) {
        // Analyze RHS part of the whole expression.
        if (const auto *ACO = dyn_cast<AbstractConditionalOperator>(
                RHSExpr->IgnoreParenImpCasts())) {
          // If this is a conditional operator, analyze its condition for
          // min/max reduction operator.
          RHSExpr = ACO->getCond();
        }
        if (const auto *BORHS =
                dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) {
          EExpr = BORHS->getRHS();
          BO = BORHS->getOpcode();
        }
      }
      if (XExpr) {
        const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
        auto &&AtomicRedGen = [BO, VD,
                               Loc](CodeGenFunction &CGF, const Expr *XExpr,
                                    const Expr *EExpr, const Expr *UpExpr) {
          LValue X = CGF.EmitLValue(XExpr);
          RValue E;
          if (EExpr)
            E = CGF.EmitAnyExpr(EExpr);
          // The trailing callback recomputes UpExpr from a given value of X:
          // VD is privatized to a temporary holding that value, then UpExpr
          // is evaluated against it.
          CGF.EmitOMPAtomicSimpleUpdateExpr(
              X, E, BO, /*IsXLHSInRHSPart=*/true,
              llvm::AtomicOrdering::Monotonic, Loc,
              [&CGF, UpExpr, VD, Loc](RValue XRValue) {
                CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
                PrivateScope.addPrivate(
                    VD, [&CGF, VD, XRValue, Loc]() {
                      Address LHSTemp = CGF.CreateMemTemp(VD->getType());
                      CGF.emitOMPSimpleStore(
                          CGF.MakeAddrLValue(LHSTemp, VD->getType()), XRValue,
                          VD->getType().getNonReferenceType(), Loc);
                      return LHSTemp;
                    });
                (void)PrivateScope.Privatize();
                return CGF.EmitAnyExpr(UpExpr);
              });
        };
        if ((*IPriv)->getType()->isArrayType()) {
          // Emit atomic reduction for array section.
          const auto *RHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
          EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar,
                                    AtomicRedGen, XExpr, EExpr, UpExpr);
        } else {
          // Emit atomic reduction for array subscript or single variable.
          AtomicRedGen(CGF, XExpr, EExpr, UpExpr);
        }
      } else {
        // Emit as a critical region.
        auto &&CritRedGen = [E, Loc](CodeGenFunction &CGF, const Expr *,
                                     const Expr *, const Expr *) {
          CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
          std::string Name = RT.getName({"atomic_reduction"});
          RT.emitCriticalRegion(
              CGF, Name,
              [=](CodeGenFunction &CGF, PrePostActionTy &Action) {
                Action.Enter(CGF);
                emitReductionCombiner(CGF, E);
              },
              Loc);
        };
        if ((*IPriv)->getType()->isArrayType()) {
          const auto *LHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
          const auto *RHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
          EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar,
                                    CritRedGen);
        } else {
          CritRedGen(CGF, nullptr, nullptr, nullptr);
        }
      }
      ++ILHS;
      ++IRHS;
      ++IPriv;
    }
  };
  RegionCodeGenTy AtomicRCG(AtomicCodeGen);
  if (!WithNowait) {
    // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>);
    // (In the nowait form no end call is attached to case 2.)
    llvm::Value *EndArgs[] = {
        IdentTLoc, // ident_t *<loc>
        ThreadId,  // i32 <gtid>
        Lock       // kmp_critical_name *&<lock>
    };
    CommonActionTy Action(nullptr, llvm::None,
                          OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_end_reduce),
                          EndArgs);
    AtomicRCG.setAction(Action);
    AtomicRCG(CGF);
  } else {
    AtomicRCG(CGF);
  }

  CGF.EmitBranch(DefaultBB);
  CGF.EmitBlock(DefaultBB, /*IsFinished=*/true);
}

/// Generates unique name for artificial threadprivate variables.
/// Format is: <Prefix> "."
/// <Decl_mangled_name> "_" "<Decl_start_loc_raw_enc>"
static std::string generateUniqueName(CodeGenModule &CGM, StringRef Prefix,
                                      const Expr *Ref) {
  SmallString<256> Buffer;
  llvm::raw_svector_ostream Out(Buffer);
  const clang::DeclRefExpr *DE;
  // Prefer the base declaration reported by getBaseDecl; when it reports
  // none, Ref itself must be a DeclRefExpr to a variable.
  const VarDecl *D = ::getBaseDecl(Ref, DE);
  if (!D)
    D = cast<VarDecl>(cast<DeclRefExpr>(Ref)->getDecl());
  D = D->getCanonicalDecl();
  // Local variables and parameters contribute their plain name, everything
  // else its mangled name.
  std::string Name = CGM.getOpenMPRuntime().getName(
      {D->isLocalVarDeclOrParm() ? D->getName() : CGM.getMangledName(D)});
  Out << Prefix << Name << "_"
      << D->getCanonicalDecl()->getBeginLoc().getRawEncoding();
  return std::string(Out.str());
}

/// Emits reduction initializer function:
/// \code
/// void @.red_init(void* %arg, void* %orig) {
/// %0 = bitcast void* %arg to <type>*
/// store <type> <init>, <type>* %0
/// ret void
/// }
/// \endcode
/// \param N Index of the reduction item inside \p RCG.
/// \returns The newly created internal function.
static llvm::Value *emitReduceInitFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N) {
  ASTContext &C = CGM.getContext();
  // Both parameters are declared as restrict-qualified void pointers.
  QualType VoidPtrTy = C.VoidPtrTy;
  VoidPtrTy.addRestrict();
  FunctionArgList Args;
  ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
                          ImplicitParamDecl::Other);
  ImplicitParamDecl ParamOrig(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
                              ImplicitParamDecl::Other);
  Args.emplace_back(&Param);
  Args.emplace_back(&ParamOrig);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_init", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  // The helper is emitted with its own CodeGenFunction, independent of the
  // function that is currently being generated by the caller.
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  Address PrivateAddr = CGF.EmitLoadOfPointer(
      CGF.GetAddrOfLocalVar(&Param),
      C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  Address OrigAddr = Address::invalid();
  // If initializer uses initializer from declare reduction construct, emit a
  // pointer to the address of the original reduction item (required by
  // reduction initializer)
  if (RCG.usesReductionInitializer(N)) {
    Address SharedAddr = CGF.GetAddrOfLocalVar(&ParamOrig);
    OrigAddr = CGF.EmitLoadOfPointer(
        SharedAddr,
        CGM.getContext().VoidPtrTy.castAs<PointerType>()->getTypePtr());
  }
  // Emit the initializer:
  // %0 = bitcast void* %arg to <type>*
  // store <type> <init>, <type>* %0
  RCG.emitInitialization(CGF, N, PrivateAddr, OrigAddr,
                         [](CodeGenFunction &) { return false; });
  CGF.FinishFunction();
  return Fn;
}

/// Emits reduction combiner function:
/// \code
/// void @.red_comb(void* %arg0, void* %arg1) {
/// %lhs = bitcast void* %arg0 to <type>*
/// %rhs = bitcast void* %arg1 to <type>*
/// %2 = <ReductionOp>(<type>* %lhs, <type>* %rhs)
/// store <type> %2, <type>* %lhs
/// ret void
/// }
/// \endcode
/// \param N Index of the reduction item inside \p RCG.
/// \param LHS, RHS DeclRefExprs to the variables that \p ReductionOp reads
/// and writes; they are remapped onto the two function arguments.
/// \returns The newly created internal function.
static llvm::Value *emitReduceCombFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N,
                                           const Expr *ReductionOp,
                                           const Expr *LHS, const Expr *RHS,
                                           const Expr *PrivateRef) {
  ASTContext &C = CGM.getContext();
  const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(LHS)->getDecl());
  const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(RHS)->getDecl());
  FunctionArgList Args;
  ImplicitParamDecl ParamInOut(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                               C.VoidPtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl ParamIn(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                            ImplicitParamDecl::Other);
  Args.emplace_back(&ParamInOut);
  Args.emplace_back(&ParamIn);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_comb", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  // Remap lhs and rhs variables to the addresses of the function arguments.
  // %lhs = bitcast void* %arg0 to <type>*
  // %rhs = bitcast void* %arg1 to <type>*
  CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
  PrivateScope.addPrivate(LHSVD, [&C, &CGF, &ParamInOut, LHSVD]() {
    // Pull out the pointer to the variable.
    Address PtrAddr = CGF.EmitLoadOfPointer(
        CGF.GetAddrOfLocalVar(&ParamInOut),
        C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
    return CGF.Builder.CreateElementBitCast(
        PtrAddr, CGF.ConvertTypeForMem(LHSVD->getType()));
  });
  PrivateScope.addPrivate(RHSVD, [&C, &CGF, &ParamIn, RHSVD]() {
    // Pull out the pointer to the variable.
    Address PtrAddr = CGF.EmitLoadOfPointer(
        CGF.GetAddrOfLocalVar(&ParamIn),
        C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
    return CGF.Builder.CreateElementBitCast(
        PtrAddr, CGF.ConvertTypeForMem(RHSVD->getType()));
  });
  PrivateScope.Privatize();
  // Emit the combiner body:
  // %2 = <ReductionOp>(<type> *%lhs, <type> *%rhs)
  // store <type> %2, <type>* %lhs
  CGM.getOpenMPRuntime().emitSingleReductionCombiner(
      CGF, ReductionOp, PrivateRef, cast<DeclRefExpr>(LHS),
      cast<DeclRefExpr>(RHS));
  CGF.FinishFunction();
  return Fn;
}

/// Emits reduction finalizer function:
/// \code
/// void @.red_fini(void* %arg) {
/// %0 = bitcast void* %arg to <type>*
/// <destroy>(<type>* %0)
/// ret void
/// }
/// \endcode
/// \param N Index of the reduction item inside \p RCG.
/// \returns The newly created internal function, or nullptr when
/// RCG.needCleanups(N) is false (no function is created in that case).
static llvm::Value *emitReduceFiniFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N) {
  if (!RCG.needCleanups(N))
    return nullptr;
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                          ImplicitParamDecl::Other);
  Args.emplace_back(&Param);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_fini", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  Address PrivateAddr = CGF.EmitLoadOfPointer(
      CGF.GetAddrOfLocalVar(&Param),
      C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  // Emit the finalizer body:
  // <destroy>(<type>* %0)
  RCG.emitCleanups(CGF, N, PrivateAddr);
  CGF.FinishFunction(Loc);
  return Fn;
}

/// Fills a temporary array of kmp_taskred_input_t records, one per entry of
/// \p Data.ReductionVars, and hands it to the runtime:
/// __kmpc_taskred_modifier_init when \p Data.IsReductionWithTaskMod is set,
/// __kmpc_taskred_init otherwise.
///
/// \p LHSExprs and \p RHSExprs are indexed in parallel with the reduction
/// lists in \p Data.
/// \returns The value of the emitted runtime call, or nullptr when \p CGF has
/// no insert point or there are no reduction variables.
llvm::Value *CGOpenMPRuntime::emitTaskReductionInit(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
    ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint() || Data.ReductionVars.empty())
    return nullptr;

  // Build typedef struct:
  // kmp_taskred_input {
  //   void *reduce_shar; // shared reduction item
  //   void *reduce_orig; // original reduction item used for initialization
  //   size_t reduce_size; // size of data item
  //   void *reduce_init; // data initialization routine
  //   void *reduce_fini; // data finalization routine
  //   void *reduce_comb; // data combiner routine
  //   kmp_task_red_flags_t flags; // flags for additional info from compiler
  // } kmp_taskred_input_t;
  // The fields below are added in exactly this order.
  ASTContext &C = CGM.getContext();
  RecordDecl *RD = C.buildImplicitRecord("kmp_taskred_input_t");
  RD->startDefinition();
  const FieldDecl *SharedFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *OrigFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *SizeFD = addFieldToRecordDecl(C, RD, C.getSizeType());
  const FieldDecl *InitFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *FiniFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *CombFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *FlagsFD = addFieldToRecordDecl(
      C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/false));
  RD->completeDefinition();
  QualType RDType = C.getRecordType(RD);
  unsigned Size = Data.ReductionVars.size();
  llvm::APInt ArraySize(/*numBits=*/64, Size);
  QualType ArrayRDType = C.getConstantArrayType(
      RDType, ArraySize, nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0);
  // kmp_taskred_input_t .rd_input.[Size];
  Address TaskRedInput = CGF.CreateMemTemp(ArrayRDType, ".rd_input.");
  ReductionCodeGen RCG(Data.ReductionVars, Data.ReductionOrigs,
                       Data.ReductionCopies, Data.ReductionOps);
  for (unsigned Cnt = 0; Cnt < Size; ++Cnt) {
    // kmp_taskred_input_t &ElemLVal = .rd_input.[Cnt];
    llvm::Value *Idxs[] = {llvm::ConstantInt::get(CGM.SizeTy, /*V=*/0),
                           llvm::ConstantInt::get(CGM.SizeTy, Cnt)};
    llvm::Value *GEP = CGF.EmitCheckedInBoundsGEP(
        TaskRedInput.getElementType(), TaskRedInput.getPointer(), Idxs,
        /*SignedIndices=*/false, /*IsSubtraction=*/false, Loc,
        ".rd_input.gep.");
    LValue ElemLVal = CGF.MakeNaturalAlignAddrLValue(GEP, RDType);
    // ElemLVal.reduce_shar = &Shareds[Cnt];
    LValue SharedLVal = CGF.EmitLValueForField(ElemLVal, SharedFD);
    RCG.emitSharedOrigLValue(CGF, Cnt);
    llvm::Value *CastedShared =
        CGF.EmitCastToVoidPtr(RCG.getSharedLValue(Cnt).getPointer(CGF));
    CGF.EmitStoreOfScalar(CastedShared, SharedLVal);
    // ElemLVal.reduce_orig = &Origs[Cnt];
    LValue OrigLVal = CGF.EmitLValueForField(ElemLVal, OrigFD);
    llvm::Value *CastedOrig =
        CGF.EmitCastToVoidPtr(RCG.getOrigLValue(Cnt).getPointer(CGF));
    CGF.EmitStoreOfScalar(CastedOrig, OrigLVal);
    RCG.emitAggregateType(CGF, Cnt);
    llvm::Value *SizeValInChars;
    llvm::Value *SizeVal;
    std::tie(SizeValInChars, SizeVal) = RCG.getSizes(Cnt);
    // We use delayed creation/initialization for VLAs and array sections. It is
    // required because runtime does not provide the way to pass the sizes of
    // VLAs/array sections to initializer/combiner/finalizer functions. Instead
    // threadprivate global variables are used to store these values and use
    // them in the functions.
    bool DelayedCreation = !!SizeVal;
    // ElemLVal.reduce_size = <size in chars>;
    SizeValInChars = CGF.Builder.CreateIntCast(SizeValInChars, CGM.SizeTy,
                                               /*isSigned=*/false);
    LValue SizeLVal = CGF.EmitLValueForField(ElemLVal, SizeFD);
    CGF.EmitStoreOfScalar(SizeValInChars, SizeLVal);
    // ElemLVal.reduce_init = init;
    LValue InitLVal = CGF.EmitLValueForField(ElemLVal, InitFD);
    llvm::Value *InitAddr =
        CGF.EmitCastToVoidPtr(emitReduceInitFunction(CGM, Loc, RCG, Cnt));
    CGF.EmitStoreOfScalar(InitAddr, InitLVal);
    // ElemLVal.reduce_fini = fini;
    // A null pointer is stored when the item needs no finalizer.
    LValue FiniLVal = CGF.EmitLValueForField(ElemLVal, FiniFD);
    llvm::Value *Fini = emitReduceFiniFunction(CGM, Loc, RCG, Cnt);
    llvm::Value *FiniAddr = Fini
                                ? CGF.EmitCastToVoidPtr(Fini)
                                : llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
    CGF.EmitStoreOfScalar(FiniAddr, FiniLVal);
    // ElemLVal.reduce_comb = comb;
    LValue CombLVal = CGF.EmitLValueForField(ElemLVal, CombFD);
    llvm::Value *CombAddr = CGF.EmitCastToVoidPtr(emitReduceCombFunction(
        CGM, Loc, RCG, Cnt, Data.ReductionOps[Cnt], LHSExprs[Cnt],
        RHSExprs[Cnt], Data.ReductionCopies[Cnt]));
    CGF.EmitStoreOfScalar(CombAddr, CombLVal);
    // ElemLVal.flags = DelayedCreation ? 1 : 0;
    LValue FlagsLVal = CGF.EmitLValueForField(ElemLVal, FlagsFD);
    if (DelayedCreation) {
      CGF.EmitStoreOfScalar(
          llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/1, /*isSigned=*/true),
          FlagsLVal);
    } else
      CGF.EmitNullInitialization(FlagsLVal.getAddress(CGF),
                                 FlagsLVal.getType());
  }
  if (Data.IsReductionWithTaskMod) {
    // Build call void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int
    // is_ws, int num, void *data);
    llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
    llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
                                                  CGM.IntTy, /*isSigned=*/true);
    llvm::Value *Args[] = {
        IdentTLoc, GTid,
        llvm::ConstantInt::get(CGM.IntTy, Data.IsWorksharingReduction ? 1 : 0,
                               /*isSigned=*/true),
        llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            TaskRedInput.getPointer(), CGM.VoidPtrTy)};
    return CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_taskred_modifier_init),
        Args);
  }
  // Build call void *__kmpc_taskred_init(int gtid, int num_data, void *data);
  llvm::Value *Args[] = {
      CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy,
                                /*isSigned=*/true),
      llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TaskRedInput.getPointer(),
                                                      CGM.VoidPtrTy)};
  return CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                                 CGM.getModule(), OMPRTL___kmpc_taskred_init),
                             Args);
}

/// Emits a call to __kmpc_task_reduction_modifier_fini with the location, the
/// thread id and \p IsWorksharingReduction encoded as 1/0. The value of the
/// call is discarded.
///
/// NOTE(review): unlike most emitters in this file there is no
/// HaveInsertPoint() guard here - confirm that callers guarantee one.
void CGOpenMPRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
                                            SourceLocation Loc,
                                            bool IsWorksharingReduction) {
  // Build call void __kmpc_task_reduction_modifier_fini(ident_t *loc, int
  // gtid, int is_ws);
  llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
  llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
                                                CGM.IntTy, /*isSigned=*/true);
  llvm::Value *Args[] = {IdentTLoc, GTid,
                         llvm::ConstantInt::get(CGM.IntTy,
                                                IsWorksharingReduction ? 1 : 0,
                                                /*isSigned=*/true)};
  (void)CGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(), OMPRTL___kmpc_task_reduction_modifier_fini),
      Args);
}

/// Publishes the runtime size of reduction item \p N of \p RCG through the
/// artificial threadprivate variable named
/// generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)) - the same
/// variable the generated init/comb/fini helpers load the size from.
/// Nothing is emitted when RCG.getSizes(N).second is null.
void CGOpenMPRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              ReductionCodeGen &RCG,
                                              unsigned N) {
  auto Sizes = RCG.getSizes(N);
  // Emit threadprivate global variable if the type is non-constant
  // (Sizes.second != nullptr).
  if (Sizes.second) {
    llvm::Value *SizeVal = CGF.Builder.CreateIntCast(Sizes.second, CGM.SizeTy,
                                                     /*isSigned=*/false);
    Address SizeAddr = getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    CGF.Builder.CreateStore(SizeVal, SizeAddr, /*IsVolatile=*/false);
  }
}

/// Emits a call to __kmpc_task_reduction_get_th_data for the item whose
/// shared lvalue is \p SharedLVal, using \p ReductionsPtr as the second
/// argument.
/// \returns An Address wrapping the pointer returned by the runtime call; it
/// reuses the alignment of \p SharedLVal.
Address CGOpenMPRuntime::getTaskReductionItem(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              llvm::Value *ReductionsPtr,
                                              LValue SharedLVal) {
  // Build call void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
  // *d);
  llvm::Value *Args[] = {CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
                                                   CGM.IntTy,
                                                   /*isSigned=*/true),
                         ReductionsPtr,
                         CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                             SharedLVal.getPointer(CGF), CGM.VoidPtrTy)};
  return Address::deprecated(
      CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___kmpc_task_reduction_get_th_data),
          Args),
      SharedLVal.getAlignment());
}

/// Emits code for a 'taskwait'.
///
/// With the OpenMPIRBuilder language option enabled and no dependences the
/// call is delegated to OMPBuilder.createTaskwait. Otherwise
/// __kmpc_omp_wait_deps is called when \p Data.Dependences is non-empty and
/// __kmpc_omp_taskwait when it is empty. In every case, if the current
/// captured-statement info is a CGOpenMPRegionInfo, its emitUntiedSwitch is
/// invoked afterwards. Nothing is emitted without an insert point.
void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint())
    return;

  if (CGF.CGM.getLangOpts().OpenMPIRBuilder && Data.Dependences.empty()) {
    // TODO: Need to support taskwait with dependences in the OpenMPIRBuilder.
    OMPBuilder.createTaskwait(CGF.Builder);
  } else {
    llvm::Value *ThreadID = getThreadID(CGF, Loc);
    llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
    auto &M = CGM.getModule();
    Address DependenciesArray = Address::invalid();
    llvm::Value *NumOfElements;
    // Note: emitDependClause is invoked even when there are no dependences;
    // its results are only consumed in the non-empty branch below.
    std::tie(NumOfElements, DependenciesArray) =
        emitDependClause(CGF, Data.Dependences, Loc);
    llvm::Value *DepWaitTaskArgs[6];
    if (!Data.Dependences.empty()) {
      DepWaitTaskArgs[0] = UpLoc;
      DepWaitTaskArgs[1] = ThreadID;
      DepWaitTaskArgs[2] = NumOfElements;
      DepWaitTaskArgs[3] = DependenciesArray.getPointer();
      // No noalias dependences are passed: count 0 and a null list.
      DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
      DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);

      CodeGenFunction::RunCleanupsScope LocalScope(CGF);

      // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
      // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
      // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info
      // is specified.
      CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_omp_wait_deps),
          DepWaitTaskArgs);

    } else {

      // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
      // global_tid);
      llvm::Value *Args[] = {UpLoc, ThreadID};
      // Ignore return result until untied tasks are supported.
      CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_omp_taskwait),
          Args);
    }
  }

  if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
    Region->emitUntiedSwitch(CGF);
}

/// Emits the body produced by \p CodeGen inline, inside an
/// InlinedOpenMPRegionRAII scope created for directive \p InnerKind.
/// Nothing is emitted without an insert point.
void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF,
                                           OpenMPDirectiveKind InnerKind,
                                           const RegionCodeGenTy &CodeGen,
                                           bool HasCancel) {
  if (!CGF.HaveInsertPoint())
    return;
  // The last constructor argument is false exactly for critical, master and
  // masked. NOTE(review): its meaning is defined by InlinedOpenMPRegionRAII,
  // which is not visible in this part of the file.
  InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel,
                                 InnerKind != OMPD_critical &&
                                     InnerKind != OMPD_master &&
                                     InnerKind != OMPD_masked);
  CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr);
}

namespace {
/// Cancellation kind codes, passed as the cncl_kind argument of
/// __kmpc_cancel and __kmpc_cancellationpoint.
enum RTCancelKind {
  CancelNoreq = 0,
  CancelParallel = 1,
  CancelLoop = 2,
  CancelSections = 3,
  CancelTaskgroup = 4
};
} // anonymous namespace

/// Maps the directive kind of a cancelled region to its RTCancelKind code.
/// Any kind other than parallel, for or sections is expected to be taskgroup
/// (checked by an assertion) and maps to CancelTaskgroup.
static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) {
  RTCancelKind CancelKind = CancelNoreq;
  if (CancelRegion == OMPD_parallel)
    CancelKind = CancelParallel;
  else if (CancelRegion == OMPD_for)
    CancelKind = CancelLoop;
  else if (CancelRegion == OMPD_sections)
    CancelKind = CancelSections;
  else {
    assert(CancelRegion == OMPD_taskgroup);
    CancelKind = CancelTaskgroup;
  }
  return CancelKind;
}

/// Emits a 'cancellation point' for \p CancelRegion.
///
/// Code is emitted only when the current captured-statement info is a
/// CGOpenMPRegionInfo and either \p CancelRegion is OMPD_taskgroup or that
/// region reports hasCancel(). Nothing is emitted without an insert point.
void CGOpenMPRuntime::emitCancellationPointCall(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDirectiveKind CancelRegion) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
  // global_tid, kmp_int32 cncl_kind);
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    // For 'cancellation point taskgroup', the task region info may not have a
    // cancel. This may instead happen in another adjacent task.
    if (CancelRegion == OMPD_taskgroup || OMPRegionInfo->hasCancel()) {
      llvm::Value *Args[] = {
          emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
          CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
      // The result is tested below: a non-zero value takes the exit path.
      llvm::Value *Result = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___kmpc_cancellationpoint),
          Args);
      // if (__kmpc_cancellationpoint()) {
      //   call i32 @__kmpc_cancel_barrier(); // for parallel cancellation only
      //   exit from construct;
      // }
      llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
      llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
      llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
      CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
      CGF.EmitBlock(ExitBB);
      if (CancelRegion == OMPD_parallel)
        emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false);
      // exit from construct;
      CodeGenFunction::JumpDest CancelDest =
          CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
      CGF.EmitBranchThroughCleanup(CancelDest);
      CGF.EmitBlock(ContBB, /*IsFinished=*/true);
    }
  }
}

/// Emits a 'cancel' of \p CancelRegion.
///
/// Code is emitted only when the current captured-statement info is a
/// CGOpenMPRegionInfo. When \p IfCond is non-null the cancellation sequence
/// becomes the 'then' branch of emitIfClause with an empty 'else' branch;
/// otherwise it is emitted unconditionally. Nothing is emitted without an
/// insert point.
void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc,
                                     const Expr *IfCond,
                                     OpenMPDirectiveKind CancelRegion) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
  // kmp_int32 cncl_kind);
  auto &M = CGM.getModule();
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    auto &&ThenGen = [this, &M, Loc, CancelRegion,
                      OMPRegionInfo](CodeGenFunction &CGF, PrePostActionTy &) {
      CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
      llvm::Value *Args[] = {
          RT.emitUpdateLocation(CGF, Loc), RT.getThreadID(CGF, Loc),
          CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
      // The result is tested below: a non-zero value takes the exit path.
      llvm::Value *Result = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_cancel), Args);
      // if (__kmpc_cancel()) {
      //   call i32 @__kmpc_cancel_barrier(); // for parallel cancellation only
      //   exit from construct;
      // }
      llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
      llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
      llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
      CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
      CGF.EmitBlock(ExitBB);
      if (CancelRegion == OMPD_parallel)
        RT.emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false);
      // exit from construct;
      CodeGenFunction::JumpDest CancelDest =
          CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
      CGF.EmitBranchThroughCleanup(CancelDest);
      CGF.EmitBlock(ContBB, /*IsFinished=*/true);
    };
    if (IfCond) {
      emitIfClause(CGF, IfCond, ThenGen,
                   [](CodeGenFunction &, PrePostActionTy &) {});
    } else {
      RegionCodeGenTy ThenRCG(ThenGen);
      ThenRCG(CGF);
    }
  }
}

namespace {
/// Cleanup action for uses_allocators support.
6431 class OMPUsesAllocatorsActionTy final : public PrePostActionTy { 6432 ArrayRef<std::pair<const Expr *, const Expr *>> Allocators; 6433 6434 public: 6435 OMPUsesAllocatorsActionTy( 6436 ArrayRef<std::pair<const Expr *, const Expr *>> Allocators) 6437 : Allocators(Allocators) {} 6438 void Enter(CodeGenFunction &CGF) override { 6439 if (!CGF.HaveInsertPoint()) 6440 return; 6441 for (const auto &AllocatorData : Allocators) { 6442 CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsInit( 6443 CGF, AllocatorData.first, AllocatorData.second); 6444 } 6445 } 6446 void Exit(CodeGenFunction &CGF) override { 6447 if (!CGF.HaveInsertPoint()) 6448 return; 6449 for (const auto &AllocatorData : Allocators) { 6450 CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsFini(CGF, 6451 AllocatorData.first); 6452 } 6453 } 6454 }; 6455 } // namespace 6456 6457 void CGOpenMPRuntime::emitTargetOutlinedFunction( 6458 const OMPExecutableDirective &D, StringRef ParentName, 6459 llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, 6460 bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) { 6461 assert(!ParentName.empty() && "Invalid target region parent name!"); 6462 HasEmittedTargetRegion = true; 6463 SmallVector<std::pair<const Expr *, const Expr *>, 4> Allocators; 6464 for (const auto *C : D.getClausesOfKind<OMPUsesAllocatorsClause>()) { 6465 for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) { 6466 const OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I); 6467 if (!D.AllocatorTraits) 6468 continue; 6469 Allocators.emplace_back(D.Allocator, D.AllocatorTraits); 6470 } 6471 } 6472 OMPUsesAllocatorsActionTy UsesAllocatorAction(Allocators); 6473 CodeGen.setAction(UsesAllocatorAction); 6474 emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID, 6475 IsOffloadEntry, CodeGen); 6476 } 6477 6478 void CGOpenMPRuntime::emitUsesAllocatorsInit(CodeGenFunction &CGF, 6479 const Expr *Allocator, 6480 const Expr *AllocatorTraits) { 6481 llvm::Value *ThreadId = 
getThreadID(CGF, Allocator->getExprLoc()); 6482 ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true); 6483 // Use default memspace handle. 6484 llvm::Value *MemSpaceHandle = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); 6485 llvm::Value *NumTraits = llvm::ConstantInt::get( 6486 CGF.IntTy, cast<ConstantArrayType>( 6487 AllocatorTraits->getType()->getAsArrayTypeUnsafe()) 6488 ->getSize() 6489 .getLimitedValue()); 6490 LValue AllocatorTraitsLVal = CGF.EmitLValue(AllocatorTraits); 6491 Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 6492 AllocatorTraitsLVal.getAddress(CGF), CGF.VoidPtrPtrTy); 6493 AllocatorTraitsLVal = CGF.MakeAddrLValue(Addr, CGF.getContext().VoidPtrTy, 6494 AllocatorTraitsLVal.getBaseInfo(), 6495 AllocatorTraitsLVal.getTBAAInfo()); 6496 llvm::Value *Traits = 6497 CGF.EmitLoadOfScalar(AllocatorTraitsLVal, AllocatorTraits->getExprLoc()); 6498 6499 llvm::Value *AllocatorVal = 6500 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 6501 CGM.getModule(), OMPRTL___kmpc_init_allocator), 6502 {ThreadId, MemSpaceHandle, NumTraits, Traits}); 6503 // Store to allocator. 
6504 CGF.EmitVarDecl(*cast<VarDecl>( 6505 cast<DeclRefExpr>(Allocator->IgnoreParenImpCasts())->getDecl())); 6506 LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts()); 6507 AllocatorVal = 6508 CGF.EmitScalarConversion(AllocatorVal, CGF.getContext().VoidPtrTy, 6509 Allocator->getType(), Allocator->getExprLoc()); 6510 CGF.EmitStoreOfScalar(AllocatorVal, AllocatorLVal); 6511 } 6512 6513 void CGOpenMPRuntime::emitUsesAllocatorsFini(CodeGenFunction &CGF, 6514 const Expr *Allocator) { 6515 llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc()); 6516 ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true); 6517 LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts()); 6518 llvm::Value *AllocatorVal = 6519 CGF.EmitLoadOfScalar(AllocatorLVal, Allocator->getExprLoc()); 6520 AllocatorVal = CGF.EmitScalarConversion(AllocatorVal, Allocator->getType(), 6521 CGF.getContext().VoidPtrTy, 6522 Allocator->getExprLoc()); 6523 (void)CGF.EmitRuntimeCall( 6524 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), 6525 OMPRTL___kmpc_destroy_allocator), 6526 {ThreadId, AllocatorVal}); 6527 } 6528 6529 void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper( 6530 const OMPExecutableDirective &D, StringRef ParentName, 6531 llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, 6532 bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) { 6533 // Create a unique name for the entry function using the source location 6534 // information of the current target region. The name will be something like: 6535 // 6536 // __omp_offloading_DD_FFFF_PP_lBB 6537 // 6538 // where DD_FFFF is an ID unique to the file (device and file IDs), PP is the 6539 // mangled name of the function that encloses the target region and BB is the 6540 // line number of the target region. 
6541 6542 const bool BuildOutlinedFn = CGM.getLangOpts().OpenMPIsDevice || 6543 !CGM.getLangOpts().OpenMPOffloadMandatory; 6544 unsigned DeviceID; 6545 unsigned FileID; 6546 unsigned Line; 6547 getTargetEntryUniqueInfo(CGM.getContext(), D.getBeginLoc(), DeviceID, FileID, 6548 Line); 6549 SmallString<64> EntryFnName; 6550 { 6551 llvm::raw_svector_ostream OS(EntryFnName); 6552 OS << "__omp_offloading" << llvm::format("_%x", DeviceID) 6553 << llvm::format("_%x_", FileID) << ParentName << "_l" << Line; 6554 } 6555 6556 const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target); 6557 6558 CodeGenFunction CGF(CGM, true); 6559 CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName); 6560 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 6561 6562 if (BuildOutlinedFn) 6563 OutlinedFn = CGF.GenerateOpenMPCapturedStmtFunction(CS, D.getBeginLoc()); 6564 6565 // If this target outline function is not an offload entry, we don't need to 6566 // register it. 6567 if (!IsOffloadEntry) 6568 return; 6569 6570 // The target region ID is used by the runtime library to identify the current 6571 // target region, so it only has to be unique and not necessarily point to 6572 // anything. It could be the pointer to the outlined function that implements 6573 // the target region, but we aren't using that so that the compiler doesn't 6574 // need to keep that, and could therefore inline the host function if proven 6575 // worthwhile during optimization. In the other hand, if emitting code for the 6576 // device, the ID has to be the function address so that it can retrieved from 6577 // the offloading entry and launched by the runtime library. We also mark the 6578 // outlined function to have external linkage in case we are emitting code for 6579 // the device, because these functions will be entry points to the device. 
6580 6581 if (CGM.getLangOpts().OpenMPIsDevice) { 6582 OutlinedFnID = llvm::ConstantExpr::getBitCast(OutlinedFn, CGM.Int8PtrTy); 6583 OutlinedFn->setLinkage(llvm::GlobalValue::WeakAnyLinkage); 6584 OutlinedFn->setDSOLocal(false); 6585 if (CGM.getTriple().isAMDGCN()) 6586 OutlinedFn->setCallingConv(llvm::CallingConv::AMDGPU_KERNEL); 6587 } else { 6588 std::string Name = getName({EntryFnName, "region_id"}); 6589 OutlinedFnID = new llvm::GlobalVariable( 6590 CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true, 6591 llvm::GlobalValue::WeakAnyLinkage, 6592 llvm::Constant::getNullValue(CGM.Int8Ty), Name); 6593 } 6594 6595 // If we do not allow host fallback we still need a named address to use. 6596 llvm::Constant *TargetRegionEntryAddr = OutlinedFn; 6597 if (!BuildOutlinedFn) { 6598 assert(!CGM.getModule().getGlobalVariable(EntryFnName, true) && 6599 "Named kernel already exists?"); 6600 TargetRegionEntryAddr = new llvm::GlobalVariable( 6601 CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true, 6602 llvm::GlobalValue::InternalLinkage, 6603 llvm::Constant::getNullValue(CGM.Int8Ty), EntryFnName); 6604 } 6605 6606 // Register the information for the entry associated with this target region. 
6607 OffloadEntriesInfoManager.registerTargetRegionEntryInfo( 6608 DeviceID, FileID, ParentName, Line, TargetRegionEntryAddr, OutlinedFnID, 6609 OffloadEntriesInfoManagerTy::OMPTargetRegionEntryTargetRegion); 6610 6611 // Add NumTeams and ThreadLimit attributes to the outlined GPU function 6612 int32_t DefaultValTeams = -1; 6613 getNumTeamsExprForTargetDirective(CGF, D, DefaultValTeams); 6614 if (DefaultValTeams > 0 && OutlinedFn) { 6615 OutlinedFn->addFnAttr("omp_target_num_teams", 6616 std::to_string(DefaultValTeams)); 6617 } 6618 int32_t DefaultValThreads = -1; 6619 getNumThreadsExprForTargetDirective(CGF, D, DefaultValThreads); 6620 if (DefaultValThreads > 0 && OutlinedFn) { 6621 OutlinedFn->addFnAttr("omp_target_thread_limit", 6622 std::to_string(DefaultValThreads)); 6623 } 6624 6625 if (BuildOutlinedFn) 6626 CGM.getTargetCodeGenInfo().setTargetAttributes(nullptr, OutlinedFn, CGM); 6627 } 6628 6629 /// Checks if the expression is constant or does not have non-trivial function 6630 /// calls. 6631 static bool isTrivial(ASTContext &Ctx, const Expr * E) { 6632 // We can skip constant expressions. 6633 // We can skip expressions with trivial calls or simple expressions. 6634 return (E->isEvaluatable(Ctx, Expr::SE_AllowUndefinedBehavior) || 6635 !E->hasNonTrivialCall(Ctx)) && 6636 !E->HasSideEffects(Ctx, /*IncludePossibleEffects=*/true); 6637 } 6638 6639 const Stmt *CGOpenMPRuntime::getSingleCompoundChild(ASTContext &Ctx, 6640 const Stmt *Body) { 6641 const Stmt *Child = Body->IgnoreContainers(); 6642 while (const auto *C = dyn_cast_or_null<CompoundStmt>(Child)) { 6643 Child = nullptr; 6644 for (const Stmt *S : C->body()) { 6645 if (const auto *E = dyn_cast<Expr>(S)) { 6646 if (isTrivial(Ctx, E)) 6647 continue; 6648 } 6649 // Some of the statements can be ignored. 6650 if (isa<AsmStmt>(S) || isa<NullStmt>(S) || isa<OMPFlushDirective>(S) || 6651 isa<OMPBarrierDirective>(S) || isa<OMPTaskyieldDirective>(S)) 6652 continue; 6653 // Analyze declarations. 
6654 if (const auto *DS = dyn_cast<DeclStmt>(S)) { 6655 if (llvm::all_of(DS->decls(), [](const Decl *D) { 6656 if (isa<EmptyDecl>(D) || isa<DeclContext>(D) || 6657 isa<TypeDecl>(D) || isa<PragmaCommentDecl>(D) || 6658 isa<PragmaDetectMismatchDecl>(D) || isa<UsingDecl>(D) || 6659 isa<UsingDirectiveDecl>(D) || 6660 isa<OMPDeclareReductionDecl>(D) || 6661 isa<OMPThreadPrivateDecl>(D) || isa<OMPAllocateDecl>(D)) 6662 return true; 6663 const auto *VD = dyn_cast<VarDecl>(D); 6664 if (!VD) 6665 return false; 6666 return VD->hasGlobalStorage() || !VD->isUsed(); 6667 })) 6668 continue; 6669 } 6670 // Found multiple children - cannot get the one child only. 6671 if (Child) 6672 return nullptr; 6673 Child = S; 6674 } 6675 if (Child) 6676 Child = Child->IgnoreContainers(); 6677 } 6678 return Child; 6679 } 6680 6681 const Expr *CGOpenMPRuntime::getNumTeamsExprForTargetDirective( 6682 CodeGenFunction &CGF, const OMPExecutableDirective &D, 6683 int32_t &DefaultVal) { 6684 6685 OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind(); 6686 assert(isOpenMPTargetExecutionDirective(DirectiveKind) && 6687 "Expected target-based executable directive."); 6688 switch (DirectiveKind) { 6689 case OMPD_target: { 6690 const auto *CS = D.getInnermostCapturedStmt(); 6691 const auto *Body = 6692 CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true); 6693 const Stmt *ChildStmt = 6694 CGOpenMPRuntime::getSingleCompoundChild(CGF.getContext(), Body); 6695 if (const auto *NestedDir = 6696 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) { 6697 if (isOpenMPTeamsDirective(NestedDir->getDirectiveKind())) { 6698 if (NestedDir->hasClausesOfKind<OMPNumTeamsClause>()) { 6699 const Expr *NumTeams = 6700 NestedDir->getSingleClause<OMPNumTeamsClause>()->getNumTeams(); 6701 if (NumTeams->isIntegerConstantExpr(CGF.getContext())) 6702 if (auto Constant = 6703 NumTeams->getIntegerConstantExpr(CGF.getContext())) 6704 DefaultVal = Constant->getExtValue(); 6705 return NumTeams; 6706 } 6707 DefaultVal 
= 0; 6708 return nullptr; 6709 } 6710 if (isOpenMPParallelDirective(NestedDir->getDirectiveKind()) || 6711 isOpenMPSimdDirective(NestedDir->getDirectiveKind())) { 6712 DefaultVal = 1; 6713 return nullptr; 6714 } 6715 DefaultVal = 1; 6716 return nullptr; 6717 } 6718 // A value of -1 is used to check if we need to emit no teams region 6719 DefaultVal = -1; 6720 return nullptr; 6721 } 6722 case OMPD_target_teams: 6723 case OMPD_target_teams_distribute: 6724 case OMPD_target_teams_distribute_simd: 6725 case OMPD_target_teams_distribute_parallel_for: 6726 case OMPD_target_teams_distribute_parallel_for_simd: { 6727 if (D.hasClausesOfKind<OMPNumTeamsClause>()) { 6728 const Expr *NumTeams = 6729 D.getSingleClause<OMPNumTeamsClause>()->getNumTeams(); 6730 if (NumTeams->isIntegerConstantExpr(CGF.getContext())) 6731 if (auto Constant = NumTeams->getIntegerConstantExpr(CGF.getContext())) 6732 DefaultVal = Constant->getExtValue(); 6733 return NumTeams; 6734 } 6735 DefaultVal = 0; 6736 return nullptr; 6737 } 6738 case OMPD_target_parallel: 6739 case OMPD_target_parallel_for: 6740 case OMPD_target_parallel_for_simd: 6741 case OMPD_target_simd: 6742 DefaultVal = 1; 6743 return nullptr; 6744 case OMPD_parallel: 6745 case OMPD_for: 6746 case OMPD_parallel_for: 6747 case OMPD_parallel_master: 6748 case OMPD_parallel_sections: 6749 case OMPD_for_simd: 6750 case OMPD_parallel_for_simd: 6751 case OMPD_cancel: 6752 case OMPD_cancellation_point: 6753 case OMPD_ordered: 6754 case OMPD_threadprivate: 6755 case OMPD_allocate: 6756 case OMPD_task: 6757 case OMPD_simd: 6758 case OMPD_tile: 6759 case OMPD_unroll: 6760 case OMPD_sections: 6761 case OMPD_section: 6762 case OMPD_single: 6763 case OMPD_master: 6764 case OMPD_critical: 6765 case OMPD_taskyield: 6766 case OMPD_barrier: 6767 case OMPD_taskwait: 6768 case OMPD_taskgroup: 6769 case OMPD_atomic: 6770 case OMPD_flush: 6771 case OMPD_depobj: 6772 case OMPD_scan: 6773 case OMPD_teams: 6774 case OMPD_target_data: 6775 case 
OMPD_target_exit_data: 6776 case OMPD_target_enter_data: 6777 case OMPD_distribute: 6778 case OMPD_distribute_simd: 6779 case OMPD_distribute_parallel_for: 6780 case OMPD_distribute_parallel_for_simd: 6781 case OMPD_teams_distribute: 6782 case OMPD_teams_distribute_simd: 6783 case OMPD_teams_distribute_parallel_for: 6784 case OMPD_teams_distribute_parallel_for_simd: 6785 case OMPD_target_update: 6786 case OMPD_declare_simd: 6787 case OMPD_declare_variant: 6788 case OMPD_begin_declare_variant: 6789 case OMPD_end_declare_variant: 6790 case OMPD_declare_target: 6791 case OMPD_end_declare_target: 6792 case OMPD_declare_reduction: 6793 case OMPD_declare_mapper: 6794 case OMPD_taskloop: 6795 case OMPD_taskloop_simd: 6796 case OMPD_master_taskloop: 6797 case OMPD_master_taskloop_simd: 6798 case OMPD_parallel_master_taskloop: 6799 case OMPD_parallel_master_taskloop_simd: 6800 case OMPD_requires: 6801 case OMPD_metadirective: 6802 case OMPD_unknown: 6803 break; 6804 default: 6805 break; 6806 } 6807 llvm_unreachable("Unexpected directive kind."); 6808 } 6809 6810 llvm::Value *CGOpenMPRuntime::emitNumTeamsForTargetDirective( 6811 CodeGenFunction &CGF, const OMPExecutableDirective &D) { 6812 assert(!CGF.getLangOpts().OpenMPIsDevice && 6813 "Clauses associated with the teams directive expected to be emitted " 6814 "only for the host!"); 6815 CGBuilderTy &Bld = CGF.Builder; 6816 int32_t DefaultNT = -1; 6817 const Expr *NumTeams = getNumTeamsExprForTargetDirective(CGF, D, DefaultNT); 6818 if (NumTeams != nullptr) { 6819 OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind(); 6820 6821 switch (DirectiveKind) { 6822 case OMPD_target: { 6823 const auto *CS = D.getInnermostCapturedStmt(); 6824 CGOpenMPInnerExprInfo CGInfo(CGF, *CS); 6825 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 6826 llvm::Value *NumTeamsVal = CGF.EmitScalarExpr(NumTeams, 6827 /*IgnoreResultAssign*/ true); 6828 return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty, 6829 /*isSigned=*/true); 6830 } 
6831 case OMPD_target_teams: 6832 case OMPD_target_teams_distribute: 6833 case OMPD_target_teams_distribute_simd: 6834 case OMPD_target_teams_distribute_parallel_for: 6835 case OMPD_target_teams_distribute_parallel_for_simd: { 6836 CodeGenFunction::RunCleanupsScope NumTeamsScope(CGF); 6837 llvm::Value *NumTeamsVal = CGF.EmitScalarExpr(NumTeams, 6838 /*IgnoreResultAssign*/ true); 6839 return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty, 6840 /*isSigned=*/true); 6841 } 6842 default: 6843 break; 6844 } 6845 } else if (DefaultNT == -1) { 6846 return nullptr; 6847 } 6848 6849 return Bld.getInt32(DefaultNT); 6850 } 6851 6852 static llvm::Value *getNumThreads(CodeGenFunction &CGF, const CapturedStmt *CS, 6853 llvm::Value *DefaultThreadLimitVal) { 6854 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild( 6855 CGF.getContext(), CS->getCapturedStmt()); 6856 if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) { 6857 if (isOpenMPParallelDirective(Dir->getDirectiveKind())) { 6858 llvm::Value *NumThreads = nullptr; 6859 llvm::Value *CondVal = nullptr; 6860 // Handle if clause. If if clause present, the number of threads is 6861 // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1. 
6862 if (Dir->hasClausesOfKind<OMPIfClause>()) { 6863 CGOpenMPInnerExprInfo CGInfo(CGF, *CS); 6864 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 6865 const OMPIfClause *IfClause = nullptr; 6866 for (const auto *C : Dir->getClausesOfKind<OMPIfClause>()) { 6867 if (C->getNameModifier() == OMPD_unknown || 6868 C->getNameModifier() == OMPD_parallel) { 6869 IfClause = C; 6870 break; 6871 } 6872 } 6873 if (IfClause) { 6874 const Expr *Cond = IfClause->getCondition(); 6875 bool Result; 6876 if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) { 6877 if (!Result) 6878 return CGF.Builder.getInt32(1); 6879 } else { 6880 CodeGenFunction::LexicalScope Scope(CGF, Cond->getSourceRange()); 6881 if (const auto *PreInit = 6882 cast_or_null<DeclStmt>(IfClause->getPreInitStmt())) { 6883 for (const auto *I : PreInit->decls()) { 6884 if (!I->hasAttr<OMPCaptureNoInitAttr>()) { 6885 CGF.EmitVarDecl(cast<VarDecl>(*I)); 6886 } else { 6887 CodeGenFunction::AutoVarEmission Emission = 6888 CGF.EmitAutoVarAlloca(cast<VarDecl>(*I)); 6889 CGF.EmitAutoVarCleanups(Emission); 6890 } 6891 } 6892 } 6893 CondVal = CGF.EvaluateExprAsBool(Cond); 6894 } 6895 } 6896 } 6897 // Check the value of num_threads clause iff if clause was not specified 6898 // or is not evaluated to false. 
6899 if (Dir->hasClausesOfKind<OMPNumThreadsClause>()) { 6900 CGOpenMPInnerExprInfo CGInfo(CGF, *CS); 6901 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 6902 const auto *NumThreadsClause = 6903 Dir->getSingleClause<OMPNumThreadsClause>(); 6904 CodeGenFunction::LexicalScope Scope( 6905 CGF, NumThreadsClause->getNumThreads()->getSourceRange()); 6906 if (const auto *PreInit = 6907 cast_or_null<DeclStmt>(NumThreadsClause->getPreInitStmt())) { 6908 for (const auto *I : PreInit->decls()) { 6909 if (!I->hasAttr<OMPCaptureNoInitAttr>()) { 6910 CGF.EmitVarDecl(cast<VarDecl>(*I)); 6911 } else { 6912 CodeGenFunction::AutoVarEmission Emission = 6913 CGF.EmitAutoVarAlloca(cast<VarDecl>(*I)); 6914 CGF.EmitAutoVarCleanups(Emission); 6915 } 6916 } 6917 } 6918 NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads()); 6919 NumThreads = CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, 6920 /*isSigned=*/false); 6921 if (DefaultThreadLimitVal) 6922 NumThreads = CGF.Builder.CreateSelect( 6923 CGF.Builder.CreateICmpULT(DefaultThreadLimitVal, NumThreads), 6924 DefaultThreadLimitVal, NumThreads); 6925 } else { 6926 NumThreads = DefaultThreadLimitVal ? DefaultThreadLimitVal 6927 : CGF.Builder.getInt32(0); 6928 } 6929 // Process condition of the if clause. 6930 if (CondVal) { 6931 NumThreads = CGF.Builder.CreateSelect(CondVal, NumThreads, 6932 CGF.Builder.getInt32(1)); 6933 } 6934 return NumThreads; 6935 } 6936 if (isOpenMPSimdDirective(Dir->getDirectiveKind())) 6937 return CGF.Builder.getInt32(1); 6938 return DefaultThreadLimitVal; 6939 } 6940 return DefaultThreadLimitVal ? 
DefaultThreadLimitVal 6941 : CGF.Builder.getInt32(0); 6942 } 6943 6944 const Expr *CGOpenMPRuntime::getNumThreadsExprForTargetDirective( 6945 CodeGenFunction &CGF, const OMPExecutableDirective &D, 6946 int32_t &DefaultVal) { 6947 OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind(); 6948 assert(isOpenMPTargetExecutionDirective(DirectiveKind) && 6949 "Expected target-based executable directive."); 6950 6951 switch (DirectiveKind) { 6952 case OMPD_target: 6953 // Teams have no clause thread_limit 6954 return nullptr; 6955 case OMPD_target_teams: 6956 case OMPD_target_teams_distribute: 6957 if (D.hasClausesOfKind<OMPThreadLimitClause>()) { 6958 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>(); 6959 const Expr *ThreadLimit = ThreadLimitClause->getThreadLimit(); 6960 if (ThreadLimit->isIntegerConstantExpr(CGF.getContext())) 6961 if (auto Constant = 6962 ThreadLimit->getIntegerConstantExpr(CGF.getContext())) 6963 DefaultVal = Constant->getExtValue(); 6964 return ThreadLimit; 6965 } 6966 return nullptr; 6967 case OMPD_target_parallel: 6968 case OMPD_target_parallel_for: 6969 case OMPD_target_parallel_for_simd: 6970 case OMPD_target_teams_distribute_parallel_for: 6971 case OMPD_target_teams_distribute_parallel_for_simd: { 6972 Expr *ThreadLimit = nullptr; 6973 Expr *NumThreads = nullptr; 6974 if (D.hasClausesOfKind<OMPThreadLimitClause>()) { 6975 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>(); 6976 ThreadLimit = ThreadLimitClause->getThreadLimit(); 6977 if (ThreadLimit->isIntegerConstantExpr(CGF.getContext())) 6978 if (auto Constant = 6979 ThreadLimit->getIntegerConstantExpr(CGF.getContext())) 6980 DefaultVal = Constant->getExtValue(); 6981 } 6982 if (D.hasClausesOfKind<OMPNumThreadsClause>()) { 6983 const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>(); 6984 NumThreads = NumThreadsClause->getNumThreads(); 6985 if (NumThreads->isIntegerConstantExpr(CGF.getContext())) { 6986 if (auto Constant = 6987 
NumThreads->getIntegerConstantExpr(CGF.getContext())) { 6988 if (Constant->getExtValue() < DefaultVal) { 6989 DefaultVal = Constant->getExtValue(); 6990 ThreadLimit = NumThreads; 6991 } 6992 } 6993 } 6994 } 6995 return ThreadLimit; 6996 } 6997 case OMPD_target_teams_distribute_simd: 6998 case OMPD_target_simd: 6999 DefaultVal = 1; 7000 return nullptr; 7001 case OMPD_parallel: 7002 case OMPD_for: 7003 case OMPD_parallel_for: 7004 case OMPD_parallel_master: 7005 case OMPD_parallel_sections: 7006 case OMPD_for_simd: 7007 case OMPD_parallel_for_simd: 7008 case OMPD_cancel: 7009 case OMPD_cancellation_point: 7010 case OMPD_ordered: 7011 case OMPD_threadprivate: 7012 case OMPD_allocate: 7013 case OMPD_task: 7014 case OMPD_simd: 7015 case OMPD_tile: 7016 case OMPD_unroll: 7017 case OMPD_sections: 7018 case OMPD_section: 7019 case OMPD_single: 7020 case OMPD_master: 7021 case OMPD_critical: 7022 case OMPD_taskyield: 7023 case OMPD_barrier: 7024 case OMPD_taskwait: 7025 case OMPD_taskgroup: 7026 case OMPD_atomic: 7027 case OMPD_flush: 7028 case OMPD_depobj: 7029 case OMPD_scan: 7030 case OMPD_teams: 7031 case OMPD_target_data: 7032 case OMPD_target_exit_data: 7033 case OMPD_target_enter_data: 7034 case OMPD_distribute: 7035 case OMPD_distribute_simd: 7036 case OMPD_distribute_parallel_for: 7037 case OMPD_distribute_parallel_for_simd: 7038 case OMPD_teams_distribute: 7039 case OMPD_teams_distribute_simd: 7040 case OMPD_teams_distribute_parallel_for: 7041 case OMPD_teams_distribute_parallel_for_simd: 7042 case OMPD_target_update: 7043 case OMPD_declare_simd: 7044 case OMPD_declare_variant: 7045 case OMPD_begin_declare_variant: 7046 case OMPD_end_declare_variant: 7047 case OMPD_declare_target: 7048 case OMPD_end_declare_target: 7049 case OMPD_declare_reduction: 7050 case OMPD_declare_mapper: 7051 case OMPD_taskloop: 7052 case OMPD_taskloop_simd: 7053 case OMPD_master_taskloop: 7054 case OMPD_master_taskloop_simd: 7055 case OMPD_parallel_master_taskloop: 7056 case 
OMPD_parallel_master_taskloop_simd: 7057 case OMPD_requires: 7058 case OMPD_unknown: 7059 break; 7060 default: 7061 break; 7062 } 7063 llvm_unreachable("Unsupported directive kind."); 7064 } 7065 7066 llvm::Value *CGOpenMPRuntime::emitNumThreadsForTargetDirective( 7067 CodeGenFunction &CGF, const OMPExecutableDirective &D) { 7068 assert(!CGF.getLangOpts().OpenMPIsDevice && 7069 "Clauses associated with the teams directive expected to be emitted " 7070 "only for the host!"); 7071 OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind(); 7072 assert(isOpenMPTargetExecutionDirective(DirectiveKind) && 7073 "Expected target-based executable directive."); 7074 CGBuilderTy &Bld = CGF.Builder; 7075 llvm::Value *ThreadLimitVal = nullptr; 7076 llvm::Value *NumThreadsVal = nullptr; 7077 switch (DirectiveKind) { 7078 case OMPD_target: { 7079 const CapturedStmt *CS = D.getInnermostCapturedStmt(); 7080 if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal)) 7081 return NumThreads; 7082 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild( 7083 CGF.getContext(), CS->getCapturedStmt()); 7084 if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) { 7085 if (Dir->hasClausesOfKind<OMPThreadLimitClause>()) { 7086 CGOpenMPInnerExprInfo CGInfo(CGF, *CS); 7087 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 7088 const auto *ThreadLimitClause = 7089 Dir->getSingleClause<OMPThreadLimitClause>(); 7090 CodeGenFunction::LexicalScope Scope( 7091 CGF, ThreadLimitClause->getThreadLimit()->getSourceRange()); 7092 if (const auto *PreInit = 7093 cast_or_null<DeclStmt>(ThreadLimitClause->getPreInitStmt())) { 7094 for (const auto *I : PreInit->decls()) { 7095 if (!I->hasAttr<OMPCaptureNoInitAttr>()) { 7096 CGF.EmitVarDecl(cast<VarDecl>(*I)); 7097 } else { 7098 CodeGenFunction::AutoVarEmission Emission = 7099 CGF.EmitAutoVarAlloca(cast<VarDecl>(*I)); 7100 CGF.EmitAutoVarCleanups(Emission); 7101 } 7102 } 7103 } 7104 llvm::Value *ThreadLimit = 
CGF.EmitScalarExpr( 7105 ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true); 7106 ThreadLimitVal = 7107 Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false); 7108 } 7109 if (isOpenMPTeamsDirective(Dir->getDirectiveKind()) && 7110 !isOpenMPDistributeDirective(Dir->getDirectiveKind())) { 7111 CS = Dir->getInnermostCapturedStmt(); 7112 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild( 7113 CGF.getContext(), CS->getCapturedStmt()); 7114 Dir = dyn_cast_or_null<OMPExecutableDirective>(Child); 7115 } 7116 if (Dir && isOpenMPDistributeDirective(Dir->getDirectiveKind()) && 7117 !isOpenMPSimdDirective(Dir->getDirectiveKind())) { 7118 CS = Dir->getInnermostCapturedStmt(); 7119 if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal)) 7120 return NumThreads; 7121 } 7122 if (Dir && isOpenMPSimdDirective(Dir->getDirectiveKind())) 7123 return Bld.getInt32(1); 7124 } 7125 return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0); 7126 } 7127 case OMPD_target_teams: { 7128 if (D.hasClausesOfKind<OMPThreadLimitClause>()) { 7129 CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF); 7130 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>(); 7131 llvm::Value *ThreadLimit = CGF.EmitScalarExpr( 7132 ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true); 7133 ThreadLimitVal = 7134 Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false); 7135 } 7136 const CapturedStmt *CS = D.getInnermostCapturedStmt(); 7137 if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal)) 7138 return NumThreads; 7139 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild( 7140 CGF.getContext(), CS->getCapturedStmt()); 7141 if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) { 7142 if (Dir->getDirectiveKind() == OMPD_distribute) { 7143 CS = Dir->getInnermostCapturedStmt(); 7144 if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal)) 7145 return NumThreads; 7146 } 7147 } 7148 
return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0); 7149 } 7150 case OMPD_target_teams_distribute: 7151 if (D.hasClausesOfKind<OMPThreadLimitClause>()) { 7152 CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF); 7153 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>(); 7154 llvm::Value *ThreadLimit = CGF.EmitScalarExpr( 7155 ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true); 7156 ThreadLimitVal = 7157 Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false); 7158 } 7159 return getNumThreads(CGF, D.getInnermostCapturedStmt(), ThreadLimitVal); 7160 case OMPD_target_parallel: 7161 case OMPD_target_parallel_for: 7162 case OMPD_target_parallel_for_simd: 7163 case OMPD_target_teams_distribute_parallel_for: 7164 case OMPD_target_teams_distribute_parallel_for_simd: { 7165 llvm::Value *CondVal = nullptr; 7166 // Handle if clause. If if clause present, the number of threads is 7167 // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1. 
7168 if (D.hasClausesOfKind<OMPIfClause>()) { 7169 const OMPIfClause *IfClause = nullptr; 7170 for (const auto *C : D.getClausesOfKind<OMPIfClause>()) { 7171 if (C->getNameModifier() == OMPD_unknown || 7172 C->getNameModifier() == OMPD_parallel) { 7173 IfClause = C; 7174 break; 7175 } 7176 } 7177 if (IfClause) { 7178 const Expr *Cond = IfClause->getCondition(); 7179 bool Result; 7180 if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) { 7181 if (!Result) 7182 return Bld.getInt32(1); 7183 } else { 7184 CodeGenFunction::RunCleanupsScope Scope(CGF); 7185 CondVal = CGF.EvaluateExprAsBool(Cond); 7186 } 7187 } 7188 } 7189 if (D.hasClausesOfKind<OMPThreadLimitClause>()) { 7190 CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF); 7191 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>(); 7192 llvm::Value *ThreadLimit = CGF.EmitScalarExpr( 7193 ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true); 7194 ThreadLimitVal = 7195 Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false); 7196 } 7197 if (D.hasClausesOfKind<OMPNumThreadsClause>()) { 7198 CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF); 7199 const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>(); 7200 llvm::Value *NumThreads = CGF.EmitScalarExpr( 7201 NumThreadsClause->getNumThreads(), /*IgnoreResultAssign=*/true); 7202 NumThreadsVal = 7203 Bld.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned=*/false); 7204 ThreadLimitVal = ThreadLimitVal 7205 ? 
Bld.CreateSelect(Bld.CreateICmpULT(NumThreadsVal, 7206 ThreadLimitVal), 7207 NumThreadsVal, ThreadLimitVal) 7208 : NumThreadsVal; 7209 } 7210 if (!ThreadLimitVal) 7211 ThreadLimitVal = Bld.getInt32(0); 7212 if (CondVal) 7213 return Bld.CreateSelect(CondVal, ThreadLimitVal, Bld.getInt32(1)); 7214 return ThreadLimitVal; 7215 } 7216 case OMPD_target_teams_distribute_simd: 7217 case OMPD_target_simd: 7218 return Bld.getInt32(1); 7219 case OMPD_parallel: 7220 case OMPD_for: 7221 case OMPD_parallel_for: 7222 case OMPD_parallel_master: 7223 case OMPD_parallel_sections: 7224 case OMPD_for_simd: 7225 case OMPD_parallel_for_simd: 7226 case OMPD_cancel: 7227 case OMPD_cancellation_point: 7228 case OMPD_ordered: 7229 case OMPD_threadprivate: 7230 case OMPD_allocate: 7231 case OMPD_task: 7232 case OMPD_simd: 7233 case OMPD_tile: 7234 case OMPD_unroll: 7235 case OMPD_sections: 7236 case OMPD_section: 7237 case OMPD_single: 7238 case OMPD_master: 7239 case OMPD_critical: 7240 case OMPD_taskyield: 7241 case OMPD_barrier: 7242 case OMPD_taskwait: 7243 case OMPD_taskgroup: 7244 case OMPD_atomic: 7245 case OMPD_flush: 7246 case OMPD_depobj: 7247 case OMPD_scan: 7248 case OMPD_teams: 7249 case OMPD_target_data: 7250 case OMPD_target_exit_data: 7251 case OMPD_target_enter_data: 7252 case OMPD_distribute: 7253 case OMPD_distribute_simd: 7254 case OMPD_distribute_parallel_for: 7255 case OMPD_distribute_parallel_for_simd: 7256 case OMPD_teams_distribute: 7257 case OMPD_teams_distribute_simd: 7258 case OMPD_teams_distribute_parallel_for: 7259 case OMPD_teams_distribute_parallel_for_simd: 7260 case OMPD_target_update: 7261 case OMPD_declare_simd: 7262 case OMPD_declare_variant: 7263 case OMPD_begin_declare_variant: 7264 case OMPD_end_declare_variant: 7265 case OMPD_declare_target: 7266 case OMPD_end_declare_target: 7267 case OMPD_declare_reduction: 7268 case OMPD_declare_mapper: 7269 case OMPD_taskloop: 7270 case OMPD_taskloop_simd: 7271 case OMPD_master_taskloop: 7272 case 
OMPD_master_taskloop_simd: 7273 case OMPD_parallel_master_taskloop: 7274 case OMPD_parallel_master_taskloop_simd: 7275 case OMPD_requires: 7276 case OMPD_metadirective: 7277 case OMPD_unknown: 7278 break; 7279 default: 7280 break; 7281 } 7282 llvm_unreachable("Unsupported directive kind."); 7283 } 7284 7285 namespace { 7286 LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE(); 7287 7288 // Utility to handle information from clauses associated with a given 7289 // construct that use mappable expressions (e.g. 'map' clause, 'to' clause). 7290 // It provides a convenient interface to obtain the information and generate 7291 // code for that information. 7292 class MappableExprsHandler { 7293 public: 7294 /// Values for bit flags used to specify the mapping type for 7295 /// offloading. 7296 enum OpenMPOffloadMappingFlags : uint64_t { 7297 /// No flags 7298 OMP_MAP_NONE = 0x0, 7299 /// Allocate memory on the device and move data from host to device. 7300 OMP_MAP_TO = 0x01, 7301 /// Allocate memory on the device and move data from device to host. 7302 OMP_MAP_FROM = 0x02, 7303 /// Always perform the requested mapping action on the element, even 7304 /// if it was already mapped before. 7305 OMP_MAP_ALWAYS = 0x04, 7306 /// Delete the element from the device environment, ignoring the 7307 /// current reference count associated with the element. 7308 OMP_MAP_DELETE = 0x08, 7309 /// The element being mapped is a pointer-pointee pair; both the 7310 /// pointer and the pointee should be mapped. 7311 OMP_MAP_PTR_AND_OBJ = 0x10, 7312 /// This flags signals that the base address of an entry should be 7313 /// passed to the target kernel as an argument. 7314 OMP_MAP_TARGET_PARAM = 0x20, 7315 /// Signal that the runtime library has to return the device pointer 7316 /// in the current position for the data being mapped. Used when we have the 7317 /// use_device_ptr or use_device_addr clause. 
7318 OMP_MAP_RETURN_PARAM = 0x40, 7319 /// This flag signals that the reference being passed is a pointer to 7320 /// private data. 7321 OMP_MAP_PRIVATE = 0x80, 7322 /// Pass the element to the device by value. 7323 OMP_MAP_LITERAL = 0x100, 7324 /// Implicit map 7325 OMP_MAP_IMPLICIT = 0x200, 7326 /// Close is a hint to the runtime to allocate memory close to 7327 /// the target device. 7328 OMP_MAP_CLOSE = 0x400, 7329 /// 0x800 is reserved for compatibility with XLC. 7330 /// Produce a runtime error if the data is not already allocated. 7331 OMP_MAP_PRESENT = 0x1000, 7332 // Increment and decrement a separate reference counter so that the data 7333 // cannot be unmapped within the associated region. Thus, this flag is 7334 // intended to be used on 'target' and 'target data' directives because they 7335 // are inherently structured. It is not intended to be used on 'target 7336 // enter data' and 'target exit data' directives because they are inherently 7337 // dynamic. 7338 // This is an OpenMP extension for the sake of OpenACC support. 7339 OMP_MAP_OMPX_HOLD = 0x2000, 7340 /// Signal that the runtime library should use args as an array of 7341 /// descriptor_dim pointers and use args_size as dims. Used when we have 7342 /// non-contiguous list items in target update directive 7343 OMP_MAP_NON_CONTIG = 0x100000000000, 7344 /// The 16 MSBs of the flags indicate whether the entry is member of some 7345 /// struct/class. 7346 OMP_MAP_MEMBER_OF = 0xffff000000000000, 7347 LLVM_MARK_AS_BITMASK_ENUM(/* LargestFlag = */ OMP_MAP_MEMBER_OF), 7348 }; 7349 7350 /// Get the offset of the OMP_MAP_MEMBER_OF field. 7351 static unsigned getFlagMemberOffset() { 7352 unsigned Offset = 0; 7353 for (uint64_t Remain = OMP_MAP_MEMBER_OF; !(Remain & 1); 7354 Remain = Remain >> 1) 7355 Offset++; 7356 return Offset; 7357 } 7358 7359 /// Class that holds debugging information for a data mapping to be passed to 7360 /// the runtime library. 
7361 class MappingExprInfo { 7362 /// The variable declaration used for the data mapping. 7363 const ValueDecl *MapDecl = nullptr; 7364 /// The original expression used in the map clause, or null if there is 7365 /// none. 7366 const Expr *MapExpr = nullptr; 7367 7368 public: 7369 MappingExprInfo(const ValueDecl *MapDecl, const Expr *MapExpr = nullptr) 7370 : MapDecl(MapDecl), MapExpr(MapExpr) {} 7371 7372 const ValueDecl *getMapDecl() const { return MapDecl; } 7373 const Expr *getMapExpr() const { return MapExpr; } 7374 }; 7375 7376 /// Class that associates information with a base pointer to be passed to the 7377 /// runtime library. 7378 class BasePointerInfo { 7379 /// The base pointer. 7380 llvm::Value *Ptr = nullptr; 7381 /// The base declaration that refers to this device pointer, or null if 7382 /// there is none. 7383 const ValueDecl *DevPtrDecl = nullptr; 7384 7385 public: 7386 BasePointerInfo(llvm::Value *Ptr, const ValueDecl *DevPtrDecl = nullptr) 7387 : Ptr(Ptr), DevPtrDecl(DevPtrDecl) {} 7388 llvm::Value *operator*() const { return Ptr; } 7389 const ValueDecl *getDevicePtrDecl() const { return DevPtrDecl; } 7390 void setDevicePtrDecl(const ValueDecl *D) { DevPtrDecl = D; } 7391 }; 7392 7393 using MapExprsArrayTy = SmallVector<MappingExprInfo, 4>; 7394 using MapBaseValuesArrayTy = SmallVector<BasePointerInfo, 4>; 7395 using MapValuesArrayTy = SmallVector<llvm::Value *, 4>; 7396 using MapFlagsArrayTy = SmallVector<OpenMPOffloadMappingFlags, 4>; 7397 using MapMappersArrayTy = SmallVector<const ValueDecl *, 4>; 7398 using MapDimArrayTy = SmallVector<uint64_t, 4>; 7399 using MapNonContiguousArrayTy = SmallVector<MapValuesArrayTy, 4>; 7400 7401 /// This structure contains combined information generated for mappable 7402 /// clauses, including base pointers, pointers, sizes, map types, user-defined 7403 /// mappers, and non-contiguous information. 
7404 struct MapCombinedInfoTy { 7405 struct StructNonContiguousInfo { 7406 bool IsNonContiguous = false; 7407 MapDimArrayTy Dims; 7408 MapNonContiguousArrayTy Offsets; 7409 MapNonContiguousArrayTy Counts; 7410 MapNonContiguousArrayTy Strides; 7411 }; 7412 MapExprsArrayTy Exprs; 7413 MapBaseValuesArrayTy BasePointers; 7414 MapValuesArrayTy Pointers; 7415 MapValuesArrayTy Sizes; 7416 MapFlagsArrayTy Types; 7417 MapMappersArrayTy Mappers; 7418 StructNonContiguousInfo NonContigInfo; 7419 7420 /// Append arrays in \a CurInfo. 7421 void append(MapCombinedInfoTy &CurInfo) { 7422 Exprs.append(CurInfo.Exprs.begin(), CurInfo.Exprs.end()); 7423 BasePointers.append(CurInfo.BasePointers.begin(), 7424 CurInfo.BasePointers.end()); 7425 Pointers.append(CurInfo.Pointers.begin(), CurInfo.Pointers.end()); 7426 Sizes.append(CurInfo.Sizes.begin(), CurInfo.Sizes.end()); 7427 Types.append(CurInfo.Types.begin(), CurInfo.Types.end()); 7428 Mappers.append(CurInfo.Mappers.begin(), CurInfo.Mappers.end()); 7429 NonContigInfo.Dims.append(CurInfo.NonContigInfo.Dims.begin(), 7430 CurInfo.NonContigInfo.Dims.end()); 7431 NonContigInfo.Offsets.append(CurInfo.NonContigInfo.Offsets.begin(), 7432 CurInfo.NonContigInfo.Offsets.end()); 7433 NonContigInfo.Counts.append(CurInfo.NonContigInfo.Counts.begin(), 7434 CurInfo.NonContigInfo.Counts.end()); 7435 NonContigInfo.Strides.append(CurInfo.NonContigInfo.Strides.begin(), 7436 CurInfo.NonContigInfo.Strides.end()); 7437 } 7438 }; 7439 7440 /// Map between a struct and the its lowest & highest elements which have been 7441 /// mapped. 
7442 /// [ValueDecl *] --> {LE(FieldIndex, Pointer), 7443 /// HE(FieldIndex, Pointer)} 7444 struct StructRangeInfoTy { 7445 MapCombinedInfoTy PreliminaryMapData; 7446 std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> LowestElem = { 7447 0, Address::invalid()}; 7448 std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> HighestElem = { 7449 0, Address::invalid()}; 7450 Address Base = Address::invalid(); 7451 Address LB = Address::invalid(); 7452 bool IsArraySection = false; 7453 bool HasCompleteRecord = false; 7454 }; 7455 7456 private: 7457 /// Kind that defines how a device pointer has to be returned. 7458 struct MapInfo { 7459 OMPClauseMappableExprCommon::MappableExprComponentListRef Components; 7460 OpenMPMapClauseKind MapType = OMPC_MAP_unknown; 7461 ArrayRef<OpenMPMapModifierKind> MapModifiers; 7462 ArrayRef<OpenMPMotionModifierKind> MotionModifiers; 7463 bool ReturnDevicePointer = false; 7464 bool IsImplicit = false; 7465 const ValueDecl *Mapper = nullptr; 7466 const Expr *VarRef = nullptr; 7467 bool ForDeviceAddr = false; 7468 7469 MapInfo() = default; 7470 MapInfo( 7471 OMPClauseMappableExprCommon::MappableExprComponentListRef Components, 7472 OpenMPMapClauseKind MapType, 7473 ArrayRef<OpenMPMapModifierKind> MapModifiers, 7474 ArrayRef<OpenMPMotionModifierKind> MotionModifiers, 7475 bool ReturnDevicePointer, bool IsImplicit, 7476 const ValueDecl *Mapper = nullptr, const Expr *VarRef = nullptr, 7477 bool ForDeviceAddr = false) 7478 : Components(Components), MapType(MapType), MapModifiers(MapModifiers), 7479 MotionModifiers(MotionModifiers), 7480 ReturnDevicePointer(ReturnDevicePointer), IsImplicit(IsImplicit), 7481 Mapper(Mapper), VarRef(VarRef), ForDeviceAddr(ForDeviceAddr) {} 7482 }; 7483 7484 /// If use_device_ptr or use_device_addr is used on a decl which is a struct 7485 /// member and there is no map information about it, then emission of that 7486 /// entry is deferred until the whole struct has been processed. 
7487 struct DeferredDevicePtrEntryTy { 7488 const Expr *IE = nullptr; 7489 const ValueDecl *VD = nullptr; 7490 bool ForDeviceAddr = false; 7491 7492 DeferredDevicePtrEntryTy(const Expr *IE, const ValueDecl *VD, 7493 bool ForDeviceAddr) 7494 : IE(IE), VD(VD), ForDeviceAddr(ForDeviceAddr) {} 7495 }; 7496 7497 /// The target directive from where the mappable clauses were extracted. It 7498 /// is either a executable directive or a user-defined mapper directive. 7499 llvm::PointerUnion<const OMPExecutableDirective *, 7500 const OMPDeclareMapperDecl *> 7501 CurDir; 7502 7503 /// Function the directive is being generated for. 7504 CodeGenFunction &CGF; 7505 7506 /// Set of all first private variables in the current directive. 7507 /// bool data is set to true if the variable is implicitly marked as 7508 /// firstprivate, false otherwise. 7509 llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, bool> FirstPrivateDecls; 7510 7511 /// Map between device pointer declarations and their expression components. 7512 /// The key value for declarations in 'this' is null. 7513 llvm::DenseMap< 7514 const ValueDecl *, 7515 SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>> 7516 DevPointersMap; 7517 7518 /// Map between lambda declarations and their map type. 7519 llvm::DenseMap<const ValueDecl *, const OMPMapClause *> LambdasMap; 7520 7521 llvm::Value *getExprTypeSize(const Expr *E) const { 7522 QualType ExprTy = E->getType().getCanonicalType(); 7523 7524 // Calculate the size for array shaping expression. 
7525 if (const auto *OAE = dyn_cast<OMPArrayShapingExpr>(E)) { 7526 llvm::Value *Size = 7527 CGF.getTypeSize(OAE->getBase()->getType()->getPointeeType()); 7528 for (const Expr *SE : OAE->getDimensions()) { 7529 llvm::Value *Sz = CGF.EmitScalarExpr(SE); 7530 Sz = CGF.EmitScalarConversion(Sz, SE->getType(), 7531 CGF.getContext().getSizeType(), 7532 SE->getExprLoc()); 7533 Size = CGF.Builder.CreateNUWMul(Size, Sz); 7534 } 7535 return Size; 7536 } 7537 7538 // Reference types are ignored for mapping purposes. 7539 if (const auto *RefTy = ExprTy->getAs<ReferenceType>()) 7540 ExprTy = RefTy->getPointeeType().getCanonicalType(); 7541 7542 // Given that an array section is considered a built-in type, we need to 7543 // do the calculation based on the length of the section instead of relying 7544 // on CGF.getTypeSize(E->getType()). 7545 if (const auto *OAE = dyn_cast<OMPArraySectionExpr>(E)) { 7546 QualType BaseTy = OMPArraySectionExpr::getBaseOriginalType( 7547 OAE->getBase()->IgnoreParenImpCasts()) 7548 .getCanonicalType(); 7549 7550 // If there is no length associated with the expression and lower bound is 7551 // not specified too, that means we are using the whole length of the 7552 // base. 7553 if (!OAE->getLength() && OAE->getColonLocFirst().isValid() && 7554 !OAE->getLowerBound()) 7555 return CGF.getTypeSize(BaseTy); 7556 7557 llvm::Value *ElemSize; 7558 if (const auto *PTy = BaseTy->getAs<PointerType>()) { 7559 ElemSize = CGF.getTypeSize(PTy->getPointeeType().getCanonicalType()); 7560 } else { 7561 const auto *ATy = cast<ArrayType>(BaseTy.getTypePtr()); 7562 assert(ATy && "Expecting array type if not a pointer type."); 7563 ElemSize = CGF.getTypeSize(ATy->getElementType().getCanonicalType()); 7564 } 7565 7566 // If we don't have a length at this point, that is because we have an 7567 // array section with a single element. 
7568 if (!OAE->getLength() && OAE->getColonLocFirst().isInvalid()) 7569 return ElemSize; 7570 7571 if (const Expr *LenExpr = OAE->getLength()) { 7572 llvm::Value *LengthVal = CGF.EmitScalarExpr(LenExpr); 7573 LengthVal = CGF.EmitScalarConversion(LengthVal, LenExpr->getType(), 7574 CGF.getContext().getSizeType(), 7575 LenExpr->getExprLoc()); 7576 return CGF.Builder.CreateNUWMul(LengthVal, ElemSize); 7577 } 7578 assert(!OAE->getLength() && OAE->getColonLocFirst().isValid() && 7579 OAE->getLowerBound() && "expected array_section[lb:]."); 7580 // Size = sizetype - lb * elemtype; 7581 llvm::Value *LengthVal = CGF.getTypeSize(BaseTy); 7582 llvm::Value *LBVal = CGF.EmitScalarExpr(OAE->getLowerBound()); 7583 LBVal = CGF.EmitScalarConversion(LBVal, OAE->getLowerBound()->getType(), 7584 CGF.getContext().getSizeType(), 7585 OAE->getLowerBound()->getExprLoc()); 7586 LBVal = CGF.Builder.CreateNUWMul(LBVal, ElemSize); 7587 llvm::Value *Cmp = CGF.Builder.CreateICmpUGT(LengthVal, LBVal); 7588 llvm::Value *TrueVal = CGF.Builder.CreateNUWSub(LengthVal, LBVal); 7589 LengthVal = CGF.Builder.CreateSelect( 7590 Cmp, TrueVal, llvm::ConstantInt::get(CGF.SizeTy, 0)); 7591 return LengthVal; 7592 } 7593 return CGF.getTypeSize(ExprTy); 7594 } 7595 7596 /// Return the corresponding bits for a given map clause modifier. Add 7597 /// a flag marking the map as a pointer if requested. Add a flag marking the 7598 /// map as the first one of a series of maps that relate to the same map 7599 /// expression. 7600 OpenMPOffloadMappingFlags getMapTypeBits( 7601 OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers, 7602 ArrayRef<OpenMPMotionModifierKind> MotionModifiers, bool IsImplicit, 7603 bool AddPtrFlag, bool AddIsTargetParamFlag, bool IsNonContiguous) const { 7604 OpenMPOffloadMappingFlags Bits = 7605 IsImplicit ? 
OMP_MAP_IMPLICIT : OMP_MAP_NONE; 7606 switch (MapType) { 7607 case OMPC_MAP_alloc: 7608 case OMPC_MAP_release: 7609 // alloc and release is the default behavior in the runtime library, i.e. 7610 // if we don't pass any bits alloc/release that is what the runtime is 7611 // going to do. Therefore, we don't need to signal anything for these two 7612 // type modifiers. 7613 break; 7614 case OMPC_MAP_to: 7615 Bits |= OMP_MAP_TO; 7616 break; 7617 case OMPC_MAP_from: 7618 Bits |= OMP_MAP_FROM; 7619 break; 7620 case OMPC_MAP_tofrom: 7621 Bits |= OMP_MAP_TO | OMP_MAP_FROM; 7622 break; 7623 case OMPC_MAP_delete: 7624 Bits |= OMP_MAP_DELETE; 7625 break; 7626 case OMPC_MAP_unknown: 7627 llvm_unreachable("Unexpected map type!"); 7628 } 7629 if (AddPtrFlag) 7630 Bits |= OMP_MAP_PTR_AND_OBJ; 7631 if (AddIsTargetParamFlag) 7632 Bits |= OMP_MAP_TARGET_PARAM; 7633 if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_always)) 7634 Bits |= OMP_MAP_ALWAYS; 7635 if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_close)) 7636 Bits |= OMP_MAP_CLOSE; 7637 if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_present) || 7638 llvm::is_contained(MotionModifiers, OMPC_MOTION_MODIFIER_present)) 7639 Bits |= OMP_MAP_PRESENT; 7640 if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_ompx_hold)) 7641 Bits |= OMP_MAP_OMPX_HOLD; 7642 if (IsNonContiguous) 7643 Bits |= OMP_MAP_NON_CONTIG; 7644 return Bits; 7645 } 7646 7647 /// Return true if the provided expression is a final array section. A 7648 /// final array section, is one whose length can't be proved to be one. 7649 bool isFinalArraySectionExpression(const Expr *E) const { 7650 const auto *OASE = dyn_cast<OMPArraySectionExpr>(E); 7651 7652 // It is not an array section and therefore not a unity-size one. 7653 if (!OASE) 7654 return false; 7655 7656 // An array section with no colon always refer to a single element. 
7657 if (OASE->getColonLocFirst().isInvalid()) 7658 return false; 7659 7660 const Expr *Length = OASE->getLength(); 7661 7662 // If we don't have a length we have to check if the array has size 1 7663 // for this dimension. Also, we should always expect a length if the 7664 // base type is pointer. 7665 if (!Length) { 7666 QualType BaseQTy = OMPArraySectionExpr::getBaseOriginalType( 7667 OASE->getBase()->IgnoreParenImpCasts()) 7668 .getCanonicalType(); 7669 if (const auto *ATy = dyn_cast<ConstantArrayType>(BaseQTy.getTypePtr())) 7670 return ATy->getSize().getSExtValue() != 1; 7671 // If we don't have a constant dimension length, we have to consider 7672 // the current section as having any size, so it is not necessarily 7673 // unitary. If it happen to be unity size, that's user fault. 7674 return true; 7675 } 7676 7677 // Check if the length evaluates to 1. 7678 Expr::EvalResult Result; 7679 if (!Length->EvaluateAsInt(Result, CGF.getContext())) 7680 return true; // Can have more that size 1. 7681 7682 llvm::APSInt ConstLength = Result.Val.getInt(); 7683 return ConstLength.getSExtValue() != 1; 7684 } 7685 7686 /// Generate the base pointers, section pointers, sizes, map type bits, and 7687 /// user-defined mappers (all included in \a CombinedInfo) for the provided 7688 /// map type, map or motion modifiers, and expression components. 7689 /// \a IsFirstComponent should be set to true if the provided set of 7690 /// components is the first associated with a capture. 
7691 void generateInfoForComponentList( 7692 OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers, 7693 ArrayRef<OpenMPMotionModifierKind> MotionModifiers, 7694 OMPClauseMappableExprCommon::MappableExprComponentListRef Components, 7695 MapCombinedInfoTy &CombinedInfo, StructRangeInfoTy &PartialStruct, 7696 bool IsFirstComponentList, bool IsImplicit, 7697 const ValueDecl *Mapper = nullptr, bool ForDeviceAddr = false, 7698 const ValueDecl *BaseDecl = nullptr, const Expr *MapExpr = nullptr, 7699 ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef> 7700 OverlappedElements = llvm::None) const { 7701 // The following summarizes what has to be generated for each map and the 7702 // types below. The generated information is expressed in this order: 7703 // base pointer, section pointer, size, flags 7704 // (to add to the ones that come from the map type and modifier). 7705 // 7706 // double d; 7707 // int i[100]; 7708 // float *p; 7709 // 7710 // struct S1 { 7711 // int i; 7712 // float f[50]; 7713 // } 7714 // struct S2 { 7715 // int i; 7716 // float f[50]; 7717 // S1 s; 7718 // double *p; 7719 // struct S2 *ps; 7720 // int &ref; 7721 // } 7722 // S2 s; 7723 // S2 *ps; 7724 // 7725 // map(d) 7726 // &d, &d, sizeof(double), TARGET_PARAM | TO | FROM 7727 // 7728 // map(i) 7729 // &i, &i, 100*sizeof(int), TARGET_PARAM | TO | FROM 7730 // 7731 // map(i[1:23]) 7732 // &i(=&i[0]), &i[1], 23*sizeof(int), TARGET_PARAM | TO | FROM 7733 // 7734 // map(p) 7735 // &p, &p, sizeof(float*), TARGET_PARAM | TO | FROM 7736 // 7737 // map(p[1:24]) 7738 // &p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM | PTR_AND_OBJ 7739 // in unified shared memory mode or for local pointers 7740 // p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM 7741 // 7742 // map(s) 7743 // &s, &s, sizeof(S2), TARGET_PARAM | TO | FROM 7744 // 7745 // map(s.i) 7746 // &s, &(s.i), sizeof(int), TARGET_PARAM | TO | FROM 7747 // 7748 // map(s.s.f) 7749 // &s, &(s.s.f[0]), 
50*sizeof(float), TARGET_PARAM | TO | FROM 7750 // 7751 // map(s.p) 7752 // &s, &(s.p), sizeof(double*), TARGET_PARAM | TO | FROM 7753 // 7754 // map(to: s.p[:22]) 7755 // &s, &(s.p), sizeof(double*), TARGET_PARAM (*) 7756 // &s, &(s.p), sizeof(double*), MEMBER_OF(1) (**) 7757 // &(s.p), &(s.p[0]), 22*sizeof(double), 7758 // MEMBER_OF(1) | PTR_AND_OBJ | TO (***) 7759 // (*) alloc space for struct members, only this is a target parameter 7760 // (**) map the pointer (nothing to be mapped in this example) (the compiler 7761 // optimizes this entry out, same in the examples below) 7762 // (***) map the pointee (map: to) 7763 // 7764 // map(to: s.ref) 7765 // &s, &(s.ref), sizeof(int*), TARGET_PARAM (*) 7766 // &s, &(s.ref), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | TO (***) 7767 // (*) alloc space for struct members, only this is a target parameter 7768 // (**) map the pointer (nothing to be mapped in this example) (the compiler 7769 // optimizes this entry out, same in the examples below) 7770 // (***) map the pointee (map: to) 7771 // 7772 // map(s.ps) 7773 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM | TO | FROM 7774 // 7775 // map(from: s.ps->s.i) 7776 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM 7777 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1) 7778 // &(s.ps), &(s.ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM 7779 // 7780 // map(to: s.ps->ps) 7781 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM 7782 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1) 7783 // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | TO 7784 // 7785 // map(s.ps->ps->ps) 7786 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM 7787 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1) 7788 // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ 7789 // &(s.ps->ps), &(s.ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM 7790 // 7791 // map(to: s.ps->ps->s.f[:22]) 7792 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM 7793 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1) 7794 // &(s.ps), &(s.ps->ps), sizeof(S2*), 
MEMBER_OF(1) | PTR_AND_OBJ 7795 // &(s.ps->ps), &(s.ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO 7796 // 7797 // map(ps) 7798 // &ps, &ps, sizeof(S2*), TARGET_PARAM | TO | FROM 7799 // 7800 // map(ps->i) 7801 // ps, &(ps->i), sizeof(int), TARGET_PARAM | TO | FROM 7802 // 7803 // map(ps->s.f) 7804 // ps, &(ps->s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM 7805 // 7806 // map(from: ps->p) 7807 // ps, &(ps->p), sizeof(double*), TARGET_PARAM | FROM 7808 // 7809 // map(to: ps->p[:22]) 7810 // ps, &(ps->p), sizeof(double*), TARGET_PARAM 7811 // ps, &(ps->p), sizeof(double*), MEMBER_OF(1) 7812 // &(ps->p), &(ps->p[0]), 22*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | TO 7813 // 7814 // map(ps->ps) 7815 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM | TO | FROM 7816 // 7817 // map(from: ps->ps->s.i) 7818 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM 7819 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1) 7820 // &(ps->ps), &(ps->ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM 7821 // 7822 // map(from: ps->ps->ps) 7823 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM 7824 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1) 7825 // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | FROM 7826 // 7827 // map(ps->ps->ps->ps) 7828 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM 7829 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1) 7830 // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ 7831 // &(ps->ps->ps), &(ps->ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM 7832 // 7833 // map(to: ps->ps->ps->s.f[:22]) 7834 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM 7835 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1) 7836 // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ 7837 // &(ps->ps->ps), &(ps->ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO 7838 // 7839 // map(to: s.f[:22]) map(from: s.p[:33]) 7840 // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1) + 7841 // sizeof(double*) (**), TARGET_PARAM 7842 // &s, &(s.f[0]), 22*sizeof(float), 
MEMBER_OF(1) | TO 7843 // &s, &(s.p), sizeof(double*), MEMBER_OF(1) 7844 // &(s.p), &(s.p[0]), 33*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | FROM 7845 // (*) allocate contiguous space needed to fit all mapped members even if 7846 // we allocate space for members not mapped (in this example, 7847 // s.f[22..49] and s.s are not mapped, yet we must allocate space for 7848 // them as well because they fall between &s.f[0] and &s.p) 7849 // 7850 // map(from: s.f[:22]) map(to: ps->p[:33]) 7851 // &s, &(s.f[0]), 22*sizeof(float), TARGET_PARAM | FROM 7852 // ps, &(ps->p), sizeof(S2*), TARGET_PARAM 7853 // ps, &(ps->p), sizeof(double*), MEMBER_OF(2) (*) 7854 // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(2) | PTR_AND_OBJ | TO 7855 // (*) the struct this entry pertains to is the 2nd element in the list of 7856 // arguments, hence MEMBER_OF(2) 7857 // 7858 // map(from: s.f[:22], s.s) map(to: ps->p[:33]) 7859 // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1), TARGET_PARAM 7860 // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | FROM 7861 // &s, &(s.s), sizeof(struct S1), MEMBER_OF(1) | FROM 7862 // ps, &(ps->p), sizeof(S2*), TARGET_PARAM 7863 // ps, &(ps->p), sizeof(double*), MEMBER_OF(4) (*) 7864 // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(4) | PTR_AND_OBJ | TO 7865 // (*) the struct this entry pertains to is the 4th element in the list 7866 // of arguments, hence MEMBER_OF(4) 7867 7868 // Track if the map information being generated is the first for a capture. 7869 bool IsCaptureFirstInfo = IsFirstComponentList; 7870 // When the variable is on a declare target link or in a to clause with 7871 // unified memory, a reference is needed to hold the host/device address 7872 // of the variable. 7873 bool RequiresReference = false; 7874 7875 // Scan the components from the base to the complete expression. 
7876 auto CI = Components.rbegin(); 7877 auto CE = Components.rend(); 7878 auto I = CI; 7879 7880 // Track if the map information being generated is the first for a list of 7881 // components. 7882 bool IsExpressionFirstInfo = true; 7883 bool FirstPointerInComplexData = false; 7884 Address BP = Address::invalid(); 7885 const Expr *AssocExpr = I->getAssociatedExpression(); 7886 const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr); 7887 const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr); 7888 const auto *OAShE = dyn_cast<OMPArrayShapingExpr>(AssocExpr); 7889 7890 if (isa<MemberExpr>(AssocExpr)) { 7891 // The base is the 'this' pointer. The content of the pointer is going 7892 // to be the base of the field being mapped. 7893 BP = CGF.LoadCXXThisAddress(); 7894 } else if ((AE && isa<CXXThisExpr>(AE->getBase()->IgnoreParenImpCasts())) || 7895 (OASE && 7896 isa<CXXThisExpr>(OASE->getBase()->IgnoreParenImpCasts()))) { 7897 BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF); 7898 } else if (OAShE && 7899 isa<CXXThisExpr>(OAShE->getBase()->IgnoreParenCasts())) { 7900 BP = Address::deprecated( 7901 CGF.EmitScalarExpr(OAShE->getBase()), 7902 CGF.getContext().getTypeAlignInChars(OAShE->getBase()->getType())); 7903 } else { 7904 // The base is the reference to the variable. 7905 // BP = &Var. 7906 BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF); 7907 if (const auto *VD = 7908 dyn_cast_or_null<VarDecl>(I->getAssociatedDeclaration())) { 7909 if (llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 7910 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD)) { 7911 if ((*Res == OMPDeclareTargetDeclAttr::MT_Link) || 7912 (*Res == OMPDeclareTargetDeclAttr::MT_To && 7913 CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())) { 7914 RequiresReference = true; 7915 BP = CGF.CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD); 7916 } 7917 } 7918 } 7919 7920 // If the variable is a pointer and is being dereferenced (i.e. 
is not 7921 // the last component), the base has to be the pointer itself, not its 7922 // reference. References are ignored for mapping purposes. 7923 QualType Ty = 7924 I->getAssociatedDeclaration()->getType().getNonReferenceType(); 7925 if (Ty->isAnyPointerType() && std::next(I) != CE) { 7926 // No need to generate individual map information for the pointer, it 7927 // can be associated with the combined storage if shared memory mode is 7928 // active or the base declaration is not global variable. 7929 const auto *VD = dyn_cast<VarDecl>(I->getAssociatedDeclaration()); 7930 if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() || 7931 !VD || VD->hasLocalStorage()) 7932 BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>()); 7933 else 7934 FirstPointerInComplexData = true; 7935 ++I; 7936 } 7937 } 7938 7939 // Track whether a component of the list should be marked as MEMBER_OF some 7940 // combined entry (for partial structs). Only the first PTR_AND_OBJ entry 7941 // in a component list should be marked as MEMBER_OF, all subsequent entries 7942 // do not belong to the base struct. E.g. 7943 // struct S2 s; 7944 // s.ps->ps->ps->f[:] 7945 // (1) (2) (3) (4) 7946 // ps(1) is a member pointer, ps(2) is a pointee of ps(1), so it is a 7947 // PTR_AND_OBJ entry; the PTR is ps(1), so MEMBER_OF the base struct. ps(3) 7948 // is the pointee of ps(2) which is not member of struct s, so it should not 7949 // be marked as such (it is still PTR_AND_OBJ). 7950 // The variable is initialized to false so that PTR_AND_OBJ entries which 7951 // are not struct members are not considered (e.g. array of pointers to 7952 // data). 7953 bool ShouldBeMemberOf = false; 7954 7955 // Variable keeping track of whether or not we have encountered a component 7956 // in the component list which is a member expression. 
Useful when we have a 7957 // pointer or a final array section, in which case it is the previous 7958 // component in the list which tells us whether we have a member expression. 7959 // E.g. X.f[:] 7960 // While processing the final array section "[:]" it is "f" which tells us 7961 // whether we are dealing with a member of a declared struct. 7962 const MemberExpr *EncounteredME = nullptr; 7963 7964 // Track for the total number of dimension. Start from one for the dummy 7965 // dimension. 7966 uint64_t DimSize = 1; 7967 7968 bool IsNonContiguous = CombinedInfo.NonContigInfo.IsNonContiguous; 7969 bool IsPrevMemberReference = false; 7970 7971 for (; I != CE; ++I) { 7972 // If the current component is member of a struct (parent struct) mark it. 7973 if (!EncounteredME) { 7974 EncounteredME = dyn_cast<MemberExpr>(I->getAssociatedExpression()); 7975 // If we encounter a PTR_AND_OBJ entry from now on it should be marked 7976 // as MEMBER_OF the parent struct. 7977 if (EncounteredME) { 7978 ShouldBeMemberOf = true; 7979 // Do not emit as complex pointer if this is actually not array-like 7980 // expression. 7981 if (FirstPointerInComplexData) { 7982 QualType Ty = std::prev(I) 7983 ->getAssociatedDeclaration() 7984 ->getType() 7985 .getNonReferenceType(); 7986 BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>()); 7987 FirstPointerInComplexData = false; 7988 } 7989 } 7990 } 7991 7992 auto Next = std::next(I); 7993 7994 // We need to generate the addresses and sizes if this is the last 7995 // component, if the component is a pointer or if it is an array section 7996 // whose length can't be proved to be one. If this is a pointer, it 7997 // becomes the base address for the following components. 7998 7999 // A final array section, is one whose length can't be proved to be one. 8000 // If the map item is non-contiguous then we don't treat any array section 8001 // as final array section. 
8002 bool IsFinalArraySection = 8003 !IsNonContiguous && 8004 isFinalArraySectionExpression(I->getAssociatedExpression()); 8005 8006 // If we have a declaration for the mapping use that, otherwise use 8007 // the base declaration of the map clause. 8008 const ValueDecl *MapDecl = (I->getAssociatedDeclaration()) 8009 ? I->getAssociatedDeclaration() 8010 : BaseDecl; 8011 MapExpr = (I->getAssociatedExpression()) ? I->getAssociatedExpression() 8012 : MapExpr; 8013 8014 // Get information on whether the element is a pointer. Have to do a 8015 // special treatment for array sections given that they are built-in 8016 // types. 8017 const auto *OASE = 8018 dyn_cast<OMPArraySectionExpr>(I->getAssociatedExpression()); 8019 const auto *OAShE = 8020 dyn_cast<OMPArrayShapingExpr>(I->getAssociatedExpression()); 8021 const auto *UO = dyn_cast<UnaryOperator>(I->getAssociatedExpression()); 8022 const auto *BO = dyn_cast<BinaryOperator>(I->getAssociatedExpression()); 8023 bool IsPointer = 8024 OAShE || 8025 (OASE && OMPArraySectionExpr::getBaseOriginalType(OASE) 8026 .getCanonicalType() 8027 ->isAnyPointerType()) || 8028 I->getAssociatedExpression()->getType()->isAnyPointerType(); 8029 bool IsMemberReference = isa<MemberExpr>(I->getAssociatedExpression()) && 8030 MapDecl && 8031 MapDecl->getType()->isLValueReferenceType(); 8032 bool IsNonDerefPointer = IsPointer && !UO && !BO && !IsNonContiguous; 8033 8034 if (OASE) 8035 ++DimSize; 8036 8037 if (Next == CE || IsMemberReference || IsNonDerefPointer || 8038 IsFinalArraySection) { 8039 // If this is not the last component, we expect the pointer to be 8040 // associated with an array expression or member expression. 
8041 assert((Next == CE || 8042 isa<MemberExpr>(Next->getAssociatedExpression()) || 8043 isa<ArraySubscriptExpr>(Next->getAssociatedExpression()) || 8044 isa<OMPArraySectionExpr>(Next->getAssociatedExpression()) || 8045 isa<OMPArrayShapingExpr>(Next->getAssociatedExpression()) || 8046 isa<UnaryOperator>(Next->getAssociatedExpression()) || 8047 isa<BinaryOperator>(Next->getAssociatedExpression())) && 8048 "Unexpected expression"); 8049 8050 Address LB = Address::invalid(); 8051 Address LowestElem = Address::invalid(); 8052 auto &&EmitMemberExprBase = [](CodeGenFunction &CGF, 8053 const MemberExpr *E) { 8054 const Expr *BaseExpr = E->getBase(); 8055 // If this is s.x, emit s as an lvalue. If it is s->x, emit s as a 8056 // scalar. 8057 LValue BaseLV; 8058 if (E->isArrow()) { 8059 LValueBaseInfo BaseInfo; 8060 TBAAAccessInfo TBAAInfo; 8061 Address Addr = 8062 CGF.EmitPointerWithAlignment(BaseExpr, &BaseInfo, &TBAAInfo); 8063 QualType PtrTy = BaseExpr->getType()->getPointeeType(); 8064 BaseLV = CGF.MakeAddrLValue(Addr, PtrTy, BaseInfo, TBAAInfo); 8065 } else { 8066 BaseLV = CGF.EmitOMPSharedLValue(BaseExpr); 8067 } 8068 return BaseLV; 8069 }; 8070 if (OAShE) { 8071 LowestElem = LB = 8072 Address::deprecated(CGF.EmitScalarExpr(OAShE->getBase()), 8073 CGF.getContext().getTypeAlignInChars( 8074 OAShE->getBase()->getType())); 8075 } else if (IsMemberReference) { 8076 const auto *ME = cast<MemberExpr>(I->getAssociatedExpression()); 8077 LValue BaseLVal = EmitMemberExprBase(CGF, ME); 8078 LowestElem = CGF.EmitLValueForFieldInitialization( 8079 BaseLVal, cast<FieldDecl>(MapDecl)) 8080 .getAddress(CGF); 8081 LB = CGF.EmitLoadOfReferenceLValue(LowestElem, MapDecl->getType()) 8082 .getAddress(CGF); 8083 } else { 8084 LowestElem = LB = 8085 CGF.EmitOMPSharedLValue(I->getAssociatedExpression()) 8086 .getAddress(CGF); 8087 } 8088 8089 // If this component is a pointer inside the base struct then we don't 8090 // need to create any entry for it - it will be combined with the object 
8091 // it is pointing to into a single PTR_AND_OBJ entry. 8092 bool IsMemberPointerOrAddr = 8093 EncounteredME && 8094 (((IsPointer || ForDeviceAddr) && 8095 I->getAssociatedExpression() == EncounteredME) || 8096 (IsPrevMemberReference && !IsPointer) || 8097 (IsMemberReference && Next != CE && 8098 !Next->getAssociatedExpression()->getType()->isPointerType())); 8099 if (!OverlappedElements.empty() && Next == CE) { 8100 // Handle base element with the info for overlapped elements. 8101 assert(!PartialStruct.Base.isValid() && "The base element is set."); 8102 assert(!IsPointer && 8103 "Unexpected base element with the pointer type."); 8104 // Mark the whole struct as the struct that requires allocation on the 8105 // device. 8106 PartialStruct.LowestElem = {0, LowestElem}; 8107 CharUnits TypeSize = CGF.getContext().getTypeSizeInChars( 8108 I->getAssociatedExpression()->getType()); 8109 Address HB = CGF.Builder.CreateConstGEP( 8110 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(LowestElem, 8111 CGF.VoidPtrTy), 8112 TypeSize.getQuantity() - 1); 8113 PartialStruct.HighestElem = { 8114 std::numeric_limits<decltype( 8115 PartialStruct.HighestElem.first)>::max(), 8116 HB}; 8117 PartialStruct.Base = BP; 8118 PartialStruct.LB = LB; 8119 assert( 8120 PartialStruct.PreliminaryMapData.BasePointers.empty() && 8121 "Overlapped elements must be used only once for the variable."); 8122 std::swap(PartialStruct.PreliminaryMapData, CombinedInfo); 8123 // Emit data for non-overlapped data. 8124 OpenMPOffloadMappingFlags Flags = 8125 OMP_MAP_MEMBER_OF | 8126 getMapTypeBits(MapType, MapModifiers, MotionModifiers, IsImplicit, 8127 /*AddPtrFlag=*/false, 8128 /*AddIsTargetParamFlag=*/false, IsNonContiguous); 8129 llvm::Value *Size = nullptr; 8130 // Do bitcopy of all non-overlapped structure elements. 
8131 for (OMPClauseMappableExprCommon::MappableExprComponentListRef 8132 Component : OverlappedElements) { 8133 Address ComponentLB = Address::invalid(); 8134 for (const OMPClauseMappableExprCommon::MappableComponent &MC : 8135 Component) { 8136 if (const ValueDecl *VD = MC.getAssociatedDeclaration()) { 8137 const auto *FD = dyn_cast<FieldDecl>(VD); 8138 if (FD && FD->getType()->isLValueReferenceType()) { 8139 const auto *ME = 8140 cast<MemberExpr>(MC.getAssociatedExpression()); 8141 LValue BaseLVal = EmitMemberExprBase(CGF, ME); 8142 ComponentLB = 8143 CGF.EmitLValueForFieldInitialization(BaseLVal, FD) 8144 .getAddress(CGF); 8145 } else { 8146 ComponentLB = 8147 CGF.EmitOMPSharedLValue(MC.getAssociatedExpression()) 8148 .getAddress(CGF); 8149 } 8150 Size = CGF.Builder.CreatePtrDiff( 8151 CGF.Int8Ty, CGF.EmitCastToVoidPtr(ComponentLB.getPointer()), 8152 CGF.EmitCastToVoidPtr(LB.getPointer())); 8153 break; 8154 } 8155 } 8156 assert(Size && "Failed to determine structure size"); 8157 CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr); 8158 CombinedInfo.BasePointers.push_back(BP.getPointer()); 8159 CombinedInfo.Pointers.push_back(LB.getPointer()); 8160 CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast( 8161 Size, CGF.Int64Ty, /*isSigned=*/true)); 8162 CombinedInfo.Types.push_back(Flags); 8163 CombinedInfo.Mappers.push_back(nullptr); 8164 CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? 
DimSize 8165 : 1); 8166 LB = CGF.Builder.CreateConstGEP(ComponentLB, 1); 8167 } 8168 CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr); 8169 CombinedInfo.BasePointers.push_back(BP.getPointer()); 8170 CombinedInfo.Pointers.push_back(LB.getPointer()); 8171 Size = CGF.Builder.CreatePtrDiff( 8172 CGF.Int8Ty, CGF.Builder.CreateConstGEP(HB, 1).getPointer(), 8173 CGF.EmitCastToVoidPtr(LB.getPointer())); 8174 CombinedInfo.Sizes.push_back( 8175 CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true)); 8176 CombinedInfo.Types.push_back(Flags); 8177 CombinedInfo.Mappers.push_back(nullptr); 8178 CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize 8179 : 1); 8180 break; 8181 } 8182 llvm::Value *Size = getExprTypeSize(I->getAssociatedExpression()); 8183 if (!IsMemberPointerOrAddr || 8184 (Next == CE && MapType != OMPC_MAP_unknown)) { 8185 CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr); 8186 CombinedInfo.BasePointers.push_back(BP.getPointer()); 8187 CombinedInfo.Pointers.push_back(LB.getPointer()); 8188 CombinedInfo.Sizes.push_back( 8189 CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true)); 8190 CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize 8191 : 1); 8192 8193 // If Mapper is valid, the last component inherits the mapper. 8194 bool HasMapper = Mapper && Next == CE; 8195 CombinedInfo.Mappers.push_back(HasMapper ? Mapper : nullptr); 8196 8197 // We need to add a pointer flag for each map that comes from the 8198 // same expression except for the first one. We also need to signal 8199 // this map is the first one that relates with the current capture 8200 // (there is a set of entries for each capture). 
8201 OpenMPOffloadMappingFlags Flags = getMapTypeBits( 8202 MapType, MapModifiers, MotionModifiers, IsImplicit, 8203 !IsExpressionFirstInfo || RequiresReference || 8204 FirstPointerInComplexData || IsMemberReference, 8205 IsCaptureFirstInfo && !RequiresReference, IsNonContiguous); 8206 8207 if (!IsExpressionFirstInfo || IsMemberReference) { 8208 // If we have a PTR_AND_OBJ pair where the OBJ is a pointer as well, 8209 // then we reset the TO/FROM/ALWAYS/DELETE/CLOSE flags. 8210 if (IsPointer || (IsMemberReference && Next != CE)) 8211 Flags &= ~(OMP_MAP_TO | OMP_MAP_FROM | OMP_MAP_ALWAYS | 8212 OMP_MAP_DELETE | OMP_MAP_CLOSE); 8213 8214 if (ShouldBeMemberOf) { 8215 // Set placeholder value MEMBER_OF=FFFF to indicate that the flag 8216 // should be later updated with the correct value of MEMBER_OF. 8217 Flags |= OMP_MAP_MEMBER_OF; 8218 // From now on, all subsequent PTR_AND_OBJ entries should not be 8219 // marked as MEMBER_OF. 8220 ShouldBeMemberOf = false; 8221 } 8222 } 8223 8224 CombinedInfo.Types.push_back(Flags); 8225 } 8226 8227 // If we have encountered a member expression so far, keep track of the 8228 // mapped member. If the parent is "*this", then the value declaration 8229 // is nullptr. 
8230 if (EncounteredME) { 8231 const auto *FD = cast<FieldDecl>(EncounteredME->getMemberDecl()); 8232 unsigned FieldIndex = FD->getFieldIndex(); 8233 8234 // Update info about the lowest and highest elements for this struct 8235 if (!PartialStruct.Base.isValid()) { 8236 PartialStruct.LowestElem = {FieldIndex, LowestElem}; 8237 if (IsFinalArraySection) { 8238 Address HB = 8239 CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false) 8240 .getAddress(CGF); 8241 PartialStruct.HighestElem = {FieldIndex, HB}; 8242 } else { 8243 PartialStruct.HighestElem = {FieldIndex, LowestElem}; 8244 } 8245 PartialStruct.Base = BP; 8246 PartialStruct.LB = BP; 8247 } else if (FieldIndex < PartialStruct.LowestElem.first) { 8248 PartialStruct.LowestElem = {FieldIndex, LowestElem}; 8249 } else if (FieldIndex > PartialStruct.HighestElem.first) { 8250 PartialStruct.HighestElem = {FieldIndex, LowestElem}; 8251 } 8252 } 8253 8254 // Need to emit combined struct for array sections. 8255 if (IsFinalArraySection || IsNonContiguous) 8256 PartialStruct.IsArraySection = true; 8257 8258 // If we have a final array section, we are done with this expression. 8259 if (IsFinalArraySection) 8260 break; 8261 8262 // The pointer becomes the base for the next element. 8263 if (Next != CE) 8264 BP = IsMemberReference ? LowestElem : LB; 8265 8266 IsExpressionFirstInfo = false; 8267 IsCaptureFirstInfo = false; 8268 FirstPointerInComplexData = false; 8269 IsPrevMemberReference = IsMemberReference; 8270 } else if (FirstPointerInComplexData) { 8271 QualType Ty = Components.rbegin() 8272 ->getAssociatedDeclaration() 8273 ->getType() 8274 .getNonReferenceType(); 8275 BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>()); 8276 FirstPointerInComplexData = false; 8277 } 8278 } 8279 // If ran into the whole component - allocate the space for the whole 8280 // record. 
8281 if (!EncounteredME) 8282 PartialStruct.HasCompleteRecord = true; 8283 8284 if (!IsNonContiguous) 8285 return; 8286 8287 const ASTContext &Context = CGF.getContext(); 8288 8289 // For supporting stride in array section, we need to initialize the first 8290 // dimension size as 1, first offset as 0, and first count as 1 8291 MapValuesArrayTy CurOffsets = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 0)}; 8292 MapValuesArrayTy CurCounts = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)}; 8293 MapValuesArrayTy CurStrides; 8294 MapValuesArrayTy DimSizes{llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)}; 8295 uint64_t ElementTypeSize; 8296 8297 // Collect Size information for each dimension and get the element size as 8298 // the first Stride. For example, for `int arr[10][10]`, the DimSizes 8299 // should be [10, 10] and the first stride is 4 btyes. 8300 for (const OMPClauseMappableExprCommon::MappableComponent &Component : 8301 Components) { 8302 const Expr *AssocExpr = Component.getAssociatedExpression(); 8303 const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr); 8304 8305 if (!OASE) 8306 continue; 8307 8308 QualType Ty = OMPArraySectionExpr::getBaseOriginalType(OASE->getBase()); 8309 auto *CAT = Context.getAsConstantArrayType(Ty); 8310 auto *VAT = Context.getAsVariableArrayType(Ty); 8311 8312 // We need all the dimension size except for the last dimension. 8313 assert((VAT || CAT || &Component == &*Components.begin()) && 8314 "Should be either ConstantArray or VariableArray if not the " 8315 "first Component"); 8316 8317 // Get element size if CurStrides is empty. 
8318 if (CurStrides.empty()) { 8319 const Type *ElementType = nullptr; 8320 if (CAT) 8321 ElementType = CAT->getElementType().getTypePtr(); 8322 else if (VAT) 8323 ElementType = VAT->getElementType().getTypePtr(); 8324 else 8325 assert(&Component == &*Components.begin() && 8326 "Only expect pointer (non CAT or VAT) when this is the " 8327 "first Component"); 8328 // If ElementType is null, then it means the base is a pointer 8329 // (neither CAT nor VAT) and we'll attempt to get ElementType again 8330 // for next iteration. 8331 if (ElementType) { 8332 // For the case that having pointer as base, we need to remove one 8333 // level of indirection. 8334 if (&Component != &*Components.begin()) 8335 ElementType = ElementType->getPointeeOrArrayElementType(); 8336 ElementTypeSize = 8337 Context.getTypeSizeInChars(ElementType).getQuantity(); 8338 CurStrides.push_back( 8339 llvm::ConstantInt::get(CGF.Int64Ty, ElementTypeSize)); 8340 } 8341 } 8342 // Get dimension value except for the last dimension since we don't need 8343 // it. 8344 if (DimSizes.size() < Components.size() - 1) { 8345 if (CAT) 8346 DimSizes.push_back(llvm::ConstantInt::get( 8347 CGF.Int64Ty, CAT->getSize().getZExtValue())); 8348 else if (VAT) 8349 DimSizes.push_back(CGF.Builder.CreateIntCast( 8350 CGF.EmitScalarExpr(VAT->getSizeExpr()), CGF.Int64Ty, 8351 /*IsSigned=*/false)); 8352 } 8353 } 8354 8355 // Skip the dummy dimension since we have already have its information. 8356 auto *DI = DimSizes.begin() + 1; 8357 // Product of dimension. 8358 llvm::Value *DimProd = 8359 llvm::ConstantInt::get(CGF.CGM.Int64Ty, ElementTypeSize); 8360 8361 // Collect info for non-contiguous. Notice that offset, count, and stride 8362 // are only meaningful for array-section, so we insert a null for anything 8363 // other than array-section. 8364 // Also, the size of offset, count, and stride are not the same as 8365 // pointers, base_pointers, sizes, or dims. 
Instead, the size of offset, 8366 // count, and stride are the same as the number of non-contiguous 8367 // declaration in target update to/from clause. 8368 for (const OMPClauseMappableExprCommon::MappableComponent &Component : 8369 Components) { 8370 const Expr *AssocExpr = Component.getAssociatedExpression(); 8371 8372 if (const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr)) { 8373 llvm::Value *Offset = CGF.Builder.CreateIntCast( 8374 CGF.EmitScalarExpr(AE->getIdx()), CGF.Int64Ty, 8375 /*isSigned=*/false); 8376 CurOffsets.push_back(Offset); 8377 CurCounts.push_back(llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/1)); 8378 CurStrides.push_back(CurStrides.back()); 8379 continue; 8380 } 8381 8382 const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr); 8383 8384 if (!OASE) 8385 continue; 8386 8387 // Offset 8388 const Expr *OffsetExpr = OASE->getLowerBound(); 8389 llvm::Value *Offset = nullptr; 8390 if (!OffsetExpr) { 8391 // If offset is absent, then we just set it to zero. 8392 Offset = llvm::ConstantInt::get(CGF.Int64Ty, 0); 8393 } else { 8394 Offset = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(OffsetExpr), 8395 CGF.Int64Ty, 8396 /*isSigned=*/false); 8397 } 8398 CurOffsets.push_back(Offset); 8399 8400 // Count 8401 const Expr *CountExpr = OASE->getLength(); 8402 llvm::Value *Count = nullptr; 8403 if (!CountExpr) { 8404 // In Clang, once a high dimension is an array section, we construct all 8405 // the lower dimension as array section, however, for case like 8406 // arr[0:2][2], Clang construct the inner dimension as an array section 8407 // but it actually is not in an array section form according to spec. 8408 if (!OASE->getColonLocFirst().isValid() && 8409 !OASE->getColonLocSecond().isValid()) { 8410 Count = llvm::ConstantInt::get(CGF.Int64Ty, 1); 8411 } else { 8412 // OpenMP 5.0, 2.1.5 Array Sections, Description. 
8413 // When the length is absent it defaults to ⌈(size − 8414 // lower-bound)/stride⌉, where size is the size of the array 8415 // dimension. 8416 const Expr *StrideExpr = OASE->getStride(); 8417 llvm::Value *Stride = 8418 StrideExpr 8419 ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr), 8420 CGF.Int64Ty, /*isSigned=*/false) 8421 : nullptr; 8422 if (Stride) 8423 Count = CGF.Builder.CreateUDiv( 8424 CGF.Builder.CreateNUWSub(*DI, Offset), Stride); 8425 else 8426 Count = CGF.Builder.CreateNUWSub(*DI, Offset); 8427 } 8428 } else { 8429 Count = CGF.EmitScalarExpr(CountExpr); 8430 } 8431 Count = CGF.Builder.CreateIntCast(Count, CGF.Int64Ty, /*isSigned=*/false); 8432 CurCounts.push_back(Count); 8433 8434 // Stride_n' = Stride_n * (D_0 * D_1 ... * D_n-1) * Unit size 8435 // Take `int arr[5][5][5]` and `arr[0:2:2][1:2:1][0:2:2]` as an example: 8436 // Offset Count Stride 8437 // D0 0 1 4 (int) <- dummy dimension 8438 // D1 0 2 8 (2 * (1) * 4) 8439 // D2 1 2 20 (1 * (1 * 5) * 4) 8440 // D3 0 2 200 (2 * (1 * 5 * 4) * 4) 8441 const Expr *StrideExpr = OASE->getStride(); 8442 llvm::Value *Stride = 8443 StrideExpr 8444 ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr), 8445 CGF.Int64Ty, /*isSigned=*/false) 8446 : nullptr; 8447 DimProd = CGF.Builder.CreateNUWMul(DimProd, *(DI - 1)); 8448 if (Stride) 8449 CurStrides.push_back(CGF.Builder.CreateNUWMul(DimProd, Stride)); 8450 else 8451 CurStrides.push_back(DimProd); 8452 if (DI != DimSizes.end()) 8453 ++DI; 8454 } 8455 8456 CombinedInfo.NonContigInfo.Offsets.push_back(CurOffsets); 8457 CombinedInfo.NonContigInfo.Counts.push_back(CurCounts); 8458 CombinedInfo.NonContigInfo.Strides.push_back(CurStrides); 8459 } 8460 8461 /// Return the adjusted map modifiers if the declaration a capture refers to 8462 /// appears in a first-private clause. This is expected to be used only with 8463 /// directives that start with 'target'. 
8464 MappableExprsHandler::OpenMPOffloadMappingFlags 8465 getMapModifiersForPrivateClauses(const CapturedStmt::Capture &Cap) const { 8466 assert(Cap.capturesVariable() && "Expected capture by reference only!"); 8467 8468 // A first private variable captured by reference will use only the 8469 // 'private ptr' and 'map to' flag. Return the right flags if the captured 8470 // declaration is known as first-private in this handler. 8471 if (FirstPrivateDecls.count(Cap.getCapturedVar())) { 8472 if (Cap.getCapturedVar()->getType()->isAnyPointerType()) 8473 return MappableExprsHandler::OMP_MAP_TO | 8474 MappableExprsHandler::OMP_MAP_PTR_AND_OBJ; 8475 return MappableExprsHandler::OMP_MAP_PRIVATE | 8476 MappableExprsHandler::OMP_MAP_TO; 8477 } 8478 auto I = LambdasMap.find(Cap.getCapturedVar()->getCanonicalDecl()); 8479 if (I != LambdasMap.end()) 8480 // for map(to: lambda): using user specified map type. 8481 return getMapTypeBits( 8482 I->getSecond()->getMapType(), I->getSecond()->getMapTypeModifiers(), 8483 /*MotionModifiers=*/llvm::None, I->getSecond()->isImplicit(), 8484 /*AddPtrFlag=*/false, 8485 /*AddIsTargetParamFlag=*/false, 8486 /*isNonContiguous=*/false); 8487 return MappableExprsHandler::OMP_MAP_TO | 8488 MappableExprsHandler::OMP_MAP_FROM; 8489 } 8490 8491 static OpenMPOffloadMappingFlags getMemberOfFlag(unsigned Position) { 8492 // Rotate by getFlagMemberOffset() bits. 8493 return static_cast<OpenMPOffloadMappingFlags>(((uint64_t)Position + 1) 8494 << getFlagMemberOffset()); 8495 } 8496 8497 static void setCorrectMemberOfFlag(OpenMPOffloadMappingFlags &Flags, 8498 OpenMPOffloadMappingFlags MemberOfFlag) { 8499 // If the entry is PTR_AND_OBJ but has not been marked with the special 8500 // placeholder value 0xFFFF in the MEMBER_OF field, then it should not be 8501 // marked as MEMBER_OF. 
8502 if ((Flags & OMP_MAP_PTR_AND_OBJ) && 8503 ((Flags & OMP_MAP_MEMBER_OF) != OMP_MAP_MEMBER_OF)) 8504 return; 8505 8506 // Reset the placeholder value to prepare the flag for the assignment of the 8507 // proper MEMBER_OF value. 8508 Flags &= ~OMP_MAP_MEMBER_OF; 8509 Flags |= MemberOfFlag; 8510 } 8511 8512 void getPlainLayout(const CXXRecordDecl *RD, 8513 llvm::SmallVectorImpl<const FieldDecl *> &Layout, 8514 bool AsBase) const { 8515 const CGRecordLayout &RL = CGF.getTypes().getCGRecordLayout(RD); 8516 8517 llvm::StructType *St = 8518 AsBase ? RL.getBaseSubobjectLLVMType() : RL.getLLVMType(); 8519 8520 unsigned NumElements = St->getNumElements(); 8521 llvm::SmallVector< 8522 llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>, 4> 8523 RecordLayout(NumElements); 8524 8525 // Fill bases. 8526 for (const auto &I : RD->bases()) { 8527 if (I.isVirtual()) 8528 continue; 8529 const auto *Base = I.getType()->getAsCXXRecordDecl(); 8530 // Ignore empty bases. 8531 if (Base->isEmpty() || CGF.getContext() 8532 .getASTRecordLayout(Base) 8533 .getNonVirtualSize() 8534 .isZero()) 8535 continue; 8536 8537 unsigned FieldIndex = RL.getNonVirtualBaseLLVMFieldNo(Base); 8538 RecordLayout[FieldIndex] = Base; 8539 } 8540 // Fill in virtual bases. 8541 for (const auto &I : RD->vbases()) { 8542 const auto *Base = I.getType()->getAsCXXRecordDecl(); 8543 // Ignore empty bases. 8544 if (Base->isEmpty()) 8545 continue; 8546 unsigned FieldIndex = RL.getVirtualBaseIndex(Base); 8547 if (RecordLayout[FieldIndex]) 8548 continue; 8549 RecordLayout[FieldIndex] = Base; 8550 } 8551 // Fill in all the fields. 8552 assert(!RD->isUnion() && "Unexpected union."); 8553 for (const auto *Field : RD->fields()) { 8554 // Fill in non-bitfields. (Bitfields always use a zero pattern, which we 8555 // will fill in later.) 
8556 if (!Field->isBitField() && !Field->isZeroSize(CGF.getContext())) { 8557 unsigned FieldIndex = RL.getLLVMFieldNo(Field); 8558 RecordLayout[FieldIndex] = Field; 8559 } 8560 } 8561 for (const llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *> 8562 &Data : RecordLayout) { 8563 if (Data.isNull()) 8564 continue; 8565 if (const auto *Base = Data.dyn_cast<const CXXRecordDecl *>()) 8566 getPlainLayout(Base, Layout, /*AsBase=*/true); 8567 else 8568 Layout.push_back(Data.get<const FieldDecl *>()); 8569 } 8570 } 8571 8572 /// Generate all the base pointers, section pointers, sizes, map types, and 8573 /// mappers for the extracted mappable expressions (all included in \a 8574 /// CombinedInfo). Also, for each item that relates with a device pointer, a 8575 /// pair of the relevant declaration and index where it occurs is appended to 8576 /// the device pointers info array. 8577 void generateAllInfoForClauses( 8578 ArrayRef<const OMPClause *> Clauses, MapCombinedInfoTy &CombinedInfo, 8579 const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet = 8580 llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const { 8581 // We have to process the component lists that relate with the same 8582 // declaration in a single chunk so that we can generate the map flags 8583 // correctly. Therefore, we organize all lists in a map. 8584 enum MapKind { Present, Allocs, Other, Total }; 8585 llvm::MapVector<CanonicalDeclPtr<const Decl>, 8586 SmallVector<SmallVector<MapInfo, 8>, 4>> 8587 Info; 8588 8589 // Helper function to fill the information map for the different supported 8590 // clauses. 
8591 auto &&InfoGen = 8592 [&Info, &SkipVarSet]( 8593 const ValueDecl *D, MapKind Kind, 8594 OMPClauseMappableExprCommon::MappableExprComponentListRef L, 8595 OpenMPMapClauseKind MapType, 8596 ArrayRef<OpenMPMapModifierKind> MapModifiers, 8597 ArrayRef<OpenMPMotionModifierKind> MotionModifiers, 8598 bool ReturnDevicePointer, bool IsImplicit, const ValueDecl *Mapper, 8599 const Expr *VarRef = nullptr, bool ForDeviceAddr = false) { 8600 if (SkipVarSet.contains(D)) 8601 return; 8602 auto It = Info.find(D); 8603 if (It == Info.end()) 8604 It = Info 8605 .insert(std::make_pair( 8606 D, SmallVector<SmallVector<MapInfo, 8>, 4>(Total))) 8607 .first; 8608 It->second[Kind].emplace_back( 8609 L, MapType, MapModifiers, MotionModifiers, ReturnDevicePointer, 8610 IsImplicit, Mapper, VarRef, ForDeviceAddr); 8611 }; 8612 8613 for (const auto *Cl : Clauses) { 8614 const auto *C = dyn_cast<OMPMapClause>(Cl); 8615 if (!C) 8616 continue; 8617 MapKind Kind = Other; 8618 if (llvm::is_contained(C->getMapTypeModifiers(), 8619 OMPC_MAP_MODIFIER_present)) 8620 Kind = Present; 8621 else if (C->getMapType() == OMPC_MAP_alloc) 8622 Kind = Allocs; 8623 const auto *EI = C->getVarRefs().begin(); 8624 for (const auto L : C->component_lists()) { 8625 const Expr *E = (C->getMapLoc().isValid()) ? 
*EI : nullptr; 8626 InfoGen(std::get<0>(L), Kind, std::get<1>(L), C->getMapType(), 8627 C->getMapTypeModifiers(), llvm::None, 8628 /*ReturnDevicePointer=*/false, C->isImplicit(), std::get<2>(L), 8629 E); 8630 ++EI; 8631 } 8632 } 8633 for (const auto *Cl : Clauses) { 8634 const auto *C = dyn_cast<OMPToClause>(Cl); 8635 if (!C) 8636 continue; 8637 MapKind Kind = Other; 8638 if (llvm::is_contained(C->getMotionModifiers(), 8639 OMPC_MOTION_MODIFIER_present)) 8640 Kind = Present; 8641 const auto *EI = C->getVarRefs().begin(); 8642 for (const auto L : C->component_lists()) { 8643 InfoGen(std::get<0>(L), Kind, std::get<1>(L), OMPC_MAP_to, llvm::None, 8644 C->getMotionModifiers(), /*ReturnDevicePointer=*/false, 8645 C->isImplicit(), std::get<2>(L), *EI); 8646 ++EI; 8647 } 8648 } 8649 for (const auto *Cl : Clauses) { 8650 const auto *C = dyn_cast<OMPFromClause>(Cl); 8651 if (!C) 8652 continue; 8653 MapKind Kind = Other; 8654 if (llvm::is_contained(C->getMotionModifiers(), 8655 OMPC_MOTION_MODIFIER_present)) 8656 Kind = Present; 8657 const auto *EI = C->getVarRefs().begin(); 8658 for (const auto L : C->component_lists()) { 8659 InfoGen(std::get<0>(L), Kind, std::get<1>(L), OMPC_MAP_from, llvm::None, 8660 C->getMotionModifiers(), /*ReturnDevicePointer=*/false, 8661 C->isImplicit(), std::get<2>(L), *EI); 8662 ++EI; 8663 } 8664 } 8665 8666 // Look at the use_device_ptr clause information and mark the existing map 8667 // entries as such. If there is no map information for an entry in the 8668 // use_device_ptr list, we create one with map type 'alloc' and zero size 8669 // section. It is the user fault if that was not mapped before. If there is 8670 // no map information and the pointer is a struct member, then we defer the 8671 // emission of that entry until the whole struct has been processed. 
8672 llvm::MapVector<CanonicalDeclPtr<const Decl>, 8673 SmallVector<DeferredDevicePtrEntryTy, 4>> 8674 DeferredInfo; 8675 MapCombinedInfoTy UseDevicePtrCombinedInfo; 8676 8677 for (const auto *Cl : Clauses) { 8678 const auto *C = dyn_cast<OMPUseDevicePtrClause>(Cl); 8679 if (!C) 8680 continue; 8681 for (const auto L : C->component_lists()) { 8682 OMPClauseMappableExprCommon::MappableExprComponentListRef Components = 8683 std::get<1>(L); 8684 assert(!Components.empty() && 8685 "Not expecting empty list of components!"); 8686 const ValueDecl *VD = Components.back().getAssociatedDeclaration(); 8687 VD = cast<ValueDecl>(VD->getCanonicalDecl()); 8688 const Expr *IE = Components.back().getAssociatedExpression(); 8689 // If the first component is a member expression, we have to look into 8690 // 'this', which maps to null in the map of map information. Otherwise 8691 // look directly for the information. 8692 auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD); 8693 8694 // We potentially have map information for this declaration already. 8695 // Look for the first set of components that refer to it. 8696 if (It != Info.end()) { 8697 bool Found = false; 8698 for (auto &Data : It->second) { 8699 auto *CI = llvm::find_if(Data, [VD](const MapInfo &MI) { 8700 return MI.Components.back().getAssociatedDeclaration() == VD; 8701 }); 8702 // If we found a map entry, signal that the pointer has to be 8703 // returned and move on to the next declaration. Exclude cases where 8704 // the base pointer is mapped as array subscript, array section or 8705 // array shaping. The base address is passed as a pointer to base in 8706 // this case and cannot be used as a base for use_device_ptr list 8707 // item. 
8708 if (CI != Data.end()) { 8709 auto PrevCI = std::next(CI->Components.rbegin()); 8710 const auto *VarD = dyn_cast<VarDecl>(VD); 8711 if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() || 8712 isa<MemberExpr>(IE) || 8713 !VD->getType().getNonReferenceType()->isPointerType() || 8714 PrevCI == CI->Components.rend() || 8715 isa<MemberExpr>(PrevCI->getAssociatedExpression()) || !VarD || 8716 VarD->hasLocalStorage()) { 8717 CI->ReturnDevicePointer = true; 8718 Found = true; 8719 break; 8720 } 8721 } 8722 } 8723 if (Found) 8724 continue; 8725 } 8726 8727 // We didn't find any match in our map information - generate a zero 8728 // size array section - if the pointer is a struct member we defer this 8729 // action until the whole struct has been processed. 8730 if (isa<MemberExpr>(IE)) { 8731 // Insert the pointer into Info to be processed by 8732 // generateInfoForComponentList. Because it is a member pointer 8733 // without a pointee, no entry will be generated for it, therefore 8734 // we need to generate one after the whole struct has been processed. 8735 // Nonetheless, generateInfoForComponentList must be called to take 8736 // the pointer into account for the calculation of the range of the 8737 // partial struct. 
8738 InfoGen(nullptr, Other, Components, OMPC_MAP_unknown, llvm::None, 8739 llvm::None, /*ReturnDevicePointer=*/false, C->isImplicit(), 8740 nullptr); 8741 DeferredInfo[nullptr].emplace_back(IE, VD, /*ForDeviceAddr=*/false); 8742 } else { 8743 llvm::Value *Ptr = 8744 CGF.EmitLoadOfScalar(CGF.EmitLValue(IE), IE->getExprLoc()); 8745 UseDevicePtrCombinedInfo.Exprs.push_back(VD); 8746 UseDevicePtrCombinedInfo.BasePointers.emplace_back(Ptr, VD); 8747 UseDevicePtrCombinedInfo.Pointers.push_back(Ptr); 8748 UseDevicePtrCombinedInfo.Sizes.push_back( 8749 llvm::Constant::getNullValue(CGF.Int64Ty)); 8750 UseDevicePtrCombinedInfo.Types.push_back(OMP_MAP_RETURN_PARAM); 8751 UseDevicePtrCombinedInfo.Mappers.push_back(nullptr); 8752 } 8753 } 8754 } 8755 8756 // Look at the use_device_addr clause information and mark the existing map 8757 // entries as such. If there is no map information for an entry in the 8758 // use_device_addr list, we create one with map type 'alloc' and zero size 8759 // section. It is the user fault if that was not mapped before. If there is 8760 // no map information and the pointer is a struct member, then we defer the 8761 // emission of that entry until the whole struct has been processed. 8762 llvm::SmallDenseSet<CanonicalDeclPtr<const Decl>, 4> Processed; 8763 for (const auto *Cl : Clauses) { 8764 const auto *C = dyn_cast<OMPUseDeviceAddrClause>(Cl); 8765 if (!C) 8766 continue; 8767 for (const auto L : C->component_lists()) { 8768 assert(!std::get<1>(L).empty() && 8769 "Not expecting empty list of components!"); 8770 const ValueDecl *VD = std::get<1>(L).back().getAssociatedDeclaration(); 8771 if (!Processed.insert(VD).second) 8772 continue; 8773 VD = cast<ValueDecl>(VD->getCanonicalDecl()); 8774 const Expr *IE = std::get<1>(L).back().getAssociatedExpression(); 8775 // If the first component is a member expression, we have to look into 8776 // 'this', which maps to null in the map of map information. 
Otherwise 8777 // look directly for the information. 8778 auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD); 8779 8780 // We potentially have map information for this declaration already. 8781 // Look for the first set of components that refer to it. 8782 if (It != Info.end()) { 8783 bool Found = false; 8784 for (auto &Data : It->second) { 8785 auto *CI = llvm::find_if(Data, [VD](const MapInfo &MI) { 8786 return MI.Components.back().getAssociatedDeclaration() == VD; 8787 }); 8788 // If we found a map entry, signal that the pointer has to be 8789 // returned and move on to the next declaration. 8790 if (CI != Data.end()) { 8791 CI->ReturnDevicePointer = true; 8792 Found = true; 8793 break; 8794 } 8795 } 8796 if (Found) 8797 continue; 8798 } 8799 8800 // We didn't find any match in our map information - generate a zero 8801 // size array section - if the pointer is a struct member we defer this 8802 // action until the whole struct has been processed. 8803 if (isa<MemberExpr>(IE)) { 8804 // Insert the pointer into Info to be processed by 8805 // generateInfoForComponentList. Because it is a member pointer 8806 // without a pointee, no entry will be generated for it, therefore 8807 // we need to generate one after the whole struct has been processed. 8808 // Nonetheless, generateInfoForComponentList must be called to take 8809 // the pointer into account for the calculation of the range of the 8810 // partial struct. 
8811 InfoGen(nullptr, Other, std::get<1>(L), OMPC_MAP_unknown, llvm::None, 8812 llvm::None, /*ReturnDevicePointer=*/false, C->isImplicit(), 8813 nullptr, nullptr, /*ForDeviceAddr=*/true); 8814 DeferredInfo[nullptr].emplace_back(IE, VD, /*ForDeviceAddr=*/true); 8815 } else { 8816 llvm::Value *Ptr; 8817 if (IE->isGLValue()) 8818 Ptr = CGF.EmitLValue(IE).getPointer(CGF); 8819 else 8820 Ptr = CGF.EmitScalarExpr(IE); 8821 CombinedInfo.Exprs.push_back(VD); 8822 CombinedInfo.BasePointers.emplace_back(Ptr, VD); 8823 CombinedInfo.Pointers.push_back(Ptr); 8824 CombinedInfo.Sizes.push_back( 8825 llvm::Constant::getNullValue(CGF.Int64Ty)); 8826 CombinedInfo.Types.push_back(OMP_MAP_RETURN_PARAM); 8827 CombinedInfo.Mappers.push_back(nullptr); 8828 } 8829 } 8830 } 8831 8832 for (const auto &Data : Info) { 8833 StructRangeInfoTy PartialStruct; 8834 // Temporary generated information. 8835 MapCombinedInfoTy CurInfo; 8836 const Decl *D = Data.first; 8837 const ValueDecl *VD = cast_or_null<ValueDecl>(D); 8838 for (const auto &M : Data.second) { 8839 for (const MapInfo &L : M) { 8840 assert(!L.Components.empty() && 8841 "Not expecting declaration with no component lists."); 8842 8843 // Remember the current base pointer index. 8844 unsigned CurrentBasePointersIdx = CurInfo.BasePointers.size(); 8845 CurInfo.NonContigInfo.IsNonContiguous = 8846 L.Components.back().isNonContiguous(); 8847 generateInfoForComponentList( 8848 L.MapType, L.MapModifiers, L.MotionModifiers, L.Components, 8849 CurInfo, PartialStruct, /*IsFirstComponentList=*/false, 8850 L.IsImplicit, L.Mapper, L.ForDeviceAddr, VD, L.VarRef); 8851 8852 // If this entry relates with a device pointer, set the relevant 8853 // declaration and add the 'return pointer' flag. 
8854 if (L.ReturnDevicePointer) { 8855 assert(CurInfo.BasePointers.size() > CurrentBasePointersIdx && 8856 "Unexpected number of mapped base pointers."); 8857 8858 const ValueDecl *RelevantVD = 8859 L.Components.back().getAssociatedDeclaration(); 8860 assert(RelevantVD && 8861 "No relevant declaration related with device pointer??"); 8862 8863 CurInfo.BasePointers[CurrentBasePointersIdx].setDevicePtrDecl( 8864 RelevantVD); 8865 CurInfo.Types[CurrentBasePointersIdx] |= OMP_MAP_RETURN_PARAM; 8866 } 8867 } 8868 } 8869 8870 // Append any pending zero-length pointers which are struct members and 8871 // used with use_device_ptr or use_device_addr. 8872 auto CI = DeferredInfo.find(Data.first); 8873 if (CI != DeferredInfo.end()) { 8874 for (const DeferredDevicePtrEntryTy &L : CI->second) { 8875 llvm::Value *BasePtr; 8876 llvm::Value *Ptr; 8877 if (L.ForDeviceAddr) { 8878 if (L.IE->isGLValue()) 8879 Ptr = this->CGF.EmitLValue(L.IE).getPointer(CGF); 8880 else 8881 Ptr = this->CGF.EmitScalarExpr(L.IE); 8882 BasePtr = Ptr; 8883 // Entry is RETURN_PARAM. Also, set the placeholder value 8884 // MEMBER_OF=FFFF so that the entry is later updated with the 8885 // correct value of MEMBER_OF. 8886 CurInfo.Types.push_back(OMP_MAP_RETURN_PARAM | OMP_MAP_MEMBER_OF); 8887 } else { 8888 BasePtr = this->CGF.EmitLValue(L.IE).getPointer(CGF); 8889 Ptr = this->CGF.EmitLoadOfScalar(this->CGF.EmitLValue(L.IE), 8890 L.IE->getExprLoc()); 8891 // Entry is PTR_AND_OBJ and RETURN_PARAM. Also, set the 8892 // placeholder value MEMBER_OF=FFFF so that the entry is later 8893 // updated with the correct value of MEMBER_OF. 
CurInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_RETURN_PARAM |
                                    OMP_MAP_MEMBER_OF);
          }
          CurInfo.Exprs.push_back(L.VD);
          CurInfo.BasePointers.emplace_back(BasePtr, L.VD);
          CurInfo.Pointers.push_back(Ptr);
          CurInfo.Sizes.push_back(
              llvm::Constant::getNullValue(this->CGF.Int64Ty));
          CurInfo.Mappers.push_back(nullptr);
        }
      }
      // If there is an entry in PartialStruct it means we have a struct with
      // individual members mapped. Emit an extra combined entry.
      if (PartialStruct.Base.isValid()) {
        CurInfo.NonContigInfo.Dims.push_back(0);
        emitCombinedEntry(CombinedInfo, CurInfo.Types, PartialStruct, VD);
      }

      // We need to append the results of this capture to what we already
      // have.
      CombinedInfo.append(CurInfo);
    }
    // Append data for use_device_ptr clauses.
    CombinedInfo.append(UseDevicePtrCombinedInfo);
  }

public:
  /// Constructor for an executable directive. Pre-computes the lookup tables
  /// (FirstPrivateDecls, DevPointersMap, LambdasMap) from the clauses of
  /// \a Dir so that later per-capture queries do not rescan the clause list.
  MappableExprsHandler(const OMPExecutableDirective &Dir, CodeGenFunction &CGF)
      : CurDir(&Dir), CGF(CGF) {
    // Extract firstprivate clause information. Each variable is recorded
    // together with whether its clause is implicit.
    for (const auto *C : Dir.getClausesOfKind<OMPFirstprivateClause>())
      for (const auto *D : C->varlists())
        FirstPrivateDecls.try_emplace(
            cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl()), C->isImplicit());
    // Extract implicit firstprivates from uses_allocators clauses.
    for (const auto *C : Dir.getClausesOfKind<OMPUsesAllocatorsClause>()) {
      for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
        OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
        // Prefer the traits variable when it is a plain declaration
        // reference; otherwise fall back to the allocator variable itself.
        if (const auto *DRE = dyn_cast_or_null<DeclRefExpr>(D.AllocatorTraits))
          FirstPrivateDecls.try_emplace(cast<VarDecl>(DRE->getDecl()),
                                        /*Implicit=*/true);
        else if (const auto *VD = dyn_cast<VarDecl>(
                     cast<DeclRefExpr>(D.Allocator->IgnoreParenImpCasts())
                         ->getDecl()))
          FirstPrivateDecls.try_emplace(VD, /*Implicit=*/true);
      }
    }
    // Extract device pointer clause information: group the component lists
    // of every is_device_ptr clause by their base declaration.
    for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>())
      for (auto L : C->component_lists())
        DevPointersMap[std::get<0>(L)].push_back(std::get<1>(L));
    // Extract map information. Only 'map(to: ...)' clauses whose base
    // declaration has a lambda closure type are remembered here.
    for (const auto *C : Dir.getClausesOfKind<OMPMapClause>()) {
      if (C->getMapType() != OMPC_MAP_to)
        continue;
      for (auto L : C->component_lists()) {
        const ValueDecl *VD = std::get<0>(L);
        // VD may be null; in that case there is no record type to inspect.
        const auto *RD = VD ? VD->getType()
                                  .getCanonicalType()
                                  .getNonReferenceType()
                                  ->getAsCXXRecordDecl()
                            : nullptr;
        if (RD && RD->isLambda())
          LambdasMap.try_emplace(std::get<0>(L), C);
      }
    }
  }

  /// Constructor for the declare mapper directive. No clause tables are
  /// pre-computed in this case.
  MappableExprsHandler(const OMPDeclareMapperDecl &Dir, CodeGenFunction &CGF)
      : CurDir(&Dir), CGF(CGF) {}

  /// Generate code for the combined entry if we have a partially mapped struct
  /// and take care of the mapping flags of the arguments corresponding to
  /// individual struct members.
8969 void emitCombinedEntry(MapCombinedInfoTy &CombinedInfo, 8970 MapFlagsArrayTy &CurTypes, 8971 const StructRangeInfoTy &PartialStruct, 8972 const ValueDecl *VD = nullptr, 8973 bool NotTargetParams = true) const { 8974 if (CurTypes.size() == 1 && 8975 ((CurTypes.back() & OMP_MAP_MEMBER_OF) != OMP_MAP_MEMBER_OF) && 8976 !PartialStruct.IsArraySection) 8977 return; 8978 Address LBAddr = PartialStruct.LowestElem.second; 8979 Address HBAddr = PartialStruct.HighestElem.second; 8980 if (PartialStruct.HasCompleteRecord) { 8981 LBAddr = PartialStruct.LB; 8982 HBAddr = PartialStruct.LB; 8983 } 8984 CombinedInfo.Exprs.push_back(VD); 8985 // Base is the base of the struct 8986 CombinedInfo.BasePointers.push_back(PartialStruct.Base.getPointer()); 8987 // Pointer is the address of the lowest element 8988 llvm::Value *LB = LBAddr.getPointer(); 8989 CombinedInfo.Pointers.push_back(LB); 8990 // There should not be a mapper for a combined entry. 8991 CombinedInfo.Mappers.push_back(nullptr); 8992 // Size is (addr of {highest+1} element) - (addr of lowest element) 8993 llvm::Value *HB = HBAddr.getPointer(); 8994 llvm::Value *HAddr = 8995 CGF.Builder.CreateConstGEP1_32(HBAddr.getElementType(), HB, /*Idx0=*/1); 8996 llvm::Value *CLAddr = CGF.Builder.CreatePointerCast(LB, CGF.VoidPtrTy); 8997 llvm::Value *CHAddr = CGF.Builder.CreatePointerCast(HAddr, CGF.VoidPtrTy); 8998 llvm::Value *Diff = CGF.Builder.CreatePtrDiff(CGF.Int8Ty, CHAddr, CLAddr); 8999 llvm::Value *Size = CGF.Builder.CreateIntCast(Diff, CGF.Int64Ty, 9000 /*isSigned=*/false); 9001 CombinedInfo.Sizes.push_back(Size); 9002 // Map type is always TARGET_PARAM, if generate info for captures. 9003 CombinedInfo.Types.push_back(NotTargetParams ? OMP_MAP_NONE 9004 : OMP_MAP_TARGET_PARAM); 9005 // If any element has the present modifier, then make sure the runtime 9006 // doesn't attempt to allocate the struct. 
9007 if (CurTypes.end() != 9008 llvm::find_if(CurTypes, [](OpenMPOffloadMappingFlags Type) { 9009 return Type & OMP_MAP_PRESENT; 9010 })) 9011 CombinedInfo.Types.back() |= OMP_MAP_PRESENT; 9012 // Remove TARGET_PARAM flag from the first element 9013 (*CurTypes.begin()) &= ~OMP_MAP_TARGET_PARAM; 9014 // If any element has the ompx_hold modifier, then make sure the runtime 9015 // uses the hold reference count for the struct as a whole so that it won't 9016 // be unmapped by an extra dynamic reference count decrement. Add it to all 9017 // elements as well so the runtime knows which reference count to check 9018 // when determining whether it's time for device-to-host transfers of 9019 // individual elements. 9020 if (CurTypes.end() != 9021 llvm::find_if(CurTypes, [](OpenMPOffloadMappingFlags Type) { 9022 return Type & OMP_MAP_OMPX_HOLD; 9023 })) { 9024 CombinedInfo.Types.back() |= OMP_MAP_OMPX_HOLD; 9025 for (auto &M : CurTypes) 9026 M |= OMP_MAP_OMPX_HOLD; 9027 } 9028 9029 // All other current entries will be MEMBER_OF the combined entry 9030 // (except for PTR_AND_OBJ entries which do not have a placeholder value 9031 // 0xFFFF in the MEMBER_OF field). 9032 OpenMPOffloadMappingFlags MemberOfFlag = 9033 getMemberOfFlag(CombinedInfo.BasePointers.size() - 1); 9034 for (auto &M : CurTypes) 9035 setCorrectMemberOfFlag(M, MemberOfFlag); 9036 } 9037 9038 /// Generate all the base pointers, section pointers, sizes, map types, and 9039 /// mappers for the extracted mappable expressions (all included in \a 9040 /// CombinedInfo). Also, for each item that relates with a device pointer, a 9041 /// pair of the relevant declaration and index where it occurs is appended to 9042 /// the device pointers info array. 
9043 void generateAllInfo( 9044 MapCombinedInfoTy &CombinedInfo, 9045 const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet = 9046 llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const { 9047 assert(CurDir.is<const OMPExecutableDirective *>() && 9048 "Expect a executable directive"); 9049 const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>(); 9050 generateAllInfoForClauses(CurExecDir->clauses(), CombinedInfo, SkipVarSet); 9051 } 9052 9053 /// Generate all the base pointers, section pointers, sizes, map types, and 9054 /// mappers for the extracted map clauses of user-defined mapper (all included 9055 /// in \a CombinedInfo). 9056 void generateAllInfoForMapper(MapCombinedInfoTy &CombinedInfo) const { 9057 assert(CurDir.is<const OMPDeclareMapperDecl *>() && 9058 "Expect a declare mapper directive"); 9059 const auto *CurMapperDir = CurDir.get<const OMPDeclareMapperDecl *>(); 9060 generateAllInfoForClauses(CurMapperDir->clauses(), CombinedInfo); 9061 } 9062 9063 /// Emit capture info for lambdas for variables captured by reference. 
void generateInfoForLambdaCaptures(
    const ValueDecl *VD, llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo,
    llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers) const {
  // Only declarations whose (canonical, non-reference) type is a lambda
  // closure record are handled; everything else is a no-op.
  const auto *RD = VD->getType()
                       .getCanonicalType()
                       .getNonReferenceType()
                       ->getAsCXXRecordDecl();
  if (!RD || !RD->isLambda())
    return;
  // View the incoming argument as the address of the lambda object.
  Address VDAddr =
      Address::deprecated(Arg, CGF.getContext().getDeclAlign(VD));
  LValue VDLVal = CGF.MakeAddrLValue(
      VDAddr, VD->getType().getCanonicalType().getNonReferenceType());
  llvm::DenseMap<const VarDecl *, FieldDecl *> Captures;
  FieldDecl *ThisCapture = nullptr;
  RD->getCaptureFields(Captures, ThisCapture);
  // Entry for a captured 'this', if the lambda has one.
  if (ThisCapture) {
    LValue ThisLVal =
        CGF.EmitLValueForFieldInitialization(VDLVal, ThisCapture);
    LValue ThisLValVal = CGF.EmitLValueForField(VDLVal, ThisCapture);
    // Remember which lambda object this field belongs to; consumed later by
    // adjustMemberOfForLambdaCaptures.
    LambdaPointers.try_emplace(ThisLVal.getPointer(CGF),
                               VDLVal.getPointer(CGF));
    CombinedInfo.Exprs.push_back(VD);
    CombinedInfo.BasePointers.push_back(ThisLVal.getPointer(CGF));
    CombinedInfo.Pointers.push_back(ThisLValVal.getPointer(CGF));
    CombinedInfo.Sizes.push_back(
        CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy),
                                  CGF.Int64Ty, /*isSigned=*/true));
    CombinedInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
                                 OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
    CombinedInfo.Mappers.push_back(nullptr);
  }
  // One entry per variable capture that is either by-reference or of pointer
  // type; other captures are skipped.
  for (const LambdaCapture &LC : RD->captures()) {
    if (!LC.capturesVariable())
      continue;
    const VarDecl *VD = LC.getCapturedVar();
    if (LC.getCaptureKind() != LCK_ByRef && !VD->getType()->isPointerType())
      continue;
    auto It = Captures.find(VD);
    assert(It != Captures.end() && "Found lambda capture without field.");
    LValue VarLVal = CGF.EmitLValueForFieldInitialization(VDLVal, It->second);
    if (LC.getCaptureKind() == LCK_ByRef) {
      // By-reference capture: the size is that of the referenced variable's
      // type.
      LValue VarLValVal = CGF.EmitLValueForField(VDLVal, It->second);
      LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
                                 VDLVal.getPointer(CGF));
      CombinedInfo.Exprs.push_back(VD);
      CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF));
      CombinedInfo.Pointers.push_back(VarLValVal.getPointer(CGF));
      CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
          CGF.getTypeSize(
              VD->getType().getCanonicalType().getNonReferenceType()),
          CGF.Int64Ty, /*isSigned=*/true));
    } else {
      // Pointer captured by copy: the loaded pointer value becomes the
      // pointer of the entry and the size is zero.
      RValue VarRVal = CGF.EmitLoadOfLValue(VarLVal, RD->getLocation());
      LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
                                 VDLVal.getPointer(CGF));
      CombinedInfo.Exprs.push_back(VD);
      CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF));
      CombinedInfo.Pointers.push_back(VarRVal.getScalarVal());
      CombinedInfo.Sizes.push_back(llvm::ConstantInt::get(CGF.Int64Ty, 0));
    }
    // This exact flag combination is what adjustMemberOfForLambdaCaptures
    // looks for when patching MEMBER_OF.
    CombinedInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
                                 OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
    CombinedInfo.Mappers.push_back(nullptr);
  }
}

/// Set correct indices for lambdas captures: every entry whose type is
/// exactly the implicit lambda-capture flag combination gets its MEMBER_OF
/// field rewritten to refer to the entry of its parent lambda object.
void adjustMemberOfForLambdaCaptures(
    const llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers,
    MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
    MapFlagsArrayTy &Types) const {
  for (unsigned I = 0, E = Types.size(); I < E; ++I) {
    // Set correct member_of idx for all implicit lambda captures.
    if (Types[I] != (OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
                     OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT))
      continue;
    // Address of the lambda object that owns this captured field.
    llvm::Value *BasePtr = LambdaPointers.lookup(*BasePointers[I]);
    assert(BasePtr && "Unable to find base lambda address.");
    // Walk backwards from I to find the closest earlier entry whose pointer
    // is the lambda object itself.
    int TgtIdx = -1;
    for (unsigned J = I; J > 0; --J) {
      unsigned Idx = J - 1;
      if (Pointers[Idx] != BasePtr)
        continue;
      TgtIdx = Idx;
      break;
    }
    assert(TgtIdx != -1 && "Unable to find parent lambda.");
    // All other current entries will be MEMBER_OF the combined entry
    // (except for PTR_AND_OBJ entries which do not have a placeholder value
    // 0xFFFF in the MEMBER_OF field).
    OpenMPOffloadMappingFlags MemberOfFlag = getMemberOfFlag(TgtIdx);
    setCorrectMemberOfFlag(Types[I], MemberOfFlag);
  }
}

/// Generate the base pointers, section pointers, sizes, map types, and
/// mappers associated to a given capture (all included in \a CombinedInfo).
/// \a Arg is the value passed for the capture and \a PartialStruct is
/// forwarded to generateInfoForComponentList for every component list.
void generateInfoForCapture(const CapturedStmt::Capture *Cap,
                            llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo,
                            StructRangeInfoTy &PartialStruct) const {
  assert(!Cap->capturesVariableArrayType() &&
         "Not expecting to generate map info for a variable array type!");

  // Declaration the capture refers to; a capture of 'this' is represented
  // by a null declaration.
  const ValueDecl *VD = Cap->capturesThis()
                            ? nullptr
                            : Cap->getCapturedVar()->getCanonicalDecl();

  // for map(to: lambda): skip here, processing it in
  // generateDefaultMapInfo
  if (LambdasMap.count(VD))
    return;

  // If this declaration appears in a is_device_ptr clause we just have to
  // pass the pointer by value. If it is a reference to a declaration, we just
  // pass its value.
  if (DevPointersMap.count(VD)) {
    CombinedInfo.Exprs.push_back(VD);
    CombinedInfo.BasePointers.emplace_back(Arg, VD);
    CombinedInfo.Pointers.push_back(Arg);
    CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
        CGF.getTypeSize(CGF.getContext().VoidPtrTy), CGF.Int64Ty,
        /*isSigned=*/true));
    CombinedInfo.Types.push_back(
        (Cap->capturesVariable() ? OMP_MAP_TO : OMP_MAP_LITERAL) |
        OMP_MAP_TARGET_PARAM);
    CombinedInfo.Mappers.push_back(nullptr);
    return;
  }

  // Tuple layout: <components, map type, map modifiers, is-implicit, mapper,
  // variable reference expression>.
  using MapData =
      std::tuple<OMPClauseMappableExprCommon::MappableExprComponentListRef,
                 OpenMPMapClauseKind, ArrayRef<OpenMPMapModifierKind>, bool,
                 const ValueDecl *, const Expr *>;
  SmallVector<MapData, 4> DeclComponentLists;
  assert(CurDir.is<const OMPExecutableDirective *>() &&
         "Expect a executable directive");
  const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
  // Collect every component list of every map clause that refers to VD.
  for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) {
    // EI is advanced in lock-step with the component lists below.
    const auto *EI = C->getVarRefs().begin();
    for (const auto L : C->decl_component_lists(VD)) {
      const ValueDecl *VDecl, *Mapper;
      // The Expression is not correct if the mapping is implicit
      const Expr *E = (C->getMapLoc().isValid()) ? *EI : nullptr;
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      std::tie(VDecl, Components, Mapper) = L;
      assert(VDecl == VD && "We got information for the wrong declaration??");
      assert(!Components.empty() &&
             "Not expecting declaration with no component lists.");
      DeclComponentLists.emplace_back(Components, C->getMapType(),
                                      C->getMapTypeModifiers(),
                                      C->isImplicit(), Mapper, E);
      ++EI;
    }
  }
  // Reorder the lists based on the 'present' modifier and the 'alloc' map
  // type.
  // NOTE(review): the "left" flags mix LHS modifiers with the RHS map type
  // (and vice versa for the "right" flags), so comp(a, b) and comp(b, a) can
  // apparently both be true (e.g. a = present+alloc, b = neither) - confirm
  // this is intended and forms a strict weak ordering.
  llvm::stable_sort(DeclComponentLists, [](const MapData &LHS,
                                           const MapData &RHS) {
    ArrayRef<OpenMPMapModifierKind> MapModifiers = std::get<2>(LHS);
    OpenMPMapClauseKind MapType = std::get<1>(RHS);
    bool HasPresent =
        llvm::is_contained(MapModifiers, clang::OMPC_MAP_MODIFIER_present);
    bool HasAllocs = MapType == OMPC_MAP_alloc;
    MapModifiers = std::get<2>(RHS);
    MapType = std::get<1>(LHS);
    bool HasPresentR =
        llvm::is_contained(MapModifiers, clang::OMPC_MAP_MODIFIER_present);
    bool HasAllocsR = MapType == OMPC_MAP_alloc;
    return (HasPresent && !HasPresentR) || (HasAllocs && !HasAllocsR);
  });

  // Find overlapping elements (including the offset from the base element).
  // The map is keyed by the address of the base entry inside
  // DeclComponentLists, which is not modified after this point.
  llvm::SmallDenseMap<
      const MapData *,
      llvm::SmallVector<
          OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>,
      4>
      OverlappedData;
  size_t Count = 0;
  for (const MapData &L : DeclComponentLists) {
    OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
    OpenMPMapClauseKind MapType;
    ArrayRef<OpenMPMapModifierKind> MapModifiers;
    bool IsImplicit;
    const ValueDecl *Mapper;
    const Expr *VarRef;
    std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
        L;
    ++Count;
    // Compare L only against the lists that come after it.
    for (const MapData &L1 : makeArrayRef(DeclComponentLists).slice(Count)) {
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components1;
      // Only Components1 is used below; the other tied variables are simply
      // overwritten with L1's values.
      std::tie(Components1, MapType, MapModifiers, IsImplicit, Mapper,
               VarRef) = L1;
      // Walk both lists from the base (back) towards the head for as long as
      // the components agree.
      auto CI = Components.rbegin();
      auto CE = Components.rend();
      auto SI = Components1.rbegin();
      auto SE = Components1.rend();
      for (; CI != CE && SI != SE; ++CI, ++SI) {
        if (CI->getAssociatedExpression()->getStmtClass() !=
            SI->getAssociatedExpression()->getStmtClass())
          break;
        // Are we dealing with different variables/fields?
        if (CI->getAssociatedDeclaration() != SI->getAssociatedDeclaration())
          break;
      }
      // Found overlapping if, at least for one component, reached the head
      // of the components list.
      if (CI == CE || SI == SE) {
        // Ignore it if it is the same component.
        if (CI == CE && SI == SE)
          continue;
        // It points at the first component of the longer list that has no
        // counterpart in the shorter one.
        const auto It = (SI == SE) ? CI : SI;
        // If one component is a pointer and another one is a kind of
        // dereference of this pointer (array subscript, section, dereference,
        // etc.), it is not an overlapping.
        // Same, if one component is a base and another component is a
        // dereferenced pointer memberexpr with the same base.
        if (!isa<MemberExpr>(It->getAssociatedExpression()) ||
            (std::prev(It)->getAssociatedDeclaration() &&
             std::prev(It)
                 ->getAssociatedDeclaration()
                 ->getType()
                 ->isPointerType()) ||
            (It->getAssociatedDeclaration() &&
             It->getAssociatedDeclaration()->getType()->isPointerType() &&
             std::next(It) != CE && std::next(It) != SE))
          continue;
        // The list that was fully consumed is the base; the longer one is
        // the overlapped sub-component.
        const MapData &BaseData = CI == CE ? L : L1;
        OMPClauseMappableExprCommon::MappableExprComponentListRef SubData =
            SI == SE ? Components : Components1;
        auto &OverlappedElements = OverlappedData.FindAndConstruct(&BaseData);
        OverlappedElements.getSecond().push_back(SubData);
      }
    }
  }
  // Sort the overlapped elements for each item.
  llvm::SmallVector<const FieldDecl *, 4> Layout;
  if (!OverlappedData.empty()) {
    // Strip pointer/array levels until the innermost element type is
    // reached.
    const Type *BaseType = VD->getType().getCanonicalType().getTypePtr();
    const Type *OrigType = BaseType->getPointeeOrArrayElementType();
    while (BaseType != OrigType) {
      BaseType = OrigType->getCanonicalTypeInternal().getTypePtr();
      OrigType = BaseType->getPointeeOrArrayElementType();
    }

    // Layout holds the fields of that record; it is used below to order
    // fields that belong to different parents.
    if (const auto *CRD = BaseType->getAsCXXRecordDecl())
      getPlainLayout(CRD, Layout, /*AsBase=*/false);
    else {
      const auto *RD = BaseType->getAsRecordDecl();
      Layout.append(RD->field_begin(), RD->field_end());
    }
  }
  for (auto &Pair : OverlappedData) {
    llvm::stable_sort(
        Pair.getSecond(),
        [&Layout](
            OMPClauseMappableExprCommon::MappableExprComponentListRef First,
            OMPClauseMappableExprCommon::MappableExprComponentListRef
                Second) {
          // Skip the common prefix (starting from the base) of both lists.
          auto CI = First.rbegin();
          auto CE = First.rend();
          auto SI = Second.rbegin();
          auto SE = Second.rend();
          for (; CI != CE && SI != SE; ++CI, ++SI) {
            if (CI->getAssociatedExpression()->getStmtClass() !=
                SI->getAssociatedExpression()->getStmtClass())
              break;
            // Are we dealing with different variables/fields?
            if (CI->getAssociatedDeclaration() !=
                SI->getAssociatedDeclaration())
              break;
          }

          // Lists contain the same elements.
          if (CI == CE && SI == SE)
            return false;

          // List with less elements is less than list with more elements.
          if (CI == CE || SI == SE)
            return CI == CE;

          // First differing components: fields of the same parent compare
          // by field index, otherwise by whichever appears first in Layout.
          const auto *FD1 = cast<FieldDecl>(CI->getAssociatedDeclaration());
          const auto *FD2 = cast<FieldDecl>(SI->getAssociatedDeclaration());
          if (FD1->getParent() == FD2->getParent())
            return FD1->getFieldIndex() < FD2->getFieldIndex();
          const auto *It =
              llvm::find_if(Layout, [FD1, FD2](const FieldDecl *FD) {
                return FD == FD1 || FD == FD2;
              });
          return *It == FD1;
        });
  }

  // Associated with a capture, because the mapping flags depend on it.
  // Go through all of the elements with the overlapped elements.
  bool IsFirstComponentList = true;
  for (const auto &Pair : OverlappedData) {
    const MapData &L = *Pair.getFirst();
    OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
    OpenMPMapClauseKind MapType;
    ArrayRef<OpenMPMapModifierKind> MapModifiers;
    bool IsImplicit;
    const ValueDecl *Mapper;
    const Expr *VarRef;
    std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
        L;
    ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
        OverlappedComponents = Pair.getSecond();
    generateInfoForComponentList(
        MapType, MapModifiers, llvm::None, Components, CombinedInfo,
        PartialStruct, IsFirstComponentList, IsImplicit, Mapper,
        /*ForDeviceAddr=*/false, VD, VarRef, OverlappedComponents);
    IsFirstComponentList = false;
  }
  // Go through other elements without overlapped elements.
  for (const MapData &L : DeclComponentLists) {
    OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
    OpenMPMapClauseKind MapType;
    ArrayRef<OpenMPMapModifierKind> MapModifiers;
    bool IsImplicit;
    const ValueDecl *Mapper;
    const Expr *VarRef;
    std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
        L;
    // Entries that served as a base in OverlappedData were already emitted
    // by the loop above.
    auto It = OverlappedData.find(&L);
    if (It == OverlappedData.end())
      generateInfoForComponentList(MapType, MapModifiers, llvm::None,
                                   Components, CombinedInfo, PartialStruct,
                                   IsFirstComponentList, IsImplicit, Mapper,
                                   /*ForDeviceAddr=*/false, VD, VarRef);
    // Cleared on every iteration, whether or not the list was emitted.
    IsFirstComponentList = false;
  }
}

/// Generate the default map information for a given capture \a CI,
/// record field declaration \a RI and captured value \a CV. Exactly one
/// entry is appended to \a CombinedInfo, always flagged as a target
/// parameter.
void generateDefaultMapInfo(const CapturedStmt::Capture &CI,
                            const FieldDecl &RI, llvm::Value *CV,
                            MapCombinedInfoTy &CombinedInfo) const {
  bool IsImplicit = true;
  // Do the default mapping.
  if (CI.capturesThis()) {
    // 'this': map the pointee object with the pointee's size.
    CombinedInfo.Exprs.push_back(nullptr);
    CombinedInfo.BasePointers.push_back(CV);
    CombinedInfo.Pointers.push_back(CV);
    const auto *PtrTy = cast<PointerType>(RI.getType().getTypePtr());
    CombinedInfo.Sizes.push_back(
        CGF.Builder.CreateIntCast(CGF.getTypeSize(PtrTy->getPointeeType()),
                                  CGF.Int64Ty, /*isSigned=*/true));
    // Default map type.
    CombinedInfo.Types.push_back(OMP_MAP_TO | OMP_MAP_FROM);
  } else if (CI.capturesVariableByCopy()) {
    const VarDecl *VD = CI.getCapturedVar();
    CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
    CombinedInfo.BasePointers.push_back(CV);
    CombinedInfo.Pointers.push_back(CV);
    if (!RI.getType()->isAnyPointerType()) {
      // We have to signal to the runtime captures passed by value that are
      // not pointers.
      CombinedInfo.Types.push_back(OMP_MAP_LITERAL);
      CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
          CGF.getTypeSize(RI.getType()), CGF.Int64Ty, /*isSigned=*/true));
    } else {
      // Pointers are implicitly mapped with a zero size and no flags
      // (other than first map that is added for all implicit maps).
      CombinedInfo.Types.push_back(OMP_MAP_NONE);
      CombinedInfo.Sizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty));
    }
    // A firstprivate clause decides whether the map counts as implicit.
    auto I = FirstPrivateDecls.find(VD);
    if (I != FirstPrivateDecls.end())
      IsImplicit = I->getSecond();
  } else {
    assert(CI.capturesVariable() && "Expected captured reference.");
    const auto *PtrTy = cast<ReferenceType>(RI.getType().getTypePtr());
    QualType ElementType = PtrTy->getPointeeType();
    CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
        CGF.getTypeSize(ElementType), CGF.Int64Ty, /*isSigned=*/true));
    // The default map type for a scalar/complex type is 'to' because by
    // default the value doesn't have to be retrieved. For an aggregate
    // type, the default is 'tofrom'.
    CombinedInfo.Types.push_back(getMapModifiersForPrivateClauses(CI));
    const VarDecl *VD = CI.getCapturedVar();
    auto I = FirstPrivateDecls.find(VD);
    CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
    CombinedInfo.BasePointers.push_back(CV);
    // For a firstprivate pointer captured by reference, the pointer entry is
    // the loaded pointer value rather than the reference itself.
    if (I != FirstPrivateDecls.end() && ElementType->isAnyPointerType()) {
      Address PtrAddr = CGF.EmitLoadOfReference(CGF.MakeAddrLValue(
          CV, ElementType, CGF.getContext().getDeclAlign(VD),
          AlignmentSource::Decl));
      CombinedInfo.Pointers.push_back(PtrAddr.getPointer());
    } else {
      CombinedInfo.Pointers.push_back(CV);
    }
    if (I != FirstPrivateDecls.end())
      IsImplicit = I->getSecond();
  }
  // Every default map produces a single argument which is a target parameter.
  CombinedInfo.Types.back() |= OMP_MAP_TARGET_PARAM;

  // Add flag stating this is an implicit map.
  if (IsImplicit)
    CombinedInfo.Types.back() |= OMP_MAP_IMPLICIT;

  // No user-defined mapper for default mapping.
  CombinedInfo.Mappers.push_back(nullptr);
}
};
} // anonymous namespace

/// Emit, for every non-contiguous entry of \a CombinedInfo, a temporary
/// array of descriptor_dim records holding its offset/count/stride triples
/// and store the address of that array into the matching slot of
/// \a Info.PointersArray.
static void emitNonContiguousDescriptor(
    CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo,
    CGOpenMPRuntime::TargetDataInfo &Info) {
  CodeGenModule &CGM = CGF.CGM;
  MappableExprsHandler::MapCombinedInfoTy::StructNonContiguousInfo
      &NonContigInfo = CombinedInfo.NonContigInfo;

  // Build an array of struct descriptor_dim and then assign it to
  // offload_args.
  //
  // struct descriptor_dim {
  //  uint64_t offset;
  //  uint64_t count;
  //  uint64_t stride;
  // };
  ASTContext &C = CGF.getContext();
  QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
  RecordDecl *RD;
  RD = C.buildImplicitRecord("descriptor_dim");
  RD->startDefinition();
  addFieldToRecordDecl(C, RD, Int64Ty);
  addFieldToRecordDecl(C, RD, Int64Ty);
  addFieldToRecordDecl(C, RD, Int64Ty);
  RD->completeDefinition();
  QualType DimTy = C.getRecordType(RD);

  // Field positions inside descriptor_dim, in declaration order.
  enum { OffsetFD = 0, CountFD, StrideFD };
  // We need two index variables here since the size of "Dims" is the same as
  // the size of Components, however, the size of offset, count, and stride is
  // equal to the size of base declaration that is non-contiguous.
  // I indexes Dims (and the pointers array); L indexes Offsets/Counts/Strides
  // and only advances for entries that are actually emitted.
  for (unsigned I = 0, L = 0, E = NonContigInfo.Dims.size(); I < E; ++I) {
    // Skip emitting ir if dimension size is 1 since it cannot be
    // non-contiguous.
    if (NonContigInfo.Dims[I] == 1)
      continue;
    llvm::APInt Size(/*numBits=*/32, NonContigInfo.Dims[I]);
    QualType ArrayTy =
        C.getConstantArrayType(DimTy, Size, nullptr, ArrayType::Normal, 0);
    Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims");
    for (unsigned II = 0, EE = NonContigInfo.Dims[I]; II < EE; ++II) {
      // The per-dimension values are written in reverse order.
      unsigned RevIdx = EE - II - 1;
      LValue DimsLVal = CGF.MakeAddrLValue(
          CGF.Builder.CreateConstArrayGEP(DimsAddr, II), DimTy);
      // Offset
      LValue OffsetLVal = CGF.EmitLValueForField(
          DimsLVal, *std::next(RD->field_begin(), OffsetFD));
      CGF.EmitStoreOfScalar(NonContigInfo.Offsets[L][RevIdx], OffsetLVal);
      // Count
      LValue CountLVal = CGF.EmitLValueForField(
          DimsLVal, *std::next(RD->field_begin(), CountFD));
      CGF.EmitStoreOfScalar(NonContigInfo.Counts[L][RevIdx], CountLVal);
      // Stride
      LValue StrideLVal = CGF.EmitLValueForField(
          DimsLVal, *std::next(RD->field_begin(), StrideFD));
      CGF.EmitStoreOfScalar(NonContigInfo.Strides[L][RevIdx], StrideLVal);
    }
    // args[I] = &dims
    Address DAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        DimsAddr, CGM.Int8PtrTy);
    llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32(
        llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
        Info.PointersArray, 0, I);
    Address PAddr = Address::deprecated(P, CGF.getPointerAlign());
    CGF.Builder.CreateStore(DAddr.getPointer(), PAddr);
    ++L;
  }
}

// Try to extract the base declaration from a `this->x` expression if possible.
9542 static ValueDecl *getDeclFromThisExpr(const Expr *E) { 9543 if (!E) 9544 return nullptr; 9545 9546 if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E->IgnoreParenCasts())) 9547 if (const MemberExpr *ME = 9548 dyn_cast<MemberExpr>(OASE->getBase()->IgnoreParenImpCasts())) 9549 return ME->getMemberDecl(); 9550 return nullptr; 9551 } 9552 9553 /// Emit a string constant containing the names of the values mapped to the 9554 /// offloading runtime library. 9555 llvm::Constant * 9556 emitMappingInformation(CodeGenFunction &CGF, llvm::OpenMPIRBuilder &OMPBuilder, 9557 MappableExprsHandler::MappingExprInfo &MapExprs) { 9558 9559 uint32_t SrcLocStrSize; 9560 if (!MapExprs.getMapDecl() && !MapExprs.getMapExpr()) 9561 return OMPBuilder.getOrCreateDefaultSrcLocStr(SrcLocStrSize); 9562 9563 SourceLocation Loc; 9564 if (!MapExprs.getMapDecl() && MapExprs.getMapExpr()) { 9565 if (const ValueDecl *VD = getDeclFromThisExpr(MapExprs.getMapExpr())) 9566 Loc = VD->getLocation(); 9567 else 9568 Loc = MapExprs.getMapExpr()->getExprLoc(); 9569 } else { 9570 Loc = MapExprs.getMapDecl()->getLocation(); 9571 } 9572 9573 std::string ExprName; 9574 if (MapExprs.getMapExpr()) { 9575 PrintingPolicy P(CGF.getContext().getLangOpts()); 9576 llvm::raw_string_ostream OS(ExprName); 9577 MapExprs.getMapExpr()->printPretty(OS, nullptr, P); 9578 OS.flush(); 9579 } else { 9580 ExprName = MapExprs.getMapDecl()->getNameAsString(); 9581 } 9582 9583 PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc); 9584 return OMPBuilder.getOrCreateSrcLocStr(PLoc.getFilename(), ExprName, 9585 PLoc.getLine(), PLoc.getColumn(), 9586 SrcLocStrSize); 9587 } 9588 9589 /// Emit the arrays used to pass the captures and map information to the 9590 /// offloading runtime library. If there is no map or capture information, 9591 /// return nullptr by reference. 
static void emitOffloadingArrays(
    CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo,
    CGOpenMPRuntime::TargetDataInfo &Info, llvm::OpenMPIRBuilder &OMPBuilder,
    bool IsNonContiguous = false) {
  // Fills the array members of \p Info (base pointers, pointers, mappers,
  // sizes, map types, map names and, when needed, end-of-region map types)
  // from the parallel vectors in \p CombinedInfo. When \p IsNonContiguous is
  // set and offsets were recorded, the non-contiguous descriptors are emitted
  // last.
  CodeGenModule &CGM = CGF.CGM;
  ASTContext &Ctx = CGF.getContext();

  // Reset the array information.
  Info.clearArrayInfo();
  Info.NumberOfPtrs = CombinedInfo.BasePointers.size();

  if (Info.NumberOfPtrs) {
    // Detect if we have any capture size requiring runtime evaluation of the
    // size so that a constant array could be eventually used.

    llvm::APInt PointerNumAP(32, Info.NumberOfPtrs, /*isSigned=*/true);
    QualType PointerArrayType = Ctx.getConstantArrayType(
        Ctx.VoidPtrTy, PointerNumAP, nullptr, ArrayType::Normal,
        /*IndexTypeQuals=*/0);

    // One void* temporary array each for base pointers, pointers and mappers.
    Info.BasePointersArray =
        CGF.CreateMemTemp(PointerArrayType, ".offload_baseptrs").getPointer();
    Info.PointersArray =
        CGF.CreateMemTemp(PointerArrayType, ".offload_ptrs").getPointer();
    Address MappersArray =
        CGF.CreateMemTemp(PointerArrayType, ".offload_mappers");
    Info.MappersArray = MappersArray.getPointer();

    // If we don't have any VLA types or other types that require runtime
    // evaluation, we can use a constant array for the map sizes, otherwise we
    // need to fill up the arrays as we do for the pointers.
    QualType Int64Ty =
        Ctx.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
    // ConstSizes starts as all zeros; entries flagged in RuntimeSizes keep
    // that zero and are overwritten by a store in the per-pointer loop below.
    SmallVector<llvm::Constant *> ConstSizes(
        CombinedInfo.Sizes.size(), llvm::ConstantInt::get(CGF.Int64Ty, 0));
    llvm::SmallBitVector RuntimeSizes(CombinedInfo.Sizes.size());
    for (unsigned I = 0, E = CombinedInfo.Sizes.size(); I < E; ++I) {
      if (auto *CI = dyn_cast<llvm::Constant>(CombinedInfo.Sizes[I])) {
        // Constant expressions and global values are still treated as
        // runtime sizes.
        if (!isa<llvm::ConstantExpr>(CI) && !isa<llvm::GlobalValue>(CI)) {
          // For non-contiguous entries the size slot carries the dimension
          // count instead of the recorded size.
          if (IsNonContiguous && (CombinedInfo.Types[I] &
                                  MappableExprsHandler::OMP_MAP_NON_CONTIG))
            ConstSizes[I] = llvm::ConstantInt::get(
                CGF.Int64Ty, CombinedInfo.NonContigInfo.Dims[I]);
          else
            ConstSizes[I] = CI;
          continue;
        }
      }
      RuntimeSizes.set(I);
    }

    if (RuntimeSizes.all()) {
      // Every size is computed at runtime: a plain temporary is enough, each
      // slot is stored in the per-pointer loop.
      QualType SizeArrayType = Ctx.getConstantArrayType(
          Int64Ty, PointerNumAP, nullptr, ArrayType::Normal,
          /*IndexTypeQuals=*/0);
      Info.SizesArray =
          CGF.CreateMemTemp(SizeArrayType, ".offload_sizes").getPointer();
    } else {
      // At least one size is constant: emit a private constant global that
      // holds the constant sizes.
      auto *SizesArrayInit = llvm::ConstantArray::get(
          llvm::ArrayType::get(CGM.Int64Ty, ConstSizes.size()), ConstSizes);
      std::string Name = CGM.getOpenMPRuntime().getName({"offload_sizes"});
      auto *SizesArrayGbl = new llvm::GlobalVariable(
          CGM.getModule(), SizesArrayInit->getType(), /*isConstant=*/true,
          llvm::GlobalValue::PrivateLinkage, SizesArrayInit, Name);
      SizesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
      if (RuntimeSizes.any()) {
        // Mixed case: copy the constant global into a temporary so that the
        // runtime-sized slots can be overwritten afterwards.
        QualType SizeArrayType = Ctx.getConstantArrayType(
            Int64Ty, PointerNumAP, nullptr, ArrayType::Normal,
            /*IndexTypeQuals=*/0);
        Address Buffer = CGF.CreateMemTemp(SizeArrayType, ".offload_sizes");
        llvm::Value *GblConstPtr =
            CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                SizesArrayGbl, CGM.Int64Ty->getPointerTo());
        CGF.Builder.CreateMemCpy(
            Buffer,
            Address(GblConstPtr, CGM.Int64Ty,
                    CGM.getNaturalTypeAlignment(Ctx.getIntTypeForBitwidth(
                        /*DestWidth=*/64, /*Signed=*/false))),
            CGF.getTypeSize(SizeArrayType));
        Info.SizesArray = Buffer.getPointer();
      } else {
        // All sizes are constant: use the global directly.
        Info.SizesArray = SizesArrayGbl;
      }
    }

    // The map types are always constant so we don't need to generate code to
    // fill arrays. Instead, we create an array constant.
    SmallVector<uint64_t, 4> Mapping(CombinedInfo.Types.size(), 0);
    llvm::copy(CombinedInfo.Types, Mapping.begin());
    std::string MaptypesName =
        CGM.getOpenMPRuntime().getName({"offload_maptypes"});
    auto *MapTypesArrayGbl =
        OMPBuilder.createOffloadMaptypes(Mapping, MaptypesName);
    Info.MapTypesArray = MapTypesArrayGbl;

    // The information types are only built if there is debug information
    // requested.
    if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo) {
      Info.MapNamesArray = llvm::Constant::getNullValue(
          llvm::Type::getInt8Ty(CGF.Builder.getContext())->getPointerTo());
    } else {
      // One name string per entry of CombinedInfo.Exprs.
      auto fillInfoMap = [&](MappableExprsHandler::MappingExprInfo &MapExpr) {
        return emitMappingInformation(CGF, OMPBuilder, MapExpr);
      };
      SmallVector<llvm::Constant *, 4> InfoMap(CombinedInfo.Exprs.size());
      llvm::transform(CombinedInfo.Exprs, InfoMap.begin(), fillInfoMap);
      std::string MapnamesName =
          CGM.getOpenMPRuntime().getName({"offload_mapnames"});
      auto *MapNamesArrayGbl =
          OMPBuilder.createOffloadMapnames(InfoMap, MapnamesName);
      Info.MapNamesArray = MapNamesArrayGbl;
    }

    // If there's a present map type modifier, it must not be applied to the end
    // of a region, so generate a separate map type array in that case.
    if (Info.separateBeginEndCalls()) {
      bool EndMapTypesDiffer = false;
      // Mapping is modified in place; it was already consumed above for the
      // begin-of-region array.
      for (uint64_t &Type : Mapping) {
        if (Type & MappableExprsHandler::OMP_MAP_PRESENT) {
          Type &= ~MappableExprsHandler::OMP_MAP_PRESENT;
          EndMapTypesDiffer = true;
        }
      }
      // Only emit a second array when stripping the flag changed something;
      // otherwise MapTypesArrayEnd is left as clearArrayInfo() set it.
      if (EndMapTypesDiffer) {
        MapTypesArrayGbl =
            OMPBuilder.createOffloadMaptypes(Mapping, MaptypesName);
        Info.MapTypesArrayEnd = MapTypesArrayGbl;
      }
    }

    // Per-pointer stores: base pointer, pointer, runtime size (if any) and
    // mapper function.
    for (unsigned I = 0; I < Info.NumberOfPtrs; ++I) {
      llvm::Value *BPVal = *CombinedInfo.BasePointers[I];
      llvm::Value *BP = CGF.Builder.CreateConstInBoundsGEP2_32(
          llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
          Info.BasePointersArray, 0, I);
      // Cast the slot address so the value can be stored with its own type.
      BP = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          BP, BPVal->getType()->getPointerTo(/*AddrSpace=*/0));
      Address BPAddr =
          Address::deprecated(BP, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
      CGF.Builder.CreateStore(BPVal, BPAddr);

      // Remember the slot address for declarations that carry a device
      // pointer declaration.
      if (Info.requiresDevicePointerInfo())
        if (const ValueDecl *DevVD =
                CombinedInfo.BasePointers[I].getDevicePtrDecl())
          Info.CaptureDeviceAddrMap.try_emplace(DevVD, BPAddr);

      llvm::Value *PVal = CombinedInfo.Pointers[I];
      llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32(
          llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
          Info.PointersArray, 0, I);
      P = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          P, PVal->getType()->getPointerTo(/*AddrSpace=*/0));
      Address PAddr =
          Address::deprecated(P, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
      CGF.Builder.CreateStore(PVal, PAddr);

      // Only sizes that were not folded into the constant array are stored.
      if (RuntimeSizes.test(I)) {
        llvm::Value *S = CGF.Builder.CreateConstInBoundsGEP2_32(
            llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs),
            Info.SizesArray,
            /*Idx0=*/0,
            /*Idx1=*/I);
        Address SAddr =
            Address::deprecated(S, Ctx.getTypeAlignInChars(Int64Ty));
        CGF.Builder.CreateStore(CGF.Builder.CreateIntCast(CombinedInfo.Sizes[I],
                                                          CGM.Int64Ty,
                                                          /*isSigned=*/true),
                                SAddr);
      }

      // Fill up the mapper array.
      llvm::Value *MFunc = llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
      if (CombinedInfo.Mappers[I]) {
        MFunc = CGM.getOpenMPRuntime().getOrCreateUserDefinedMapperFunc(
            cast<OMPDeclareMapperDecl>(CombinedInfo.Mappers[I]));
        MFunc = CGF.Builder.CreatePointerCast(MFunc, CGM.VoidPtrTy);
        // Recorded so emitOffloadingArraysArgument passes a non-null mapper
        // array.
        Info.HasMapper = true;
      }
      Address MAddr = CGF.Builder.CreateConstArrayGEP(MappersArray, I);
      CGF.Builder.CreateStore(MFunc, MAddr);
    }
  }

  // Descriptors are only needed when requested, recorded and there is at
  // least one pointer slot to store them into.
  if (!IsNonContiguous || CombinedInfo.NonContigInfo.Offsets.empty() ||
      Info.NumberOfPtrs == 0)
    return;

  emitNonContiguousDescriptor(CGF, CombinedInfo, Info);
}

namespace {
/// Additional arguments for emitOffloadingArraysArgument function.
struct ArgumentsOptions {
  // When true, the end-of-region map types are requested.
  bool ForEndCall = false;
  ArgumentsOptions() = default;
  ArgumentsOptions(bool ForEndCall) : ForEndCall(ForEndCall) {}
};
} // namespace

/// Emit the arguments to be passed to the runtime library based on the
/// arrays of base pointers, pointers, sizes, map types, and mappers. If
/// ForEndCall, emit map types to be passed for the end of the region instead of
/// the beginning.
9795 static void emitOffloadingArraysArgument( 9796 CodeGenFunction &CGF, llvm::Value *&BasePointersArrayArg, 9797 llvm::Value *&PointersArrayArg, llvm::Value *&SizesArrayArg, 9798 llvm::Value *&MapTypesArrayArg, llvm::Value *&MapNamesArrayArg, 9799 llvm::Value *&MappersArrayArg, CGOpenMPRuntime::TargetDataInfo &Info, 9800 const ArgumentsOptions &Options = ArgumentsOptions()) { 9801 assert((!Options.ForEndCall || Info.separateBeginEndCalls()) && 9802 "expected region end call to runtime only when end call is separate"); 9803 CodeGenModule &CGM = CGF.CGM; 9804 if (Info.NumberOfPtrs) { 9805 BasePointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( 9806 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs), 9807 Info.BasePointersArray, 9808 /*Idx0=*/0, /*Idx1=*/0); 9809 PointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( 9810 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs), 9811 Info.PointersArray, 9812 /*Idx0=*/0, 9813 /*Idx1=*/0); 9814 SizesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( 9815 llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs), Info.SizesArray, 9816 /*Idx0=*/0, /*Idx1=*/0); 9817 MapTypesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( 9818 llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs), 9819 Options.ForEndCall && Info.MapTypesArrayEnd ? Info.MapTypesArrayEnd 9820 : Info.MapTypesArray, 9821 /*Idx0=*/0, 9822 /*Idx1=*/0); 9823 9824 // Only emit the mapper information arrays if debug information is 9825 // requested. 
9826 if (CGF.CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo) 9827 MapNamesArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy); 9828 else 9829 MapNamesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( 9830 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs), 9831 Info.MapNamesArray, 9832 /*Idx0=*/0, 9833 /*Idx1=*/0); 9834 // If there is no user-defined mapper, set the mapper array to nullptr to 9835 // avoid an unnecessary data privatization 9836 if (!Info.HasMapper) 9837 MappersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy); 9838 else 9839 MappersArrayArg = 9840 CGF.Builder.CreatePointerCast(Info.MappersArray, CGM.VoidPtrPtrTy); 9841 } else { 9842 BasePointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy); 9843 PointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy); 9844 SizesArrayArg = llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo()); 9845 MapTypesArrayArg = 9846 llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo()); 9847 MapNamesArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy); 9848 MappersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy); 9849 } 9850 } 9851 9852 /// Check for inner distribute directive. 
9853 static const OMPExecutableDirective * 9854 getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) { 9855 const auto *CS = D.getInnermostCapturedStmt(); 9856 const auto *Body = 9857 CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true); 9858 const Stmt *ChildStmt = 9859 CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body); 9860 9861 if (const auto *NestedDir = 9862 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) { 9863 OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind(); 9864 switch (D.getDirectiveKind()) { 9865 case OMPD_target: 9866 if (isOpenMPDistributeDirective(DKind)) 9867 return NestedDir; 9868 if (DKind == OMPD_teams) { 9869 Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers( 9870 /*IgnoreCaptured=*/true); 9871 if (!Body) 9872 return nullptr; 9873 ChildStmt = CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body); 9874 if (const auto *NND = 9875 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) { 9876 DKind = NND->getDirectiveKind(); 9877 if (isOpenMPDistributeDirective(DKind)) 9878 return NND; 9879 } 9880 } 9881 return nullptr; 9882 case OMPD_target_teams: 9883 if (isOpenMPDistributeDirective(DKind)) 9884 return NestedDir; 9885 return nullptr; 9886 case OMPD_target_parallel: 9887 case OMPD_target_simd: 9888 case OMPD_target_parallel_for: 9889 case OMPD_target_parallel_for_simd: 9890 return nullptr; 9891 case OMPD_target_teams_distribute: 9892 case OMPD_target_teams_distribute_simd: 9893 case OMPD_target_teams_distribute_parallel_for: 9894 case OMPD_target_teams_distribute_parallel_for_simd: 9895 case OMPD_parallel: 9896 case OMPD_for: 9897 case OMPD_parallel_for: 9898 case OMPD_parallel_master: 9899 case OMPD_parallel_sections: 9900 case OMPD_for_simd: 9901 case OMPD_parallel_for_simd: 9902 case OMPD_cancel: 9903 case OMPD_cancellation_point: 9904 case OMPD_ordered: 9905 case OMPD_threadprivate: 9906 case OMPD_allocate: 9907 case OMPD_task: 9908 case OMPD_simd: 9909 case OMPD_tile: 9910 
case OMPD_unroll: 9911 case OMPD_sections: 9912 case OMPD_section: 9913 case OMPD_single: 9914 case OMPD_master: 9915 case OMPD_critical: 9916 case OMPD_taskyield: 9917 case OMPD_barrier: 9918 case OMPD_taskwait: 9919 case OMPD_taskgroup: 9920 case OMPD_atomic: 9921 case OMPD_flush: 9922 case OMPD_depobj: 9923 case OMPD_scan: 9924 case OMPD_teams: 9925 case OMPD_target_data: 9926 case OMPD_target_exit_data: 9927 case OMPD_target_enter_data: 9928 case OMPD_distribute: 9929 case OMPD_distribute_simd: 9930 case OMPD_distribute_parallel_for: 9931 case OMPD_distribute_parallel_for_simd: 9932 case OMPD_teams_distribute: 9933 case OMPD_teams_distribute_simd: 9934 case OMPD_teams_distribute_parallel_for: 9935 case OMPD_teams_distribute_parallel_for_simd: 9936 case OMPD_target_update: 9937 case OMPD_declare_simd: 9938 case OMPD_declare_variant: 9939 case OMPD_begin_declare_variant: 9940 case OMPD_end_declare_variant: 9941 case OMPD_declare_target: 9942 case OMPD_end_declare_target: 9943 case OMPD_declare_reduction: 9944 case OMPD_declare_mapper: 9945 case OMPD_taskloop: 9946 case OMPD_taskloop_simd: 9947 case OMPD_master_taskloop: 9948 case OMPD_master_taskloop_simd: 9949 case OMPD_parallel_master_taskloop: 9950 case OMPD_parallel_master_taskloop_simd: 9951 case OMPD_requires: 9952 case OMPD_metadirective: 9953 case OMPD_unknown: 9954 default: 9955 llvm_unreachable("Unexpected directive."); 9956 } 9957 } 9958 9959 return nullptr; 9960 } 9961 9962 /// Emit the user-defined mapper function. The code generation follows the 9963 /// pattern in the example below. 9964 /// \code 9965 /// void .omp_mapper.<type_name>.<mapper_id>.(void *rt_mapper_handle, 9966 /// void *base, void *begin, 9967 /// int64_t size, int64_t type, 9968 /// void *name = nullptr) { 9969 /// // Allocate space for an array section first or add a base/begin for 9970 /// // pointer dereference. 
///   if ((size > 1 || (base != begin && maptype.IsPtrAndObj)) &&
///       !maptype.IsDelete)
///     __tgt_push_mapper_component(rt_mapper_handle, base, begin,
///                                 size*sizeof(Ty), clearToFromMember(type));
///   // Map members.
///   for (unsigned i = 0; i < size; i++) {
///     // For each component specified by this mapper:
///     for (auto c : begin[i]->all_components) {
///       if (c.hasMapper())
///         (*c.Mapper())(rt_mapper_handle, c.arg_base, c.arg_begin, c.arg_size,
///                       c.arg_type, c.arg_name);
///       else
///         __tgt_push_mapper_component(rt_mapper_handle, c.arg_base,
///                                     c.arg_begin, c.arg_size, c.arg_type,
///                                     c.arg_name);
///     }
///   }
///   // Delete the array section.
///   if (size > 1 && maptype.IsDelete)
///     __tgt_push_mapper_component(rt_mapper_handle, base, begin,
///                                 size*sizeof(Ty), clearToFromMember(type));
/// }
/// \endcode
void CGOpenMPRuntime::emitUserDefinedMapper(const OMPDeclareMapperDecl *D,
                                            CodeGenFunction *CGF) {
  // Each mapper declaration is emitted at most once.
  if (UDMMap.count(D) > 0)
    return;
  ASTContext &C = CGM.getContext();
  QualType Ty = D->getType();
  QualType PtrTy = C.getPointerType(Ty).withRestrict();
  QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
  // The variable the mapper is declared on; privatized below to the current
  // array element.
  auto *MapperVarDecl =
      cast<VarDecl>(cast<DeclRefExpr>(D->getMapperVarRef())->getDecl());
  SourceLocation Loc = D->getLocation();
  CharUnits ElementSize = C.getTypeSizeInChars(Ty);

  // Prepare mapper function arguments and attributes.
  ImplicitParamDecl HandleArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                              C.VoidPtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl BaseArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl BeginArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                             C.VoidPtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl SizeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl TypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl NameArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                            ImplicitParamDecl::Other);
  // Argument order: handle, base, begin, size, type, name.
  FunctionArgList Args;
  Args.push_back(&HandleArg);
  Args.push_back(&BaseArg);
  Args.push_back(&BeginArg);
  Args.push_back(&SizeArg);
  Args.push_back(&TypeArg);
  Args.push_back(&NameArg);
  const CGFunctionInfo &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  // The function name is built from "omp_mapper", the mangled mapped type and
  // the mapper's own name.
  SmallString<64> TyStr;
  llvm::raw_svector_ostream Out(TyStr);
  CGM.getCXXABI().getMangleContext().mangleTypeName(Ty, Out);
  std::string Name = getName({"omp_mapper", TyStr, D->getName()});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
  // Start the mapper function code generation.
  CodeGenFunction MapperCGF(CGM);
  MapperCGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  // Compute the starting and end addresses of array elements.
  llvm::Value *Size = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&SizeArg), /*Volatile=*/false,
      C.getPointerType(Int64Ty), Loc);
  // Prepare common arguments for array initiation and deletion.
  llvm::Value *Handle = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&HandleArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
  llvm::Value *BaseIn = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&BaseArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
  llvm::Value *BeginIn = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&BeginArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
  // Convert the size in bytes into the number of array elements.
  Size = MapperCGF.Builder.CreateExactUDiv(
      Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity()));
  // [PtrBegin, PtrEnd) is the range of elements the loop below walks over.
  llvm::Value *PtrBegin = MapperCGF.Builder.CreateBitCast(
      BeginIn, CGM.getTypes().ConvertTypeForMem(PtrTy));
  llvm::Value *PtrEnd = MapperCGF.Builder.CreateGEP(
      PtrBegin->getType()->getPointerElementType(), PtrBegin, Size);
  llvm::Value *MapType = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&TypeArg), /*Volatile=*/false,
      C.getPointerType(Int64Ty), Loc);
  llvm::Value *MapName = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&NameArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);

  // Emit array initiation if this is an array section and \p MapType indicates
  // that memory allocation is required.
  llvm::BasicBlock *HeadBB = MapperCGF.createBasicBlock("omp.arraymap.head");
  emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
                             MapName, ElementSize, HeadBB, /*IsInit=*/true);

  // Emit a for loop to iterate through SizeArg of elements and map all of them.

  // Emit the loop header block.
  MapperCGF.EmitBlock(HeadBB);
  llvm::BasicBlock *BodyBB = MapperCGF.createBasicBlock("omp.arraymap.body");
  llvm::BasicBlock *DoneBB = MapperCGF.createBasicBlock("omp.done");
  // Evaluate whether the initial condition is satisfied.
  llvm::Value *IsEmpty =
      MapperCGF.Builder.CreateICmpEQ(PtrBegin, PtrEnd, "omp.arraymap.isempty");
  MapperCGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
  // Remember the block that branches into the loop for the PHI below.
  llvm::BasicBlock *EntryBB = MapperCGF.Builder.GetInsertBlock();

  // Emit the loop body block.
  MapperCGF.EmitBlock(BodyBB);
  // LastBB tracks the block that ends the loop body; it becomes the back-edge
  // predecessor of PtrPHI.
  llvm::BasicBlock *LastBB = BodyBB;
  llvm::PHINode *PtrPHI = MapperCGF.Builder.CreatePHI(
      PtrBegin->getType(), 2, "omp.arraymap.ptrcurrent");
  PtrPHI->addIncoming(PtrBegin, EntryBB);
  Address PtrCurrent =
      Address::deprecated(PtrPHI, MapperCGF.GetAddrOfLocalVar(&BeginArg)
                                      .getAlignment()
                                      .alignmentOfArrayElement(ElementSize));
  // Privatize the declared variable of mapper to be the current array element.
  CodeGenFunction::OMPPrivateScope Scope(MapperCGF);
  Scope.addPrivate(MapperVarDecl, [PtrCurrent]() { return PtrCurrent; });
  (void)Scope.Privatize();

  // Get map clause information. Fill up the arrays with all mapped variables.
  MappableExprsHandler::MapCombinedInfoTy Info;
  MappableExprsHandler MEHandler(*D, MapperCGF);
  MEHandler.generateAllInfoForMapper(Info);

  // Call the runtime API __tgt_mapper_num_components to get the number of
  // pre-existing components.
  llvm::Value *OffloadingArgs[] = {Handle};
  llvm::Value *PreviousSize = MapperCGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___tgt_mapper_num_components),
      OffloadingArgs);
  // Shift the component count into the position of the MEMBER_OF field so it
  // can be added to each component's map type below.
  llvm::Value *ShiftedPreviousSize = MapperCGF.Builder.CreateShl(
      PreviousSize,
      MapperCGF.Builder.getInt64(MappableExprsHandler::getFlagMemberOffset()));

  // Fill up the runtime mapper handle for all components.
  for (unsigned I = 0; I < Info.BasePointers.size(); ++I) {
    llvm::Value *CurBaseArg = MapperCGF.Builder.CreateBitCast(
        *Info.BasePointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
    llvm::Value *CurBeginArg = MapperCGF.Builder.CreateBitCast(
        Info.Pointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
    llvm::Value *CurSizeArg = Info.Sizes[I];
    // The name string is only emitted when debug information is requested.
    llvm::Value *CurNameArg =
        (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo)
            ? llvm::ConstantPointerNull::get(CGM.VoidPtrTy)
            : emitMappingInformation(MapperCGF, OMPBuilder, Info.Exprs[I]);

    // Extract the MEMBER_OF field from the map type.
    llvm::Value *OriMapType = MapperCGF.Builder.getInt64(Info.Types[I]);
    llvm::Value *MemberMapType =
        MapperCGF.Builder.CreateNUWAdd(OriMapType, ShiftedPreviousSize);

    // Combine the map type inherited from user-defined mapper with that
    // specified in the program. According to the OMP_MAP_TO and OMP_MAP_FROM
    // bits of the \a MapType, which is the input argument of the mapper
    // function, the following code will set the OMP_MAP_TO and OMP_MAP_FROM
    // bits of MemberMapType.
    // [OpenMP 5.0], 1.2.6. map-type decay.
    //        | alloc |  to   | from  | tofrom | release | delete
    // ----------------------------------------------------------
    // alloc  | alloc | alloc | alloc | alloc  | release | delete
    // to     | alloc |  to   | alloc |   to   | release | delete
    // from   | alloc | alloc | from  |  from  | release | delete
    // tofrom | alloc |  to   | from  | tofrom | release | delete
    llvm::Value *LeftToFrom = MapperCGF.Builder.CreateAnd(
        MapType,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO |
                                   MappableExprsHandler::OMP_MAP_FROM));
    llvm::BasicBlock *AllocBB = MapperCGF.createBasicBlock("omp.type.alloc");
    llvm::BasicBlock *AllocElseBB =
        MapperCGF.createBasicBlock("omp.type.alloc.else");
    llvm::BasicBlock *ToBB = MapperCGF.createBasicBlock("omp.type.to");
    llvm::BasicBlock *ToElseBB = MapperCGF.createBasicBlock("omp.type.to.else");
    llvm::BasicBlock *FromBB = MapperCGF.createBasicBlock("omp.type.from");
    llvm::BasicBlock *EndBB = MapperCGF.createBasicBlock("omp.type.end");
    llvm::Value *IsAlloc = MapperCGF.Builder.CreateIsNull(LeftToFrom);
    MapperCGF.Builder.CreateCondBr(IsAlloc, AllocBB, AllocElseBB);
    // In case of alloc, clear OMP_MAP_TO and OMP_MAP_FROM.
    MapperCGF.EmitBlock(AllocBB);
    llvm::Value *AllocMapType = MapperCGF.Builder.CreateAnd(
        MemberMapType,
        MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO |
                                     MappableExprsHandler::OMP_MAP_FROM)));
    MapperCGF.Builder.CreateBr(EndBB);
    MapperCGF.EmitBlock(AllocElseBB);
    llvm::Value *IsTo = MapperCGF.Builder.CreateICmpEQ(
        LeftToFrom,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO));
    MapperCGF.Builder.CreateCondBr(IsTo, ToBB, ToElseBB);
    // In case of to, clear OMP_MAP_FROM.
    MapperCGF.EmitBlock(ToBB);
    llvm::Value *ToMapType = MapperCGF.Builder.CreateAnd(
        MemberMapType,
        MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_FROM));
    MapperCGF.Builder.CreateBr(EndBB);
    MapperCGF.EmitBlock(ToElseBB);
    llvm::Value *IsFrom = MapperCGF.Builder.CreateICmpEQ(
        LeftToFrom,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_FROM));
    MapperCGF.Builder.CreateCondBr(IsFrom, FromBB, EndBB);
    // In case of from, clear OMP_MAP_TO.
    MapperCGF.EmitBlock(FromBB);
    llvm::Value *FromMapType = MapperCGF.Builder.CreateAnd(
        MemberMapType,
        MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_TO));
    // In case of tofrom, do nothing.
    MapperCGF.EmitBlock(EndBB);
    LastBB = EndBB;
    // Merge the four outcomes; the edge from ToElseBB is the tofrom case and
    // carries the unmodified MemberMapType.
    llvm::PHINode *CurMapType =
        MapperCGF.Builder.CreatePHI(CGM.Int64Ty, 4, "omp.maptype");
    CurMapType->addIncoming(AllocMapType, AllocBB);
    CurMapType->addIncoming(ToMapType, ToBB);
    CurMapType->addIncoming(FromMapType, FromBB);
    CurMapType->addIncoming(MemberMapType, ToElseBB);

    // Note: intentionally shadows the single-element OffloadingArgs above.
    llvm::Value *OffloadingArgs[] = {Handle,     CurBaseArg, CurBeginArg,
                                     CurSizeArg, CurMapType, CurNameArg};
    if (Info.Mappers[I]) {
      // Call the corresponding mapper function.
      llvm::Function *MapperFunc = getOrCreateUserDefinedMapperFunc(
          cast<OMPDeclareMapperDecl>(Info.Mappers[I]));
      assert(MapperFunc && "Expect a valid mapper function is available.");
      MapperCGF.EmitNounwindRuntimeCall(MapperFunc, OffloadingArgs);
    } else {
      // Call the runtime API __tgt_push_mapper_component to fill up the runtime
      // data structure.
      MapperCGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___tgt_push_mapper_component),
          OffloadingArgs);
    }
  }

  // Update the pointer to point to the next element that needs to be mapped,
  // and check whether we have mapped all elements.
  llvm::Type *ElemTy = PtrPHI->getType()->getPointerElementType();
  llvm::Value *PtrNext = MapperCGF.Builder.CreateConstGEP1_32(
      ElemTy, PtrPHI, /*Idx0=*/1, "omp.arraymap.next");
  PtrPHI->addIncoming(PtrNext, LastBB);
  llvm::Value *IsDone =
      MapperCGF.Builder.CreateICmpEQ(PtrNext, PtrEnd, "omp.arraymap.isdone");
  llvm::BasicBlock *ExitBB = MapperCGF.createBasicBlock("omp.arraymap.exit");
  MapperCGF.Builder.CreateCondBr(IsDone, ExitBB, BodyBB);

  MapperCGF.EmitBlock(ExitBB);
  // Emit array deletion if this is an array section and \p MapType indicates
  // that deletion is required.
  emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
                             MapName, ElementSize, DoneBB, /*IsInit=*/false);

  // Emit the function exit block.
  MapperCGF.EmitBlock(DoneBB, /*IsFinished=*/true);
  MapperCGF.FinishFunction();
  UDMMap.try_emplace(D, Fn);
  // When emitted on behalf of a function, record the mapper against it.
  if (CGF) {
    auto &Decls = FunctionUDMMap.FindAndConstruct(CGF->CurFn);
    Decls.second.push_back(D);
  }
}

/// Emit the array initialization or deletion portion for user-defined mapper
/// code generation. First, it evaluates whether an array section is mapped and
/// whether the \a MapType instructs to delete this section. If \a IsInit is
/// true, and \a MapType indicates to not delete this array, array
/// initialization code is generated. If \a IsInit is false, and \a MapType
/// indicates to delete this array, array deletion code is generated.
10249 void CGOpenMPRuntime::emitUDMapperArrayInitOrDel( 10250 CodeGenFunction &MapperCGF, llvm::Value *Handle, llvm::Value *Base, 10251 llvm::Value *Begin, llvm::Value *Size, llvm::Value *MapType, 10252 llvm::Value *MapName, CharUnits ElementSize, llvm::BasicBlock *ExitBB, 10253 bool IsInit) { 10254 StringRef Prefix = IsInit ? ".init" : ".del"; 10255 10256 // Evaluate if this is an array section. 10257 llvm::BasicBlock *BodyBB = 10258 MapperCGF.createBasicBlock(getName({"omp.array", Prefix})); 10259 llvm::Value *IsArray = MapperCGF.Builder.CreateICmpSGT( 10260 Size, MapperCGF.Builder.getInt64(1), "omp.arrayinit.isarray"); 10261 llvm::Value *DeleteBit = MapperCGF.Builder.CreateAnd( 10262 MapType, 10263 MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_DELETE)); 10264 llvm::Value *DeleteCond; 10265 llvm::Value *Cond; 10266 if (IsInit) { 10267 // base != begin? 10268 llvm::Value *BaseIsBegin = MapperCGF.Builder.CreateICmpNE(Base, Begin); 10269 // IsPtrAndObj? 10270 llvm::Value *PtrAndObjBit = MapperCGF.Builder.CreateAnd( 10271 MapType, 10272 MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_PTR_AND_OBJ)); 10273 PtrAndObjBit = MapperCGF.Builder.CreateIsNotNull(PtrAndObjBit); 10274 BaseIsBegin = MapperCGF.Builder.CreateAnd(BaseIsBegin, PtrAndObjBit); 10275 Cond = MapperCGF.Builder.CreateOr(IsArray, BaseIsBegin); 10276 DeleteCond = MapperCGF.Builder.CreateIsNull( 10277 DeleteBit, getName({"omp.array", Prefix, ".delete"})); 10278 } else { 10279 Cond = IsArray; 10280 DeleteCond = MapperCGF.Builder.CreateIsNotNull( 10281 DeleteBit, getName({"omp.array", Prefix, ".delete"})); 10282 } 10283 Cond = MapperCGF.Builder.CreateAnd(Cond, DeleteCond); 10284 MapperCGF.Builder.CreateCondBr(Cond, BodyBB, ExitBB); 10285 10286 MapperCGF.EmitBlock(BodyBB); 10287 // Get the array size by multiplying element size and element number (i.e., \p 10288 // Size). 
10289 llvm::Value *ArraySize = MapperCGF.Builder.CreateNUWMul( 10290 Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity())); 10291 // Remove OMP_MAP_TO and OMP_MAP_FROM from the map type, so that it achieves 10292 // memory allocation/deletion purpose only. 10293 llvm::Value *MapTypeArg = MapperCGF.Builder.CreateAnd( 10294 MapType, 10295 MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO | 10296 MappableExprsHandler::OMP_MAP_FROM))); 10297 MapTypeArg = MapperCGF.Builder.CreateOr( 10298 MapTypeArg, 10299 MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_IMPLICIT)); 10300 10301 // Call the runtime API __tgt_push_mapper_component to fill up the runtime 10302 // data structure. 10303 llvm::Value *OffloadingArgs[] = {Handle, Base, Begin, 10304 ArraySize, MapTypeArg, MapName}; 10305 MapperCGF.EmitRuntimeCall( 10306 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), 10307 OMPRTL___tgt_push_mapper_component), 10308 OffloadingArgs); 10309 } 10310 10311 llvm::Function *CGOpenMPRuntime::getOrCreateUserDefinedMapperFunc( 10312 const OMPDeclareMapperDecl *D) { 10313 auto I = UDMMap.find(D); 10314 if (I != UDMMap.end()) 10315 return I->second; 10316 emitUserDefinedMapper(D); 10317 return UDMMap.lookup(D); 10318 } 10319 10320 void CGOpenMPRuntime::emitTargetNumIterationsCall( 10321 CodeGenFunction &CGF, const OMPExecutableDirective &D, 10322 llvm::Value *DeviceID, 10323 llvm::function_ref<llvm::Value *(CodeGenFunction &CGF, 10324 const OMPLoopDirective &D)> 10325 SizeEmitter) { 10326 OpenMPDirectiveKind Kind = D.getDirectiveKind(); 10327 const OMPExecutableDirective *TD = &D; 10328 // Get nested teams distribute kind directive, if any. 
10329 if (!isOpenMPDistributeDirective(Kind) || !isOpenMPTeamsDirective(Kind)) 10330 TD = getNestedDistributeDirective(CGM.getContext(), D); 10331 if (!TD) 10332 return; 10333 const auto *LD = cast<OMPLoopDirective>(TD); 10334 auto &&CodeGen = [LD, DeviceID, SizeEmitter, &D, this](CodeGenFunction &CGF, 10335 PrePostActionTy &) { 10336 if (llvm::Value *NumIterations = SizeEmitter(CGF, *LD)) { 10337 llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc()); 10338 llvm::Value *Args[] = {RTLoc, DeviceID, NumIterations}; 10339 CGF.EmitRuntimeCall( 10340 OMPBuilder.getOrCreateRuntimeFunction( 10341 CGM.getModule(), OMPRTL___kmpc_push_target_tripcount_mapper), 10342 Args); 10343 } 10344 }; 10345 emitInlinedDirective(CGF, OMPD_unknown, CodeGen); 10346 } 10347 10348 void CGOpenMPRuntime::emitTargetCall( 10349 CodeGenFunction &CGF, const OMPExecutableDirective &D, 10350 llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond, 10351 llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device, 10352 llvm::function_ref<llvm::Value *(CodeGenFunction &CGF, 10353 const OMPLoopDirective &D)> 10354 SizeEmitter) { 10355 if (!CGF.HaveInsertPoint()) 10356 return; 10357 10358 const bool OffloadingMandatory = !CGM.getLangOpts().OpenMPIsDevice && 10359 CGM.getLangOpts().OpenMPOffloadMandatory; 10360 10361 assert((OffloadingMandatory || OutlinedFn) && "Invalid outlined function!"); 10362 10363 const bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>() || 10364 D.hasClausesOfKind<OMPNowaitClause>(); 10365 llvm::SmallVector<llvm::Value *, 16> CapturedVars; 10366 const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target); 10367 auto &&ArgsCodegen = [&CS, &CapturedVars](CodeGenFunction &CGF, 10368 PrePostActionTy &) { 10369 CGF.GenerateOpenMPCapturedVars(CS, CapturedVars); 10370 }; 10371 emitInlinedDirective(CGF, OMPD_unknown, ArgsCodegen); 10372 10373 CodeGenFunction::OMPTargetDataInfo InputInfo; 10374 llvm::Value *MapTypesArray = nullptr; 10375 
llvm::Value *MapNamesArray = nullptr;
  // Generate code for the host fallback function.
  auto &&FallbackGen = [this, OutlinedFn, &D, &CapturedVars, RequiresOuterTask,
                        &CS, OffloadingMandatory](CodeGenFunction &CGF) {
    if (OffloadingMandatory) {
      // With mandatory offloading there is no host version to call; the
      // fallback path is marked unreachable instead.
      CGF.Builder.CreateUnreachable();
    } else {
      if (RequiresOuterTask) {
        // Regenerate the captured variables in the current function before
        // calling the outlined host function.
        CapturedVars.clear();
        CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
      }
      emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
    }
  };
  // Fill up the pointer arrays and transfer execution to the device.
  auto &&ThenGen = [this, Device, OutlinedFnID, &D, &InputInfo, &MapTypesArray,
                    &MapNamesArray, SizeEmitter,
                    FallbackGen](CodeGenFunction &CGF, PrePostActionTy &) {
    if (Device.getInt() == OMPC_DEVICE_ancestor) {
      // Reverse offloading is not supported, so just execute on the host.
      FallbackGen(CGF);
      return;
    }

    // On top of the arrays that were filled up, the target offloading call
    // takes as arguments the device id as well as the host pointer. The host
    // pointer is used by the runtime library to identify the current target
    // region, so it only has to be unique and not necessarily point to
    // anything. It could be the pointer to the outlined function that
    // implements the target region, but we aren't using that so that the
    // compiler doesn't need to keep that, and could therefore inline the host
    // function if proven worthwhile during optimization.

    // From this point on, we need to have an ID of the target region defined.
    assert(OutlinedFnID && "Invalid outlined function ID!");
    (void)OutlinedFnID;

    // Emit device ID if any.
    llvm::Value *DeviceID;
    if (Device.getPointer()) {
      assert((Device.getInt() == OMPC_DEVICE_unknown ||
              Device.getInt() == OMPC_DEVICE_device_num) &&
             "Expected device_num modifier.");
      llvm::Value *DevVal = CGF.EmitScalarExpr(Device.getPointer());
      DeviceID =
          CGF.Builder.CreateIntCast(DevVal, CGF.Int64Ty, /*isSigned=*/true);
    } else {
      // No device clause: pass the "undefined device" sentinel.
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Value *PointerNum =
        CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);

    // Return value of the runtime offloading call.
    llvm::Value *Return;

    llvm::Value *NumTeams = emitNumTeamsForTargetDirective(CGF, D);
    llvm::Value *NumThreads = emitNumThreadsForTargetDirective(CGF, D);

    // Source location for the ident struct
    llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());

    // Emit tripcount for the target loop-based directive.
    emitTargetNumIterationsCall(CGF, D, DeviceID, SizeEmitter);

    bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
    // The target region is an outlined function launched by the runtime
    // via calls __tgt_target() or __tgt_target_teams().
    //
    // __tgt_target() launches a target region with one team and one thread,
    // executing a serial region.  This master thread may in turn launch
    // more threads within its team upon encountering a parallel region,
    // however, no additional teams can be launched on the device.
    //
    // __tgt_target_teams() launches a target region with one or more teams,
    // each with one or more threads.  This call is required for target
    // constructs such as:
    //  'target teams'
    //  'target' / 'teams'
    //  'target teams distribute parallel for'
    //  'target parallel'
    // and so on.
    //
    // Note that on the host and CPU targets, the runtime implementation of
    // these calls simply call the outlined function without forking threads.
    // The outlined functions themselves have runtime calls to
    // __kmpc_fork_teams() and __kmpc_fork() for this purpose, codegen'd by
    // the compiler in emitTeamsCall() and emitParallelCall().
    //
    // In contrast, on the NVPTX target, the implementation of
    // __tgt_target_teams() launches a GPU kernel with the requested number
    // of teams and threads so no additional calls to the runtime are required.
    if (NumTeams) {
      // If we have NumTeams defined this means that we have an enclosed teams
      // region. Therefore we also expect to have NumThreads defined. These two
      // values should be defined in the presence of a teams directive,
      // regardless of having any clauses associated. If the user is using teams
      // but no clauses, these two values will be the default that should be
      // passed to the runtime library - a 32-bit integer with the value zero.
      assert(NumThreads && "Thread limit expression should be available along "
                           "with number of teams.");
      SmallVector<llvm::Value *> OffloadingArgs = {
          RTLoc,
          DeviceID,
          OutlinedFnID,
          PointerNum,
          InputInfo.BasePointersArray.getPointer(),
          InputInfo.PointersArray.getPointer(),
          InputInfo.SizesArray.getPointer(),
          MapTypesArray,
          MapNamesArray,
          InputInfo.MappersArray.getPointer(),
          NumTeams,
          NumThreads};
      if (HasNowait) {
        // Add int32_t depNum = 0, void *depList = nullptr, int32_t
        // noAliasDepNum = 0, void *noAliasDepList = nullptr.
        OffloadingArgs.push_back(CGF.Builder.getInt32(0));
        OffloadingArgs.push_back(llvm::ConstantPointerNull::get(CGM.VoidPtrTy));
        OffloadingArgs.push_back(CGF.Builder.getInt32(0));
        OffloadingArgs.push_back(llvm::ConstantPointerNull::get(CGM.VoidPtrTy));
      }
      Return = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), HasNowait
                                   ? OMPRTL___tgt_target_teams_nowait_mapper
                                   : OMPRTL___tgt_target_teams_mapper),
          OffloadingArgs);
    } else {
      // No teams information: same argument list without the team and thread
      // counts, passed to the non-teams entry points.
      SmallVector<llvm::Value *> OffloadingArgs = {
          RTLoc,
          DeviceID,
          OutlinedFnID,
          PointerNum,
          InputInfo.BasePointersArray.getPointer(),
          InputInfo.PointersArray.getPointer(),
          InputInfo.SizesArray.getPointer(),
          MapTypesArray,
          MapNamesArray,
          InputInfo.MappersArray.getPointer()};
      if (HasNowait) {
        // Add int32_t depNum = 0, void *depList = nullptr, int32_t
        // noAliasDepNum = 0, void *noAliasDepList = nullptr.
        OffloadingArgs.push_back(CGF.Builder.getInt32(0));
        OffloadingArgs.push_back(llvm::ConstantPointerNull::get(CGM.VoidPtrTy));
        OffloadingArgs.push_back(CGF.Builder.getInt32(0));
        OffloadingArgs.push_back(llvm::ConstantPointerNull::get(CGM.VoidPtrTy));
      }
      Return = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), HasNowait ? OMPRTL___tgt_target_nowait_mapper
                                         : OMPRTL___tgt_target_mapper),
          OffloadingArgs);
    }

    // Check the error code and execute the host version if required. A
    // non-zero return value is treated as an offloading failure.
    llvm::BasicBlock *OffloadFailedBlock =
        CGF.createBasicBlock("omp_offload.failed");
    llvm::BasicBlock *OffloadContBlock =
        CGF.createBasicBlock("omp_offload.cont");
    llvm::Value *Failed = CGF.Builder.CreateIsNotNull(Return);
    CGF.Builder.CreateCondBr(Failed, OffloadFailedBlock, OffloadContBlock);

    CGF.EmitBlock(OffloadFailedBlock);
    FallbackGen(CGF);

    CGF.EmitBranch(OffloadContBlock);

    CGF.EmitBlock(OffloadContBlock, /*IsFinished=*/true);
  };

  // Notify that the host version must be executed.
  auto &&ElseGen = [FallbackGen](CodeGenFunction &CGF, PrePostActionTy &) {
    FallbackGen(CGF);
  };

  // Offloading path: collect the map information for every capture, build the
  // offloading arrays and then run ThenGen (inside a task if required).
  auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray,
                          &MapNamesArray, &CapturedVars, RequiresOuterTask,
                          &CS](CodeGenFunction &CGF, PrePostActionTy &) {
    // Fill up the arrays with all the captured variables.
    MappableExprsHandler::MapCombinedInfoTy CombinedInfo;

    // Get mappable expression information.
    MappableExprsHandler MEHandler(D, CGF);
    llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers;
    llvm::DenseSet<CanonicalDeclPtr<const Decl>> MappedVarSet;

    // Walk the captures, the fields of the captured record and the captured
    // values in lockstep.
    auto RI = CS.getCapturedRecordDecl()->field_begin();
    auto *CV = CapturedVars.begin();
    for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(),
                                              CE = CS.capture_end();
         CI != CE; ++CI, ++RI, ++CV) {
      MappableExprsHandler::MapCombinedInfoTy CurInfo;
      MappableExprsHandler::StructRangeInfoTy PartialStruct;

      // VLA sizes are passed to the outlined region by copy and do not have map
      // information associated.
      if (CI->capturesVariableArrayType()) {
        CurInfo.Exprs.push_back(nullptr);
        CurInfo.BasePointers.push_back(*CV);
        CurInfo.Pointers.push_back(*CV);
        CurInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
            CGF.getTypeSize(RI->getType()), CGF.Int64Ty, /*isSigned=*/true));
        // Copy to the device as an argument. No need to retrieve it.
        CurInfo.Types.push_back(MappableExprsHandler::OMP_MAP_LITERAL |
                                MappableExprsHandler::OMP_MAP_TARGET_PARAM |
                                MappableExprsHandler::OMP_MAP_IMPLICIT);
        CurInfo.Mappers.push_back(nullptr);
      } else {
        // If we have any information in the map clause, we use it, otherwise we
        // just do a default mapping.
        MEHandler.generateInfoForCapture(CI, *CV, CurInfo, PartialStruct);
        // A 'this' capture is recorded in the set as a null declaration.
        if (!CI->capturesThis())
          MappedVarSet.insert(CI->getCapturedVar());
        else
          MappedVarSet.insert(nullptr);
        if (CurInfo.BasePointers.empty() && !PartialStruct.Base.isValid())
          MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurInfo);
        // Generate correct mapping for variables captured by reference in
        // lambdas.
        if (CI->capturesVariable())
          MEHandler.generateInfoForLambdaCaptures(CI->getCapturedVar(), *CV,
                                                  CurInfo, LambdaPointers);
      }
      // We expect to have at least an element of information for this capture.
      assert((!CurInfo.BasePointers.empty() || PartialStruct.Base.isValid()) &&
             "Non-existing map pointer for capture!");
      assert(CurInfo.BasePointers.size() == CurInfo.Pointers.size() &&
             CurInfo.BasePointers.size() == CurInfo.Sizes.size() &&
             CurInfo.BasePointers.size() == CurInfo.Types.size() &&
             CurInfo.BasePointers.size() == CurInfo.Mappers.size() &&
             "Inconsistent map information sizes!");

      // If there is an entry in PartialStruct it means we have a struct with
      // individual members mapped. Emit an extra combined entry.
      if (PartialStruct.Base.isValid()) {
        CombinedInfo.append(PartialStruct.PreliminaryMapData);
        MEHandler.emitCombinedEntry(
            CombinedInfo, CurInfo.Types, PartialStruct, nullptr,
            !PartialStruct.PreliminaryMapData.BasePointers.empty());
      }

      // We need to append the results of this capture to what we already have.
      CombinedInfo.append(CurInfo);
    }
    // Adjust MEMBER_OF flags for the lambda captures.
    MEHandler.adjustMemberOfForLambdaCaptures(
        LambdaPointers, CombinedInfo.BasePointers, CombinedInfo.Pointers,
        CombinedInfo.Types);
    // Map any list items in a map clause that were not captures because they
    // weren't referenced within the construct.
    MEHandler.generateAllInfo(CombinedInfo, MappedVarSet);

    TargetDataInfo Info;
    // Fill up the arrays and create the arguments.
    emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder);
    emitOffloadingArraysArgument(
        CGF, Info.BasePointersArray, Info.PointersArray, Info.SizesArray,
        Info.MapTypesArray, Info.MapNamesArray, Info.MappersArray, Info,
        {/*ForEndCall=*/false});

    // Publish the generated arrays through the variables that ThenGen
    // captured by reference.
    InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
    InputInfo.BasePointersArray =
        Address::deprecated(Info.BasePointersArray, CGM.getPointerAlign());
    InputInfo.PointersArray =
        Address::deprecated(Info.PointersArray, CGM.getPointerAlign());
    InputInfo.SizesArray =
        Address::deprecated(Info.SizesArray, CGM.getPointerAlign());
    InputInfo.MappersArray =
        Address::deprecated(Info.MappersArray, CGM.getPointerAlign());
    MapTypesArray = Info.MapTypesArray;
    MapNamesArray = Info.MapNamesArray;
    if (RequiresOuterTask)
      CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
    else
      emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
  };

  // Host-only path: run ElseGen, inside a task if required.
  auto &&TargetElseGen = [this, &ElseGen, &D, RequiresOuterTask](
                             CodeGenFunction &CGF, PrePostActionTy &) {
    if (RequiresOuterTask) {
      // No offloading arrays are built on this path, so a default-constructed
      // OMPTargetDataInfo is passed.
      CodeGenFunction::OMPTargetDataInfo InputInfo;
      CGF.EmitOMPTargetTaskBasedDirective(D, ElseGen, InputInfo);
    } else {
      emitInlinedDirective(CGF, D.getDirectiveKind(), ElseGen);
    }
  };

  // If we have a target function ID it means that we need to support
  // offloading, otherwise, just execute on the host. We need to execute on host
  // regardless of the conditional in the if clause if, e.g., the user does not
  // specify target triples.
  if (OutlinedFnID) {
    if (IfCond) {
      emitIfClause(CGF, IfCond, TargetThenGen, TargetElseGen);
    } else {
      RegionCodeGenTy ThenRCG(TargetThenGen);
      ThenRCG(CGF);
    }
  } else {
    RegionCodeGenTy ElseRCG(TargetElseGen);
    ElseRCG(CGF);
  }
}

void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S,
                                                    StringRef ParentName) {
  if (!S)
    return;

  // Codegen OMP target directives that offload compute to the device.
  bool RequiresDeviceCodegen =
      isa<OMPExecutableDirective>(S) &&
      isOpenMPTargetExecutionDirective(
          cast<OMPExecutableDirective>(S)->getDirectiveKind());

  if (RequiresDeviceCodegen) {
    const auto &E = *cast<OMPExecutableDirective>(S);
    unsigned DeviceID;
    unsigned FileID;
    unsigned Line;
    getTargetEntryUniqueInfo(CGM.getContext(), E.getBeginLoc(), DeviceID,
                             FileID, Line);

    // Is this a target region that should not be emitted as an entry point? If
    // so just signal we are done with this target region.
10702 if (!OffloadEntriesInfoManager.hasTargetRegionEntryInfo(DeviceID, FileID, 10703 ParentName, Line)) 10704 return; 10705 10706 switch (E.getDirectiveKind()) { 10707 case OMPD_target: 10708 CodeGenFunction::EmitOMPTargetDeviceFunction(CGM, ParentName, 10709 cast<OMPTargetDirective>(E)); 10710 break; 10711 case OMPD_target_parallel: 10712 CodeGenFunction::EmitOMPTargetParallelDeviceFunction( 10713 CGM, ParentName, cast<OMPTargetParallelDirective>(E)); 10714 break; 10715 case OMPD_target_teams: 10716 CodeGenFunction::EmitOMPTargetTeamsDeviceFunction( 10717 CGM, ParentName, cast<OMPTargetTeamsDirective>(E)); 10718 break; 10719 case OMPD_target_teams_distribute: 10720 CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction( 10721 CGM, ParentName, cast<OMPTargetTeamsDistributeDirective>(E)); 10722 break; 10723 case OMPD_target_teams_distribute_simd: 10724 CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction( 10725 CGM, ParentName, cast<OMPTargetTeamsDistributeSimdDirective>(E)); 10726 break; 10727 case OMPD_target_parallel_for: 10728 CodeGenFunction::EmitOMPTargetParallelForDeviceFunction( 10729 CGM, ParentName, cast<OMPTargetParallelForDirective>(E)); 10730 break; 10731 case OMPD_target_parallel_for_simd: 10732 CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction( 10733 CGM, ParentName, cast<OMPTargetParallelForSimdDirective>(E)); 10734 break; 10735 case OMPD_target_simd: 10736 CodeGenFunction::EmitOMPTargetSimdDeviceFunction( 10737 CGM, ParentName, cast<OMPTargetSimdDirective>(E)); 10738 break; 10739 case OMPD_target_teams_distribute_parallel_for: 10740 CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction( 10741 CGM, ParentName, 10742 cast<OMPTargetTeamsDistributeParallelForDirective>(E)); 10743 break; 10744 case OMPD_target_teams_distribute_parallel_for_simd: 10745 CodeGenFunction:: 10746 EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction( 10747 CGM, ParentName, 10748 
cast<OMPTargetTeamsDistributeParallelForSimdDirective>(E)); 10749 break; 10750 case OMPD_parallel: 10751 case OMPD_for: 10752 case OMPD_parallel_for: 10753 case OMPD_parallel_master: 10754 case OMPD_parallel_sections: 10755 case OMPD_for_simd: 10756 case OMPD_parallel_for_simd: 10757 case OMPD_cancel: 10758 case OMPD_cancellation_point: 10759 case OMPD_ordered: 10760 case OMPD_threadprivate: 10761 case OMPD_allocate: 10762 case OMPD_task: 10763 case OMPD_simd: 10764 case OMPD_tile: 10765 case OMPD_unroll: 10766 case OMPD_sections: 10767 case OMPD_section: 10768 case OMPD_single: 10769 case OMPD_master: 10770 case OMPD_critical: 10771 case OMPD_taskyield: 10772 case OMPD_barrier: 10773 case OMPD_taskwait: 10774 case OMPD_taskgroup: 10775 case OMPD_atomic: 10776 case OMPD_flush: 10777 case OMPD_depobj: 10778 case OMPD_scan: 10779 case OMPD_teams: 10780 case OMPD_target_data: 10781 case OMPD_target_exit_data: 10782 case OMPD_target_enter_data: 10783 case OMPD_distribute: 10784 case OMPD_distribute_simd: 10785 case OMPD_distribute_parallel_for: 10786 case OMPD_distribute_parallel_for_simd: 10787 case OMPD_teams_distribute: 10788 case OMPD_teams_distribute_simd: 10789 case OMPD_teams_distribute_parallel_for: 10790 case OMPD_teams_distribute_parallel_for_simd: 10791 case OMPD_target_update: 10792 case OMPD_declare_simd: 10793 case OMPD_declare_variant: 10794 case OMPD_begin_declare_variant: 10795 case OMPD_end_declare_variant: 10796 case OMPD_declare_target: 10797 case OMPD_end_declare_target: 10798 case OMPD_declare_reduction: 10799 case OMPD_declare_mapper: 10800 case OMPD_taskloop: 10801 case OMPD_taskloop_simd: 10802 case OMPD_master_taskloop: 10803 case OMPD_master_taskloop_simd: 10804 case OMPD_parallel_master_taskloop: 10805 case OMPD_parallel_master_taskloop_simd: 10806 case OMPD_requires: 10807 case OMPD_metadirective: 10808 case OMPD_unknown: 10809 default: 10810 llvm_unreachable("Unknown target directive for OpenMP device codegen."); 10811 } 10812 return; 
10813 } 10814 10815 if (const auto *E = dyn_cast<OMPExecutableDirective>(S)) { 10816 if (!E->hasAssociatedStmt() || !E->getAssociatedStmt()) 10817 return; 10818 10819 scanForTargetRegionsFunctions(E->getRawStmt(), ParentName); 10820 return; 10821 } 10822 10823 // If this is a lambda function, look into its body. 10824 if (const auto *L = dyn_cast<LambdaExpr>(S)) 10825 S = L->getBody(); 10826 10827 // Keep looking for target regions recursively. 10828 for (const Stmt *II : S->children()) 10829 scanForTargetRegionsFunctions(II, ParentName); 10830 } 10831 10832 static bool isAssumedToBeNotEmitted(const ValueDecl *VD, bool IsDevice) { 10833 Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy = 10834 OMPDeclareTargetDeclAttr::getDeviceType(VD); 10835 if (!DevTy) 10836 return false; 10837 // Do not emit device_type(nohost) functions for the host. 10838 if (!IsDevice && DevTy == OMPDeclareTargetDeclAttr::DT_NoHost) 10839 return true; 10840 // Do not emit device_type(host) functions for the device. 10841 if (IsDevice && DevTy == OMPDeclareTargetDeclAttr::DT_Host) 10842 return true; 10843 return false; 10844 } 10845 10846 bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) { 10847 // If emitting code for the host, we do not process FD here. Instead we do 10848 // the normal code generation. 10849 if (!CGM.getLangOpts().OpenMPIsDevice) { 10850 if (const auto *FD = dyn_cast<FunctionDecl>(GD.getDecl())) 10851 if (isAssumedToBeNotEmitted(cast<ValueDecl>(FD), 10852 CGM.getLangOpts().OpenMPIsDevice)) 10853 return true; 10854 return false; 10855 } 10856 10857 const ValueDecl *VD = cast<ValueDecl>(GD.getDecl()); 10858 // Try to detect target regions in the function. 
10859 if (const auto *FD = dyn_cast<FunctionDecl>(VD)) { 10860 StringRef Name = CGM.getMangledName(GD); 10861 scanForTargetRegionsFunctions(FD->getBody(), Name); 10862 if (isAssumedToBeNotEmitted(cast<ValueDecl>(FD), 10863 CGM.getLangOpts().OpenMPIsDevice)) 10864 return true; 10865 } 10866 10867 // Do not to emit function if it is not marked as declare target. 10868 return !OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD) && 10869 AlreadyEmittedTargetDecls.count(VD) == 0; 10870 } 10871 10872 bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) { 10873 if (isAssumedToBeNotEmitted(cast<ValueDecl>(GD.getDecl()), 10874 CGM.getLangOpts().OpenMPIsDevice)) 10875 return true; 10876 10877 if (!CGM.getLangOpts().OpenMPIsDevice) 10878 return false; 10879 10880 // Check if there are Ctors/Dtors in this declaration and look for target 10881 // regions in it. We use the complete variant to produce the kernel name 10882 // mangling. 10883 QualType RDTy = cast<VarDecl>(GD.getDecl())->getType(); 10884 if (const auto *RD = RDTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl()) { 10885 for (const CXXConstructorDecl *Ctor : RD->ctors()) { 10886 StringRef ParentName = 10887 CGM.getMangledName(GlobalDecl(Ctor, Ctor_Complete)); 10888 scanForTargetRegionsFunctions(Ctor->getBody(), ParentName); 10889 } 10890 if (const CXXDestructorDecl *Dtor = RD->getDestructor()) { 10891 StringRef ParentName = 10892 CGM.getMangledName(GlobalDecl(Dtor, Dtor_Complete)); 10893 scanForTargetRegionsFunctions(Dtor->getBody(), ParentName); 10894 } 10895 } 10896 10897 // Do not to emit variable if it is not marked as declare target. 
10898 llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 10899 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration( 10900 cast<VarDecl>(GD.getDecl())); 10901 if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link || 10902 (*Res == OMPDeclareTargetDeclAttr::MT_To && 10903 HasRequiresUnifiedSharedMemory)) { 10904 DeferredGlobalVariables.insert(cast<VarDecl>(GD.getDecl())); 10905 return true; 10906 } 10907 return false; 10908 } 10909 10910 void CGOpenMPRuntime::registerTargetGlobalVariable(const VarDecl *VD, 10911 llvm::Constant *Addr) { 10912 if (CGM.getLangOpts().OMPTargetTriples.empty() && 10913 !CGM.getLangOpts().OpenMPIsDevice) 10914 return; 10915 10916 // If we have host/nohost variables, they do not need to be registered. 10917 Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy = 10918 OMPDeclareTargetDeclAttr::getDeviceType(VD); 10919 if (DevTy && DevTy.getValue() != OMPDeclareTargetDeclAttr::DT_Any) 10920 return; 10921 10922 llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 10923 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD); 10924 if (!Res) { 10925 if (CGM.getLangOpts().OpenMPIsDevice) { 10926 // Register non-target variables being emitted in device code (debug info 10927 // may cause this). 10928 StringRef VarName = CGM.getMangledName(VD); 10929 EmittedNonTargetVariables.try_emplace(VarName, Addr); 10930 } 10931 return; 10932 } 10933 // Register declare target variables. 
10934 OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags; 10935 StringRef VarName; 10936 CharUnits VarSize; 10937 llvm::GlobalValue::LinkageTypes Linkage; 10938 10939 if (*Res == OMPDeclareTargetDeclAttr::MT_To && 10940 !HasRequiresUnifiedSharedMemory) { 10941 Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo; 10942 VarName = CGM.getMangledName(VD); 10943 if (VD->hasDefinition(CGM.getContext()) != VarDecl::DeclarationOnly) { 10944 VarSize = CGM.getContext().getTypeSizeInChars(VD->getType()); 10945 assert(!VarSize.isZero() && "Expected non-zero size of the variable"); 10946 } else { 10947 VarSize = CharUnits::Zero(); 10948 } 10949 Linkage = CGM.getLLVMLinkageVarDefinition(VD, /*IsConstant=*/false); 10950 // Temp solution to prevent optimizations of the internal variables. 10951 if (CGM.getLangOpts().OpenMPIsDevice && !VD->isExternallyVisible()) { 10952 // Do not create a "ref-variable" if the original is not also available 10953 // on the host. 10954 if (!OffloadEntriesInfoManager.hasDeviceGlobalVarEntryInfo(VarName)) 10955 return; 10956 std::string RefName = getName({VarName, "ref"}); 10957 if (!CGM.GetGlobalValue(RefName)) { 10958 llvm::Constant *AddrRef = 10959 getOrCreateInternalVariable(Addr->getType(), RefName); 10960 auto *GVAddrRef = cast<llvm::GlobalVariable>(AddrRef); 10961 GVAddrRef->setConstant(/*Val=*/true); 10962 GVAddrRef->setLinkage(llvm::GlobalValue::InternalLinkage); 10963 GVAddrRef->setInitializer(Addr); 10964 CGM.addCompilerUsedGlobal(GVAddrRef); 10965 } 10966 } 10967 } else { 10968 assert(((*Res == OMPDeclareTargetDeclAttr::MT_Link) || 10969 (*Res == OMPDeclareTargetDeclAttr::MT_To && 10970 HasRequiresUnifiedSharedMemory)) && 10971 "Declare target attribute must link or to with unified memory."); 10972 if (*Res == OMPDeclareTargetDeclAttr::MT_Link) 10973 Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink; 10974 else 10975 Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo; 10976 10977 if 
(CGM.getLangOpts().OpenMPIsDevice) { 10978 VarName = Addr->getName(); 10979 Addr = nullptr; 10980 } else { 10981 VarName = getAddrOfDeclareTargetVar(VD).getName(); 10982 Addr = cast<llvm::Constant>(getAddrOfDeclareTargetVar(VD).getPointer()); 10983 } 10984 VarSize = CGM.getPointerSize(); 10985 Linkage = llvm::GlobalValue::WeakAnyLinkage; 10986 } 10987 10988 OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo( 10989 VarName, Addr, VarSize, Flags, Linkage); 10990 } 10991 10992 bool CGOpenMPRuntime::emitTargetGlobal(GlobalDecl GD) { 10993 if (isa<FunctionDecl>(GD.getDecl()) || 10994 isa<OMPDeclareReductionDecl>(GD.getDecl())) 10995 return emitTargetFunctions(GD); 10996 10997 return emitTargetGlobalVariable(GD); 10998 } 10999 11000 void CGOpenMPRuntime::emitDeferredTargetDecls() const { 11001 for (const VarDecl *VD : DeferredGlobalVariables) { 11002 llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 11003 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD); 11004 if (!Res) 11005 continue; 11006 if (*Res == OMPDeclareTargetDeclAttr::MT_To && 11007 !HasRequiresUnifiedSharedMemory) { 11008 CGM.EmitGlobal(VD); 11009 } else { 11010 assert((*Res == OMPDeclareTargetDeclAttr::MT_Link || 11011 (*Res == OMPDeclareTargetDeclAttr::MT_To && 11012 HasRequiresUnifiedSharedMemory)) && 11013 "Expected link clause or to clause with unified memory."); 11014 (void)CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD); 11015 } 11016 } 11017 } 11018 11019 void CGOpenMPRuntime::adjustTargetSpecificDataForLambdas( 11020 CodeGenFunction &CGF, const OMPExecutableDirective &D) const { 11021 assert(isOpenMPTargetExecutionDirective(D.getDirectiveKind()) && 11022 " Expected target-based directive."); 11023 } 11024 11025 void CGOpenMPRuntime::processRequiresDirective(const OMPRequiresDecl *D) { 11026 for (const OMPClause *Clause : D->clauselists()) { 11027 if (Clause->getClauseKind() == OMPC_unified_shared_memory) { 11028 HasRequiresUnifiedSharedMemory = true; 11029 } else if 
(const auto *AC = 11030 dyn_cast<OMPAtomicDefaultMemOrderClause>(Clause)) { 11031 switch (AC->getAtomicDefaultMemOrderKind()) { 11032 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_acq_rel: 11033 RequiresAtomicOrdering = llvm::AtomicOrdering::AcquireRelease; 11034 break; 11035 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_seq_cst: 11036 RequiresAtomicOrdering = llvm::AtomicOrdering::SequentiallyConsistent; 11037 break; 11038 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_relaxed: 11039 RequiresAtomicOrdering = llvm::AtomicOrdering::Monotonic; 11040 break; 11041 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_unknown: 11042 break; 11043 } 11044 } 11045 } 11046 } 11047 11048 llvm::AtomicOrdering CGOpenMPRuntime::getDefaultMemoryOrdering() const { 11049 return RequiresAtomicOrdering; 11050 } 11051 11052 bool CGOpenMPRuntime::hasAllocateAttributeForGlobalVar(const VarDecl *VD, 11053 LangAS &AS) { 11054 if (!VD || !VD->hasAttr<OMPAllocateDeclAttr>()) 11055 return false; 11056 const auto *A = VD->getAttr<OMPAllocateDeclAttr>(); 11057 switch(A->getAllocatorType()) { 11058 case OMPAllocateDeclAttr::OMPNullMemAlloc: 11059 case OMPAllocateDeclAttr::OMPDefaultMemAlloc: 11060 // Not supported, fallback to the default mem space. 
11061 case OMPAllocateDeclAttr::OMPLargeCapMemAlloc: 11062 case OMPAllocateDeclAttr::OMPCGroupMemAlloc: 11063 case OMPAllocateDeclAttr::OMPHighBWMemAlloc: 11064 case OMPAllocateDeclAttr::OMPLowLatMemAlloc: 11065 case OMPAllocateDeclAttr::OMPThreadMemAlloc: 11066 case OMPAllocateDeclAttr::OMPConstMemAlloc: 11067 case OMPAllocateDeclAttr::OMPPTeamMemAlloc: 11068 AS = LangAS::Default; 11069 return true; 11070 case OMPAllocateDeclAttr::OMPUserDefinedMemAlloc: 11071 llvm_unreachable("Expected predefined allocator for the variables with the " 11072 "static storage."); 11073 } 11074 return false; 11075 } 11076 11077 bool CGOpenMPRuntime::hasRequiresUnifiedSharedMemory() const { 11078 return HasRequiresUnifiedSharedMemory; 11079 } 11080 11081 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::DisableAutoDeclareTargetRAII( 11082 CodeGenModule &CGM) 11083 : CGM(CGM) { 11084 if (CGM.getLangOpts().OpenMPIsDevice) { 11085 SavedShouldMarkAsGlobal = CGM.getOpenMPRuntime().ShouldMarkAsGlobal; 11086 CGM.getOpenMPRuntime().ShouldMarkAsGlobal = false; 11087 } 11088 } 11089 11090 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::~DisableAutoDeclareTargetRAII() { 11091 if (CGM.getLangOpts().OpenMPIsDevice) 11092 CGM.getOpenMPRuntime().ShouldMarkAsGlobal = SavedShouldMarkAsGlobal; 11093 } 11094 11095 bool CGOpenMPRuntime::markAsGlobalTarget(GlobalDecl GD) { 11096 if (!CGM.getLangOpts().OpenMPIsDevice || !ShouldMarkAsGlobal) 11097 return true; 11098 11099 const auto *D = cast<FunctionDecl>(GD.getDecl()); 11100 // Do not to emit function if it is marked as declare target as it was already 11101 // emitted. 
11102 if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(D)) { 11103 if (D->hasBody() && AlreadyEmittedTargetDecls.count(D) == 0) { 11104 if (auto *F = dyn_cast_or_null<llvm::Function>( 11105 CGM.GetGlobalValue(CGM.getMangledName(GD)))) 11106 return !F->isDeclaration(); 11107 return false; 11108 } 11109 return true; 11110 } 11111 11112 return !AlreadyEmittedTargetDecls.insert(D).second; 11113 } 11114 11115 llvm::Function *CGOpenMPRuntime::emitRequiresDirectiveRegFun() { 11116 // If we don't have entries or if we are emitting code for the device, we 11117 // don't need to do anything. 11118 if (CGM.getLangOpts().OMPTargetTriples.empty() || 11119 CGM.getLangOpts().OpenMPSimd || CGM.getLangOpts().OpenMPIsDevice || 11120 (OffloadEntriesInfoManager.empty() && 11121 !HasEmittedDeclareTargetRegion && 11122 !HasEmittedTargetRegion)) 11123 return nullptr; 11124 11125 // Create and register the function that handles the requires directives. 11126 ASTContext &C = CGM.getContext(); 11127 11128 llvm::Function *RequiresRegFn; 11129 { 11130 CodeGenFunction CGF(CGM); 11131 const auto &FI = CGM.getTypes().arrangeNullaryFunction(); 11132 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); 11133 std::string ReqName = getName({"omp_offloading", "requires_reg"}); 11134 RequiresRegFn = CGM.CreateGlobalInitOrCleanUpFunction(FTy, ReqName, FI); 11135 CGF.StartFunction(GlobalDecl(), C.VoidTy, RequiresRegFn, FI, {}); 11136 OpenMPOffloadingRequiresDirFlags Flags = OMP_REQ_NONE; 11137 // TODO: check for other requires clauses. 11138 // The requires directive takes effect only when a target region is 11139 // present in the compilation unit. Otherwise it is ignored and not 11140 // passed to the runtime. This avoids the runtime from throwing an error 11141 // for mismatching requires clauses across compilation units that don't 11142 // contain at least 1 target region. 
11143 assert((HasEmittedTargetRegion || 11144 HasEmittedDeclareTargetRegion || 11145 !OffloadEntriesInfoManager.empty()) && 11146 "Target or declare target region expected."); 11147 if (HasRequiresUnifiedSharedMemory) 11148 Flags = OMP_REQ_UNIFIED_SHARED_MEMORY; 11149 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 11150 CGM.getModule(), OMPRTL___tgt_register_requires), 11151 llvm::ConstantInt::get(CGM.Int64Ty, Flags)); 11152 CGF.FinishFunction(); 11153 } 11154 return RequiresRegFn; 11155 } 11156 11157 void CGOpenMPRuntime::emitTeamsCall(CodeGenFunction &CGF, 11158 const OMPExecutableDirective &D, 11159 SourceLocation Loc, 11160 llvm::Function *OutlinedFn, 11161 ArrayRef<llvm::Value *> CapturedVars) { 11162 if (!CGF.HaveInsertPoint()) 11163 return; 11164 11165 llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc); 11166 CodeGenFunction::RunCleanupsScope Scope(CGF); 11167 11168 // Build call __kmpc_fork_teams(loc, n, microtask, var1, .., varn); 11169 llvm::Value *Args[] = { 11170 RTLoc, 11171 CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars 11172 CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy())}; 11173 llvm::SmallVector<llvm::Value *, 16> RealArgs; 11174 RealArgs.append(std::begin(Args), std::end(Args)); 11175 RealArgs.append(CapturedVars.begin(), CapturedVars.end()); 11176 11177 llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction( 11178 CGM.getModule(), OMPRTL___kmpc_fork_teams); 11179 CGF.EmitRuntimeCall(RTLFn, RealArgs); 11180 } 11181 11182 void CGOpenMPRuntime::emitNumTeamsClause(CodeGenFunction &CGF, 11183 const Expr *NumTeams, 11184 const Expr *ThreadLimit, 11185 SourceLocation Loc) { 11186 if (!CGF.HaveInsertPoint()) 11187 return; 11188 11189 llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc); 11190 11191 llvm::Value *NumTeamsVal = 11192 NumTeams 11193 ? 
CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(NumTeams), 11194 CGF.CGM.Int32Ty, /* isSigned = */ true) 11195 : CGF.Builder.getInt32(0); 11196 11197 llvm::Value *ThreadLimitVal = 11198 ThreadLimit 11199 ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit), 11200 CGF.CGM.Int32Ty, /* isSigned = */ true) 11201 : CGF.Builder.getInt32(0); 11202 11203 // Build call __kmpc_push_num_teamss(&loc, global_tid, num_teams, thread_limit) 11204 llvm::Value *PushNumTeamsArgs[] = {RTLoc, getThreadID(CGF, Loc), NumTeamsVal, 11205 ThreadLimitVal}; 11206 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 11207 CGM.getModule(), OMPRTL___kmpc_push_num_teams), 11208 PushNumTeamsArgs); 11209 } 11210 11211 void CGOpenMPRuntime::emitTargetDataCalls( 11212 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, 11213 const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) { 11214 if (!CGF.HaveInsertPoint()) 11215 return; 11216 11217 // Action used to replace the default codegen action and turn privatization 11218 // off. 11219 PrePostActionTy NoPrivAction; 11220 11221 // Generate the code for the opening of the data environment. Capture all the 11222 // arguments of the runtime call by reference because they are used in the 11223 // closing of the region. 11224 auto &&BeginThenGen = [this, &D, Device, &Info, 11225 &CodeGen](CodeGenFunction &CGF, PrePostActionTy &) { 11226 // Fill up the arrays with all the mapped variables. 11227 MappableExprsHandler::MapCombinedInfoTy CombinedInfo; 11228 11229 // Get map clause information. 11230 MappableExprsHandler MEHandler(D, CGF); 11231 MEHandler.generateAllInfo(CombinedInfo); 11232 11233 // Fill up the arrays and create the arguments. 
11234 emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder, 11235 /*IsNonContiguous=*/true); 11236 11237 llvm::Value *BasePointersArrayArg = nullptr; 11238 llvm::Value *PointersArrayArg = nullptr; 11239 llvm::Value *SizesArrayArg = nullptr; 11240 llvm::Value *MapTypesArrayArg = nullptr; 11241 llvm::Value *MapNamesArrayArg = nullptr; 11242 llvm::Value *MappersArrayArg = nullptr; 11243 emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg, 11244 SizesArrayArg, MapTypesArrayArg, 11245 MapNamesArrayArg, MappersArrayArg, Info); 11246 11247 // Emit device ID if any. 11248 llvm::Value *DeviceID = nullptr; 11249 if (Device) { 11250 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device), 11251 CGF.Int64Ty, /*isSigned=*/true); 11252 } else { 11253 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF); 11254 } 11255 11256 // Emit the number of elements in the offloading arrays. 11257 llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs); 11258 // 11259 // Source location for the ident struct 11260 llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc()); 11261 11262 llvm::Value *OffloadingArgs[] = {RTLoc, 11263 DeviceID, 11264 PointerNum, 11265 BasePointersArrayArg, 11266 PointersArrayArg, 11267 SizesArrayArg, 11268 MapTypesArrayArg, 11269 MapNamesArrayArg, 11270 MappersArrayArg}; 11271 CGF.EmitRuntimeCall( 11272 OMPBuilder.getOrCreateRuntimeFunction( 11273 CGM.getModule(), OMPRTL___tgt_target_data_begin_mapper), 11274 OffloadingArgs); 11275 11276 // If device pointer privatization is required, emit the body of the region 11277 // here. It will have to be duplicated: with and without privatization. 11278 if (!Info.CaptureDeviceAddrMap.empty()) 11279 CodeGen(CGF); 11280 }; 11281 11282 // Generate code for the closing of the data region. 
11283 auto &&EndThenGen = [this, Device, &Info, &D](CodeGenFunction &CGF, 11284 PrePostActionTy &) { 11285 assert(Info.isValid() && "Invalid data environment closing arguments."); 11286 11287 llvm::Value *BasePointersArrayArg = nullptr; 11288 llvm::Value *PointersArrayArg = nullptr; 11289 llvm::Value *SizesArrayArg = nullptr; 11290 llvm::Value *MapTypesArrayArg = nullptr; 11291 llvm::Value *MapNamesArrayArg = nullptr; 11292 llvm::Value *MappersArrayArg = nullptr; 11293 emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg, 11294 SizesArrayArg, MapTypesArrayArg, 11295 MapNamesArrayArg, MappersArrayArg, Info, 11296 {/*ForEndCall=*/true}); 11297 11298 // Emit device ID if any. 11299 llvm::Value *DeviceID = nullptr; 11300 if (Device) { 11301 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device), 11302 CGF.Int64Ty, /*isSigned=*/true); 11303 } else { 11304 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF); 11305 } 11306 11307 // Emit the number of elements in the offloading arrays. 11308 llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs); 11309 11310 // Source location for the ident struct 11311 llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc()); 11312 11313 llvm::Value *OffloadingArgs[] = {RTLoc, 11314 DeviceID, 11315 PointerNum, 11316 BasePointersArrayArg, 11317 PointersArrayArg, 11318 SizesArrayArg, 11319 MapTypesArrayArg, 11320 MapNamesArrayArg, 11321 MappersArrayArg}; 11322 CGF.EmitRuntimeCall( 11323 OMPBuilder.getOrCreateRuntimeFunction( 11324 CGM.getModule(), OMPRTL___tgt_target_data_end_mapper), 11325 OffloadingArgs); 11326 }; 11327 11328 // If we need device pointer privatization, we need to emit the body of the 11329 // region with no privatization in the 'else' branch of the conditional. 11330 // Otherwise, we don't have to do anything. 
11331 auto &&BeginElseGen = [&Info, &CodeGen, &NoPrivAction](CodeGenFunction &CGF, 11332 PrePostActionTy &) { 11333 if (!Info.CaptureDeviceAddrMap.empty()) { 11334 CodeGen.setAction(NoPrivAction); 11335 CodeGen(CGF); 11336 } 11337 }; 11338 11339 // We don't have to do anything to close the region if the if clause evaluates 11340 // to false. 11341 auto &&EndElseGen = [](CodeGenFunction &CGF, PrePostActionTy &) {}; 11342 11343 if (IfCond) { 11344 emitIfClause(CGF, IfCond, BeginThenGen, BeginElseGen); 11345 } else { 11346 RegionCodeGenTy RCG(BeginThenGen); 11347 RCG(CGF); 11348 } 11349 11350 // If we don't require privatization of device pointers, we emit the body in 11351 // between the runtime calls. This avoids duplicating the body code. 11352 if (Info.CaptureDeviceAddrMap.empty()) { 11353 CodeGen.setAction(NoPrivAction); 11354 CodeGen(CGF); 11355 } 11356 11357 if (IfCond) { 11358 emitIfClause(CGF, IfCond, EndThenGen, EndElseGen); 11359 } else { 11360 RegionCodeGenTy RCG(EndThenGen); 11361 RCG(CGF); 11362 } 11363 } 11364 11365 void CGOpenMPRuntime::emitTargetDataStandAloneCall( 11366 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, 11367 const Expr *Device) { 11368 if (!CGF.HaveInsertPoint()) 11369 return; 11370 11371 assert((isa<OMPTargetEnterDataDirective>(D) || 11372 isa<OMPTargetExitDataDirective>(D) || 11373 isa<OMPTargetUpdateDirective>(D)) && 11374 "Expecting either target enter, exit data, or update directives."); 11375 11376 CodeGenFunction::OMPTargetDataInfo InputInfo; 11377 llvm::Value *MapTypesArray = nullptr; 11378 llvm::Value *MapNamesArray = nullptr; 11379 // Generate the code for the opening of the data environment. 11380 auto &&ThenGen = [this, &D, Device, &InputInfo, &MapTypesArray, 11381 &MapNamesArray](CodeGenFunction &CGF, PrePostActionTy &) { 11382 // Emit device ID if any. 
11383 llvm::Value *DeviceID = nullptr; 11384 if (Device) { 11385 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device), 11386 CGF.Int64Ty, /*isSigned=*/true); 11387 } else { 11388 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF); 11389 } 11390 11391 // Emit the number of elements in the offloading arrays. 11392 llvm::Constant *PointerNum = 11393 CGF.Builder.getInt32(InputInfo.NumberOfTargetItems); 11394 11395 // Source location for the ident struct 11396 llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc()); 11397 11398 llvm::Value *OffloadingArgs[] = {RTLoc, 11399 DeviceID, 11400 PointerNum, 11401 InputInfo.BasePointersArray.getPointer(), 11402 InputInfo.PointersArray.getPointer(), 11403 InputInfo.SizesArray.getPointer(), 11404 MapTypesArray, 11405 MapNamesArray, 11406 InputInfo.MappersArray.getPointer()}; 11407 11408 // Select the right runtime function call for each standalone 11409 // directive. 11410 const bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>(); 11411 RuntimeFunction RTLFn; 11412 switch (D.getDirectiveKind()) { 11413 case OMPD_target_enter_data: 11414 RTLFn = HasNowait ? OMPRTL___tgt_target_data_begin_nowait_mapper 11415 : OMPRTL___tgt_target_data_begin_mapper; 11416 break; 11417 case OMPD_target_exit_data: 11418 RTLFn = HasNowait ? OMPRTL___tgt_target_data_end_nowait_mapper 11419 : OMPRTL___tgt_target_data_end_mapper; 11420 break; 11421 case OMPD_target_update: 11422 RTLFn = HasNowait ? 
OMPRTL___tgt_target_data_update_nowait_mapper 11423 : OMPRTL___tgt_target_data_update_mapper; 11424 break; 11425 case OMPD_parallel: 11426 case OMPD_for: 11427 case OMPD_parallel_for: 11428 case OMPD_parallel_master: 11429 case OMPD_parallel_sections: 11430 case OMPD_for_simd: 11431 case OMPD_parallel_for_simd: 11432 case OMPD_cancel: 11433 case OMPD_cancellation_point: 11434 case OMPD_ordered: 11435 case OMPD_threadprivate: 11436 case OMPD_allocate: 11437 case OMPD_task: 11438 case OMPD_simd: 11439 case OMPD_tile: 11440 case OMPD_unroll: 11441 case OMPD_sections: 11442 case OMPD_section: 11443 case OMPD_single: 11444 case OMPD_master: 11445 case OMPD_critical: 11446 case OMPD_taskyield: 11447 case OMPD_barrier: 11448 case OMPD_taskwait: 11449 case OMPD_taskgroup: 11450 case OMPD_atomic: 11451 case OMPD_flush: 11452 case OMPD_depobj: 11453 case OMPD_scan: 11454 case OMPD_teams: 11455 case OMPD_target_data: 11456 case OMPD_distribute: 11457 case OMPD_distribute_simd: 11458 case OMPD_distribute_parallel_for: 11459 case OMPD_distribute_parallel_for_simd: 11460 case OMPD_teams_distribute: 11461 case OMPD_teams_distribute_simd: 11462 case OMPD_teams_distribute_parallel_for: 11463 case OMPD_teams_distribute_parallel_for_simd: 11464 case OMPD_declare_simd: 11465 case OMPD_declare_variant: 11466 case OMPD_begin_declare_variant: 11467 case OMPD_end_declare_variant: 11468 case OMPD_declare_target: 11469 case OMPD_end_declare_target: 11470 case OMPD_declare_reduction: 11471 case OMPD_declare_mapper: 11472 case OMPD_taskloop: 11473 case OMPD_taskloop_simd: 11474 case OMPD_master_taskloop: 11475 case OMPD_master_taskloop_simd: 11476 case OMPD_parallel_master_taskloop: 11477 case OMPD_parallel_master_taskloop_simd: 11478 case OMPD_target: 11479 case OMPD_target_simd: 11480 case OMPD_target_teams_distribute: 11481 case OMPD_target_teams_distribute_simd: 11482 case OMPD_target_teams_distribute_parallel_for: 11483 case OMPD_target_teams_distribute_parallel_for_simd: 11484 case 
OMPD_target_teams: 11485 case OMPD_target_parallel: 11486 case OMPD_target_parallel_for: 11487 case OMPD_target_parallel_for_simd: 11488 case OMPD_requires: 11489 case OMPD_metadirective: 11490 case OMPD_unknown: 11491 default: 11492 llvm_unreachable("Unexpected standalone target data directive."); 11493 break; 11494 } 11495 CGF.EmitRuntimeCall( 11496 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), RTLFn), 11497 OffloadingArgs); 11498 }; 11499 11500 auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray, 11501 &MapNamesArray](CodeGenFunction &CGF, 11502 PrePostActionTy &) { 11503 // Fill up the arrays with all the mapped variables. 11504 MappableExprsHandler::MapCombinedInfoTy CombinedInfo; 11505 11506 // Get map clause information. 11507 MappableExprsHandler MEHandler(D, CGF); 11508 MEHandler.generateAllInfo(CombinedInfo); 11509 11510 TargetDataInfo Info; 11511 // Fill up the arrays and create the arguments. 11512 emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder, 11513 /*IsNonContiguous=*/true); 11514 bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>() || 11515 D.hasClausesOfKind<OMPNowaitClause>(); 11516 emitOffloadingArraysArgument( 11517 CGF, Info.BasePointersArray, Info.PointersArray, Info.SizesArray, 11518 Info.MapTypesArray, Info.MapNamesArray, Info.MappersArray, Info, 11519 {/*ForEndCall=*/false}); 11520 InputInfo.NumberOfTargetItems = Info.NumberOfPtrs; 11521 InputInfo.BasePointersArray = 11522 Address::deprecated(Info.BasePointersArray, CGM.getPointerAlign()); 11523 InputInfo.PointersArray = 11524 Address::deprecated(Info.PointersArray, CGM.getPointerAlign()); 11525 InputInfo.SizesArray = 11526 Address::deprecated(Info.SizesArray, CGM.getPointerAlign()); 11527 InputInfo.MappersArray = 11528 Address::deprecated(Info.MappersArray, CGM.getPointerAlign()); 11529 MapTypesArray = Info.MapTypesArray; 11530 MapNamesArray = Info.MapNamesArray; 11531 if (RequiresOuterTask) 11532 CGF.EmitOMPTargetTaskBasedDirective(D, 
ThenGen, InputInfo); 11533 else 11534 emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen); 11535 }; 11536 11537 if (IfCond) { 11538 emitIfClause(CGF, IfCond, TargetThenGen, 11539 [](CodeGenFunction &CGF, PrePostActionTy &) {}); 11540 } else { 11541 RegionCodeGenTy ThenRCG(TargetThenGen); 11542 ThenRCG(CGF); 11543 } 11544 } 11545 11546 namespace { 11547 /// Kind of parameter in a function with 'declare simd' directive. 11548 enum ParamKindTy { LinearWithVarStride, Linear, Uniform, Vector }; 11549 /// Attribute set of the parameter. 11550 struct ParamAttrTy { 11551 ParamKindTy Kind = Vector; 11552 llvm::APSInt StrideOrArg; 11553 llvm::APSInt Alignment; 11554 }; 11555 } // namespace 11556 11557 static unsigned evaluateCDTSize(const FunctionDecl *FD, 11558 ArrayRef<ParamAttrTy> ParamAttrs) { 11559 // Every vector variant of a SIMD-enabled function has a vector length (VLEN). 11560 // If OpenMP clause "simdlen" is used, the VLEN is the value of the argument 11561 // of that clause. The VLEN value must be power of 2. 11562 // In other case the notion of the function`s "characteristic data type" (CDT) 11563 // is used to compute the vector length. 11564 // CDT is defined in the following order: 11565 // a) For non-void function, the CDT is the return type. 11566 // b) If the function has any non-uniform, non-linear parameters, then the 11567 // CDT is the type of the first such parameter. 11568 // c) If the CDT determined by a) or b) above is struct, union, or class 11569 // type which is pass-by-value (except for the type that maps to the 11570 // built-in complex data type), the characteristic data type is int. 11571 // d) If none of the above three cases is applicable, the CDT is int. 11572 // The VLEN is then determined based on the CDT and the size of vector 11573 // register of that ISA for which current vector version is generated. 
The 11574 // VLEN is computed using the formula below: 11575 // VLEN = sizeof(vector_register) / sizeof(CDT), 11576 // where vector register size specified in section 3.2.1 Registers and the 11577 // Stack Frame of original AMD64 ABI document. 11578 QualType RetType = FD->getReturnType(); 11579 if (RetType.isNull()) 11580 return 0; 11581 ASTContext &C = FD->getASTContext(); 11582 QualType CDT; 11583 if (!RetType.isNull() && !RetType->isVoidType()) { 11584 CDT = RetType; 11585 } else { 11586 unsigned Offset = 0; 11587 if (const auto *MD = dyn_cast<CXXMethodDecl>(FD)) { 11588 if (ParamAttrs[Offset].Kind == Vector) 11589 CDT = C.getPointerType(C.getRecordType(MD->getParent())); 11590 ++Offset; 11591 } 11592 if (CDT.isNull()) { 11593 for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) { 11594 if (ParamAttrs[I + Offset].Kind == Vector) { 11595 CDT = FD->getParamDecl(I)->getType(); 11596 break; 11597 } 11598 } 11599 } 11600 } 11601 if (CDT.isNull()) 11602 CDT = C.IntTy; 11603 CDT = CDT->getCanonicalTypeUnqualified(); 11604 if (CDT->isRecordType() || CDT->isUnionType()) 11605 CDT = C.IntTy; 11606 return C.getTypeSize(CDT); 11607 } 11608 11609 static void 11610 emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn, 11611 const llvm::APSInt &VLENVal, 11612 ArrayRef<ParamAttrTy> ParamAttrs, 11613 OMPDeclareSimdDeclAttr::BranchStateTy State) { 11614 struct ISADataTy { 11615 char ISA; 11616 unsigned VecRegSize; 11617 }; 11618 ISADataTy ISAData[] = { 11619 { 11620 'b', 128 11621 }, // SSE 11622 { 11623 'c', 256 11624 }, // AVX 11625 { 11626 'd', 256 11627 }, // AVX2 11628 { 11629 'e', 512 11630 }, // AVX512 11631 }; 11632 llvm::SmallVector<char, 2> Masked; 11633 switch (State) { 11634 case OMPDeclareSimdDeclAttr::BS_Undefined: 11635 Masked.push_back('N'); 11636 Masked.push_back('M'); 11637 break; 11638 case OMPDeclareSimdDeclAttr::BS_Notinbranch: 11639 Masked.push_back('N'); 11640 break; 11641 case OMPDeclareSimdDeclAttr::BS_Inbranch: 11642 
Masked.push_back('M'); 11643 break; 11644 } 11645 for (char Mask : Masked) { 11646 for (const ISADataTy &Data : ISAData) { 11647 SmallString<256> Buffer; 11648 llvm::raw_svector_ostream Out(Buffer); 11649 Out << "_ZGV" << Data.ISA << Mask; 11650 if (!VLENVal) { 11651 unsigned NumElts = evaluateCDTSize(FD, ParamAttrs); 11652 assert(NumElts && "Non-zero simdlen/cdtsize expected"); 11653 Out << llvm::APSInt::getUnsigned(Data.VecRegSize / NumElts); 11654 } else { 11655 Out << VLENVal; 11656 } 11657 for (const ParamAttrTy &ParamAttr : ParamAttrs) { 11658 switch (ParamAttr.Kind){ 11659 case LinearWithVarStride: 11660 Out << 's' << ParamAttr.StrideOrArg; 11661 break; 11662 case Linear: 11663 Out << 'l'; 11664 if (ParamAttr.StrideOrArg != 1) 11665 Out << ParamAttr.StrideOrArg; 11666 break; 11667 case Uniform: 11668 Out << 'u'; 11669 break; 11670 case Vector: 11671 Out << 'v'; 11672 break; 11673 } 11674 if (!!ParamAttr.Alignment) 11675 Out << 'a' << ParamAttr.Alignment; 11676 } 11677 Out << '_' << Fn->getName(); 11678 Fn->addFnAttr(Out.str()); 11679 } 11680 } 11681 } 11682 11683 // This are the Functions that are needed to mangle the name of the 11684 // vector functions generated by the compiler, according to the rules 11685 // defined in the "Vector Function ABI specifications for AArch64", 11686 // available at 11687 // https://developer.arm.com/products/software-development-tools/hpc/arm-compiler-for-hpc/vector-function-abi. 11688 11689 /// Maps To Vector (MTV), as defined in 3.1.1 of the AAVFABI. 11690 /// 11691 /// TODO: Need to implement the behavior for reference marked with a 11692 /// var or no linear modifiers (1.b in the section). For this, we 11693 /// need to extend ParamKindTy to support the linear modifiers. 
11694 static bool getAArch64MTV(QualType QT, ParamKindTy Kind) { 11695 QT = QT.getCanonicalType(); 11696 11697 if (QT->isVoidType()) 11698 return false; 11699 11700 if (Kind == ParamKindTy::Uniform) 11701 return false; 11702 11703 if (Kind == ParamKindTy::Linear) 11704 return false; 11705 11706 // TODO: Handle linear references with modifiers 11707 11708 if (Kind == ParamKindTy::LinearWithVarStride) 11709 return false; 11710 11711 return true; 11712 } 11713 11714 /// Pass By Value (PBV), as defined in 3.1.2 of the AAVFABI. 11715 static bool getAArch64PBV(QualType QT, ASTContext &C) { 11716 QT = QT.getCanonicalType(); 11717 unsigned Size = C.getTypeSize(QT); 11718 11719 // Only scalars and complex within 16 bytes wide set PVB to true. 11720 if (Size != 8 && Size != 16 && Size != 32 && Size != 64 && Size != 128) 11721 return false; 11722 11723 if (QT->isFloatingType()) 11724 return true; 11725 11726 if (QT->isIntegerType()) 11727 return true; 11728 11729 if (QT->isPointerType()) 11730 return true; 11731 11732 // TODO: Add support for complex types (section 3.1.2, item 2). 11733 11734 return false; 11735 } 11736 11737 /// Computes the lane size (LS) of a return type or of an input parameter, 11738 /// as defined by `LS(P)` in 3.2.1 of the AAVFABI. 11739 /// TODO: Add support for references, section 3.2.1, item 1. 11740 static unsigned getAArch64LS(QualType QT, ParamKindTy Kind, ASTContext &C) { 11741 if (!getAArch64MTV(QT, Kind) && QT.getCanonicalType()->isPointerType()) { 11742 QualType PTy = QT.getCanonicalType()->getPointeeType(); 11743 if (getAArch64PBV(PTy, C)) 11744 return C.getTypeSize(PTy); 11745 } 11746 if (getAArch64PBV(QT, C)) 11747 return C.getTypeSize(QT); 11748 11749 return C.getTypeSize(C.getUIntPtrType()); 11750 } 11751 11752 // Get Narrowest Data Size (NDS) and Widest Data Size (WDS) from the 11753 // signature of the scalar function, as defined in 3.2.2 of the 11754 // AAVFABI. 
11755 static std::tuple<unsigned, unsigned, bool> 11756 getNDSWDS(const FunctionDecl *FD, ArrayRef<ParamAttrTy> ParamAttrs) { 11757 QualType RetType = FD->getReturnType().getCanonicalType(); 11758 11759 ASTContext &C = FD->getASTContext(); 11760 11761 bool OutputBecomesInput = false; 11762 11763 llvm::SmallVector<unsigned, 8> Sizes; 11764 if (!RetType->isVoidType()) { 11765 Sizes.push_back(getAArch64LS(RetType, ParamKindTy::Vector, C)); 11766 if (!getAArch64PBV(RetType, C) && getAArch64MTV(RetType, {})) 11767 OutputBecomesInput = true; 11768 } 11769 for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) { 11770 QualType QT = FD->getParamDecl(I)->getType().getCanonicalType(); 11771 Sizes.push_back(getAArch64LS(QT, ParamAttrs[I].Kind, C)); 11772 } 11773 11774 assert(!Sizes.empty() && "Unable to determine NDS and WDS."); 11775 // The LS of a function parameter / return value can only be a power 11776 // of 2, starting from 8 bits, up to 128. 11777 assert(llvm::all_of(Sizes, 11778 [](unsigned Size) { 11779 return Size == 8 || Size == 16 || Size == 32 || 11780 Size == 64 || Size == 128; 11781 }) && 11782 "Invalid size"); 11783 11784 return std::make_tuple(*std::min_element(std::begin(Sizes), std::end(Sizes)), 11785 *std::max_element(std::begin(Sizes), std::end(Sizes)), 11786 OutputBecomesInput); 11787 } 11788 11789 /// Mangle the parameter part of the vector function name according to 11790 /// their OpenMP classification. The mangling function is defined in 11791 /// section 3.5 of the AAVFABI. 11792 static std::string mangleVectorParameters(ArrayRef<ParamAttrTy> ParamAttrs) { 11793 SmallString<256> Buffer; 11794 llvm::raw_svector_ostream Out(Buffer); 11795 for (const auto &ParamAttr : ParamAttrs) { 11796 switch (ParamAttr.Kind) { 11797 case LinearWithVarStride: 11798 Out << "ls" << ParamAttr.StrideOrArg; 11799 break; 11800 case Linear: 11801 Out << 'l'; 11802 // Don't print the step value if it is not present or if it is 11803 // equal to 1. 
11804 if (ParamAttr.StrideOrArg != 1) 11805 Out << ParamAttr.StrideOrArg; 11806 break; 11807 case Uniform: 11808 Out << 'u'; 11809 break; 11810 case Vector: 11811 Out << 'v'; 11812 break; 11813 } 11814 11815 if (!!ParamAttr.Alignment) 11816 Out << 'a' << ParamAttr.Alignment; 11817 } 11818 11819 return std::string(Out.str()); 11820 } 11821 11822 // Function used to add the attribute. The parameter `VLEN` is 11823 // templated to allow the use of "x" when targeting scalable functions 11824 // for SVE. 11825 template <typename T> 11826 static void addAArch64VectorName(T VLEN, StringRef LMask, StringRef Prefix, 11827 char ISA, StringRef ParSeq, 11828 StringRef MangledName, bool OutputBecomesInput, 11829 llvm::Function *Fn) { 11830 SmallString<256> Buffer; 11831 llvm::raw_svector_ostream Out(Buffer); 11832 Out << Prefix << ISA << LMask << VLEN; 11833 if (OutputBecomesInput) 11834 Out << "v"; 11835 Out << ParSeq << "_" << MangledName; 11836 Fn->addFnAttr(Out.str()); 11837 } 11838 11839 // Helper function to generate the Advanced SIMD names depending on 11840 // the value of the NDS when simdlen is not present. 
11841 static void addAArch64AdvSIMDNDSNames(unsigned NDS, StringRef Mask, 11842 StringRef Prefix, char ISA, 11843 StringRef ParSeq, StringRef MangledName, 11844 bool OutputBecomesInput, 11845 llvm::Function *Fn) { 11846 switch (NDS) { 11847 case 8: 11848 addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName, 11849 OutputBecomesInput, Fn); 11850 addAArch64VectorName(16, Mask, Prefix, ISA, ParSeq, MangledName, 11851 OutputBecomesInput, Fn); 11852 break; 11853 case 16: 11854 addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName, 11855 OutputBecomesInput, Fn); 11856 addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName, 11857 OutputBecomesInput, Fn); 11858 break; 11859 case 32: 11860 addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName, 11861 OutputBecomesInput, Fn); 11862 addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName, 11863 OutputBecomesInput, Fn); 11864 break; 11865 case 64: 11866 case 128: 11867 addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName, 11868 OutputBecomesInput, Fn); 11869 break; 11870 default: 11871 llvm_unreachable("Scalar type is too wide."); 11872 } 11873 } 11874 11875 /// Emit vector function attributes for AArch64, as defined in the AAVFABI. 11876 static void emitAArch64DeclareSimdFunction( 11877 CodeGenModule &CGM, const FunctionDecl *FD, unsigned UserVLEN, 11878 ArrayRef<ParamAttrTy> ParamAttrs, 11879 OMPDeclareSimdDeclAttr::BranchStateTy State, StringRef MangledName, 11880 char ISA, unsigned VecRegSize, llvm::Function *Fn, SourceLocation SLoc) { 11881 11882 // Get basic data for building the vector signature. 11883 const auto Data = getNDSWDS(FD, ParamAttrs); 11884 const unsigned NDS = std::get<0>(Data); 11885 const unsigned WDS = std::get<1>(Data); 11886 const bool OutputBecomesInput = std::get<2>(Data); 11887 11888 // Check the values provided via `simdlen` by the user. 11889 // 1. 
A `simdlen(1)` doesn't produce vector signatures, 11890 if (UserVLEN == 1) { 11891 unsigned DiagID = CGM.getDiags().getCustomDiagID( 11892 DiagnosticsEngine::Warning, 11893 "The clause simdlen(1) has no effect when targeting aarch64."); 11894 CGM.getDiags().Report(SLoc, DiagID); 11895 return; 11896 } 11897 11898 // 2. Section 3.3.1, item 1: user input must be a power of 2 for 11899 // Advanced SIMD output. 11900 if (ISA == 'n' && UserVLEN && !llvm::isPowerOf2_32(UserVLEN)) { 11901 unsigned DiagID = CGM.getDiags().getCustomDiagID( 11902 DiagnosticsEngine::Warning, "The value specified in simdlen must be a " 11903 "power of 2 when targeting Advanced SIMD."); 11904 CGM.getDiags().Report(SLoc, DiagID); 11905 return; 11906 } 11907 11908 // 3. Section 3.4.1. SVE fixed lengh must obey the architectural 11909 // limits. 11910 if (ISA == 's' && UserVLEN != 0) { 11911 if ((UserVLEN * WDS > 2048) || (UserVLEN * WDS % 128 != 0)) { 11912 unsigned DiagID = CGM.getDiags().getCustomDiagID( 11913 DiagnosticsEngine::Warning, "The clause simdlen must fit the %0-bit " 11914 "lanes in the architectural constraints " 11915 "for SVE (min is 128-bit, max is " 11916 "2048-bit, by steps of 128-bit)"); 11917 CGM.getDiags().Report(SLoc, DiagID) << WDS; 11918 return; 11919 } 11920 } 11921 11922 // Sort out parameter sequence. 11923 const std::string ParSeq = mangleVectorParameters(ParamAttrs); 11924 StringRef Prefix = "_ZGV"; 11925 // Generate simdlen from user input (if any). 11926 if (UserVLEN) { 11927 if (ISA == 's') { 11928 // SVE generates only a masked function. 11929 addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName, 11930 OutputBecomesInput, Fn); 11931 } else { 11932 assert(ISA == 'n' && "Expected ISA either 's' or 'n'."); 11933 // Advanced SIMD generates one or two functions, depending on 11934 // the `[not]inbranch` clause. 
11935 switch (State) { 11936 case OMPDeclareSimdDeclAttr::BS_Undefined: 11937 addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName, 11938 OutputBecomesInput, Fn); 11939 addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName, 11940 OutputBecomesInput, Fn); 11941 break; 11942 case OMPDeclareSimdDeclAttr::BS_Notinbranch: 11943 addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName, 11944 OutputBecomesInput, Fn); 11945 break; 11946 case OMPDeclareSimdDeclAttr::BS_Inbranch: 11947 addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName, 11948 OutputBecomesInput, Fn); 11949 break; 11950 } 11951 } 11952 } else { 11953 // If no user simdlen is provided, follow the AAVFABI rules for 11954 // generating the vector length. 11955 if (ISA == 's') { 11956 // SVE, section 3.4.1, item 1. 11957 addAArch64VectorName("x", "M", Prefix, ISA, ParSeq, MangledName, 11958 OutputBecomesInput, Fn); 11959 } else { 11960 assert(ISA == 'n' && "Expected ISA either 's' or 'n'."); 11961 // Advanced SIMD, Section 3.3.1 of the AAVFABI, generates one or 11962 // two vector names depending on the use of the clause 11963 // `[not]inbranch`. 
11964 switch (State) { 11965 case OMPDeclareSimdDeclAttr::BS_Undefined: 11966 addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName, 11967 OutputBecomesInput, Fn); 11968 addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName, 11969 OutputBecomesInput, Fn); 11970 break; 11971 case OMPDeclareSimdDeclAttr::BS_Notinbranch: 11972 addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName, 11973 OutputBecomesInput, Fn); 11974 break; 11975 case OMPDeclareSimdDeclAttr::BS_Inbranch: 11976 addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName, 11977 OutputBecomesInput, Fn); 11978 break; 11979 } 11980 } 11981 } 11982 } 11983 11984 void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD, 11985 llvm::Function *Fn) { 11986 ASTContext &C = CGM.getContext(); 11987 FD = FD->getMostRecentDecl(); 11988 // Map params to their positions in function decl. 11989 llvm::DenseMap<const Decl *, unsigned> ParamPositions; 11990 if (isa<CXXMethodDecl>(FD)) 11991 ParamPositions.try_emplace(FD, 0); 11992 unsigned ParamPos = ParamPositions.size(); 11993 for (const ParmVarDecl *P : FD->parameters()) { 11994 ParamPositions.try_emplace(P->getCanonicalDecl(), ParamPos); 11995 ++ParamPos; 11996 } 11997 while (FD) { 11998 for (const auto *Attr : FD->specific_attrs<OMPDeclareSimdDeclAttr>()) { 11999 llvm::SmallVector<ParamAttrTy, 8> ParamAttrs(ParamPositions.size()); 12000 // Mark uniform parameters. 12001 for (const Expr *E : Attr->uniforms()) { 12002 E = E->IgnoreParenImpCasts(); 12003 unsigned Pos; 12004 if (isa<CXXThisExpr>(E)) { 12005 Pos = ParamPositions[FD]; 12006 } else { 12007 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl()) 12008 ->getCanonicalDecl(); 12009 Pos = ParamPositions[PVD]; 12010 } 12011 ParamAttrs[Pos].Kind = Uniform; 12012 } 12013 // Get alignment info. 
12014 auto *NI = Attr->alignments_begin(); 12015 for (const Expr *E : Attr->aligneds()) { 12016 E = E->IgnoreParenImpCasts(); 12017 unsigned Pos; 12018 QualType ParmTy; 12019 if (isa<CXXThisExpr>(E)) { 12020 Pos = ParamPositions[FD]; 12021 ParmTy = E->getType(); 12022 } else { 12023 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl()) 12024 ->getCanonicalDecl(); 12025 Pos = ParamPositions[PVD]; 12026 ParmTy = PVD->getType(); 12027 } 12028 ParamAttrs[Pos].Alignment = 12029 (*NI) 12030 ? (*NI)->EvaluateKnownConstInt(C) 12031 : llvm::APSInt::getUnsigned( 12032 C.toCharUnitsFromBits(C.getOpenMPDefaultSimdAlign(ParmTy)) 12033 .getQuantity()); 12034 ++NI; 12035 } 12036 // Mark linear parameters. 12037 auto *SI = Attr->steps_begin(); 12038 auto *MI = Attr->modifiers_begin(); 12039 for (const Expr *E : Attr->linears()) { 12040 E = E->IgnoreParenImpCasts(); 12041 unsigned Pos; 12042 // Rescaling factor needed to compute the linear parameter 12043 // value in the mangled name. 12044 unsigned PtrRescalingFactor = 1; 12045 if (isa<CXXThisExpr>(E)) { 12046 Pos = ParamPositions[FD]; 12047 } else { 12048 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl()) 12049 ->getCanonicalDecl(); 12050 Pos = ParamPositions[PVD]; 12051 if (auto *P = dyn_cast<PointerType>(PVD->getType())) 12052 PtrRescalingFactor = CGM.getContext() 12053 .getTypeSizeInChars(P->getPointeeType()) 12054 .getQuantity(); 12055 } 12056 ParamAttrTy &ParamAttr = ParamAttrs[Pos]; 12057 ParamAttr.Kind = Linear; 12058 // Assuming a stride of 1, for `linear` without modifiers. 
12059 ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(1); 12060 if (*SI) { 12061 Expr::EvalResult Result; 12062 if (!(*SI)->EvaluateAsInt(Result, C, Expr::SE_AllowSideEffects)) { 12063 if (const auto *DRE = 12064 cast<DeclRefExpr>((*SI)->IgnoreParenImpCasts())) { 12065 if (const auto *StridePVD = 12066 dyn_cast<ParmVarDecl>(DRE->getDecl())) { 12067 ParamAttr.Kind = LinearWithVarStride; 12068 ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned( 12069 ParamPositions[StridePVD->getCanonicalDecl()]); 12070 } 12071 } 12072 } else { 12073 ParamAttr.StrideOrArg = Result.Val.getInt(); 12074 } 12075 } 12076 // If we are using a linear clause on a pointer, we need to 12077 // rescale the value of linear_step with the byte size of the 12078 // pointee type. 12079 if (Linear == ParamAttr.Kind) 12080 ParamAttr.StrideOrArg = ParamAttr.StrideOrArg * PtrRescalingFactor; 12081 ++SI; 12082 ++MI; 12083 } 12084 llvm::APSInt VLENVal; 12085 SourceLocation ExprLoc; 12086 const Expr *VLENExpr = Attr->getSimdlen(); 12087 if (VLENExpr) { 12088 VLENVal = VLENExpr->EvaluateKnownConstInt(C); 12089 ExprLoc = VLENExpr->getExprLoc(); 12090 } 12091 OMPDeclareSimdDeclAttr::BranchStateTy State = Attr->getBranchState(); 12092 if (CGM.getTriple().isX86()) { 12093 emitX86DeclareSimdFunction(FD, Fn, VLENVal, ParamAttrs, State); 12094 } else if (CGM.getTriple().getArch() == llvm::Triple::aarch64) { 12095 unsigned VLEN = VLENVal.getExtValue(); 12096 StringRef MangledName = Fn->getName(); 12097 if (CGM.getTarget().hasFeature("sve")) 12098 emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State, 12099 MangledName, 's', 128, Fn, ExprLoc); 12100 if (CGM.getTarget().hasFeature("neon")) 12101 emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State, 12102 MangledName, 'n', 128, Fn, ExprLoc); 12103 } 12104 } 12105 FD = FD->getPreviousDecl(); 12106 } 12107 } 12108 12109 namespace { 12110 /// Cleanup action for doacross support. 
12111 class DoacrossCleanupTy final : public EHScopeStack::Cleanup { 12112 public: 12113 static const int DoacrossFinArgs = 2; 12114 12115 private: 12116 llvm::FunctionCallee RTLFn; 12117 llvm::Value *Args[DoacrossFinArgs]; 12118 12119 public: 12120 DoacrossCleanupTy(llvm::FunctionCallee RTLFn, 12121 ArrayRef<llvm::Value *> CallArgs) 12122 : RTLFn(RTLFn) { 12123 assert(CallArgs.size() == DoacrossFinArgs); 12124 std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args)); 12125 } 12126 void Emit(CodeGenFunction &CGF, Flags /*flags*/) override { 12127 if (!CGF.HaveInsertPoint()) 12128 return; 12129 CGF.EmitRuntimeCall(RTLFn, Args); 12130 } 12131 }; 12132 } // namespace 12133 12134 void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF, 12135 const OMPLoopDirective &D, 12136 ArrayRef<Expr *> NumIterations) { 12137 if (!CGF.HaveInsertPoint()) 12138 return; 12139 12140 ASTContext &C = CGM.getContext(); 12141 QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true); 12142 RecordDecl *RD; 12143 if (KmpDimTy.isNull()) { 12144 // Build struct kmp_dim { // loop bounds info casted to kmp_int64 12145 // kmp_int64 lo; // lower 12146 // kmp_int64 up; // upper 12147 // kmp_int64 st; // stride 12148 // }; 12149 RD = C.buildImplicitRecord("kmp_dim"); 12150 RD->startDefinition(); 12151 addFieldToRecordDecl(C, RD, Int64Ty); 12152 addFieldToRecordDecl(C, RD, Int64Ty); 12153 addFieldToRecordDecl(C, RD, Int64Ty); 12154 RD->completeDefinition(); 12155 KmpDimTy = C.getRecordType(RD); 12156 } else { 12157 RD = cast<RecordDecl>(KmpDimTy->getAsTagDecl()); 12158 } 12159 llvm::APInt Size(/*numBits=*/32, NumIterations.size()); 12160 QualType ArrayTy = 12161 C.getConstantArrayType(KmpDimTy, Size, nullptr, ArrayType::Normal, 0); 12162 12163 Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims"); 12164 CGF.EmitNullInitialization(DimsAddr, ArrayTy); 12165 enum { LowerFD = 0, UpperFD, StrideFD }; 12166 // Fill dims with data. 
12167 for (unsigned I = 0, E = NumIterations.size(); I < E; ++I) { 12168 LValue DimsLVal = CGF.MakeAddrLValue( 12169 CGF.Builder.CreateConstArrayGEP(DimsAddr, I), KmpDimTy); 12170 // dims.upper = num_iterations; 12171 LValue UpperLVal = CGF.EmitLValueForField( 12172 DimsLVal, *std::next(RD->field_begin(), UpperFD)); 12173 llvm::Value *NumIterVal = CGF.EmitScalarConversion( 12174 CGF.EmitScalarExpr(NumIterations[I]), NumIterations[I]->getType(), 12175 Int64Ty, NumIterations[I]->getExprLoc()); 12176 CGF.EmitStoreOfScalar(NumIterVal, UpperLVal); 12177 // dims.stride = 1; 12178 LValue StrideLVal = CGF.EmitLValueForField( 12179 DimsLVal, *std::next(RD->field_begin(), StrideFD)); 12180 CGF.EmitStoreOfScalar(llvm::ConstantInt::getSigned(CGM.Int64Ty, /*V=*/1), 12181 StrideLVal); 12182 } 12183 12184 // Build call void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid, 12185 // kmp_int32 num_dims, struct kmp_dim * dims); 12186 llvm::Value *Args[] = { 12187 emitUpdateLocation(CGF, D.getBeginLoc()), 12188 getThreadID(CGF, D.getBeginLoc()), 12189 llvm::ConstantInt::getSigned(CGM.Int32Ty, NumIterations.size()), 12190 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 12191 CGF.Builder.CreateConstArrayGEP(DimsAddr, 0).getPointer(), 12192 CGM.VoidPtrTy)}; 12193 12194 llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction( 12195 CGM.getModule(), OMPRTL___kmpc_doacross_init); 12196 CGF.EmitRuntimeCall(RTLFn, Args); 12197 llvm::Value *FiniArgs[DoacrossCleanupTy::DoacrossFinArgs] = { 12198 emitUpdateLocation(CGF, D.getEndLoc()), getThreadID(CGF, D.getEndLoc())}; 12199 llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction( 12200 CGM.getModule(), OMPRTL___kmpc_doacross_fini); 12201 CGF.EHStack.pushCleanup<DoacrossCleanupTy>(NormalAndEHCleanup, FiniRTLFn, 12202 llvm::makeArrayRef(FiniArgs)); 12203 } 12204 12205 void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF, 12206 const OMPDependClause *C) { 12207 QualType Int64Ty = 12208 
CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1); 12209 llvm::APInt Size(/*numBits=*/32, C->getNumLoops()); 12210 QualType ArrayTy = CGM.getContext().getConstantArrayType( 12211 Int64Ty, Size, nullptr, ArrayType::Normal, 0); 12212 Address CntAddr = CGF.CreateMemTemp(ArrayTy, ".cnt.addr"); 12213 for (unsigned I = 0, E = C->getNumLoops(); I < E; ++I) { 12214 const Expr *CounterVal = C->getLoopData(I); 12215 assert(CounterVal); 12216 llvm::Value *CntVal = CGF.EmitScalarConversion( 12217 CGF.EmitScalarExpr(CounterVal), CounterVal->getType(), Int64Ty, 12218 CounterVal->getExprLoc()); 12219 CGF.EmitStoreOfScalar(CntVal, CGF.Builder.CreateConstArrayGEP(CntAddr, I), 12220 /*Volatile=*/false, Int64Ty); 12221 } 12222 llvm::Value *Args[] = { 12223 emitUpdateLocation(CGF, C->getBeginLoc()), 12224 getThreadID(CGF, C->getBeginLoc()), 12225 CGF.Builder.CreateConstArrayGEP(CntAddr, 0).getPointer()}; 12226 llvm::FunctionCallee RTLFn; 12227 if (C->getDependencyKind() == OMPC_DEPEND_source) { 12228 RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), 12229 OMPRTL___kmpc_doacross_post); 12230 } else { 12231 assert(C->getDependencyKind() == OMPC_DEPEND_sink); 12232 RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), 12233 OMPRTL___kmpc_doacross_wait); 12234 } 12235 CGF.EmitRuntimeCall(RTLFn, Args); 12236 } 12237 12238 void CGOpenMPRuntime::emitCall(CodeGenFunction &CGF, SourceLocation Loc, 12239 llvm::FunctionCallee Callee, 12240 ArrayRef<llvm::Value *> Args) const { 12241 assert(Loc.isValid() && "Outlined function call location must be valid."); 12242 auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc); 12243 12244 if (auto *Fn = dyn_cast<llvm::Function>(Callee.getCallee())) { 12245 if (Fn->doesNotThrow()) { 12246 CGF.EmitNounwindRuntimeCall(Fn, Args); 12247 return; 12248 } 12249 } 12250 CGF.EmitRuntimeCall(Callee, Args); 12251 } 12252 12253 void CGOpenMPRuntime::emitOutlinedFunctionCall( 12254 CodeGenFunction &CGF, SourceLocation 
Loc, llvm::FunctionCallee OutlinedFn, 12255 ArrayRef<llvm::Value *> Args) const { 12256 emitCall(CGF, Loc, OutlinedFn, Args); 12257 } 12258 12259 void CGOpenMPRuntime::emitFunctionProlog(CodeGenFunction &CGF, const Decl *D) { 12260 if (const auto *FD = dyn_cast<FunctionDecl>(D)) 12261 if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(FD)) 12262 HasEmittedDeclareTargetRegion = true; 12263 } 12264 12265 Address CGOpenMPRuntime::getParameterAddress(CodeGenFunction &CGF, 12266 const VarDecl *NativeParam, 12267 const VarDecl *TargetParam) const { 12268 return CGF.GetAddrOfLocalVar(NativeParam); 12269 } 12270 12271 /// Return allocator value from expression, or return a null allocator (default 12272 /// when no allocator specified). 12273 static llvm::Value *getAllocatorVal(CodeGenFunction &CGF, 12274 const Expr *Allocator) { 12275 llvm::Value *AllocVal; 12276 if (Allocator) { 12277 AllocVal = CGF.EmitScalarExpr(Allocator); 12278 // According to the standard, the original allocator type is a enum 12279 // (integer). Convert to pointer type, if required. 12280 AllocVal = CGF.EmitScalarConversion(AllocVal, Allocator->getType(), 12281 CGF.getContext().VoidPtrTy, 12282 Allocator->getExprLoc()); 12283 } else { 12284 // If no allocator specified, it defaults to the null allocator. 
12285 AllocVal = llvm::Constant::getNullValue( 12286 CGF.CGM.getTypes().ConvertType(CGF.getContext().VoidPtrTy)); 12287 } 12288 return AllocVal; 12289 } 12290 12291 Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF, 12292 const VarDecl *VD) { 12293 if (!VD) 12294 return Address::invalid(); 12295 Address UntiedAddr = Address::invalid(); 12296 Address UntiedRealAddr = Address::invalid(); 12297 auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn); 12298 if (It != FunctionToUntiedTaskStackMap.end()) { 12299 const UntiedLocalVarsAddressesMap &UntiedData = 12300 UntiedLocalVarsStack[It->second]; 12301 auto I = UntiedData.find(VD); 12302 if (I != UntiedData.end()) { 12303 UntiedAddr = I->second.first; 12304 UntiedRealAddr = I->second.second; 12305 } 12306 } 12307 const VarDecl *CVD = VD->getCanonicalDecl(); 12308 if (CVD->hasAttr<OMPAllocateDeclAttr>()) { 12309 // Use the default allocation. 12310 if (!isAllocatableDecl(VD)) 12311 return UntiedAddr; 12312 llvm::Value *Size; 12313 CharUnits Align = CGM.getContext().getDeclAlign(CVD); 12314 if (CVD->getType()->isVariablyModifiedType()) { 12315 Size = CGF.getTypeSize(CVD->getType()); 12316 // Align the size: ((size + align - 1) / align) * align 12317 Size = CGF.Builder.CreateNUWAdd( 12318 Size, CGM.getSize(Align - CharUnits::fromQuantity(1))); 12319 Size = CGF.Builder.CreateUDiv(Size, CGM.getSize(Align)); 12320 Size = CGF.Builder.CreateNUWMul(Size, CGM.getSize(Align)); 12321 } else { 12322 CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType()); 12323 Size = CGM.getSize(Sz.alignTo(Align)); 12324 } 12325 llvm::Value *ThreadID = getThreadID(CGF, CVD->getBeginLoc()); 12326 const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>(); 12327 const Expr *Allocator = AA->getAllocator(); 12328 llvm::Value *AllocVal = getAllocatorVal(CGF, Allocator); 12329 llvm::Value *Alignment = 12330 AA->getAlignment() 12331 ? 
CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(AA->getAlignment()), 12332 CGM.SizeTy, /*isSigned=*/false) 12333 : nullptr; 12334 SmallVector<llvm::Value *, 4> Args; 12335 Args.push_back(ThreadID); 12336 if (Alignment) 12337 Args.push_back(Alignment); 12338 Args.push_back(Size); 12339 Args.push_back(AllocVal); 12340 llvm::omp::RuntimeFunction FnID = 12341 Alignment ? OMPRTL___kmpc_aligned_alloc : OMPRTL___kmpc_alloc; 12342 llvm::Value *Addr = CGF.EmitRuntimeCall( 12343 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), FnID), Args, 12344 getName({CVD->getName(), ".void.addr"})); 12345 llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction( 12346 CGM.getModule(), OMPRTL___kmpc_free); 12347 QualType Ty = CGM.getContext().getPointerType(CVD->getType()); 12348 Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 12349 Addr, CGF.ConvertTypeForMem(Ty), getName({CVD->getName(), ".addr"})); 12350 if (UntiedAddr.isValid()) 12351 CGF.EmitStoreOfScalar(Addr, UntiedAddr, /*Volatile=*/false, Ty); 12352 12353 // Cleanup action for allocate support. 
12354 class OMPAllocateCleanupTy final : public EHScopeStack::Cleanup { 12355 llvm::FunctionCallee RTLFn; 12356 SourceLocation::UIntTy LocEncoding; 12357 Address Addr; 12358 const Expr *AllocExpr; 12359 12360 public: 12361 OMPAllocateCleanupTy(llvm::FunctionCallee RTLFn, 12362 SourceLocation::UIntTy LocEncoding, Address Addr, 12363 const Expr *AllocExpr) 12364 : RTLFn(RTLFn), LocEncoding(LocEncoding), Addr(Addr), 12365 AllocExpr(AllocExpr) {} 12366 void Emit(CodeGenFunction &CGF, Flags /*flags*/) override { 12367 if (!CGF.HaveInsertPoint()) 12368 return; 12369 llvm::Value *Args[3]; 12370 Args[0] = CGF.CGM.getOpenMPRuntime().getThreadID( 12371 CGF, SourceLocation::getFromRawEncoding(LocEncoding)); 12372 Args[1] = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 12373 Addr.getPointer(), CGF.VoidPtrTy); 12374 llvm::Value *AllocVal = getAllocatorVal(CGF, AllocExpr); 12375 Args[2] = AllocVal; 12376 CGF.EmitRuntimeCall(RTLFn, Args); 12377 } 12378 }; 12379 Address VDAddr = UntiedRealAddr.isValid() 12380 ? 
UntiedRealAddr 12381 : Address::deprecated(Addr, Align); 12382 CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>( 12383 NormalAndEHCleanup, FiniRTLFn, CVD->getLocation().getRawEncoding(), 12384 VDAddr, Allocator); 12385 if (UntiedRealAddr.isValid()) 12386 if (auto *Region = 12387 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) 12388 Region->emitUntiedSwitch(CGF); 12389 return VDAddr; 12390 } 12391 return UntiedAddr; 12392 } 12393 12394 bool CGOpenMPRuntime::isLocalVarInUntiedTask(CodeGenFunction &CGF, 12395 const VarDecl *VD) const { 12396 auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn); 12397 if (It == FunctionToUntiedTaskStackMap.end()) 12398 return false; 12399 return UntiedLocalVarsStack[It->second].count(VD) > 0; 12400 } 12401 12402 CGOpenMPRuntime::NontemporalDeclsRAII::NontemporalDeclsRAII( 12403 CodeGenModule &CGM, const OMPLoopDirective &S) 12404 : CGM(CGM), NeedToPush(S.hasClausesOfKind<OMPNontemporalClause>()) { 12405 assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode."); 12406 if (!NeedToPush) 12407 return; 12408 NontemporalDeclsSet &DS = 12409 CGM.getOpenMPRuntime().NontemporalDeclsStack.emplace_back(); 12410 for (const auto *C : S.getClausesOfKind<OMPNontemporalClause>()) { 12411 for (const Stmt *Ref : C->private_refs()) { 12412 const auto *SimpleRefExpr = cast<Expr>(Ref)->IgnoreParenImpCasts(); 12413 const ValueDecl *VD; 12414 if (const auto *DRE = dyn_cast<DeclRefExpr>(SimpleRefExpr)) { 12415 VD = DRE->getDecl(); 12416 } else { 12417 const auto *ME = cast<MemberExpr>(SimpleRefExpr); 12418 assert((ME->isImplicitCXXThis() || 12419 isa<CXXThisExpr>(ME->getBase()->IgnoreParenImpCasts())) && 12420 "Expected member of current class."); 12421 VD = ME->getMemberDecl(); 12422 } 12423 DS.insert(VD); 12424 } 12425 } 12426 } 12427 12428 CGOpenMPRuntime::NontemporalDeclsRAII::~NontemporalDeclsRAII() { 12429 if (!NeedToPush) 12430 return; 12431 CGM.getOpenMPRuntime().NontemporalDeclsStack.pop_back(); 12432 } 12433 12434 
CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::UntiedTaskLocalDeclsRAII( 12435 CodeGenFunction &CGF, 12436 const llvm::MapVector<CanonicalDeclPtr<const VarDecl>, 12437 std::pair<Address, Address>> &LocalVars) 12438 : CGM(CGF.CGM), NeedToPush(!LocalVars.empty()) { 12439 if (!NeedToPush) 12440 return; 12441 CGM.getOpenMPRuntime().FunctionToUntiedTaskStackMap.try_emplace( 12442 CGF.CurFn, CGM.getOpenMPRuntime().UntiedLocalVarsStack.size()); 12443 CGM.getOpenMPRuntime().UntiedLocalVarsStack.push_back(LocalVars); 12444 } 12445 12446 CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::~UntiedTaskLocalDeclsRAII() { 12447 if (!NeedToPush) 12448 return; 12449 CGM.getOpenMPRuntime().UntiedLocalVarsStack.pop_back(); 12450 } 12451 12452 bool CGOpenMPRuntime::isNontemporalDecl(const ValueDecl *VD) const { 12453 assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode."); 12454 12455 return llvm::any_of( 12456 CGM.getOpenMPRuntime().NontemporalDeclsStack, 12457 [VD](const NontemporalDeclsSet &Set) { return Set.contains(VD); }); 12458 } 12459 12460 void CGOpenMPRuntime::LastprivateConditionalRAII::tryToDisableInnerAnalysis( 12461 const OMPExecutableDirective &S, 12462 llvm::DenseSet<CanonicalDeclPtr<const Decl>> &NeedToAddForLPCsAsDisabled) 12463 const { 12464 llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToCheckForLPCs; 12465 // Vars in target/task regions must be excluded completely. 12466 if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()) || 12467 isOpenMPTaskingDirective(S.getDirectiveKind())) { 12468 SmallVector<OpenMPDirectiveKind, 4> CaptureRegions; 12469 getOpenMPCaptureRegions(CaptureRegions, S.getDirectiveKind()); 12470 const CapturedStmt *CS = S.getCapturedStmt(CaptureRegions.front()); 12471 for (const CapturedStmt::Capture &Cap : CS->captures()) { 12472 if (Cap.capturesVariable() || Cap.capturesVariableByCopy()) 12473 NeedToCheckForLPCs.insert(Cap.getCapturedVar()); 12474 } 12475 } 12476 // Exclude vars in private clauses. 
12477 for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) { 12478 for (const Expr *Ref : C->varlists()) { 12479 if (!Ref->getType()->isScalarType()) 12480 continue; 12481 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts()); 12482 if (!DRE) 12483 continue; 12484 NeedToCheckForLPCs.insert(DRE->getDecl()); 12485 } 12486 } 12487 for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) { 12488 for (const Expr *Ref : C->varlists()) { 12489 if (!Ref->getType()->isScalarType()) 12490 continue; 12491 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts()); 12492 if (!DRE) 12493 continue; 12494 NeedToCheckForLPCs.insert(DRE->getDecl()); 12495 } 12496 } 12497 for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) { 12498 for (const Expr *Ref : C->varlists()) { 12499 if (!Ref->getType()->isScalarType()) 12500 continue; 12501 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts()); 12502 if (!DRE) 12503 continue; 12504 NeedToCheckForLPCs.insert(DRE->getDecl()); 12505 } 12506 } 12507 for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) { 12508 for (const Expr *Ref : C->varlists()) { 12509 if (!Ref->getType()->isScalarType()) 12510 continue; 12511 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts()); 12512 if (!DRE) 12513 continue; 12514 NeedToCheckForLPCs.insert(DRE->getDecl()); 12515 } 12516 } 12517 for (const auto *C : S.getClausesOfKind<OMPLinearClause>()) { 12518 for (const Expr *Ref : C->varlists()) { 12519 if (!Ref->getType()->isScalarType()) 12520 continue; 12521 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts()); 12522 if (!DRE) 12523 continue; 12524 NeedToCheckForLPCs.insert(DRE->getDecl()); 12525 } 12526 } 12527 for (const Decl *VD : NeedToCheckForLPCs) { 12528 for (const LastprivateConditionalData &Data : 12529 llvm::reverse(CGM.getOpenMPRuntime().LastprivateConditionalStack)) { 12530 if (Data.DeclToUniqueName.count(VD) > 0) { 12531 if (!Data.Disabled) 12532 
NeedToAddForLPCsAsDisabled.insert(VD); 12533 break; 12534 } 12535 } 12536 } 12537 } 12538 12539 CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII( 12540 CodeGenFunction &CGF, const OMPExecutableDirective &S, LValue IVLVal) 12541 : CGM(CGF.CGM), 12542 Action((CGM.getLangOpts().OpenMP >= 50 && 12543 llvm::any_of(S.getClausesOfKind<OMPLastprivateClause>(), 12544 [](const OMPLastprivateClause *C) { 12545 return C->getKind() == 12546 OMPC_LASTPRIVATE_conditional; 12547 })) 12548 ? ActionToDo::PushAsLastprivateConditional 12549 : ActionToDo::DoNotPush) { 12550 assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode."); 12551 if (CGM.getLangOpts().OpenMP < 50 || Action == ActionToDo::DoNotPush) 12552 return; 12553 assert(Action == ActionToDo::PushAsLastprivateConditional && 12554 "Expected a push action."); 12555 LastprivateConditionalData &Data = 12556 CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back(); 12557 for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) { 12558 if (C->getKind() != OMPC_LASTPRIVATE_conditional) 12559 continue; 12560 12561 for (const Expr *Ref : C->varlists()) { 12562 Data.DeclToUniqueName.insert(std::make_pair( 12563 cast<DeclRefExpr>(Ref->IgnoreParenImpCasts())->getDecl(), 12564 SmallString<16>(generateUniqueName(CGM, "pl_cond", Ref)))); 12565 } 12566 } 12567 Data.IVLVal = IVLVal; 12568 Data.Fn = CGF.CurFn; 12569 } 12570 12571 CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII( 12572 CodeGenFunction &CGF, const OMPExecutableDirective &S) 12573 : CGM(CGF.CGM), Action(ActionToDo::DoNotPush) { 12574 assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode."); 12575 if (CGM.getLangOpts().OpenMP < 50) 12576 return; 12577 llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToAddForLPCsAsDisabled; 12578 tryToDisableInnerAnalysis(S, NeedToAddForLPCsAsDisabled); 12579 if (!NeedToAddForLPCsAsDisabled.empty()) { 12580 Action = ActionToDo::DisableLastprivateConditional; 12581 
LastprivateConditionalData &Data = 12582 CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back(); 12583 for (const Decl *VD : NeedToAddForLPCsAsDisabled) 12584 Data.DeclToUniqueName.insert(std::make_pair(VD, SmallString<16>())); 12585 Data.Fn = CGF.CurFn; 12586 Data.Disabled = true; 12587 } 12588 } 12589 12590 CGOpenMPRuntime::LastprivateConditionalRAII 12591 CGOpenMPRuntime::LastprivateConditionalRAII::disable( 12592 CodeGenFunction &CGF, const OMPExecutableDirective &S) { 12593 return LastprivateConditionalRAII(CGF, S); 12594 } 12595 12596 CGOpenMPRuntime::LastprivateConditionalRAII::~LastprivateConditionalRAII() { 12597 if (CGM.getLangOpts().OpenMP < 50) 12598 return; 12599 if (Action == ActionToDo::DisableLastprivateConditional) { 12600 assert(CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled && 12601 "Expected list of disabled private vars."); 12602 CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back(); 12603 } 12604 if (Action == ActionToDo::PushAsLastprivateConditional) { 12605 assert( 12606 !CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled && 12607 "Expected list of lastprivate conditional vars."); 12608 CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back(); 12609 } 12610 } 12611 12612 Address CGOpenMPRuntime::emitLastprivateConditionalInit(CodeGenFunction &CGF, 12613 const VarDecl *VD) { 12614 ASTContext &C = CGM.getContext(); 12615 auto I = LastprivateConditionalToTypes.find(CGF.CurFn); 12616 if (I == LastprivateConditionalToTypes.end()) 12617 I = LastprivateConditionalToTypes.try_emplace(CGF.CurFn).first; 12618 QualType NewType; 12619 const FieldDecl *VDField; 12620 const FieldDecl *FiredField; 12621 LValue BaseLVal; 12622 auto VI = I->getSecond().find(VD); 12623 if (VI == I->getSecond().end()) { 12624 RecordDecl *RD = C.buildImplicitRecord("lasprivate.conditional"); 12625 RD->startDefinition(); 12626 VDField = addFieldToRecordDecl(C, RD, VD->getType().getNonReferenceType()); 12627 FiredField = 
addFieldToRecordDecl(C, RD, C.CharTy); 12628 RD->completeDefinition(); 12629 NewType = C.getRecordType(RD); 12630 Address Addr = CGF.CreateMemTemp(NewType, C.getDeclAlign(VD), VD->getName()); 12631 BaseLVal = CGF.MakeAddrLValue(Addr, NewType, AlignmentSource::Decl); 12632 I->getSecond().try_emplace(VD, NewType, VDField, FiredField, BaseLVal); 12633 } else { 12634 NewType = std::get<0>(VI->getSecond()); 12635 VDField = std::get<1>(VI->getSecond()); 12636 FiredField = std::get<2>(VI->getSecond()); 12637 BaseLVal = std::get<3>(VI->getSecond()); 12638 } 12639 LValue FiredLVal = 12640 CGF.EmitLValueForField(BaseLVal, FiredField); 12641 CGF.EmitStoreOfScalar( 12642 llvm::ConstantInt::getNullValue(CGF.ConvertTypeForMem(C.CharTy)), 12643 FiredLVal); 12644 return CGF.EmitLValueForField(BaseLVal, VDField).getAddress(CGF); 12645 } 12646 12647 namespace { 12648 /// Checks if the lastprivate conditional variable is referenced in LHS. 12649 class LastprivateConditionalRefChecker final 12650 : public ConstStmtVisitor<LastprivateConditionalRefChecker, bool> { 12651 ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM; 12652 const Expr *FoundE = nullptr; 12653 const Decl *FoundD = nullptr; 12654 StringRef UniqueDeclName; 12655 LValue IVLVal; 12656 llvm::Function *FoundFn = nullptr; 12657 SourceLocation Loc; 12658 12659 public: 12660 bool VisitDeclRefExpr(const DeclRefExpr *E) { 12661 for (const CGOpenMPRuntime::LastprivateConditionalData &D : 12662 llvm::reverse(LPM)) { 12663 auto It = D.DeclToUniqueName.find(E->getDecl()); 12664 if (It == D.DeclToUniqueName.end()) 12665 continue; 12666 if (D.Disabled) 12667 return false; 12668 FoundE = E; 12669 FoundD = E->getDecl()->getCanonicalDecl(); 12670 UniqueDeclName = It->second; 12671 IVLVal = D.IVLVal; 12672 FoundFn = D.Fn; 12673 break; 12674 } 12675 return FoundE == E; 12676 } 12677 bool VisitMemberExpr(const MemberExpr *E) { 12678 if (!CodeGenFunction::IsWrappedCXXThis(E->getBase())) 12679 return false; 12680 for (const 
CGOpenMPRuntime::LastprivateConditionalData &D : 12681 llvm::reverse(LPM)) { 12682 auto It = D.DeclToUniqueName.find(E->getMemberDecl()); 12683 if (It == D.DeclToUniqueName.end()) 12684 continue; 12685 if (D.Disabled) 12686 return false; 12687 FoundE = E; 12688 FoundD = E->getMemberDecl()->getCanonicalDecl(); 12689 UniqueDeclName = It->second; 12690 IVLVal = D.IVLVal; 12691 FoundFn = D.Fn; 12692 break; 12693 } 12694 return FoundE == E; 12695 } 12696 bool VisitStmt(const Stmt *S) { 12697 for (const Stmt *Child : S->children()) { 12698 if (!Child) 12699 continue; 12700 if (const auto *E = dyn_cast<Expr>(Child)) 12701 if (!E->isGLValue()) 12702 continue; 12703 if (Visit(Child)) 12704 return true; 12705 } 12706 return false; 12707 } 12708 explicit LastprivateConditionalRefChecker( 12709 ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM) 12710 : LPM(LPM) {} 12711 std::tuple<const Expr *, const Decl *, StringRef, LValue, llvm::Function *> 12712 getFoundData() const { 12713 return std::make_tuple(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn); 12714 } 12715 }; 12716 } // namespace 12717 12718 void CGOpenMPRuntime::emitLastprivateConditionalUpdate(CodeGenFunction &CGF, 12719 LValue IVLVal, 12720 StringRef UniqueDeclName, 12721 LValue LVal, 12722 SourceLocation Loc) { 12723 // Last updated loop counter for the lastprivate conditional var. 12724 // int<xx> last_iv = 0; 12725 llvm::Type *LLIVTy = CGF.ConvertTypeForMem(IVLVal.getType()); 12726 llvm::Constant *LastIV = 12727 getOrCreateInternalVariable(LLIVTy, getName({UniqueDeclName, "iv"})); 12728 cast<llvm::GlobalVariable>(LastIV)->setAlignment( 12729 IVLVal.getAlignment().getAsAlign()); 12730 LValue LastIVLVal = CGF.MakeNaturalAlignAddrLValue(LastIV, IVLVal.getType()); 12731 12732 // Last value of the lastprivate conditional. 
12733 // decltype(priv_a) last_a; 12734 llvm::GlobalVariable *Last = getOrCreateInternalVariable( 12735 CGF.ConvertTypeForMem(LVal.getType()), UniqueDeclName); 12736 Last->setAlignment(LVal.getAlignment().getAsAlign()); 12737 LValue LastLVal = CGF.MakeAddrLValue( 12738 Address(Last, Last->getValueType(), LVal.getAlignment()), LVal.getType()); 12739 12740 // Global loop counter. Required to handle inner parallel-for regions. 12741 // iv 12742 llvm::Value *IVVal = CGF.EmitLoadOfScalar(IVLVal, Loc); 12743 12744 // #pragma omp critical(a) 12745 // if (last_iv <= iv) { 12746 // last_iv = iv; 12747 // last_a = priv_a; 12748 // } 12749 auto &&CodeGen = [&LastIVLVal, &IVLVal, IVVal, &LVal, &LastLVal, 12750 Loc](CodeGenFunction &CGF, PrePostActionTy &Action) { 12751 Action.Enter(CGF); 12752 llvm::Value *LastIVVal = CGF.EmitLoadOfScalar(LastIVLVal, Loc); 12753 // (last_iv <= iv) ? Check if the variable is updated and store new 12754 // value in global var. 12755 llvm::Value *CmpRes; 12756 if (IVLVal.getType()->isSignedIntegerType()) { 12757 CmpRes = CGF.Builder.CreateICmpSLE(LastIVVal, IVVal); 12758 } else { 12759 assert(IVLVal.getType()->isUnsignedIntegerType() && 12760 "Loop iteration variable must be integer."); 12761 CmpRes = CGF.Builder.CreateICmpULE(LastIVVal, IVVal); 12762 } 12763 llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lp_cond_then"); 12764 llvm::BasicBlock *ExitBB = CGF.createBasicBlock("lp_cond_exit"); 12765 CGF.Builder.CreateCondBr(CmpRes, ThenBB, ExitBB); 12766 // { 12767 CGF.EmitBlock(ThenBB); 12768 12769 // last_iv = iv; 12770 CGF.EmitStoreOfScalar(IVVal, LastIVLVal); 12771 12772 // last_a = priv_a; 12773 switch (CGF.getEvaluationKind(LVal.getType())) { 12774 case TEK_Scalar: { 12775 llvm::Value *PrivVal = CGF.EmitLoadOfScalar(LVal, Loc); 12776 CGF.EmitStoreOfScalar(PrivVal, LastLVal); 12777 break; 12778 } 12779 case TEK_Complex: { 12780 CodeGenFunction::ComplexPairTy PrivVal = CGF.EmitLoadOfComplex(LVal, Loc); 12781 CGF.EmitStoreOfComplex(PrivVal, 
LastLVal, /*isInit=*/false); 12782 break; 12783 } 12784 case TEK_Aggregate: 12785 llvm_unreachable( 12786 "Aggregates are not supported in lastprivate conditional."); 12787 } 12788 // } 12789 CGF.EmitBranch(ExitBB); 12790 // There is no need to emit line number for unconditional branch. 12791 (void)ApplyDebugLocation::CreateEmpty(CGF); 12792 CGF.EmitBlock(ExitBB, /*IsFinished=*/true); 12793 }; 12794 12795 if (CGM.getLangOpts().OpenMPSimd) { 12796 // Do not emit as a critical region as no parallel region could be emitted. 12797 RegionCodeGenTy ThenRCG(CodeGen); 12798 ThenRCG(CGF); 12799 } else { 12800 emitCriticalRegion(CGF, UniqueDeclName, CodeGen, Loc); 12801 } 12802 } 12803 12804 void CGOpenMPRuntime::checkAndEmitLastprivateConditional(CodeGenFunction &CGF, 12805 const Expr *LHS) { 12806 if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty()) 12807 return; 12808 LastprivateConditionalRefChecker Checker(LastprivateConditionalStack); 12809 if (!Checker.Visit(LHS)) 12810 return; 12811 const Expr *FoundE; 12812 const Decl *FoundD; 12813 StringRef UniqueDeclName; 12814 LValue IVLVal; 12815 llvm::Function *FoundFn; 12816 std::tie(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn) = 12817 Checker.getFoundData(); 12818 if (FoundFn != CGF.CurFn) { 12819 // Special codegen for inner parallel regions. 
12820 // ((struct.lastprivate.conditional*)&priv_a)->Fired = 1; 12821 auto It = LastprivateConditionalToTypes[FoundFn].find(FoundD); 12822 assert(It != LastprivateConditionalToTypes[FoundFn].end() && 12823 "Lastprivate conditional is not found in outer region."); 12824 QualType StructTy = std::get<0>(It->getSecond()); 12825 const FieldDecl* FiredDecl = std::get<2>(It->getSecond()); 12826 LValue PrivLVal = CGF.EmitLValue(FoundE); 12827 Address StructAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 12828 PrivLVal.getAddress(CGF), 12829 CGF.ConvertTypeForMem(CGF.getContext().getPointerType(StructTy))); 12830 LValue BaseLVal = 12831 CGF.MakeAddrLValue(StructAddr, StructTy, AlignmentSource::Decl); 12832 LValue FiredLVal = CGF.EmitLValueForField(BaseLVal, FiredDecl); 12833 CGF.EmitAtomicStore(RValue::get(llvm::ConstantInt::get( 12834 CGF.ConvertTypeForMem(FiredDecl->getType()), 1)), 12835 FiredLVal, llvm::AtomicOrdering::Unordered, 12836 /*IsVolatile=*/true, /*isInit=*/false); 12837 return; 12838 } 12839 12840 // Private address of the lastprivate conditional in the current context. 
12841 // priv_a 12842 LValue LVal = CGF.EmitLValue(FoundE); 12843 emitLastprivateConditionalUpdate(CGF, IVLVal, UniqueDeclName, LVal, 12844 FoundE->getExprLoc()); 12845 } 12846 12847 void CGOpenMPRuntime::checkAndEmitSharedLastprivateConditional( 12848 CodeGenFunction &CGF, const OMPExecutableDirective &D, 12849 const llvm::DenseSet<CanonicalDeclPtr<const VarDecl>> &IgnoredDecls) { 12850 if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty()) 12851 return; 12852 auto Range = llvm::reverse(LastprivateConditionalStack); 12853 auto It = llvm::find_if( 12854 Range, [](const LastprivateConditionalData &D) { return !D.Disabled; }); 12855 if (It == Range.end() || It->Fn != CGF.CurFn) 12856 return; 12857 auto LPCI = LastprivateConditionalToTypes.find(It->Fn); 12858 assert(LPCI != LastprivateConditionalToTypes.end() && 12859 "Lastprivates must be registered already."); 12860 SmallVector<OpenMPDirectiveKind, 4> CaptureRegions; 12861 getOpenMPCaptureRegions(CaptureRegions, D.getDirectiveKind()); 12862 const CapturedStmt *CS = D.getCapturedStmt(CaptureRegions.back()); 12863 for (const auto &Pair : It->DeclToUniqueName) { 12864 const auto *VD = cast<VarDecl>(Pair.first->getCanonicalDecl()); 12865 if (!CS->capturesVariable(VD) || IgnoredDecls.contains(VD)) 12866 continue; 12867 auto I = LPCI->getSecond().find(Pair.first); 12868 assert(I != LPCI->getSecond().end() && 12869 "Lastprivate must be rehistered already."); 12870 // bool Cmp = priv_a.Fired != 0; 12871 LValue BaseLVal = std::get<3>(I->getSecond()); 12872 LValue FiredLVal = 12873 CGF.EmitLValueForField(BaseLVal, std::get<2>(I->getSecond())); 12874 llvm::Value *Res = CGF.EmitLoadOfScalar(FiredLVal, D.getBeginLoc()); 12875 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Res); 12876 llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lpc.then"); 12877 llvm::BasicBlock *DoneBB = CGF.createBasicBlock("lpc.done"); 12878 // if (Cmp) { 12879 CGF.Builder.CreateCondBr(Cmp, ThenBB, DoneBB); 12880 CGF.EmitBlock(ThenBB); 
12881 Address Addr = CGF.GetAddrOfLocalVar(VD); 12882 LValue LVal; 12883 if (VD->getType()->isReferenceType()) 12884 LVal = CGF.EmitLoadOfReferenceLValue(Addr, VD->getType(), 12885 AlignmentSource::Decl); 12886 else 12887 LVal = CGF.MakeAddrLValue(Addr, VD->getType().getNonReferenceType(), 12888 AlignmentSource::Decl); 12889 emitLastprivateConditionalUpdate(CGF, It->IVLVal, Pair.second, LVal, 12890 D.getBeginLoc()); 12891 auto AL = ApplyDebugLocation::CreateArtificial(CGF); 12892 CGF.EmitBlock(DoneBB, /*IsFinal=*/true); 12893 // } 12894 } 12895 } 12896 12897 void CGOpenMPRuntime::emitLastprivateConditionalFinalUpdate( 12898 CodeGenFunction &CGF, LValue PrivLVal, const VarDecl *VD, 12899 SourceLocation Loc) { 12900 if (CGF.getLangOpts().OpenMP < 50) 12901 return; 12902 auto It = LastprivateConditionalStack.back().DeclToUniqueName.find(VD); 12903 assert(It != LastprivateConditionalStack.back().DeclToUniqueName.end() && 12904 "Unknown lastprivate conditional variable."); 12905 StringRef UniqueName = It->second; 12906 llvm::GlobalVariable *GV = CGM.getModule().getNamedGlobal(UniqueName); 12907 // The variable was not updated in the region - exit. 
12908 if (!GV) 12909 return; 12910 LValue LPLVal = CGF.MakeAddrLValue( 12911 Address(GV, GV->getValueType(), PrivLVal.getAlignment()), 12912 PrivLVal.getType().getNonReferenceType()); 12913 llvm::Value *Res = CGF.EmitLoadOfScalar(LPLVal, Loc); 12914 CGF.EmitStoreOfScalar(Res, PrivLVal); 12915 } 12916 12917 llvm::Function *CGOpenMPSIMDRuntime::emitParallelOutlinedFunction( 12918 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, 12919 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { 12920 llvm_unreachable("Not supported in SIMD-only mode"); 12921 } 12922 12923 llvm::Function *CGOpenMPSIMDRuntime::emitTeamsOutlinedFunction( 12924 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, 12925 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { 12926 llvm_unreachable("Not supported in SIMD-only mode"); 12927 } 12928 12929 llvm::Function *CGOpenMPSIMDRuntime::emitTaskOutlinedFunction( 12930 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, 12931 const VarDecl *PartIDVar, const VarDecl *TaskTVar, 12932 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen, 12933 bool Tied, unsigned &NumberOfParts) { 12934 llvm_unreachable("Not supported in SIMD-only mode"); 12935 } 12936 12937 void CGOpenMPSIMDRuntime::emitParallelCall(CodeGenFunction &CGF, 12938 SourceLocation Loc, 12939 llvm::Function *OutlinedFn, 12940 ArrayRef<llvm::Value *> CapturedVars, 12941 const Expr *IfCond, 12942 llvm::Value *NumThreads) { 12943 llvm_unreachable("Not supported in SIMD-only mode"); 12944 } 12945 12946 void CGOpenMPSIMDRuntime::emitCriticalRegion( 12947 CodeGenFunction &CGF, StringRef CriticalName, 12948 const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc, 12949 const Expr *Hint) { 12950 llvm_unreachable("Not supported in SIMD-only mode"); 12951 } 12952 12953 void CGOpenMPSIMDRuntime::emitMasterRegion(CodeGenFunction &CGF, 12954 const RegionCodeGenTy &MasterOpGen, 12955 SourceLocation Loc) { 12956 
llvm_unreachable("Not supported in SIMD-only mode"); 12957 } 12958 12959 void CGOpenMPSIMDRuntime::emitMaskedRegion(CodeGenFunction &CGF, 12960 const RegionCodeGenTy &MasterOpGen, 12961 SourceLocation Loc, 12962 const Expr *Filter) { 12963 llvm_unreachable("Not supported in SIMD-only mode"); 12964 } 12965 12966 void CGOpenMPSIMDRuntime::emitTaskyieldCall(CodeGenFunction &CGF, 12967 SourceLocation Loc) { 12968 llvm_unreachable("Not supported in SIMD-only mode"); 12969 } 12970 12971 void CGOpenMPSIMDRuntime::emitTaskgroupRegion( 12972 CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen, 12973 SourceLocation Loc) { 12974 llvm_unreachable("Not supported in SIMD-only mode"); 12975 } 12976 12977 void CGOpenMPSIMDRuntime::emitSingleRegion( 12978 CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen, 12979 SourceLocation Loc, ArrayRef<const Expr *> CopyprivateVars, 12980 ArrayRef<const Expr *> DestExprs, ArrayRef<const Expr *> SrcExprs, 12981 ArrayRef<const Expr *> AssignmentOps) { 12982 llvm_unreachable("Not supported in SIMD-only mode"); 12983 } 12984 12985 void CGOpenMPSIMDRuntime::emitOrderedRegion(CodeGenFunction &CGF, 12986 const RegionCodeGenTy &OrderedOpGen, 12987 SourceLocation Loc, 12988 bool IsThreads) { 12989 llvm_unreachable("Not supported in SIMD-only mode"); 12990 } 12991 12992 void CGOpenMPSIMDRuntime::emitBarrierCall(CodeGenFunction &CGF, 12993 SourceLocation Loc, 12994 OpenMPDirectiveKind Kind, 12995 bool EmitChecks, 12996 bool ForceSimpleCall) { 12997 llvm_unreachable("Not supported in SIMD-only mode"); 12998 } 12999 13000 void CGOpenMPSIMDRuntime::emitForDispatchInit( 13001 CodeGenFunction &CGF, SourceLocation Loc, 13002 const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned, 13003 bool Ordered, const DispatchRTInput &DispatchValues) { 13004 llvm_unreachable("Not supported in SIMD-only mode"); 13005 } 13006 13007 void CGOpenMPSIMDRuntime::emitForStaticInit( 13008 CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind 
DKind, 13009 const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values) { 13010 llvm_unreachable("Not supported in SIMD-only mode"); 13011 } 13012 13013 void CGOpenMPSIMDRuntime::emitDistributeStaticInit( 13014 CodeGenFunction &CGF, SourceLocation Loc, 13015 OpenMPDistScheduleClauseKind SchedKind, const StaticRTInput &Values) { 13016 llvm_unreachable("Not supported in SIMD-only mode"); 13017 } 13018 13019 void CGOpenMPSIMDRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF, 13020 SourceLocation Loc, 13021 unsigned IVSize, 13022 bool IVSigned) { 13023 llvm_unreachable("Not supported in SIMD-only mode"); 13024 } 13025 13026 void CGOpenMPSIMDRuntime::emitForStaticFinish(CodeGenFunction &CGF, 13027 SourceLocation Loc, 13028 OpenMPDirectiveKind DKind) { 13029 llvm_unreachable("Not supported in SIMD-only mode"); 13030 } 13031 13032 llvm::Value *CGOpenMPSIMDRuntime::emitForNext(CodeGenFunction &CGF, 13033 SourceLocation Loc, 13034 unsigned IVSize, bool IVSigned, 13035 Address IL, Address LB, 13036 Address UB, Address ST) { 13037 llvm_unreachable("Not supported in SIMD-only mode"); 13038 } 13039 13040 void CGOpenMPSIMDRuntime::emitNumThreadsClause(CodeGenFunction &CGF, 13041 llvm::Value *NumThreads, 13042 SourceLocation Loc) { 13043 llvm_unreachable("Not supported in SIMD-only mode"); 13044 } 13045 13046 void CGOpenMPSIMDRuntime::emitProcBindClause(CodeGenFunction &CGF, 13047 ProcBindKind ProcBind, 13048 SourceLocation Loc) { 13049 llvm_unreachable("Not supported in SIMD-only mode"); 13050 } 13051 13052 Address CGOpenMPSIMDRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF, 13053 const VarDecl *VD, 13054 Address VDAddr, 13055 SourceLocation Loc) { 13056 llvm_unreachable("Not supported in SIMD-only mode"); 13057 } 13058 13059 llvm::Function *CGOpenMPSIMDRuntime::emitThreadPrivateVarDefinition( 13060 const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit, 13061 CodeGenFunction *CGF) { 13062 llvm_unreachable("Not supported in SIMD-only mode"); 
13063 } 13064 13065 Address CGOpenMPSIMDRuntime::getAddrOfArtificialThreadPrivate( 13066 CodeGenFunction &CGF, QualType VarType, StringRef Name) { 13067 llvm_unreachable("Not supported in SIMD-only mode"); 13068 } 13069 13070 void CGOpenMPSIMDRuntime::emitFlush(CodeGenFunction &CGF, 13071 ArrayRef<const Expr *> Vars, 13072 SourceLocation Loc, 13073 llvm::AtomicOrdering AO) { 13074 llvm_unreachable("Not supported in SIMD-only mode"); 13075 } 13076 13077 void CGOpenMPSIMDRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc, 13078 const OMPExecutableDirective &D, 13079 llvm::Function *TaskFunction, 13080 QualType SharedsTy, Address Shareds, 13081 const Expr *IfCond, 13082 const OMPTaskDataTy &Data) { 13083 llvm_unreachable("Not supported in SIMD-only mode"); 13084 } 13085 13086 void CGOpenMPSIMDRuntime::emitTaskLoopCall( 13087 CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D, 13088 llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds, 13089 const Expr *IfCond, const OMPTaskDataTy &Data) { 13090 llvm_unreachable("Not supported in SIMD-only mode"); 13091 } 13092 13093 void CGOpenMPSIMDRuntime::emitReduction( 13094 CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> Privates, 13095 ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs, 13096 ArrayRef<const Expr *> ReductionOps, ReductionOptionsTy Options) { 13097 assert(Options.SimpleReduction && "Only simple reduction is expected."); 13098 CGOpenMPRuntime::emitReduction(CGF, Loc, Privates, LHSExprs, RHSExprs, 13099 ReductionOps, Options); 13100 } 13101 13102 llvm::Value *CGOpenMPSIMDRuntime::emitTaskReductionInit( 13103 CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs, 13104 ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) { 13105 llvm_unreachable("Not supported in SIMD-only mode"); 13106 } 13107 13108 void CGOpenMPSIMDRuntime::emitTaskReductionFini(CodeGenFunction &CGF, 13109 SourceLocation Loc, 13110 bool 
IsWorksharingReduction) { 13111 llvm_unreachable("Not supported in SIMD-only mode"); 13112 } 13113 13114 void CGOpenMPSIMDRuntime::emitTaskReductionFixups(CodeGenFunction &CGF, 13115 SourceLocation Loc, 13116 ReductionCodeGen &RCG, 13117 unsigned N) { 13118 llvm_unreachable("Not supported in SIMD-only mode"); 13119 } 13120 13121 Address CGOpenMPSIMDRuntime::getTaskReductionItem(CodeGenFunction &CGF, 13122 SourceLocation Loc, 13123 llvm::Value *ReductionsPtr, 13124 LValue SharedLVal) { 13125 llvm_unreachable("Not supported in SIMD-only mode"); 13126 } 13127 13128 void CGOpenMPSIMDRuntime::emitTaskwaitCall(CodeGenFunction &CGF, 13129 SourceLocation Loc, 13130 const OMPTaskDataTy &Data) { 13131 llvm_unreachable("Not supported in SIMD-only mode"); 13132 } 13133 13134 void CGOpenMPSIMDRuntime::emitCancellationPointCall( 13135 CodeGenFunction &CGF, SourceLocation Loc, 13136 OpenMPDirectiveKind CancelRegion) { 13137 llvm_unreachable("Not supported in SIMD-only mode"); 13138 } 13139 13140 void CGOpenMPSIMDRuntime::emitCancelCall(CodeGenFunction &CGF, 13141 SourceLocation Loc, const Expr *IfCond, 13142 OpenMPDirectiveKind CancelRegion) { 13143 llvm_unreachable("Not supported in SIMD-only mode"); 13144 } 13145 13146 void CGOpenMPSIMDRuntime::emitTargetOutlinedFunction( 13147 const OMPExecutableDirective &D, StringRef ParentName, 13148 llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, 13149 bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) { 13150 llvm_unreachable("Not supported in SIMD-only mode"); 13151 } 13152 13153 void CGOpenMPSIMDRuntime::emitTargetCall( 13154 CodeGenFunction &CGF, const OMPExecutableDirective &D, 13155 llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond, 13156 llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device, 13157 llvm::function_ref<llvm::Value *(CodeGenFunction &CGF, 13158 const OMPLoopDirective &D)> 13159 SizeEmitter) { 13160 llvm_unreachable("Not supported in SIMD-only mode"); 13161 } 13162 
13163 bool CGOpenMPSIMDRuntime::emitTargetFunctions(GlobalDecl GD) { 13164 llvm_unreachable("Not supported in SIMD-only mode"); 13165 } 13166 13167 bool CGOpenMPSIMDRuntime::emitTargetGlobalVariable(GlobalDecl GD) { 13168 llvm_unreachable("Not supported in SIMD-only mode"); 13169 } 13170 13171 bool CGOpenMPSIMDRuntime::emitTargetGlobal(GlobalDecl GD) { 13172 return false; 13173 } 13174 13175 void CGOpenMPSIMDRuntime::emitTeamsCall(CodeGenFunction &CGF, 13176 const OMPExecutableDirective &D, 13177 SourceLocation Loc, 13178 llvm::Function *OutlinedFn, 13179 ArrayRef<llvm::Value *> CapturedVars) { 13180 llvm_unreachable("Not supported in SIMD-only mode"); 13181 } 13182 13183 void CGOpenMPSIMDRuntime::emitNumTeamsClause(CodeGenFunction &CGF, 13184 const Expr *NumTeams, 13185 const Expr *ThreadLimit, 13186 SourceLocation Loc) { 13187 llvm_unreachable("Not supported in SIMD-only mode"); 13188 } 13189 13190 void CGOpenMPSIMDRuntime::emitTargetDataCalls( 13191 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, 13192 const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) { 13193 llvm_unreachable("Not supported in SIMD-only mode"); 13194 } 13195 13196 void CGOpenMPSIMDRuntime::emitTargetDataStandAloneCall( 13197 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, 13198 const Expr *Device) { 13199 llvm_unreachable("Not supported in SIMD-only mode"); 13200 } 13201 13202 void CGOpenMPSIMDRuntime::emitDoacrossInit(CodeGenFunction &CGF, 13203 const OMPLoopDirective &D, 13204 ArrayRef<Expr *> NumIterations) { 13205 llvm_unreachable("Not supported in SIMD-only mode"); 13206 } 13207 13208 void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF, 13209 const OMPDependClause *C) { 13210 llvm_unreachable("Not supported in SIMD-only mode"); 13211 } 13212 13213 const VarDecl * 13214 CGOpenMPSIMDRuntime::translateParameter(const FieldDecl *FD, 13215 const VarDecl *NativeParam) const { 13216 llvm_unreachable("Not 
supported in SIMD-only mode"); 13217 } 13218 13219 Address 13220 CGOpenMPSIMDRuntime::getParameterAddress(CodeGenFunction &CGF, 13221 const VarDecl *NativeParam, 13222 const VarDecl *TargetParam) const { 13223 llvm_unreachable("Not supported in SIMD-only mode"); 13224 } 13225