1 //===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This provides a class for OpenMP runtime code generation. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "CGOpenMPRuntime.h" 14 #include "CGCXXABI.h" 15 #include "CGCleanup.h" 16 #include "CGRecordLayout.h" 17 #include "CodeGenFunction.h" 18 #include "TargetInfo.h" 19 #include "clang/AST/APValue.h" 20 #include "clang/AST/Attr.h" 21 #include "clang/AST/Decl.h" 22 #include "clang/AST/OpenMPClause.h" 23 #include "clang/AST/StmtOpenMP.h" 24 #include "clang/AST/StmtVisitor.h" 25 #include "clang/Basic/BitmaskEnum.h" 26 #include "clang/Basic/FileManager.h" 27 #include "clang/Basic/OpenMPKinds.h" 28 #include "clang/Basic/SourceManager.h" 29 #include "clang/CodeGen/ConstantInitBuilder.h" 30 #include "llvm/ADT/ArrayRef.h" 31 #include "llvm/ADT/SetOperations.h" 32 #include "llvm/ADT/StringExtras.h" 33 #include "llvm/Bitcode/BitcodeReader.h" 34 #include "llvm/IR/Constants.h" 35 #include "llvm/IR/DerivedTypes.h" 36 #include "llvm/IR/GlobalValue.h" 37 #include "llvm/IR/Value.h" 38 #include "llvm/Support/AtomicOrdering.h" 39 #include "llvm/Support/Format.h" 40 #include "llvm/Support/raw_ostream.h" 41 #include <cassert> 42 #include <numeric> 43 44 using namespace clang; 45 using namespace CodeGen; 46 using namespace llvm::omp; 47 48 namespace { 49 /// Base class for handling code generation inside OpenMP regions. 50 class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo { 51 public: 52 /// Kinds of OpenMP regions used in codegen. 
53 enum CGOpenMPRegionKind { 54 /// Region with outlined function for standalone 'parallel' 55 /// directive. 56 ParallelOutlinedRegion, 57 /// Region with outlined function for standalone 'task' directive. 58 TaskOutlinedRegion, 59 /// Region for constructs that do not require function outlining, 60 /// like 'for', 'sections', 'atomic' etc. directives. 61 InlinedRegion, 62 /// Region with outlined function for standalone 'target' directive. 63 TargetRegion, 64 }; 65 66 CGOpenMPRegionInfo(const CapturedStmt &CS, 67 const CGOpenMPRegionKind RegionKind, 68 const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind, 69 bool HasCancel) 70 : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind), 71 CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {} 72 73 CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind, 74 const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind, 75 bool HasCancel) 76 : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen), 77 Kind(Kind), HasCancel(HasCancel) {} 78 79 /// Get a variable or parameter for storing global thread id 80 /// inside OpenMP construct. 81 virtual const VarDecl *getThreadIDVariable() const = 0; 82 83 /// Emit the captured statement body. 84 void EmitBody(CodeGenFunction &CGF, const Stmt *S) override; 85 86 /// Get an LValue for the current ThreadID variable. 87 /// \return LValue for thread id variable. This LValue always has type int32*. 
88 virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF); 89 90 virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {} 91 92 CGOpenMPRegionKind getRegionKind() const { return RegionKind; } 93 94 OpenMPDirectiveKind getDirectiveKind() const { return Kind; } 95 96 bool hasCancel() const { return HasCancel; } 97 98 static bool classof(const CGCapturedStmtInfo *Info) { 99 return Info->getKind() == CR_OpenMP; 100 } 101 102 ~CGOpenMPRegionInfo() override = default; 103 104 protected: 105 CGOpenMPRegionKind RegionKind; 106 RegionCodeGenTy CodeGen; 107 OpenMPDirectiveKind Kind; 108 bool HasCancel; 109 }; 110 111 /// API for captured statement code generation in OpenMP constructs. 112 class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo { 113 public: 114 CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar, 115 const RegionCodeGenTy &CodeGen, 116 OpenMPDirectiveKind Kind, bool HasCancel, 117 StringRef HelperName) 118 : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind, 119 HasCancel), 120 ThreadIDVar(ThreadIDVar), HelperName(HelperName) { 121 assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region."); 122 } 123 124 /// Get a variable or parameter for storing global thread id 125 /// inside OpenMP construct. 126 const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; } 127 128 /// Get the name of the capture helper. 129 StringRef getHelperName() const override { return HelperName; } 130 131 static bool classof(const CGCapturedStmtInfo *Info) { 132 return CGOpenMPRegionInfo::classof(Info) && 133 cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == 134 ParallelOutlinedRegion; 135 } 136 137 private: 138 /// A variable or parameter storing global thread id for OpenMP 139 /// constructs. 140 const VarDecl *ThreadIDVar; 141 StringRef HelperName; 142 }; 143 144 /// API for captured statement code generation in OpenMP constructs. 
145 class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo { 146 public: 147 class UntiedTaskActionTy final : public PrePostActionTy { 148 bool Untied; 149 const VarDecl *PartIDVar; 150 const RegionCodeGenTy UntiedCodeGen; 151 llvm::SwitchInst *UntiedSwitch = nullptr; 152 153 public: 154 UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar, 155 const RegionCodeGenTy &UntiedCodeGen) 156 : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {} 157 void Enter(CodeGenFunction &CGF) override { 158 if (Untied) { 159 // Emit task switching point. 160 LValue PartIdLVal = CGF.EmitLoadOfPointerLValue( 161 CGF.GetAddrOfLocalVar(PartIDVar), 162 PartIDVar->getType()->castAs<PointerType>()); 163 llvm::Value *Res = 164 CGF.EmitLoadOfScalar(PartIdLVal, PartIDVar->getLocation()); 165 llvm::BasicBlock *DoneBB = CGF.createBasicBlock(".untied.done."); 166 UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB); 167 CGF.EmitBlock(DoneBB); 168 CGF.EmitBranchThroughCleanup(CGF.ReturnBlock); 169 CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp.")); 170 UntiedSwitch->addCase(CGF.Builder.getInt32(0), 171 CGF.Builder.GetInsertBlock()); 172 emitUntiedSwitch(CGF); 173 } 174 } 175 void emitUntiedSwitch(CodeGenFunction &CGF) const { 176 if (Untied) { 177 LValue PartIdLVal = CGF.EmitLoadOfPointerLValue( 178 CGF.GetAddrOfLocalVar(PartIDVar), 179 PartIDVar->getType()->castAs<PointerType>()); 180 CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()), 181 PartIdLVal); 182 UntiedCodeGen(CGF); 183 CodeGenFunction::JumpDest CurPoint = 184 CGF.getJumpDestInCurrentScope(".untied.next."); 185 CGF.EmitBranch(CGF.ReturnBlock.getBlock()); 186 CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp.")); 187 UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()), 188 CGF.Builder.GetInsertBlock()); 189 CGF.EmitBranchThroughCleanup(CurPoint); 190 CGF.EmitBlock(CurPoint.getBlock()); 191 } 192 } 193 unsigned getNumberOfParts() const { return 
UntiedSwitch->getNumCases(); } 194 }; 195 CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS, 196 const VarDecl *ThreadIDVar, 197 const RegionCodeGenTy &CodeGen, 198 OpenMPDirectiveKind Kind, bool HasCancel, 199 const UntiedTaskActionTy &Action) 200 : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel), 201 ThreadIDVar(ThreadIDVar), Action(Action) { 202 assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region."); 203 } 204 205 /// Get a variable or parameter for storing global thread id 206 /// inside OpenMP construct. 207 const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; } 208 209 /// Get an LValue for the current ThreadID variable. 210 LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override; 211 212 /// Get the name of the capture helper. 213 StringRef getHelperName() const override { return ".omp_outlined."; } 214 215 void emitUntiedSwitch(CodeGenFunction &CGF) override { 216 Action.emitUntiedSwitch(CGF); 217 } 218 219 static bool classof(const CGCapturedStmtInfo *Info) { 220 return CGOpenMPRegionInfo::classof(Info) && 221 cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == 222 TaskOutlinedRegion; 223 } 224 225 private: 226 /// A variable or parameter storing global thread id for OpenMP 227 /// constructs. 228 const VarDecl *ThreadIDVar; 229 /// Action for emitting code for untied tasks. 230 const UntiedTaskActionTy &Action; 231 }; 232 233 /// API for inlined captured statement code generation in OpenMP 234 /// constructs. 235 class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo { 236 public: 237 CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI, 238 const RegionCodeGenTy &CodeGen, 239 OpenMPDirectiveKind Kind, bool HasCancel) 240 : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel), 241 OldCSI(OldCSI), 242 OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {} 243 244 // Retrieve the value of the context parameter. 
245 llvm::Value *getContextValue() const override { 246 if (OuterRegionInfo) 247 return OuterRegionInfo->getContextValue(); 248 llvm_unreachable("No context value for inlined OpenMP region"); 249 } 250 251 void setContextValue(llvm::Value *V) override { 252 if (OuterRegionInfo) { 253 OuterRegionInfo->setContextValue(V); 254 return; 255 } 256 llvm_unreachable("No context value for inlined OpenMP region"); 257 } 258 259 /// Lookup the captured field decl for a variable. 260 const FieldDecl *lookup(const VarDecl *VD) const override { 261 if (OuterRegionInfo) 262 return OuterRegionInfo->lookup(VD); 263 // If there is no outer outlined region,no need to lookup in a list of 264 // captured variables, we can use the original one. 265 return nullptr; 266 } 267 268 FieldDecl *getThisFieldDecl() const override { 269 if (OuterRegionInfo) 270 return OuterRegionInfo->getThisFieldDecl(); 271 return nullptr; 272 } 273 274 /// Get a variable or parameter for storing global thread id 275 /// inside OpenMP construct. 276 const VarDecl *getThreadIDVariable() const override { 277 if (OuterRegionInfo) 278 return OuterRegionInfo->getThreadIDVariable(); 279 return nullptr; 280 } 281 282 /// Get an LValue for the current ThreadID variable. 283 LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override { 284 if (OuterRegionInfo) 285 return OuterRegionInfo->getThreadIDVariableLValue(CGF); 286 llvm_unreachable("No LValue for inlined OpenMP construct"); 287 } 288 289 /// Get the name of the capture helper. 
290 StringRef getHelperName() const override { 291 if (auto *OuterRegionInfo = getOldCSI()) 292 return OuterRegionInfo->getHelperName(); 293 llvm_unreachable("No helper name for inlined OpenMP construct"); 294 } 295 296 void emitUntiedSwitch(CodeGenFunction &CGF) override { 297 if (OuterRegionInfo) 298 OuterRegionInfo->emitUntiedSwitch(CGF); 299 } 300 301 CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; } 302 303 static bool classof(const CGCapturedStmtInfo *Info) { 304 return CGOpenMPRegionInfo::classof(Info) && 305 cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion; 306 } 307 308 ~CGOpenMPInlinedRegionInfo() override = default; 309 310 private: 311 /// CodeGen info about outer OpenMP region. 312 CodeGenFunction::CGCapturedStmtInfo *OldCSI; 313 CGOpenMPRegionInfo *OuterRegionInfo; 314 }; 315 316 /// API for captured statement code generation in OpenMP target 317 /// constructs. For this captures, implicit parameters are used instead of the 318 /// captured fields. The name of the target region has to be unique in a given 319 /// application so it is provided by the client, because only the client has 320 /// the information to generate that. 321 class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo { 322 public: 323 CGOpenMPTargetRegionInfo(const CapturedStmt &CS, 324 const RegionCodeGenTy &CodeGen, StringRef HelperName) 325 : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target, 326 /*HasCancel=*/false), 327 HelperName(HelperName) {} 328 329 /// This is unused for target regions because each starts executing 330 /// with a single thread. 331 const VarDecl *getThreadIDVariable() const override { return nullptr; } 332 333 /// Get the name of the capture helper. 
334 StringRef getHelperName() const override { return HelperName; } 335 336 static bool classof(const CGCapturedStmtInfo *Info) { 337 return CGOpenMPRegionInfo::classof(Info) && 338 cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion; 339 } 340 341 private: 342 StringRef HelperName; 343 }; 344 345 static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) { 346 llvm_unreachable("No codegen for expressions"); 347 } 348 /// API for generation of expressions captured in a innermost OpenMP 349 /// region. 350 class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo { 351 public: 352 CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS) 353 : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen, 354 OMPD_unknown, 355 /*HasCancel=*/false), 356 PrivScope(CGF) { 357 // Make sure the globals captured in the provided statement are local by 358 // using the privatization logic. We assume the same variable is not 359 // captured more than once. 360 for (const auto &C : CS.captures()) { 361 if (!C.capturesVariable() && !C.capturesVariableByCopy()) 362 continue; 363 364 const VarDecl *VD = C.getCapturedVar(); 365 if (VD->isLocalVarDeclOrParm()) 366 continue; 367 368 DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD), 369 /*RefersToEnclosingVariableOrCapture=*/false, 370 VD->getType().getNonReferenceType(), VK_LValue, 371 C.getLocation()); 372 PrivScope.addPrivate( 373 VD, [&CGF, &DRE]() { return CGF.EmitLValue(&DRE).getAddress(CGF); }); 374 } 375 (void)PrivScope.Privatize(); 376 } 377 378 /// Lookup the captured field decl for a variable. 379 const FieldDecl *lookup(const VarDecl *VD) const override { 380 if (const FieldDecl *FD = CGOpenMPInlinedRegionInfo::lookup(VD)) 381 return FD; 382 return nullptr; 383 } 384 385 /// Emit the captured statement body. 
386 void EmitBody(CodeGenFunction &CGF, const Stmt *S) override { 387 llvm_unreachable("No body for expressions"); 388 } 389 390 /// Get a variable or parameter for storing global thread id 391 /// inside OpenMP construct. 392 const VarDecl *getThreadIDVariable() const override { 393 llvm_unreachable("No thread id for expressions"); 394 } 395 396 /// Get the name of the capture helper. 397 StringRef getHelperName() const override { 398 llvm_unreachable("No helper name for expressions"); 399 } 400 401 static bool classof(const CGCapturedStmtInfo *Info) { return false; } 402 403 private: 404 /// Private scope to capture global variables. 405 CodeGenFunction::OMPPrivateScope PrivScope; 406 }; 407 408 /// RAII for emitting code of OpenMP constructs. 409 class InlinedOpenMPRegionRAII { 410 CodeGenFunction &CGF; 411 llvm::DenseMap<const VarDecl *, FieldDecl *> LambdaCaptureFields; 412 FieldDecl *LambdaThisCaptureField = nullptr; 413 const CodeGen::CGBlockInfo *BlockInfo = nullptr; 414 bool NoInheritance = false; 415 416 public: 417 /// Constructs region for combined constructs. 418 /// \param CodeGen Code generation sequence for combined directives. Includes 419 /// a list of functions used for code generation of implicitly inlined 420 /// regions. 421 InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen, 422 OpenMPDirectiveKind Kind, bool HasCancel, 423 bool NoInheritance = true) 424 : CGF(CGF), NoInheritance(NoInheritance) { 425 // Start emission for the construct. 426 CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo( 427 CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel); 428 if (NoInheritance) { 429 std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields); 430 LambdaThisCaptureField = CGF.LambdaThisCaptureField; 431 CGF.LambdaThisCaptureField = nullptr; 432 BlockInfo = CGF.BlockInfo; 433 CGF.BlockInfo = nullptr; 434 } 435 } 436 437 ~InlinedOpenMPRegionRAII() { 438 // Restore original CapturedStmtInfo only if we're done with code emission. 
439 auto *OldCSI = 440 cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI(); 441 delete CGF.CapturedStmtInfo; 442 CGF.CapturedStmtInfo = OldCSI; 443 if (NoInheritance) { 444 std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields); 445 CGF.LambdaThisCaptureField = LambdaThisCaptureField; 446 CGF.BlockInfo = BlockInfo; 447 } 448 } 449 }; 450 451 /// Values for bit flags used in the ident_t to describe the fields. 452 /// All enumeric elements are named and described in accordance with the code 453 /// from https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h 454 enum OpenMPLocationFlags : unsigned { 455 /// Use trampoline for internal microtask. 456 OMP_IDENT_IMD = 0x01, 457 /// Use c-style ident structure. 458 OMP_IDENT_KMPC = 0x02, 459 /// Atomic reduction option for kmpc_reduce. 460 OMP_ATOMIC_REDUCE = 0x10, 461 /// Explicit 'barrier' directive. 462 OMP_IDENT_BARRIER_EXPL = 0x20, 463 /// Implicit barrier in code. 464 OMP_IDENT_BARRIER_IMPL = 0x40, 465 /// Implicit barrier in 'for' directive. 466 OMP_IDENT_BARRIER_IMPL_FOR = 0x40, 467 /// Implicit barrier in 'sections' directive. 468 OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0, 469 /// Implicit barrier in 'single' directive. 470 OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140, 471 /// Call of __kmp_for_static_init for static loop. 472 OMP_IDENT_WORK_LOOP = 0x200, 473 /// Call of __kmp_for_static_init for sections. 474 OMP_IDENT_WORK_SECTIONS = 0x400, 475 /// Call of __kmp_for_static_init for distribute. 476 OMP_IDENT_WORK_DISTRIBUTE = 0x800, 477 LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE) 478 }; 479 480 namespace { 481 LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE(); 482 /// Values for bit flags for marking which requires clauses have been used. 483 enum OpenMPOffloadingRequiresDirFlags : int64_t { 484 /// flag undefined. 485 OMP_REQ_UNDEFINED = 0x000, 486 /// no requires clause present. 487 OMP_REQ_NONE = 0x001, 488 /// reverse_offload clause. 
489 OMP_REQ_REVERSE_OFFLOAD = 0x002, 490 /// unified_address clause. 491 OMP_REQ_UNIFIED_ADDRESS = 0x004, 492 /// unified_shared_memory clause. 493 OMP_REQ_UNIFIED_SHARED_MEMORY = 0x008, 494 /// dynamic_allocators clause. 495 OMP_REQ_DYNAMIC_ALLOCATORS = 0x010, 496 LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_REQ_DYNAMIC_ALLOCATORS) 497 }; 498 499 enum OpenMPOffloadingReservedDeviceIDs { 500 /// Device ID if the device was not defined, runtime should get it 501 /// from environment variables in the spec. 502 OMP_DEVICEID_UNDEF = -1, 503 }; 504 } // anonymous namespace 505 506 /// Describes ident structure that describes a source location. 507 /// All descriptions are taken from 508 /// https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h 509 /// Original structure: 510 /// typedef struct ident { 511 /// kmp_int32 reserved_1; /**< might be used in Fortran; 512 /// see above */ 513 /// kmp_int32 flags; /**< also f.flags; KMP_IDENT_xxx flags; 514 /// KMP_IDENT_KMPC identifies this union 515 /// member */ 516 /// kmp_int32 reserved_2; /**< not really used in Fortran any more; 517 /// see above */ 518 ///#if USE_ITT_BUILD 519 /// /* but currently used for storing 520 /// region-specific ITT */ 521 /// /* contextual information. */ 522 ///#endif /* USE_ITT_BUILD */ 523 /// kmp_int32 reserved_3; /**< source[4] in Fortran, do not use for 524 /// C++ */ 525 /// char const *psource; /**< String describing the source location. 526 /// The string is composed of semi-colon separated 527 // fields which describe the source file, 528 /// the function and a pair of line numbers that 529 /// delimit the construct. 530 /// */ 531 /// } ident_t; 532 enum IdentFieldIndex { 533 /// might be used in Fortran 534 IdentField_Reserved_1, 535 /// OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member. 
536 IdentField_Flags, 537 /// Not really used in Fortran any more 538 IdentField_Reserved_2, 539 /// Source[4] in Fortran, do not use for C++ 540 IdentField_Reserved_3, 541 /// String describing the source location. The string is composed of 542 /// semi-colon separated fields which describe the source file, the function 543 /// and a pair of line numbers that delimit the construct. 544 IdentField_PSource 545 }; 546 547 /// Schedule types for 'omp for' loops (these enumerators are taken from 548 /// the enum sched_type in kmp.h). 549 enum OpenMPSchedType { 550 /// Lower bound for default (unordered) versions. 551 OMP_sch_lower = 32, 552 OMP_sch_static_chunked = 33, 553 OMP_sch_static = 34, 554 OMP_sch_dynamic_chunked = 35, 555 OMP_sch_guided_chunked = 36, 556 OMP_sch_runtime = 37, 557 OMP_sch_auto = 38, 558 /// static with chunk adjustment (e.g., simd) 559 OMP_sch_static_balanced_chunked = 45, 560 /// Lower bound for 'ordered' versions. 561 OMP_ord_lower = 64, 562 OMP_ord_static_chunked = 65, 563 OMP_ord_static = 66, 564 OMP_ord_dynamic_chunked = 67, 565 OMP_ord_guided_chunked = 68, 566 OMP_ord_runtime = 69, 567 OMP_ord_auto = 70, 568 OMP_sch_default = OMP_sch_static, 569 /// dist_schedule types 570 OMP_dist_sch_static_chunked = 91, 571 OMP_dist_sch_static = 92, 572 /// Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers. 573 /// Set if the monotonic schedule modifier was present. 574 OMP_sch_modifier_monotonic = (1 << 29), 575 /// Set if the nonmonotonic schedule modifier was present. 576 OMP_sch_modifier_nonmonotonic = (1 << 30), 577 }; 578 579 /// A basic class for pre|post-action for advanced codegen sequence for OpenMP 580 /// region. 
581 class CleanupTy final : public EHScopeStack::Cleanup { 582 PrePostActionTy *Action; 583 584 public: 585 explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {} 586 void Emit(CodeGenFunction &CGF, Flags /*flags*/) override { 587 if (!CGF.HaveInsertPoint()) 588 return; 589 Action->Exit(CGF); 590 } 591 }; 592 593 } // anonymous namespace 594 595 void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const { 596 CodeGenFunction::RunCleanupsScope Scope(CGF); 597 if (PrePostAction) { 598 CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction); 599 Callback(CodeGen, CGF, *PrePostAction); 600 } else { 601 PrePostActionTy Action; 602 Callback(CodeGen, CGF, Action); 603 } 604 } 605 606 /// Check if the combiner is a call to UDR combiner and if it is so return the 607 /// UDR decl used for reduction. 608 static const OMPDeclareReductionDecl * 609 getReductionInit(const Expr *ReductionOp) { 610 if (const auto *CE = dyn_cast<CallExpr>(ReductionOp)) 611 if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee())) 612 if (const auto *DRE = 613 dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts())) 614 if (const auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) 615 return DRD; 616 return nullptr; 617 } 618 619 static void emitInitWithReductionInitializer(CodeGenFunction &CGF, 620 const OMPDeclareReductionDecl *DRD, 621 const Expr *InitOp, 622 Address Private, Address Original, 623 QualType Ty) { 624 if (DRD->getInitializer()) { 625 std::pair<llvm::Function *, llvm::Function *> Reduction = 626 CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD); 627 const auto *CE = cast<CallExpr>(InitOp); 628 const auto *OVE = cast<OpaqueValueExpr>(CE->getCallee()); 629 const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts(); 630 const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts(); 631 const auto *LHSDRE = 632 cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr()); 633 const auto *RHSDRE = 634 
cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr()); 635 CodeGenFunction::OMPPrivateScope PrivateScope(CGF); 636 PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()), 637 [=]() { return Private; }); 638 PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()), 639 [=]() { return Original; }); 640 (void)PrivateScope.Privatize(); 641 RValue Func = RValue::get(Reduction.second); 642 CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func); 643 CGF.EmitIgnoredExpr(InitOp); 644 } else { 645 llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty); 646 std::string Name = CGF.CGM.getOpenMPRuntime().getName({"init"}); 647 auto *GV = new llvm::GlobalVariable( 648 CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true, 649 llvm::GlobalValue::PrivateLinkage, Init, Name); 650 LValue LV = CGF.MakeNaturalAlignAddrLValue(GV, Ty); 651 RValue InitRVal; 652 switch (CGF.getEvaluationKind(Ty)) { 653 case TEK_Scalar: 654 InitRVal = CGF.EmitLoadOfLValue(LV, DRD->getLocation()); 655 break; 656 case TEK_Complex: 657 InitRVal = 658 RValue::getComplex(CGF.EmitLoadOfComplex(LV, DRD->getLocation())); 659 break; 660 case TEK_Aggregate: { 661 OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_LValue); 662 CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, LV); 663 CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(), 664 /*IsInitializer=*/false); 665 return; 666 } 667 } 668 OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_PRValue); 669 CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal); 670 CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(), 671 /*IsInitializer=*/false); 672 } 673 } 674 675 /// Emit initialization of arrays of complex types. 676 /// \param DestAddr Address of the array. 677 /// \param Type Type of array. 678 /// \param Init Initial expression of array. 679 /// \param SrcAddr Address of the original array. 
680 static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr, 681 QualType Type, bool EmitDeclareReductionInit, 682 const Expr *Init, 683 const OMPDeclareReductionDecl *DRD, 684 Address SrcAddr = Address::invalid()) { 685 // Perform element-by-element initialization. 686 QualType ElementTy; 687 688 // Drill down to the base element type on both arrays. 689 const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe(); 690 llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr); 691 DestAddr = 692 CGF.Builder.CreateElementBitCast(DestAddr, DestAddr.getElementType()); 693 if (DRD) 694 SrcAddr = 695 CGF.Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType()); 696 697 llvm::Value *SrcBegin = nullptr; 698 if (DRD) 699 SrcBegin = SrcAddr.getPointer(); 700 llvm::Value *DestBegin = DestAddr.getPointer(); 701 // Cast from pointer to array type to pointer to single element. 702 llvm::Value *DestEnd = 703 CGF.Builder.CreateGEP(DestAddr.getElementType(), DestBegin, NumElements); 704 // The basic structure here is a while-do loop. 705 llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arrayinit.body"); 706 llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arrayinit.done"); 707 llvm::Value *IsEmpty = 708 CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty"); 709 CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB); 710 711 // Enter the loop body, making that address the current address. 
712 llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock(); 713 CGF.EmitBlock(BodyBB); 714 715 CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy); 716 717 llvm::PHINode *SrcElementPHI = nullptr; 718 Address SrcElementCurrent = Address::invalid(); 719 if (DRD) { 720 SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2, 721 "omp.arraycpy.srcElementPast"); 722 SrcElementPHI->addIncoming(SrcBegin, EntryBB); 723 SrcElementCurrent = 724 Address(SrcElementPHI, 725 SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize)); 726 } 727 llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI( 728 DestBegin->getType(), 2, "omp.arraycpy.destElementPast"); 729 DestElementPHI->addIncoming(DestBegin, EntryBB); 730 Address DestElementCurrent = 731 Address(DestElementPHI, 732 DestAddr.getAlignment().alignmentOfArrayElement(ElementSize)); 733 734 // Emit copy. 735 { 736 CodeGenFunction::RunCleanupsScope InitScope(CGF); 737 if (EmitDeclareReductionInit) { 738 emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent, 739 SrcElementCurrent, ElementTy); 740 } else 741 CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(), 742 /*IsInitializer=*/false); 743 } 744 745 if (DRD) { 746 // Shift the address forward by one element. 747 llvm::Value *SrcElementNext = CGF.Builder.CreateConstGEP1_32( 748 SrcAddr.getElementType(), SrcElementPHI, /*Idx0=*/1, 749 "omp.arraycpy.dest.element"); 750 SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock()); 751 } 752 753 // Shift the address forward by one element. 754 llvm::Value *DestElementNext = CGF.Builder.CreateConstGEP1_32( 755 DestAddr.getElementType(), DestElementPHI, /*Idx0=*/1, 756 "omp.arraycpy.dest.element"); 757 // Check whether we've reached the end. 
758 llvm::Value *Done = 759 CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done"); 760 CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB); 761 DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock()); 762 763 // Done. 764 CGF.EmitBlock(DoneBB, /*IsFinished=*/true); 765 } 766 767 LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) { 768 return CGF.EmitOMPSharedLValue(E); 769 } 770 771 LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF, 772 const Expr *E) { 773 if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E)) 774 return CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false); 775 return LValue(); 776 } 777 778 void ReductionCodeGen::emitAggregateInitialization( 779 CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal, 780 const OMPDeclareReductionDecl *DRD) { 781 // Emit VarDecl with copy init for arrays. 782 // Get the address of the original variable captured in current 783 // captured region. 784 const auto *PrivateVD = 785 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl()); 786 bool EmitDeclareReductionInit = 787 DRD && (DRD->getInitializer() || !PrivateVD->hasInit()); 788 EmitOMPAggregateInit(CGF, PrivateAddr, PrivateVD->getType(), 789 EmitDeclareReductionInit, 790 EmitDeclareReductionInit ? 
ClausesData[N].ReductionOp 791 : PrivateVD->getInit(), 792 DRD, SharedLVal.getAddress(CGF)); 793 } 794 795 ReductionCodeGen::ReductionCodeGen(ArrayRef<const Expr *> Shareds, 796 ArrayRef<const Expr *> Origs, 797 ArrayRef<const Expr *> Privates, 798 ArrayRef<const Expr *> ReductionOps) { 799 ClausesData.reserve(Shareds.size()); 800 SharedAddresses.reserve(Shareds.size()); 801 Sizes.reserve(Shareds.size()); 802 BaseDecls.reserve(Shareds.size()); 803 const auto *IOrig = Origs.begin(); 804 const auto *IPriv = Privates.begin(); 805 const auto *IRed = ReductionOps.begin(); 806 for (const Expr *Ref : Shareds) { 807 ClausesData.emplace_back(Ref, *IOrig, *IPriv, *IRed); 808 std::advance(IOrig, 1); 809 std::advance(IPriv, 1); 810 std::advance(IRed, 1); 811 } 812 } 813 814 void ReductionCodeGen::emitSharedOrigLValue(CodeGenFunction &CGF, unsigned N) { 815 assert(SharedAddresses.size() == N && OrigAddresses.size() == N && 816 "Number of generated lvalues must be exactly N."); 817 LValue First = emitSharedLValue(CGF, ClausesData[N].Shared); 818 LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Shared); 819 SharedAddresses.emplace_back(First, Second); 820 if (ClausesData[N].Shared == ClausesData[N].Ref) { 821 OrigAddresses.emplace_back(First, Second); 822 } else { 823 LValue First = emitSharedLValue(CGF, ClausesData[N].Ref); 824 LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Ref); 825 OrigAddresses.emplace_back(First, Second); 826 } 827 } 828 829 void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) { 830 const auto *PrivateVD = 831 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl()); 832 QualType PrivateType = PrivateVD->getType(); 833 bool AsArraySection = isa<OMPArraySectionExpr>(ClausesData[N].Ref); 834 if (!PrivateType->isVariablyModifiedType()) { 835 Sizes.emplace_back( 836 CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType()), 837 nullptr); 838 return; 839 } 840 llvm::Value *Size; 841 llvm::Value 
// [chunk boundary] Continues ReductionCodeGen::emitAggregateType(CGF, N) in
// the middle of the `llvm::Value *SizeInChars;` declaration.
      *SizeInChars;
  auto *ElemType =
      cast<llvm::PointerType>(OrigAddresses[N].first.getPointer(CGF)->getType())
          ->getElementType();
  auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType);
  if (AsArraySection) {
    // Array section: element count = (second - first) + 1, then scale by the
    // element size to get the size in chars.
    Size = CGF.Builder.CreatePtrDiff(OrigAddresses[N].second.getPointer(CGF),
                                     OrigAddresses[N].first.getPointer(CGF));
    Size = CGF.Builder.CreateNUWAdd(
        Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1));
    SizeInChars = CGF.Builder.CreateNUWMul(Size, ElemSizeOf);
  } else {
    // Whole variable: take the type size and derive the element count.
    SizeInChars =
        CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType());
    Size = CGF.Builder.CreateExactUDiv(SizeInChars, ElemSizeOf);
  }
  Sizes.emplace_back(SizeInChars, Size);
  // Map the VLA size expression of the private type to the computed element
  // count while the variably modified type is emitted.
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}

/// Variant of emitAggregateType that takes an already computed \p Size
/// instead of deriving it. For non-variably-modified private types nothing is
/// emitted and \p Size is asserted to be null.
void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N,
                                         llvm::Value *Size) {
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  QualType PrivateType = PrivateVD->getType();
  if (!PrivateType->isVariablyModifiedType()) {
    assert(!Size && !Sizes[N].second &&
           "Size should be nullptr for non-variably modified reduction "
           "items.");
    return;
  }
  // Bind the VLA size expression to the supplied size and emit the type.
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}

/// Emit initialization of the private copy of the N-th reduction item.
/// \param PrivateAddr Address of the private copy (re-cast to its memory type).
/// \param SharedLVal LValue of the shared item (re-cast to the shared type).
/// \param DefaultInit Callback performing default initialization; its boolean
/// result is only consulted on the last branch.
void ReductionCodeGen::emitInitialization(
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal,
    llvm::function_ref<bool(CodeGenFunction &)> DefaultInit) {
  assert(SharedAddresses.size() > N && "No variable was generated");
  const auto *PrivateVD =
// [chunk boundary] Continues ReductionCodeGen::emitInitialization: initializer
// of the `PrivateVD` local.
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  const OMPDeclareReductionDecl *DRD =
      getReductionInit(ClausesData[N].ReductionOp);
  QualType PrivateType = PrivateVD->getType();
  // Re-type both addresses to the memory representation of their AST types.
  PrivateAddr = CGF.Builder.CreateElementBitCast(
      PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
  QualType SharedType = SharedAddresses[N].first.getType();
  SharedLVal = CGF.MakeAddrLValue(
      CGF.Builder.CreateElementBitCast(SharedLVal.getAddress(CGF),
                                       CGF.ConvertTypeForMem(SharedType)),
      SharedType, SharedAddresses[N].first.getBaseInfo(),
      CGF.CGM.getTBAAInfoForSubobject(SharedAddresses[N].first, SharedType));
  if (CGF.getContext().getAsArrayType(PrivateVD->getType())) {
    // Array item: element-wise initialization; DefaultInit runs first only
    // when the declare-reduction has an initializer.
    if (DRD && DRD->getInitializer())
      (void)DefaultInit(CGF);
    emitAggregateInitialization(CGF, N, PrivateAddr, SharedLVal, DRD);
  } else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) {
    // Scalar item initialized through the user-defined reduction.
    (void)DefaultInit(CGF);
    emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp,
                                     PrivateAddr, SharedLVal.getAddress(CGF),
                                     SharedLVal.getType());
  } else if (!DefaultInit(CGF) && PrivateVD->hasInit() &&
             !CGF.isTrivialInitializer(PrivateVD->getInit())) {
    // DefaultInit returned false: emit the variable's own non-trivial
    // initializer into the private copy.
    CGF.EmitAnyExprToMem(PrivateVD->getInit(), PrivateAddr,
                         PrivateVD->getType().getQualifiers(),
                         /*IsInitializer=*/false);
  }
}

/// Return true if the private copy of the N-th item has a type that needs
/// destruction (its destruction kind is not DK_none).
bool ReductionCodeGen::needCleanups(unsigned N) {
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  QualType PrivateType = PrivateVD->getType();
  QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
  return DTorKind != QualType::DK_none;
}

/// Push a destroy cleanup for the private copy of the N-th item at
/// \p PrivateAddr if its type requires destruction.
void ReductionCodeGen::emitCleanups(CodeGenFunction &CGF, unsigned N,
                                    Address PrivateAddr) {
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  QualType PrivateType = PrivateVD->getType();
  QualType::DestructionKind DTorKind =
// [chunk boundary] Continues ReductionCodeGen::emitCleanups: initializer of
// the `DTorKind` local.
      PrivateType.isDestructedType();
  if (needCleanups(N)) {
    PrivateAddr = CGF.Builder.CreateElementBitCast(
        PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
    CGF.pushDestroy(DTorKind, PrivateAddr, PrivateType);
  }
}

/// Starting from \p BaseLV of type \p BaseTy, load through pointer/reference
/// levels until the type matches \p ElTy (or is no longer a pointer or
/// reference), then return the resulting address re-cast to the memory type
/// of \p ElTy.
static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          LValue BaseLV) {
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    if (const auto *PtrTy = BaseTy->getAs<PointerType>()) {
      BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(CGF), PtrTy);
    } else {
      LValue RefLVal = CGF.MakeAddrLValue(BaseLV.getAddress(CGF), BaseTy);
      BaseLV = CGF.EmitLoadOfReferenceLValue(RefLVal);
    }
    BaseTy = BaseTy->getPointeeType();
  }
  return CGF.MakeAddrLValue(
      CGF.Builder.CreateElementBitCast(BaseLV.getAddress(CGF),
                                       CGF.ConvertTypeForMem(ElTy)),
      BaseLV.getType(), BaseLV.getBaseInfo(),
      CGF.CGM.getTBAAInfoForSubobject(BaseLV, BaseLV.getType()));
}

/// Rebuild the pointer/reference indirection chain of \p BaseTy around
/// \p Addr: one memory temporary is created per indirection level (each
/// storing the address of the next), and \p Addr is stored into the innermost
/// one. Returns the outermost temporary, or, when no indirection level was
/// peeled, \p Addr cast to \p BaseLVType with alignment \p BaseLVAlignment.
static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          llvm::Type *BaseLVType, CharUnits BaseLVAlignment,
                          llvm::Value *Addr) {
  Address Tmp = Address::invalid();        // innermost temporary so far
  Address TopTmp = Address::invalid();     // temporary of the previous level
  Address MostTopTmp = Address::invalid(); // outermost temporary (the result)
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    Tmp = CGF.CreateMemTemp(BaseTy);
    if (TopTmp.isValid())
      CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp);
    else
      MostTopTmp = Tmp;
    TopTmp = Tmp;
    BaseTy = BaseTy->getPointeeType();
  }
  llvm::Type *Ty = BaseLVType;
  if (Tmp.isValid())
    Ty = Tmp.getElementType();
  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Addr, Ty);
  if (Tmp.isValid()) {
    CGF.Builder.CreateStore(Addr, Tmp);
    return MostTopTmp;
  }
  return
// [chunk boundary] Continues the final `return` of castToBase: no indirection
// level was peeled, so hand back the cast address directly.
      Address(Addr, BaseLVAlignment);
}

/// For an array section or array subscript expression \p Ref, strip all
/// nested sections/subscripts, store the underlying DeclRefExpr in \p DE and
/// return its VarDecl. For any other expression return nullptr and leave
/// \p DE untouched.
static const VarDecl *getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE) {
  const VarDecl *OrigVD = nullptr;
  if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(Ref)) {
    const Expr *Base = OASE->getBase()->IgnoreParenImpCasts();
    // Peel nested array sections first, then nested subscripts.
    while (const auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base))
      Base = TempOASE->getBase()->IgnoreParenImpCasts();
    while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
      Base = TempASE->getBase()->IgnoreParenImpCasts();
    DE = cast<DeclRefExpr>(Base);
    OrigVD = cast<VarDecl>(DE->getDecl());
  } else if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Ref)) {
    const Expr *Base = ASE->getBase()->IgnoreParenImpCasts();
    while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
      Base = TempASE->getBase()->IgnoreParenImpCasts();
    DE = cast<DeclRefExpr>(Base);
    OrigVD = cast<VarDecl>(DE->getDecl());
  }
  return OrigVD;
}

/// Record the base declaration of the N-th item in BaseDecls and return the
/// address to use for its private copy. When the item is a section/subscript
/// of a base variable, the private pointer is offset by the distance between
/// the base and the shared item and wrapped by castToBase; otherwise
/// \p PrivateAddr is returned unchanged.
Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N,
                                               Address PrivateAddr) {
  const DeclRefExpr *DE;
  if (const VarDecl *OrigVD = ::getBaseDecl(ClausesData[N].Ref, DE)) {
    BaseDecls.emplace_back(OrigVD);
    LValue OriginalBaseLValue = CGF.EmitLValue(DE);
    LValue BaseLValue =
        loadToBegin(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(),
                    OriginalBaseLValue);
    Address SharedAddr = SharedAddresses[N].first.getAddress(CGF);
    // Adjustment = base - shared item, applied to the private pointer below.
    llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff(
        BaseLValue.getPointer(CGF), SharedAddr.getPointer());
    llvm::Value *PrivatePointer =
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            PrivateAddr.getPointer(), SharedAddr.getType());
    llvm::Value *Ptr = CGF.Builder.CreateGEP(
        SharedAddr.getElementType(), PrivatePointer, Adjustment);
    return castToBase(CGF, OrigVD->getType(),
                      SharedAddresses[N].first.getType(),
                      OriginalBaseLValue.getAddress(CGF).getType(),
// [chunk boundary] Continues the castToBase(...) call at the end of the
// section/subscript branch of ReductionCodeGen::adjustPrivateAddress.
                      OriginalBaseLValue.getAlignment(), Ptr);
  }
  // Plain variable reference: the item is its own base declaration.
  BaseDecls.emplace_back(
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Ref)->getDecl()));
  return PrivateAddr;
}

/// Return true if the N-th item's reduction operation resolves to a
/// declare-reduction declaration that has an initializer.
bool ReductionCodeGen::usesReductionInitializer(unsigned N) const {
  const OMPDeclareReductionDecl *DRD =
      getReductionInit(ClausesData[N].ReductionOp);
  return DRD && DRD->getInitializer();
}

/// Return the lvalue obtained by loading through the thread-id variable,
/// which is treated here as a pointer (castAs<PointerType>).
LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) {
  return CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(getThreadIDVariable()),
      getThreadIDVariable()->getType()->castAs<PointerType>());
}

/// Emit the region body via the stored CodeGen callback, bracketed by a
/// terminate scope on the EH stack. Does nothing without an insert point.
void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt *S) {
  if (!CGF.HaveInsertPoint())
    return;
  // 1.2.2 OpenMP Language Terminology
  // Structured block - An executable statement with a single entry at the
  // top and a single exit at the bottom.
  // The point of exit cannot be a branch out of the structured block.
  // longjmp() and throw() must not violate the entry/exit criteria.
// [chunk boundary] Continues CGOpenMPRegionInfo::EmitBody after its leading
// comment: run the body inside a terminate scope.
  CGF.EHStack.pushTerminate();
  if (S)
    CGF.incrementProfileCounter(S);
  CodeGen(CGF);
  CGF.EHStack.popTerminate();
}

/// Task regions: return an lvalue for the thread-id variable itself (no load
/// through a pointer), using the declaration as alignment source.
LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue(
    CodeGenFunction &CGF) {
  return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()),
                            getThreadIDVariable()->getType(),
                            AlignmentSource::Decl);
}

/// Create an unnamed, public, non-mutable, non-bit-field field of type
/// \p FieldTy with no in-class initializer, add it to \p DC and return it.
static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC,
                                       QualType FieldTy) {
  auto *Field = FieldDecl::Create(
      C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy,
      C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()),
      /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit);
  Field->setAccess(AS_public);
  DC->addDecl(Field);
  return Field;
}

/// Construct the runtime for module \p CGM. \p FirstSeparator and
/// \p Separator are stored for later use by getName().
CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM, StringRef FirstSeparator,
                                 StringRef Separator)
    : CGM(CGM), FirstSeparator(FirstSeparator), Separator(Separator),
      OMPBuilder(CGM.getModule()), OffloadEntriesInfoManager(CGM) {
  // Critical-section name type: array of 8 x i32.
  KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8);

  // Initialize Types used in OpenMPIRBuilder from OMPKinds.def
  OMPBuilder.initialize();
  loadOffloadInfoMetadata();
}

/// Drop cached internal variables and erase unused declarations of emitted
/// non-target variables.
void CGOpenMPRuntime::clear() {
  InternalVars.clear();
  // Clean non-target variable declarations possibly used only in debug info.
// [chunk boundary] Continues CGOpenMPRuntime::clear(): erase every tracked
// global that is still alive, is only a declaration and has no uses.
  for (const auto &Data : EmittedNonTargetVariables) {
    if (!Data.getValue().pointsToAliveValue())
      continue;
    auto *GV = dyn_cast<llvm::GlobalVariable>(Data.getValue());
    if (!GV)
      continue;
    if (!GV->isDeclaration() || GV->getNumUses() > 0)
      continue;
    GV->eraseFromParent();
  }
}

/// Concatenate \p Parts into a name: the first part is prefixed with
/// FirstSeparator and every following part with Separator.
std::string CGOpenMPRuntime::getName(ArrayRef<StringRef> Parts) const {
  SmallString<128> Buffer;
  llvm::raw_svector_ostream OS(Buffer);
  StringRef Sep = FirstSeparator;
  for (StringRef Part : Parts) {
    OS << Sep << Part;
    Sep = Separator;
  }
  return std::string(OS.str());
}

/// Emit an internal helper function for a user-defined reduction.
/// \param Ty Reduction type; both parameters are restrict pointers to it.
/// \param CombinerInitializer Expression emitted as the function body (may be
/// null, in which case nothing is emitted for it).
/// \param In Variable mapped to the pointee of the "in" parameter.
/// \param Out Variable mapped to the pointee of the "out" parameter.
/// \param IsCombiner Selects the "omp_combiner" vs "omp_initializer" name;
/// for initializers a non-trivial initializer of \p Out is emitted first.
/// \return The created function.
static llvm::Function *
emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty,
                          const Expr *CombinerInitializer, const VarDecl *In,
                          const VarDecl *Out, bool IsCombiner) {
  // void .omp_combiner.(Ty *out, Ty *in);
  // (The out parameter is pushed first below, so it is the first argument.)
  ASTContext &C = CGM.getContext();
  QualType PtrTy = C.getPointerType(Ty).withRestrict();
  FunctionArgList Args;
  ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(),
                               /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(),
                              /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
  Args.push_back(&OmpOutParm);
  Args.push_back(&OmpInParm);
  const CGFunctionInfo &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName(
      {IsCombiner ? "omp_combiner" : "omp_initializer", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  // When optimizing, make the helper always-inline.
  if (CGM.getLangOpts().Optimize) {
    Fn->removeFnAttr(llvm::Attribute::NoInline);
    Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
    Fn->addFnAttr(llvm::Attribute::AlwaysInline);
  }
  CodeGenFunction CGF(CGM);
  // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions.
  // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions.
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, In->getLocation(),
                    Out->getLocation());
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm);
  Scope.addPrivate(In, [&CGF, AddrIn, PtrTy]() {
    return CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>())
        .getAddress(CGF);
  });
  Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm);
  Scope.addPrivate(Out, [&CGF, AddrOut, PtrTy]() {
    return CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>())
        .getAddress(CGF);
  });
  (void)Scope.Privatize();
  // Initializer helper only: emit Out's own non-trivial initializer first.
  if (!IsCombiner && Out->hasInit() &&
      !CGF.isTrivialInitializer(Out->getInit())) {
    CGF.EmitAnyExprToMem(Out->getInit(), CGF.GetAddrOfLocalVar(Out),
                         Out->getType().getQualifiers(),
                         /*IsInitializer=*/true);
  }
  if (CombinerInitializer)
    CGF.EmitIgnoredExpr(CombinerInitializer);
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}

/// Emit the combiner (and, if present, initializer) helper for \p D and cache
/// them in UDRMap; a declaration already present in UDRMap is skipped.
/// When \p CGF is non-null, \p D is also recorded under CGF->CurFn.
void CGOpenMPRuntime::emitUserDefinedReduction(
    CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) {
  if (UDRMap.count(D) > 0)
    return;
  llvm::Function *Combiner = emitCombinerOrInitializer(
      CGM, D->getType(), D->getCombiner(),
      cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerIn())->getDecl()),
// [chunk boundary] Continues the combiner emitCombinerOrInitializer(...) call
// in CGOpenMPRuntime::emitUserDefinedReduction.
      cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerOut())->getDecl()),
      /*IsCombiner=*/true);
  llvm::Function *Initializer = nullptr;
  if (const Expr *Init = D->getInitializer()) {
    // Only a CallInit initializer is passed as the body expression; for other
    // kinds the helper receives null and relies on the private variable's own
    // initializer.
    Initializer = emitCombinerOrInitializer(
        CGM, D->getType(),
        D->getInitializerKind() == OMPDeclareReductionDecl::CallInit ? Init
                                                                     : nullptr,
        cast<VarDecl>(cast<DeclRefExpr>(D->getInitOrig())->getDecl()),
        cast<VarDecl>(cast<DeclRefExpr>(D->getInitPriv())->getDecl()),
        /*IsCombiner=*/false);
  }
  UDRMap.try_emplace(D, Combiner, Initializer);
  if (CGF) {
    // Remember which function introduced D; functionFinished() uses this
    // list to drop the UDRMap entries again.
    auto &Decls = FunctionUDRMap.FindAndConstruct(CGF->CurFn);
    Decls.second.push_back(D);
  }
}

/// Return the (combiner, initializer) helper pair for \p D, emitting the
/// helpers on first use (without associating them with a function).
std::pair<llvm::Function *, llvm::Function *>
CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) {
  auto I = UDRMap.find(D);
  if (I != UDRMap.end())
    return I->second;
  emitUserDefinedReduction(/*CGF=*/nullptr, D);
  return UDRMap.lookup(D);
}

namespace {
// Temporary RAII solution to perform a push/pop stack event on the OpenMP IR
// Builder if one is present.
struct PushAndPopStackRAII {
  PushAndPopStackRAII(llvm::OpenMPIRBuilder *OMPBuilder, CodeGenFunction &CGF,
                      bool HasCancel, llvm::omp::Directive Kind)
      : OMPBuilder(OMPBuilder) {
    if (!OMPBuilder)
      return;

    // The following callback is the crucial part of clang's cleanup process.
    //
    // NOTE:
    // Once the OpenMPIRBuilder is used to create parallel regions (and
    // similar), the cancellation destination (Dest below) is determined via
    // IP. That means if we have variables to finalize we split the block at IP,
    // use the new block (=BB) as destination to build a JumpDest (via
    // getJumpDestInCurrentScope(BB)) which then is fed to
    // EmitBranchThroughCleanup. Furthermore, there will not be the need
    // to push & pop an FinalizationInfo object.
// [chunk boundary] Continues the NOTE comment inside the constructor of
// PushAndPopStackRAII.
    // The FiniCB will still be needed but at the point where the
    // OpenMPIRBuilder is asked to construct a parallel (or similar) construct.
    auto FiniCB = [&CGF](llvm::OpenMPIRBuilder::InsertPointTy IP) {
      assert(IP.getBlock()->end() == IP.getPoint() &&
             "Clang CG should cause non-terminated block!");
      // Temporarily move the builder to IP; the guard restores it on exit.
      CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
      CGF.Builder.restoreIP(IP);
      CodeGenFunction::JumpDest Dest =
          CGF.getOMPCancelDestination(OMPD_parallel);
      CGF.EmitBranchThroughCleanup(Dest);
    };

    // TODO: Remove this once we emit parallel regions through the
    //       OpenMPIRBuilder as it can do this setup internally.
    llvm::OpenMPIRBuilder::FinalizationInfo FI({FiniCB, Kind, HasCancel});
    OMPBuilder->pushFinalizationCB(std::move(FI));
  }
  // Pop the callback pushed by the constructor (if a builder was given).
  ~PushAndPopStackRAII() {
    if (OMPBuilder)
      OMPBuilder->popFinalizationCB();
  }
  llvm::OpenMPIRBuilder *OMPBuilder;
};
} // namespace

/// Shared implementation for outlining 'parallel' and 'teams' regions:
/// determines whether the directive \p D has a cancel region, registers a
/// finalization callback with the OpenMPIRBuilder for the duration of the
/// call, and generates the outlined function for \p CS.
/// \p ThreadIDVar must have pointer type.
static llvm::Function *emitParallelOrTeamsOutlinedFunction(
    CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS,
    const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
    const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen) {
  assert(ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 *");
  CodeGenFunction CGF(CGM, true);
  // Query hasCancel() on whichever concrete parallel-like directive D is.
  bool HasCancel = false;
  if (const auto *OPD = dyn_cast<OMPParallelDirective>(&D))
    HasCancel = OPD->hasCancel();
  else if (const auto *OPD = dyn_cast<OMPTargetParallelDirective>(&D))
    HasCancel = OPD->hasCancel();
  else if (const auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D))
    HasCancel = OPSD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPTargetParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else
// [chunk boundary] Continues the `else if` chain that computes HasCancel in
// emitParallelOrTeamsOutlinedFunction.
       if (const auto *OPFD = dyn_cast<OMPDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD =
               dyn_cast<OMPTeamsDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD =
               dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();

  // TODO: Temporarily inform the OpenMPIRBuilder, if any, about the new
  //       parallel region to make cancellation barriers work properly.
  llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
  PushAndPopStackRAII PSR(&OMPBuilder, CGF, HasCancel, InnermostKind);
  CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind,
                                    HasCancel, OutlinedHelperName);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  return CGF.GenerateOpenMPCapturedStmtFunction(*CS, D.getBeginLoc());
}

/// Outline the OMPD_parallel captured statement of \p D.
llvm::Function *CGOpenMPRuntime::emitParallelOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  const CapturedStmt *CS = D.getCapturedStmt(OMPD_parallel);
  return emitParallelOrTeamsOutlinedFunction(
      CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
}

/// Outline the OMPD_teams captured statement of \p D.
llvm::Function *CGOpenMPRuntime::emitTeamsOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  const CapturedStmt *CS = D.getCapturedStmt(OMPD_teams);
  return emitParallelOrTeamsOutlinedFunction(
      CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
}

/// Outline the task (or taskloop) captured statement of \p D.
/// \param ThreadIDVar Thread-id variable; must NOT have pointer type.
/// \param PartIDVar, TaskTVar Variables used by the untied-task action.
/// \param Tied Whether the task is tied.
/// \param NumberOfParts Written only for untied tasks.
llvm::Function *CGOpenMPRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const
// [chunk boundary] Continues the parameter list of
// CGOpenMPRuntime::emitTaskOutlinedFunction (`const RegionCodeGenTy &CodeGen`).
    RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  // Code emitted by the untied-task action: call __kmpc_omp_task with the
  // update location, the thread id and the task pointer loaded from TaskTVar.
  auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF,
                                              PrePostActionTy &) {
    llvm::Value *ThreadID = getThreadID(CGF, D.getBeginLoc());
    llvm::Value *UpLoc = emitUpdateLocation(CGF, D.getBeginLoc());
    llvm::Value *TaskArgs[] = {
        UpLoc, ThreadID,
        CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar),
                                    TaskTVar->getType()->castAs<PointerType>())
            .getPointer(CGF)};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_omp_task),
                        TaskArgs);
  };
  CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar,
                                                            UntiedCodeGen);
  CodeGen.setAction(Action);
  assert(!ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 for tasks");
  // Taskloop directives capture under OMPD_taskloop, everything else under
  // OMPD_task.
  const OpenMPDirectiveKind Region =
      isOpenMPTaskLoopDirective(D.getDirectiveKind()) ? OMPD_taskloop
                                                      : OMPD_task;
  const CapturedStmt *CS = D.getCapturedStmt(Region);
  bool HasCancel = false;
  if (const auto *TD = dyn_cast<OMPTaskDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPMasterTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPParallelMasterTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();

  CodeGenFunction CGF(CGM, true);
  CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen,
                                        InnermostKind, HasCancel, Action);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  llvm::Function *Res = CGF.GenerateCapturedStmtFunction(*CS);
  if (!Tied)
    NumberOfParts = Action.getNumberOfParts();
  return Res;
}

/// Append to \p Fields one constant per field of \p RD, taken in order from
/// \p Data, inserting null constants for LLVM struct elements that lie
/// between consecutive fields.
static void buildStructValue(ConstantStructBuilder &Fields, CodeGenModule &CGM,
                             const RecordDecl *RD,
const CGRecordLayout &RL, 1356 ArrayRef<llvm::Constant *> Data) { 1357 llvm::StructType *StructTy = RL.getLLVMType(); 1358 unsigned PrevIdx = 0; 1359 ConstantInitBuilder CIBuilder(CGM); 1360 auto DI = Data.begin(); 1361 for (const FieldDecl *FD : RD->fields()) { 1362 unsigned Idx = RL.getLLVMFieldNo(FD); 1363 // Fill the alignment. 1364 for (unsigned I = PrevIdx; I < Idx; ++I) 1365 Fields.add(llvm::Constant::getNullValue(StructTy->getElementType(I))); 1366 PrevIdx = Idx + 1; 1367 Fields.add(*DI); 1368 ++DI; 1369 } 1370 } 1371 1372 template <class... As> 1373 static llvm::GlobalVariable * 1374 createGlobalStruct(CodeGenModule &CGM, QualType Ty, bool IsConstant, 1375 ArrayRef<llvm::Constant *> Data, const Twine &Name, 1376 As &&... Args) { 1377 const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl()); 1378 const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD); 1379 ConstantInitBuilder CIBuilder(CGM); 1380 ConstantStructBuilder Fields = CIBuilder.beginStruct(RL.getLLVMType()); 1381 buildStructValue(Fields, CGM, RD, RL, Data); 1382 return Fields.finishAndCreateGlobal( 1383 Name, CGM.getContext().getAlignOfGlobalVarInChars(Ty), IsConstant, 1384 std::forward<As>(Args)...); 1385 } 1386 1387 template <typename T> 1388 static void 1389 createConstantGlobalStructAndAddToParent(CodeGenModule &CGM, QualType Ty, 1390 ArrayRef<llvm::Constant *> Data, 1391 T &Parent) { 1392 const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl()); 1393 const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD); 1394 ConstantStructBuilder Fields = Parent.beginStruct(RL.getLLVMType()); 1395 buildStructValue(Fields, CGM, RD, RL, Data); 1396 Fields.finishAndAddTo(Parent); 1397 } 1398 1399 void CGOpenMPRuntime::setLocThreadIdInsertPt(CodeGenFunction &CGF, 1400 bool AtCurrentPoint) { 1401 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); 1402 assert(!Elem.second.ServiceInsertPt && "Insert point is set already."); 1403 1404 llvm::Value *Undef = llvm::UndefValue::get(CGF.Int32Ty); 
// [chunk boundary] Continues CGOpenMPRuntime::setLocThreadIdInsertPt: create
// the no-op bitcast that serves as the service insert point.
  if (AtCurrentPoint) {
    Elem.second.ServiceInsertPt = new llvm::BitCastInst(
        Undef, CGF.Int32Ty, "svcpt", CGF.Builder.GetInsertBlock());
  } else {
    Elem.second.ServiceInsertPt =
        new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt");
    Elem.second.ServiceInsertPt->insertAfter(CGF.AllocaInsertPt);
  }
}

/// Erase the service insert point marker of the current function, if any.
void CGOpenMPRuntime::clearLocThreadIdInsertPt(CodeGenFunction &CGF) {
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
  if (Elem.second.ServiceInsertPt) {
    // Clear the cached pointer before erasing the instruction.
    llvm::Instruction *Ptr = Elem.second.ServiceInsertPt;
    Elem.second.ServiceInsertPt = nullptr;
    Ptr->eraseFromParent();
  }
}

/// Format \p Loc as ";file;function;line;column;;" into \p Buffer and return
/// a view of it. The function component is empty when the current
/// declaration is not a FunctionDecl. The result refers to \p Buffer, which
/// must outlive it.
static StringRef getIdentStringFromSourceLocation(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  SmallString<128> &Buffer) {
  llvm::raw_svector_ostream OS(Buffer);
  // Build debug location
  PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
  OS << ";" << PLoc.getFilename() << ";";
  if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
    OS << FD->getQualifiedNameAsString();
  OS << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;";
  return OS.str();
}

/// Return the ident value describing \p Loc with the given \p Flags.
/// The default source-location string is used when debug info is disabled or
/// \p Loc is invalid; otherwise the string is built from function name, file
/// name, line and column.
llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF,
                                                 SourceLocation Loc,
                                                 unsigned Flags) {
  llvm::Constant *SrcLocStr;
  if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo ||
      Loc.isInvalid()) {
    SrcLocStr = OMPBuilder.getOrCreateDefaultSrcLocStr();
  } else {
    std::string FunctionName = "";
    if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
      FunctionName = FD->getQualifiedNameAsString();
    PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
    const char *FileName = PLoc.getFilename();
    unsigned Line = PLoc.getLine();
    unsigned Column = PLoc.getColumn();
    SrcLocStr =
// [chunk boundary] Continues the `SrcLocStr =` assignment in
// CGOpenMPRuntime::emitUpdateLocation.
        OMPBuilder.getOrCreateSrcLocStr(FunctionName, FileName, Line, Column);
  }
  unsigned Reserved2Flags = getDefaultLocationReserved2Flags();
  return OMPBuilder.getOrCreateIdent(SrcLocStr, llvm::omp::IdentFlag(Flags),
                                     Reserved2Flags);
}

/// Return a value holding the global thread id for use at \p Loc.
/// Order of attempts: (1) delegate to the OpenMPIRBuilder when it is enabled,
/// (2) reuse a value cached for the current function, (3) load it from the
/// region's thread-id variable when that is safe, (4) emit (and cache) a
/// call to __kmpc_global_thread_num at the service insert point.
llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF,
                                          SourceLocation Loc) {
  assert(CGF.CurFn && "No function in current CodeGenFunction.");
  // If the OpenMPIRBuilder is used we need to use it for all thread id calls as
  // the clang invariants used below might be broken.
  if (CGM.getLangOpts().OpenMPIRBuilder) {
    SmallString<128> Buffer;
    OMPBuilder.updateToLocation(CGF.Builder.saveIP());
    auto *SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(
        getIdentStringFromSourceLocation(CGF, Loc, Buffer));
    return OMPBuilder.getOrCreateThreadID(
        OMPBuilder.getOrCreateIdent(SrcLocStr));
  }

  llvm::Value *ThreadID = nullptr;
  // Check whether we've already cached a load of the thread id in this
  // function.
  auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
  if (I != OpenMPLocThreadIDMap.end()) {
    ThreadID = I->second.ThreadID;
    if (ThreadID != nullptr)
      return ThreadID;
  }
  // If exceptions are enabled, do not use parameter to avoid possible crash.
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    if (OMPRegionInfo->getThreadIDVariable()) {
      // Check if this is an outlined function with thread id passed as
      // argument.
// [chunk boundary] Continues CGOpenMPRuntime::getThreadID inside the
// "region has a thread-id variable" branch.
      LValue LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF);
      llvm::BasicBlock *TopBlock = CGF.AllocaInsertPt->getParent();
      // Load from the variable only when no landing pad is required, C++
      // exceptions are off, we are emitting into the entry block, or the
      // lvalue's pointer is not an instruction or is defined in the entry
      // block or in the current block.
      if (!CGF.EHStack.requiresLandingPad() || !CGF.getLangOpts().Exceptions ||
          !CGF.getLangOpts().CXXExceptions ||
          CGF.Builder.GetInsertBlock() == TopBlock ||
          !isa<llvm::Instruction>(LVal.getPointer(CGF)) ||
          cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
              TopBlock ||
          cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
              CGF.Builder.GetInsertBlock()) {
        ThreadID = CGF.EmitLoadOfScalar(LVal, Loc);
        // If value loaded in entry block, cache it and use it everywhere in
        // function.
        if (CGF.Builder.GetInsertBlock() == TopBlock) {
          auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
          Elem.second.ThreadID = ThreadID;
        }
        return ThreadID;
      }
    }
  }

  // This is not an outlined function region - need to call
  // kmp_int32 __kmpc_global_thread_num(ident_t *loc).
  // Generate thread id value and cache this value for use across the
  // function.
1514 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); 1515 if (!Elem.second.ServiceInsertPt) 1516 setLocThreadIdInsertPt(CGF); 1517 CGBuilderTy::InsertPointGuard IPG(CGF.Builder); 1518 CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt); 1519 llvm::CallInst *Call = CGF.Builder.CreateCall( 1520 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), 1521 OMPRTL___kmpc_global_thread_num), 1522 emitUpdateLocation(CGF, Loc)); 1523 Call->setCallingConv(CGF.getRuntimeCC()); 1524 Elem.second.ThreadID = Call; 1525 return Call; 1526 } 1527 1528 void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) { 1529 assert(CGF.CurFn && "No function in current CodeGenFunction."); 1530 if (OpenMPLocThreadIDMap.count(CGF.CurFn)) { 1531 clearLocThreadIdInsertPt(CGF); 1532 OpenMPLocThreadIDMap.erase(CGF.CurFn); 1533 } 1534 if (FunctionUDRMap.count(CGF.CurFn) > 0) { 1535 for(const auto *D : FunctionUDRMap[CGF.CurFn]) 1536 UDRMap.erase(D); 1537 FunctionUDRMap.erase(CGF.CurFn); 1538 } 1539 auto I = FunctionUDMMap.find(CGF.CurFn); 1540 if (I != FunctionUDMMap.end()) { 1541 for(const auto *D : I->second) 1542 UDMMap.erase(D); 1543 FunctionUDMMap.erase(I); 1544 } 1545 LastprivateConditionalToTypes.erase(CGF.CurFn); 1546 FunctionToUntiedTaskStackMap.erase(CGF.CurFn); 1547 } 1548 1549 llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() { 1550 return OMPBuilder.IdentPtr; 1551 } 1552 1553 llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() { 1554 if (!Kmpc_MicroTy) { 1555 // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...) 
1556 llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty), 1557 llvm::PointerType::getUnqual(CGM.Int32Ty)}; 1558 Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true); 1559 } 1560 return llvm::PointerType::getUnqual(Kmpc_MicroTy); 1561 } 1562 1563 llvm::FunctionCallee 1564 CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize, bool IVSigned, 1565 bool IsGPUDistribute) { 1566 assert((IVSize == 32 || IVSize == 64) && 1567 "IV size is not compatible with the omp runtime"); 1568 StringRef Name; 1569 if (IsGPUDistribute) 1570 Name = IVSize == 32 ? (IVSigned ? "__kmpc_distribute_static_init_4" 1571 : "__kmpc_distribute_static_init_4u") 1572 : (IVSigned ? "__kmpc_distribute_static_init_8" 1573 : "__kmpc_distribute_static_init_8u"); 1574 else 1575 Name = IVSize == 32 ? (IVSigned ? "__kmpc_for_static_init_4" 1576 : "__kmpc_for_static_init_4u") 1577 : (IVSigned ? "__kmpc_for_static_init_8" 1578 : "__kmpc_for_static_init_8u"); 1579 1580 llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty; 1581 auto *PtrTy = llvm::PointerType::getUnqual(ITy); 1582 llvm::Type *TypeParams[] = { 1583 getIdentTyPointerTy(), // loc 1584 CGM.Int32Ty, // tid 1585 CGM.Int32Ty, // schedtype 1586 llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter 1587 PtrTy, // p_lower 1588 PtrTy, // p_upper 1589 PtrTy, // p_stride 1590 ITy, // incr 1591 ITy // chunk 1592 }; 1593 auto *FnTy = 1594 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1595 return CGM.CreateRuntimeFunction(FnTy, Name); 1596 } 1597 1598 llvm::FunctionCallee 1599 CGOpenMPRuntime::createDispatchInitFunction(unsigned IVSize, bool IVSigned) { 1600 assert((IVSize == 32 || IVSize == 64) && 1601 "IV size is not compatible with the omp runtime"); 1602 StringRef Name = 1603 IVSize == 32 1604 ? (IVSigned ? "__kmpc_dispatch_init_4" : "__kmpc_dispatch_init_4u") 1605 : (IVSigned ? "__kmpc_dispatch_init_8" : "__kmpc_dispatch_init_8u"); 1606 llvm::Type *ITy = IVSize == 32 ? 
CGM.Int32Ty : CGM.Int64Ty; 1607 llvm::Type *TypeParams[] = { getIdentTyPointerTy(), // loc 1608 CGM.Int32Ty, // tid 1609 CGM.Int32Ty, // schedtype 1610 ITy, // lower 1611 ITy, // upper 1612 ITy, // stride 1613 ITy // chunk 1614 }; 1615 auto *FnTy = 1616 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1617 return CGM.CreateRuntimeFunction(FnTy, Name); 1618 } 1619 1620 llvm::FunctionCallee 1621 CGOpenMPRuntime::createDispatchFiniFunction(unsigned IVSize, bool IVSigned) { 1622 assert((IVSize == 32 || IVSize == 64) && 1623 "IV size is not compatible with the omp runtime"); 1624 StringRef Name = 1625 IVSize == 32 1626 ? (IVSigned ? "__kmpc_dispatch_fini_4" : "__kmpc_dispatch_fini_4u") 1627 : (IVSigned ? "__kmpc_dispatch_fini_8" : "__kmpc_dispatch_fini_8u"); 1628 llvm::Type *TypeParams[] = { 1629 getIdentTyPointerTy(), // loc 1630 CGM.Int32Ty, // tid 1631 }; 1632 auto *FnTy = 1633 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 1634 return CGM.CreateRuntimeFunction(FnTy, Name); 1635 } 1636 1637 llvm::FunctionCallee 1638 CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize, bool IVSigned) { 1639 assert((IVSize == 32 || IVSize == 64) && 1640 "IV size is not compatible with the omp runtime"); 1641 StringRef Name = 1642 IVSize == 32 1643 ? (IVSigned ? "__kmpc_dispatch_next_4" : "__kmpc_dispatch_next_4u") 1644 : (IVSigned ? "__kmpc_dispatch_next_8" : "__kmpc_dispatch_next_8u"); 1645 llvm::Type *ITy = IVSize == 32 ? 
CGM.Int32Ty : CGM.Int64Ty; 1646 auto *PtrTy = llvm::PointerType::getUnqual(ITy); 1647 llvm::Type *TypeParams[] = { 1648 getIdentTyPointerTy(), // loc 1649 CGM.Int32Ty, // tid 1650 llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter 1651 PtrTy, // p_lower 1652 PtrTy, // p_upper 1653 PtrTy // p_stride 1654 }; 1655 auto *FnTy = 1656 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 1657 return CGM.CreateRuntimeFunction(FnTy, Name); 1658 } 1659 1660 /// Obtain information that uniquely identifies a target entry. This 1661 /// consists of the file and device IDs as well as line number associated with 1662 /// the relevant entry source location. 1663 static void getTargetEntryUniqueInfo(ASTContext &C, SourceLocation Loc, 1664 unsigned &DeviceID, unsigned &FileID, 1665 unsigned &LineNum) { 1666 SourceManager &SM = C.getSourceManager(); 1667 1668 // The loc should be always valid and have a file ID (the user cannot use 1669 // #pragma directives in macros) 1670 1671 assert(Loc.isValid() && "Source location is expected to be always valid."); 1672 1673 PresumedLoc PLoc = SM.getPresumedLoc(Loc); 1674 assert(PLoc.isValid() && "Source location is expected to be always valid."); 1675 1676 llvm::sys::fs::UniqueID ID; 1677 if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID)) { 1678 PLoc = SM.getPresumedLoc(Loc, /*UseLineDirectives=*/false); 1679 assert(PLoc.isValid() && "Source location is expected to be always valid."); 1680 if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID)) 1681 SM.getDiagnostics().Report(diag::err_cannot_open_file) 1682 << PLoc.getFilename() << EC.message(); 1683 } 1684 1685 DeviceID = ID.getDevice(); 1686 FileID = ID.getFile(); 1687 LineNum = PLoc.getLine(); 1688 } 1689 1690 Address CGOpenMPRuntime::getAddrOfDeclareTargetVar(const VarDecl *VD) { 1691 if (CGM.getLangOpts().OpenMPSimd) 1692 return Address::invalid(); 1693 llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 1694 
OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
  // Only 'link' variables, and 'to' variables when unified shared memory is
  // required, are accessed through a reference pointer.
  if (Res && (*Res == OMPDeclareTargetDeclAttr::MT_Link ||
              (*Res == OMPDeclareTargetDeclAttr::MT_To &&
               HasRequiresUnifiedSharedMemory))) {
    // Name of the reference pointer: <mangled name>[_<file id>]_decl_tgt_ref_ptr
    SmallString<64> PtrName;
    {
      llvm::raw_svector_ostream OS(PtrName);
      OS << CGM.getMangledName(GlobalDecl(VD));
      if (!VD->isExternallyVisible()) {
        // Variables that are not externally visible get the file ID appended
        // to the name.
        unsigned DeviceID, FileID, Line;
        getTargetEntryUniqueInfo(CGM.getContext(),
                                 VD->getCanonicalDecl()->getBeginLoc(),
                                 DeviceID, FileID, Line);
        OS << llvm::format("_%x", FileID);
      }
      OS << "_decl_tgt_ref_ptr";
    }
    llvm::Value *Ptr = CGM.getModule().getNamedValue(PtrName);
    if (!Ptr) {
      // First request: create the pointer variable with weak linkage.
      QualType PtrTy = CGM.getContext().getPointerType(VD->getType());
      Ptr = getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(PtrTy),
                                        PtrName);

      auto *GV = cast<llvm::GlobalVariable>(Ptr);
      GV->setLinkage(llvm::GlobalValue::WeakAnyLinkage);

      // Only the host side initializes the pointer with the variable address.
      if (!CGM.getLangOpts().OpenMPIsDevice)
        GV->setInitializer(CGM.GetAddrOfGlobal(VD));
      registerTargetGlobalVariable(VD, cast<llvm::Constant>(Ptr));
    }
    return Address(Ptr, CGM.getContext().getDeclAlign(VD));
  }
  return Address::invalid();
}

/// Return the internal "cache" variable associated with the threadprivate
/// variable \p VD, creating it on first use. Must not be called when TLS is
/// both requested and supported by the target (see the assertion).
llvm::Constant *
CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) {
  assert(!CGM.getLangOpts().OpenMPUseTLS ||
         !CGM.getContext().getTargetInfo().isTLSSupported());
  // Lookup the entry, lazily creating it if necessary.
  std::string Suffix = getName({"cache", ""});
  return getOrCreateInternalVariable(
      CGM.Int8PtrPtrTy, Twine(CGM.getMangledName(VD)).concat(Suffix));
}

/// Return the address of the calling thread's copy of the threadprivate
/// variable \p VD.
///
/// With TLS requested and supported, \p VDAddr is returned unchanged.
/// Otherwise a call to __kmpc_threadprivate_cached(loc, gtid, &var, size,
/// cache) is emitted and its result is returned with the alignment of
/// \p VDAddr.
Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
                                                const VarDecl *VD,
                                                Address VDAddr,
                                                SourceLocation Loc) {
  if (CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getContext().getTargetInfo().isTLSSupported())
    return VDAddr;

  llvm::Type *VarTy = VDAddr.getElementType();
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
                         CGF.Builder.CreatePointerCast(VDAddr.getPointer(),
                                                       CGM.Int8PtrTy),
                         CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)),
                         getOrCreateThreadPrivateCache(VD)};
  return Address(CGF.EmitRuntimeCall(
                     OMPBuilder.getOrCreateRuntimeFunction(
                         CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
                     Args),
                 VDAddr.getAlignment());
}

/// Emit the runtime calls that register the constructor, copy constructor and
/// destructor of the threadprivate variable at \p VDAddr.
void CGOpenMPRuntime::emitThreadPrivateVarInit(
    CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor,
    llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) {
  // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime
  // library.
  llvm::Value *OMPLoc = emitUpdateLocation(CGF, Loc);
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_global_thread_num),
                      OMPLoc);
  // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor)
  // to register constructor/destructor for variable.
  llvm::Value *Args[] = {
      OMPLoc, CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.VoidPtrTy),
      Ctor, CopyCtor, Dtor};
  CGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(), OMPRTL___kmpc_threadprivate_register),
      Args);
}

/// Emit the constructor/destructor helper functions for the threadprivate
/// variable \p VD and register them with the runtime.
///
/// Nothing is emitted when TLS is requested and supported, when \p VD has no
/// definition, when its mangled name was already processed, or when neither a
/// constructor nor a destructor is needed.
///
/// \param PerformInit Whether a constructor helper re-emitting the
/// initializer should be generated (C++ only).
/// \param CGF If non-null, the registration is emitted into this function and
/// nullptr is returned; if null, a separate "__omp_threadprivate_init_"
/// function containing the registration is created and returned.
llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition(
    const VarDecl *VD, Address VDAddr, SourceLocation Loc,
    bool PerformInit, CodeGenFunction *CGF) {
  if (CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getContext().getTargetInfo().isTLSSupported())
    return nullptr;

  VD = VD->getDefinition(CGM.getContext());
  // The insertion succeeds only the first time a given mangled name is seen.
  if (VD && ThreadPrivateWithDefinition.insert(CGM.getMangledName(VD)).second) {
    QualType ASTTy = VD->getType();

    llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr;
    const Expr *Init = VD->getAnyInitializer();
    if (CGM.getLangOpts().CPlusPlus && PerformInit) {
      // Generate function that re-emits the declaration's initializer into the
      // threadprivate copy of the variable VD
      // NOTE(review): Init is dereferenced below without a null check;
      // presumably PerformInit implies an initializer exists - confirm.
      CodeGenFunction CtorCGF(CGM);
      FunctionArgList Args;
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
                            ImplicitParamDecl::Other);
      Args.push_back(&Dst);

      const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          CGM.getContext().VoidPtrTy, Args);
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      std::string Name = getName({"__kmpc_global_ctor_", ""});
      llvm::Function *Fn =
          CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
      CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI,
                            Args, Loc, Loc);
      // Load the void* argument and view it as a pointer to the variable type.
      llvm::Value *ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      Address Arg = Address(ArgVal, VDAddr.getAlignment());
      Arg = CtorCGF.Builder.CreateElementBitCast(
          Arg, CtorCGF.ConvertTypeForMem(ASTTy));
      CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(),
                               /*IsInitializer=*/true);
      // The helper returns the pointer it was given.
      ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue);
      CtorCGF.FinishFunction();
      Ctor = Fn;
    }
    if (VD->getType().isDestructedType() != QualType::DK_none) {
      // Generate function that emits destructor call for the threadprivate copy
      // of the variable VD
      CodeGenFunction DtorCGF(CGM);
      FunctionArgList Args;
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
                            ImplicitParamDecl::Other);
      Args.push_back(&Dst);

      const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          CGM.getContext().VoidTy, Args);
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      std::string Name = getName({"__kmpc_global_dtor_", ""});
      llvm::Function *Fn =
          CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
      // Start the function with an empty debug location.
      auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
      DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args,
                            Loc, Loc);
      // Create a scope with an artificial location for the body of this function.
      auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
      llvm::Value *ArgVal = DtorCGF.EmitLoadOfScalar(
          DtorCGF.GetAddrOfLocalVar(&Dst),
          /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation());
      DtorCGF.emitDestroy(Address(ArgVal, VDAddr.getAlignment()), ASTTy,
                          DtorCGF.getDestroyer(ASTTy.isDestructedType()),
                          DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
      DtorCGF.FinishFunction();
      Dtor = Fn;
    }
    // Do not emit init function if it is not required.
    if (!Ctor && !Dtor)
      return nullptr;

    llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
    auto *CopyCtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs,
                                               /*isVarArg=*/false)
                           ->getPointerTo();
    // Copying constructor for the threadprivate variable.
    // Must be NULL - reserved by runtime, but currently it requires that this
    // parameter is always NULL. Otherwise it fires assertion.
    CopyCtor = llvm::Constant::getNullValue(CopyCtorTy);
    // Whichever helper was not generated is passed as a typed null pointer.
    if (Ctor == nullptr) {
      auto *CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
                                             /*isVarArg=*/false)
                         ->getPointerTo();
      Ctor = llvm::Constant::getNullValue(CtorTy);
    }
    if (Dtor == nullptr) {
      auto *DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy,
                                             /*isVarArg=*/false)
                         ->getPointerTo();
      Dtor = llvm::Constant::getNullValue(DtorTy);
    }
    if (!CGF) {
      // No function to emit into: wrap the registration in its own
      // void() function and hand that back to the caller.
      auto *InitFunctionTy =
          llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false);
      std::string Name = getName({"__omp_threadprivate_init_", ""});
      llvm::Function *InitFunction = CGM.CreateGlobalInitOrCleanUpFunction(
          InitFunctionTy, Name, CGM.getTypes().arrangeNullaryFunction());
      CodeGenFunction InitCGF(CGM);
      FunctionArgList ArgList;
      InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction,
                            CGM.getTypes().arrangeNullaryFunction(), ArgList,
                            Loc, Loc);
      emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
      InitCGF.FinishFunction();
      return InitFunction;
    }
    emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
  }
  return nullptr;
}

/// Emit and register the offload constructor/destructor entries for the
/// declare target variable \p VD whose global is \p Addr.
///
/// Returns false when no offload target triples are set and this is not a
/// device compilation; in every other case returns whether this is a device
/// compilation. Variables that are not declare target, are mapped with
/// 'link', or are mapped with 'to' under unified shared memory are not
/// processed, and neither is a variable whose mangled name was already
/// handled.
///
/// \param PerformInit Whether a constructor entry should be produced (C++
/// only).
bool CGOpenMPRuntime::emitDeclareTargetVarDefinition(const VarDecl *VD,
                                                     llvm::GlobalVariable *Addr,
                                                     bool PerformInit) {
  if (CGM.getLangOpts().OMPTargetTriples.empty() &&
      !CGM.getLangOpts().OpenMPIsDevice)
    return false;
  Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
  if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
      (*Res == OMPDeclareTargetDeclAttr::MT_To &&
       HasRequiresUnifiedSharedMemory))
    return CGM.getLangOpts().OpenMPIsDevice;
  VD = VD->getDefinition(CGM.getContext());
  assert(VD && "Unknown VarDecl");

  // Process each variable (by mangled name) only once.
  if (!DeclareTargetWithDefinition.insert(CGM.getMangledName(VD)).second)
    return CGM.getLangOpts().OpenMPIsDevice;

  QualType ASTTy = VD->getType();
  SourceLocation Loc = VD->getCanonicalDecl()->getBeginLoc();

  // Produce the unique prefix to identify the new target regions. We use
  // the source location of the variable declaration which we know to not
  // conflict with any target region.
  unsigned DeviceID;
  unsigned FileID;
  unsigned Line;
  getTargetEntryUniqueInfo(CGM.getContext(), Loc, DeviceID, FileID, Line);
  // Buffer holds the common name prefix; Out is scratch storage used when the
  // "_ctor"/"_dtor" entry names are materialized below.
  SmallString<128> Buffer, Out;
  {
    llvm::raw_svector_ostream OS(Buffer);
    OS << "__omp_offloading_" << llvm::format("_%x", DeviceID)
       << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line;
  }

  const Expr *Init = VD->getAnyInitializer();
  if (CGM.getLangOpts().CPlusPlus && PerformInit) {
    llvm::Constant *Ctor;
    llvm::Constant *ID;
    if (CGM.getLangOpts().OpenMPIsDevice) {
      // Generate a function that re-emits the declaration's initializer into
      // the global variable Addr of the declare target variable VD.
      // NOTE(review): Init is dereferenced below without a null check;
      // presumably PerformInit implies an initializer exists - confirm.
      CodeGenFunction CtorCGF(CGM);

      const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction(
          FTy, Twine(Buffer, "_ctor"), FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(CtorCGF);
      CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
                            FunctionArgList(), Loc, Loc);
      auto AL = ApplyDebugLocation::CreateArtificial(CtorCGF);
      CtorCGF.EmitAnyExprToMem(Init,
                               Address(Addr, CGM.getContext().getDeclAlign(VD)),
                               Init->getType().getQualifiers(),
                               /*IsInitializer=*/true);
      CtorCGF.FinishFunction();
      Ctor = Fn;
      ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
      CGM.addUsedGlobal(cast<llvm::GlobalValue>(Ctor));
    } else {
      // Host side: a private constant i8 global with the same name stands in
      // for the constructor entry.
      Ctor = new llvm::GlobalVariable(
          CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
          llvm::GlobalValue::PrivateLinkage,
          llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_ctor"));
      ID = Ctor;
    }

    // Register the information for the entry associated with the constructor.
    Out.clear();
    OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
        DeviceID, FileID, Twine(Buffer, "_ctor").toStringRef(Out), Line, Ctor,
        ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryCtor);
  }
  if (VD->getType().isDestructedType() != QualType::DK_none) {
    llvm::Constant *Dtor;
    llvm::Constant *ID;
    if (CGM.getLangOpts().OpenMPIsDevice) {
      // Generate a function that emits the destructor call for the global
      // variable Addr of the declare target variable VD.
      CodeGenFunction DtorCGF(CGM);

      const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction(
          FTy, Twine(Buffer, "_dtor"), FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
      DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
                            FunctionArgList(), Loc, Loc);
      // Create a scope with an artificial location for the body of this
      // function.
      auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
      DtorCGF.emitDestroy(Address(Addr, CGM.getContext().getDeclAlign(VD)),
                          ASTTy, DtorCGF.getDestroyer(ASTTy.isDestructedType()),
                          DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
      DtorCGF.FinishFunction();
      Dtor = Fn;
      ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
      CGM.addUsedGlobal(cast<llvm::GlobalValue>(Dtor));
    } else {
      // Host side: a private constant i8 global with the same name stands in
      // for the destructor entry.
      Dtor = new llvm::GlobalVariable(
          CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
          llvm::GlobalValue::PrivateLinkage,
          llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_dtor"));
      ID = Dtor;
    }
    // Register the information for the entry associated with the destructor.
    Out.clear();
    OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
        DeviceID, FileID, Twine(Buffer, "_dtor").toStringRef(Out), Line, Dtor,
        ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryDtor);
  }
  return CGM.getLangOpts().OpenMPIsDevice;
}

/// Return the address of a compiler-generated ("artificial") threadprivate
/// variable of type \p VarType identified by \p Name.
Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF,
                                                          QualType VarType,
                                                          StringRef Name) {
  std::string Suffix = getName({"artificial", ""});
  llvm::Type *VarLVType = CGF.ConvertTypeForMem(VarType);
  llvm::Value *GAddr =
      getOrCreateInternalVariable(VarLVType, Twine(Name).concat(Suffix));
  // With TLS available the internal variable itself is made thread-local.
  if (CGM.getLangOpts().OpenMP && CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getTarget().isTLSSupported()) {
    cast<llvm::GlobalVariable>(GAddr)->setThreadLocal(/*Val=*/true);
    return Address(GAddr, CGM.getContext().getTypeAlignInChars(VarType));
  }
  // Otherwise go through __kmpc_threadprivate_cached with a separate cache
  // variable.
  std::string CacheSuffix = getName({"cache", ""});
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, SourceLocation()),
      getThreadID(CGF, SourceLocation()),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(GAddr, CGM.VoidPtrTy),
      CGF.Builder.CreateIntCast(CGF.getTypeSize(VarType), CGM.SizeTy,
                                /*isSigned=*/false),
getOrCreateInternalVariable( 2035 CGM.VoidPtrPtrTy, Twine(Name).concat(Suffix).concat(CacheSuffix))}; 2036 return Address( 2037 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 2038 CGF.EmitRuntimeCall( 2039 OMPBuilder.getOrCreateRuntimeFunction( 2040 CGM.getModule(), OMPRTL___kmpc_threadprivate_cached), 2041 Args), 2042 VarLVType->getPointerTo(/*AddrSpace=*/0)), 2043 CGM.getContext().getTypeAlignInChars(VarType)); 2044 } 2045 2046 void CGOpenMPRuntime::emitIfClause(CodeGenFunction &CGF, const Expr *Cond, 2047 const RegionCodeGenTy &ThenGen, 2048 const RegionCodeGenTy &ElseGen) { 2049 CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange()); 2050 2051 // If the condition constant folds and can be elided, try to avoid emitting 2052 // the condition and the dead arm of the if/else. 2053 bool CondConstant; 2054 if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) { 2055 if (CondConstant) 2056 ThenGen(CGF); 2057 else 2058 ElseGen(CGF); 2059 return; 2060 } 2061 2062 // Otherwise, the condition did not fold, or we couldn't elide it. Just 2063 // emit the conditional branch. 2064 llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("omp_if.then"); 2065 llvm::BasicBlock *ElseBlock = CGF.createBasicBlock("omp_if.else"); 2066 llvm::BasicBlock *ContBlock = CGF.createBasicBlock("omp_if.end"); 2067 CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0); 2068 2069 // Emit the 'then' code. 2070 CGF.EmitBlock(ThenBlock); 2071 ThenGen(CGF); 2072 CGF.EmitBranch(ContBlock); 2073 // Emit the 'else' code if present. 2074 // There is no need to emit line number for unconditional branch. 2075 (void)ApplyDebugLocation::CreateEmpty(CGF); 2076 CGF.EmitBlock(ElseBlock); 2077 ElseGen(CGF); 2078 // There is no need to emit line number for unconditional branch. 2079 (void)ApplyDebugLocation::CreateEmpty(CGF); 2080 CGF.EmitBranch(ContBlock); 2081 // Emit the continuation block for code after the if. 
2082 CGF.EmitBlock(ContBlock, /*IsFinished=*/true); 2083 } 2084 2085 void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc, 2086 llvm::Function *OutlinedFn, 2087 ArrayRef<llvm::Value *> CapturedVars, 2088 const Expr *IfCond, 2089 llvm::Value *NumThreads) { 2090 if (!CGF.HaveInsertPoint()) 2091 return; 2092 llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc); 2093 auto &M = CGM.getModule(); 2094 auto &&ThenGen = [&M, OutlinedFn, CapturedVars, RTLoc, 2095 this](CodeGenFunction &CGF, PrePostActionTy &) { 2096 // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn); 2097 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime(); 2098 llvm::Value *Args[] = { 2099 RTLoc, 2100 CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars 2101 CGF.Builder.CreateBitCast(OutlinedFn, RT.getKmpc_MicroPointerTy())}; 2102 llvm::SmallVector<llvm::Value *, 16> RealArgs; 2103 RealArgs.append(std::begin(Args), std::end(Args)); 2104 RealArgs.append(CapturedVars.begin(), CapturedVars.end()); 2105 2106 llvm::FunctionCallee RTLFn = 2107 OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_fork_call); 2108 CGF.EmitRuntimeCall(RTLFn, RealArgs); 2109 }; 2110 auto &&ElseGen = [&M, OutlinedFn, CapturedVars, RTLoc, Loc, 2111 this](CodeGenFunction &CGF, PrePostActionTy &) { 2112 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime(); 2113 llvm::Value *ThreadID = RT.getThreadID(CGF, Loc); 2114 // Build calls: 2115 // __kmpc_serialized_parallel(&Loc, GTid); 2116 llvm::Value *Args[] = {RTLoc, ThreadID}; 2117 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 2118 M, OMPRTL___kmpc_serialized_parallel), 2119 Args); 2120 2121 // OutlinedFn(>id, &zero_bound, CapturedStruct); 2122 Address ThreadIDAddr = RT.emitThreadIDAddress(CGF, Loc); 2123 Address ZeroAddrBound = 2124 CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty, 2125 /*Name=*/".bound.zero.addr"); 2126 CGF.Builder.CreateStore(CGF.Builder.getInt32(/*C*/ 0), ZeroAddrBound); 2127 llvm::SmallVector<llvm::Value *, 16> 
OutlinedFnArgs; 2128 // ThreadId for serialized parallels is 0. 2129 OutlinedFnArgs.push_back(ThreadIDAddr.getPointer()); 2130 OutlinedFnArgs.push_back(ZeroAddrBound.getPointer()); 2131 OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end()); 2132 2133 // Ensure we do not inline the function. This is trivially true for the ones 2134 // passed to __kmpc_fork_call but the ones called in serialized regions 2135 // could be inlined. This is not a perfect but it is closer to the invariant 2136 // we want, namely, every data environment starts with a new function. 2137 // TODO: We should pass the if condition to the runtime function and do the 2138 // handling there. Much cleaner code. 2139 OutlinedFn->removeFnAttr(llvm::Attribute::AlwaysInline); 2140 OutlinedFn->addFnAttr(llvm::Attribute::NoInline); 2141 RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs); 2142 2143 // __kmpc_end_serialized_parallel(&Loc, GTid); 2144 llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID}; 2145 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 2146 M, OMPRTL___kmpc_end_serialized_parallel), 2147 EndArgs); 2148 }; 2149 if (IfCond) { 2150 emitIfClause(CGF, IfCond, ThenGen, ElseGen); 2151 } else { 2152 RegionCodeGenTy ThenRCG(ThenGen); 2153 ThenRCG(CGF); 2154 } 2155 } 2156 2157 // If we're inside an (outlined) parallel region, use the region info's 2158 // thread-ID variable (it is passed in a first argument of the outlined function 2159 // as "kmp_int32 *gtid"). Otherwise, if we're not inside parallel region, but in 2160 // regular serial code region, get thread ID by calling kmp_int32 2161 // kmpc_global_thread_num(ident_t *loc), stash this thread ID in a temporary and 2162 // return the address of that temp. 
2163 Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF, 2164 SourceLocation Loc) { 2165 if (auto *OMPRegionInfo = 2166 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) 2167 if (OMPRegionInfo->getThreadIDVariable()) 2168 return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress(CGF); 2169 2170 llvm::Value *ThreadID = getThreadID(CGF, Loc); 2171 QualType Int32Ty = 2172 CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true); 2173 Address ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp."); 2174 CGF.EmitStoreOfScalar(ThreadID, 2175 CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty)); 2176 2177 return ThreadIDTemp; 2178 } 2179 2180 llvm::Constant *CGOpenMPRuntime::getOrCreateInternalVariable( 2181 llvm::Type *Ty, const llvm::Twine &Name, unsigned AddressSpace) { 2182 SmallString<256> Buffer; 2183 llvm::raw_svector_ostream Out(Buffer); 2184 Out << Name; 2185 StringRef RuntimeName = Out.str(); 2186 auto &Elem = *InternalVars.try_emplace(RuntimeName, nullptr).first; 2187 if (Elem.second) { 2188 assert(Elem.second->getType()->getPointerElementType() == Ty && 2189 "OMP internal variable has different type than requested"); 2190 return &*Elem.second; 2191 } 2192 2193 return Elem.second = new llvm::GlobalVariable( 2194 CGM.getModule(), Ty, /*IsConstant*/ false, 2195 llvm::GlobalValue::CommonLinkage, llvm::Constant::getNullValue(Ty), 2196 Elem.first(), /*InsertBefore=*/nullptr, 2197 llvm::GlobalValue::NotThreadLocal, AddressSpace); 2198 } 2199 2200 llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) { 2201 std::string Prefix = Twine("gomp_critical_user_", CriticalName).str(); 2202 std::string Name = getName({Prefix, "var"}); 2203 return getOrCreateInternalVariable(KmpCriticalNameTy, Name); 2204 } 2205 2206 namespace { 2207 /// Common pre(post)-action for different OpenMP constructs. 
2208 class CommonActionTy final : public PrePostActionTy { 2209 llvm::FunctionCallee EnterCallee; 2210 ArrayRef<llvm::Value *> EnterArgs; 2211 llvm::FunctionCallee ExitCallee; 2212 ArrayRef<llvm::Value *> ExitArgs; 2213 bool Conditional; 2214 llvm::BasicBlock *ContBlock = nullptr; 2215 2216 public: 2217 CommonActionTy(llvm::FunctionCallee EnterCallee, 2218 ArrayRef<llvm::Value *> EnterArgs, 2219 llvm::FunctionCallee ExitCallee, 2220 ArrayRef<llvm::Value *> ExitArgs, bool Conditional = false) 2221 : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee), 2222 ExitArgs(ExitArgs), Conditional(Conditional) {} 2223 void Enter(CodeGenFunction &CGF) override { 2224 llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs); 2225 if (Conditional) { 2226 llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes); 2227 auto *ThenBlock = CGF.createBasicBlock("omp_if.then"); 2228 ContBlock = CGF.createBasicBlock("omp_if.end"); 2229 // Generate the branch (If-stmt) 2230 CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock); 2231 CGF.EmitBlock(ThenBlock); 2232 } 2233 } 2234 void Done(CodeGenFunction &CGF) { 2235 // Emit the rest of blocks/branches 2236 CGF.EmitBranch(ContBlock); 2237 CGF.EmitBlock(ContBlock, true); 2238 } 2239 void Exit(CodeGenFunction &CGF) override { 2240 CGF.EmitRuntimeCall(ExitCallee, ExitArgs); 2241 } 2242 }; 2243 } // anonymous namespace 2244 2245 void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF, 2246 StringRef CriticalName, 2247 const RegionCodeGenTy &CriticalOpGen, 2248 SourceLocation Loc, const Expr *Hint) { 2249 // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]); 2250 // CriticalOpGen(); 2251 // __kmpc_end_critical(ident_t *, gtid, Lock); 2252 // Prepare arguments and build a call to __kmpc_critical 2253 if (!CGF.HaveInsertPoint()) 2254 return; 2255 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 2256 getCriticalRegionLock(CriticalName)}; 2257 
llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args), 2258 std::end(Args)); 2259 if (Hint) { 2260 EnterArgs.push_back(CGF.Builder.CreateIntCast( 2261 CGF.EmitScalarExpr(Hint), CGM.Int32Ty, /*isSigned=*/false)); 2262 } 2263 CommonActionTy Action( 2264 OMPBuilder.getOrCreateRuntimeFunction( 2265 CGM.getModule(), 2266 Hint ? OMPRTL___kmpc_critical_with_hint : OMPRTL___kmpc_critical), 2267 EnterArgs, 2268 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), 2269 OMPRTL___kmpc_end_critical), 2270 Args); 2271 CriticalOpGen.setAction(Action); 2272 emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen); 2273 } 2274 2275 void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF, 2276 const RegionCodeGenTy &MasterOpGen, 2277 SourceLocation Loc) { 2278 if (!CGF.HaveInsertPoint()) 2279 return; 2280 // if(__kmpc_master(ident_t *, gtid)) { 2281 // MasterOpGen(); 2282 // __kmpc_end_master(ident_t *, gtid); 2283 // } 2284 // Prepare arguments and build a call to __kmpc_master 2285 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 2286 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction( 2287 CGM.getModule(), OMPRTL___kmpc_master), 2288 Args, 2289 OMPBuilder.getOrCreateRuntimeFunction( 2290 CGM.getModule(), OMPRTL___kmpc_end_master), 2291 Args, 2292 /*Conditional=*/true); 2293 MasterOpGen.setAction(Action); 2294 emitInlinedDirective(CGF, OMPD_master, MasterOpGen); 2295 Action.Done(CGF); 2296 } 2297 2298 void CGOpenMPRuntime::emitMaskedRegion(CodeGenFunction &CGF, 2299 const RegionCodeGenTy &MaskedOpGen, 2300 SourceLocation Loc, const Expr *Filter) { 2301 if (!CGF.HaveInsertPoint()) 2302 return; 2303 // if(__kmpc_masked(ident_t *, gtid, filter)) { 2304 // MaskedOpGen(); 2305 // __kmpc_end_masked(iden_t *, gtid); 2306 // } 2307 // Prepare arguments and build a call to __kmpc_masked 2308 llvm::Value *FilterVal = Filter 2309 ? 
CGF.EmitScalarExpr(Filter, CGF.Int32Ty) 2310 : llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/0); 2311 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 2312 FilterVal}; 2313 llvm::Value *ArgsEnd[] = {emitUpdateLocation(CGF, Loc), 2314 getThreadID(CGF, Loc)}; 2315 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction( 2316 CGM.getModule(), OMPRTL___kmpc_masked), 2317 Args, 2318 OMPBuilder.getOrCreateRuntimeFunction( 2319 CGM.getModule(), OMPRTL___kmpc_end_masked), 2320 ArgsEnd, 2321 /*Conditional=*/true); 2322 MaskedOpGen.setAction(Action); 2323 emitInlinedDirective(CGF, OMPD_masked, MaskedOpGen); 2324 Action.Done(CGF); 2325 } 2326 2327 void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF, 2328 SourceLocation Loc) { 2329 if (!CGF.HaveInsertPoint()) 2330 return; 2331 if (CGF.CGM.getLangOpts().OpenMPIRBuilder) { 2332 OMPBuilder.createTaskyield(CGF.Builder); 2333 } else { 2334 // Build call __kmpc_omp_taskyield(loc, thread_id, 0); 2335 llvm::Value *Args[] = { 2336 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 2337 llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)}; 2338 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 2339 CGM.getModule(), OMPRTL___kmpc_omp_taskyield), 2340 Args); 2341 } 2342 2343 if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) 2344 Region->emitUntiedSwitch(CGF); 2345 } 2346 2347 void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF, 2348 const RegionCodeGenTy &TaskgroupOpGen, 2349 SourceLocation Loc) { 2350 if (!CGF.HaveInsertPoint()) 2351 return; 2352 // __kmpc_taskgroup(ident_t *, gtid); 2353 // TaskgroupOpGen(); 2354 // __kmpc_end_taskgroup(ident_t *, gtid); 2355 // Prepare arguments and build a call to __kmpc_taskgroup 2356 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 2357 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction( 2358 CGM.getModule(), OMPRTL___kmpc_taskgroup), 2359 Args, 2360 
OMPBuilder.getOrCreateRuntimeFunction( 2361 CGM.getModule(), OMPRTL___kmpc_end_taskgroup), 2362 Args); 2363 TaskgroupOpGen.setAction(Action); 2364 emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen); 2365 } 2366 2367 /// Given an array of pointers to variables, project the address of a 2368 /// given variable. 2369 static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array, 2370 unsigned Index, const VarDecl *Var) { 2371 // Pull out the pointer to the variable. 2372 Address PtrAddr = CGF.Builder.CreateConstArrayGEP(Array, Index); 2373 llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr); 2374 2375 Address Addr = Address(Ptr, CGF.getContext().getDeclAlign(Var)); 2376 Addr = CGF.Builder.CreateElementBitCast( 2377 Addr, CGF.ConvertTypeForMem(Var->getType())); 2378 return Addr; 2379 } 2380 2381 static llvm::Value *emitCopyprivateCopyFunction( 2382 CodeGenModule &CGM, llvm::Type *ArgsType, 2383 ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs, 2384 ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps, 2385 SourceLocation Loc) { 2386 ASTContext &C = CGM.getContext(); 2387 // void copy_func(void *LHSArg, void *RHSArg); 2388 FunctionArgList Args; 2389 ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 2390 ImplicitParamDecl::Other); 2391 ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 2392 ImplicitParamDecl::Other); 2393 Args.push_back(&LHSArg); 2394 Args.push_back(&RHSArg); 2395 const auto &CGFI = 2396 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 2397 std::string Name = 2398 CGM.getOpenMPRuntime().getName({"omp", "copyprivate", "copy_func"}); 2399 auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI), 2400 llvm::GlobalValue::InternalLinkage, Name, 2401 &CGM.getModule()); 2402 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI); 2403 Fn->setDoesNotRecurse(); 2404 CodeGenFunction CGF(CGM); 2405 
CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc); 2406 // Dest = (void*[n])(LHSArg); 2407 // Src = (void*[n])(RHSArg); 2408 Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 2409 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)), 2410 ArgsType), CGF.getPointerAlign()); 2411 Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 2412 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)), 2413 ArgsType), CGF.getPointerAlign()); 2414 // *(Type0*)Dst[0] = *(Type0*)Src[0]; 2415 // *(Type1*)Dst[1] = *(Type1*)Src[1]; 2416 // ... 2417 // *(Typen*)Dst[n] = *(Typen*)Src[n]; 2418 for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) { 2419 const auto *DestVar = 2420 cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl()); 2421 Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar); 2422 2423 const auto *SrcVar = 2424 cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl()); 2425 Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar); 2426 2427 const auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl(); 2428 QualType Type = VD->getType(); 2429 CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]); 2430 } 2431 CGF.FinishFunction(); 2432 return Fn; 2433 } 2434 2435 void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF, 2436 const RegionCodeGenTy &SingleOpGen, 2437 SourceLocation Loc, 2438 ArrayRef<const Expr *> CopyprivateVars, 2439 ArrayRef<const Expr *> SrcExprs, 2440 ArrayRef<const Expr *> DstExprs, 2441 ArrayRef<const Expr *> AssignmentOps) { 2442 if (!CGF.HaveInsertPoint()) 2443 return; 2444 assert(CopyprivateVars.size() == SrcExprs.size() && 2445 CopyprivateVars.size() == DstExprs.size() && 2446 CopyprivateVars.size() == AssignmentOps.size()); 2447 ASTContext &C = CGM.getContext(); 2448 // int32 did_it = 0; 2449 // if(__kmpc_single(ident_t *, gtid)) { 2450 // SingleOpGen(); 2451 // __kmpc_end_single(ident_t *, gtid); 2452 // did_it = 1; 2453 // } 2454 // call 
__kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>, 2455 // <copy_func>, did_it); 2456 2457 Address DidIt = Address::invalid(); 2458 if (!CopyprivateVars.empty()) { 2459 // int32 did_it = 0; 2460 QualType KmpInt32Ty = 2461 C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1); 2462 DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it"); 2463 CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt); 2464 } 2465 // Prepare arguments and build a call to __kmpc_single 2466 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 2467 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction( 2468 CGM.getModule(), OMPRTL___kmpc_single), 2469 Args, 2470 OMPBuilder.getOrCreateRuntimeFunction( 2471 CGM.getModule(), OMPRTL___kmpc_end_single), 2472 Args, 2473 /*Conditional=*/true); 2474 SingleOpGen.setAction(Action); 2475 emitInlinedDirective(CGF, OMPD_single, SingleOpGen); 2476 if (DidIt.isValid()) { 2477 // did_it = 1; 2478 CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt); 2479 } 2480 Action.Done(CGF); 2481 // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>, 2482 // <copy_func>, did_it); 2483 if (DidIt.isValid()) { 2484 llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size()); 2485 QualType CopyprivateArrayTy = C.getConstantArrayType( 2486 C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal, 2487 /*IndexTypeQuals=*/0); 2488 // Create a list of all private variables for copyprivate. 
2489 Address CopyprivateList = 2490 CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list"); 2491 for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) { 2492 Address Elem = CGF.Builder.CreateConstArrayGEP(CopyprivateList, I); 2493 CGF.Builder.CreateStore( 2494 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 2495 CGF.EmitLValue(CopyprivateVars[I]).getPointer(CGF), 2496 CGF.VoidPtrTy), 2497 Elem); 2498 } 2499 // Build function that copies private values from single region to all other 2500 // threads in the corresponding parallel region. 2501 llvm::Value *CpyFn = emitCopyprivateCopyFunction( 2502 CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy)->getPointerTo(), 2503 CopyprivateVars, SrcExprs, DstExprs, AssignmentOps, Loc); 2504 llvm::Value *BufSize = CGF.getTypeSize(CopyprivateArrayTy); 2505 Address CL = 2506 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(CopyprivateList, 2507 CGF.VoidPtrTy); 2508 llvm::Value *DidItVal = CGF.Builder.CreateLoad(DidIt); 2509 llvm::Value *Args[] = { 2510 emitUpdateLocation(CGF, Loc), // ident_t *<loc> 2511 getThreadID(CGF, Loc), // i32 <gtid> 2512 BufSize, // size_t <buf_size> 2513 CL.getPointer(), // void *<copyprivate list> 2514 CpyFn, // void (*) (void *, void *) <copy_func> 2515 DidItVal // i32 did_it 2516 }; 2517 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 2518 CGM.getModule(), OMPRTL___kmpc_copyprivate), 2519 Args); 2520 } 2521 } 2522 2523 void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF, 2524 const RegionCodeGenTy &OrderedOpGen, 2525 SourceLocation Loc, bool IsThreads) { 2526 if (!CGF.HaveInsertPoint()) 2527 return; 2528 // __kmpc_ordered(ident_t *, gtid); 2529 // OrderedOpGen(); 2530 // __kmpc_end_ordered(ident_t *, gtid); 2531 // Prepare arguments and build a call to __kmpc_ordered 2532 if (IsThreads) { 2533 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 2534 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction( 2535 CGM.getModule(), 
OMPRTL___kmpc_ordered), 2536 Args, 2537 OMPBuilder.getOrCreateRuntimeFunction( 2538 CGM.getModule(), OMPRTL___kmpc_end_ordered), 2539 Args); 2540 OrderedOpGen.setAction(Action); 2541 emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen); 2542 return; 2543 } 2544 emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen); 2545 } 2546 2547 unsigned CGOpenMPRuntime::getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind) { 2548 unsigned Flags; 2549 if (Kind == OMPD_for) 2550 Flags = OMP_IDENT_BARRIER_IMPL_FOR; 2551 else if (Kind == OMPD_sections) 2552 Flags = OMP_IDENT_BARRIER_IMPL_SECTIONS; 2553 else if (Kind == OMPD_single) 2554 Flags = OMP_IDENT_BARRIER_IMPL_SINGLE; 2555 else if (Kind == OMPD_barrier) 2556 Flags = OMP_IDENT_BARRIER_EXPL; 2557 else 2558 Flags = OMP_IDENT_BARRIER_IMPL; 2559 return Flags; 2560 } 2561 2562 void CGOpenMPRuntime::getDefaultScheduleAndChunk( 2563 CodeGenFunction &CGF, const OMPLoopDirective &S, 2564 OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const { 2565 // Check if the loop directive is actually a doacross loop directive. In this 2566 // case choose static, 1 schedule. 2567 if (llvm::any_of( 2568 S.getClausesOfKind<OMPOrderedClause>(), 2569 [](const OMPOrderedClause *C) { return C->getNumForLoops(); })) { 2570 ScheduleKind = OMPC_SCHEDULE_static; 2571 // Chunk size is 1 in this case. 
2572 llvm::APInt ChunkSize(32, 1); 2573 ChunkExpr = IntegerLiteral::Create( 2574 CGF.getContext(), ChunkSize, 2575 CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/0), 2576 SourceLocation()); 2577 } 2578 } 2579 2580 void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc, 2581 OpenMPDirectiveKind Kind, bool EmitChecks, 2582 bool ForceSimpleCall) { 2583 // Check if we should use the OMPBuilder 2584 auto *OMPRegionInfo = 2585 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo); 2586 if (CGF.CGM.getLangOpts().OpenMPIRBuilder) { 2587 CGF.Builder.restoreIP(OMPBuilder.createBarrier( 2588 CGF.Builder, Kind, ForceSimpleCall, EmitChecks)); 2589 return; 2590 } 2591 2592 if (!CGF.HaveInsertPoint()) 2593 return; 2594 // Build call __kmpc_cancel_barrier(loc, thread_id); 2595 // Build call __kmpc_barrier(loc, thread_id); 2596 unsigned Flags = getDefaultFlagsForBarriers(Kind); 2597 // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc, 2598 // thread_id); 2599 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags), 2600 getThreadID(CGF, Loc)}; 2601 if (OMPRegionInfo) { 2602 if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) { 2603 llvm::Value *Result = CGF.EmitRuntimeCall( 2604 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), 2605 OMPRTL___kmpc_cancel_barrier), 2606 Args); 2607 if (EmitChecks) { 2608 // if (__kmpc_cancel_barrier()) { 2609 // exit from construct; 2610 // } 2611 llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit"); 2612 llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue"); 2613 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result); 2614 CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB); 2615 CGF.EmitBlock(ExitBB); 2616 // exit from construct; 2617 CodeGenFunction::JumpDest CancelDestination = 2618 CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind()); 2619 CGF.EmitBranchThroughCleanup(CancelDestination); 2620 CGF.EmitBlock(ContBB, /*IsFinished=*/true); 2621 } 2622 
return; 2623 } 2624 } 2625 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 2626 CGM.getModule(), OMPRTL___kmpc_barrier), 2627 Args); 2628 } 2629 2630 /// Map the OpenMP loop schedule to the runtime enumeration. 2631 static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind, 2632 bool Chunked, bool Ordered) { 2633 switch (ScheduleKind) { 2634 case OMPC_SCHEDULE_static: 2635 return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked) 2636 : (Ordered ? OMP_ord_static : OMP_sch_static); 2637 case OMPC_SCHEDULE_dynamic: 2638 return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked; 2639 case OMPC_SCHEDULE_guided: 2640 return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked; 2641 case OMPC_SCHEDULE_runtime: 2642 return Ordered ? OMP_ord_runtime : OMP_sch_runtime; 2643 case OMPC_SCHEDULE_auto: 2644 return Ordered ? OMP_ord_auto : OMP_sch_auto; 2645 case OMPC_SCHEDULE_unknown: 2646 assert(!Chunked && "chunk was specified but schedule kind not known"); 2647 return Ordered ? OMP_ord_static : OMP_sch_static; 2648 } 2649 llvm_unreachable("Unexpected runtime schedule"); 2650 } 2651 2652 /// Map the OpenMP distribute schedule to the runtime enumeration. 2653 static OpenMPSchedType 2654 getRuntimeSchedule(OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) { 2655 // only static is allowed for dist_schedule 2656 return Chunked ? 
OMP_dist_sch_static_chunked : OMP_dist_sch_static; 2657 } 2658 2659 bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind, 2660 bool Chunked) const { 2661 OpenMPSchedType Schedule = 2662 getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false); 2663 return Schedule == OMP_sch_static; 2664 } 2665 2666 bool CGOpenMPRuntime::isStaticNonchunked( 2667 OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const { 2668 OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked); 2669 return Schedule == OMP_dist_sch_static; 2670 } 2671 2672 bool CGOpenMPRuntime::isStaticChunked(OpenMPScheduleClauseKind ScheduleKind, 2673 bool Chunked) const { 2674 OpenMPSchedType Schedule = 2675 getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false); 2676 return Schedule == OMP_sch_static_chunked; 2677 } 2678 2679 bool CGOpenMPRuntime::isStaticChunked( 2680 OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const { 2681 OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked); 2682 return Schedule == OMP_dist_sch_static_chunked; 2683 } 2684 2685 bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const { 2686 OpenMPSchedType Schedule = 2687 getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false); 2688 assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here"); 2689 return Schedule != OMP_sch_static; 2690 } 2691 2692 static int addMonoNonMonoModifier(CodeGenModule &CGM, OpenMPSchedType Schedule, 2693 OpenMPScheduleClauseModifier M1, 2694 OpenMPScheduleClauseModifier M2) { 2695 int Modifier = 0; 2696 switch (M1) { 2697 case OMPC_SCHEDULE_MODIFIER_monotonic: 2698 Modifier = OMP_sch_modifier_monotonic; 2699 break; 2700 case OMPC_SCHEDULE_MODIFIER_nonmonotonic: 2701 Modifier = OMP_sch_modifier_nonmonotonic; 2702 break; 2703 case OMPC_SCHEDULE_MODIFIER_simd: 2704 if (Schedule == OMP_sch_static_chunked) 2705 Schedule = OMP_sch_static_balanced_chunked; 2706 break; 2707 case 
OMPC_SCHEDULE_MODIFIER_last: 2708 case OMPC_SCHEDULE_MODIFIER_unknown: 2709 break; 2710 } 2711 switch (M2) { 2712 case OMPC_SCHEDULE_MODIFIER_monotonic: 2713 Modifier = OMP_sch_modifier_monotonic; 2714 break; 2715 case OMPC_SCHEDULE_MODIFIER_nonmonotonic: 2716 Modifier = OMP_sch_modifier_nonmonotonic; 2717 break; 2718 case OMPC_SCHEDULE_MODIFIER_simd: 2719 if (Schedule == OMP_sch_static_chunked) 2720 Schedule = OMP_sch_static_balanced_chunked; 2721 break; 2722 case OMPC_SCHEDULE_MODIFIER_last: 2723 case OMPC_SCHEDULE_MODIFIER_unknown: 2724 break; 2725 } 2726 // OpenMP 5.0, 2.9.2 Worksharing-Loop Construct, Desription. 2727 // If the static schedule kind is specified or if the ordered clause is 2728 // specified, and if the nonmonotonic modifier is not specified, the effect is 2729 // as if the monotonic modifier is specified. Otherwise, unless the monotonic 2730 // modifier is specified, the effect is as if the nonmonotonic modifier is 2731 // specified. 2732 if (CGM.getLangOpts().OpenMP >= 50 && Modifier == 0) { 2733 if (!(Schedule == OMP_sch_static_chunked || Schedule == OMP_sch_static || 2734 Schedule == OMP_sch_static_balanced_chunked || 2735 Schedule == OMP_ord_static_chunked || Schedule == OMP_ord_static || 2736 Schedule == OMP_dist_sch_static_chunked || 2737 Schedule == OMP_dist_sch_static)) 2738 Modifier = OMP_sch_modifier_nonmonotonic; 2739 } 2740 return Schedule | Modifier; 2741 } 2742 2743 void CGOpenMPRuntime::emitForDispatchInit( 2744 CodeGenFunction &CGF, SourceLocation Loc, 2745 const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned, 2746 bool Ordered, const DispatchRTInput &DispatchValues) { 2747 if (!CGF.HaveInsertPoint()) 2748 return; 2749 OpenMPSchedType Schedule = getRuntimeSchedule( 2750 ScheduleKind.Schedule, DispatchValues.Chunk != nullptr, Ordered); 2751 assert(Ordered || 2752 (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked && 2753 Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked && 2754 
Schedule != OMP_sch_static_balanced_chunked)); 2755 // Call __kmpc_dispatch_init( 2756 // ident_t *loc, kmp_int32 tid, kmp_int32 schedule, 2757 // kmp_int[32|64] lower, kmp_int[32|64] upper, 2758 // kmp_int[32|64] stride, kmp_int[32|64] chunk); 2759 2760 // If the Chunk was not specified in the clause - use default value 1. 2761 llvm::Value *Chunk = DispatchValues.Chunk ? DispatchValues.Chunk 2762 : CGF.Builder.getIntN(IVSize, 1); 2763 llvm::Value *Args[] = { 2764 emitUpdateLocation(CGF, Loc), 2765 getThreadID(CGF, Loc), 2766 CGF.Builder.getInt32(addMonoNonMonoModifier( 2767 CGM, Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type 2768 DispatchValues.LB, // Lower 2769 DispatchValues.UB, // Upper 2770 CGF.Builder.getIntN(IVSize, 1), // Stride 2771 Chunk // Chunk 2772 }; 2773 CGF.EmitRuntimeCall(createDispatchInitFunction(IVSize, IVSigned), Args); 2774 } 2775 2776 static void emitForStaticInitCall( 2777 CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId, 2778 llvm::FunctionCallee ForStaticInitFunction, OpenMPSchedType Schedule, 2779 OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2, 2780 const CGOpenMPRuntime::StaticRTInput &Values) { 2781 if (!CGF.HaveInsertPoint()) 2782 return; 2783 2784 assert(!Values.Ordered); 2785 assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked || 2786 Schedule == OMP_sch_static_balanced_chunked || 2787 Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked || 2788 Schedule == OMP_dist_sch_static || 2789 Schedule == OMP_dist_sch_static_chunked); 2790 2791 // Call __kmpc_for_static_init( 2792 // ident_t *loc, kmp_int32 tid, kmp_int32 schedtype, 2793 // kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower, 2794 // kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride, 2795 // kmp_int[32|64] incr, kmp_int[32|64] chunk); 2796 llvm::Value *Chunk = Values.Chunk; 2797 if (Chunk == nullptr) { 2798 assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static || 2799 Schedule == 
OMP_dist_sch_static) && 2800 "expected static non-chunked schedule"); 2801 // If the Chunk was not specified in the clause - use default value 1. 2802 Chunk = CGF.Builder.getIntN(Values.IVSize, 1); 2803 } else { 2804 assert((Schedule == OMP_sch_static_chunked || 2805 Schedule == OMP_sch_static_balanced_chunked || 2806 Schedule == OMP_ord_static_chunked || 2807 Schedule == OMP_dist_sch_static_chunked) && 2808 "expected static chunked schedule"); 2809 } 2810 llvm::Value *Args[] = { 2811 UpdateLocation, 2812 ThreadId, 2813 CGF.Builder.getInt32(addMonoNonMonoModifier(CGF.CGM, Schedule, M1, 2814 M2)), // Schedule type 2815 Values.IL.getPointer(), // &isLastIter 2816 Values.LB.getPointer(), // &LB 2817 Values.UB.getPointer(), // &UB 2818 Values.ST.getPointer(), // &Stride 2819 CGF.Builder.getIntN(Values.IVSize, 1), // Incr 2820 Chunk // Chunk 2821 }; 2822 CGF.EmitRuntimeCall(ForStaticInitFunction, Args); 2823 } 2824 2825 void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF, 2826 SourceLocation Loc, 2827 OpenMPDirectiveKind DKind, 2828 const OpenMPScheduleTy &ScheduleKind, 2829 const StaticRTInput &Values) { 2830 OpenMPSchedType ScheduleNum = getRuntimeSchedule( 2831 ScheduleKind.Schedule, Values.Chunk != nullptr, Values.Ordered); 2832 assert(isOpenMPWorksharingDirective(DKind) && 2833 "Expected loop-based or sections-based directive."); 2834 llvm::Value *UpdatedLocation = emitUpdateLocation(CGF, Loc, 2835 isOpenMPLoopDirective(DKind) 2836 ? 
OMP_IDENT_WORK_LOOP 2837 : OMP_IDENT_WORK_SECTIONS); 2838 llvm::Value *ThreadId = getThreadID(CGF, Loc); 2839 llvm::FunctionCallee StaticInitFunction = 2840 createForStaticInitFunction(Values.IVSize, Values.IVSigned, false); 2841 auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc); 2842 emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction, 2843 ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, Values); 2844 } 2845 2846 void CGOpenMPRuntime::emitDistributeStaticInit( 2847 CodeGenFunction &CGF, SourceLocation Loc, 2848 OpenMPDistScheduleClauseKind SchedKind, 2849 const CGOpenMPRuntime::StaticRTInput &Values) { 2850 OpenMPSchedType ScheduleNum = 2851 getRuntimeSchedule(SchedKind, Values.Chunk != nullptr); 2852 llvm::Value *UpdatedLocation = 2853 emitUpdateLocation(CGF, Loc, OMP_IDENT_WORK_DISTRIBUTE); 2854 llvm::Value *ThreadId = getThreadID(CGF, Loc); 2855 llvm::FunctionCallee StaticInitFunction; 2856 bool isGPUDistribute = 2857 CGM.getLangOpts().OpenMPIsDevice && 2858 (CGM.getTriple().isAMDGCN() || CGM.getTriple().isNVPTX()); 2859 StaticInitFunction = createForStaticInitFunction( 2860 Values.IVSize, Values.IVSigned, isGPUDistribute); 2861 2862 emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction, 2863 ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown, 2864 OMPC_SCHEDULE_MODIFIER_unknown, Values); 2865 } 2866 2867 void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF, 2868 SourceLocation Loc, 2869 OpenMPDirectiveKind DKind) { 2870 if (!CGF.HaveInsertPoint()) 2871 return; 2872 // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid); 2873 llvm::Value *Args[] = { 2874 emitUpdateLocation(CGF, Loc, 2875 isOpenMPDistributeDirective(DKind) 2876 ? OMP_IDENT_WORK_DISTRIBUTE 2877 : isOpenMPLoopDirective(DKind) 2878 ? 
OMP_IDENT_WORK_LOOP 2879 : OMP_IDENT_WORK_SECTIONS), 2880 getThreadID(CGF, Loc)}; 2881 auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc); 2882 if (isOpenMPDistributeDirective(DKind) && CGM.getLangOpts().OpenMPIsDevice && 2883 (CGM.getTriple().isAMDGCN() || CGM.getTriple().isNVPTX())) 2884 CGF.EmitRuntimeCall( 2885 OMPBuilder.getOrCreateRuntimeFunction( 2886 CGM.getModule(), OMPRTL___kmpc_distribute_static_fini), 2887 Args); 2888 else 2889 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 2890 CGM.getModule(), OMPRTL___kmpc_for_static_fini), 2891 Args); 2892 } 2893 2894 void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF, 2895 SourceLocation Loc, 2896 unsigned IVSize, 2897 bool IVSigned) { 2898 if (!CGF.HaveInsertPoint()) 2899 return; 2900 // Call __kmpc_for_dynamic_fini_(4|8)[u](ident_t *loc, kmp_int32 tid); 2901 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 2902 CGF.EmitRuntimeCall(createDispatchFiniFunction(IVSize, IVSigned), Args); 2903 } 2904 2905 llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF, 2906 SourceLocation Loc, unsigned IVSize, 2907 bool IVSigned, Address IL, 2908 Address LB, Address UB, 2909 Address ST) { 2910 // Call __kmpc_dispatch_next( 2911 // ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter, 2912 // kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper, 2913 // kmp_int[32|64] *p_stride); 2914 llvm::Value *Args[] = { 2915 emitUpdateLocation(CGF, Loc), 2916 getThreadID(CGF, Loc), 2917 IL.getPointer(), // &isLastIter 2918 LB.getPointer(), // &Lower 2919 UB.getPointer(), // &Upper 2920 ST.getPointer() // &Stride 2921 }; 2922 llvm::Value *Call = 2923 CGF.EmitRuntimeCall(createDispatchNextFunction(IVSize, IVSigned), Args); 2924 return CGF.EmitScalarConversion( 2925 Call, CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/1), 2926 CGF.getContext().BoolTy, Loc); 2927 } 2928 2929 void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF, 2930 llvm::Value 
*NumThreads, 2931 SourceLocation Loc) { 2932 if (!CGF.HaveInsertPoint()) 2933 return; 2934 // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads) 2935 llvm::Value *Args[] = { 2936 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 2937 CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)}; 2938 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 2939 CGM.getModule(), OMPRTL___kmpc_push_num_threads), 2940 Args); 2941 } 2942 2943 void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF, 2944 ProcBindKind ProcBind, 2945 SourceLocation Loc) { 2946 if (!CGF.HaveInsertPoint()) 2947 return; 2948 assert(ProcBind != OMP_PROC_BIND_unknown && "Unsupported proc_bind value."); 2949 // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind) 2950 llvm::Value *Args[] = { 2951 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 2952 llvm::ConstantInt::get(CGM.IntTy, unsigned(ProcBind), /*isSigned=*/true)}; 2953 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 2954 CGM.getModule(), OMPRTL___kmpc_push_proc_bind), 2955 Args); 2956 } 2957 2958 void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>, 2959 SourceLocation Loc, llvm::AtomicOrdering AO) { 2960 if (CGF.CGM.getLangOpts().OpenMPIRBuilder) { 2961 OMPBuilder.createFlush(CGF.Builder); 2962 } else { 2963 if (!CGF.HaveInsertPoint()) 2964 return; 2965 // Build call void __kmpc_flush(ident_t *loc) 2966 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 2967 CGM.getModule(), OMPRTL___kmpc_flush), 2968 emitUpdateLocation(CGF, Loc)); 2969 } 2970 } 2971 2972 namespace { 2973 /// Indexes of fields for type kmp_task_t. 2974 enum KmpTaskTFields { 2975 /// List of shared variables. 2976 KmpTaskTShareds, 2977 /// Task routine. 2978 KmpTaskTRoutine, 2979 /// Partition id for the untied tasks. 2980 KmpTaskTPartId, 2981 /// Function with call of destructors for private variables. 2982 Data1, 2983 /// Task priority. 
2984 Data2, 2985 /// (Taskloops only) Lower bound. 2986 KmpTaskTLowerBound, 2987 /// (Taskloops only) Upper bound. 2988 KmpTaskTUpperBound, 2989 /// (Taskloops only) Stride. 2990 KmpTaskTStride, 2991 /// (Taskloops only) Is last iteration flag. 2992 KmpTaskTLastIter, 2993 /// (Taskloops only) Reduction data. 2994 KmpTaskTReductions, 2995 }; 2996 } // anonymous namespace 2997 2998 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::empty() const { 2999 return OffloadEntriesTargetRegion.empty() && 3000 OffloadEntriesDeviceGlobalVar.empty(); 3001 } 3002 3003 /// Initialize target region entry. 3004 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: 3005 initializeTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID, 3006 StringRef ParentName, unsigned LineNum, 3007 unsigned Order) { 3008 assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is " 3009 "only required for the device " 3010 "code generation."); 3011 OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = 3012 OffloadEntryInfoTargetRegion(Order, /*Addr=*/nullptr, /*ID=*/nullptr, 3013 OMPTargetRegionEntryTargetRegion); 3014 ++OffloadingEntriesNum; 3015 } 3016 3017 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: 3018 registerTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID, 3019 StringRef ParentName, unsigned LineNum, 3020 llvm::Constant *Addr, llvm::Constant *ID, 3021 OMPTargetRegionEntryKind Flags) { 3022 // If we are emitting code for a target, the entry is already initialized, 3023 // only has to be registered. 3024 if (CGM.getLangOpts().OpenMPIsDevice) { 3025 // This could happen if the device compilation is invoked standalone. 
3026 if (!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum)) 3027 return; 3028 auto &Entry = 3029 OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum]; 3030 Entry.setAddress(Addr); 3031 Entry.setID(ID); 3032 Entry.setFlags(Flags); 3033 } else { 3034 if (Flags == 3035 OffloadEntriesInfoManagerTy::OMPTargetRegionEntryTargetRegion && 3036 hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum, 3037 /*IgnoreAddressId*/ true)) 3038 return; 3039 assert(!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum) && 3040 "Target region entry already registered!"); 3041 OffloadEntryInfoTargetRegion Entry(OffloadingEntriesNum, Addr, ID, Flags); 3042 OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = Entry; 3043 ++OffloadingEntriesNum; 3044 } 3045 } 3046 3047 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::hasTargetRegionEntryInfo( 3048 unsigned DeviceID, unsigned FileID, StringRef ParentName, unsigned LineNum, 3049 bool IgnoreAddressId) const { 3050 auto PerDevice = OffloadEntriesTargetRegion.find(DeviceID); 3051 if (PerDevice == OffloadEntriesTargetRegion.end()) 3052 return false; 3053 auto PerFile = PerDevice->second.find(FileID); 3054 if (PerFile == PerDevice->second.end()) 3055 return false; 3056 auto PerParentName = PerFile->second.find(ParentName); 3057 if (PerParentName == PerFile->second.end()) 3058 return false; 3059 auto PerLine = PerParentName->second.find(LineNum); 3060 if (PerLine == PerParentName->second.end()) 3061 return false; 3062 // Fail if this entry is already registered. 3063 if (!IgnoreAddressId && 3064 (PerLine->second.getAddress() || PerLine->second.getID())) 3065 return false; 3066 return true; 3067 } 3068 3069 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::actOnTargetRegionEntriesInfo( 3070 const OffloadTargetRegionEntryInfoActTy &Action) { 3071 // Scan all target region entries and perform the provided action. 
3072 for (const auto &D : OffloadEntriesTargetRegion) 3073 for (const auto &F : D.second) 3074 for (const auto &P : F.second) 3075 for (const auto &L : P.second) 3076 Action(D.first, F.first, P.first(), L.first, L.second); 3077 } 3078 3079 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: 3080 initializeDeviceGlobalVarEntryInfo(StringRef Name, 3081 OMPTargetGlobalVarEntryKind Flags, 3082 unsigned Order) { 3083 assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is " 3084 "only required for the device " 3085 "code generation."); 3086 OffloadEntriesDeviceGlobalVar.try_emplace(Name, Order, Flags); 3087 ++OffloadingEntriesNum; 3088 } 3089 3090 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: 3091 registerDeviceGlobalVarEntryInfo(StringRef VarName, llvm::Constant *Addr, 3092 CharUnits VarSize, 3093 OMPTargetGlobalVarEntryKind Flags, 3094 llvm::GlobalValue::LinkageTypes Linkage) { 3095 if (CGM.getLangOpts().OpenMPIsDevice) { 3096 // This could happen if the device compilation is invoked standalone. 
3097 if (!hasDeviceGlobalVarEntryInfo(VarName)) 3098 return; 3099 auto &Entry = OffloadEntriesDeviceGlobalVar[VarName]; 3100 if (Entry.getAddress() && hasDeviceGlobalVarEntryInfo(VarName)) { 3101 if (Entry.getVarSize().isZero()) { 3102 Entry.setVarSize(VarSize); 3103 Entry.setLinkage(Linkage); 3104 } 3105 return; 3106 } 3107 Entry.setVarSize(VarSize); 3108 Entry.setLinkage(Linkage); 3109 Entry.setAddress(Addr); 3110 } else { 3111 if (hasDeviceGlobalVarEntryInfo(VarName)) { 3112 auto &Entry = OffloadEntriesDeviceGlobalVar[VarName]; 3113 assert(Entry.isValid() && Entry.getFlags() == Flags && 3114 "Entry not initialized!"); 3115 if (Entry.getVarSize().isZero()) { 3116 Entry.setVarSize(VarSize); 3117 Entry.setLinkage(Linkage); 3118 } 3119 return; 3120 } 3121 OffloadEntriesDeviceGlobalVar.try_emplace( 3122 VarName, OffloadingEntriesNum, Addr, VarSize, Flags, Linkage); 3123 ++OffloadingEntriesNum; 3124 } 3125 } 3126 3127 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: 3128 actOnDeviceGlobalVarEntriesInfo( 3129 const OffloadDeviceGlobalVarEntryInfoActTy &Action) { 3130 // Scan all target region entries and perform the provided action. 3131 for (const auto &E : OffloadEntriesDeviceGlobalVar) 3132 Action(E.getKey(), E.getValue()); 3133 } 3134 3135 void CGOpenMPRuntime::createOffloadEntry( 3136 llvm::Constant *ID, llvm::Constant *Addr, uint64_t Size, int32_t Flags, 3137 llvm::GlobalValue::LinkageTypes Linkage) { 3138 StringRef Name = Addr->getName(); 3139 llvm::Module &M = CGM.getModule(); 3140 llvm::LLVMContext &C = M.getContext(); 3141 3142 // Create constant string with the name. 
llvm::Constant *StrPtrInit = llvm::ConstantDataArray::getString(C, Name);

  std::string StringName = getName({"omp_offloading", "entry_name"});
  auto *Str = new llvm::GlobalVariable(
      M, StrPtrInit->getType(), /*isConstant=*/true,
      llvm::GlobalValue::InternalLinkage, StrPtrInit, StringName);
  Str->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);

  llvm::Constant *Data[] = {
      llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast(ID, CGM.VoidPtrTy),
      llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast(Str, CGM.Int8PtrTy),
      llvm::ConstantInt::get(CGM.SizeTy, Size),
      llvm::ConstantInt::get(CGM.Int32Ty, Flags),
      llvm::ConstantInt::get(CGM.Int32Ty, 0)};
  std::string EntryName = getName({"omp_offloading", "entry", ""});
  llvm::GlobalVariable *Entry = createGlobalStruct(
      CGM, getTgtOffloadEntryQTy(), /*IsConstant=*/true, Data,
      Twine(EntryName).concat(Name), llvm::GlobalValue::WeakAnyLinkage);

  // The entry has to be created in the section the linker expects it to be.
  Entry->setSection("omp_offloading_entries");
}

/// Emit the `omp_offload.info` named metadata describing every offload entry
/// registered in OffloadEntriesInfoManager, then create the offload entry
/// globals themselves (via createOffloadEntry) in entry-creation order.
///
/// Does nothing in SIMD-only mode or when no entries were registered. Entries
/// with a missing ID/address are diagnosed and skipped instead of being
/// emitted.
void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() {
  // Emit the offloading entries and metadata so that the device codegen side
  // can easily figure out what to emit. The produced metadata looks like
  // this:
  //
  // !omp_offload.info = !{!1, ...}
  //
  // Right now we only generate metadata for functions that contain target
  // regions.

  // If we are in simd mode or there are no entries, we don't need to do
  // anything.
  if (CGM.getLangOpts().OpenMPSimd || OffloadEntriesInfoManager.empty())
    return;

  llvm::Module &M = CGM.getModule();
  llvm::LLVMContext &C = M.getContext();
  // One slot per registered entry, indexed by the entry's creation order
  // (getOrder()). Each slot holds the entry, a location for diagnostics and
  // the parent function / mangled variable name. The slots are filled by the
  // two emitter callbacks below.
  SmallVector<std::tuple<const OffloadEntriesInfoManagerTy::OffloadEntryInfo *,
                         SourceLocation, StringRef>,
              16>
      OrderedEntries(OffloadEntriesInfoManager.size());
  // Parent function name per entry order; only the slots that belong to
  // target region entries are written.
  llvm::SmallVector<StringRef, 16> ParentFunctions(
      OffloadEntriesInfoManager.size());

  // Auxiliary methods to create metadata values and strings.
  auto &&GetMDInt = [this](unsigned V) {
    return llvm::ConstantAsMetadata::get(
        llvm::ConstantInt::get(CGM.Int32Ty, V));
  };

  auto &&GetMDString = [&C](StringRef V) { return llvm::MDString::get(C, V); };

  // Create the offloading info metadata node.
  llvm::NamedMDNode *MD = M.getOrInsertNamedMetadata("omp_offload.info");

  // Create function that emits metadata for each target region entry;
  auto &&TargetRegionMetadataEmitter =
      [this, &C, MD, &OrderedEntries, &ParentFunctions, &GetMDInt,
       &GetMDString](
          unsigned DeviceID, unsigned FileID, StringRef ParentName,
          unsigned Line,
          const OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion &E) {
        // Generate metadata for target regions. Each entry of this metadata
        // contains:
        // - Entry 0 -> Kind of this type of metadata (0).
        // - Entry 1 -> Device ID of the file where the entry was identified.
        // - Entry 2 -> File ID of the file where the entry was identified.
        // - Entry 3 -> Mangled name of the function where the entry was
        // identified.
        // - Entry 4 -> Line in the file where the entry was identified.
        // - Entry 5 -> Order the entry was created.
        // The first element of the metadata node is the kind.
        llvm::Metadata *Ops[] = {GetMDInt(E.getKind()), GetMDInt(DeviceID),
                                 GetMDInt(FileID), GetMDString(ParentName),
                                 GetMDInt(Line), GetMDInt(E.getOrder())};

        // Recover a source location for diagnostics: look for the file whose
        // unique (device, file) ID pair matches this entry. Loc stays
        // default-constructed if no file matches.
        // Note: inside this for statement the end iterator `E` shadows the
        // entry parameter `E`; after the loop `E` is the entry again.
        SourceLocation Loc;
        for (auto I = CGM.getContext().getSourceManager().fileinfo_begin(),
                  E = CGM.getContext().getSourceManager().fileinfo_end();
             I != E; ++I) {
          if (I->getFirst()->getUniqueID().getDevice() == DeviceID &&
              I->getFirst()->getUniqueID().getFile() == FileID) {
            Loc = CGM.getContext().getSourceManager().translateFileLineCol(
                I->getFirst(), Line, 1);
            break;
          }
        }
        // Save this entry in the right position of the ordered entries array.
        OrderedEntries[E.getOrder()] = std::make_tuple(&E, Loc, ParentName);
        ParentFunctions[E.getOrder()] = ParentName;

        // Add metadata to the named metadata node.
        MD->addOperand(llvm::MDNode::get(C, Ops));
      };

  OffloadEntriesInfoManager.actOnTargetRegionEntriesInfo(
      TargetRegionMetadataEmitter);

  // Create function that emits metadata for each device global variable entry;
  auto &&DeviceGlobalVarMetadataEmitter =
      [&C, &OrderedEntries, &GetMDInt, &GetMDString,
       MD](StringRef MangledName,
           const OffloadEntriesInfoManagerTy::OffloadEntryInfoDeviceGlobalVar
               &E) {
        // Generate metadata for global variables. Each entry of this metadata
        // contains:
        // - Entry 0 -> Kind of this type of metadata (1).
        // - Entry 1 -> Mangled name of the variable.
        // - Entry 2 -> Declare target kind.
        // - Entry 3 -> Order the entry was created.
        // The first element of the metadata node is the kind.
        llvm::Metadata *Ops[] = {
            GetMDInt(E.getKind()), GetMDString(MangledName),
            GetMDInt(E.getFlags()), GetMDInt(E.getOrder())};

        // Save this entry in the right position of the ordered entries array.
        // Global variable entries carry no source location.
        OrderedEntries[E.getOrder()] =
            std::make_tuple(&E, SourceLocation(), MangledName);

        // Add metadata to the named metadata node.
        MD->addOperand(llvm::MDNode::get(C, Ops));
      };

  OffloadEntriesInfoManager.actOnDeviceGlobalVarEntriesInfo(
      DeviceGlobalVarMetadataEmitter);

  // Walk the entries in creation order and emit one offload entry for each
  // valid one. Every `continue` below (including those inside the switch)
  // skips createOffloadEntry for the current entry.
  for (const auto &E : OrderedEntries) {
    assert(std::get<0>(E) && "All ordered entries must exist!");
    if (const auto *CE =
            dyn_cast<OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion>(
                std::get<0>(E))) {
      if (!CE->getID() || !CE->getAddress()) {
        // Do not blame the entry if the parent function is not emitted.
        StringRef FnName = ParentFunctions[CE->getOrder()];
        if (!CGM.GetGlobalValue(FnName))
          continue;
        unsigned DiagID = CGM.getDiags().getCustomDiagID(
            DiagnosticsEngine::Error,
            "Offloading entry for target region in %0 is incorrect: either the "
            "address or the ID is invalid.");
        CGM.getDiags().Report(std::get<1>(E), DiagID) << FnName;
        continue;
      }
      // Target regions are emitted with size 0 and weak linkage.
      createOffloadEntry(CE->getID(), CE->getAddress(), /*Size=*/0,
                         CE->getFlags(), llvm::GlobalValue::WeakAnyLinkage);
    } else if (const auto *CE = dyn_cast<OffloadEntriesInfoManagerTy::
                                             OffloadEntryInfoDeviceGlobalVar>(
                   std::get<0>(E))) {
      OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags =
          static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>(
              CE->getFlags());
      switch (Flags) {
      case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo: {
        // No entry is emitted on the device when unified shared memory is
        // required.
        if (CGM.getLangOpts().OpenMPIsDevice &&
            CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())
          continue;
        if (!CE->getAddress()) {
          unsigned DiagID = CGM.getDiags().getCustomDiagID(
              DiagnosticsEngine::Error, "Offloading entry for declare target "
                                        "variable %0 is incorrect: the "
                                        "address is invalid.");
          CGM.getDiags().Report(std::get<1>(E), DiagID) << std::get<2>(E);
          continue;
        }
        // The variable has no definition - no need to add the entry.
        if (CE->getVarSize().isZero())
          continue;
        break;
      }
      case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink:
        // On the device the link entry must have no address yet; on the host
        // it must have one.
        assert(((CGM.getLangOpts().OpenMPIsDevice && !CE->getAddress()) ||
                (!CGM.getLangOpts().OpenMPIsDevice && CE->getAddress())) &&
               "Declaret target link address is set.");
        if (CGM.getLangOpts().OpenMPIsDevice)
          continue;
        // With assertions enabled the host case cannot reach this branch (the
        // assert above already demands an address); it guards builds without
        // assertions.
        // NOTE(review): unlike the 'to' case, this diagnostic is reported
        // without a source location or the variable name.
        if (!CE->getAddress()) {
          unsigned DiagID = CGM.getDiags().getCustomDiagID(
              DiagnosticsEngine::Error,
              "Offloading entry for declare target variable is incorrect: the "
              "address is invalid.");
          CGM.getDiags().Report(DiagID);
          continue;
        }
        break;
      }
      // For global variables the address doubles as the entry ID.
      createOffloadEntry(CE->getAddress(), CE->getAddress(),
                         CE->getVarSize().getQuantity(), Flags,
                         CE->getLinkage());
    } else {
      llvm_unreachable("Unsupported entry kind.");
    }
  }
}

/// Loads all the offload entries information from the host IR
/// metadata.
///
/// Only acts when compiling for the device and a host IR file was given.
/// Reports a diagnostic and returns if the file cannot be opened or parsed;
/// returns silently if the module has no `omp_offload.info` metadata.
void CGOpenMPRuntime::loadOffloadInfoMetadata() {
  // If we are in target mode, load the metadata from the host IR. This code has
  // to match the metadata creation in createOffloadEntriesAndInfoMetadata().

  if (!CGM.getLangOpts().OpenMPIsDevice)
    return;

  if (CGM.getLangOpts().OMPHostIRFile.empty())
    return;

  auto Buf = llvm::MemoryBuffer::getFile(CGM.getLangOpts().OMPHostIRFile);
  if (auto EC = Buf.getError()) {
    CGM.getDiags().Report(diag::err_cannot_open_file)
        << CGM.getLangOpts().OMPHostIRFile << EC.message();
    return;
  }

  // The host module is parsed into a context local to this function; both the
  // context and the module go away when the function returns.
  // NOTE(review): the strings handed to the initialize* calls below come from
  // this temporary module - presumably the manager copies them; confirm.
  llvm::LLVMContext C;
  auto ME = expectedToErrorOrAndEmitErrors(
      C, llvm::parseBitcodeFile(Buf.get()->getMemBufferRef(), C));

  if (auto EC = ME.getError()) {
    unsigned DiagID = CGM.getDiags().getCustomDiagID(
        DiagnosticsEngine::Error, "Unable to parse host IR file '%0':'%1'");
    CGM.getDiags().Report(DiagID)
        << CGM.getLangOpts().OMPHostIRFile << EC.message();
    return;
  }

  llvm::NamedMDNode *MD = ME.get()->getNamedMetadata("omp_offload.info");
  if (!MD)
    return;

  for (llvm::MDNode *MN : MD->operands()) {
    // Read operand Idx of the current node as an unsigned integer.
    auto &&GetMDInt = [MN](unsigned Idx) {
      auto *V = cast<llvm::ConstantAsMetadata>(MN->getOperand(Idx));
      return cast<llvm::ConstantInt>(V->getValue())->getZExtValue();
    };

    // Read operand Idx of the current node as a string.
    auto &&GetMDString = [MN](unsigned Idx) {
      auto *V = cast<llvm::MDString>(MN->getOperand(Idx));
      return V->getString();
    };

    // Operand 0 is the entry kind; the remaining operand indices mirror the
    // layouts written by createOffloadEntriesAndInfoMetadata().
    switch (GetMDInt(0)) {
    default:
      llvm_unreachable("Unexpected metadata!");
      break;
    case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
        OffloadingEntryInfoTargetRegion:
      OffloadEntriesInfoManager.initializeTargetRegionEntryInfo(
          /*DeviceID=*/GetMDInt(1), /*FileID=*/GetMDInt(2),
          /*ParentName=*/GetMDString(3), /*Line=*/GetMDInt(4),
          /*Order=*/GetMDInt(5));
      break;
    case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
        OffloadingEntryInfoDeviceGlobalVar:
      OffloadEntriesInfoManager.initializeDeviceGlobalVarEntryInfo(
          /*MangledName=*/GetMDString(1),
          static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>(
              /*Flags=*/GetMDInt(2)),
          /*Order=*/GetMDInt(3));
      break;
    }
  }
}

/// Build the kmp_routine_entry_t function pointer type on first use and cache
/// both its AST form (KmpRoutineEntryPtrQTy) and its converted form
/// (KmpRoutineEntryPtrTy). Later calls are no-ops.
/// \param KmpInt32Ty Type used for the return value and the first parameter.
void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) {
  if (!KmpRoutineEntryPtrTy) {
    // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type.
    ASTContext &C = CGM.getContext();
    QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy};
    FunctionProtoType::ExtProtoInfo EPI;
    KmpRoutineEntryPtrQTy = C.getPointerType(
        C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI));
    KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy);
  }
}

/// Return the record type for __tgt_offload_entry, building the implicit
/// record and caching it in TgtOffloadEntryQTy on the first call.
QualType CGOpenMPRuntime::getTgtOffloadEntryQTy() {
  // Make sure the type of the entry is already created. This is the type we
  // have to create:
  // struct __tgt_offload_entry{
  //   void      *addr;       // Pointer to the offload entry info.
  //                          // (function or global)
  //   char      *name;       // Name of the function or global.
  //   size_t     size;       // Size of the entry info (0 if it a function).
  //   int32_t    flags;      // Flags associated with the entry, e.g. 'link'.
  //   int32_t    reserved;   // Reserved, to use by the runtime library.
  // };
  if (TgtOffloadEntryQTy.isNull()) {
    ASTContext &C = CGM.getContext();
    RecordDecl *RD = C.buildImplicitRecord("__tgt_offload_entry");
    RD->startDefinition();
    // Fields are added in the order listed above: addr, name, size, flags,
    // reserved.
    addFieldToRecordDecl(C, RD, C.VoidPtrTy);
    addFieldToRecordDecl(C, RD, C.getPointerType(C.CharTy));
    addFieldToRecordDecl(C, RD, C.getSizeType());
    addFieldToRecordDecl(
        C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
    addFieldToRecordDecl(
        C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
    RD->completeDefinition();
    // The record is marked packed after its definition has been completed.
    RD->addAttr(PackedAttr::CreateImplicit(C));
    TgtOffloadEntryQTy = C.getRecordType(RD);
  }
  return TgtOffloadEntryQTy;
}

namespace {
/// Bundle of declarations describing one privatized variable of a task.
struct PrivateHelpersTy {
  PrivateHelpersTy(const Expr *OriginalRef, const VarDecl *Original,
                   const VarDecl *PrivateCopy, const VarDecl *PrivateElemInit)
      : OriginalRef(OriginalRef), Original(Original), PrivateCopy(PrivateCopy),
        PrivateElemInit(PrivateElemInit) {}
  /// Describes a private local: only the original declaration is set.
  PrivateHelpersTy(const VarDecl *Original) : Original(Original) {}
  /// Expression referring to the original variable.
  const Expr *OriginalRef = nullptr;
  /// Declaration of the original variable.
  const VarDecl *Original = nullptr;
  /// Declaration of the private copy; its initializer drives initialization.
  const VarDecl *PrivateCopy = nullptr;
  /// Helper variable that the private copy's initializer reads the source
  /// value through, if any.
  const VarDecl *PrivateElemInit = nullptr;
  /// True when only Original is set, i.e. the object was built with the
  /// single-argument constructor (or with null helpers).
  bool isLocalPrivate() const {
    return !OriginalRef && !PrivateCopy && !PrivateElemInit;
  }
};
/// Alignment paired with the helper declarations of one private variable.
typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy;
} // anonymous namespace

/// Check whether \p VD carries an OMPAllocateDeclAttr that asks for something
/// other than default allocation.
/// \return false if the canonical declaration has no such attribute, or if the
/// attribute names the default or null allocator type without an allocator
/// expression; true otherwise.
static bool isAllocatableDecl(const VarDecl *VD) {
  const VarDecl *CVD = VD->getCanonicalDecl();
  if (!CVD->hasAttr<OMPAllocateDeclAttr>())
    return false;
  const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
  // Use the default allocation.
  return !((AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc ||
            AA->getAllocatorType() == OMPAllocateDeclAttr::OMPNullMemAlloc) &&
           !AA->getAllocator());
}

/// Build the implicit record holding one field per entry of \p Privates, in
/// the same order as \p Privates.
/// \return the completed record, or nullptr when \p Privates is empty.
static RecordDecl *
createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) {
  if (!Privates.empty()) {
    ASTContext &C = CGM.getContext();
    // Build struct .kmp_privates_t. {
    //         /*  private vars  */
    //       };
    RecordDecl *RD = C.buildImplicitRecord(".kmp_privates.t");
    RD->startDefinition();
    for (const auto &Pair : Privates) {
      const VarDecl *VD = Pair.second.Original;
      QualType Type = VD->getType().getNonReferenceType();
      // If the private variable is a local variable with lvalue ref type,
      // allocate the pointer instead of the pointee type.
      if (Pair.second.isLocalPrivate()) {
        if (VD->getType()->isLValueReferenceType())
          Type = C.getPointerType(Type);
        // Locals with a non-default allocator get one more pointer level.
        if (isAllocatableDecl(VD))
          Type = C.getPointerType(Type);
      }
      FieldDecl *FD = addFieldToRecordDecl(C, RD, Type);
      // Carry the original variable's aligned attributes over to the field.
      if (VD->hasAttrs()) {
        for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()),
             E(VD->getAttrs().end());
             I != E; ++I)
          FD->addAttr(*I);
      }
    }
    RD->completeDefinition();
    return RD;
  }
  return nullptr;
}

/// Build the implicit kmp_task_t record (and the kmp_cmplrdata_t union it
/// uses). The taskloop-only fields are appended when \p Kind is a taskloop
/// directive.
/// \param KmpInt32Ty Type used for the kmp_int32 fields.
/// \param KmpRoutineEntryPointerQTy Type used for the routine pointer fields.
static RecordDecl *
createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind,
                         QualType KmpInt32Ty,
                         QualType KmpRoutineEntryPointerQTy) {
  ASTContext &C = CGM.getContext();
  // Build struct kmp_task_t {
  //         void *              shareds;
  //         kmp_routine_entry_t routine;
  //         kmp_int32           part_id;
  //         kmp_cmplrdata_t data1;
  //         kmp_cmplrdata_t data2;
  // For taskloops additional fields:
  //         kmp_uint64          lb;
  //         kmp_uint64          ub;
  //         kmp_int64           st;
  //         kmp_int32           liter;
  //         void *              reductions;
  //       };
  // kmp_cmplrdata_t is a union of a kmp_int32 and a routine entry pointer.
  RecordDecl *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TTK_Union);
  UD->startDefinition();
  addFieldToRecordDecl(C, UD, KmpInt32Ty);
  addFieldToRecordDecl(C, UD, KmpRoutineEntryPointerQTy);
  UD->completeDefinition();
  QualType KmpCmplrdataTy = C.getRecordType(UD);
  RecordDecl *RD = C.buildImplicitRecord("kmp_task_t");
  RD->startDefinition();
  // Fields are added in the order listed in the layout comment above.
  addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy);
  addFieldToRecordDecl(C, RD, KmpInt32Ty);
  addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
  addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
  if (isOpenMPTaskLoopDirective(Kind)) {
    QualType KmpUInt64Ty =
        CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
    QualType KmpInt64Ty =
        CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
    addFieldToRecordDecl(C, RD, KmpUInt64Ty);
    addFieldToRecordDecl(C, RD, KmpUInt64Ty);
    addFieldToRecordDecl(C, RD, KmpInt64Ty);
    addFieldToRecordDecl(C, RD, KmpInt32Ty);
    addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  }
  RD->completeDefinition();
  return RD;
}

/// Build the implicit record that wraps kmp_task_t together with the privates
/// record. The privates field is only added when \p Privates is non-empty, so
/// the result has either one or two fields.
static RecordDecl *
createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy,
                                     ArrayRef<PrivateDataTy> Privates) {
  ASTContext &C = CGM.getContext();
  // Build struct kmp_task_t_with_privates {
  //         kmp_task_t task_data;
  //         .kmp_privates_t. privates;
  //       };
  RecordDecl *RD = C.buildImplicitRecord("kmp_task_t_with_privates");
  RD->startDefinition();
  addFieldToRecordDecl(C, RD, KmpTaskTQTy);
  if (const RecordDecl *PrivateRD = createPrivatesRecordDecl(CGM, Privates))
    addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD));
  RD->completeDefinition();
  return RD;
}

/// Emit a proxy function which accepts kmp_task_t as the second
/// argument.
3578 /// \code 3579 /// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) { 3580 /// TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, tt, 3581 /// For taskloops: 3582 /// tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter, 3583 /// tt->reductions, tt->shareds); 3584 /// return 0; 3585 /// } 3586 /// \endcode 3587 static llvm::Function * 3588 emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc, 3589 OpenMPDirectiveKind Kind, QualType KmpInt32Ty, 3590 QualType KmpTaskTWithPrivatesPtrQTy, 3591 QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy, 3592 QualType SharedsPtrTy, llvm::Function *TaskFunction, 3593 llvm::Value *TaskPrivatesMap) { 3594 ASTContext &C = CGM.getContext(); 3595 FunctionArgList Args; 3596 ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty, 3597 ImplicitParamDecl::Other); 3598 ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3599 KmpTaskTWithPrivatesPtrQTy.withRestrict(), 3600 ImplicitParamDecl::Other); 3601 Args.push_back(&GtidArg); 3602 Args.push_back(&TaskTypeArg); 3603 const auto &TaskEntryFnInfo = 3604 CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args); 3605 llvm::FunctionType *TaskEntryTy = 3606 CGM.getTypes().GetFunctionType(TaskEntryFnInfo); 3607 std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_entry", ""}); 3608 auto *TaskEntry = llvm::Function::Create( 3609 TaskEntryTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule()); 3610 CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskEntry, TaskEntryFnInfo); 3611 TaskEntry->setDoesNotRecurse(); 3612 CodeGenFunction CGF(CGM); 3613 CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args, 3614 Loc, Loc); 3615 3616 // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map, 3617 // tt, 3618 // For taskloops: 3619 // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter, 3620 // 
tt->task_data.shareds); 3621 llvm::Value *GtidParam = CGF.EmitLoadOfScalar( 3622 CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc); 3623 LValue TDBase = CGF.EmitLoadOfPointerLValue( 3624 CGF.GetAddrOfLocalVar(&TaskTypeArg), 3625 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>()); 3626 const auto *KmpTaskTWithPrivatesQTyRD = 3627 cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl()); 3628 LValue Base = 3629 CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin()); 3630 const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl()); 3631 auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId); 3632 LValue PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI); 3633 llvm::Value *PartidParam = PartIdLVal.getPointer(CGF); 3634 3635 auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds); 3636 LValue SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI); 3637 llvm::Value *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 3638 CGF.EmitLoadOfScalar(SharedsLVal, Loc), 3639 CGF.ConvertTypeForMem(SharedsPtrTy)); 3640 3641 auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1); 3642 llvm::Value *PrivatesParam; 3643 if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) { 3644 LValue PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI); 3645 PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 3646 PrivatesLVal.getPointer(CGF), CGF.VoidPtrTy); 3647 } else { 3648 PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); 3649 } 3650 3651 llvm::Value *CommonArgs[] = {GtidParam, PartidParam, PrivatesParam, 3652 TaskPrivatesMap, 3653 CGF.Builder 3654 .CreatePointerBitCastOrAddrSpaceCast( 3655 TDBase.getAddress(CGF), CGF.VoidPtrTy) 3656 .getPointer()}; 3657 SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs), 3658 std::end(CommonArgs)); 3659 if (isOpenMPTaskLoopDirective(Kind)) { 3660 auto LBFI = 
std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound); 3661 LValue LBLVal = CGF.EmitLValueForField(Base, *LBFI); 3662 llvm::Value *LBParam = CGF.EmitLoadOfScalar(LBLVal, Loc); 3663 auto UBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound); 3664 LValue UBLVal = CGF.EmitLValueForField(Base, *UBFI); 3665 llvm::Value *UBParam = CGF.EmitLoadOfScalar(UBLVal, Loc); 3666 auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride); 3667 LValue StLVal = CGF.EmitLValueForField(Base, *StFI); 3668 llvm::Value *StParam = CGF.EmitLoadOfScalar(StLVal, Loc); 3669 auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter); 3670 LValue LILVal = CGF.EmitLValueForField(Base, *LIFI); 3671 llvm::Value *LIParam = CGF.EmitLoadOfScalar(LILVal, Loc); 3672 auto RFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTReductions); 3673 LValue RLVal = CGF.EmitLValueForField(Base, *RFI); 3674 llvm::Value *RParam = CGF.EmitLoadOfScalar(RLVal, Loc); 3675 CallArgs.push_back(LBParam); 3676 CallArgs.push_back(UBParam); 3677 CallArgs.push_back(StParam); 3678 CallArgs.push_back(LIParam); 3679 CallArgs.push_back(RParam); 3680 } 3681 CallArgs.push_back(SharedsParam); 3682 3683 CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskFunction, 3684 CallArgs); 3685 CGF.EmitStoreThroughLValue(RValue::get(CGF.Builder.getInt32(/*C=*/0)), 3686 CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty)); 3687 CGF.FinishFunction(); 3688 return TaskEntry; 3689 } 3690 3691 static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM, 3692 SourceLocation Loc, 3693 QualType KmpInt32Ty, 3694 QualType KmpTaskTWithPrivatesPtrQTy, 3695 QualType KmpTaskTWithPrivatesQTy) { 3696 ASTContext &C = CGM.getContext(); 3697 FunctionArgList Args; 3698 ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty, 3699 ImplicitParamDecl::Other); 3700 ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3701 KmpTaskTWithPrivatesPtrQTy.withRestrict(), 3702 
ImplicitParamDecl::Other); 3703 Args.push_back(&GtidArg); 3704 Args.push_back(&TaskTypeArg); 3705 const auto &DestructorFnInfo = 3706 CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args); 3707 llvm::FunctionType *DestructorFnTy = 3708 CGM.getTypes().GetFunctionType(DestructorFnInfo); 3709 std::string Name = 3710 CGM.getOpenMPRuntime().getName({"omp_task_destructor", ""}); 3711 auto *DestructorFn = 3712 llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage, 3713 Name, &CGM.getModule()); 3714 CGM.SetInternalFunctionAttributes(GlobalDecl(), DestructorFn, 3715 DestructorFnInfo); 3716 DestructorFn->setDoesNotRecurse(); 3717 CodeGenFunction CGF(CGM); 3718 CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo, 3719 Args, Loc, Loc); 3720 3721 LValue Base = CGF.EmitLoadOfPointerLValue( 3722 CGF.GetAddrOfLocalVar(&TaskTypeArg), 3723 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>()); 3724 const auto *KmpTaskTWithPrivatesQTyRD = 3725 cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl()); 3726 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin()); 3727 Base = CGF.EmitLValueForField(Base, *FI); 3728 for (const auto *Field : 3729 cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) { 3730 if (QualType::DestructionKind DtorKind = 3731 Field->getType().isDestructedType()) { 3732 LValue FieldLValue = CGF.EmitLValueForField(Base, Field); 3733 CGF.pushDestroy(DtorKind, FieldLValue.getAddress(CGF), Field->getType()); 3734 } 3735 } 3736 CGF.FinishFunction(); 3737 return DestructorFn; 3738 } 3739 3740 /// Emit a privates mapping function for correct handling of private and 3741 /// firstprivate variables. 3742 /// \code 3743 /// void .omp_task_privates_map.(const .privates. 
*noalias privs, <ty1> 3744 /// **noalias priv1,..., <tyn> **noalias privn) { 3745 /// *priv1 = &.privates.priv1; 3746 /// ...; 3747 /// *privn = &.privates.privn; 3748 /// } 3749 /// \endcode 3750 static llvm::Value * 3751 emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc, 3752 const OMPTaskDataTy &Data, QualType PrivatesQTy, 3753 ArrayRef<PrivateDataTy> Privates) { 3754 ASTContext &C = CGM.getContext(); 3755 FunctionArgList Args; 3756 ImplicitParamDecl TaskPrivatesArg( 3757 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3758 C.getPointerType(PrivatesQTy).withConst().withRestrict(), 3759 ImplicitParamDecl::Other); 3760 Args.push_back(&TaskPrivatesArg); 3761 llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, unsigned> PrivateVarsPos; 3762 unsigned Counter = 1; 3763 for (const Expr *E : Data.PrivateVars) { 3764 Args.push_back(ImplicitParamDecl::Create( 3765 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3766 C.getPointerType(C.getPointerType(E->getType())) 3767 .withConst() 3768 .withRestrict(), 3769 ImplicitParamDecl::Other)); 3770 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 3771 PrivateVarsPos[VD] = Counter; 3772 ++Counter; 3773 } 3774 for (const Expr *E : Data.FirstprivateVars) { 3775 Args.push_back(ImplicitParamDecl::Create( 3776 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3777 C.getPointerType(C.getPointerType(E->getType())) 3778 .withConst() 3779 .withRestrict(), 3780 ImplicitParamDecl::Other)); 3781 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 3782 PrivateVarsPos[VD] = Counter; 3783 ++Counter; 3784 } 3785 for (const Expr *E : Data.LastprivateVars) { 3786 Args.push_back(ImplicitParamDecl::Create( 3787 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3788 C.getPointerType(C.getPointerType(E->getType())) 3789 .withConst() 3790 .withRestrict(), 3791 ImplicitParamDecl::Other)); 3792 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 3793 PrivateVarsPos[VD] = Counter; 3794 ++Counter; 3795 } 3796 for (const VarDecl *VD : 
Data.PrivateLocals) { 3797 QualType Ty = VD->getType().getNonReferenceType(); 3798 if (VD->getType()->isLValueReferenceType()) 3799 Ty = C.getPointerType(Ty); 3800 if (isAllocatableDecl(VD)) 3801 Ty = C.getPointerType(Ty); 3802 Args.push_back(ImplicitParamDecl::Create( 3803 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3804 C.getPointerType(C.getPointerType(Ty)).withConst().withRestrict(), 3805 ImplicitParamDecl::Other)); 3806 PrivateVarsPos[VD] = Counter; 3807 ++Counter; 3808 } 3809 const auto &TaskPrivatesMapFnInfo = 3810 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 3811 llvm::FunctionType *TaskPrivatesMapTy = 3812 CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo); 3813 std::string Name = 3814 CGM.getOpenMPRuntime().getName({"omp_task_privates_map", ""}); 3815 auto *TaskPrivatesMap = llvm::Function::Create( 3816 TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage, Name, 3817 &CGM.getModule()); 3818 CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskPrivatesMap, 3819 TaskPrivatesMapFnInfo); 3820 if (CGM.getLangOpts().Optimize) { 3821 TaskPrivatesMap->removeFnAttr(llvm::Attribute::NoInline); 3822 TaskPrivatesMap->removeFnAttr(llvm::Attribute::OptimizeNone); 3823 TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline); 3824 } 3825 CodeGenFunction CGF(CGM); 3826 CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap, 3827 TaskPrivatesMapFnInfo, Args, Loc, Loc); 3828 3829 // *privi = &.privates.privi; 3830 LValue Base = CGF.EmitLoadOfPointerLValue( 3831 CGF.GetAddrOfLocalVar(&TaskPrivatesArg), 3832 TaskPrivatesArg.getType()->castAs<PointerType>()); 3833 const auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl()); 3834 Counter = 0; 3835 for (const FieldDecl *Field : PrivatesQTyRD->fields()) { 3836 LValue FieldLVal = CGF.EmitLValueForField(Base, Field); 3837 const VarDecl *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]]; 3838 LValue RefLVal = 3839 CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType()); 3840 
LValue RefLoadLVal = CGF.EmitLoadOfPointerLValue( 3841 RefLVal.getAddress(CGF), RefLVal.getType()->castAs<PointerType>()); 3842 CGF.EmitStoreOfScalar(FieldLVal.getPointer(CGF), RefLoadLVal); 3843 ++Counter; 3844 } 3845 CGF.FinishFunction(); 3846 return TaskPrivatesMap; 3847 } 3848 3849 /// Emit initialization for private variables in task-based directives. 3850 static void emitPrivatesInit(CodeGenFunction &CGF, 3851 const OMPExecutableDirective &D, 3852 Address KmpTaskSharedsPtr, LValue TDBase, 3853 const RecordDecl *KmpTaskTWithPrivatesQTyRD, 3854 QualType SharedsTy, QualType SharedsPtrTy, 3855 const OMPTaskDataTy &Data, 3856 ArrayRef<PrivateDataTy> Privates, bool ForDup) { 3857 ASTContext &C = CGF.getContext(); 3858 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin()); 3859 LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI); 3860 OpenMPDirectiveKind Kind = isOpenMPTaskLoopDirective(D.getDirectiveKind()) 3861 ? OMPD_taskloop 3862 : OMPD_task; 3863 const CapturedStmt &CS = *D.getCapturedStmt(Kind); 3864 CodeGenFunction::CGCapturedStmtInfo CapturesInfo(CS); 3865 LValue SrcBase; 3866 bool IsTargetTask = 3867 isOpenMPTargetDataManagementDirective(D.getDirectiveKind()) || 3868 isOpenMPTargetExecutionDirective(D.getDirectiveKind()); 3869 // For target-based directives skip 4 firstprivate arrays BasePointersArray, 3870 // PointersArray, SizesArray, and MappersArray. The original variables for 3871 // these arrays are not captured and we get their addresses explicitly. 3872 if ((!IsTargetTask && !Data.FirstprivateVars.empty() && ForDup) || 3873 (IsTargetTask && KmpTaskSharedsPtr.isValid())) { 3874 SrcBase = CGF.MakeAddrLValue( 3875 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 3876 KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy)), 3877 SharedsTy); 3878 } 3879 FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin(); 3880 for (const PrivateDataTy &Pair : Privates) { 3881 // Do not initialize private locals. 
3882 if (Pair.second.isLocalPrivate()) { 3883 ++FI; 3884 continue; 3885 } 3886 const VarDecl *VD = Pair.second.PrivateCopy; 3887 const Expr *Init = VD->getAnyInitializer(); 3888 if (Init && (!ForDup || (isa<CXXConstructExpr>(Init) && 3889 !CGF.isTrivialInitializer(Init)))) { 3890 LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI); 3891 if (const VarDecl *Elem = Pair.second.PrivateElemInit) { 3892 const VarDecl *OriginalVD = Pair.second.Original; 3893 // Check if the variable is the target-based BasePointersArray, 3894 // PointersArray, SizesArray, or MappersArray. 3895 LValue SharedRefLValue; 3896 QualType Type = PrivateLValue.getType(); 3897 const FieldDecl *SharedField = CapturesInfo.lookup(OriginalVD); 3898 if (IsTargetTask && !SharedField) { 3899 assert(isa<ImplicitParamDecl>(OriginalVD) && 3900 isa<CapturedDecl>(OriginalVD->getDeclContext()) && 3901 cast<CapturedDecl>(OriginalVD->getDeclContext()) 3902 ->getNumParams() == 0 && 3903 isa<TranslationUnitDecl>( 3904 cast<CapturedDecl>(OriginalVD->getDeclContext()) 3905 ->getDeclContext()) && 3906 "Expected artificial target data variable."); 3907 SharedRefLValue = 3908 CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(OriginalVD), Type); 3909 } else if (ForDup) { 3910 SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField); 3911 SharedRefLValue = CGF.MakeAddrLValue( 3912 Address(SharedRefLValue.getPointer(CGF), 3913 C.getDeclAlign(OriginalVD)), 3914 SharedRefLValue.getType(), LValueBaseInfo(AlignmentSource::Decl), 3915 SharedRefLValue.getTBAAInfo()); 3916 } else if (CGF.LambdaCaptureFields.count( 3917 Pair.second.Original->getCanonicalDecl()) > 0 || 3918 isa_and_nonnull<BlockDecl>(CGF.CurCodeDecl)) { 3919 SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef); 3920 } else { 3921 // Processing for implicitly captured variables. 
3922 InlinedOpenMPRegionRAII Region( 3923 CGF, [](CodeGenFunction &, PrePostActionTy &) {}, OMPD_unknown, 3924 /*HasCancel=*/false, /*NoInheritance=*/true); 3925 SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef); 3926 } 3927 if (Type->isArrayType()) { 3928 // Initialize firstprivate array. 3929 if (!isa<CXXConstructExpr>(Init) || CGF.isTrivialInitializer(Init)) { 3930 // Perform simple memcpy. 3931 CGF.EmitAggregateAssign(PrivateLValue, SharedRefLValue, Type); 3932 } else { 3933 // Initialize firstprivate array using element-by-element 3934 // initialization. 3935 CGF.EmitOMPAggregateAssign( 3936 PrivateLValue.getAddress(CGF), SharedRefLValue.getAddress(CGF), 3937 Type, 3938 [&CGF, Elem, Init, &CapturesInfo](Address DestElement, 3939 Address SrcElement) { 3940 // Clean up any temporaries needed by the initialization. 3941 CodeGenFunction::OMPPrivateScope InitScope(CGF); 3942 InitScope.addPrivate( 3943 Elem, [SrcElement]() -> Address { return SrcElement; }); 3944 (void)InitScope.Privatize(); 3945 // Emit initialization for single element. 3946 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII( 3947 CGF, &CapturesInfo); 3948 CGF.EmitAnyExprToMem(Init, DestElement, 3949 Init->getType().getQualifiers(), 3950 /*IsInitializer=*/false); 3951 }); 3952 } 3953 } else { 3954 CodeGenFunction::OMPPrivateScope InitScope(CGF); 3955 InitScope.addPrivate(Elem, [SharedRefLValue, &CGF]() -> Address { 3956 return SharedRefLValue.getAddress(CGF); 3957 }); 3958 (void)InitScope.Privatize(); 3959 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo); 3960 CGF.EmitExprAsInit(Init, VD, PrivateLValue, 3961 /*capturedByInit=*/false); 3962 } 3963 } else { 3964 CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false); 3965 } 3966 } 3967 ++FI; 3968 } 3969 } 3970 3971 /// Check if duplication function is required for taskloops. 
3972 static bool checkInitIsRequired(CodeGenFunction &CGF, 3973 ArrayRef<PrivateDataTy> Privates) { 3974 bool InitRequired = false; 3975 for (const PrivateDataTy &Pair : Privates) { 3976 if (Pair.second.isLocalPrivate()) 3977 continue; 3978 const VarDecl *VD = Pair.second.PrivateCopy; 3979 const Expr *Init = VD->getAnyInitializer(); 3980 InitRequired = InitRequired || (Init && isa<CXXConstructExpr>(Init) && 3981 !CGF.isTrivialInitializer(Init)); 3982 if (InitRequired) 3983 break; 3984 } 3985 return InitRequired; 3986 } 3987 3988 3989 /// Emit task_dup function (for initialization of 3990 /// private/firstprivate/lastprivate vars and last_iter flag) 3991 /// \code 3992 /// void __task_dup_entry(kmp_task_t *task_dst, const kmp_task_t *task_src, int 3993 /// lastpriv) { 3994 /// // setup lastprivate flag 3995 /// task_dst->last = lastpriv; 3996 /// // could be constructor calls here... 3997 /// } 3998 /// \endcode 3999 static llvm::Value * 4000 emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc, 4001 const OMPExecutableDirective &D, 4002 QualType KmpTaskTWithPrivatesPtrQTy, 4003 const RecordDecl *KmpTaskTWithPrivatesQTyRD, 4004 const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy, 4005 QualType SharedsPtrTy, const OMPTaskDataTy &Data, 4006 ArrayRef<PrivateDataTy> Privates, bool WithLastIter) { 4007 ASTContext &C = CGM.getContext(); 4008 FunctionArgList Args; 4009 ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 4010 KmpTaskTWithPrivatesPtrQTy, 4011 ImplicitParamDecl::Other); 4012 ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 4013 KmpTaskTWithPrivatesPtrQTy, 4014 ImplicitParamDecl::Other); 4015 ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy, 4016 ImplicitParamDecl::Other); 4017 Args.push_back(&DstArg); 4018 Args.push_back(&SrcArg); 4019 Args.push_back(&LastprivArg); 4020 const auto &TaskDupFnInfo = 4021 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 4022 llvm::FunctionType 
*TaskDupTy = CGM.getTypes().GetFunctionType(TaskDupFnInfo); 4023 std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_dup", ""}); 4024 auto *TaskDup = llvm::Function::Create( 4025 TaskDupTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule()); 4026 CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskDup, TaskDupFnInfo); 4027 TaskDup->setDoesNotRecurse(); 4028 CodeGenFunction CGF(CGM); 4029 CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskDup, TaskDupFnInfo, Args, Loc, 4030 Loc); 4031 4032 LValue TDBase = CGF.EmitLoadOfPointerLValue( 4033 CGF.GetAddrOfLocalVar(&DstArg), 4034 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>()); 4035 // task_dst->liter = lastpriv; 4036 if (WithLastIter) { 4037 auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter); 4038 LValue Base = CGF.EmitLValueForField( 4039 TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin()); 4040 LValue LILVal = CGF.EmitLValueForField(Base, *LIFI); 4041 llvm::Value *Lastpriv = CGF.EmitLoadOfScalar( 4042 CGF.GetAddrOfLocalVar(&LastprivArg), /*Volatile=*/false, C.IntTy, Loc); 4043 CGF.EmitStoreOfScalar(Lastpriv, LILVal); 4044 } 4045 4046 // Emit initial values for private copies (if any). 
4047 assert(!Privates.empty()); 4048 Address KmpTaskSharedsPtr = Address::invalid(); 4049 if (!Data.FirstprivateVars.empty()) { 4050 LValue TDBase = CGF.EmitLoadOfPointerLValue( 4051 CGF.GetAddrOfLocalVar(&SrcArg), 4052 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>()); 4053 LValue Base = CGF.EmitLValueForField( 4054 TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin()); 4055 KmpTaskSharedsPtr = Address( 4056 CGF.EmitLoadOfScalar(CGF.EmitLValueForField( 4057 Base, *std::next(KmpTaskTQTyRD->field_begin(), 4058 KmpTaskTShareds)), 4059 Loc), 4060 CGM.getNaturalTypeAlignment(SharedsTy)); 4061 } 4062 emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase, KmpTaskTWithPrivatesQTyRD, 4063 SharedsTy, SharedsPtrTy, Data, Privates, /*ForDup=*/true); 4064 CGF.FinishFunction(); 4065 return TaskDup; 4066 } 4067 4068 /// Checks if destructor function is required to be generated. 4069 /// \return true if cleanups are required, false otherwise. 4070 static bool 4071 checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD, 4072 ArrayRef<PrivateDataTy> Privates) { 4073 for (const PrivateDataTy &P : Privates) { 4074 if (P.second.isLocalPrivate()) 4075 continue; 4076 QualType Ty = P.second.Original->getType().getNonReferenceType(); 4077 if (Ty.isDestructedType()) 4078 return true; 4079 } 4080 return false; 4081 } 4082 4083 namespace { 4084 /// Loop generator for OpenMP iterator expression. 
4085 class OMPIteratorGeneratorScope final 4086 : public CodeGenFunction::OMPPrivateScope { 4087 CodeGenFunction &CGF; 4088 const OMPIteratorExpr *E = nullptr; 4089 SmallVector<CodeGenFunction::JumpDest, 4> ContDests; 4090 SmallVector<CodeGenFunction::JumpDest, 4> ExitDests; 4091 OMPIteratorGeneratorScope() = delete; 4092 OMPIteratorGeneratorScope(OMPIteratorGeneratorScope &) = delete; 4093 4094 public: 4095 OMPIteratorGeneratorScope(CodeGenFunction &CGF, const OMPIteratorExpr *E) 4096 : CodeGenFunction::OMPPrivateScope(CGF), CGF(CGF), E(E) { 4097 if (!E) 4098 return; 4099 SmallVector<llvm::Value *, 4> Uppers; 4100 for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) { 4101 Uppers.push_back(CGF.EmitScalarExpr(E->getHelper(I).Upper)); 4102 const auto *VD = cast<VarDecl>(E->getIteratorDecl(I)); 4103 addPrivate(VD, [&CGF, VD]() { 4104 return CGF.CreateMemTemp(VD->getType(), VD->getName()); 4105 }); 4106 const OMPIteratorHelperData &HelperData = E->getHelper(I); 4107 addPrivate(HelperData.CounterVD, [&CGF, &HelperData]() { 4108 return CGF.CreateMemTemp(HelperData.CounterVD->getType(), 4109 "counter.addr"); 4110 }); 4111 } 4112 Privatize(); 4113 4114 for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) { 4115 const OMPIteratorHelperData &HelperData = E->getHelper(I); 4116 LValue CLVal = 4117 CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(HelperData.CounterVD), 4118 HelperData.CounterVD->getType()); 4119 // Counter = 0; 4120 CGF.EmitStoreOfScalar( 4121 llvm::ConstantInt::get(CLVal.getAddress(CGF).getElementType(), 0), 4122 CLVal); 4123 CodeGenFunction::JumpDest &ContDest = 4124 ContDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.cont")); 4125 CodeGenFunction::JumpDest &ExitDest = 4126 ExitDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.exit")); 4127 // N = <number-of_iterations>; 4128 llvm::Value *N = Uppers[I]; 4129 // cont: 4130 // if (Counter < N) goto body; else goto exit; 4131 CGF.EmitBlock(ContDest.getBlock()); 4132 auto *CVal = 4133 
CGF.EmitLoadOfScalar(CLVal, HelperData.CounterVD->getLocation()); 4134 llvm::Value *Cmp = 4135 HelperData.CounterVD->getType()->isSignedIntegerOrEnumerationType() 4136 ? CGF.Builder.CreateICmpSLT(CVal, N) 4137 : CGF.Builder.CreateICmpULT(CVal, N); 4138 llvm::BasicBlock *BodyBB = CGF.createBasicBlock("iter.body"); 4139 CGF.Builder.CreateCondBr(Cmp, BodyBB, ExitDest.getBlock()); 4140 // body: 4141 CGF.EmitBlock(BodyBB); 4142 // Iteri = Begini + Counter * Stepi; 4143 CGF.EmitIgnoredExpr(HelperData.Update); 4144 } 4145 } 4146 ~OMPIteratorGeneratorScope() { 4147 if (!E) 4148 return; 4149 for (unsigned I = E->numOfIterators(); I > 0; --I) { 4150 // Counter = Counter + 1; 4151 const OMPIteratorHelperData &HelperData = E->getHelper(I - 1); 4152 CGF.EmitIgnoredExpr(HelperData.CounterUpdate); 4153 // goto cont; 4154 CGF.EmitBranchThroughCleanup(ContDests[I - 1]); 4155 // exit: 4156 CGF.EmitBlock(ExitDests[I - 1].getBlock(), /*IsFinished=*/I == 1); 4157 } 4158 } 4159 }; 4160 } // namespace 4161 4162 static std::pair<llvm::Value *, llvm::Value *> 4163 getPointerAndSize(CodeGenFunction &CGF, const Expr *E) { 4164 const auto *OASE = dyn_cast<OMPArrayShapingExpr>(E); 4165 llvm::Value *Addr; 4166 if (OASE) { 4167 const Expr *Base = OASE->getBase(); 4168 Addr = CGF.EmitScalarExpr(Base); 4169 } else { 4170 Addr = CGF.EmitLValue(E).getPointer(CGF); 4171 } 4172 llvm::Value *SizeVal; 4173 QualType Ty = E->getType(); 4174 if (OASE) { 4175 SizeVal = CGF.getTypeSize(OASE->getBase()->getType()->getPointeeType()); 4176 for (const Expr *SE : OASE->getDimensions()) { 4177 llvm::Value *Sz = CGF.EmitScalarExpr(SE); 4178 Sz = CGF.EmitScalarConversion( 4179 Sz, SE->getType(), CGF.getContext().getSizeType(), SE->getExprLoc()); 4180 SizeVal = CGF.Builder.CreateNUWMul(SizeVal, Sz); 4181 } 4182 } else if (const auto *ASE = 4183 dyn_cast<OMPArraySectionExpr>(E->IgnoreParenImpCasts())) { 4184 LValue UpAddrLVal = 4185 CGF.EmitOMPArraySectionExpr(ASE, /*IsLowerBound=*/false); 4186 Address UpAddrAddress = 
UpAddrLVal.getAddress(CGF); 4187 llvm::Value *UpAddr = CGF.Builder.CreateConstGEP1_32( 4188 UpAddrAddress.getElementType(), UpAddrAddress.getPointer(), /*Idx0=*/1); 4189 llvm::Value *LowIntPtr = CGF.Builder.CreatePtrToInt(Addr, CGF.SizeTy); 4190 llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGF.SizeTy); 4191 SizeVal = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr); 4192 } else { 4193 SizeVal = CGF.getTypeSize(Ty); 4194 } 4195 return std::make_pair(Addr, SizeVal); 4196 } 4197 4198 /// Builds kmp_depend_info, if it is not built yet, and builds flags type. 4199 static void getKmpAffinityType(ASTContext &C, QualType &KmpTaskAffinityInfoTy) { 4200 QualType FlagsTy = C.getIntTypeForBitwidth(32, /*Signed=*/false); 4201 if (KmpTaskAffinityInfoTy.isNull()) { 4202 RecordDecl *KmpAffinityInfoRD = 4203 C.buildImplicitRecord("kmp_task_affinity_info_t"); 4204 KmpAffinityInfoRD->startDefinition(); 4205 addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getIntPtrType()); 4206 addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getSizeType()); 4207 addFieldToRecordDecl(C, KmpAffinityInfoRD, FlagsTy); 4208 KmpAffinityInfoRD->completeDefinition(); 4209 KmpTaskAffinityInfoTy = C.getRecordType(KmpAffinityInfoRD); 4210 } 4211 } 4212 4213 CGOpenMPRuntime::TaskResultTy 4214 CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc, 4215 const OMPExecutableDirective &D, 4216 llvm::Function *TaskFunction, QualType SharedsTy, 4217 Address Shareds, const OMPTaskDataTy &Data) { 4218 ASTContext &C = CGM.getContext(); 4219 llvm::SmallVector<PrivateDataTy, 4> Privates; 4220 // Aggregate privates and sort them by the alignment. 
4221 const auto *I = Data.PrivateCopies.begin(); 4222 for (const Expr *E : Data.PrivateVars) { 4223 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 4224 Privates.emplace_back( 4225 C.getDeclAlign(VD), 4226 PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()), 4227 /*PrivateElemInit=*/nullptr)); 4228 ++I; 4229 } 4230 I = Data.FirstprivateCopies.begin(); 4231 const auto *IElemInitRef = Data.FirstprivateInits.begin(); 4232 for (const Expr *E : Data.FirstprivateVars) { 4233 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 4234 Privates.emplace_back( 4235 C.getDeclAlign(VD), 4236 PrivateHelpersTy( 4237 E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()), 4238 cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl()))); 4239 ++I; 4240 ++IElemInitRef; 4241 } 4242 I = Data.LastprivateCopies.begin(); 4243 for (const Expr *E : Data.LastprivateVars) { 4244 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 4245 Privates.emplace_back( 4246 C.getDeclAlign(VD), 4247 PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()), 4248 /*PrivateElemInit=*/nullptr)); 4249 ++I; 4250 } 4251 for (const VarDecl *VD : Data.PrivateLocals) { 4252 if (isAllocatableDecl(VD)) 4253 Privates.emplace_back(CGM.getPointerAlign(), PrivateHelpersTy(VD)); 4254 else 4255 Privates.emplace_back(C.getDeclAlign(VD), PrivateHelpersTy(VD)); 4256 } 4257 llvm::stable_sort(Privates, 4258 [](const PrivateDataTy &L, const PrivateDataTy &R) { 4259 return L.first > R.first; 4260 }); 4261 QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1); 4262 // Build type kmp_routine_entry_t (if not built yet). 4263 emitKmpRoutineEntryT(KmpInt32Ty); 4264 // Build type kmp_task_t (if not built yet). 
4265 if (isOpenMPTaskLoopDirective(D.getDirectiveKind())) { 4266 if (SavedKmpTaskloopTQTy.isNull()) { 4267 SavedKmpTaskloopTQTy = C.getRecordType(createKmpTaskTRecordDecl( 4268 CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy)); 4269 } 4270 KmpTaskTQTy = SavedKmpTaskloopTQTy; 4271 } else { 4272 assert((D.getDirectiveKind() == OMPD_task || 4273 isOpenMPTargetExecutionDirective(D.getDirectiveKind()) || 4274 isOpenMPTargetDataManagementDirective(D.getDirectiveKind())) && 4275 "Expected taskloop, task or target directive"); 4276 if (SavedKmpTaskTQTy.isNull()) { 4277 SavedKmpTaskTQTy = C.getRecordType(createKmpTaskTRecordDecl( 4278 CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy)); 4279 } 4280 KmpTaskTQTy = SavedKmpTaskTQTy; 4281 } 4282 const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl()); 4283 // Build particular struct kmp_task_t for the given task. 4284 const RecordDecl *KmpTaskTWithPrivatesQTyRD = 4285 createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates); 4286 QualType KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD); 4287 QualType KmpTaskTWithPrivatesPtrQTy = 4288 C.getPointerType(KmpTaskTWithPrivatesQTy); 4289 llvm::Type *KmpTaskTWithPrivatesTy = CGF.ConvertType(KmpTaskTWithPrivatesQTy); 4290 llvm::Type *KmpTaskTWithPrivatesPtrTy = 4291 KmpTaskTWithPrivatesTy->getPointerTo(); 4292 llvm::Value *KmpTaskTWithPrivatesTySize = 4293 CGF.getTypeSize(KmpTaskTWithPrivatesQTy); 4294 QualType SharedsPtrTy = C.getPointerType(SharedsTy); 4295 4296 // Emit initial values for private copies (if any). 
4297 llvm::Value *TaskPrivatesMap = nullptr; 4298 llvm::Type *TaskPrivatesMapTy = 4299 std::next(TaskFunction->arg_begin(), 3)->getType(); 4300 if (!Privates.empty()) { 4301 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin()); 4302 TaskPrivatesMap = 4303 emitTaskPrivateMappingFunction(CGM, Loc, Data, FI->getType(), Privates); 4304 TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4305 TaskPrivatesMap, TaskPrivatesMapTy); 4306 } else { 4307 TaskPrivatesMap = llvm::ConstantPointerNull::get( 4308 cast<llvm::PointerType>(TaskPrivatesMapTy)); 4309 } 4310 // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid, 4311 // kmp_task_t *tt); 4312 llvm::Function *TaskEntry = emitProxyTaskFunction( 4313 CGM, Loc, D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy, 4314 KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction, 4315 TaskPrivatesMap); 4316 4317 // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid, 4318 // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, 4319 // kmp_routine_entry_t *task_entry); 4320 // Task flags. Format is taken from 4321 // https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h, 4322 // description of kmp_tasking_flags struct. 4323 enum { 4324 TiedFlag = 0x1, 4325 FinalFlag = 0x2, 4326 DestructorsFlag = 0x8, 4327 PriorityFlag = 0x20, 4328 DetachableFlag = 0x40, 4329 }; 4330 unsigned Flags = Data.Tied ? TiedFlag : 0; 4331 bool NeedsCleanup = false; 4332 if (!Privates.empty()) { 4333 NeedsCleanup = 4334 checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD, Privates); 4335 if (NeedsCleanup) 4336 Flags = Flags | DestructorsFlag; 4337 } 4338 if (Data.Priority.getInt()) 4339 Flags = Flags | PriorityFlag; 4340 if (D.hasClausesOfKind<OMPDetachClause>()) 4341 Flags = Flags | DetachableFlag; 4342 llvm::Value *TaskFlags = 4343 Data.Final.getPointer() 4344 ? 
CGF.Builder.CreateSelect(Data.Final.getPointer(), 4345 CGF.Builder.getInt32(FinalFlag), 4346 CGF.Builder.getInt32(/*C=*/0)) 4347 : CGF.Builder.getInt32(Data.Final.getInt() ? FinalFlag : 0); 4348 TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags)); 4349 llvm::Value *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy)); 4350 SmallVector<llvm::Value *, 8> AllocArgs = {emitUpdateLocation(CGF, Loc), 4351 getThreadID(CGF, Loc), TaskFlags, KmpTaskTWithPrivatesTySize, 4352 SharedsSize, CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4353 TaskEntry, KmpRoutineEntryPtrTy)}; 4354 llvm::Value *NewTask; 4355 if (D.hasClausesOfKind<OMPNowaitClause>()) { 4356 // Check if we have any device clause associated with the directive. 4357 const Expr *Device = nullptr; 4358 if (auto *C = D.getSingleClause<OMPDeviceClause>()) 4359 Device = C->getDevice(); 4360 // Emit device ID if any otherwise use default value. 4361 llvm::Value *DeviceID; 4362 if (Device) 4363 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device), 4364 CGF.Int64Ty, /*isSigned=*/true); 4365 else 4366 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF); 4367 AllocArgs.push_back(DeviceID); 4368 NewTask = CGF.EmitRuntimeCall( 4369 OMPBuilder.getOrCreateRuntimeFunction( 4370 CGM.getModule(), OMPRTL___kmpc_omp_target_task_alloc), 4371 AllocArgs); 4372 } else { 4373 NewTask = 4374 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 4375 CGM.getModule(), OMPRTL___kmpc_omp_task_alloc), 4376 AllocArgs); 4377 } 4378 // Emit detach clause initialization. 
4379 // evt = (typeof(evt))__kmpc_task_allow_completion_event(loc, tid, 4380 // task_descriptor); 4381 if (const auto *DC = D.getSingleClause<OMPDetachClause>()) { 4382 const Expr *Evt = DC->getEventHandler()->IgnoreParenImpCasts(); 4383 LValue EvtLVal = CGF.EmitLValue(Evt); 4384 4385 // Build kmp_event_t *__kmpc_task_allow_completion_event(ident_t *loc_ref, 4386 // int gtid, kmp_task_t *task); 4387 llvm::Value *Loc = emitUpdateLocation(CGF, DC->getBeginLoc()); 4388 llvm::Value *Tid = getThreadID(CGF, DC->getBeginLoc()); 4389 Tid = CGF.Builder.CreateIntCast(Tid, CGF.IntTy, /*isSigned=*/false); 4390 llvm::Value *EvtVal = CGF.EmitRuntimeCall( 4391 OMPBuilder.getOrCreateRuntimeFunction( 4392 CGM.getModule(), OMPRTL___kmpc_task_allow_completion_event), 4393 {Loc, Tid, NewTask}); 4394 EvtVal = CGF.EmitScalarConversion(EvtVal, C.VoidPtrTy, Evt->getType(), 4395 Evt->getExprLoc()); 4396 CGF.EmitStoreOfScalar(EvtVal, EvtLVal); 4397 } 4398 // Process affinity clauses. 4399 if (D.hasClausesOfKind<OMPAffinityClause>()) { 4400 // Process list of affinity data. 4401 ASTContext &C = CGM.getContext(); 4402 Address AffinitiesArray = Address::invalid(); 4403 // Calculate number of elements to form the array of affinity data. 4404 llvm::Value *NumOfElements = nullptr; 4405 unsigned NumAffinities = 0; 4406 for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) { 4407 if (const Expr *Modifier = C->getModifier()) { 4408 const auto *IE = cast<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts()); 4409 for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) { 4410 llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper); 4411 Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false); 4412 NumOfElements = 4413 NumOfElements ? CGF.Builder.CreateNUWMul(NumOfElements, Sz) : Sz; 4414 } 4415 } else { 4416 NumAffinities += C->varlist_size(); 4417 } 4418 } 4419 getKmpAffinityType(CGM.getContext(), KmpTaskAffinityInfoTy); 4420 // Fields ids in kmp_task_affinity_info record. 
4421 enum RTLAffinityInfoFieldsTy { BaseAddr, Len, Flags }; 4422 4423 QualType KmpTaskAffinityInfoArrayTy; 4424 if (NumOfElements) { 4425 NumOfElements = CGF.Builder.CreateNUWAdd( 4426 llvm::ConstantInt::get(CGF.SizeTy, NumAffinities), NumOfElements); 4427 auto *OVE = new (C) OpaqueValueExpr( 4428 Loc, 4429 C.getIntTypeForBitwidth(C.getTypeSize(C.getSizeType()), /*Signed=*/0), 4430 VK_PRValue); 4431 CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, OVE, 4432 RValue::get(NumOfElements)); 4433 KmpTaskAffinityInfoArrayTy = 4434 C.getVariableArrayType(KmpTaskAffinityInfoTy, OVE, ArrayType::Normal, 4435 /*IndexTypeQuals=*/0, SourceRange(Loc, Loc)); 4436 // Properly emit variable-sized array. 4437 auto *PD = ImplicitParamDecl::Create(C, KmpTaskAffinityInfoArrayTy, 4438 ImplicitParamDecl::Other); 4439 CGF.EmitVarDecl(*PD); 4440 AffinitiesArray = CGF.GetAddrOfLocalVar(PD); 4441 NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty, 4442 /*isSigned=*/false); 4443 } else { 4444 KmpTaskAffinityInfoArrayTy = C.getConstantArrayType( 4445 KmpTaskAffinityInfoTy, 4446 llvm::APInt(C.getTypeSize(C.getSizeType()), NumAffinities), nullptr, 4447 ArrayType::Normal, /*IndexTypeQuals=*/0); 4448 AffinitiesArray = 4449 CGF.CreateMemTemp(KmpTaskAffinityInfoArrayTy, ".affs.arr.addr"); 4450 AffinitiesArray = CGF.Builder.CreateConstArrayGEP(AffinitiesArray, 0); 4451 NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumAffinities, 4452 /*isSigned=*/false); 4453 } 4454 4455 const auto *KmpAffinityInfoRD = KmpTaskAffinityInfoTy->getAsRecordDecl(); 4456 // Fill array by elements without iterators. 
4457 unsigned Pos = 0; 4458 bool HasIterator = false; 4459 for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) { 4460 if (C->getModifier()) { 4461 HasIterator = true; 4462 continue; 4463 } 4464 for (const Expr *E : C->varlists()) { 4465 llvm::Value *Addr; 4466 llvm::Value *Size; 4467 std::tie(Addr, Size) = getPointerAndSize(CGF, E); 4468 LValue Base = 4469 CGF.MakeAddrLValue(CGF.Builder.CreateConstGEP(AffinitiesArray, Pos), 4470 KmpTaskAffinityInfoTy); 4471 // affs[i].base_addr = &<Affinities[i].second>; 4472 LValue BaseAddrLVal = CGF.EmitLValueForField( 4473 Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr)); 4474 CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy), 4475 BaseAddrLVal); 4476 // affs[i].len = sizeof(<Affinities[i].second>); 4477 LValue LenLVal = CGF.EmitLValueForField( 4478 Base, *std::next(KmpAffinityInfoRD->field_begin(), Len)); 4479 CGF.EmitStoreOfScalar(Size, LenLVal); 4480 ++Pos; 4481 } 4482 } 4483 LValue PosLVal; 4484 if (HasIterator) { 4485 PosLVal = CGF.MakeAddrLValue( 4486 CGF.CreateMemTemp(C.getSizeType(), "affs.counter.addr"), 4487 C.getSizeType()); 4488 CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal); 4489 } 4490 // Process elements with iterators. 
4491 for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) { 4492 const Expr *Modifier = C->getModifier(); 4493 if (!Modifier) 4494 continue; 4495 OMPIteratorGeneratorScope IteratorScope( 4496 CGF, cast_or_null<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts())); 4497 for (const Expr *E : C->varlists()) { 4498 llvm::Value *Addr; 4499 llvm::Value *Size; 4500 std::tie(Addr, Size) = getPointerAndSize(CGF, E); 4501 llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc()); 4502 LValue Base = CGF.MakeAddrLValue( 4503 Address(CGF.Builder.CreateGEP(AffinitiesArray.getElementType(), 4504 AffinitiesArray.getPointer(), Idx), 4505 AffinitiesArray.getAlignment()), 4506 KmpTaskAffinityInfoTy); 4507 // affs[i].base_addr = &<Affinities[i].second>; 4508 LValue BaseAddrLVal = CGF.EmitLValueForField( 4509 Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr)); 4510 CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy), 4511 BaseAddrLVal); 4512 // affs[i].len = sizeof(<Affinities[i].second>); 4513 LValue LenLVal = CGF.EmitLValueForField( 4514 Base, *std::next(KmpAffinityInfoRD->field_begin(), Len)); 4515 CGF.EmitStoreOfScalar(Size, LenLVal); 4516 Idx = CGF.Builder.CreateNUWAdd( 4517 Idx, llvm::ConstantInt::get(Idx->getType(), 1)); 4518 CGF.EmitStoreOfScalar(Idx, PosLVal); 4519 } 4520 } 4521 // Call to kmp_int32 __kmpc_omp_reg_task_with_affinity(ident_t *loc_ref, 4522 // kmp_int32 gtid, kmp_task_t *new_task, kmp_int32 4523 // naffins, kmp_task_affinity_info_t *affin_list); 4524 llvm::Value *LocRef = emitUpdateLocation(CGF, Loc); 4525 llvm::Value *GTid = getThreadID(CGF, Loc); 4526 llvm::Value *AffinListPtr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4527 AffinitiesArray.getPointer(), CGM.VoidPtrTy); 4528 // FIXME: Emit the function and ignore its result for now unless the 4529 // runtime function is properly implemented. 
4530 (void)CGF.EmitRuntimeCall( 4531 OMPBuilder.getOrCreateRuntimeFunction( 4532 CGM.getModule(), OMPRTL___kmpc_omp_reg_task_with_affinity), 4533 {LocRef, GTid, NewTask, NumOfElements, AffinListPtr}); 4534 } 4535 llvm::Value *NewTaskNewTaskTTy = 4536 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4537 NewTask, KmpTaskTWithPrivatesPtrTy); 4538 LValue Base = CGF.MakeNaturalAlignAddrLValue(NewTaskNewTaskTTy, 4539 KmpTaskTWithPrivatesQTy); 4540 LValue TDBase = 4541 CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin()); 4542 // Fill the data in the resulting kmp_task_t record. 4543 // Copy shareds if there are any. 4544 Address KmpTaskSharedsPtr = Address::invalid(); 4545 if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) { 4546 KmpTaskSharedsPtr = 4547 Address(CGF.EmitLoadOfScalar( 4548 CGF.EmitLValueForField( 4549 TDBase, *std::next(KmpTaskTQTyRD->field_begin(), 4550 KmpTaskTShareds)), 4551 Loc), 4552 CGM.getNaturalTypeAlignment(SharedsTy)); 4553 LValue Dest = CGF.MakeAddrLValue(KmpTaskSharedsPtr, SharedsTy); 4554 LValue Src = CGF.MakeAddrLValue(Shareds, SharedsTy); 4555 CGF.EmitAggregateCopy(Dest, Src, SharedsTy, AggValueSlot::DoesNotOverlap); 4556 } 4557 // Emit initial values for private copies (if any). 4558 TaskResultTy Result; 4559 if (!Privates.empty()) { 4560 emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, Base, KmpTaskTWithPrivatesQTyRD, 4561 SharedsTy, SharedsPtrTy, Data, Privates, 4562 /*ForDup=*/false); 4563 if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) && 4564 (!Data.LastprivateVars.empty() || checkInitIsRequired(CGF, Privates))) { 4565 Result.TaskDupFn = emitTaskDupFunction( 4566 CGM, Loc, D, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTyRD, 4567 KmpTaskTQTyRD, SharedsTy, SharedsPtrTy, Data, Privates, 4568 /*WithLastIter=*/!Data.LastprivateVars.empty()); 4569 } 4570 } 4571 // Fields of union "kmp_cmplrdata_t" for destructors and priority. 
4572 enum { Priority = 0, Destructors = 1 }; 4573 // Provide pointer to function with destructors for privates. 4574 auto FI = std::next(KmpTaskTQTyRD->field_begin(), Data1); 4575 const RecordDecl *KmpCmplrdataUD = 4576 (*FI)->getType()->getAsUnionType()->getDecl(); 4577 if (NeedsCleanup) { 4578 llvm::Value *DestructorFn = emitDestructorsFunction( 4579 CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy, 4580 KmpTaskTWithPrivatesQTy); 4581 LValue Data1LV = CGF.EmitLValueForField(TDBase, *FI); 4582 LValue DestructorsLV = CGF.EmitLValueForField( 4583 Data1LV, *std::next(KmpCmplrdataUD->field_begin(), Destructors)); 4584 CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4585 DestructorFn, KmpRoutineEntryPtrTy), 4586 DestructorsLV); 4587 } 4588 // Set priority. 4589 if (Data.Priority.getInt()) { 4590 LValue Data2LV = CGF.EmitLValueForField( 4591 TDBase, *std::next(KmpTaskTQTyRD->field_begin(), Data2)); 4592 LValue PriorityLV = CGF.EmitLValueForField( 4593 Data2LV, *std::next(KmpCmplrdataUD->field_begin(), Priority)); 4594 CGF.EmitStoreOfScalar(Data.Priority.getPointer(), PriorityLV); 4595 } 4596 Result.NewTask = NewTask; 4597 Result.TaskEntry = TaskEntry; 4598 Result.NewTaskNewTaskTTy = NewTaskNewTaskTTy; 4599 Result.TDBase = TDBase; 4600 Result.KmpTaskTQTyRD = KmpTaskTQTyRD; 4601 return Result; 4602 } 4603 4604 namespace { 4605 /// Dependence kind for RTL. 4606 enum RTLDependenceKindTy { 4607 DepIn = 0x01, 4608 DepInOut = 0x3, 4609 DepMutexInOutSet = 0x4 4610 }; 4611 /// Fields ids in kmp_depend_info record. 4612 enum RTLDependInfoFieldsTy { BaseAddr, Len, Flags }; 4613 } // namespace 4614 4615 /// Translates internal dependency kind into the runtime kind. 4616 static RTLDependenceKindTy translateDependencyKind(OpenMPDependClauseKind K) { 4617 RTLDependenceKindTy DepKind; 4618 switch (K) { 4619 case OMPC_DEPEND_in: 4620 DepKind = DepIn; 4621 break; 4622 // Out and InOut dependencies must use the same code. 
4623 case OMPC_DEPEND_out: 4624 case OMPC_DEPEND_inout: 4625 DepKind = DepInOut; 4626 break; 4627 case OMPC_DEPEND_mutexinoutset: 4628 DepKind = DepMutexInOutSet; 4629 break; 4630 case OMPC_DEPEND_source: 4631 case OMPC_DEPEND_sink: 4632 case OMPC_DEPEND_depobj: 4633 case OMPC_DEPEND_unknown: 4634 llvm_unreachable("Unknown task dependence type"); 4635 } 4636 return DepKind; 4637 } 4638 4639 /// Builds kmp_depend_info, if it is not built yet, and builds flags type. 4640 static void getDependTypes(ASTContext &C, QualType &KmpDependInfoTy, 4641 QualType &FlagsTy) { 4642 FlagsTy = C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false); 4643 if (KmpDependInfoTy.isNull()) { 4644 RecordDecl *KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info"); 4645 KmpDependInfoRD->startDefinition(); 4646 addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType()); 4647 addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType()); 4648 addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy); 4649 KmpDependInfoRD->completeDefinition(); 4650 KmpDependInfoTy = C.getRecordType(KmpDependInfoRD); 4651 } 4652 } 4653 4654 std::pair<llvm::Value *, LValue> 4655 CGOpenMPRuntime::getDepobjElements(CodeGenFunction &CGF, LValue DepobjLVal, 4656 SourceLocation Loc) { 4657 ASTContext &C = CGM.getContext(); 4658 QualType FlagsTy; 4659 getDependTypes(C, KmpDependInfoTy, FlagsTy); 4660 RecordDecl *KmpDependInfoRD = 4661 cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl()); 4662 LValue Base = CGF.EmitLoadOfPointerLValue( 4663 DepobjLVal.getAddress(CGF), 4664 C.getPointerType(C.VoidPtrTy).castAs<PointerType>()); 4665 QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy); 4666 Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4667 Base.getAddress(CGF), CGF.ConvertTypeForMem(KmpDependInfoPtrTy)); 4668 Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(), 4669 Base.getTBAAInfo()); 4670 llvm::Value *DepObjAddr = CGF.Builder.CreateGEP( 4671 Addr.getElementType(), 
Addr.getPointer(), 4672 llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true)); 4673 LValue NumDepsBase = CGF.MakeAddrLValue( 4674 Address(DepObjAddr, Addr.getAlignment()), KmpDependInfoTy, 4675 Base.getBaseInfo(), Base.getTBAAInfo()); 4676 // NumDeps = deps[i].base_addr; 4677 LValue BaseAddrLVal = CGF.EmitLValueForField( 4678 NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr)); 4679 llvm::Value *NumDeps = CGF.EmitLoadOfScalar(BaseAddrLVal, Loc); 4680 return std::make_pair(NumDeps, Base); 4681 } 4682 4683 static void emitDependData(CodeGenFunction &CGF, QualType &KmpDependInfoTy, 4684 llvm::PointerUnion<unsigned *, LValue *> Pos, 4685 const OMPTaskDataTy::DependData &Data, 4686 Address DependenciesArray) { 4687 CodeGenModule &CGM = CGF.CGM; 4688 ASTContext &C = CGM.getContext(); 4689 QualType FlagsTy; 4690 getDependTypes(C, KmpDependInfoTy, FlagsTy); 4691 RecordDecl *KmpDependInfoRD = 4692 cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl()); 4693 llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy); 4694 4695 OMPIteratorGeneratorScope IteratorScope( 4696 CGF, cast_or_null<OMPIteratorExpr>( 4697 Data.IteratorExpr ? 
Data.IteratorExpr->IgnoreParenImpCasts() 4698 : nullptr)); 4699 for (const Expr *E : Data.DepExprs) { 4700 llvm::Value *Addr; 4701 llvm::Value *Size; 4702 std::tie(Addr, Size) = getPointerAndSize(CGF, E); 4703 LValue Base; 4704 if (unsigned *P = Pos.dyn_cast<unsigned *>()) { 4705 Base = CGF.MakeAddrLValue( 4706 CGF.Builder.CreateConstGEP(DependenciesArray, *P), KmpDependInfoTy); 4707 } else { 4708 LValue &PosLVal = *Pos.get<LValue *>(); 4709 llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc()); 4710 Base = CGF.MakeAddrLValue( 4711 Address(CGF.Builder.CreateGEP(DependenciesArray.getElementType(), 4712 DependenciesArray.getPointer(), Idx), 4713 DependenciesArray.getAlignment()), 4714 KmpDependInfoTy); 4715 } 4716 // deps[i].base_addr = &<Dependencies[i].second>; 4717 LValue BaseAddrLVal = CGF.EmitLValueForField( 4718 Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr)); 4719 CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy), 4720 BaseAddrLVal); 4721 // deps[i].len = sizeof(<Dependencies[i].second>); 4722 LValue LenLVal = CGF.EmitLValueForField( 4723 Base, *std::next(KmpDependInfoRD->field_begin(), Len)); 4724 CGF.EmitStoreOfScalar(Size, LenLVal); 4725 // deps[i].flags = <Dependencies[i].first>; 4726 RTLDependenceKindTy DepKind = translateDependencyKind(Data.DepKind); 4727 LValue FlagsLVal = CGF.EmitLValueForField( 4728 Base, *std::next(KmpDependInfoRD->field_begin(), Flags)); 4729 CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind), 4730 FlagsLVal); 4731 if (unsigned *P = Pos.dyn_cast<unsigned *>()) { 4732 ++(*P); 4733 } else { 4734 LValue &PosLVal = *Pos.get<LValue *>(); 4735 llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc()); 4736 Idx = CGF.Builder.CreateNUWAdd(Idx, 4737 llvm::ConstantInt::get(Idx->getType(), 1)); 4738 CGF.EmitStoreOfScalar(Idx, PosLVal); 4739 } 4740 } 4741 } 4742 4743 static SmallVector<llvm::Value *, 4> 4744 emitDepobjElementsSizes(CodeGenFunction &CGF, QualType 
&KmpDependInfoTy, 4745 const OMPTaskDataTy::DependData &Data) { 4746 assert(Data.DepKind == OMPC_DEPEND_depobj && 4747 "Expected depobj dependecy kind."); 4748 SmallVector<llvm::Value *, 4> Sizes; 4749 SmallVector<LValue, 4> SizeLVals; 4750 ASTContext &C = CGF.getContext(); 4751 QualType FlagsTy; 4752 getDependTypes(C, KmpDependInfoTy, FlagsTy); 4753 RecordDecl *KmpDependInfoRD = 4754 cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl()); 4755 QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy); 4756 llvm::Type *KmpDependInfoPtrT = CGF.ConvertTypeForMem(KmpDependInfoPtrTy); 4757 { 4758 OMPIteratorGeneratorScope IteratorScope( 4759 CGF, cast_or_null<OMPIteratorExpr>( 4760 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts() 4761 : nullptr)); 4762 for (const Expr *E : Data.DepExprs) { 4763 LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts()); 4764 LValue Base = CGF.EmitLoadOfPointerLValue( 4765 DepobjLVal.getAddress(CGF), 4766 C.getPointerType(C.VoidPtrTy).castAs<PointerType>()); 4767 Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4768 Base.getAddress(CGF), KmpDependInfoPtrT); 4769 Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(), 4770 Base.getTBAAInfo()); 4771 llvm::Value *DepObjAddr = CGF.Builder.CreateGEP( 4772 Addr.getElementType(), Addr.getPointer(), 4773 llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true)); 4774 LValue NumDepsBase = CGF.MakeAddrLValue( 4775 Address(DepObjAddr, Addr.getAlignment()), KmpDependInfoTy, 4776 Base.getBaseInfo(), Base.getTBAAInfo()); 4777 // NumDeps = deps[i].base_addr; 4778 LValue BaseAddrLVal = CGF.EmitLValueForField( 4779 NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr)); 4780 llvm::Value *NumDeps = 4781 CGF.EmitLoadOfScalar(BaseAddrLVal, E->getExprLoc()); 4782 LValue NumLVal = CGF.MakeAddrLValue( 4783 CGF.CreateMemTemp(C.getUIntPtrType(), "depobj.size.addr"), 4784 C.getUIntPtrType()); 4785 
CGF.Builder.CreateStore(llvm::ConstantInt::get(CGF.IntPtrTy, 0), 4786 NumLVal.getAddress(CGF)); 4787 llvm::Value *PrevVal = CGF.EmitLoadOfScalar(NumLVal, E->getExprLoc()); 4788 llvm::Value *Add = CGF.Builder.CreateNUWAdd(PrevVal, NumDeps); 4789 CGF.EmitStoreOfScalar(Add, NumLVal); 4790 SizeLVals.push_back(NumLVal); 4791 } 4792 } 4793 for (unsigned I = 0, E = SizeLVals.size(); I < E; ++I) { 4794 llvm::Value *Size = 4795 CGF.EmitLoadOfScalar(SizeLVals[I], Data.DepExprs[I]->getExprLoc()); 4796 Sizes.push_back(Size); 4797 } 4798 return Sizes; 4799 } 4800 4801 static void emitDepobjElements(CodeGenFunction &CGF, QualType &KmpDependInfoTy, 4802 LValue PosLVal, 4803 const OMPTaskDataTy::DependData &Data, 4804 Address DependenciesArray) { 4805 assert(Data.DepKind == OMPC_DEPEND_depobj && 4806 "Expected depobj dependecy kind."); 4807 ASTContext &C = CGF.getContext(); 4808 QualType FlagsTy; 4809 getDependTypes(C, KmpDependInfoTy, FlagsTy); 4810 RecordDecl *KmpDependInfoRD = 4811 cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl()); 4812 QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy); 4813 llvm::Type *KmpDependInfoPtrT = CGF.ConvertTypeForMem(KmpDependInfoPtrTy); 4814 llvm::Value *ElSize = CGF.getTypeSize(KmpDependInfoTy); 4815 { 4816 OMPIteratorGeneratorScope IteratorScope( 4817 CGF, cast_or_null<OMPIteratorExpr>( 4818 Data.IteratorExpr ? 
Data.IteratorExpr->IgnoreParenImpCasts() 4819 : nullptr)); 4820 for (unsigned I = 0, End = Data.DepExprs.size(); I < End; ++I) { 4821 const Expr *E = Data.DepExprs[I]; 4822 LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts()); 4823 LValue Base = CGF.EmitLoadOfPointerLValue( 4824 DepobjLVal.getAddress(CGF), 4825 C.getPointerType(C.VoidPtrTy).castAs<PointerType>()); 4826 Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4827 Base.getAddress(CGF), KmpDependInfoPtrT); 4828 Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(), 4829 Base.getTBAAInfo()); 4830 4831 // Get number of elements in a single depobj. 4832 llvm::Value *DepObjAddr = CGF.Builder.CreateGEP( 4833 Addr.getElementType(), Addr.getPointer(), 4834 llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true)); 4835 LValue NumDepsBase = CGF.MakeAddrLValue( 4836 Address(DepObjAddr, Addr.getAlignment()), KmpDependInfoTy, 4837 Base.getBaseInfo(), Base.getTBAAInfo()); 4838 // NumDeps = deps[i].base_addr; 4839 LValue BaseAddrLVal = CGF.EmitLValueForField( 4840 NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr)); 4841 llvm::Value *NumDeps = 4842 CGF.EmitLoadOfScalar(BaseAddrLVal, E->getExprLoc()); 4843 4844 // memcopy dependency data. 4845 llvm::Value *Size = CGF.Builder.CreateNUWMul( 4846 ElSize, 4847 CGF.Builder.CreateIntCast(NumDeps, CGF.SizeTy, /*isSigned=*/false)); 4848 llvm::Value *Pos = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc()); 4849 Address DepAddr = 4850 Address(CGF.Builder.CreateGEP(DependenciesArray.getElementType(), 4851 DependenciesArray.getPointer(), Pos), 4852 DependenciesArray.getAlignment()); 4853 CGF.Builder.CreateMemCpy(DepAddr, Base.getAddress(CGF), Size); 4854 4855 // Increase pos. 
4856 // pos += size; 4857 llvm::Value *Add = CGF.Builder.CreateNUWAdd(Pos, NumDeps); 4858 CGF.EmitStoreOfScalar(Add, PosLVal); 4859 } 4860 } 4861 } 4862 4863 std::pair<llvm::Value *, Address> CGOpenMPRuntime::emitDependClause( 4864 CodeGenFunction &CGF, ArrayRef<OMPTaskDataTy::DependData> Dependencies, 4865 SourceLocation Loc) { 4866 if (llvm::all_of(Dependencies, [](const OMPTaskDataTy::DependData &D) { 4867 return D.DepExprs.empty(); 4868 })) 4869 return std::make_pair(nullptr, Address::invalid()); 4870 // Process list of dependencies. 4871 ASTContext &C = CGM.getContext(); 4872 Address DependenciesArray = Address::invalid(); 4873 llvm::Value *NumOfElements = nullptr; 4874 unsigned NumDependencies = std::accumulate( 4875 Dependencies.begin(), Dependencies.end(), 0, 4876 [](unsigned V, const OMPTaskDataTy::DependData &D) { 4877 return D.DepKind == OMPC_DEPEND_depobj 4878 ? V 4879 : (V + (D.IteratorExpr ? 0 : D.DepExprs.size())); 4880 }); 4881 QualType FlagsTy; 4882 getDependTypes(C, KmpDependInfoTy, FlagsTy); 4883 bool HasDepobjDeps = false; 4884 bool HasRegularWithIterators = false; 4885 llvm::Value *NumOfDepobjElements = llvm::ConstantInt::get(CGF.IntPtrTy, 0); 4886 llvm::Value *NumOfRegularWithIterators = 4887 llvm::ConstantInt::get(CGF.IntPtrTy, 0); 4888 // Calculate number of depobj dependecies and regular deps with the iterators. 4889 for (const OMPTaskDataTy::DependData &D : Dependencies) { 4890 if (D.DepKind == OMPC_DEPEND_depobj) { 4891 SmallVector<llvm::Value *, 4> Sizes = 4892 emitDepobjElementsSizes(CGF, KmpDependInfoTy, D); 4893 for (llvm::Value *Size : Sizes) { 4894 NumOfDepobjElements = 4895 CGF.Builder.CreateNUWAdd(NumOfDepobjElements, Size); 4896 } 4897 HasDepobjDeps = true; 4898 continue; 4899 } 4900 // Include number of iterations, if any. 
4901 4902 if (const auto *IE = cast_or_null<OMPIteratorExpr>(D.IteratorExpr)) { 4903 for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) { 4904 llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper); 4905 Sz = CGF.Builder.CreateIntCast(Sz, CGF.IntPtrTy, /*isSigned=*/false); 4906 llvm::Value *NumClauseDeps = CGF.Builder.CreateNUWMul( 4907 Sz, llvm::ConstantInt::get(CGF.IntPtrTy, D.DepExprs.size())); 4908 NumOfRegularWithIterators = 4909 CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumClauseDeps); 4910 } 4911 HasRegularWithIterators = true; 4912 continue; 4913 } 4914 } 4915 4916 QualType KmpDependInfoArrayTy; 4917 if (HasDepobjDeps || HasRegularWithIterators) { 4918 NumOfElements = llvm::ConstantInt::get(CGM.IntPtrTy, NumDependencies, 4919 /*isSigned=*/false); 4920 if (HasDepobjDeps) { 4921 NumOfElements = 4922 CGF.Builder.CreateNUWAdd(NumOfDepobjElements, NumOfElements); 4923 } 4924 if (HasRegularWithIterators) { 4925 NumOfElements = 4926 CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumOfElements); 4927 } 4928 auto *OVE = new (C) OpaqueValueExpr( 4929 Loc, C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0), 4930 VK_PRValue); 4931 CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, OVE, 4932 RValue::get(NumOfElements)); 4933 KmpDependInfoArrayTy = 4934 C.getVariableArrayType(KmpDependInfoTy, OVE, ArrayType::Normal, 4935 /*IndexTypeQuals=*/0, SourceRange(Loc, Loc)); 4936 // CGF.EmitVariablyModifiedType(KmpDependInfoArrayTy); 4937 // Properly emit variable-sized array. 
4938 auto *PD = ImplicitParamDecl::Create(C, KmpDependInfoArrayTy, 4939 ImplicitParamDecl::Other); 4940 CGF.EmitVarDecl(*PD); 4941 DependenciesArray = CGF.GetAddrOfLocalVar(PD); 4942 NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty, 4943 /*isSigned=*/false); 4944 } else { 4945 KmpDependInfoArrayTy = C.getConstantArrayType( 4946 KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies), nullptr, 4947 ArrayType::Normal, /*IndexTypeQuals=*/0); 4948 DependenciesArray = 4949 CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr"); 4950 DependenciesArray = CGF.Builder.CreateConstArrayGEP(DependenciesArray, 0); 4951 NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumDependencies, 4952 /*isSigned=*/false); 4953 } 4954 unsigned Pos = 0; 4955 for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) { 4956 if (Dependencies[I].DepKind == OMPC_DEPEND_depobj || 4957 Dependencies[I].IteratorExpr) 4958 continue; 4959 emitDependData(CGF, KmpDependInfoTy, &Pos, Dependencies[I], 4960 DependenciesArray); 4961 } 4962 // Copy regular dependecies with iterators. 4963 LValue PosLVal = CGF.MakeAddrLValue( 4964 CGF.CreateMemTemp(C.getSizeType(), "dep.counter.addr"), C.getSizeType()); 4965 CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal); 4966 for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) { 4967 if (Dependencies[I].DepKind == OMPC_DEPEND_depobj || 4968 !Dependencies[I].IteratorExpr) 4969 continue; 4970 emitDependData(CGF, KmpDependInfoTy, &PosLVal, Dependencies[I], 4971 DependenciesArray); 4972 } 4973 // Copy final depobj arrays without iterators. 
4974 if (HasDepobjDeps) { 4975 for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) { 4976 if (Dependencies[I].DepKind != OMPC_DEPEND_depobj) 4977 continue; 4978 emitDepobjElements(CGF, KmpDependInfoTy, PosLVal, Dependencies[I], 4979 DependenciesArray); 4980 } 4981 } 4982 DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4983 DependenciesArray, CGF.VoidPtrTy); 4984 return std::make_pair(NumOfElements, DependenciesArray); 4985 } 4986 4987 Address CGOpenMPRuntime::emitDepobjDependClause( 4988 CodeGenFunction &CGF, const OMPTaskDataTy::DependData &Dependencies, 4989 SourceLocation Loc) { 4990 if (Dependencies.DepExprs.empty()) 4991 return Address::invalid(); 4992 // Process list of dependencies. 4993 ASTContext &C = CGM.getContext(); 4994 Address DependenciesArray = Address::invalid(); 4995 unsigned NumDependencies = Dependencies.DepExprs.size(); 4996 QualType FlagsTy; 4997 getDependTypes(C, KmpDependInfoTy, FlagsTy); 4998 RecordDecl *KmpDependInfoRD = 4999 cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl()); 5000 5001 llvm::Value *Size; 5002 // Define type kmp_depend_info[<Dependencies.size()>]; 5003 // For depobj reserve one extra element to store the number of elements. 5004 // It is required to handle depobj(x) update(in) construct. 
5005 // kmp_depend_info[<Dependencies.size()>] deps; 5006 llvm::Value *NumDepsVal; 5007 CharUnits Align = C.getTypeAlignInChars(KmpDependInfoTy); 5008 if (const auto *IE = 5009 cast_or_null<OMPIteratorExpr>(Dependencies.IteratorExpr)) { 5010 NumDepsVal = llvm::ConstantInt::get(CGF.SizeTy, 1); 5011 for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) { 5012 llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper); 5013 Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false); 5014 NumDepsVal = CGF.Builder.CreateNUWMul(NumDepsVal, Sz); 5015 } 5016 Size = CGF.Builder.CreateNUWAdd(llvm::ConstantInt::get(CGF.SizeTy, 1), 5017 NumDepsVal); 5018 CharUnits SizeInBytes = 5019 C.getTypeSizeInChars(KmpDependInfoTy).alignTo(Align); 5020 llvm::Value *RecSize = CGM.getSize(SizeInBytes); 5021 Size = CGF.Builder.CreateNUWMul(Size, RecSize); 5022 NumDepsVal = 5023 CGF.Builder.CreateIntCast(NumDepsVal, CGF.IntPtrTy, /*isSigned=*/false); 5024 } else { 5025 QualType KmpDependInfoArrayTy = C.getConstantArrayType( 5026 KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies + 1), 5027 nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0); 5028 CharUnits Sz = C.getTypeSizeInChars(KmpDependInfoArrayTy); 5029 Size = CGM.getSize(Sz.alignTo(Align)); 5030 NumDepsVal = llvm::ConstantInt::get(CGF.IntPtrTy, NumDependencies); 5031 } 5032 // Need to allocate on the dynamic memory. 5033 llvm::Value *ThreadID = getThreadID(CGF, Loc); 5034 // Use default allocator. 
5035 llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); 5036 llvm::Value *Args[] = {ThreadID, Size, Allocator}; 5037 5038 llvm::Value *Addr = 5039 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 5040 CGM.getModule(), OMPRTL___kmpc_alloc), 5041 Args, ".dep.arr.addr"); 5042 Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5043 Addr, CGF.ConvertTypeForMem(KmpDependInfoTy)->getPointerTo()); 5044 DependenciesArray = Address(Addr, Align); 5045 // Write number of elements in the first element of array for depobj. 5046 LValue Base = CGF.MakeAddrLValue(DependenciesArray, KmpDependInfoTy); 5047 // deps[i].base_addr = NumDependencies; 5048 LValue BaseAddrLVal = CGF.EmitLValueForField( 5049 Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr)); 5050 CGF.EmitStoreOfScalar(NumDepsVal, BaseAddrLVal); 5051 llvm::PointerUnion<unsigned *, LValue *> Pos; 5052 unsigned Idx = 1; 5053 LValue PosLVal; 5054 if (Dependencies.IteratorExpr) { 5055 PosLVal = CGF.MakeAddrLValue( 5056 CGF.CreateMemTemp(C.getSizeType(), "iterator.counter.addr"), 5057 C.getSizeType()); 5058 CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Idx), PosLVal, 5059 /*IsInit=*/true); 5060 Pos = &PosLVal; 5061 } else { 5062 Pos = &Idx; 5063 } 5064 emitDependData(CGF, KmpDependInfoTy, Pos, Dependencies, DependenciesArray); 5065 DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5066 CGF.Builder.CreateConstGEP(DependenciesArray, 1), CGF.VoidPtrTy); 5067 return DependenciesArray; 5068 } 5069 5070 void CGOpenMPRuntime::emitDestroyClause(CodeGenFunction &CGF, LValue DepobjLVal, 5071 SourceLocation Loc) { 5072 ASTContext &C = CGM.getContext(); 5073 QualType FlagsTy; 5074 getDependTypes(C, KmpDependInfoTy, FlagsTy); 5075 LValue Base = CGF.EmitLoadOfPointerLValue( 5076 DepobjLVal.getAddress(CGF), 5077 C.getPointerType(C.VoidPtrTy).castAs<PointerType>()); 5078 QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy); 5079 Address Addr = 
CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5080 Base.getAddress(CGF), CGF.ConvertTypeForMem(KmpDependInfoPtrTy)); 5081 llvm::Value *DepObjAddr = CGF.Builder.CreateGEP( 5082 Addr.getElementType(), Addr.getPointer(), 5083 llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true)); 5084 DepObjAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(DepObjAddr, 5085 CGF.VoidPtrTy); 5086 llvm::Value *ThreadID = getThreadID(CGF, Loc); 5087 // Use default allocator. 5088 llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); 5089 llvm::Value *Args[] = {ThreadID, DepObjAddr, Allocator}; 5090 5091 // _kmpc_free(gtid, addr, nullptr); 5092 (void)CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 5093 CGM.getModule(), OMPRTL___kmpc_free), 5094 Args); 5095 } 5096 5097 void CGOpenMPRuntime::emitUpdateClause(CodeGenFunction &CGF, LValue DepobjLVal, 5098 OpenMPDependClauseKind NewDepKind, 5099 SourceLocation Loc) { 5100 ASTContext &C = CGM.getContext(); 5101 QualType FlagsTy; 5102 getDependTypes(C, KmpDependInfoTy, FlagsTy); 5103 RecordDecl *KmpDependInfoRD = 5104 cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl()); 5105 llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy); 5106 llvm::Value *NumDeps; 5107 LValue Base; 5108 std::tie(NumDeps, Base) = getDepobjElements(CGF, DepobjLVal, Loc); 5109 5110 Address Begin = Base.getAddress(CGF); 5111 // Cast from pointer to array type to pointer to single element. 5112 llvm::Value *End = CGF.Builder.CreateGEP( 5113 Begin.getElementType(), Begin.getPointer(), NumDeps); 5114 // The basic structure here is a while-do loop. 
5115 llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.body"); 5116 llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.done"); 5117 llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock(); 5118 CGF.EmitBlock(BodyBB); 5119 llvm::PHINode *ElementPHI = 5120 CGF.Builder.CreatePHI(Begin.getType(), 2, "omp.elementPast"); 5121 ElementPHI->addIncoming(Begin.getPointer(), EntryBB); 5122 Begin = Address(ElementPHI, Begin.getAlignment()); 5123 Base = CGF.MakeAddrLValue(Begin, KmpDependInfoTy, Base.getBaseInfo(), 5124 Base.getTBAAInfo()); 5125 // deps[i].flags = NewDepKind; 5126 RTLDependenceKindTy DepKind = translateDependencyKind(NewDepKind); 5127 LValue FlagsLVal = CGF.EmitLValueForField( 5128 Base, *std::next(KmpDependInfoRD->field_begin(), Flags)); 5129 CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind), 5130 FlagsLVal); 5131 5132 // Shift the address forward by one element. 5133 Address ElementNext = 5134 CGF.Builder.CreateConstGEP(Begin, /*Index=*/1, "omp.elementNext"); 5135 ElementPHI->addIncoming(ElementNext.getPointer(), 5136 CGF.Builder.GetInsertBlock()); 5137 llvm::Value *IsEmpty = 5138 CGF.Builder.CreateICmpEQ(ElementNext.getPointer(), End, "omp.isempty"); 5139 CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB); 5140 // Done. 
5141 CGF.EmitBlock(DoneBB, /*IsFinished=*/true); 5142 } 5143 5144 void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc, 5145 const OMPExecutableDirective &D, 5146 llvm::Function *TaskFunction, 5147 QualType SharedsTy, Address Shareds, 5148 const Expr *IfCond, 5149 const OMPTaskDataTy &Data) { 5150 if (!CGF.HaveInsertPoint()) 5151 return; 5152 5153 TaskResultTy Result = 5154 emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data); 5155 llvm::Value *NewTask = Result.NewTask; 5156 llvm::Function *TaskEntry = Result.TaskEntry; 5157 llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy; 5158 LValue TDBase = Result.TDBase; 5159 const RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD; 5160 // Process list of dependences. 5161 Address DependenciesArray = Address::invalid(); 5162 llvm::Value *NumOfElements; 5163 std::tie(NumOfElements, DependenciesArray) = 5164 emitDependClause(CGF, Data.Dependences, Loc); 5165 5166 // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc() 5167 // libcall. 
5168 // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid, 5169 // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list, 5170 // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if dependence 5171 // list is not empty 5172 llvm::Value *ThreadID = getThreadID(CGF, Loc); 5173 llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc); 5174 llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask }; 5175 llvm::Value *DepTaskArgs[7]; 5176 if (!Data.Dependences.empty()) { 5177 DepTaskArgs[0] = UpLoc; 5178 DepTaskArgs[1] = ThreadID; 5179 DepTaskArgs[2] = NewTask; 5180 DepTaskArgs[3] = NumOfElements; 5181 DepTaskArgs[4] = DependenciesArray.getPointer(); 5182 DepTaskArgs[5] = CGF.Builder.getInt32(0); 5183 DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); 5184 } 5185 auto &&ThenCodeGen = [this, &Data, TDBase, KmpTaskTQTyRD, &TaskArgs, 5186 &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) { 5187 if (!Data.Tied) { 5188 auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId); 5189 LValue PartIdLVal = CGF.EmitLValueForField(TDBase, *PartIdFI); 5190 CGF.EmitStoreOfScalar(CGF.Builder.getInt32(0), PartIdLVal); 5191 } 5192 if (!Data.Dependences.empty()) { 5193 CGF.EmitRuntimeCall( 5194 OMPBuilder.getOrCreateRuntimeFunction( 5195 CGM.getModule(), OMPRTL___kmpc_omp_task_with_deps), 5196 DepTaskArgs); 5197 } else { 5198 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 5199 CGM.getModule(), OMPRTL___kmpc_omp_task), 5200 TaskArgs); 5201 } 5202 // Check if parent region is untied and build return for untied task; 5203 if (auto *Region = 5204 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) 5205 Region->emitUntiedSwitch(CGF); 5206 }; 5207 5208 llvm::Value *DepWaitTaskArgs[6]; 5209 if (!Data.Dependences.empty()) { 5210 DepWaitTaskArgs[0] = UpLoc; 5211 DepWaitTaskArgs[1] = ThreadID; 5212 DepWaitTaskArgs[2] = NumOfElements; 5213 DepWaitTaskArgs[3] = DependenciesArray.getPointer(); 5214 DepWaitTaskArgs[4] 
= CGF.Builder.getInt32(0); 5215 DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); 5216 } 5217 auto &M = CGM.getModule(); 5218 auto &&ElseCodeGen = [this, &M, &TaskArgs, ThreadID, NewTaskNewTaskTTy, 5219 TaskEntry, &Data, &DepWaitTaskArgs, 5220 Loc](CodeGenFunction &CGF, PrePostActionTy &) { 5221 CodeGenFunction::RunCleanupsScope LocalScope(CGF); 5222 // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid, 5223 // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 5224 // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info 5225 // is specified. 5226 if (!Data.Dependences.empty()) 5227 CGF.EmitRuntimeCall( 5228 OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_omp_wait_deps), 5229 DepWaitTaskArgs); 5230 // Call proxy_task_entry(gtid, new_task); 5231 auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy, 5232 Loc](CodeGenFunction &CGF, PrePostActionTy &Action) { 5233 Action.Enter(CGF); 5234 llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy}; 5235 CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskEntry, 5236 OutlinedFnArgs); 5237 }; 5238 5239 // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid, 5240 // kmp_task_t *new_task); 5241 // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid, 5242 // kmp_task_t *new_task); 5243 RegionCodeGenTy RCG(CodeGen); 5244 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction( 5245 M, OMPRTL___kmpc_omp_task_begin_if0), 5246 TaskArgs, 5247 OMPBuilder.getOrCreateRuntimeFunction( 5248 M, OMPRTL___kmpc_omp_task_complete_if0), 5249 TaskArgs); 5250 RCG.setAction(Action); 5251 RCG(CGF); 5252 }; 5253 5254 if (IfCond) { 5255 emitIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen); 5256 } else { 5257 RegionCodeGenTy ThenRCG(ThenCodeGen); 5258 ThenRCG(CGF); 5259 } 5260 } 5261 5262 void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc, 5263 const OMPLoopDirective &D, 5264 llvm::Function *TaskFunction, 5265 
QualType SharedsTy, Address Shareds, 5266 const Expr *IfCond, 5267 const OMPTaskDataTy &Data) { 5268 if (!CGF.HaveInsertPoint()) 5269 return; 5270 TaskResultTy Result = 5271 emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data); 5272 // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc() 5273 // libcall. 5274 // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int 5275 // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int 5276 // sched, kmp_uint64 grainsize, void *task_dup); 5277 llvm::Value *ThreadID = getThreadID(CGF, Loc); 5278 llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc); 5279 llvm::Value *IfVal; 5280 if (IfCond) { 5281 IfVal = CGF.Builder.CreateIntCast(CGF.EvaluateExprAsBool(IfCond), CGF.IntTy, 5282 /*isSigned=*/true); 5283 } else { 5284 IfVal = llvm::ConstantInt::getSigned(CGF.IntTy, /*V=*/1); 5285 } 5286 5287 LValue LBLVal = CGF.EmitLValueForField( 5288 Result.TDBase, 5289 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound)); 5290 const auto *LBVar = 5291 cast<VarDecl>(cast<DeclRefExpr>(D.getLowerBoundVariable())->getDecl()); 5292 CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(CGF), 5293 LBLVal.getQuals(), 5294 /*IsInitializer=*/true); 5295 LValue UBLVal = CGF.EmitLValueForField( 5296 Result.TDBase, 5297 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound)); 5298 const auto *UBVar = 5299 cast<VarDecl>(cast<DeclRefExpr>(D.getUpperBoundVariable())->getDecl()); 5300 CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(CGF), 5301 UBLVal.getQuals(), 5302 /*IsInitializer=*/true); 5303 LValue StLVal = CGF.EmitLValueForField( 5304 Result.TDBase, 5305 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTStride)); 5306 const auto *StVar = 5307 cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl()); 5308 CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(CGF), 5309 StLVal.getQuals(), 5310 /*IsInitializer=*/true); 5311 // 
Store reductions address. 5312 LValue RedLVal = CGF.EmitLValueForField( 5313 Result.TDBase, 5314 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTReductions)); 5315 if (Data.Reductions) { 5316 CGF.EmitStoreOfScalar(Data.Reductions, RedLVal); 5317 } else { 5318 CGF.EmitNullInitialization(RedLVal.getAddress(CGF), 5319 CGF.getContext().VoidPtrTy); 5320 } 5321 enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 }; 5322 llvm::Value *TaskArgs[] = { 5323 UpLoc, 5324 ThreadID, 5325 Result.NewTask, 5326 IfVal, 5327 LBLVal.getPointer(CGF), 5328 UBLVal.getPointer(CGF), 5329 CGF.EmitLoadOfScalar(StLVal, Loc), 5330 llvm::ConstantInt::getSigned( 5331 CGF.IntTy, 1), // Always 1 because taskgroup emitted by the compiler 5332 llvm::ConstantInt::getSigned( 5333 CGF.IntTy, Data.Schedule.getPointer() 5334 ? Data.Schedule.getInt() ? NumTasks : Grainsize 5335 : NoSchedule), 5336 Data.Schedule.getPointer() 5337 ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty, 5338 /*isSigned=*/false) 5339 : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0), 5340 Result.TaskDupFn ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5341 Result.TaskDupFn, CGF.VoidPtrTy) 5342 : llvm::ConstantPointerNull::get(CGF.VoidPtrTy)}; 5343 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 5344 CGM.getModule(), OMPRTL___kmpc_taskloop), 5345 TaskArgs); 5346 } 5347 5348 /// Emit reduction operation for each element of array (required for 5349 /// array sections) LHS op = RHS. 5350 /// \param Type Type of array. 5351 /// \param LHSVar Variable on the left side of the reduction operation 5352 /// (references element of array in original variable). 5353 /// \param RHSVar Variable on the right side of the reduction operation 5354 /// (references element of array in original variable). 5355 /// \param RedOpGen Generator of reduction operation with use of LHSVar and 5356 /// RHSVar. 
5357 static void EmitOMPAggregateReduction( 5358 CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar, 5359 const VarDecl *RHSVar, 5360 const llvm::function_ref<void(CodeGenFunction &CGF, const Expr *, 5361 const Expr *, const Expr *)> &RedOpGen, 5362 const Expr *XExpr = nullptr, const Expr *EExpr = nullptr, 5363 const Expr *UpExpr = nullptr) { 5364 // Perform element-by-element initialization. 5365 QualType ElementTy; 5366 Address LHSAddr = CGF.GetAddrOfLocalVar(LHSVar); 5367 Address RHSAddr = CGF.GetAddrOfLocalVar(RHSVar); 5368 5369 // Drill down to the base element type on both arrays. 5370 const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe(); 5371 llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, LHSAddr); 5372 5373 llvm::Value *RHSBegin = RHSAddr.getPointer(); 5374 llvm::Value *LHSBegin = LHSAddr.getPointer(); 5375 // Cast from pointer to array type to pointer to single element. 5376 llvm::Value *LHSEnd = 5377 CGF.Builder.CreateGEP(LHSAddr.getElementType(), LHSBegin, NumElements); 5378 // The basic structure here is a while-do loop. 5379 llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arraycpy.body"); 5380 llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arraycpy.done"); 5381 llvm::Value *IsEmpty = 5382 CGF.Builder.CreateICmpEQ(LHSBegin, LHSEnd, "omp.arraycpy.isempty"); 5383 CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB); 5384 5385 // Enter the loop body, making that address the current address. 
5386 llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock(); 5387 CGF.EmitBlock(BodyBB); 5388 5389 CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy); 5390 5391 llvm::PHINode *RHSElementPHI = CGF.Builder.CreatePHI( 5392 RHSBegin->getType(), 2, "omp.arraycpy.srcElementPast"); 5393 RHSElementPHI->addIncoming(RHSBegin, EntryBB); 5394 Address RHSElementCurrent = 5395 Address(RHSElementPHI, 5396 RHSAddr.getAlignment().alignmentOfArrayElement(ElementSize)); 5397 5398 llvm::PHINode *LHSElementPHI = CGF.Builder.CreatePHI( 5399 LHSBegin->getType(), 2, "omp.arraycpy.destElementPast"); 5400 LHSElementPHI->addIncoming(LHSBegin, EntryBB); 5401 Address LHSElementCurrent = 5402 Address(LHSElementPHI, 5403 LHSAddr.getAlignment().alignmentOfArrayElement(ElementSize)); 5404 5405 // Emit copy. 5406 CodeGenFunction::OMPPrivateScope Scope(CGF); 5407 Scope.addPrivate(LHSVar, [=]() { return LHSElementCurrent; }); 5408 Scope.addPrivate(RHSVar, [=]() { return RHSElementCurrent; }); 5409 Scope.Privatize(); 5410 RedOpGen(CGF, XExpr, EExpr, UpExpr); 5411 Scope.ForceCleanup(); 5412 5413 // Shift the address forward by one element. 5414 llvm::Value *LHSElementNext = CGF.Builder.CreateConstGEP1_32( 5415 LHSAddr.getElementType(), LHSElementPHI, /*Idx0=*/1, 5416 "omp.arraycpy.dest.element"); 5417 llvm::Value *RHSElementNext = CGF.Builder.CreateConstGEP1_32( 5418 RHSAddr.getElementType(), RHSElementPHI, /*Idx0=*/1, 5419 "omp.arraycpy.src.element"); 5420 // Check whether we've reached the end. 5421 llvm::Value *Done = 5422 CGF.Builder.CreateICmpEQ(LHSElementNext, LHSEnd, "omp.arraycpy.done"); 5423 CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB); 5424 LHSElementPHI->addIncoming(LHSElementNext, CGF.Builder.GetInsertBlock()); 5425 RHSElementPHI->addIncoming(RHSElementNext, CGF.Builder.GetInsertBlock()); 5426 5427 // Done. 5428 CGF.EmitBlock(DoneBB, /*IsFinished=*/true); 5429 } 5430 5431 /// Emit reduction combiner. 
If the combiner is a simple expression emit it as 5432 /// is, otherwise consider it as combiner of UDR decl and emit it as a call of 5433 /// UDR combiner function. 5434 static void emitReductionCombiner(CodeGenFunction &CGF, 5435 const Expr *ReductionOp) { 5436 if (const auto *CE = dyn_cast<CallExpr>(ReductionOp)) 5437 if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee())) 5438 if (const auto *DRE = 5439 dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts())) 5440 if (const auto *DRD = 5441 dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) { 5442 std::pair<llvm::Function *, llvm::Function *> Reduction = 5443 CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD); 5444 RValue Func = RValue::get(Reduction.first); 5445 CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func); 5446 CGF.EmitIgnoredExpr(ReductionOp); 5447 return; 5448 } 5449 CGF.EmitIgnoredExpr(ReductionOp); 5450 } 5451 5452 llvm::Function *CGOpenMPRuntime::emitReductionFunction( 5453 SourceLocation Loc, llvm::Type *ArgsType, ArrayRef<const Expr *> Privates, 5454 ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs, 5455 ArrayRef<const Expr *> ReductionOps) { 5456 ASTContext &C = CGM.getContext(); 5457 5458 // void reduction_func(void *LHSArg, void *RHSArg); 5459 FunctionArgList Args; 5460 ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 5461 ImplicitParamDecl::Other); 5462 ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 5463 ImplicitParamDecl::Other); 5464 Args.push_back(&LHSArg); 5465 Args.push_back(&RHSArg); 5466 const auto &CGFI = 5467 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 5468 std::string Name = getName({"omp", "reduction", "reduction_func"}); 5469 auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI), 5470 llvm::GlobalValue::InternalLinkage, Name, 5471 &CGM.getModule()); 5472 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI); 5473 Fn->setDoesNotRecurse(); 
5474 CodeGenFunction CGF(CGM); 5475 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc); 5476 5477 // Dst = (void*[n])(LHSArg); 5478 // Src = (void*[n])(RHSArg); 5479 Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5480 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)), 5481 ArgsType), CGF.getPointerAlign()); 5482 Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5483 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)), 5484 ArgsType), CGF.getPointerAlign()); 5485 5486 // ... 5487 // *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]); 5488 // ... 5489 CodeGenFunction::OMPPrivateScope Scope(CGF); 5490 auto IPriv = Privates.begin(); 5491 unsigned Idx = 0; 5492 for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) { 5493 const auto *RHSVar = 5494 cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl()); 5495 Scope.addPrivate(RHSVar, [&CGF, RHS, Idx, RHSVar]() { 5496 return emitAddrOfVarFromArray(CGF, RHS, Idx, RHSVar); 5497 }); 5498 const auto *LHSVar = 5499 cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl()); 5500 Scope.addPrivate(LHSVar, [&CGF, LHS, Idx, LHSVar]() { 5501 return emitAddrOfVarFromArray(CGF, LHS, Idx, LHSVar); 5502 }); 5503 QualType PrivTy = (*IPriv)->getType(); 5504 if (PrivTy->isVariablyModifiedType()) { 5505 // Get array size and emit VLA type. 
++Idx;
      Address Elem = CGF.Builder.CreateConstArrayGEP(LHS, Idx);
      llvm::Value *Ptr = CGF.Builder.CreateLoad(Elem);
      const VariableArrayType *VLA =
          CGF.getContext().getAsVariableArrayType(PrivTy);
      const auto *OVE = cast<OpaqueValueExpr>(VLA->getSizeExpr());
      CodeGenFunction::OpaqueValueMapping OpaqueMap(
          CGF, OVE, RValue::get(CGF.Builder.CreatePtrToInt(Ptr, CGF.SizeTy)));
      CGF.EmitVariablyModifiedType(PrivTy);
    }
  }
  Scope.Privatize();
  IPriv = Privates.begin();
  auto ILHS = LHSExprs.begin();
  auto IRHS = RHSExprs.begin();
  for (const Expr *E : ReductionOps) {
    if ((*IPriv)->getType()->isArrayType()) {
      // Emit reduction for array section.
      const auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
      const auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
      EmitOMPAggregateReduction(
          CGF, (*IPriv)->getType(), LHSVar, RHSVar,
          [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
            emitReductionCombiner(CGF, E);
          });
    } else {
      // Emit reduction for array subscript or single variable.
      emitReductionCombiner(CGF, E);
    }
    ++IPriv;
    ++ILHS;
    ++IRHS;
  }
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}

/// Emits the combiner \p ReductionOp for a single reduction item.
///
/// If the private copy \p PrivateRef has array type, the combiner is emitted
/// through EmitOMPAggregateReduction over the variables referenced by \p LHS
/// and \p RHS; otherwise the combiner expression is emitted once, directly.
void CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF,
                                                  const Expr *ReductionOp,
                                                  const Expr *PrivateRef,
                                                  const DeclRefExpr *LHS,
                                                  const DeclRefExpr *RHS) {
  if (PrivateRef->getType()->isArrayType()) {
    // Emit reduction for array section.
    const auto *LHSVar = cast<VarDecl>(LHS->getDecl());
    const auto *RHSVar = cast<VarDecl>(RHS->getDecl());
    EmitOMPAggregateReduction(
        CGF, PrivateRef->getType(), LHSVar, RHSVar,
        [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
          emitReductionCombiner(CGF, ReductionOp);
        });
  } else {
    // Emit reduction for array subscript or single variable.
    emitReductionCombiner(CGF, ReductionOp);
  }
}

/// Emits the code that combines the private reduction copies into the
/// reduction variables.
///
/// Does nothing when \p CGF has no insert point. When
/// Options.SimpleReduction is set, only the plain combiners are emitted (one
/// per entry of \p ReductionOps). Otherwise the function builds the
/// reduction list, emits the outlined reduce function, calls
/// __kmpc_reduce{_nowait} and switches on its result as laid out in the
/// pseudo-code below. \p Privates, \p LHSExprs, \p RHSExprs and
/// \p ReductionOps are walked in lockstep, one entry per reduction item.
void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
                                    ArrayRef<const Expr *> Privates,
                                    ArrayRef<const Expr *> LHSExprs,
                                    ArrayRef<const Expr *> RHSExprs,
                                    ArrayRef<const Expr *> ReductionOps,
                                    ReductionOptionsTy Options) {
  if (!CGF.HaveInsertPoint())
    return;

  bool WithNowait = Options.WithNowait;
  bool SimpleReduction = Options.SimpleReduction;

  // Next code should be emitted for reduction:
  //
  // static kmp_critical_name lock = { 0 };
  //
  // void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
  //  *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]);
  //  ...
  //  *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1],
  //  *(Type<n>-1*)rhs[<n>-1]);
  // }
  //
  // ...
  // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]};
  // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
  // RedList, reduce_func, &<lock>)) {
  // case 1:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...
  // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  // break;
  // case 2:
  //  ...
  //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
  //  ...
  // [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);]
  // break;
  // default:;
  // }
  //
  // if SimpleReduction is true, only the next code is generated:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...

  ASTContext &C = CGM.getContext();

  if (SimpleReduction) {
    // No runtime call is emitted on this path; the cleanups scope only
    // brackets the combiners themselves.
    CodeGenFunction::RunCleanupsScope Scope(CGF);
    auto IPriv = Privates.begin();
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    for (const Expr *E : ReductionOps) {
      emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
                                  cast<DeclRefExpr>(*IRHS));
      ++IPriv;
      ++ILHS;
      ++IRHS;
    }
    return;
  }

  // 1. Build a list of reduction variables.
  // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]};
  auto Size = RHSExprs.size();
  for (const Expr *E : Privates) {
    if (E->getType()->isVariablyModifiedType())
      // Reserve place for array size.
      ++Size;
  }
  llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size);
  QualType ReductionArrayTy =
      C.getConstantArrayType(C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal,
                             /*IndexTypeQuals=*/0);
  Address ReductionList =
      CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list");
  auto IPriv = Privates.begin();
  // Idx is the slot in the reduction list; it runs ahead of I whenever a
  // variably modified item takes an extra slot for its size.
  unsigned Idx = 0;
  for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) {
    Address Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
    CGF.Builder.CreateStore(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            CGF.EmitLValue(RHSExprs[I]).getPointer(CGF), CGF.VoidPtrTy),
        Elem);
    if ((*IPriv)->getType()->isVariablyModifiedType()) {
      // Store array size.
      ++Idx;
      Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
      llvm::Value *Size = CGF.Builder.CreateIntCast(
          CGF.getVLASize(
                 CGF.getContext().getAsVariableArrayType((*IPriv)->getType()))
              .NumElts,
          CGF.SizeTy, /*isSigned=*/false);
      // The element count is stored in the list slot as an integer converted
      // to a void pointer.
      CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy),
                              Elem);
    }
  }

  // 2. Emit reduce_func().
  llvm::Function *ReductionFn = emitReductionFunction(
      Loc, CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo(), Privates,
      LHSExprs, RHSExprs, ReductionOps);

  // 3. Create static kmp_critical_name lock = { 0 };
  std::string Name = getName({"reduction"});
  llvm::Value *Lock = getCriticalRegionLock(Name);

  // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
  // RedList, reduce_func, &<lock>);
  llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE);
  llvm::Value *ThreadId = getThreadID(CGF, Loc);
  llvm::Value *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy);
  llvm::Value *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      ReductionList.getPointer(), CGF.VoidPtrTy);
  llvm::Value *Args[] = {
      IdentTLoc,                             // ident_t *<loc>
      ThreadId,                              // i32 <gtid>
      CGF.Builder.getInt32(RHSExprs.size()), // i32 <n>
      ReductionArrayTySize,                  // size_type sizeof(RedList)
      RL,                                    // void *RedList
      ReductionFn, // void (*) (void *, void *) <reduce_func>
      Lock         // kmp_critical_name *&<lock>
  };
  llvm::Value *Res = CGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(),
          WithNowait ? OMPRTL___kmpc_reduce_nowait : OMPRTL___kmpc_reduce),
      Args);

  // 5. Build switch(res)
  llvm::BasicBlock *DefaultBB = CGF.createBasicBlock(".omp.reduction.default");
  llvm::SwitchInst *SwInst =
      CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2);

  // 6. Build case 1:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...
  // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  // break;
  llvm::BasicBlock *Case1BB = CGF.createBasicBlock(".omp.reduction.case1");
  SwInst->addCase(CGF.Builder.getInt32(1), Case1BB);
  CGF.EmitBlock(Case1BB);

  // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  llvm::Value *EndArgs[] = {
      IdentTLoc, // ident_t *<loc>
      ThreadId,  // i32 <gtid>
      Lock       // kmp_critical_name *&<lock>
  };
  auto &&CodeGen = [Privates, LHSExprs, RHSExprs, ReductionOps](
                       CodeGenFunction &CGF, PrePostActionTy &Action) {
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    auto IPriv = Privates.begin();
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    for (const Expr *E : ReductionOps) {
      RT.emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
                                     cast<DeclRefExpr>(*IRHS));
      ++IPriv;
      ++ILHS;
      ++IRHS;
    }
  };
  RegionCodeGenTy RCG(CodeGen);
  CommonActionTy Action(
      nullptr, llvm::None,
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(), WithNowait ? OMPRTL___kmpc_end_reduce_nowait
                                      : OMPRTL___kmpc_end_reduce),
      EndArgs);
  RCG.setAction(Action);
  RCG(CGF);

  CGF.EmitBranch(DefaultBB);

  // 7. Build case 2:
  //  ...
  //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
  //  ...
  // [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);]
  // break;
  llvm::BasicBlock *Case2BB = CGF.createBasicBlock(".omp.reduction.case2");
  SwInst->addCase(CGF.Builder.getInt32(2), Case2BB);
  CGF.EmitBlock(Case2BB);

  auto &&AtomicCodeGen = [Loc, Privates, LHSExprs, RHSExprs, ReductionOps](
                             CodeGenFunction &CGF, PrePostActionTy &Action) {
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    auto IPriv = Privates.begin();
    for (const Expr *E : ReductionOps) {
      // XExpr/UpExpr are only set when the reduction op is an assignment
      // 'x = <update>'; EExpr and the opcode are then taken from the update
      // expression if it is (or is conditioned on) a binary operator.
      const Expr *XExpr = nullptr;
      const Expr *EExpr = nullptr;
      const Expr *UpExpr = nullptr;
      BinaryOperatorKind BO = BO_Comma;
      // Note: the BO declared in the condition below shadows the opcode
      // variable above for the extent of this if statement.
      if (const auto *BO = dyn_cast<BinaryOperator>(E)) {
        if (BO->getOpcode() == BO_Assign) {
          XExpr = BO->getLHS();
          UpExpr = BO->getRHS();
        }
      }
      // Try to emit update expression as a simple atomic.
      const Expr *RHSExpr = UpExpr;
      if (RHSExpr) {
        // Analyze RHS part of the whole expression.
        if (const auto *ACO = dyn_cast<AbstractConditionalOperator>(
                RHSExpr->IgnoreParenImpCasts())) {
          // If this is a conditional operator, analyze its condition for
          // min/max reduction operator.
          RHSExpr = ACO->getCond();
        }
        if (const auto *BORHS =
                dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) {
          EExpr = BORHS->getRHS();
          BO = BORHS->getOpcode();
        }
      }
      if (XExpr) {
        const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
        auto &&AtomicRedGen = [BO, VD,
                               Loc](CodeGenFunction &CGF, const Expr *XExpr,
                                    const Expr *EExpr, const Expr *UpExpr) {
          LValue X = CGF.EmitLValue(XExpr);
          RValue E;
          if (EExpr)
            E = CGF.EmitAnyExpr(EExpr);
          CGF.EmitOMPAtomicSimpleUpdateExpr(
              X, E, BO, /*IsXLHSInRHSPart=*/true,
              llvm::AtomicOrdering::Monotonic, Loc,
              [&CGF, UpExpr, VD, Loc](RValue XRValue) {
                // Generic update callback: remap VD to a temporary holding
                // the value passed in, then evaluate the update expression
                // against that temporary.
                CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
                PrivateScope.addPrivate(
                    VD, [&CGF, VD, XRValue, Loc]() {
                      Address LHSTemp = CGF.CreateMemTemp(VD->getType());
                      CGF.emitOMPSimpleStore(
                          CGF.MakeAddrLValue(LHSTemp, VD->getType()), XRValue,
                          VD->getType().getNonReferenceType(), Loc);
                      return LHSTemp;
                    });
                (void)PrivateScope.Privatize();
                return CGF.EmitAnyExpr(UpExpr);
              });
        };
        if ((*IPriv)->getType()->isArrayType()) {
          // Emit atomic reduction for array section.
          const auto *RHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
          EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar,
                                    AtomicRedGen, XExpr, EExpr, UpExpr);
        } else {
          // Emit atomic reduction for array subscript or single variable.
          AtomicRedGen(CGF, XExpr, EExpr, UpExpr);
        }
      } else {
        // Emit as a critical region.
        auto &&CritRedGen = [E, Loc](CodeGenFunction &CGF, const Expr *,
                                     const Expr *, const Expr *) {
          CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
          std::string Name = RT.getName({"atomic_reduction"});
          RT.emitCriticalRegion(
              CGF, Name,
              [=](CodeGenFunction &CGF, PrePostActionTy &Action) {
                Action.Enter(CGF);
                emitReductionCombiner(CGF, E);
              },
              Loc);
        };
        if ((*IPriv)->getType()->isArrayType()) {
          const auto *LHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
          const auto *RHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
          EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar,
                                    CritRedGen);
        } else {
          CritRedGen(CGF, nullptr, nullptr, nullptr);
        }
      }
      ++ILHS;
      ++IRHS;
      ++IPriv;
    }
  };
  RegionCodeGenTy AtomicRCG(AtomicCodeGen);
  if (!WithNowait) {
    // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>);
    llvm::Value *EndArgs[] = {
        IdentTLoc, // ident_t *<loc>
        ThreadId,  // i32 <gtid>
        Lock       // kmp_critical_name *&<lock>
    };
    CommonActionTy Action(nullptr, llvm::None,
                          OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_end_reduce),
                          EndArgs);
    AtomicRCG.setAction(Action);
    AtomicRCG(CGF);
  } else {
    // With nowait no end-of-reduction action is attached to the atomic case.
    AtomicRCG(CGF);
  }

  CGF.EmitBranch(DefaultBB);
  CGF.EmitBlock(DefaultBB, /*IsFinished=*/true);
}

/// Generates unique name for artificial threadprivate variables.
/// Format is: <Prefix> "."
/// <Decl_mangled_name> "_" "<Decl_start_loc_raw_enc>"
static std::string generateUniqueName(CodeGenModule &CGM, StringRef Prefix,
                                      const Expr *Ref) {
  SmallString<256> Buffer;
  llvm::raw_svector_ostream Out(Buffer);
  const clang::DeclRefExpr *DE;
  const VarDecl *D = ::getBaseDecl(Ref, DE);
  // If getBaseDecl yields no declaration, Ref itself must be a DeclRefExpr
  // naming a variable (both casts assert otherwise).
  if (!D)
    D = cast<VarDecl>(cast<DeclRefExpr>(Ref)->getDecl());
  D = D->getCanonicalDecl();
  // Local variables and parameters contribute their plain name, everything
  // else its mangled name.
  std::string Name = CGM.getOpenMPRuntime().getName(
      {D->isLocalVarDeclOrParm() ? D->getName() : CGM.getMangledName(D)});
  Out << Prefix << Name << "_"
      << D->getCanonicalDecl()->getBeginLoc().getRawEncoding();
  return std::string(Out.str());
}

/// Emits reduction initializer function:
/// \code
/// void @.red_init(void* %arg, void* %orig) {
/// %0 = bitcast void* %arg to <type>*
/// store <type> <init>, <type>* %0
/// ret void
/// }
/// \endcode
///
/// \param N Index of the reduction item in \p RCG the function is emitted for.
/// \return The newly created internal-linkage function.
static llvm::Value *emitReduceInitFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N) {
  ASTContext &C = CGM.getContext();
  QualType VoidPtrTy = C.VoidPtrTy;
  VoidPtrTy.addRestrict();
  FunctionArgList Args;
  // Both parameters are restrict-qualified void pointers: the private item
  // first, the original item second.
  ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
                          ImplicitParamDecl::Other);
  ImplicitParamDecl ParamOrig(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
                              ImplicitParamDecl::Other);
  Args.emplace_back(&Param);
  Args.emplace_back(&ParamOrig);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_init", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  Address PrivateAddr = CGF.EmitLoadOfPointer(
      CGF.GetAddrOfLocalVar(&Param),
      C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  LValue OrigLVal;
  // If initializer uses initializer from declare reduction construct, emit a
  // pointer to the address of the original reduction item (required by
  // reduction initializer)
  if (RCG.usesReductionInitializer(N)) {
    Address SharedAddr = CGF.GetAddrOfLocalVar(&ParamOrig);
    SharedAddr = CGF.EmitLoadOfPointer(
        SharedAddr,
        CGM.getContext().VoidPtrTy.castAs<PointerType>()->getTypePtr());
    OrigLVal = CGF.MakeAddrLValue(SharedAddr, CGM.getContext().VoidPtrTy);
  } else {
    // Otherwise a null pointer stands in for the original item.
    OrigLVal = CGF.MakeNaturalAlignAddrLValue(
        llvm::ConstantPointerNull::get(CGM.VoidPtrTy),
        CGM.getContext().VoidPtrTy);
  }
  // Emit the initializer:
  // %0 = bitcast void* %arg to <type>*
  // store <type> <init>, <type>* %0
  RCG.emitInitialization(CGF, N, PrivateAddr, OrigLVal,
                         [](CodeGenFunction &) { return false; });
  CGF.FinishFunction();
  return Fn;
}

/// Emits reduction combiner function:
/// \code
/// void @.red_comb(void* %arg0, void* %arg1) {
/// %lhs = bitcast void* %arg0 to <type>*
/// %rhs = bitcast void* %arg1 to <type>*
/// %2 = <ReductionOp>(<type>* %lhs, <type>* %rhs)
/// store <type> %2, <type>* %lhs
/// ret void
/// }
/// \endcode
///
/// \param N Index of the reduction item in \p RCG the function is emitted for.
/// \return The newly created internal-linkage function.
static llvm::Value *emitReduceCombFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N,
                                           const Expr *ReductionOp,
                                           const Expr *LHS, const Expr *RHS,
                                           const Expr *PrivateRef) {
  ASTContext &C = CGM.getContext();
  const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(LHS)->getDecl());
  const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(RHS)->getDecl());
  FunctionArgList Args;
  ImplicitParamDecl ParamInOut(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                               C.VoidPtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl ParamIn(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                            ImplicitParamDecl::Other);
  Args.emplace_back(&ParamInOut);
  Args.emplace_back(&ParamIn);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_comb", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  // Remap lhs and rhs variables to the addresses of the function arguments.
  // %lhs = bitcast void* %arg0 to <type>*
  // %rhs = bitcast void* %arg1 to <type>*
  CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
  PrivateScope.addPrivate(LHSVD, [&C, &CGF, &ParamInOut, LHSVD]() {
    // Pull out the pointer to the variable.
    Address PtrAddr = CGF.EmitLoadOfPointer(
        CGF.GetAddrOfLocalVar(&ParamInOut),
        C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
    return CGF.Builder.CreateElementBitCast(
        PtrAddr, CGF.ConvertTypeForMem(LHSVD->getType()));
  });
  PrivateScope.addPrivate(RHSVD, [&C, &CGF, &ParamIn, RHSVD]() {
    // Pull out the pointer to the variable.
    Address PtrAddr = CGF.EmitLoadOfPointer(
        CGF.GetAddrOfLocalVar(&ParamIn),
        C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
    return CGF.Builder.CreateElementBitCast(
        PtrAddr, CGF.ConvertTypeForMem(RHSVD->getType()));
  });
  PrivateScope.Privatize();
  // Emit the combiner body:
  // %2 = <ReductionOp>(<type> *%lhs, <type> *%rhs)
  // store <type> %2, <type>* %lhs
  CGM.getOpenMPRuntime().emitSingleReductionCombiner(
      CGF, ReductionOp, PrivateRef, cast<DeclRefExpr>(LHS),
      cast<DeclRefExpr>(RHS));
  CGF.FinishFunction();
  return Fn;
}

/// Emits reduction finalizer function:
/// \code
/// void @.red_fini(void* %arg) {
/// %0 = bitcast void* %arg to <type>*
/// <destroy>(<type>* %0)
/// ret void
/// }
/// \endcode
///
/// \param N Index of the reduction item in \p RCG the function is emitted for.
/// \return The newly created function, or nullptr when item \p N needs no
/// cleanups.
static llvm::Value *emitReduceFiniFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N) {
  if (!RCG.needCleanups(N))
    return nullptr;
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                          ImplicitParamDecl::Other);
  Args.emplace_back(&Param);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_fini", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  Address PrivateAddr = CGF.EmitLoadOfPointer(
      CGF.GetAddrOfLocalVar(&Param),
      C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  // Emit the finalizer body:
  // <destroy>(<type>* %0)
  RCG.emitCleanups(CGF, N, PrivateAddr);
  CGF.FinishFunction(Loc);
  return Fn;
}

/// Fills an array of kmp_taskred_input_t records, one per entry of
/// Data.ReductionVars, and emits the call that registers it with the runtime:
/// __kmpc_taskred_modifier_init when Data.IsReductionWithTaskMod is set,
/// __kmpc_taskred_init otherwise.
///
/// \return The result of the emitted runtime call, or nullptr when \p CGF has
/// no insert point or there are no reduction variables.
llvm::Value *CGOpenMPRuntime::emitTaskReductionInit(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
    ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint() || Data.ReductionVars.empty())
    return nullptr;

  // Build typedef struct:
  // kmp_taskred_input {
  //   void *reduce_shar; // shared reduction item
  //   void *reduce_orig; // original reduction item used for initialization
  //   size_t reduce_size; // size of data item
  //   void *reduce_init; // data initialization routine
  //   void *reduce_fini; // data finalization routine
  //   void *reduce_comb; // data combiner routine
  //   kmp_task_red_flags_t flags; // flags for additional info from compiler
  // } kmp_taskred_input_t;
  ASTContext &C = CGM.getContext();
  RecordDecl *RD = C.buildImplicitRecord("kmp_taskred_input_t");
  RD->startDefinition();
  const FieldDecl *SharedFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *OrigFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *SizeFD = addFieldToRecordDecl(C, RD, C.getSizeType());
  const FieldDecl *InitFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *FiniFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *CombFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *FlagsFD = addFieldToRecordDecl(
      C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/false));
  RD->completeDefinition();
  QualType RDType = C.getRecordType(RD);
  unsigned Size = Data.ReductionVars.size();
  llvm::APInt ArraySize(/*numBits=*/64, Size);
  QualType ArrayRDType = C.getConstantArrayType(
      RDType, ArraySize, nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0);
  // kmp_taskred_input_t .rd_input.[Size];
  Address TaskRedInput = CGF.CreateMemTemp(ArrayRDType, ".rd_input.");
  ReductionCodeGen RCG(Data.ReductionVars, Data.ReductionOrigs,
                       Data.ReductionCopies, Data.ReductionOps);
  for (unsigned Cnt = 0; Cnt < Size; ++Cnt) {
    // kmp_taskred_input_t &ElemLVal = .rd_input.[Cnt];
    llvm::Value *Idxs[] = {llvm::ConstantInt::get(CGM.SizeTy, /*V=*/0),
                           llvm::ConstantInt::get(CGM.SizeTy, Cnt)};
    llvm::Value *GEP = CGF.EmitCheckedInBoundsGEP(
        TaskRedInput.getPointer(), Idxs,
        /*SignedIndices=*/false, /*IsSubtraction=*/false, Loc,
        ".rd_input.gep.");
    LValue ElemLVal = CGF.MakeNaturalAlignAddrLValue(GEP, RDType);
    // ElemLVal.reduce_shar = &Shareds[Cnt];
    LValue SharedLVal = CGF.EmitLValueForField(ElemLVal, SharedFD);
    RCG.emitSharedOrigLValue(CGF, Cnt);
    llvm::Value *CastedShared =
        CGF.EmitCastToVoidPtr(RCG.getSharedLValue(Cnt).getPointer(CGF));
    CGF.EmitStoreOfScalar(CastedShared, SharedLVal);
    // ElemLVal.reduce_orig = &Origs[Cnt];
    LValue OrigLVal = CGF.EmitLValueForField(ElemLVal, OrigFD);
    llvm::Value *CastedOrig =
        CGF.EmitCastToVoidPtr(RCG.getOrigLValue(Cnt).getPointer(CGF));
    CGF.EmitStoreOfScalar(CastedOrig, OrigLVal);
    RCG.emitAggregateType(CGF, Cnt);
    llvm::Value *SizeValInChars;
    llvm::Value *SizeVal;
    std::tie(SizeValInChars, SizeVal) = RCG.getSizes(Cnt);
    // We use delayed creation/initialization for VLAs and array sections. It is
    // required because runtime does not provide the way to pass the sizes of
    // VLAs/array sections to initializer/combiner/finalizer functions. Instead
    // threadprivate global variables are used to store these values and use
    // them in the functions.
    bool DelayedCreation = !!SizeVal;
    // ElemLVal.reduce_size = size of the item in chars;
    SizeValInChars = CGF.Builder.CreateIntCast(SizeValInChars, CGM.SizeTy,
                                               /*isSigned=*/false);
    LValue SizeLVal = CGF.EmitLValueForField(ElemLVal, SizeFD);
    CGF.EmitStoreOfScalar(SizeValInChars, SizeLVal);
    // ElemLVal.reduce_init = init;
    LValue InitLVal = CGF.EmitLValueForField(ElemLVal, InitFD);
    llvm::Value *InitAddr =
        CGF.EmitCastToVoidPtr(emitReduceInitFunction(CGM, Loc, RCG, Cnt));
    CGF.EmitStoreOfScalar(InitAddr, InitLVal);
    // ElemLVal.reduce_fini = fini; (null when the item needs no finalizer)
    LValue FiniLVal = CGF.EmitLValueForField(ElemLVal, FiniFD);
    llvm::Value *Fini = emitReduceFiniFunction(CGM, Loc, RCG, Cnt);
    llvm::Value *FiniAddr = Fini
                                ? CGF.EmitCastToVoidPtr(Fini)
                                : llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
    CGF.EmitStoreOfScalar(FiniAddr, FiniLVal);
    // ElemLVal.reduce_comb = comb;
    LValue CombLVal = CGF.EmitLValueForField(ElemLVal, CombFD);
    llvm::Value *CombAddr = CGF.EmitCastToVoidPtr(emitReduceCombFunction(
        CGM, Loc, RCG, Cnt, Data.ReductionOps[Cnt], LHSExprs[Cnt],
        RHSExprs[Cnt], Data.ReductionCopies[Cnt]));
    CGF.EmitStoreOfScalar(CombAddr, CombLVal);
    // ElemLVal.flags = DelayedCreation ? 1 : 0;
    LValue FlagsLVal = CGF.EmitLValueForField(ElemLVal, FlagsFD);
    if (DelayedCreation) {
      CGF.EmitStoreOfScalar(
          llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/1, /*isSigned=*/true),
          FlagsLVal);
    } else
      CGF.EmitNullInitialization(FlagsLVal.getAddress(CGF),
                                 FlagsLVal.getType());
  }
  if (Data.IsReductionWithTaskMod) {
    // Build call void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int
    // is_ws, int num, void *data);
    llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
    llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
                                                  CGM.IntTy, /*isSigned=*/true);
    llvm::Value *Args[] = {
        IdentTLoc, GTid,
        llvm::ConstantInt::get(CGM.IntTy, Data.IsWorksharingReduction ? 1 : 0,
                               /*isSigned=*/true),
        llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            TaskRedInput.getPointer(), CGM.VoidPtrTy)};
    return CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_taskred_modifier_init),
        Args);
  }
  // Build call void *__kmpc_taskred_init(int gtid, int num_data, void *data);
  llvm::Value *Args[] = {
      CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy,
                                /*isSigned=*/true),
      llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TaskRedInput.getPointer(),
                                                      CGM.VoidPtrTy)};
  return CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                                 CGM.getModule(), OMPRTL___kmpc_taskred_init),
                             Args);
}

/// Emits the runtime call that finalizes a task reduction with a task
/// modifier; \p IsWorksharingReduction is passed through as the is_ws
/// argument.
void CGOpenMPRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
                                            SourceLocation Loc,
                                            bool IsWorksharingReduction) {
  // Build call __kmpc_task_reduction_modifier_fini(ident_t *loc, int gtid,
  // int is_ws); the result, if any, is discarded.
  llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
  llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
                                                CGM.IntTy, /*isSigned=*/true);
  llvm::Value *Args[] = {IdentTLoc, GTid,
                         llvm::ConstantInt::get(CGM.IntTy,
                                                IsWorksharingReduction ? 1 : 0,
                                                /*isSigned=*/true)};
  (void)CGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(), OMPRTL___kmpc_task_reduction_modifier_fini),
      Args);
}

/// Stores the non-constant size of reduction item \p N into the artificial
/// threadprivate variable that the generated init/comb/fini functions load
/// it from. Emits nothing for items whose size is constant.
void CGOpenMPRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              ReductionCodeGen &RCG,
                                              unsigned N) {
  auto Sizes = RCG.getSizes(N);
  // Emit threadprivate global variable if the type is non-constant
  // (Sizes.second != nullptr).
  if (Sizes.second) {
    llvm::Value *SizeVal = CGF.Builder.CreateIntCast(Sizes.second, CGM.SizeTy,
                                                     /*isSigned=*/false);
    Address SizeAddr = getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    CGF.Builder.CreateStore(SizeVal, SizeAddr, /*IsVolatile=*/false);
  }
}

/// Emits a call to __kmpc_task_reduction_get_th_data for the shared item
/// \p SharedLVal and wraps the returned pointer in an Address that carries
/// the alignment of \p SharedLVal.
Address CGOpenMPRuntime::getTaskReductionItem(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              llvm::Value *ReductionsPtr,
                                              LValue SharedLVal) {
  // Build call void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
  // *d);
  llvm::Value *Args[] = {CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
                                                   CGM.IntTy,
                                                   /*isSigned=*/true),
                         ReductionsPtr,
                         CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                             SharedLVal.getPointer(CGF), CGM.VoidPtrTy)};
  return Address(
      CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___kmpc_task_reduction_get_th_data),
          Args),
      SharedLVal.getAlignment());
}

/// Emits code for a 'taskwait': through the OpenMPIRBuilder when that is
/// enabled and there are no dependences, otherwise as a call to
/// __kmpc_omp_wait_deps (with dependences) or __kmpc_omp_taskwait (without).
/// Afterwards the enclosing OpenMP region, if any, emits its untied switch.
void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint())
    return;

  if (CGF.CGM.getLangOpts().OpenMPIRBuilder && Data.Dependences.empty()) {
    // TODO: Need to support taskwait with dependences in the OpenMPIRBuilder.
    OMPBuilder.createTaskwait(CGF.Builder);
  } else {
    llvm::Value *ThreadID = getThreadID(CGF, Loc);
    llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
    auto &M = CGM.getModule();
    Address DependenciesArray = Address::invalid();
    llvm::Value *NumOfElements;
    std::tie(NumOfElements, DependenciesArray) =
        emitDependClause(CGF, Data.Dependences, Loc);
    llvm::Value *DepWaitTaskArgs[6];
    if (!Data.Dependences.empty()) {
      DepWaitTaskArgs[0] = UpLoc;
      DepWaitTaskArgs[1] = ThreadID;
      DepWaitTaskArgs[2] = NumOfElements;
      DepWaitTaskArgs[3] = DependenciesArray.getPointer();
      // No noalias dependences are passed: count 0 and a null list.
      DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
      DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);

      CodeGenFunction::RunCleanupsScope LocalScope(CGF);

      // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
      // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
      // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info
      // is specified.
      CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_omp_wait_deps),
          DepWaitTaskArgs);

    } else {

      // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
      // global_tid);
      llvm::Value *Args[] = {UpLoc, ThreadID};
      // Ignore return result until untied tasks are supported.
      CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_omp_taskwait),
          Args);
    }
  }

  if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
    Region->emitUntiedSwitch(CGF);
}

/// Emits the body produced by \p CodeGen inline, inside an
/// InlinedOpenMPRegionRAII set up for directive \p InnerKind. The last RAII
/// argument is false for 'critical', 'master' and 'masked' and true for every
/// other directive kind.
void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF,
                                           OpenMPDirectiveKind InnerKind,
                                           const RegionCodeGenTy &CodeGen,
                                           bool HasCancel) {
  if (!CGF.HaveInsertPoint())
    return;
  InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel,
                                 InnerKind != OMPD_critical &&
                                     InnerKind != OMPD_master &&
                                     InnerKind != OMPD_masked);
  CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr);
}

namespace {
/// Values passed as the cncl_kind argument of __kmpc_cancel and
/// __kmpc_cancellationpoint.
enum RTCancelKind {
  CancelNoreq = 0,
  CancelParallel = 1,
  CancelLoop = 2,
  CancelSections = 3,
  CancelTaskgroup = 4
};
} // anonymous namespace

/// Maps the directive kind named in a cancel/cancellation point construct to
/// the matching RTCancelKind. Anything other than 'parallel', 'for' or
/// 'sections' is asserted to be 'taskgroup'.
static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) {
  RTCancelKind CancelKind = CancelNoreq;
  if (CancelRegion == OMPD_parallel)
    CancelKind = CancelParallel;
  else if (CancelRegion == OMPD_for)
    CancelKind = CancelLoop;
  else if (CancelRegion == OMPD_sections)
    CancelKind = CancelSections;
  else {
    assert(CancelRegion == OMPD_taskgroup);
    CancelKind = CancelTaskgroup;
  }
  return CancelKind;
}

/// Emits a 'cancellation point' for \p CancelRegion: a call to
/// __kmpc_cancellationpoint followed by a conditional exit from the enclosing
/// construct. Nothing is emitted outside an OpenMP region, or when the region
/// has no cancel and \p CancelRegion is not 'taskgroup'.
void CGOpenMPRuntime::emitCancellationPointCall(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDirectiveKind CancelRegion) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
  // global_tid, kmp_int32 cncl_kind);
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    // For 'cancellation point taskgroup', the task region info may not have a
    // cancel. This may instead happen in another adjacent task.
    if (CancelRegion == OMPD_taskgroup || OMPRegionInfo->hasCancel()) {
      llvm::Value *Args[] = {
          emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
          CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
      // A non-zero result takes the exit path emitted below.
      llvm::Value *Result = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___kmpc_cancellationpoint),
          Args);
      // if (__kmpc_cancellationpoint()) {
      //   call i32 @__kmpc_cancel_barrier( // for parallel cancellation only
      //   exit from construct;
      // }
      llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
      llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
      llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
      CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
      CGF.EmitBlock(ExitBB);
      if (CancelRegion == OMPD_parallel)
        emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false);
      // exit from construct;
      CodeGenFunction::JumpDest CancelDest =
          CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
      CGF.EmitBranchThroughCleanup(CancelDest);
      CGF.EmitBlock(ContBB, /*IsFinished=*/true);
    }
  }
}

/// Emits a 'cancel' for \p CancelRegion: a call to __kmpc_cancel followed by
/// a conditional exit from the enclosing construct. When \p IfCond is given,
/// the whole sequence is emitted only on the then-branch of the if clause.
/// Nothing is emitted outside an OpenMP region.
void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc,
                                     const Expr *IfCond,
                                     OpenMPDirectiveKind CancelRegion) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
  // kmp_int32 cncl_kind);
  auto &M = CGM.getModule();
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    auto &&ThenGen = [this, &M, Loc, CancelRegion,
                      OMPRegionInfo](CodeGenFunction &CGF, PrePostActionTy &) {
      CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
      llvm::Value *Args[] = {
          RT.emitUpdateLocation(CGF, Loc), RT.getThreadID(CGF, Loc),
          CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
      // A non-zero result takes the exit path emitted below.
      llvm::Value *Result = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_cancel), Args);
      // if (__kmpc_cancel()) {
      //   call i32 @__kmpc_cancel_barrier( // for parallel cancellation only
      //   exit from construct;
      // }
      llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
      llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
      llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
      CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
      CGF.EmitBlock(ExitBB);
      if (CancelRegion == OMPD_parallel)
        RT.emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false);
      // exit from construct;
      CodeGenFunction::JumpDest CancelDest =
          CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
      CGF.EmitBranchThroughCleanup(CancelDest);
      CGF.EmitBlock(ContBB, /*IsFinished=*/true);
    };
    if (IfCond) {
      // The else-branch of the if clause emits nothing.
      emitIfClause(CGF, IfCond, ThenGen,
                   [](CodeGenFunction &, PrePostActionTy &) {});
    } else {
      RegionCodeGenTy ThenRCG(ThenGen);
      ThenRCG(CGF);
    }
  }
}

namespace {
/// Cleanup action for uses_allocators support.
6446 class OMPUsesAllocatorsActionTy final : public PrePostActionTy { 6447 ArrayRef<std::pair<const Expr *, const Expr *>> Allocators; 6448 6449 public: 6450 OMPUsesAllocatorsActionTy( 6451 ArrayRef<std::pair<const Expr *, const Expr *>> Allocators) 6452 : Allocators(Allocators) {} 6453 void Enter(CodeGenFunction &CGF) override { 6454 if (!CGF.HaveInsertPoint()) 6455 return; 6456 for (const auto &AllocatorData : Allocators) { 6457 CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsInit( 6458 CGF, AllocatorData.first, AllocatorData.second); 6459 } 6460 } 6461 void Exit(CodeGenFunction &CGF) override { 6462 if (!CGF.HaveInsertPoint()) 6463 return; 6464 for (const auto &AllocatorData : Allocators) { 6465 CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsFini(CGF, 6466 AllocatorData.first); 6467 } 6468 } 6469 }; 6470 } // namespace 6471 6472 void CGOpenMPRuntime::emitTargetOutlinedFunction( 6473 const OMPExecutableDirective &D, StringRef ParentName, 6474 llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, 6475 bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) { 6476 assert(!ParentName.empty() && "Invalid target region parent name!"); 6477 HasEmittedTargetRegion = true; 6478 SmallVector<std::pair<const Expr *, const Expr *>, 4> Allocators; 6479 for (const auto *C : D.getClausesOfKind<OMPUsesAllocatorsClause>()) { 6480 for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) { 6481 const OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I); 6482 if (!D.AllocatorTraits) 6483 continue; 6484 Allocators.emplace_back(D.Allocator, D.AllocatorTraits); 6485 } 6486 } 6487 OMPUsesAllocatorsActionTy UsesAllocatorAction(Allocators); 6488 CodeGen.setAction(UsesAllocatorAction); 6489 emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID, 6490 IsOffloadEntry, CodeGen); 6491 } 6492 6493 void CGOpenMPRuntime::emitUsesAllocatorsInit(CodeGenFunction &CGF, 6494 const Expr *Allocator, 6495 const Expr *AllocatorTraits) { 6496 llvm::Value *ThreadId = 
getThreadID(CGF, Allocator->getExprLoc()); 6497 ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true); 6498 // Use default memspace handle. 6499 llvm::Value *MemSpaceHandle = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); 6500 llvm::Value *NumTraits = llvm::ConstantInt::get( 6501 CGF.IntTy, cast<ConstantArrayType>( 6502 AllocatorTraits->getType()->getAsArrayTypeUnsafe()) 6503 ->getSize() 6504 .getLimitedValue()); 6505 LValue AllocatorTraitsLVal = CGF.EmitLValue(AllocatorTraits); 6506 Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 6507 AllocatorTraitsLVal.getAddress(CGF), CGF.VoidPtrPtrTy); 6508 AllocatorTraitsLVal = CGF.MakeAddrLValue(Addr, CGF.getContext().VoidPtrTy, 6509 AllocatorTraitsLVal.getBaseInfo(), 6510 AllocatorTraitsLVal.getTBAAInfo()); 6511 llvm::Value *Traits = 6512 CGF.EmitLoadOfScalar(AllocatorTraitsLVal, AllocatorTraits->getExprLoc()); 6513 6514 llvm::Value *AllocatorVal = 6515 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 6516 CGM.getModule(), OMPRTL___kmpc_init_allocator), 6517 {ThreadId, MemSpaceHandle, NumTraits, Traits}); 6518 // Store to allocator. 
6519 CGF.EmitVarDecl(*cast<VarDecl>( 6520 cast<DeclRefExpr>(Allocator->IgnoreParenImpCasts())->getDecl())); 6521 LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts()); 6522 AllocatorVal = 6523 CGF.EmitScalarConversion(AllocatorVal, CGF.getContext().VoidPtrTy, 6524 Allocator->getType(), Allocator->getExprLoc()); 6525 CGF.EmitStoreOfScalar(AllocatorVal, AllocatorLVal); 6526 } 6527 6528 void CGOpenMPRuntime::emitUsesAllocatorsFini(CodeGenFunction &CGF, 6529 const Expr *Allocator) { 6530 llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc()); 6531 ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true); 6532 LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts()); 6533 llvm::Value *AllocatorVal = 6534 CGF.EmitLoadOfScalar(AllocatorLVal, Allocator->getExprLoc()); 6535 AllocatorVal = CGF.EmitScalarConversion(AllocatorVal, Allocator->getType(), 6536 CGF.getContext().VoidPtrTy, 6537 Allocator->getExprLoc()); 6538 (void)CGF.EmitRuntimeCall( 6539 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), 6540 OMPRTL___kmpc_destroy_allocator), 6541 {ThreadId, AllocatorVal}); 6542 } 6543 6544 void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper( 6545 const OMPExecutableDirective &D, StringRef ParentName, 6546 llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, 6547 bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) { 6548 // Create a unique name for the entry function using the source location 6549 // information of the current target region. The name will be something like: 6550 // 6551 // __omp_offloading_DD_FFFF_PP_lBB 6552 // 6553 // where DD_FFFF is an ID unique to the file (device and file IDs), PP is the 6554 // mangled name of the function that encloses the target region and BB is the 6555 // line number of the target region. 
unsigned DeviceID;
  unsigned FileID;
  unsigned Line;
  getTargetEntryUniqueInfo(CGM.getContext(), D.getBeginLoc(), DeviceID, FileID,
                           Line);
  // Assemble "__omp_offloading_<device>_<file>_<parent>_l<line>" with the two
  // IDs printed in hexadecimal.
  SmallString<64> EntryFnName;
  {
    llvm::raw_svector_ostream OS(EntryFnName);
    OS << "__omp_offloading" << llvm::format("_%x", DeviceID)
       << llvm::format("_%x_", FileID) << ParentName << "_l" << Line;
  }

  const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);

  // The outlined function is generated with its own CodeGenFunction, using a
  // target region info that carries the entry name built above.
  CodeGenFunction CGF(CGM, true);
  CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);

  OutlinedFn = CGF.GenerateOpenMPCapturedStmtFunction(CS, D.getBeginLoc());

  // If this target outline function is not an offload entry, we don't need to
  // register it.
  if (!IsOffloadEntry)
    return;

  // The target region ID is used by the runtime library to identify the current
  // target region, so it only has to be unique and not necessarily point to
  // anything. It could be the pointer to the outlined function that implements
  // the target region, but we aren't using that so that the compiler doesn't
  // need to keep that, and could therefore inline the host function if proven
  // worthwhile during optimization. On the other hand, if emitting code for the
  // device, the ID has to be the function address so that it can be retrieved
  // from the offloading entry and launched by the runtime library. We also mark
  // the outlined function to have external linkage in case we are emitting code
  // for the device, because these functions will be entry points to the device.

  if (CGM.getLangOpts().OpenMPIsDevice) {
    // Device side: the ID is the outlined function itself, cast to i8*.
    OutlinedFnID = llvm::ConstantExpr::getBitCast(OutlinedFn, CGM.Int8PtrTy);
    OutlinedFn->setLinkage(llvm::GlobalValue::WeakAnyLinkage);
    OutlinedFn->setDSOLocal(false);
    if (CGM.getTriple().isAMDGCN())
      OutlinedFn->setCallingConv(llvm::CallingConv::AMDGPU_KERNEL);
  } else {
    // Host side: the ID is a separate constant, zero-initialized i8 global
    // named after the entry function.
    std::string Name = getName({EntryFnName, "region_id"});
    OutlinedFnID = new llvm::GlobalVariable(
        CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
        llvm::GlobalValue::WeakAnyLinkage,
        llvm::Constant::getNullValue(CGM.Int8Ty), Name);
  }

  // Register the information for the entry associated with this target region.
  OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
      DeviceID, FileID, ParentName, Line, OutlinedFn, OutlinedFnID,
      OffloadEntriesInfoManagerTy::OMPTargetRegionEntryTargetRegion);

  // Add NumTeams and ThreadLimit attributes to the outlined GPU function.
  // Only the value written to the by-reference default is used here (the
  // returned expression is ignored), and an attribute is added only when that
  // value is positive.
  int32_t DefaultValTeams = -1;
  getNumTeamsExprForTargetDirective(CGF, D, DefaultValTeams);
  if (DefaultValTeams > 0) {
    OutlinedFn->addFnAttr("omp_target_num_teams",
                          std::to_string(DefaultValTeams));
  }
  int32_t DefaultValThreads = -1;
  getNumThreadsExprForTargetDirective(CGF, D, DefaultValThreads);
  if (DefaultValThreads > 0) {
    OutlinedFn->addFnAttr("omp_target_thread_limit",
                          std::to_string(DefaultValThreads));
  }

  CGM.getTargetCodeGenInfo().setTargetAttributes(nullptr, OutlinedFn, CGM);
}

/// Checks if the expression is constant or does not have non-trivial function
/// calls.
static bool isTrivial(ASTContext &Ctx, const Expr * E) {
  // We can skip constant expressions.
  // We can skip expressions with trivial calls or simple expressions.
6634 return (E->isEvaluatable(Ctx, Expr::SE_AllowUndefinedBehavior) || 6635 !E->hasNonTrivialCall(Ctx)) && 6636 !E->HasSideEffects(Ctx, /*IncludePossibleEffects=*/true); 6637 } 6638 6639 const Stmt *CGOpenMPRuntime::getSingleCompoundChild(ASTContext &Ctx, 6640 const Stmt *Body) { 6641 const Stmt *Child = Body->IgnoreContainers(); 6642 while (const auto *C = dyn_cast_or_null<CompoundStmt>(Child)) { 6643 Child = nullptr; 6644 for (const Stmt *S : C->body()) { 6645 if (const auto *E = dyn_cast<Expr>(S)) { 6646 if (isTrivial(Ctx, E)) 6647 continue; 6648 } 6649 // Some of the statements can be ignored. 6650 if (isa<AsmStmt>(S) || isa<NullStmt>(S) || isa<OMPFlushDirective>(S) || 6651 isa<OMPBarrierDirective>(S) || isa<OMPTaskyieldDirective>(S)) 6652 continue; 6653 // Analyze declarations. 6654 if (const auto *DS = dyn_cast<DeclStmt>(S)) { 6655 if (llvm::all_of(DS->decls(), [](const Decl *D) { 6656 if (isa<EmptyDecl>(D) || isa<DeclContext>(D) || 6657 isa<TypeDecl>(D) || isa<PragmaCommentDecl>(D) || 6658 isa<PragmaDetectMismatchDecl>(D) || isa<UsingDecl>(D) || 6659 isa<UsingDirectiveDecl>(D) || 6660 isa<OMPDeclareReductionDecl>(D) || 6661 isa<OMPThreadPrivateDecl>(D) || isa<OMPAllocateDecl>(D)) 6662 return true; 6663 const auto *VD = dyn_cast<VarDecl>(D); 6664 if (!VD) 6665 return false; 6666 return VD->hasGlobalStorage() || !VD->isUsed(); 6667 })) 6668 continue; 6669 } 6670 // Found multiple children - cannot get the one child only. 
6671 if (Child) 6672 return nullptr; 6673 Child = S; 6674 } 6675 if (Child) 6676 Child = Child->IgnoreContainers(); 6677 } 6678 return Child; 6679 } 6680 6681 const Expr *CGOpenMPRuntime::getNumTeamsExprForTargetDirective( 6682 CodeGenFunction &CGF, const OMPExecutableDirective &D, 6683 int32_t &DefaultVal) { 6684 6685 OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind(); 6686 assert(isOpenMPTargetExecutionDirective(DirectiveKind) && 6687 "Expected target-based executable directive."); 6688 switch (DirectiveKind) { 6689 case OMPD_target: { 6690 const auto *CS = D.getInnermostCapturedStmt(); 6691 const auto *Body = 6692 CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true); 6693 const Stmt *ChildStmt = 6694 CGOpenMPRuntime::getSingleCompoundChild(CGF.getContext(), Body); 6695 if (const auto *NestedDir = 6696 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) { 6697 if (isOpenMPTeamsDirective(NestedDir->getDirectiveKind())) { 6698 if (NestedDir->hasClausesOfKind<OMPNumTeamsClause>()) { 6699 const Expr *NumTeams = 6700 NestedDir->getSingleClause<OMPNumTeamsClause>()->getNumTeams(); 6701 if (NumTeams->isIntegerConstantExpr(CGF.getContext())) 6702 if (auto Constant = 6703 NumTeams->getIntegerConstantExpr(CGF.getContext())) 6704 DefaultVal = Constant->getExtValue(); 6705 return NumTeams; 6706 } 6707 DefaultVal = 0; 6708 return nullptr; 6709 } 6710 if (isOpenMPParallelDirective(NestedDir->getDirectiveKind()) || 6711 isOpenMPSimdDirective(NestedDir->getDirectiveKind())) { 6712 DefaultVal = 1; 6713 return nullptr; 6714 } 6715 DefaultVal = 1; 6716 return nullptr; 6717 } 6718 // A value of -1 is used to check if we need to emit no teams region 6719 DefaultVal = -1; 6720 return nullptr; 6721 } 6722 case OMPD_target_teams: 6723 case OMPD_target_teams_distribute: 6724 case OMPD_target_teams_distribute_simd: 6725 case OMPD_target_teams_distribute_parallel_for: 6726 case OMPD_target_teams_distribute_parallel_for_simd: { 6727 if 
(D.hasClausesOfKind<OMPNumTeamsClause>()) { 6728 const Expr *NumTeams = 6729 D.getSingleClause<OMPNumTeamsClause>()->getNumTeams(); 6730 if (NumTeams->isIntegerConstantExpr(CGF.getContext())) 6731 if (auto Constant = NumTeams->getIntegerConstantExpr(CGF.getContext())) 6732 DefaultVal = Constant->getExtValue(); 6733 return NumTeams; 6734 } 6735 DefaultVal = 0; 6736 return nullptr; 6737 } 6738 case OMPD_target_parallel: 6739 case OMPD_target_parallel_for: 6740 case OMPD_target_parallel_for_simd: 6741 case OMPD_target_simd: 6742 DefaultVal = 1; 6743 return nullptr; 6744 case OMPD_parallel: 6745 case OMPD_for: 6746 case OMPD_parallel_for: 6747 case OMPD_parallel_master: 6748 case OMPD_parallel_sections: 6749 case OMPD_for_simd: 6750 case OMPD_parallel_for_simd: 6751 case OMPD_cancel: 6752 case OMPD_cancellation_point: 6753 case OMPD_ordered: 6754 case OMPD_threadprivate: 6755 case OMPD_allocate: 6756 case OMPD_task: 6757 case OMPD_simd: 6758 case OMPD_tile: 6759 case OMPD_unroll: 6760 case OMPD_sections: 6761 case OMPD_section: 6762 case OMPD_single: 6763 case OMPD_master: 6764 case OMPD_critical: 6765 case OMPD_taskyield: 6766 case OMPD_barrier: 6767 case OMPD_taskwait: 6768 case OMPD_taskgroup: 6769 case OMPD_atomic: 6770 case OMPD_flush: 6771 case OMPD_depobj: 6772 case OMPD_scan: 6773 case OMPD_teams: 6774 case OMPD_target_data: 6775 case OMPD_target_exit_data: 6776 case OMPD_target_enter_data: 6777 case OMPD_distribute: 6778 case OMPD_distribute_simd: 6779 case OMPD_distribute_parallel_for: 6780 case OMPD_distribute_parallel_for_simd: 6781 case OMPD_teams_distribute: 6782 case OMPD_teams_distribute_simd: 6783 case OMPD_teams_distribute_parallel_for: 6784 case OMPD_teams_distribute_parallel_for_simd: 6785 case OMPD_target_update: 6786 case OMPD_declare_simd: 6787 case OMPD_declare_variant: 6788 case OMPD_begin_declare_variant: 6789 case OMPD_end_declare_variant: 6790 case OMPD_declare_target: 6791 case OMPD_end_declare_target: 6792 case OMPD_declare_reduction: 6793 
case OMPD_declare_mapper: 6794 case OMPD_taskloop: 6795 case OMPD_taskloop_simd: 6796 case OMPD_master_taskloop: 6797 case OMPD_master_taskloop_simd: 6798 case OMPD_parallel_master_taskloop: 6799 case OMPD_parallel_master_taskloop_simd: 6800 case OMPD_requires: 6801 case OMPD_metadirective: 6802 case OMPD_unknown: 6803 break; 6804 default: 6805 break; 6806 } 6807 llvm_unreachable("Unexpected directive kind."); 6808 } 6809 6810 llvm::Value *CGOpenMPRuntime::emitNumTeamsForTargetDirective( 6811 CodeGenFunction &CGF, const OMPExecutableDirective &D) { 6812 assert(!CGF.getLangOpts().OpenMPIsDevice && 6813 "Clauses associated with the teams directive expected to be emitted " 6814 "only for the host!"); 6815 CGBuilderTy &Bld = CGF.Builder; 6816 int32_t DefaultNT = -1; 6817 const Expr *NumTeams = getNumTeamsExprForTargetDirective(CGF, D, DefaultNT); 6818 if (NumTeams != nullptr) { 6819 OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind(); 6820 6821 switch (DirectiveKind) { 6822 case OMPD_target: { 6823 const auto *CS = D.getInnermostCapturedStmt(); 6824 CGOpenMPInnerExprInfo CGInfo(CGF, *CS); 6825 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 6826 llvm::Value *NumTeamsVal = CGF.EmitScalarExpr(NumTeams, 6827 /*IgnoreResultAssign*/ true); 6828 return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty, 6829 /*isSigned=*/true); 6830 } 6831 case OMPD_target_teams: 6832 case OMPD_target_teams_distribute: 6833 case OMPD_target_teams_distribute_simd: 6834 case OMPD_target_teams_distribute_parallel_for: 6835 case OMPD_target_teams_distribute_parallel_for_simd: { 6836 CodeGenFunction::RunCleanupsScope NumTeamsScope(CGF); 6837 llvm::Value *NumTeamsVal = CGF.EmitScalarExpr(NumTeams, 6838 /*IgnoreResultAssign*/ true); 6839 return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty, 6840 /*isSigned=*/true); 6841 } 6842 default: 6843 break; 6844 } 6845 } else if (DefaultNT == -1) { 6846 return nullptr; 6847 } 6848 6849 return Bld.getInt32(DefaultNT); 6850 } 6851 6852 static 
llvm::Value *getNumThreads(CodeGenFunction &CGF, const CapturedStmt *CS, 6853 llvm::Value *DefaultThreadLimitVal) { 6854 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild( 6855 CGF.getContext(), CS->getCapturedStmt()); 6856 if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) { 6857 if (isOpenMPParallelDirective(Dir->getDirectiveKind())) { 6858 llvm::Value *NumThreads = nullptr; 6859 llvm::Value *CondVal = nullptr; 6860 // Handle if clause. If if clause present, the number of threads is 6861 // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1. 6862 if (Dir->hasClausesOfKind<OMPIfClause>()) { 6863 CGOpenMPInnerExprInfo CGInfo(CGF, *CS); 6864 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 6865 const OMPIfClause *IfClause = nullptr; 6866 for (const auto *C : Dir->getClausesOfKind<OMPIfClause>()) { 6867 if (C->getNameModifier() == OMPD_unknown || 6868 C->getNameModifier() == OMPD_parallel) { 6869 IfClause = C; 6870 break; 6871 } 6872 } 6873 if (IfClause) { 6874 const Expr *Cond = IfClause->getCondition(); 6875 bool Result; 6876 if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) { 6877 if (!Result) 6878 return CGF.Builder.getInt32(1); 6879 } else { 6880 CodeGenFunction::LexicalScope Scope(CGF, Cond->getSourceRange()); 6881 if (const auto *PreInit = 6882 cast_or_null<DeclStmt>(IfClause->getPreInitStmt())) { 6883 for (const auto *I : PreInit->decls()) { 6884 if (!I->hasAttr<OMPCaptureNoInitAttr>()) { 6885 CGF.EmitVarDecl(cast<VarDecl>(*I)); 6886 } else { 6887 CodeGenFunction::AutoVarEmission Emission = 6888 CGF.EmitAutoVarAlloca(cast<VarDecl>(*I)); 6889 CGF.EmitAutoVarCleanups(Emission); 6890 } 6891 } 6892 } 6893 CondVal = CGF.EvaluateExprAsBool(Cond); 6894 } 6895 } 6896 } 6897 // Check the value of num_threads clause iff if clause was not specified 6898 // or is not evaluated to false. 
6899 if (Dir->hasClausesOfKind<OMPNumThreadsClause>()) { 6900 CGOpenMPInnerExprInfo CGInfo(CGF, *CS); 6901 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 6902 const auto *NumThreadsClause = 6903 Dir->getSingleClause<OMPNumThreadsClause>(); 6904 CodeGenFunction::LexicalScope Scope( 6905 CGF, NumThreadsClause->getNumThreads()->getSourceRange()); 6906 if (const auto *PreInit = 6907 cast_or_null<DeclStmt>(NumThreadsClause->getPreInitStmt())) { 6908 for (const auto *I : PreInit->decls()) { 6909 if (!I->hasAttr<OMPCaptureNoInitAttr>()) { 6910 CGF.EmitVarDecl(cast<VarDecl>(*I)); 6911 } else { 6912 CodeGenFunction::AutoVarEmission Emission = 6913 CGF.EmitAutoVarAlloca(cast<VarDecl>(*I)); 6914 CGF.EmitAutoVarCleanups(Emission); 6915 } 6916 } 6917 } 6918 NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads()); 6919 NumThreads = CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, 6920 /*isSigned=*/false); 6921 if (DefaultThreadLimitVal) 6922 NumThreads = CGF.Builder.CreateSelect( 6923 CGF.Builder.CreateICmpULT(DefaultThreadLimitVal, NumThreads), 6924 DefaultThreadLimitVal, NumThreads); 6925 } else { 6926 NumThreads = DefaultThreadLimitVal ? DefaultThreadLimitVal 6927 : CGF.Builder.getInt32(0); 6928 } 6929 // Process condition of the if clause. 6930 if (CondVal) { 6931 NumThreads = CGF.Builder.CreateSelect(CondVal, NumThreads, 6932 CGF.Builder.getInt32(1)); 6933 } 6934 return NumThreads; 6935 } 6936 if (isOpenMPSimdDirective(Dir->getDirectiveKind())) 6937 return CGF.Builder.getInt32(1); 6938 return DefaultThreadLimitVal; 6939 } 6940 return DefaultThreadLimitVal ? 
DefaultThreadLimitVal 6941 : CGF.Builder.getInt32(0); 6942 } 6943 6944 const Expr *CGOpenMPRuntime::getNumThreadsExprForTargetDirective( 6945 CodeGenFunction &CGF, const OMPExecutableDirective &D, 6946 int32_t &DefaultVal) { 6947 OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind(); 6948 assert(isOpenMPTargetExecutionDirective(DirectiveKind) && 6949 "Expected target-based executable directive."); 6950 6951 switch (DirectiveKind) { 6952 case OMPD_target: 6953 // Teams have no clause thread_limit 6954 return nullptr; 6955 case OMPD_target_teams: 6956 case OMPD_target_teams_distribute: 6957 if (D.hasClausesOfKind<OMPThreadLimitClause>()) { 6958 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>(); 6959 const Expr *ThreadLimit = ThreadLimitClause->getThreadLimit(); 6960 if (ThreadLimit->isIntegerConstantExpr(CGF.getContext())) 6961 if (auto Constant = 6962 ThreadLimit->getIntegerConstantExpr(CGF.getContext())) 6963 DefaultVal = Constant->getExtValue(); 6964 return ThreadLimit; 6965 } 6966 return nullptr; 6967 case OMPD_target_parallel: 6968 case OMPD_target_parallel_for: 6969 case OMPD_target_parallel_for_simd: 6970 case OMPD_target_teams_distribute_parallel_for: 6971 case OMPD_target_teams_distribute_parallel_for_simd: { 6972 Expr *ThreadLimit = nullptr; 6973 Expr *NumThreads = nullptr; 6974 if (D.hasClausesOfKind<OMPThreadLimitClause>()) { 6975 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>(); 6976 ThreadLimit = ThreadLimitClause->getThreadLimit(); 6977 if (ThreadLimit->isIntegerConstantExpr(CGF.getContext())) 6978 if (auto Constant = 6979 ThreadLimit->getIntegerConstantExpr(CGF.getContext())) 6980 DefaultVal = Constant->getExtValue(); 6981 } 6982 if (D.hasClausesOfKind<OMPNumThreadsClause>()) { 6983 const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>(); 6984 NumThreads = NumThreadsClause->getNumThreads(); 6985 if (NumThreads->isIntegerConstantExpr(CGF.getContext())) { 6986 if (auto Constant = 6987 
NumThreads->getIntegerConstantExpr(CGF.getContext())) { 6988 if (Constant->getExtValue() < DefaultVal) { 6989 DefaultVal = Constant->getExtValue(); 6990 ThreadLimit = NumThreads; 6991 } 6992 } 6993 } 6994 } 6995 return ThreadLimit; 6996 } 6997 case OMPD_target_teams_distribute_simd: 6998 case OMPD_target_simd: 6999 DefaultVal = 1; 7000 return nullptr; 7001 case OMPD_parallel: 7002 case OMPD_for: 7003 case OMPD_parallel_for: 7004 case OMPD_parallel_master: 7005 case OMPD_parallel_sections: 7006 case OMPD_for_simd: 7007 case OMPD_parallel_for_simd: 7008 case OMPD_cancel: 7009 case OMPD_cancellation_point: 7010 case OMPD_ordered: 7011 case OMPD_threadprivate: 7012 case OMPD_allocate: 7013 case OMPD_task: 7014 case OMPD_simd: 7015 case OMPD_tile: 7016 case OMPD_unroll: 7017 case OMPD_sections: 7018 case OMPD_section: 7019 case OMPD_single: 7020 case OMPD_master: 7021 case OMPD_critical: 7022 case OMPD_taskyield: 7023 case OMPD_barrier: 7024 case OMPD_taskwait: 7025 case OMPD_taskgroup: 7026 case OMPD_atomic: 7027 case OMPD_flush: 7028 case OMPD_depobj: 7029 case OMPD_scan: 7030 case OMPD_teams: 7031 case OMPD_target_data: 7032 case OMPD_target_exit_data: 7033 case OMPD_target_enter_data: 7034 case OMPD_distribute: 7035 case OMPD_distribute_simd: 7036 case OMPD_distribute_parallel_for: 7037 case OMPD_distribute_parallel_for_simd: 7038 case OMPD_teams_distribute: 7039 case OMPD_teams_distribute_simd: 7040 case OMPD_teams_distribute_parallel_for: 7041 case OMPD_teams_distribute_parallel_for_simd: 7042 case OMPD_target_update: 7043 case OMPD_declare_simd: 7044 case OMPD_declare_variant: 7045 case OMPD_begin_declare_variant: 7046 case OMPD_end_declare_variant: 7047 case OMPD_declare_target: 7048 case OMPD_end_declare_target: 7049 case OMPD_declare_reduction: 7050 case OMPD_declare_mapper: 7051 case OMPD_taskloop: 7052 case OMPD_taskloop_simd: 7053 case OMPD_master_taskloop: 7054 case OMPD_master_taskloop_simd: 7055 case OMPD_parallel_master_taskloop: 7056 case 
OMPD_parallel_master_taskloop_simd: 7057 case OMPD_requires: 7058 case OMPD_unknown: 7059 break; 7060 default: 7061 break; 7062 } 7063 llvm_unreachable("Unsupported directive kind."); 7064 } 7065 7066 llvm::Value *CGOpenMPRuntime::emitNumThreadsForTargetDirective( 7067 CodeGenFunction &CGF, const OMPExecutableDirective &D) { 7068 assert(!CGF.getLangOpts().OpenMPIsDevice && 7069 "Clauses associated with the teams directive expected to be emitted " 7070 "only for the host!"); 7071 OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind(); 7072 assert(isOpenMPTargetExecutionDirective(DirectiveKind) && 7073 "Expected target-based executable directive."); 7074 CGBuilderTy &Bld = CGF.Builder; 7075 llvm::Value *ThreadLimitVal = nullptr; 7076 llvm::Value *NumThreadsVal = nullptr; 7077 switch (DirectiveKind) { 7078 case OMPD_target: { 7079 const CapturedStmt *CS = D.getInnermostCapturedStmt(); 7080 if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal)) 7081 return NumThreads; 7082 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild( 7083 CGF.getContext(), CS->getCapturedStmt()); 7084 if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) { 7085 if (Dir->hasClausesOfKind<OMPThreadLimitClause>()) { 7086 CGOpenMPInnerExprInfo CGInfo(CGF, *CS); 7087 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 7088 const auto *ThreadLimitClause = 7089 Dir->getSingleClause<OMPThreadLimitClause>(); 7090 CodeGenFunction::LexicalScope Scope( 7091 CGF, ThreadLimitClause->getThreadLimit()->getSourceRange()); 7092 if (const auto *PreInit = 7093 cast_or_null<DeclStmt>(ThreadLimitClause->getPreInitStmt())) { 7094 for (const auto *I : PreInit->decls()) { 7095 if (!I->hasAttr<OMPCaptureNoInitAttr>()) { 7096 CGF.EmitVarDecl(cast<VarDecl>(*I)); 7097 } else { 7098 CodeGenFunction::AutoVarEmission Emission = 7099 CGF.EmitAutoVarAlloca(cast<VarDecl>(*I)); 7100 CGF.EmitAutoVarCleanups(Emission); 7101 } 7102 } 7103 } 7104 llvm::Value *ThreadLimit = 
CGF.EmitScalarExpr(
            ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
        // The thread limit is treated as an unsigned 32-bit value.
        ThreadLimitVal =
            Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
      }
      // A teams directive that is not combined with distribute: continue
      // with the single directive nested inside it (Dir may become null).
      if (isOpenMPTeamsDirective(Dir->getDirectiveKind()) &&
          !isOpenMPDistributeDirective(Dir->getDirectiveKind())) {
        CS = Dir->getInnermostCapturedStmt();
        const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
            CGF.getContext(), CS->getCapturedStmt());
        Dir = dyn_cast_or_null<OMPExecutableDirective>(Child);
      }
      // A distribute directive without simd: the thread count comes from
      // whatever is nested in it, limited by ThreadLimitVal.
      if (Dir && isOpenMPDistributeDirective(Dir->getDirectiveKind()) &&
          !isOpenMPSimdDirective(Dir->getDirectiveKind())) {
        CS = Dir->getInnermostCapturedStmt();
        if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
          return NumThreads;
      }
      if (Dir && isOpenMPSimdDirective(Dir->getDirectiveKind()))
        return Bld.getInt32(1);
    }
    // Fall back to the thread limit if one was emitted, else i32 0.
    return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0);
  }
  case OMPD_target_teams: {
    // Evaluate the directive's own thread_limit clause first, if present.
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
    }
    const CapturedStmt *CS = D.getInnermostCapturedStmt();
    if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
      return NumThreads;
    // getNumThreads returned null: look one level deeper when the nested
    // directive is a plain 'distribute'.
    const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
        CGF.getContext(), CS->getCapturedStmt());
    if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
      if (Dir->getDirectiveKind() == OMPD_distribute) {
        CS = Dir->getInnermostCapturedStmt();
        if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
          return NumThreads;
      }
    }
    return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0);
  }
  case OMPD_target_teams_distribute:
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
    }
    // Unlike the cases above, the result of getNumThreads is returned as is.
    return getNumThreads(CGF, D.getInnermostCapturedStmt(), ThreadLimitVal);
  case OMPD_target_parallel:
  case OMPD_target_parallel_for:
  case OMPD_target_parallel_for_simd:
  case OMPD_target_teams_distribute_parallel_for:
  case OMPD_target_teams_distribute_parallel_for_simd: {
    llvm::Value *CondVal = nullptr;
    // Handle if clause. If if clause present, the number of threads is
    // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
    if (D.hasClausesOfKind<OMPIfClause>()) {
      // Use the first if clause that has no name modifier or the 'parallel'
      // modifier.
      const OMPIfClause *IfClause = nullptr;
      for (const auto *C : D.getClausesOfKind<OMPIfClause>()) {
        if (C->getNameModifier() == OMPD_unknown ||
            C->getNameModifier() == OMPD_parallel) {
          IfClause = C;
          break;
        }
      }
      if (IfClause) {
        const Expr *Cond = IfClause->getCondition();
        bool Result;
        if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
          // Condition is a compile-time constant; false means one thread.
          if (!Result)
            return Bld.getInt32(1);
        } else {
          CodeGenFunction::RunCleanupsScope Scope(CGF);
          CondVal = CGF.EvaluateExprAsBool(Cond);
        }
      }
    }
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
    }
    if (D.hasClausesOfKind<OMPNumThreadsClause>()) {
      CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
      const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>();
      llvm::Value *NumThreads = CGF.EmitScalarExpr(
          NumThreadsClause->getNumThreads(), /*IgnoreResultAssign=*/true);
      NumThreadsVal =
          Bld.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned=*/false);
      // With both clauses present, take the unsigned minimum of the two;
      // otherwise num_threads alone becomes the limit.
      ThreadLimitVal = ThreadLimitVal
                           ? Bld.CreateSelect(Bld.CreateICmpULT(NumThreadsVal,
                                                                ThreadLimitVal),
                                              NumThreadsVal, ThreadLimitVal)
                           : NumThreadsVal;
    }
    // Neither clause present: the result is i32 0.
    if (!ThreadLimitVal)
      ThreadLimitVal = Bld.getInt32(0);
    // A run-time if condition selects between the computed limit and 1.
    if (CondVal)
      return Bld.CreateSelect(CondVal, ThreadLimitVal, Bld.getInt32(1));
    return ThreadLimitVal;
  }
  case OMPD_target_teams_distribute_simd:
  case OMPD_target_simd:
    return Bld.getInt32(1);
  case OMPD_parallel:
  case OMPD_for:
  case OMPD_parallel_for:
  case OMPD_parallel_master:
  case OMPD_parallel_sections:
  case OMPD_for_simd:
  case OMPD_parallel_for_simd:
  case OMPD_cancel:
  case OMPD_cancellation_point:
  case OMPD_ordered:
  case OMPD_threadprivate:
  case OMPD_allocate:
  case OMPD_task:
  case OMPD_simd:
  case OMPD_tile:
  case OMPD_unroll:
  case OMPD_sections:
  case OMPD_section:
  case OMPD_single:
  case OMPD_master:
  case OMPD_critical:
  case OMPD_taskyield:
  case OMPD_barrier:
  case OMPD_taskwait:
  case OMPD_taskgroup:
  case OMPD_atomic:
  case OMPD_flush:
  case OMPD_depobj:
  case OMPD_scan:
  case OMPD_teams:
  case OMPD_target_data:
  case OMPD_target_exit_data:
  case OMPD_target_enter_data:
  case OMPD_distribute:
  case OMPD_distribute_simd:
  case OMPD_distribute_parallel_for:
  case OMPD_distribute_parallel_for_simd:
  case OMPD_teams_distribute:
  case OMPD_teams_distribute_simd:
  case OMPD_teams_distribute_parallel_for:
  case OMPD_teams_distribute_parallel_for_simd:
  case OMPD_target_update:
  case OMPD_declare_simd:
  case OMPD_declare_variant:
  case OMPD_begin_declare_variant:
  case OMPD_end_declare_variant:
  case OMPD_declare_target:
  case OMPD_end_declare_target:
  case OMPD_declare_reduction:
  case OMPD_declare_mapper:
  case OMPD_taskloop:
  case OMPD_taskloop_simd:
  case OMPD_master_taskloop:
  case
OMPD_master_taskloop_simd: 7273 case OMPD_parallel_master_taskloop: 7274 case OMPD_parallel_master_taskloop_simd: 7275 case OMPD_requires: 7276 case OMPD_metadirective: 7277 case OMPD_unknown: 7278 break; 7279 default: 7280 break; 7281 } 7282 llvm_unreachable("Unsupported directive kind."); 7283 } 7284 7285 namespace { 7286 LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE(); 7287 7288 // Utility to handle information from clauses associated with a given 7289 // construct that use mappable expressions (e.g. 'map' clause, 'to' clause). 7290 // It provides a convenient interface to obtain the information and generate 7291 // code for that information. 7292 class MappableExprsHandler { 7293 public: 7294 /// Values for bit flags used to specify the mapping type for 7295 /// offloading. 7296 enum OpenMPOffloadMappingFlags : uint64_t { 7297 /// No flags 7298 OMP_MAP_NONE = 0x0, 7299 /// Allocate memory on the device and move data from host to device. 7300 OMP_MAP_TO = 0x01, 7301 /// Allocate memory on the device and move data from device to host. 7302 OMP_MAP_FROM = 0x02, 7303 /// Always perform the requested mapping action on the element, even 7304 /// if it was already mapped before. 7305 OMP_MAP_ALWAYS = 0x04, 7306 /// Delete the element from the device environment, ignoring the 7307 /// current reference count associated with the element. 7308 OMP_MAP_DELETE = 0x08, 7309 /// The element being mapped is a pointer-pointee pair; both the 7310 /// pointer and the pointee should be mapped. 7311 OMP_MAP_PTR_AND_OBJ = 0x10, 7312 /// This flags signals that the base address of an entry should be 7313 /// passed to the target kernel as an argument. 7314 OMP_MAP_TARGET_PARAM = 0x20, 7315 /// Signal that the runtime library has to return the device pointer 7316 /// in the current position for the data being mapped. Used when we have the 7317 /// use_device_ptr or use_device_addr clause. 
7318 OMP_MAP_RETURN_PARAM = 0x40, 7319 /// This flag signals that the reference being passed is a pointer to 7320 /// private data. 7321 OMP_MAP_PRIVATE = 0x80, 7322 /// Pass the element to the device by value. 7323 OMP_MAP_LITERAL = 0x100, 7324 /// Implicit map 7325 OMP_MAP_IMPLICIT = 0x200, 7326 /// Close is a hint to the runtime to allocate memory close to 7327 /// the target device. 7328 OMP_MAP_CLOSE = 0x400, 7329 /// 0x800 is reserved for compatibility with XLC. 7330 /// Produce a runtime error if the data is not already allocated. 7331 OMP_MAP_PRESENT = 0x1000, 7332 // Increment and decrement a separate reference counter so that the data 7333 // cannot be unmapped within the associated region. Thus, this flag is 7334 // intended to be used on 'target' and 'target data' directives because they 7335 // are inherently structured. It is not intended to be used on 'target 7336 // enter data' and 'target exit data' directives because they are inherently 7337 // dynamic. 7338 // This is an OpenMP extension for the sake of OpenACC support. 7339 OMP_MAP_OMPX_HOLD = 0x2000, 7340 /// Signal that the runtime library should use args as an array of 7341 /// descriptor_dim pointers and use args_size as dims. Used when we have 7342 /// non-contiguous list items in target update directive 7343 OMP_MAP_NON_CONTIG = 0x100000000000, 7344 /// The 16 MSBs of the flags indicate whether the entry is member of some 7345 /// struct/class. 7346 OMP_MAP_MEMBER_OF = 0xffff000000000000, 7347 LLVM_MARK_AS_BITMASK_ENUM(/* LargestFlag = */ OMP_MAP_MEMBER_OF), 7348 }; 7349 7350 /// Get the offset of the OMP_MAP_MEMBER_OF field. 7351 static unsigned getFlagMemberOffset() { 7352 unsigned Offset = 0; 7353 for (uint64_t Remain = OMP_MAP_MEMBER_OF; !(Remain & 1); 7354 Remain = Remain >> 1) 7355 Offset++; 7356 return Offset; 7357 } 7358 7359 /// Class that holds debugging information for a data mapping to be passed to 7360 /// the runtime library. 
7361 class MappingExprInfo { 7362 /// The variable declaration used for the data mapping. 7363 const ValueDecl *MapDecl = nullptr; 7364 /// The original expression used in the map clause, or null if there is 7365 /// none. 7366 const Expr *MapExpr = nullptr; 7367 7368 public: 7369 MappingExprInfo(const ValueDecl *MapDecl, const Expr *MapExpr = nullptr) 7370 : MapDecl(MapDecl), MapExpr(MapExpr) {} 7371 7372 const ValueDecl *getMapDecl() const { return MapDecl; } 7373 const Expr *getMapExpr() const { return MapExpr; } 7374 }; 7375 7376 /// Class that associates information with a base pointer to be passed to the 7377 /// runtime library. 7378 class BasePointerInfo { 7379 /// The base pointer. 7380 llvm::Value *Ptr = nullptr; 7381 /// The base declaration that refers to this device pointer, or null if 7382 /// there is none. 7383 const ValueDecl *DevPtrDecl = nullptr; 7384 7385 public: 7386 BasePointerInfo(llvm::Value *Ptr, const ValueDecl *DevPtrDecl = nullptr) 7387 : Ptr(Ptr), DevPtrDecl(DevPtrDecl) {} 7388 llvm::Value *operator*() const { return Ptr; } 7389 const ValueDecl *getDevicePtrDecl() const { return DevPtrDecl; } 7390 void setDevicePtrDecl(const ValueDecl *D) { DevPtrDecl = D; } 7391 }; 7392 7393 using MapExprsArrayTy = SmallVector<MappingExprInfo, 4>; 7394 using MapBaseValuesArrayTy = SmallVector<BasePointerInfo, 4>; 7395 using MapValuesArrayTy = SmallVector<llvm::Value *, 4>; 7396 using MapFlagsArrayTy = SmallVector<OpenMPOffloadMappingFlags, 4>; 7397 using MapMappersArrayTy = SmallVector<const ValueDecl *, 4>; 7398 using MapDimArrayTy = SmallVector<uint64_t, 4>; 7399 using MapNonContiguousArrayTy = SmallVector<MapValuesArrayTy, 4>; 7400 7401 /// This structure contains combined information generated for mappable 7402 /// clauses, including base pointers, pointers, sizes, map types, user-defined 7403 /// mappers, and non-contiguous information. 
7404 struct MapCombinedInfoTy { 7405 struct StructNonContiguousInfo { 7406 bool IsNonContiguous = false; 7407 MapDimArrayTy Dims; 7408 MapNonContiguousArrayTy Offsets; 7409 MapNonContiguousArrayTy Counts; 7410 MapNonContiguousArrayTy Strides; 7411 }; 7412 MapExprsArrayTy Exprs; 7413 MapBaseValuesArrayTy BasePointers; 7414 MapValuesArrayTy Pointers; 7415 MapValuesArrayTy Sizes; 7416 MapFlagsArrayTy Types; 7417 MapMappersArrayTy Mappers; 7418 StructNonContiguousInfo NonContigInfo; 7419 7420 /// Append arrays in \a CurInfo. 7421 void append(MapCombinedInfoTy &CurInfo) { 7422 Exprs.append(CurInfo.Exprs.begin(), CurInfo.Exprs.end()); 7423 BasePointers.append(CurInfo.BasePointers.begin(), 7424 CurInfo.BasePointers.end()); 7425 Pointers.append(CurInfo.Pointers.begin(), CurInfo.Pointers.end()); 7426 Sizes.append(CurInfo.Sizes.begin(), CurInfo.Sizes.end()); 7427 Types.append(CurInfo.Types.begin(), CurInfo.Types.end()); 7428 Mappers.append(CurInfo.Mappers.begin(), CurInfo.Mappers.end()); 7429 NonContigInfo.Dims.append(CurInfo.NonContigInfo.Dims.begin(), 7430 CurInfo.NonContigInfo.Dims.end()); 7431 NonContigInfo.Offsets.append(CurInfo.NonContigInfo.Offsets.begin(), 7432 CurInfo.NonContigInfo.Offsets.end()); 7433 NonContigInfo.Counts.append(CurInfo.NonContigInfo.Counts.begin(), 7434 CurInfo.NonContigInfo.Counts.end()); 7435 NonContigInfo.Strides.append(CurInfo.NonContigInfo.Strides.begin(), 7436 CurInfo.NonContigInfo.Strides.end()); 7437 } 7438 }; 7439 7440 /// Map between a struct and the its lowest & highest elements which have been 7441 /// mapped. 
7442 /// [ValueDecl *] --> {LE(FieldIndex, Pointer), 7443 /// HE(FieldIndex, Pointer)} 7444 struct StructRangeInfoTy { 7445 MapCombinedInfoTy PreliminaryMapData; 7446 std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> LowestElem = { 7447 0, Address::invalid()}; 7448 std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> HighestElem = { 7449 0, Address::invalid()}; 7450 Address Base = Address::invalid(); 7451 Address LB = Address::invalid(); 7452 bool IsArraySection = false; 7453 bool HasCompleteRecord = false; 7454 }; 7455 7456 private: 7457 /// Kind that defines how a device pointer has to be returned. 7458 struct MapInfo { 7459 OMPClauseMappableExprCommon::MappableExprComponentListRef Components; 7460 OpenMPMapClauseKind MapType = OMPC_MAP_unknown; 7461 ArrayRef<OpenMPMapModifierKind> MapModifiers; 7462 ArrayRef<OpenMPMotionModifierKind> MotionModifiers; 7463 bool ReturnDevicePointer = false; 7464 bool IsImplicit = false; 7465 const ValueDecl *Mapper = nullptr; 7466 const Expr *VarRef = nullptr; 7467 bool ForDeviceAddr = false; 7468 7469 MapInfo() = default; 7470 MapInfo( 7471 OMPClauseMappableExprCommon::MappableExprComponentListRef Components, 7472 OpenMPMapClauseKind MapType, 7473 ArrayRef<OpenMPMapModifierKind> MapModifiers, 7474 ArrayRef<OpenMPMotionModifierKind> MotionModifiers, 7475 bool ReturnDevicePointer, bool IsImplicit, 7476 const ValueDecl *Mapper = nullptr, const Expr *VarRef = nullptr, 7477 bool ForDeviceAddr = false) 7478 : Components(Components), MapType(MapType), MapModifiers(MapModifiers), 7479 MotionModifiers(MotionModifiers), 7480 ReturnDevicePointer(ReturnDevicePointer), IsImplicit(IsImplicit), 7481 Mapper(Mapper), VarRef(VarRef), ForDeviceAddr(ForDeviceAddr) {} 7482 }; 7483 7484 /// If use_device_ptr or use_device_addr is used on a decl which is a struct 7485 /// member and there is no map information about it, then emission of that 7486 /// entry is deferred until the whole struct has been processed. 
7487 struct DeferredDevicePtrEntryTy { 7488 const Expr *IE = nullptr; 7489 const ValueDecl *VD = nullptr; 7490 bool ForDeviceAddr = false; 7491 7492 DeferredDevicePtrEntryTy(const Expr *IE, const ValueDecl *VD, 7493 bool ForDeviceAddr) 7494 : IE(IE), VD(VD), ForDeviceAddr(ForDeviceAddr) {} 7495 }; 7496 7497 /// The target directive from where the mappable clauses were extracted. It 7498 /// is either a executable directive or a user-defined mapper directive. 7499 llvm::PointerUnion<const OMPExecutableDirective *, 7500 const OMPDeclareMapperDecl *> 7501 CurDir; 7502 7503 /// Function the directive is being generated for. 7504 CodeGenFunction &CGF; 7505 7506 /// Set of all first private variables in the current directive. 7507 /// bool data is set to true if the variable is implicitly marked as 7508 /// firstprivate, false otherwise. 7509 llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, bool> FirstPrivateDecls; 7510 7511 /// Map between device pointer declarations and their expression components. 7512 /// The key value for declarations in 'this' is null. 7513 llvm::DenseMap< 7514 const ValueDecl *, 7515 SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>> 7516 DevPointersMap; 7517 7518 /// Map between lambda declarations and their map type. 7519 llvm::DenseMap<const ValueDecl *, const OMPMapClause *> LambdasMap; 7520 7521 llvm::Value *getExprTypeSize(const Expr *E) const { 7522 QualType ExprTy = E->getType().getCanonicalType(); 7523 7524 // Calculate the size for array shaping expression. 
7525 if (const auto *OAE = dyn_cast<OMPArrayShapingExpr>(E)) { 7526 llvm::Value *Size = 7527 CGF.getTypeSize(OAE->getBase()->getType()->getPointeeType()); 7528 for (const Expr *SE : OAE->getDimensions()) { 7529 llvm::Value *Sz = CGF.EmitScalarExpr(SE); 7530 Sz = CGF.EmitScalarConversion(Sz, SE->getType(), 7531 CGF.getContext().getSizeType(), 7532 SE->getExprLoc()); 7533 Size = CGF.Builder.CreateNUWMul(Size, Sz); 7534 } 7535 return Size; 7536 } 7537 7538 // Reference types are ignored for mapping purposes. 7539 if (const auto *RefTy = ExprTy->getAs<ReferenceType>()) 7540 ExprTy = RefTy->getPointeeType().getCanonicalType(); 7541 7542 // Given that an array section is considered a built-in type, we need to 7543 // do the calculation based on the length of the section instead of relying 7544 // on CGF.getTypeSize(E->getType()). 7545 if (const auto *OAE = dyn_cast<OMPArraySectionExpr>(E)) { 7546 QualType BaseTy = OMPArraySectionExpr::getBaseOriginalType( 7547 OAE->getBase()->IgnoreParenImpCasts()) 7548 .getCanonicalType(); 7549 7550 // If there is no length associated with the expression and lower bound is 7551 // not specified too, that means we are using the whole length of the 7552 // base. 7553 if (!OAE->getLength() && OAE->getColonLocFirst().isValid() && 7554 !OAE->getLowerBound()) 7555 return CGF.getTypeSize(BaseTy); 7556 7557 llvm::Value *ElemSize; 7558 if (const auto *PTy = BaseTy->getAs<PointerType>()) { 7559 ElemSize = CGF.getTypeSize(PTy->getPointeeType().getCanonicalType()); 7560 } else { 7561 const auto *ATy = cast<ArrayType>(BaseTy.getTypePtr()); 7562 assert(ATy && "Expecting array type if not a pointer type."); 7563 ElemSize = CGF.getTypeSize(ATy->getElementType().getCanonicalType()); 7564 } 7565 7566 // If we don't have a length at this point, that is because we have an 7567 // array section with a single element. 
7568 if (!OAE->getLength() && OAE->getColonLocFirst().isInvalid()) 7569 return ElemSize; 7570 7571 if (const Expr *LenExpr = OAE->getLength()) { 7572 llvm::Value *LengthVal = CGF.EmitScalarExpr(LenExpr); 7573 LengthVal = CGF.EmitScalarConversion(LengthVal, LenExpr->getType(), 7574 CGF.getContext().getSizeType(), 7575 LenExpr->getExprLoc()); 7576 return CGF.Builder.CreateNUWMul(LengthVal, ElemSize); 7577 } 7578 assert(!OAE->getLength() && OAE->getColonLocFirst().isValid() && 7579 OAE->getLowerBound() && "expected array_section[lb:]."); 7580 // Size = sizetype - lb * elemtype; 7581 llvm::Value *LengthVal = CGF.getTypeSize(BaseTy); 7582 llvm::Value *LBVal = CGF.EmitScalarExpr(OAE->getLowerBound()); 7583 LBVal = CGF.EmitScalarConversion(LBVal, OAE->getLowerBound()->getType(), 7584 CGF.getContext().getSizeType(), 7585 OAE->getLowerBound()->getExprLoc()); 7586 LBVal = CGF.Builder.CreateNUWMul(LBVal, ElemSize); 7587 llvm::Value *Cmp = CGF.Builder.CreateICmpUGT(LengthVal, LBVal); 7588 llvm::Value *TrueVal = CGF.Builder.CreateNUWSub(LengthVal, LBVal); 7589 LengthVal = CGF.Builder.CreateSelect( 7590 Cmp, TrueVal, llvm::ConstantInt::get(CGF.SizeTy, 0)); 7591 return LengthVal; 7592 } 7593 return CGF.getTypeSize(ExprTy); 7594 } 7595 7596 /// Return the corresponding bits for a given map clause modifier. Add 7597 /// a flag marking the map as a pointer if requested. Add a flag marking the 7598 /// map as the first one of a series of maps that relate to the same map 7599 /// expression. 7600 OpenMPOffloadMappingFlags getMapTypeBits( 7601 OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers, 7602 ArrayRef<OpenMPMotionModifierKind> MotionModifiers, bool IsImplicit, 7603 bool AddPtrFlag, bool AddIsTargetParamFlag, bool IsNonContiguous) const { 7604 OpenMPOffloadMappingFlags Bits = 7605 IsImplicit ? 
OMP_MAP_IMPLICIT : OMP_MAP_NONE; 7606 switch (MapType) { 7607 case OMPC_MAP_alloc: 7608 case OMPC_MAP_release: 7609 // alloc and release is the default behavior in the runtime library, i.e. 7610 // if we don't pass any bits alloc/release that is what the runtime is 7611 // going to do. Therefore, we don't need to signal anything for these two 7612 // type modifiers. 7613 break; 7614 case OMPC_MAP_to: 7615 Bits |= OMP_MAP_TO; 7616 break; 7617 case OMPC_MAP_from: 7618 Bits |= OMP_MAP_FROM; 7619 break; 7620 case OMPC_MAP_tofrom: 7621 Bits |= OMP_MAP_TO | OMP_MAP_FROM; 7622 break; 7623 case OMPC_MAP_delete: 7624 Bits |= OMP_MAP_DELETE; 7625 break; 7626 case OMPC_MAP_unknown: 7627 llvm_unreachable("Unexpected map type!"); 7628 } 7629 if (AddPtrFlag) 7630 Bits |= OMP_MAP_PTR_AND_OBJ; 7631 if (AddIsTargetParamFlag) 7632 Bits |= OMP_MAP_TARGET_PARAM; 7633 if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_always)) 7634 Bits |= OMP_MAP_ALWAYS; 7635 if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_close)) 7636 Bits |= OMP_MAP_CLOSE; 7637 if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_present) || 7638 llvm::is_contained(MotionModifiers, OMPC_MOTION_MODIFIER_present)) 7639 Bits |= OMP_MAP_PRESENT; 7640 if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_ompx_hold)) 7641 Bits |= OMP_MAP_OMPX_HOLD; 7642 if (IsNonContiguous) 7643 Bits |= OMP_MAP_NON_CONTIG; 7644 return Bits; 7645 } 7646 7647 /// Return true if the provided expression is a final array section. A 7648 /// final array section, is one whose length can't be proved to be one. 7649 bool isFinalArraySectionExpression(const Expr *E) const { 7650 const auto *OASE = dyn_cast<OMPArraySectionExpr>(E); 7651 7652 // It is not an array section and therefore not a unity-size one. 7653 if (!OASE) 7654 return false; 7655 7656 // An array section with no colon always refer to a single element. 
7657 if (OASE->getColonLocFirst().isInvalid()) 7658 return false; 7659 7660 const Expr *Length = OASE->getLength(); 7661 7662 // If we don't have a length we have to check if the array has size 1 7663 // for this dimension. Also, we should always expect a length if the 7664 // base type is pointer. 7665 if (!Length) { 7666 QualType BaseQTy = OMPArraySectionExpr::getBaseOriginalType( 7667 OASE->getBase()->IgnoreParenImpCasts()) 7668 .getCanonicalType(); 7669 if (const auto *ATy = dyn_cast<ConstantArrayType>(BaseQTy.getTypePtr())) 7670 return ATy->getSize().getSExtValue() != 1; 7671 // If we don't have a constant dimension length, we have to consider 7672 // the current section as having any size, so it is not necessarily 7673 // unitary. If it happen to be unity size, that's user fault. 7674 return true; 7675 } 7676 7677 // Check if the length evaluates to 1. 7678 Expr::EvalResult Result; 7679 if (!Length->EvaluateAsInt(Result, CGF.getContext())) 7680 return true; // Can have more that size 1. 7681 7682 llvm::APSInt ConstLength = Result.Val.getInt(); 7683 return ConstLength.getSExtValue() != 1; 7684 } 7685 7686 /// Generate the base pointers, section pointers, sizes, map type bits, and 7687 /// user-defined mappers (all included in \a CombinedInfo) for the provided 7688 /// map type, map or motion modifiers, and expression components. 7689 /// \a IsFirstComponent should be set to true if the provided set of 7690 /// components is the first associated with a capture. 
7691 void generateInfoForComponentList( 7692 OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers, 7693 ArrayRef<OpenMPMotionModifierKind> MotionModifiers, 7694 OMPClauseMappableExprCommon::MappableExprComponentListRef Components, 7695 MapCombinedInfoTy &CombinedInfo, StructRangeInfoTy &PartialStruct, 7696 bool IsFirstComponentList, bool IsImplicit, 7697 const ValueDecl *Mapper = nullptr, bool ForDeviceAddr = false, 7698 const ValueDecl *BaseDecl = nullptr, const Expr *MapExpr = nullptr, 7699 ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef> 7700 OverlappedElements = llvm::None) const { 7701 // The following summarizes what has to be generated for each map and the 7702 // types below. The generated information is expressed in this order: 7703 // base pointer, section pointer, size, flags 7704 // (to add to the ones that come from the map type and modifier). 7705 // 7706 // double d; 7707 // int i[100]; 7708 // float *p; 7709 // 7710 // struct S1 { 7711 // int i; 7712 // float f[50]; 7713 // } 7714 // struct S2 { 7715 // int i; 7716 // float f[50]; 7717 // S1 s; 7718 // double *p; 7719 // struct S2 *ps; 7720 // int &ref; 7721 // } 7722 // S2 s; 7723 // S2 *ps; 7724 // 7725 // map(d) 7726 // &d, &d, sizeof(double), TARGET_PARAM | TO | FROM 7727 // 7728 // map(i) 7729 // &i, &i, 100*sizeof(int), TARGET_PARAM | TO | FROM 7730 // 7731 // map(i[1:23]) 7732 // &i(=&i[0]), &i[1], 23*sizeof(int), TARGET_PARAM | TO | FROM 7733 // 7734 // map(p) 7735 // &p, &p, sizeof(float*), TARGET_PARAM | TO | FROM 7736 // 7737 // map(p[1:24]) 7738 // &p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM | PTR_AND_OBJ 7739 // in unified shared memory mode or for local pointers 7740 // p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM 7741 // 7742 // map(s) 7743 // &s, &s, sizeof(S2), TARGET_PARAM | TO | FROM 7744 // 7745 // map(s.i) 7746 // &s, &(s.i), sizeof(int), TARGET_PARAM | TO | FROM 7747 // 7748 // map(s.s.f) 7749 // &s, &(s.s.f[0]), 
50*sizeof(float), TARGET_PARAM | TO | FROM 7750 // 7751 // map(s.p) 7752 // &s, &(s.p), sizeof(double*), TARGET_PARAM | TO | FROM 7753 // 7754 // map(to: s.p[:22]) 7755 // &s, &(s.p), sizeof(double*), TARGET_PARAM (*) 7756 // &s, &(s.p), sizeof(double*), MEMBER_OF(1) (**) 7757 // &(s.p), &(s.p[0]), 22*sizeof(double), 7758 // MEMBER_OF(1) | PTR_AND_OBJ | TO (***) 7759 // (*) alloc space for struct members, only this is a target parameter 7760 // (**) map the pointer (nothing to be mapped in this example) (the compiler 7761 // optimizes this entry out, same in the examples below) 7762 // (***) map the pointee (map: to) 7763 // 7764 // map(to: s.ref) 7765 // &s, &(s.ref), sizeof(int*), TARGET_PARAM (*) 7766 // &s, &(s.ref), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | TO (***) 7767 // (*) alloc space for struct members, only this is a target parameter 7768 // (**) map the pointer (nothing to be mapped in this example) (the compiler 7769 // optimizes this entry out, same in the examples below) 7770 // (***) map the pointee (map: to) 7771 // 7772 // map(s.ps) 7773 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM | TO | FROM 7774 // 7775 // map(from: s.ps->s.i) 7776 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM 7777 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1) 7778 // &(s.ps), &(s.ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM 7779 // 7780 // map(to: s.ps->ps) 7781 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM 7782 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1) 7783 // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | TO 7784 // 7785 // map(s.ps->ps->ps) 7786 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM 7787 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1) 7788 // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ 7789 // &(s.ps->ps), &(s.ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM 7790 // 7791 // map(to: s.ps->ps->s.f[:22]) 7792 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM 7793 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1) 7794 // &(s.ps), &(s.ps->ps), sizeof(S2*), 
MEMBER_OF(1) | PTR_AND_OBJ 7795 // &(s.ps->ps), &(s.ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO 7796 // 7797 // map(ps) 7798 // &ps, &ps, sizeof(S2*), TARGET_PARAM | TO | FROM 7799 // 7800 // map(ps->i) 7801 // ps, &(ps->i), sizeof(int), TARGET_PARAM | TO | FROM 7802 // 7803 // map(ps->s.f) 7804 // ps, &(ps->s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM 7805 // 7806 // map(from: ps->p) 7807 // ps, &(ps->p), sizeof(double*), TARGET_PARAM | FROM 7808 // 7809 // map(to: ps->p[:22]) 7810 // ps, &(ps->p), sizeof(double*), TARGET_PARAM 7811 // ps, &(ps->p), sizeof(double*), MEMBER_OF(1) 7812 // &(ps->p), &(ps->p[0]), 22*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | TO 7813 // 7814 // map(ps->ps) 7815 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM | TO | FROM 7816 // 7817 // map(from: ps->ps->s.i) 7818 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM 7819 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1) 7820 // &(ps->ps), &(ps->ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM 7821 // 7822 // map(from: ps->ps->ps) 7823 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM 7824 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1) 7825 // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | FROM 7826 // 7827 // map(ps->ps->ps->ps) 7828 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM 7829 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1) 7830 // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ 7831 // &(ps->ps->ps), &(ps->ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM 7832 // 7833 // map(to: ps->ps->ps->s.f[:22]) 7834 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM 7835 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1) 7836 // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ 7837 // &(ps->ps->ps), &(ps->ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO 7838 // 7839 // map(to: s.f[:22]) map(from: s.p[:33]) 7840 // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1) + 7841 // sizeof(double*) (**), TARGET_PARAM 7842 // &s, &(s.f[0]), 22*sizeof(float), 
MEMBER_OF(1) | TO 7843 // &s, &(s.p), sizeof(double*), MEMBER_OF(1) 7844 // &(s.p), &(s.p[0]), 33*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | FROM 7845 // (*) allocate contiguous space needed to fit all mapped members even if 7846 // we allocate space for members not mapped (in this example, 7847 // s.f[22..49] and s.s are not mapped, yet we must allocate space for 7848 // them as well because they fall between &s.f[0] and &s.p) 7849 // 7850 // map(from: s.f[:22]) map(to: ps->p[:33]) 7851 // &s, &(s.f[0]), 22*sizeof(float), TARGET_PARAM | FROM 7852 // ps, &(ps->p), sizeof(S2*), TARGET_PARAM 7853 // ps, &(ps->p), sizeof(double*), MEMBER_OF(2) (*) 7854 // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(2) | PTR_AND_OBJ | TO 7855 // (*) the struct this entry pertains to is the 2nd element in the list of 7856 // arguments, hence MEMBER_OF(2) 7857 // 7858 // map(from: s.f[:22], s.s) map(to: ps->p[:33]) 7859 // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1), TARGET_PARAM 7860 // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | FROM 7861 // &s, &(s.s), sizeof(struct S1), MEMBER_OF(1) | FROM 7862 // ps, &(ps->p), sizeof(S2*), TARGET_PARAM 7863 // ps, &(ps->p), sizeof(double*), MEMBER_OF(4) (*) 7864 // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(4) | PTR_AND_OBJ | TO 7865 // (*) the struct this entry pertains to is the 4th element in the list 7866 // of arguments, hence MEMBER_OF(4) 7867 7868 // Track if the map information being generated is the first for a capture. 7869 bool IsCaptureFirstInfo = IsFirstComponentList; 7870 // When the variable is on a declare target link or in a to clause with 7871 // unified memory, a reference is needed to hold the host/device address 7872 // of the variable. 7873 bool RequiresReference = false; 7874 7875 // Scan the components from the base to the complete expression. 
7876 auto CI = Components.rbegin(); 7877 auto CE = Components.rend(); 7878 auto I = CI; 7879 7880 // Track if the map information being generated is the first for a list of 7881 // components. 7882 bool IsExpressionFirstInfo = true; 7883 bool FirstPointerInComplexData = false; 7884 Address BP = Address::invalid(); 7885 const Expr *AssocExpr = I->getAssociatedExpression(); 7886 const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr); 7887 const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr); 7888 const auto *OAShE = dyn_cast<OMPArrayShapingExpr>(AssocExpr); 7889 7890 if (isa<MemberExpr>(AssocExpr)) { 7891 // The base is the 'this' pointer. The content of the pointer is going 7892 // to be the base of the field being mapped. 7893 BP = CGF.LoadCXXThisAddress(); 7894 } else if ((AE && isa<CXXThisExpr>(AE->getBase()->IgnoreParenImpCasts())) || 7895 (OASE && 7896 isa<CXXThisExpr>(OASE->getBase()->IgnoreParenImpCasts()))) { 7897 BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF); 7898 } else if (OAShE && 7899 isa<CXXThisExpr>(OAShE->getBase()->IgnoreParenCasts())) { 7900 BP = Address( 7901 CGF.EmitScalarExpr(OAShE->getBase()), 7902 CGF.getContext().getTypeAlignInChars(OAShE->getBase()->getType())); 7903 } else { 7904 // The base is the reference to the variable. 7905 // BP = &Var. 7906 BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF); 7907 if (const auto *VD = 7908 dyn_cast_or_null<VarDecl>(I->getAssociatedDeclaration())) { 7909 if (llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 7910 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD)) { 7911 if ((*Res == OMPDeclareTargetDeclAttr::MT_Link) || 7912 (*Res == OMPDeclareTargetDeclAttr::MT_To && 7913 CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())) { 7914 RequiresReference = true; 7915 BP = CGF.CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD); 7916 } 7917 } 7918 } 7919 7920 // If the variable is a pointer and is being dereferenced (i.e. 
is not 7921 // the last component), the base has to be the pointer itself, not its 7922 // reference. References are ignored for mapping purposes. 7923 QualType Ty = 7924 I->getAssociatedDeclaration()->getType().getNonReferenceType(); 7925 if (Ty->isAnyPointerType() && std::next(I) != CE) { 7926 // No need to generate individual map information for the pointer, it 7927 // can be associated with the combined storage if shared memory mode is 7928 // active or the base declaration is not global variable. 7929 const auto *VD = dyn_cast<VarDecl>(I->getAssociatedDeclaration()); 7930 if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() || 7931 !VD || VD->hasLocalStorage()) 7932 BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>()); 7933 else 7934 FirstPointerInComplexData = true; 7935 ++I; 7936 } 7937 } 7938 7939 // Track whether a component of the list should be marked as MEMBER_OF some 7940 // combined entry (for partial structs). Only the first PTR_AND_OBJ entry 7941 // in a component list should be marked as MEMBER_OF, all subsequent entries 7942 // do not belong to the base struct. E.g. 7943 // struct S2 s; 7944 // s.ps->ps->ps->f[:] 7945 // (1) (2) (3) (4) 7946 // ps(1) is a member pointer, ps(2) is a pointee of ps(1), so it is a 7947 // PTR_AND_OBJ entry; the PTR is ps(1), so MEMBER_OF the base struct. ps(3) 7948 // is the pointee of ps(2) which is not member of struct s, so it should not 7949 // be marked as such (it is still PTR_AND_OBJ). 7950 // The variable is initialized to false so that PTR_AND_OBJ entries which 7951 // are not struct members are not considered (e.g. array of pointers to 7952 // data). 7953 bool ShouldBeMemberOf = false; 7954 7955 // Variable keeping track of whether or not we have encountered a component 7956 // in the component list which is a member expression. 
Useful when we have a 7957 // pointer or a final array section, in which case it is the previous 7958 // component in the list which tells us whether we have a member expression. 7959 // E.g. X.f[:] 7960 // While processing the final array section "[:]" it is "f" which tells us 7961 // whether we are dealing with a member of a declared struct. 7962 const MemberExpr *EncounteredME = nullptr; 7963 7964 // Track for the total number of dimension. Start from one for the dummy 7965 // dimension. 7966 uint64_t DimSize = 1; 7967 7968 bool IsNonContiguous = CombinedInfo.NonContigInfo.IsNonContiguous; 7969 bool IsPrevMemberReference = false; 7970 7971 for (; I != CE; ++I) { 7972 // If the current component is member of a struct (parent struct) mark it. 7973 if (!EncounteredME) { 7974 EncounteredME = dyn_cast<MemberExpr>(I->getAssociatedExpression()); 7975 // If we encounter a PTR_AND_OBJ entry from now on it should be marked 7976 // as MEMBER_OF the parent struct. 7977 if (EncounteredME) { 7978 ShouldBeMemberOf = true; 7979 // Do not emit as complex pointer if this is actually not array-like 7980 // expression. 7981 if (FirstPointerInComplexData) { 7982 QualType Ty = std::prev(I) 7983 ->getAssociatedDeclaration() 7984 ->getType() 7985 .getNonReferenceType(); 7986 BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>()); 7987 FirstPointerInComplexData = false; 7988 } 7989 } 7990 } 7991 7992 auto Next = std::next(I); 7993 7994 // We need to generate the addresses and sizes if this is the last 7995 // component, if the component is a pointer or if it is an array section 7996 // whose length can't be proved to be one. If this is a pointer, it 7997 // becomes the base address for the following components. 7998 7999 // A final array section, is one whose length can't be proved to be one. 8000 // If the map item is non-contiguous then we don't treat any array section 8001 // as final array section. 
8002 bool IsFinalArraySection = 8003 !IsNonContiguous && 8004 isFinalArraySectionExpression(I->getAssociatedExpression()); 8005 8006 // If we have a declaration for the mapping use that, otherwise use 8007 // the base declaration of the map clause. 8008 const ValueDecl *MapDecl = (I->getAssociatedDeclaration()) 8009 ? I->getAssociatedDeclaration() 8010 : BaseDecl; 8011 MapExpr = (I->getAssociatedExpression()) ? I->getAssociatedExpression() 8012 : MapExpr; 8013 8014 // Get information on whether the element is a pointer. Have to do a 8015 // special treatment for array sections given that they are built-in 8016 // types. 8017 const auto *OASE = 8018 dyn_cast<OMPArraySectionExpr>(I->getAssociatedExpression()); 8019 const auto *OAShE = 8020 dyn_cast<OMPArrayShapingExpr>(I->getAssociatedExpression()); 8021 const auto *UO = dyn_cast<UnaryOperator>(I->getAssociatedExpression()); 8022 const auto *BO = dyn_cast<BinaryOperator>(I->getAssociatedExpression()); 8023 bool IsPointer = 8024 OAShE || 8025 (OASE && OMPArraySectionExpr::getBaseOriginalType(OASE) 8026 .getCanonicalType() 8027 ->isAnyPointerType()) || 8028 I->getAssociatedExpression()->getType()->isAnyPointerType(); 8029 bool IsMemberReference = isa<MemberExpr>(I->getAssociatedExpression()) && 8030 MapDecl && 8031 MapDecl->getType()->isLValueReferenceType(); 8032 bool IsNonDerefPointer = IsPointer && !UO && !BO && !IsNonContiguous; 8033 8034 if (OASE) 8035 ++DimSize; 8036 8037 if (Next == CE || IsMemberReference || IsNonDerefPointer || 8038 IsFinalArraySection) { 8039 // If this is not the last component, we expect the pointer to be 8040 // associated with an array expression or member expression. 
8041 assert((Next == CE || 8042 isa<MemberExpr>(Next->getAssociatedExpression()) || 8043 isa<ArraySubscriptExpr>(Next->getAssociatedExpression()) || 8044 isa<OMPArraySectionExpr>(Next->getAssociatedExpression()) || 8045 isa<OMPArrayShapingExpr>(Next->getAssociatedExpression()) || 8046 isa<UnaryOperator>(Next->getAssociatedExpression()) || 8047 isa<BinaryOperator>(Next->getAssociatedExpression())) && 8048 "Unexpected expression"); 8049 8050 Address LB = Address::invalid(); 8051 Address LowestElem = Address::invalid(); 8052 auto &&EmitMemberExprBase = [](CodeGenFunction &CGF, 8053 const MemberExpr *E) { 8054 const Expr *BaseExpr = E->getBase(); 8055 // If this is s.x, emit s as an lvalue. If it is s->x, emit s as a 8056 // scalar. 8057 LValue BaseLV; 8058 if (E->isArrow()) { 8059 LValueBaseInfo BaseInfo; 8060 TBAAAccessInfo TBAAInfo; 8061 Address Addr = 8062 CGF.EmitPointerWithAlignment(BaseExpr, &BaseInfo, &TBAAInfo); 8063 QualType PtrTy = BaseExpr->getType()->getPointeeType(); 8064 BaseLV = CGF.MakeAddrLValue(Addr, PtrTy, BaseInfo, TBAAInfo); 8065 } else { 8066 BaseLV = CGF.EmitOMPSharedLValue(BaseExpr); 8067 } 8068 return BaseLV; 8069 }; 8070 if (OAShE) { 8071 LowestElem = LB = Address(CGF.EmitScalarExpr(OAShE->getBase()), 8072 CGF.getContext().getTypeAlignInChars( 8073 OAShE->getBase()->getType())); 8074 } else if (IsMemberReference) { 8075 const auto *ME = cast<MemberExpr>(I->getAssociatedExpression()); 8076 LValue BaseLVal = EmitMemberExprBase(CGF, ME); 8077 LowestElem = CGF.EmitLValueForFieldInitialization( 8078 BaseLVal, cast<FieldDecl>(MapDecl)) 8079 .getAddress(CGF); 8080 LB = CGF.EmitLoadOfReferenceLValue(LowestElem, MapDecl->getType()) 8081 .getAddress(CGF); 8082 } else { 8083 LowestElem = LB = 8084 CGF.EmitOMPSharedLValue(I->getAssociatedExpression()) 8085 .getAddress(CGF); 8086 } 8087 8088 // If this component is a pointer inside the base struct then we don't 8089 // need to create any entry for it - it will be combined with the object 8090 // it is 
pointing to into a single PTR_AND_OBJ entry. 8091 bool IsMemberPointerOrAddr = 8092 EncounteredME && 8093 (((IsPointer || ForDeviceAddr) && 8094 I->getAssociatedExpression() == EncounteredME) || 8095 (IsPrevMemberReference && !IsPointer) || 8096 (IsMemberReference && Next != CE && 8097 !Next->getAssociatedExpression()->getType()->isPointerType())); 8098 if (!OverlappedElements.empty() && Next == CE) { 8099 // Handle base element with the info for overlapped elements. 8100 assert(!PartialStruct.Base.isValid() && "The base element is set."); 8101 assert(!IsPointer && 8102 "Unexpected base element with the pointer type."); 8103 // Mark the whole struct as the struct that requires allocation on the 8104 // device. 8105 PartialStruct.LowestElem = {0, LowestElem}; 8106 CharUnits TypeSize = CGF.getContext().getTypeSizeInChars( 8107 I->getAssociatedExpression()->getType()); 8108 Address HB = CGF.Builder.CreateConstGEP( 8109 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(LowestElem, 8110 CGF.VoidPtrTy), 8111 TypeSize.getQuantity() - 1); 8112 PartialStruct.HighestElem = { 8113 std::numeric_limits<decltype( 8114 PartialStruct.HighestElem.first)>::max(), 8115 HB}; 8116 PartialStruct.Base = BP; 8117 PartialStruct.LB = LB; 8118 assert( 8119 PartialStruct.PreliminaryMapData.BasePointers.empty() && 8120 "Overlapped elements must be used only once for the variable."); 8121 std::swap(PartialStruct.PreliminaryMapData, CombinedInfo); 8122 // Emit data for non-overlapped data. 8123 OpenMPOffloadMappingFlags Flags = 8124 OMP_MAP_MEMBER_OF | 8125 getMapTypeBits(MapType, MapModifiers, MotionModifiers, IsImplicit, 8126 /*AddPtrFlag=*/false, 8127 /*AddIsTargetParamFlag=*/false, IsNonContiguous); 8128 llvm::Value *Size = nullptr; 8129 // Do bitcopy of all non-overlapped structure elements. 
8130 for (OMPClauseMappableExprCommon::MappableExprComponentListRef 8131 Component : OverlappedElements) { 8132 Address ComponentLB = Address::invalid(); 8133 for (const OMPClauseMappableExprCommon::MappableComponent &MC : 8134 Component) { 8135 if (const ValueDecl *VD = MC.getAssociatedDeclaration()) { 8136 const auto *FD = dyn_cast<FieldDecl>(VD); 8137 if (FD && FD->getType()->isLValueReferenceType()) { 8138 const auto *ME = 8139 cast<MemberExpr>(MC.getAssociatedExpression()); 8140 LValue BaseLVal = EmitMemberExprBase(CGF, ME); 8141 ComponentLB = 8142 CGF.EmitLValueForFieldInitialization(BaseLVal, FD) 8143 .getAddress(CGF); 8144 } else { 8145 ComponentLB = 8146 CGF.EmitOMPSharedLValue(MC.getAssociatedExpression()) 8147 .getAddress(CGF); 8148 } 8149 Size = CGF.Builder.CreatePtrDiff( 8150 CGF.EmitCastToVoidPtr(ComponentLB.getPointer()), 8151 CGF.EmitCastToVoidPtr(LB.getPointer())); 8152 break; 8153 } 8154 } 8155 assert(Size && "Failed to determine structure size"); 8156 CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr); 8157 CombinedInfo.BasePointers.push_back(BP.getPointer()); 8158 CombinedInfo.Pointers.push_back(LB.getPointer()); 8159 CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast( 8160 Size, CGF.Int64Ty, /*isSigned=*/true)); 8161 CombinedInfo.Types.push_back(Flags); 8162 CombinedInfo.Mappers.push_back(nullptr); 8163 CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? 
DimSize 8164 : 1); 8165 LB = CGF.Builder.CreateConstGEP(ComponentLB, 1); 8166 } 8167 CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr); 8168 CombinedInfo.BasePointers.push_back(BP.getPointer()); 8169 CombinedInfo.Pointers.push_back(LB.getPointer()); 8170 Size = CGF.Builder.CreatePtrDiff( 8171 CGF.Builder.CreateConstGEP(HB, 1).getPointer(), 8172 CGF.EmitCastToVoidPtr(LB.getPointer())); 8173 CombinedInfo.Sizes.push_back( 8174 CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true)); 8175 CombinedInfo.Types.push_back(Flags); 8176 CombinedInfo.Mappers.push_back(nullptr); 8177 CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize 8178 : 1); 8179 break; 8180 } 8181 llvm::Value *Size = getExprTypeSize(I->getAssociatedExpression()); 8182 if (!IsMemberPointerOrAddr || 8183 (Next == CE && MapType != OMPC_MAP_unknown)) { 8184 CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr); 8185 CombinedInfo.BasePointers.push_back(BP.getPointer()); 8186 CombinedInfo.Pointers.push_back(LB.getPointer()); 8187 CombinedInfo.Sizes.push_back( 8188 CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true)); 8189 CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize 8190 : 1); 8191 8192 // If Mapper is valid, the last component inherits the mapper. 8193 bool HasMapper = Mapper && Next == CE; 8194 CombinedInfo.Mappers.push_back(HasMapper ? Mapper : nullptr); 8195 8196 // We need to add a pointer flag for each map that comes from the 8197 // same expression except for the first one. We also need to signal 8198 // this map is the first one that relates with the current capture 8199 // (there is a set of entries for each capture). 
8200 OpenMPOffloadMappingFlags Flags = getMapTypeBits( 8201 MapType, MapModifiers, MotionModifiers, IsImplicit, 8202 !IsExpressionFirstInfo || RequiresReference || 8203 FirstPointerInComplexData || IsMemberReference, 8204 IsCaptureFirstInfo && !RequiresReference, IsNonContiguous); 8205 8206 if (!IsExpressionFirstInfo || IsMemberReference) { 8207 // If we have a PTR_AND_OBJ pair where the OBJ is a pointer as well, 8208 // then we reset the TO/FROM/ALWAYS/DELETE/CLOSE flags. 8209 if (IsPointer || (IsMemberReference && Next != CE)) 8210 Flags &= ~(OMP_MAP_TO | OMP_MAP_FROM | OMP_MAP_ALWAYS | 8211 OMP_MAP_DELETE | OMP_MAP_CLOSE); 8212 8213 if (ShouldBeMemberOf) { 8214 // Set placeholder value MEMBER_OF=FFFF to indicate that the flag 8215 // should be later updated with the correct value of MEMBER_OF. 8216 Flags |= OMP_MAP_MEMBER_OF; 8217 // From now on, all subsequent PTR_AND_OBJ entries should not be 8218 // marked as MEMBER_OF. 8219 ShouldBeMemberOf = false; 8220 } 8221 } 8222 8223 CombinedInfo.Types.push_back(Flags); 8224 } 8225 8226 // If we have encountered a member expression so far, keep track of the 8227 // mapped member. If the parent is "*this", then the value declaration 8228 // is nullptr. 
8229 if (EncounteredME) { 8230 const auto *FD = cast<FieldDecl>(EncounteredME->getMemberDecl()); 8231 unsigned FieldIndex = FD->getFieldIndex(); 8232 8233 // Update info about the lowest and highest elements for this struct 8234 if (!PartialStruct.Base.isValid()) { 8235 PartialStruct.LowestElem = {FieldIndex, LowestElem}; 8236 if (IsFinalArraySection) { 8237 Address HB = 8238 CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false) 8239 .getAddress(CGF); 8240 PartialStruct.HighestElem = {FieldIndex, HB}; 8241 } else { 8242 PartialStruct.HighestElem = {FieldIndex, LowestElem}; 8243 } 8244 PartialStruct.Base = BP; 8245 PartialStruct.LB = BP; 8246 } else if (FieldIndex < PartialStruct.LowestElem.first) { 8247 PartialStruct.LowestElem = {FieldIndex, LowestElem}; 8248 } else if (FieldIndex > PartialStruct.HighestElem.first) { 8249 PartialStruct.HighestElem = {FieldIndex, LowestElem}; 8250 } 8251 } 8252 8253 // Need to emit combined struct for array sections. 8254 if (IsFinalArraySection || IsNonContiguous) 8255 PartialStruct.IsArraySection = true; 8256 8257 // If we have a final array section, we are done with this expression. 8258 if (IsFinalArraySection) 8259 break; 8260 8261 // The pointer becomes the base for the next element. 8262 if (Next != CE) 8263 BP = IsMemberReference ? LowestElem : LB; 8264 8265 IsExpressionFirstInfo = false; 8266 IsCaptureFirstInfo = false; 8267 FirstPointerInComplexData = false; 8268 IsPrevMemberReference = IsMemberReference; 8269 } else if (FirstPointerInComplexData) { 8270 QualType Ty = Components.rbegin() 8271 ->getAssociatedDeclaration() 8272 ->getType() 8273 .getNonReferenceType(); 8274 BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>()); 8275 FirstPointerInComplexData = false; 8276 } 8277 } 8278 // If ran into the whole component - allocate the space for the whole 8279 // record. 
8280 if (!EncounteredME) 8281 PartialStruct.HasCompleteRecord = true; 8282 8283 if (!IsNonContiguous) 8284 return; 8285 8286 const ASTContext &Context = CGF.getContext(); 8287 8288 // For supporting stride in array section, we need to initialize the first 8289 // dimension size as 1, first offset as 0, and first count as 1 8290 MapValuesArrayTy CurOffsets = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 0)}; 8291 MapValuesArrayTy CurCounts = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)}; 8292 MapValuesArrayTy CurStrides; 8293 MapValuesArrayTy DimSizes{llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)}; 8294 uint64_t ElementTypeSize; 8295 8296 // Collect Size information for each dimension and get the element size as 8297 // the first Stride. For example, for `int arr[10][10]`, the DimSizes 8298 // should be [10, 10] and the first stride is 4 btyes. 8299 for (const OMPClauseMappableExprCommon::MappableComponent &Component : 8300 Components) { 8301 const Expr *AssocExpr = Component.getAssociatedExpression(); 8302 const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr); 8303 8304 if (!OASE) 8305 continue; 8306 8307 QualType Ty = OMPArraySectionExpr::getBaseOriginalType(OASE->getBase()); 8308 auto *CAT = Context.getAsConstantArrayType(Ty); 8309 auto *VAT = Context.getAsVariableArrayType(Ty); 8310 8311 // We need all the dimension size except for the last dimension. 8312 assert((VAT || CAT || &Component == &*Components.begin()) && 8313 "Should be either ConstantArray or VariableArray if not the " 8314 "first Component"); 8315 8316 // Get element size if CurStrides is empty. 
8317 if (CurStrides.empty()) { 8318 const Type *ElementType = nullptr; 8319 if (CAT) 8320 ElementType = CAT->getElementType().getTypePtr(); 8321 else if (VAT) 8322 ElementType = VAT->getElementType().getTypePtr(); 8323 else 8324 assert(&Component == &*Components.begin() && 8325 "Only expect pointer (non CAT or VAT) when this is the " 8326 "first Component"); 8327 // If ElementType is null, then it means the base is a pointer 8328 // (neither CAT nor VAT) and we'll attempt to get ElementType again 8329 // for next iteration. 8330 if (ElementType) { 8331 // For the case that having pointer as base, we need to remove one 8332 // level of indirection. 8333 if (&Component != &*Components.begin()) 8334 ElementType = ElementType->getPointeeOrArrayElementType(); 8335 ElementTypeSize = 8336 Context.getTypeSizeInChars(ElementType).getQuantity(); 8337 CurStrides.push_back( 8338 llvm::ConstantInt::get(CGF.Int64Ty, ElementTypeSize)); 8339 } 8340 } 8341 // Get dimension value except for the last dimension since we don't need 8342 // it. 8343 if (DimSizes.size() < Components.size() - 1) { 8344 if (CAT) 8345 DimSizes.push_back(llvm::ConstantInt::get( 8346 CGF.Int64Ty, CAT->getSize().getZExtValue())); 8347 else if (VAT) 8348 DimSizes.push_back(CGF.Builder.CreateIntCast( 8349 CGF.EmitScalarExpr(VAT->getSizeExpr()), CGF.Int64Ty, 8350 /*IsSigned=*/false)); 8351 } 8352 } 8353 8354 // Skip the dummy dimension since we have already have its information. 8355 auto DI = DimSizes.begin() + 1; 8356 // Product of dimension. 8357 llvm::Value *DimProd = 8358 llvm::ConstantInt::get(CGF.CGM.Int64Ty, ElementTypeSize); 8359 8360 // Collect info for non-contiguous. Notice that offset, count, and stride 8361 // are only meaningful for array-section, so we insert a null for anything 8362 // other than array-section. 8363 // Also, the size of offset, count, and stride are not the same as 8364 // pointers, base_pointers, sizes, or dims. 
Instead, the size of offset, 8365 // count, and stride are the same as the number of non-contiguous 8366 // declaration in target update to/from clause. 8367 for (const OMPClauseMappableExprCommon::MappableComponent &Component : 8368 Components) { 8369 const Expr *AssocExpr = Component.getAssociatedExpression(); 8370 8371 if (const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr)) { 8372 llvm::Value *Offset = CGF.Builder.CreateIntCast( 8373 CGF.EmitScalarExpr(AE->getIdx()), CGF.Int64Ty, 8374 /*isSigned=*/false); 8375 CurOffsets.push_back(Offset); 8376 CurCounts.push_back(llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/1)); 8377 CurStrides.push_back(CurStrides.back()); 8378 continue; 8379 } 8380 8381 const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr); 8382 8383 if (!OASE) 8384 continue; 8385 8386 // Offset 8387 const Expr *OffsetExpr = OASE->getLowerBound(); 8388 llvm::Value *Offset = nullptr; 8389 if (!OffsetExpr) { 8390 // If offset is absent, then we just set it to zero. 8391 Offset = llvm::ConstantInt::get(CGF.Int64Ty, 0); 8392 } else { 8393 Offset = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(OffsetExpr), 8394 CGF.Int64Ty, 8395 /*isSigned=*/false); 8396 } 8397 CurOffsets.push_back(Offset); 8398 8399 // Count 8400 const Expr *CountExpr = OASE->getLength(); 8401 llvm::Value *Count = nullptr; 8402 if (!CountExpr) { 8403 // In Clang, once a high dimension is an array section, we construct all 8404 // the lower dimension as array section, however, for case like 8405 // arr[0:2][2], Clang construct the inner dimension as an array section 8406 // but it actually is not in an array section form according to spec. 8407 if (!OASE->getColonLocFirst().isValid() && 8408 !OASE->getColonLocSecond().isValid()) { 8409 Count = llvm::ConstantInt::get(CGF.Int64Ty, 1); 8410 } else { 8411 // OpenMP 5.0, 2.1.5 Array Sections, Description. 
8412 // When the length is absent it defaults to ⌈(size − 8413 // lower-bound)/stride⌉, where size is the size of the array 8414 // dimension. 8415 const Expr *StrideExpr = OASE->getStride(); 8416 llvm::Value *Stride = 8417 StrideExpr 8418 ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr), 8419 CGF.Int64Ty, /*isSigned=*/false) 8420 : nullptr; 8421 if (Stride) 8422 Count = CGF.Builder.CreateUDiv( 8423 CGF.Builder.CreateNUWSub(*DI, Offset), Stride); 8424 else 8425 Count = CGF.Builder.CreateNUWSub(*DI, Offset); 8426 } 8427 } else { 8428 Count = CGF.EmitScalarExpr(CountExpr); 8429 } 8430 Count = CGF.Builder.CreateIntCast(Count, CGF.Int64Ty, /*isSigned=*/false); 8431 CurCounts.push_back(Count); 8432 8433 // Stride_n' = Stride_n * (D_0 * D_1 ... * D_n-1) * Unit size 8434 // Take `int arr[5][5][5]` and `arr[0:2:2][1:2:1][0:2:2]` as an example: 8435 // Offset Count Stride 8436 // D0 0 1 4 (int) <- dummy dimension 8437 // D1 0 2 8 (2 * (1) * 4) 8438 // D2 1 2 20 (1 * (1 * 5) * 4) 8439 // D3 0 2 200 (2 * (1 * 5 * 4) * 4) 8440 const Expr *StrideExpr = OASE->getStride(); 8441 llvm::Value *Stride = 8442 StrideExpr 8443 ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr), 8444 CGF.Int64Ty, /*isSigned=*/false) 8445 : nullptr; 8446 DimProd = CGF.Builder.CreateNUWMul(DimProd, *(DI - 1)); 8447 if (Stride) 8448 CurStrides.push_back(CGF.Builder.CreateNUWMul(DimProd, Stride)); 8449 else 8450 CurStrides.push_back(DimProd); 8451 if (DI != DimSizes.end()) 8452 ++DI; 8453 } 8454 8455 CombinedInfo.NonContigInfo.Offsets.push_back(CurOffsets); 8456 CombinedInfo.NonContigInfo.Counts.push_back(CurCounts); 8457 CombinedInfo.NonContigInfo.Strides.push_back(CurStrides); 8458 } 8459 8460 /// Return the adjusted map modifiers if the declaration a capture refers to 8461 /// appears in a first-private clause. This is expected to be used only with 8462 /// directives that start with 'target'. 
8463 MappableExprsHandler::OpenMPOffloadMappingFlags 8464 getMapModifiersForPrivateClauses(const CapturedStmt::Capture &Cap) const { 8465 assert(Cap.capturesVariable() && "Expected capture by reference only!"); 8466 8467 // A first private variable captured by reference will use only the 8468 // 'private ptr' and 'map to' flag. Return the right flags if the captured 8469 // declaration is known as first-private in this handler. 8470 if (FirstPrivateDecls.count(Cap.getCapturedVar())) { 8471 if (Cap.getCapturedVar()->getType()->isAnyPointerType()) 8472 return MappableExprsHandler::OMP_MAP_TO | 8473 MappableExprsHandler::OMP_MAP_PTR_AND_OBJ; 8474 return MappableExprsHandler::OMP_MAP_PRIVATE | 8475 MappableExprsHandler::OMP_MAP_TO; 8476 } 8477 auto I = LambdasMap.find(Cap.getCapturedVar()->getCanonicalDecl()); 8478 if (I != LambdasMap.end()) 8479 // for map(to: lambda): using user specified map type. 8480 return getMapTypeBits( 8481 I->getSecond()->getMapType(), I->getSecond()->getMapTypeModifiers(), 8482 /*MotionModifiers=*/llvm::None, I->getSecond()->isImplicit(), 8483 /*AddPtrFlag=*/false, 8484 /*AddIsTargetParamFlag=*/false, 8485 /*isNonContiguous=*/false); 8486 return MappableExprsHandler::OMP_MAP_TO | 8487 MappableExprsHandler::OMP_MAP_FROM; 8488 } 8489 8490 static OpenMPOffloadMappingFlags getMemberOfFlag(unsigned Position) { 8491 // Rotate by getFlagMemberOffset() bits. 8492 return static_cast<OpenMPOffloadMappingFlags>(((uint64_t)Position + 1) 8493 << getFlagMemberOffset()); 8494 } 8495 8496 static void setCorrectMemberOfFlag(OpenMPOffloadMappingFlags &Flags, 8497 OpenMPOffloadMappingFlags MemberOfFlag) { 8498 // If the entry is PTR_AND_OBJ but has not been marked with the special 8499 // placeholder value 0xFFFF in the MEMBER_OF field, then it should not be 8500 // marked as MEMBER_OF. 
8501 if ((Flags & OMP_MAP_PTR_AND_OBJ) && 8502 ((Flags & OMP_MAP_MEMBER_OF) != OMP_MAP_MEMBER_OF)) 8503 return; 8504 8505 // Reset the placeholder value to prepare the flag for the assignment of the 8506 // proper MEMBER_OF value. 8507 Flags &= ~OMP_MAP_MEMBER_OF; 8508 Flags |= MemberOfFlag; 8509 } 8510 8511 void getPlainLayout(const CXXRecordDecl *RD, 8512 llvm::SmallVectorImpl<const FieldDecl *> &Layout, 8513 bool AsBase) const { 8514 const CGRecordLayout &RL = CGF.getTypes().getCGRecordLayout(RD); 8515 8516 llvm::StructType *St = 8517 AsBase ? RL.getBaseSubobjectLLVMType() : RL.getLLVMType(); 8518 8519 unsigned NumElements = St->getNumElements(); 8520 llvm::SmallVector< 8521 llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>, 4> 8522 RecordLayout(NumElements); 8523 8524 // Fill bases. 8525 for (const auto &I : RD->bases()) { 8526 if (I.isVirtual()) 8527 continue; 8528 const auto *Base = I.getType()->getAsCXXRecordDecl(); 8529 // Ignore empty bases. 8530 if (Base->isEmpty() || CGF.getContext() 8531 .getASTRecordLayout(Base) 8532 .getNonVirtualSize() 8533 .isZero()) 8534 continue; 8535 8536 unsigned FieldIndex = RL.getNonVirtualBaseLLVMFieldNo(Base); 8537 RecordLayout[FieldIndex] = Base; 8538 } 8539 // Fill in virtual bases. 8540 for (const auto &I : RD->vbases()) { 8541 const auto *Base = I.getType()->getAsCXXRecordDecl(); 8542 // Ignore empty bases. 8543 if (Base->isEmpty()) 8544 continue; 8545 unsigned FieldIndex = RL.getVirtualBaseIndex(Base); 8546 if (RecordLayout[FieldIndex]) 8547 continue; 8548 RecordLayout[FieldIndex] = Base; 8549 } 8550 // Fill in all the fields. 8551 assert(!RD->isUnion() && "Unexpected union."); 8552 for (const auto *Field : RD->fields()) { 8553 // Fill in non-bitfields. (Bitfields always use a zero pattern, which we 8554 // will fill in later.) 
8555 if (!Field->isBitField() && !Field->isZeroSize(CGF.getContext())) { 8556 unsigned FieldIndex = RL.getLLVMFieldNo(Field); 8557 RecordLayout[FieldIndex] = Field; 8558 } 8559 } 8560 for (const llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *> 8561 &Data : RecordLayout) { 8562 if (Data.isNull()) 8563 continue; 8564 if (const auto *Base = Data.dyn_cast<const CXXRecordDecl *>()) 8565 getPlainLayout(Base, Layout, /*AsBase=*/true); 8566 else 8567 Layout.push_back(Data.get<const FieldDecl *>()); 8568 } 8569 } 8570 8571 /// Generate all the base pointers, section pointers, sizes, map types, and 8572 /// mappers for the extracted mappable expressions (all included in \a 8573 /// CombinedInfo). Also, for each item that relates with a device pointer, a 8574 /// pair of the relevant declaration and index where it occurs is appended to 8575 /// the device pointers info array. 8576 void generateAllInfoForClauses( 8577 ArrayRef<const OMPClause *> Clauses, MapCombinedInfoTy &CombinedInfo, 8578 const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet = 8579 llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const { 8580 // We have to process the component lists that relate with the same 8581 // declaration in a single chunk so that we can generate the map flags 8582 // correctly. Therefore, we organize all lists in a map. 8583 enum MapKind { Present, Allocs, Other, Total }; 8584 llvm::MapVector<CanonicalDeclPtr<const Decl>, 8585 SmallVector<SmallVector<MapInfo, 8>, 4>> 8586 Info; 8587 8588 // Helper function to fill the information map for the different supported 8589 // clauses. 
8590 auto &&InfoGen = 8591 [&Info, &SkipVarSet]( 8592 const ValueDecl *D, MapKind Kind, 8593 OMPClauseMappableExprCommon::MappableExprComponentListRef L, 8594 OpenMPMapClauseKind MapType, 8595 ArrayRef<OpenMPMapModifierKind> MapModifiers, 8596 ArrayRef<OpenMPMotionModifierKind> MotionModifiers, 8597 bool ReturnDevicePointer, bool IsImplicit, const ValueDecl *Mapper, 8598 const Expr *VarRef = nullptr, bool ForDeviceAddr = false) { 8599 if (SkipVarSet.contains(D)) 8600 return; 8601 auto It = Info.find(D); 8602 if (It == Info.end()) 8603 It = Info 8604 .insert(std::make_pair( 8605 D, SmallVector<SmallVector<MapInfo, 8>, 4>(Total))) 8606 .first; 8607 It->second[Kind].emplace_back( 8608 L, MapType, MapModifiers, MotionModifiers, ReturnDevicePointer, 8609 IsImplicit, Mapper, VarRef, ForDeviceAddr); 8610 }; 8611 8612 for (const auto *Cl : Clauses) { 8613 const auto *C = dyn_cast<OMPMapClause>(Cl); 8614 if (!C) 8615 continue; 8616 MapKind Kind = Other; 8617 if (llvm::is_contained(C->getMapTypeModifiers(), 8618 OMPC_MAP_MODIFIER_present)) 8619 Kind = Present; 8620 else if (C->getMapType() == OMPC_MAP_alloc) 8621 Kind = Allocs; 8622 const auto *EI = C->getVarRefs().begin(); 8623 for (const auto L : C->component_lists()) { 8624 const Expr *E = (C->getMapLoc().isValid()) ? 
*EI : nullptr; 8625 InfoGen(std::get<0>(L), Kind, std::get<1>(L), C->getMapType(), 8626 C->getMapTypeModifiers(), llvm::None, 8627 /*ReturnDevicePointer=*/false, C->isImplicit(), std::get<2>(L), 8628 E); 8629 ++EI; 8630 } 8631 } 8632 for (const auto *Cl : Clauses) { 8633 const auto *C = dyn_cast<OMPToClause>(Cl); 8634 if (!C) 8635 continue; 8636 MapKind Kind = Other; 8637 if (llvm::is_contained(C->getMotionModifiers(), 8638 OMPC_MOTION_MODIFIER_present)) 8639 Kind = Present; 8640 const auto *EI = C->getVarRefs().begin(); 8641 for (const auto L : C->component_lists()) { 8642 InfoGen(std::get<0>(L), Kind, std::get<1>(L), OMPC_MAP_to, llvm::None, 8643 C->getMotionModifiers(), /*ReturnDevicePointer=*/false, 8644 C->isImplicit(), std::get<2>(L), *EI); 8645 ++EI; 8646 } 8647 } 8648 for (const auto *Cl : Clauses) { 8649 const auto *C = dyn_cast<OMPFromClause>(Cl); 8650 if (!C) 8651 continue; 8652 MapKind Kind = Other; 8653 if (llvm::is_contained(C->getMotionModifiers(), 8654 OMPC_MOTION_MODIFIER_present)) 8655 Kind = Present; 8656 const auto *EI = C->getVarRefs().begin(); 8657 for (const auto L : C->component_lists()) { 8658 InfoGen(std::get<0>(L), Kind, std::get<1>(L), OMPC_MAP_from, llvm::None, 8659 C->getMotionModifiers(), /*ReturnDevicePointer=*/false, 8660 C->isImplicit(), std::get<2>(L), *EI); 8661 ++EI; 8662 } 8663 } 8664 8665 // Look at the use_device_ptr clause information and mark the existing map 8666 // entries as such. If there is no map information for an entry in the 8667 // use_device_ptr list, we create one with map type 'alloc' and zero size 8668 // section. It is the user fault if that was not mapped before. If there is 8669 // no map information and the pointer is a struct member, then we defer the 8670 // emission of that entry until the whole struct has been processed. 
8671 llvm::MapVector<CanonicalDeclPtr<const Decl>, 8672 SmallVector<DeferredDevicePtrEntryTy, 4>> 8673 DeferredInfo; 8674 MapCombinedInfoTy UseDevicePtrCombinedInfo; 8675 8676 for (const auto *Cl : Clauses) { 8677 const auto *C = dyn_cast<OMPUseDevicePtrClause>(Cl); 8678 if (!C) 8679 continue; 8680 for (const auto L : C->component_lists()) { 8681 OMPClauseMappableExprCommon::MappableExprComponentListRef Components = 8682 std::get<1>(L); 8683 assert(!Components.empty() && 8684 "Not expecting empty list of components!"); 8685 const ValueDecl *VD = Components.back().getAssociatedDeclaration(); 8686 VD = cast<ValueDecl>(VD->getCanonicalDecl()); 8687 const Expr *IE = Components.back().getAssociatedExpression(); 8688 // If the first component is a member expression, we have to look into 8689 // 'this', which maps to null in the map of map information. Otherwise 8690 // look directly for the information. 8691 auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD); 8692 8693 // We potentially have map information for this declaration already. 8694 // Look for the first set of components that refer to it. 8695 if (It != Info.end()) { 8696 bool Found = false; 8697 for (auto &Data : It->second) { 8698 auto *CI = llvm::find_if(Data, [VD](const MapInfo &MI) { 8699 return MI.Components.back().getAssociatedDeclaration() == VD; 8700 }); 8701 // If we found a map entry, signal that the pointer has to be 8702 // returned and move on to the next declaration. Exclude cases where 8703 // the base pointer is mapped as array subscript, array section or 8704 // array shaping. The base address is passed as a pointer to base in 8705 // this case and cannot be used as a base for use_device_ptr list 8706 // item. 
8707 if (CI != Data.end()) { 8708 auto PrevCI = std::next(CI->Components.rbegin()); 8709 const auto *VarD = dyn_cast<VarDecl>(VD); 8710 if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() || 8711 isa<MemberExpr>(IE) || 8712 !VD->getType().getNonReferenceType()->isPointerType() || 8713 PrevCI == CI->Components.rend() || 8714 isa<MemberExpr>(PrevCI->getAssociatedExpression()) || !VarD || 8715 VarD->hasLocalStorage()) { 8716 CI->ReturnDevicePointer = true; 8717 Found = true; 8718 break; 8719 } 8720 } 8721 } 8722 if (Found) 8723 continue; 8724 } 8725 8726 // We didn't find any match in our map information - generate a zero 8727 // size array section - if the pointer is a struct member we defer this 8728 // action until the whole struct has been processed. 8729 if (isa<MemberExpr>(IE)) { 8730 // Insert the pointer into Info to be processed by 8731 // generateInfoForComponentList. Because it is a member pointer 8732 // without a pointee, no entry will be generated for it, therefore 8733 // we need to generate one after the whole struct has been processed. 8734 // Nonetheless, generateInfoForComponentList must be called to take 8735 // the pointer into account for the calculation of the range of the 8736 // partial struct. 
8737 InfoGen(nullptr, Other, Components, OMPC_MAP_unknown, llvm::None, 8738 llvm::None, /*ReturnDevicePointer=*/false, C->isImplicit(), 8739 nullptr); 8740 DeferredInfo[nullptr].emplace_back(IE, VD, /*ForDeviceAddr=*/false); 8741 } else { 8742 llvm::Value *Ptr = 8743 CGF.EmitLoadOfScalar(CGF.EmitLValue(IE), IE->getExprLoc()); 8744 UseDevicePtrCombinedInfo.Exprs.push_back(VD); 8745 UseDevicePtrCombinedInfo.BasePointers.emplace_back(Ptr, VD); 8746 UseDevicePtrCombinedInfo.Pointers.push_back(Ptr); 8747 UseDevicePtrCombinedInfo.Sizes.push_back( 8748 llvm::Constant::getNullValue(CGF.Int64Ty)); 8749 UseDevicePtrCombinedInfo.Types.push_back(OMP_MAP_RETURN_PARAM); 8750 UseDevicePtrCombinedInfo.Mappers.push_back(nullptr); 8751 } 8752 } 8753 } 8754 8755 // Look at the use_device_addr clause information and mark the existing map 8756 // entries as such. If there is no map information for an entry in the 8757 // use_device_addr list, we create one with map type 'alloc' and zero size 8758 // section. It is the user fault if that was not mapped before. If there is 8759 // no map information and the pointer is a struct member, then we defer the 8760 // emission of that entry until the whole struct has been processed. 8761 llvm::SmallDenseSet<CanonicalDeclPtr<const Decl>, 4> Processed; 8762 for (const auto *Cl : Clauses) { 8763 const auto *C = dyn_cast<OMPUseDeviceAddrClause>(Cl); 8764 if (!C) 8765 continue; 8766 for (const auto L : C->component_lists()) { 8767 assert(!std::get<1>(L).empty() && 8768 "Not expecting empty list of components!"); 8769 const ValueDecl *VD = std::get<1>(L).back().getAssociatedDeclaration(); 8770 if (!Processed.insert(VD).second) 8771 continue; 8772 VD = cast<ValueDecl>(VD->getCanonicalDecl()); 8773 const Expr *IE = std::get<1>(L).back().getAssociatedExpression(); 8774 // If the first component is a member expression, we have to look into 8775 // 'this', which maps to null in the map of map information. 
Otherwise 8776 // look directly for the information. 8777 auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD); 8778 8779 // We potentially have map information for this declaration already. 8780 // Look for the first set of components that refer to it. 8781 if (It != Info.end()) { 8782 bool Found = false; 8783 for (auto &Data : It->second) { 8784 auto *CI = llvm::find_if(Data, [VD](const MapInfo &MI) { 8785 return MI.Components.back().getAssociatedDeclaration() == VD; 8786 }); 8787 // If we found a map entry, signal that the pointer has to be 8788 // returned and move on to the next declaration. 8789 if (CI != Data.end()) { 8790 CI->ReturnDevicePointer = true; 8791 Found = true; 8792 break; 8793 } 8794 } 8795 if (Found) 8796 continue; 8797 } 8798 8799 // We didn't find any match in our map information - generate a zero 8800 // size array section - if the pointer is a struct member we defer this 8801 // action until the whole struct has been processed. 8802 if (isa<MemberExpr>(IE)) { 8803 // Insert the pointer into Info to be processed by 8804 // generateInfoForComponentList. Because it is a member pointer 8805 // without a pointee, no entry will be generated for it, therefore 8806 // we need to generate one after the whole struct has been processed. 8807 // Nonetheless, generateInfoForComponentList must be called to take 8808 // the pointer into account for the calculation of the range of the 8809 // partial struct. 
8810 InfoGen(nullptr, Other, std::get<1>(L), OMPC_MAP_unknown, llvm::None, 8811 llvm::None, /*ReturnDevicePointer=*/false, C->isImplicit(), 8812 nullptr, nullptr, /*ForDeviceAddr=*/true); 8813 DeferredInfo[nullptr].emplace_back(IE, VD, /*ForDeviceAddr=*/true); 8814 } else { 8815 llvm::Value *Ptr; 8816 if (IE->isGLValue()) 8817 Ptr = CGF.EmitLValue(IE).getPointer(CGF); 8818 else 8819 Ptr = CGF.EmitScalarExpr(IE); 8820 CombinedInfo.Exprs.push_back(VD); 8821 CombinedInfo.BasePointers.emplace_back(Ptr, VD); 8822 CombinedInfo.Pointers.push_back(Ptr); 8823 CombinedInfo.Sizes.push_back( 8824 llvm::Constant::getNullValue(CGF.Int64Ty)); 8825 CombinedInfo.Types.push_back(OMP_MAP_RETURN_PARAM); 8826 CombinedInfo.Mappers.push_back(nullptr); 8827 } 8828 } 8829 } 8830 8831 for (const auto &Data : Info) { 8832 StructRangeInfoTy PartialStruct; 8833 // Temporary generated information. 8834 MapCombinedInfoTy CurInfo; 8835 const Decl *D = Data.first; 8836 const ValueDecl *VD = cast_or_null<ValueDecl>(D); 8837 for (const auto &M : Data.second) { 8838 for (const MapInfo &L : M) { 8839 assert(!L.Components.empty() && 8840 "Not expecting declaration with no component lists."); 8841 8842 // Remember the current base pointer index. 8843 unsigned CurrentBasePointersIdx = CurInfo.BasePointers.size(); 8844 CurInfo.NonContigInfo.IsNonContiguous = 8845 L.Components.back().isNonContiguous(); 8846 generateInfoForComponentList( 8847 L.MapType, L.MapModifiers, L.MotionModifiers, L.Components, 8848 CurInfo, PartialStruct, /*IsFirstComponentList=*/false, 8849 L.IsImplicit, L.Mapper, L.ForDeviceAddr, VD, L.VarRef); 8850 8851 // If this entry relates with a device pointer, set the relevant 8852 // declaration and add the 'return pointer' flag. 
8853 if (L.ReturnDevicePointer) { 8854 assert(CurInfo.BasePointers.size() > CurrentBasePointersIdx && 8855 "Unexpected number of mapped base pointers."); 8856 8857 const ValueDecl *RelevantVD = 8858 L.Components.back().getAssociatedDeclaration(); 8859 assert(RelevantVD && 8860 "No relevant declaration related with device pointer??"); 8861 8862 CurInfo.BasePointers[CurrentBasePointersIdx].setDevicePtrDecl( 8863 RelevantVD); 8864 CurInfo.Types[CurrentBasePointersIdx] |= OMP_MAP_RETURN_PARAM; 8865 } 8866 } 8867 } 8868 8869 // Append any pending zero-length pointers which are struct members and 8870 // used with use_device_ptr or use_device_addr. 8871 auto CI = DeferredInfo.find(Data.first); 8872 if (CI != DeferredInfo.end()) { 8873 for (const DeferredDevicePtrEntryTy &L : CI->second) { 8874 llvm::Value *BasePtr; 8875 llvm::Value *Ptr; 8876 if (L.ForDeviceAddr) { 8877 if (L.IE->isGLValue()) 8878 Ptr = this->CGF.EmitLValue(L.IE).getPointer(CGF); 8879 else 8880 Ptr = this->CGF.EmitScalarExpr(L.IE); 8881 BasePtr = Ptr; 8882 // Entry is RETURN_PARAM. Also, set the placeholder value 8883 // MEMBER_OF=FFFF so that the entry is later updated with the 8884 // correct value of MEMBER_OF. 8885 CurInfo.Types.push_back(OMP_MAP_RETURN_PARAM | OMP_MAP_MEMBER_OF); 8886 } else { 8887 BasePtr = this->CGF.EmitLValue(L.IE).getPointer(CGF); 8888 Ptr = this->CGF.EmitLoadOfScalar(this->CGF.EmitLValue(L.IE), 8889 L.IE->getExprLoc()); 8890 // Entry is PTR_AND_OBJ and RETURN_PARAM. Also, set the 8891 // placeholder value MEMBER_OF=FFFF so that the entry is later 8892 // updated with the correct value of MEMBER_OF. 
CurInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_RETURN_PARAM |
                                  OMP_MAP_MEMBER_OF);
        }
        // The remaining per-entry arrays are filled the same way for both
        // the use_device_addr and the use_device_ptr flavour: zero size and
        // no user-defined mapper.
        CurInfo.Exprs.push_back(L.VD);
        CurInfo.BasePointers.emplace_back(BasePtr, L.VD);
        CurInfo.Pointers.push_back(Ptr);
        CurInfo.Sizes.push_back(
            llvm::Constant::getNullValue(this->CGF.Int64Ty));
        CurInfo.Mappers.push_back(nullptr);
      }
    }
    // If there is an entry in PartialStruct it means we have a struct with
    // individual members mapped. Emit an extra combined entry.
    if (PartialStruct.Base.isValid()) {
      CurInfo.NonContigInfo.Dims.push_back(0);
      emitCombinedEntry(CombinedInfo, CurInfo.Types, PartialStruct, VD);
    }

    // We need to append the results of this capture to what we already
    // have.
    CombinedInfo.append(CurInfo);
  }
  // Append data for use_device_ptr clauses.
  CombinedInfo.append(UseDevicePtrCombinedInfo);
}

public:
/// Constructor for an executable directive.
///
/// Besides remembering \a Dir and \a CGF, this pre-collects clause
/// information that the map-info generators of this class look up later:
///  - FirstPrivateDecls: every variable listed in a firstprivate clause,
///    together with the clause's implicit flag, plus the allocator (or its
///    traits variable) of each uses_allocators entry, recorded as implicit;
///  - DevPointersMap: the component lists of is_device_ptr clauses, keyed by
///    their declaration;
///  - LambdasMap: declarations whose type is a lambda closure class and that
///    appear in a map clause with map type 'to'.
MappableExprsHandler(const OMPExecutableDirective &Dir, CodeGenFunction &CGF)
    : CurDir(&Dir), CGF(CGF) {
  // Extract firstprivate clause information.
  for (const auto *C : Dir.getClausesOfKind<OMPFirstprivateClause>())
    for (const auto *D : C->varlists())
      FirstPrivateDecls.try_emplace(
          cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl()), C->isImplicit());
  // Extract implicit firstprivates from uses_allocators clauses.
  for (const auto *C : Dir.getClausesOfKind<OMPUsesAllocatorsClause>()) {
    for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
      OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
      // Prefer the traits variable when the entry has one; otherwise fall
      // back to the allocator itself, but only if it names a variable.
      if (const auto *DRE = dyn_cast_or_null<DeclRefExpr>(D.AllocatorTraits))
        FirstPrivateDecls.try_emplace(cast<VarDecl>(DRE->getDecl()),
                                      /*Implicit=*/true);
      else if (const auto *VD = dyn_cast<VarDecl>(
                   cast<DeclRefExpr>(D.Allocator->IgnoreParenImpCasts())
                       ->getDecl()))
        FirstPrivateDecls.try_emplace(VD, /*Implicit=*/true);
    }
  }
  // Extract device pointer clause information.
  for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>())
    for (auto L : C->component_lists())
      DevPointersMap[std::get<0>(L)].push_back(std::get<1>(L));
  // Extract map information.
  for (const auto *C : Dir.getClausesOfKind<OMPMapClause>()) {
    // Only map(to: ...) clauses are inspected here.
    if (C->getMapType() != OMPC_MAP_to)
      continue;
    for (auto L : C->component_lists()) {
      const ValueDecl *VD = std::get<0>(L);
      // A component list may have no declaration; in that case there is no
      // record type to inspect.
      const auto *RD = VD ? VD->getType()
                                .getCanonicalType()
                                .getNonReferenceType()
                                ->getAsCXXRecordDecl()
                          : nullptr;
      // Remember the clause that maps a lambda object; try_emplace keeps the
      // first clause seen for a given declaration.
      if (RD && RD->isLambda())
        LambdasMap.try_emplace(std::get<0>(L), C);
    }
  }
}

/// Constructor for the declare mapper directive. Only stores \a Dir and
/// \a CGF; no clause information is pre-collected.
MappableExprsHandler(const OMPDeclareMapperDecl &Dir, CodeGenFunction &CGF)
    : CurDir(&Dir), CGF(CGF) {}

/// Generate code for the combined entry if we have a partially mapped struct
/// and take care of the mapping flags of the arguments corresponding to
/// individual struct members.
8968 void emitCombinedEntry(MapCombinedInfoTy &CombinedInfo, 8969 MapFlagsArrayTy &CurTypes, 8970 const StructRangeInfoTy &PartialStruct, 8971 const ValueDecl *VD = nullptr, 8972 bool NotTargetParams = true) const { 8973 if (CurTypes.size() == 1 && 8974 ((CurTypes.back() & OMP_MAP_MEMBER_OF) != OMP_MAP_MEMBER_OF) && 8975 !PartialStruct.IsArraySection) 8976 return; 8977 Address LBAddr = PartialStruct.LowestElem.second; 8978 Address HBAddr = PartialStruct.HighestElem.second; 8979 if (PartialStruct.HasCompleteRecord) { 8980 LBAddr = PartialStruct.LB; 8981 HBAddr = PartialStruct.LB; 8982 } 8983 CombinedInfo.Exprs.push_back(VD); 8984 // Base is the base of the struct 8985 CombinedInfo.BasePointers.push_back(PartialStruct.Base.getPointer()); 8986 // Pointer is the address of the lowest element 8987 llvm::Value *LB = LBAddr.getPointer(); 8988 CombinedInfo.Pointers.push_back(LB); 8989 // There should not be a mapper for a combined entry. 8990 CombinedInfo.Mappers.push_back(nullptr); 8991 // Size is (addr of {highest+1} element) - (addr of lowest element) 8992 llvm::Value *HB = HBAddr.getPointer(); 8993 llvm::Value *HAddr = 8994 CGF.Builder.CreateConstGEP1_32(HBAddr.getElementType(), HB, /*Idx0=*/1); 8995 llvm::Value *CLAddr = CGF.Builder.CreatePointerCast(LB, CGF.VoidPtrTy); 8996 llvm::Value *CHAddr = CGF.Builder.CreatePointerCast(HAddr, CGF.VoidPtrTy); 8997 llvm::Value *Diff = CGF.Builder.CreatePtrDiff(CHAddr, CLAddr); 8998 llvm::Value *Size = CGF.Builder.CreateIntCast(Diff, CGF.Int64Ty, 8999 /*isSigned=*/false); 9000 CombinedInfo.Sizes.push_back(Size); 9001 // Map type is always TARGET_PARAM, if generate info for captures. 9002 CombinedInfo.Types.push_back(NotTargetParams ? OMP_MAP_NONE 9003 : OMP_MAP_TARGET_PARAM); 9004 // If any element has the present modifier, then make sure the runtime 9005 // doesn't attempt to allocate the struct. 
9006 if (CurTypes.end() != 9007 llvm::find_if(CurTypes, [](OpenMPOffloadMappingFlags Type) { 9008 return Type & OMP_MAP_PRESENT; 9009 })) 9010 CombinedInfo.Types.back() |= OMP_MAP_PRESENT; 9011 // Remove TARGET_PARAM flag from the first element 9012 (*CurTypes.begin()) &= ~OMP_MAP_TARGET_PARAM; 9013 // If any element has the ompx_hold modifier, then make sure the runtime 9014 // uses the hold reference count for the struct as a whole so that it won't 9015 // be unmapped by an extra dynamic reference count decrement. Add it to all 9016 // elements as well so the runtime knows which reference count to check 9017 // when determining whether it's time for device-to-host transfers of 9018 // individual elements. 9019 if (CurTypes.end() != 9020 llvm::find_if(CurTypes, [](OpenMPOffloadMappingFlags Type) { 9021 return Type & OMP_MAP_OMPX_HOLD; 9022 })) { 9023 CombinedInfo.Types.back() |= OMP_MAP_OMPX_HOLD; 9024 for (auto &M : CurTypes) 9025 M |= OMP_MAP_OMPX_HOLD; 9026 } 9027 9028 // All other current entries will be MEMBER_OF the combined entry 9029 // (except for PTR_AND_OBJ entries which do not have a placeholder value 9030 // 0xFFFF in the MEMBER_OF field). 9031 OpenMPOffloadMappingFlags MemberOfFlag = 9032 getMemberOfFlag(CombinedInfo.BasePointers.size() - 1); 9033 for (auto &M : CurTypes) 9034 setCorrectMemberOfFlag(M, MemberOfFlag); 9035 } 9036 9037 /// Generate all the base pointers, section pointers, sizes, map types, and 9038 /// mappers for the extracted mappable expressions (all included in \a 9039 /// CombinedInfo). Also, for each item that relates with a device pointer, a 9040 /// pair of the relevant declaration and index where it occurs is appended to 9041 /// the device pointers info array. 
9042 void generateAllInfo( 9043 MapCombinedInfoTy &CombinedInfo, 9044 const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet = 9045 llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const { 9046 assert(CurDir.is<const OMPExecutableDirective *>() && 9047 "Expect a executable directive"); 9048 const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>(); 9049 generateAllInfoForClauses(CurExecDir->clauses(), CombinedInfo, SkipVarSet); 9050 } 9051 9052 /// Generate all the base pointers, section pointers, sizes, map types, and 9053 /// mappers for the extracted map clauses of user-defined mapper (all included 9054 /// in \a CombinedInfo). 9055 void generateAllInfoForMapper(MapCombinedInfoTy &CombinedInfo) const { 9056 assert(CurDir.is<const OMPDeclareMapperDecl *>() && 9057 "Expect a declare mapper directive"); 9058 const auto *CurMapperDir = CurDir.get<const OMPDeclareMapperDecl *>(); 9059 generateAllInfoForClauses(CurMapperDir->clauses(), CombinedInfo); 9060 } 9061 9062 /// Emit capture info for lambdas for variables captured by reference. 
void generateInfoForLambdaCaptures(
    const ValueDecl *VD, llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo,
    llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers) const {
  // Nothing to do unless the (non-reference) type of VD is a lambda closure
  // class.
  const auto *RD = VD->getType()
                       .getCanonicalType()
                       .getNonReferenceType()
                       ->getAsCXXRecordDecl();
  if (!RD || !RD->isLambda())
    return;
  // View Arg as the address of the lambda object.
  Address VDAddr = Address(Arg, CGF.getContext().getDeclAlign(VD));
  LValue VDLVal = CGF.MakeAddrLValue(
      VDAddr, VD->getType().getCanonicalType().getNonReferenceType());
  llvm::DenseMap<const VarDecl *, FieldDecl *> Captures;
  FieldDecl *ThisCapture = nullptr;
  RD->getCaptureFields(Captures, ThisCapture);
  // Entry for a captured 'this', if the lambda has one.
  if (ThisCapture) {
    LValue ThisLVal =
        CGF.EmitLValueForFieldInitialization(VDLVal, ThisCapture);
    LValue ThisLValVal = CGF.EmitLValueForField(VDLVal, ThisCapture);
    // Remember which lambda object this field belongs to; the mapping is
    // consumed by adjustMemberOfForLambdaCaptures.
    LambdaPointers.try_emplace(ThisLVal.getPointer(CGF),
                               VDLVal.getPointer(CGF));
    CombinedInfo.Exprs.push_back(VD);
    CombinedInfo.BasePointers.push_back(ThisLVal.getPointer(CGF));
    CombinedInfo.Pointers.push_back(ThisLValVal.getPointer(CGF));
    CombinedInfo.Sizes.push_back(
        CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy),
                                  CGF.Int64Ty, /*isSigned=*/true));
    // This exact flag combination is what adjustMemberOfForLambdaCaptures
    // matches on to recognize implicit lambda-capture entries.
    CombinedInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
                                 OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
    CombinedInfo.Mappers.push_back(nullptr);
  }
  for (const LambdaCapture &LC : RD->captures()) {
    if (!LC.capturesVariable())
      continue;
    const VarDecl *VD = LC.getCapturedVar();
    // Only by-reference captures and captured pointers produce an entry.
    if (LC.getCaptureKind() != LCK_ByRef && !VD->getType()->isPointerType())
      continue;
    auto It = Captures.find(VD);
    assert(It != Captures.end() && "Found lambda capture without field.");
    LValue VarLVal = CGF.EmitLValueForFieldInitialization(VDLVal, It->second);
    if (LC.getCaptureKind() == LCK_ByRef) {
      // By-reference capture: the size is the size of the referenced
      // variable's (non-reference) type.
      LValue VarLValVal = CGF.EmitLValueForField(VDLVal, It->second);
      LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
                                 VDLVal.getPointer(CGF));
      CombinedInfo.Exprs.push_back(VD);
      CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF));
      CombinedInfo.Pointers.push_back(VarLValVal.getPointer(CGF));
      CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
          CGF.getTypeSize(
              VD->getType().getCanonicalType().getNonReferenceType()),
          CGF.Int64Ty, /*isSigned=*/true));
    } else {
      // Pointer captured by copy: the pointer value is loaded from the
      // field and the entry gets a zero size.
      RValue VarRVal = CGF.EmitLoadOfLValue(VarLVal, RD->getLocation());
      LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
                                 VDLVal.getPointer(CGF));
      CombinedInfo.Exprs.push_back(VD);
      CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF));
      CombinedInfo.Pointers.push_back(VarRVal.getScalarVal());
      CombinedInfo.Sizes.push_back(llvm::ConstantInt::get(CGF.Int64Ty, 0));
    }
    CombinedInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
                                 OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
    CombinedInfo.Mappers.push_back(nullptr);
  }
}

/// Set correct indices for lambdas captures.
///
/// For every entry whose type is exactly the flag set used for implicit
/// lambda captures, look up the owning lambda object in \a LambdaPointers
/// (keyed by the entry's base pointer), find the closest preceding entry
/// whose pointer is that lambda object, and rewrite the MEMBER_OF field of
/// the capture entry to refer to that entry's index.
void adjustMemberOfForLambdaCaptures(
    const llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers,
    MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
    MapFlagsArrayTy &Types) const {
  for (unsigned I = 0, E = Types.size(); I < E; ++I) {
    // Set correct member_of idx for all implicit lambda captures.
    if (Types[I] != (OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
                     OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT))
      continue;
    llvm::Value *BasePtr = LambdaPointers.lookup(*BasePointers[I]);
    assert(BasePtr && "Unable to find base lambda address.");
    // Scan backwards over the entries before I (indices I-1 down to 0) for
    // the parent lambda entry.
    int TgtIdx = -1;
    for (unsigned J = I; J > 0; --J) {
      unsigned Idx = J - 1;
      if (Pointers[Idx] != BasePtr)
        continue;
      TgtIdx = Idx;
      break;
    }
    assert(TgtIdx != -1 && "Unable to find parent lambda.");
    // All other current entries will be MEMBER_OF the combined entry
    // (except for PTR_AND_OBJ entries which do not have a placeholder value
    // 0xFFFF in the MEMBER_OF field).
    OpenMPOffloadMappingFlags MemberOfFlag = getMemberOfFlag(TgtIdx);
    setCorrectMemberOfFlag(Types[I], MemberOfFlag);
  }
}

/// Generate the base pointers, section pointers, sizes, map types, and
/// mappers associated to a given capture (all included in \a CombinedInfo).
///
/// \param Cap The capture to process; must not capture a variable array type.
/// \param Arg The value passed for the capture; used directly as base and
/// pointer when the captured declaration is listed in is_device_ptr.
/// \param CombinedInfo Receives the generated entries.
/// \param PartialStruct Forwarded to generateInfoForComponentList for every
/// component list that is processed.
void generateInfoForCapture(const CapturedStmt::Capture *Cap,
                            llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo,
                            StructRangeInfoTy &PartialStruct) const {
  assert(!Cap->capturesVariableArrayType() &&
         "Not expecting to generate map info for a variable array type!");

  // We need to know when we are generating information for the first
  // component. VD is null when the capture is 'this'.
  const ValueDecl *VD = Cap->capturesThis()
                            ? nullptr
                            : Cap->getCapturedVar()->getCanonicalDecl();

  // for map(to: lambda): skip here, processing it in
  // generateDefaultMapInfo
  if (LambdasMap.count(VD))
    return;

  // If this declaration appears in a is_device_ptr clause we just have to
  // pass the pointer by value. If it is a reference to a declaration, we just
  // pass its value.
  if (DevPointersMap.count(VD)) {
    CombinedInfo.Exprs.push_back(VD);
    CombinedInfo.BasePointers.emplace_back(Arg, VD);
    CombinedInfo.Pointers.push_back(Arg);
    CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
        CGF.getTypeSize(CGF.getContext().VoidPtrTy), CGF.Int64Ty,
        /*isSigned=*/true));
    CombinedInfo.Types.push_back(
        (Cap->capturesVariable() ? OMP_MAP_TO : OMP_MAP_LITERAL) |
        OMP_MAP_TARGET_PARAM);
    CombinedInfo.Mappers.push_back(nullptr);
    return;
  }

  // One tuple per component list of a map clause that refers to VD:
  // (components, map type, map type modifiers, is-implicit, mapper, original
  // expression or null).
  using MapData =
      std::tuple<OMPClauseMappableExprCommon::MappableExprComponentListRef,
                 OpenMPMapClauseKind, ArrayRef<OpenMPMapModifierKind>, bool,
                 const ValueDecl *, const Expr *>;
  SmallVector<MapData, 4> DeclComponentLists;
  assert(CurDir.is<const OMPExecutableDirective *>() &&
         "Expect a executable directive");
  const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
  for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) {
    // EI walks the clause's variable references in step with the component
    // lists returned for VD.
    const auto *EI = C->getVarRefs().begin();
    for (const auto L : C->decl_component_lists(VD)) {
      const ValueDecl *VDecl, *Mapper;
      // The Expression is not correct if the mapping is implicit
      const Expr *E = (C->getMapLoc().isValid()) ? *EI : nullptr;
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      std::tie(VDecl, Components, Mapper) = L;
      assert(VDecl == VD && "We got information for the wrong declaration??");
      assert(!Components.empty() &&
             "Not expecting declaration with no component lists.");
      DeclComponentLists.emplace_back(Components, C->getMapType(),
                                      C->getMapTypeModifiers(),
                                      C->isImplicit(), Mapper, E);
      ++EI;
    }
  }
  // Stable reordering of the collected lists, driven by the 'present' map
  // modifier and the 'alloc' map type.
  // NOTE(review): the 'present' flags are read LHS-then-RHS, but the map
  // type used for HasAllocs comes from RHS and the one for HasAllocsR from
  // LHS, i.e. swapped relative to the modifiers - confirm this is intended.
  llvm::stable_sort(DeclComponentLists, [](const MapData &LHS,
                                           const MapData &RHS) {
    ArrayRef<OpenMPMapModifierKind> MapModifiers = std::get<2>(LHS);
    OpenMPMapClauseKind MapType = std::get<1>(RHS);
    bool HasPresent =
        llvm::is_contained(MapModifiers, clang::OMPC_MAP_MODIFIER_present);
    bool HasAllocs = MapType == OMPC_MAP_alloc;
    MapModifiers = std::get<2>(RHS);
    MapType = std::get<1>(LHS);
    bool HasPresentR =
        llvm::is_contained(MapModifiers, clang::OMPC_MAP_MODIFIER_present);
    bool HasAllocsR = MapType == OMPC_MAP_alloc;
    return (HasPresent && !HasPresentR) || (HasAllocs && !HasAllocsR);
  });

  // Find overlapping elements (including the offset from the base element).
  // OverlappedData maps a "base" list to the lists that overlap with it.
  llvm::SmallDenseMap<
      const MapData *,
      llvm::SmallVector<
          OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>,
      4>
      OverlappedData;
  size_t Count = 0;
  for (const MapData &L : DeclComponentLists) {
    OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
    OpenMPMapClauseKind MapType;
    ArrayRef<OpenMPMapModifierKind> MapModifiers;
    bool IsImplicit;
    const ValueDecl *Mapper;
    const Expr *VarRef;
    std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
        L;
    ++Count;
    // Compare L only with the lists that come after it, so that each pair
    // is examined once.
    for (const MapData &L1 : makeArrayRef(DeclComponentLists).slice(Count)) {
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components1;
      std::tie(Components1, MapType, MapModifiers, IsImplicit, Mapper,
               VarRef) = L1;
      // Walk both lists in reverse (starting at their last component) while
      // the components agree in expression class and declaration.
      auto CI = Components.rbegin();
      auto CE = Components.rend();
      auto SI = Components1.rbegin();
      auto SE = Components1.rend();
      for (; CI != CE && SI != SE; ++CI, ++SI) {
        if (CI->getAssociatedExpression()->getStmtClass() !=
            SI->getAssociatedExpression()->getStmtClass())
          break;
        // Are we dealing with different variables/fields?
        if (CI->getAssociatedDeclaration() != SI->getAssociatedDeclaration())
          break;
      }
      // Found overlapping if, at least for one component, reached the head
      // of the components list.
      if (CI == CE || SI == SE) {
        // Ignore it if it is the same component.
        if (CI == CE && SI == SE)
          continue;
        // It is the first unmatched component of the longer list.
        const auto It = (SI == SE) ? CI : SI;
        // If one component is a pointer and another one is a kind of
        // dereference of this pointer (array subscript, section, dereference,
        // etc.), it is not an overlapping.
        // Same, if one component is a base and another component is a
        // dereferenced pointer memberexpr with the same base.
        if (!isa<MemberExpr>(It->getAssociatedExpression()) ||
            (std::prev(It)->getAssociatedDeclaration() &&
             std::prev(It)
                 ->getAssociatedDeclaration()
                 ->getType()
                 ->isPointerType()) ||
            (It->getAssociatedDeclaration() &&
             It->getAssociatedDeclaration()->getType()->isPointerType() &&
             std::next(It) != CE && std::next(It) != SE))
          continue;
        // The list that was fully consumed is the base; the longer one is
        // recorded as overlapping with it.
        const MapData &BaseData = CI == CE ? L : L1;
        OMPClauseMappableExprCommon::MappableExprComponentListRef SubData =
            SI == SE ? Components : Components1;
        auto &OverlappedElements = OverlappedData.FindAndConstruct(&BaseData);
        OverlappedElements.getSecond().push_back(SubData);
      }
    }
  }
  // Sort the overlapped elements for each item.
  llvm::SmallVector<const FieldDecl *, 4> Layout;
  if (!OverlappedData.empty()) {
    // Strip pointer/array levels until the underlying type is reached, then
    // collect its fields into Layout.
    // NOTE(review): VD is dereferenced here although it is null for a 'this'
    // capture - confirm this path cannot be reached in that case.
    const Type *BaseType = VD->getType().getCanonicalType().getTypePtr();
    const Type *OrigType = BaseType->getPointeeOrArrayElementType();
    while (BaseType != OrigType) {
      BaseType = OrigType->getCanonicalTypeInternal().getTypePtr();
      OrigType = BaseType->getPointeeOrArrayElementType();
    }

    if (const auto *CRD = BaseType->getAsCXXRecordDecl())
      getPlainLayout(CRD, Layout, /*AsBase=*/false);
    else {
      const auto *RD = BaseType->getAsRecordDecl();
      Layout.append(RD->field_begin(), RD->field_end());
    }
  }
  for (auto &Pair : OverlappedData) {
    llvm::stable_sort(
        Pair.getSecond(),
        [&Layout](
            OMPClauseMappableExprCommon::MappableExprComponentListRef First,
            OMPClauseMappableExprCommon::MappableExprComponentListRef
                Second) {
          // Skip the common leading part of both lists (same walk as in the
          // overlap detection above).
          auto CI = First.rbegin();
          auto CE = First.rend();
          auto SI = Second.rbegin();
          auto SE = Second.rend();
          for (; CI != CE && SI != SE; ++CI, ++SI) {
            if (CI->getAssociatedExpression()->getStmtClass() !=
                SI->getAssociatedExpression()->getStmtClass())
              break;
            // Are we dealing with different variables/fields?
            if (CI->getAssociatedDeclaration() !=
                SI->getAssociatedDeclaration())
              break;
          }

          // Lists contain the same elements.
          if (CI == CE && SI == SE)
            return false;

          // List with less elements is less than list with more elements.
          if (CI == CE || SI == SE)
            return CI == CE;

          // Otherwise order by the first differing field: by field index
          // when both fields have the same parent, else by whichever of the
          // two appears first in Layout.
          const auto *FD1 = cast<FieldDecl>(CI->getAssociatedDeclaration());
          const auto *FD2 = cast<FieldDecl>(SI->getAssociatedDeclaration());
          if (FD1->getParent() == FD2->getParent())
            return FD1->getFieldIndex() < FD2->getFieldIndex();
          const auto *It =
              llvm::find_if(Layout, [FD1, FD2](const FieldDecl *FD) {
                return FD == FD1 || FD == FD2;
              });
          return *It == FD1;
        });
  }

  // Associated with a capture, because the mapping flags depend on it.
  // Go through all of the elements with the overlapped elements.
  bool IsFirstComponentList = true;
  for (const auto &Pair : OverlappedData) {
    const MapData &L = *Pair.getFirst();
    OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
    OpenMPMapClauseKind MapType;
    ArrayRef<OpenMPMapModifierKind> MapModifiers;
    bool IsImplicit;
    const ValueDecl *Mapper;
    const Expr *VarRef;
    std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
        L;
    ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
        OverlappedComponents = Pair.getSecond();
    generateInfoForComponentList(
        MapType, MapModifiers, llvm::None, Components, CombinedInfo,
        PartialStruct, IsFirstComponentList, IsImplicit, Mapper,
        /*ForDeviceAddr=*/false, VD, VarRef, OverlappedComponents);
    IsFirstComponentList = false;
  }
  // Go through other elements without overlapped elements.
  for (const MapData &L : DeclComponentLists) {
    OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
    OpenMPMapClauseKind MapType;
    ArrayRef<OpenMPMapModifierKind> MapModifiers;
    bool IsImplicit;
    const ValueDecl *Mapper;
    const Expr *VarRef;
    std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
        L;
    // Lists that served as a base in OverlappedData were already emitted by
    // the loop above.
    auto It = OverlappedData.find(&L);
    if (It == OverlappedData.end())
      generateInfoForComponentList(MapType, MapModifiers, llvm::None,
                                   Components, CombinedInfo, PartialStruct,
                                   IsFirstComponentList, IsImplicit, Mapper,
                                   /*ForDeviceAddr=*/false, VD, VarRef);
    // Cleared on every iteration, whether or not an entry was generated.
    IsFirstComponentList = false;
  }
}

/// Generate the default map information for a given capture \a CI,
/// record field declaration \a RI and captured value \a CV.
///
/// Exactly one entry is appended to \a CombinedInfo. It is always flagged as
/// a target parameter, has no user-defined mapper, and is flagged implicit
/// unless the captured variable is found in FirstPrivateDecls with a false
/// implicit flag.
void generateDefaultMapInfo(const CapturedStmt::Capture &CI,
                            const FieldDecl &RI, llvm::Value *CV,
                            MapCombinedInfoTy &CombinedInfo) const {
  bool IsImplicit = true;
  // Do the default mapping.
  if (CI.capturesThis()) {
    // 'this': map the pointee object, sized by the pointee type of the
    // field.
    CombinedInfo.Exprs.push_back(nullptr);
    CombinedInfo.BasePointers.push_back(CV);
    CombinedInfo.Pointers.push_back(CV);
    const auto *PtrTy = cast<PointerType>(RI.getType().getTypePtr());
    CombinedInfo.Sizes.push_back(
        CGF.Builder.CreateIntCast(CGF.getTypeSize(PtrTy->getPointeeType()),
                                  CGF.Int64Ty, /*isSigned=*/true));
    // Default map type.
    CombinedInfo.Types.push_back(OMP_MAP_TO | OMP_MAP_FROM);
  } else if (CI.capturesVariableByCopy()) {
    const VarDecl *VD = CI.getCapturedVar();
    CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
    CombinedInfo.BasePointers.push_back(CV);
    CombinedInfo.Pointers.push_back(CV);
    if (!RI.getType()->isAnyPointerType()) {
      // We have to signal to the runtime captures passed by value that are
      // not pointers.
      CombinedInfo.Types.push_back(OMP_MAP_LITERAL);
      CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
          CGF.getTypeSize(RI.getType()), CGF.Int64Ty, /*isSigned=*/true));
    } else {
      // Pointers are implicitly mapped with a zero size and no flags
      // (other than first map that is added for all implicit maps).
      CombinedInfo.Types.push_back(OMP_MAP_NONE);
      CombinedInfo.Sizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty));
    }
    // A firstprivate clause entry decides whether the map is implicit.
    auto I = FirstPrivateDecls.find(VD);
    if (I != FirstPrivateDecls.end())
      IsImplicit = I->getSecond();
  } else {
    assert(CI.capturesVariable() && "Expected captured reference.");
    const auto *PtrTy = cast<ReferenceType>(RI.getType().getTypePtr());
    QualType ElementType = PtrTy->getPointeeType();
    CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
        CGF.getTypeSize(ElementType), CGF.Int64Ty, /*isSigned=*/true));
    // The default map type for a scalar/complex type is 'to' because by
    // default the value doesn't have to be retrieved. For an aggregate
    // type, the default is 'tofrom'.
    CombinedInfo.Types.push_back(getMapModifiersForPrivateClauses(CI));
    const VarDecl *VD = CI.getCapturedVar();
    auto I = FirstPrivateDecls.find(VD);
    CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
    CombinedInfo.BasePointers.push_back(CV);
    // For a firstprivate variable of pointer type the pointer entry is the
    // value loaded through the reference rather than CV itself.
    if (I != FirstPrivateDecls.end() && ElementType->isAnyPointerType()) {
      Address PtrAddr = CGF.EmitLoadOfReference(CGF.MakeAddrLValue(
          CV, ElementType, CGF.getContext().getDeclAlign(VD),
          AlignmentSource::Decl));
      CombinedInfo.Pointers.push_back(PtrAddr.getPointer());
    } else {
      CombinedInfo.Pointers.push_back(CV);
    }
    if (I != FirstPrivateDecls.end())
      IsImplicit = I->getSecond();
  }
  // Every default map produces a single argument which is a target parameter.
  CombinedInfo.Types.back() |= OMP_MAP_TARGET_PARAM;

  // Add flag stating this is an implicit map.
  if (IsImplicit)
    CombinedInfo.Types.back() |= OMP_MAP_IMPLICIT;

  // No user-defined mapper for default mapping.
  CombinedInfo.Mappers.push_back(nullptr);
}
};
} // anonymous namespace

/// For every entry of \a CombinedInfo.NonContigInfo.Dims that is not 1,
/// emit a temporary array of descriptor_dim records (offset, count, stride),
/// fill it from the recorded Offsets/Counts/Strides in reverse dimension
/// order, and store the array's address into the matching slot of
/// \a Info.PointersArray.
static void emitNonContiguousDescriptor(
    CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo,
    CGOpenMPRuntime::TargetDataInfo &Info) {
  CodeGenModule &CGM = CGF.CGM;
  MappableExprsHandler::MapCombinedInfoTy::StructNonContiguousInfo
      &NonContigInfo = CombinedInfo.NonContigInfo;

  // Build an array of struct descriptor_dim and then assign it to
  // offload_args.
  //
  // struct descriptor_dim {
  //  uint64_t offset;
  //  uint64_t count;
  //  uint64_t stride;
  // };
  ASTContext &C = CGF.getContext();
  QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
  RecordDecl *RD;
  RD = C.buildImplicitRecord("descriptor_dim");
  RD->startDefinition();
  addFieldToRecordDecl(C, RD, Int64Ty);
  addFieldToRecordDecl(C, RD, Int64Ty);
  addFieldToRecordDecl(C, RD, Int64Ty);
  RD->completeDefinition();
  QualType DimTy = C.getRecordType(RD);

  // Field positions inside descriptor_dim, in the order they were added.
  enum { OffsetFD = 0, CountFD, StrideFD };
  // We need two index variable here since the size of "Dims" is the same as the
  // size of Components, however, the size of offset, count, and stride is equal
  // to the size of base declaration that is non-contiguous.
  for (unsigned I = 0, L = 0, E = NonContigInfo.Dims.size(); I < E; ++I) {
    // Skip emitting ir if dimension size is 1 since it cannot be
    // non-contiguous.
    if (NonContigInfo.Dims[I] == 1)
      continue;
    llvm::APInt Size(/*numBits=*/32, NonContigInfo.Dims[I]);
    QualType ArrayTy =
        C.getConstantArrayType(DimTy, Size, nullptr, ArrayType::Normal, 0);
    Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims");
    for (unsigned II = 0, EE = NonContigInfo.Dims[I]; II < EE; ++II) {
      // Element II of the descriptor array is filled from the recorded data
      // at the mirrored index.
      unsigned RevIdx = EE - II - 1;
      LValue DimsLVal = CGF.MakeAddrLValue(
          CGF.Builder.CreateConstArrayGEP(DimsAddr, II), DimTy);
      // Offset
      LValue OffsetLVal = CGF.EmitLValueForField(
          DimsLVal, *std::next(RD->field_begin(), OffsetFD));
      CGF.EmitStoreOfScalar(NonContigInfo.Offsets[L][RevIdx], OffsetLVal);
      // Count
      LValue CountLVal = CGF.EmitLValueForField(
          DimsLVal, *std::next(RD->field_begin(), CountFD));
      CGF.EmitStoreOfScalar(NonContigInfo.Counts[L][RevIdx], CountLVal);
      // Stride
      LValue StrideLVal = CGF.EmitLValueForField(
          DimsLVal, *std::next(RD->field_begin(), StrideFD));
      CGF.EmitStoreOfScalar(NonContigInfo.Strides[L][RevIdx], StrideLVal);
    }
    // args[I] = &dims
    Address DAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        DimsAddr, CGM.Int8PtrTy);
    llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32(
        llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
        Info.PointersArray, 0, I);
    Address PAddr(P, CGF.getPointerAlign());
    CGF.Builder.CreateStore(DAddr.getPointer(), PAddr);
    // L only advances for entries that produced a descriptor.
    ++L;
  }
}

// Try to extract the base declaration from a `this->x` expression if possible.
9540 static ValueDecl *getDeclFromThisExpr(const Expr *E) { 9541 if (!E) 9542 return nullptr; 9543 9544 if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E->IgnoreParenCasts())) 9545 if (const MemberExpr *ME = 9546 dyn_cast<MemberExpr>(OASE->getBase()->IgnoreParenImpCasts())) 9547 return ME->getMemberDecl(); 9548 return nullptr; 9549 } 9550 9551 /// Emit a string constant containing the names of the values mapped to the 9552 /// offloading runtime library. 9553 llvm::Constant * 9554 emitMappingInformation(CodeGenFunction &CGF, llvm::OpenMPIRBuilder &OMPBuilder, 9555 MappableExprsHandler::MappingExprInfo &MapExprs) { 9556 9557 if (!MapExprs.getMapDecl() && !MapExprs.getMapExpr()) 9558 return OMPBuilder.getOrCreateDefaultSrcLocStr(); 9559 9560 SourceLocation Loc; 9561 if (!MapExprs.getMapDecl() && MapExprs.getMapExpr()) { 9562 if (const ValueDecl *VD = getDeclFromThisExpr(MapExprs.getMapExpr())) 9563 Loc = VD->getLocation(); 9564 else 9565 Loc = MapExprs.getMapExpr()->getExprLoc(); 9566 } else { 9567 Loc = MapExprs.getMapDecl()->getLocation(); 9568 } 9569 9570 std::string ExprName = ""; 9571 if (MapExprs.getMapExpr()) { 9572 PrintingPolicy P(CGF.getContext().getLangOpts()); 9573 llvm::raw_string_ostream OS(ExprName); 9574 MapExprs.getMapExpr()->printPretty(OS, nullptr, P); 9575 OS.flush(); 9576 } else { 9577 ExprName = MapExprs.getMapDecl()->getNameAsString(); 9578 } 9579 9580 PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc); 9581 return OMPBuilder.getOrCreateSrcLocStr(PLoc.getFilename(), ExprName.c_str(), 9582 PLoc.getLine(), PLoc.getColumn()); 9583 } 9584 9585 /// Emit the arrays used to pass the captures and map information to the 9586 /// offloading runtime library. If there is no map or capture information, 9587 /// return nullptr by reference. 
static void emitOffloadingArrays(
    CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo,
    CGOpenMPRuntime::TargetDataInfo &Info, llvm::OpenMPIRBuilder &OMPBuilder,
    bool IsNonContiguous = false) {
  // Overview: Info is reset and then filled with the base-pointer, pointer,
  // size, map-type, map-name and mapper arrays for every entry of
  // CombinedInfo. When IsNonContiguous is set and offsets were recorded, the
  // per-dimension descriptors are emitted at the very end.
  CodeGenModule &CGM = CGF.CGM;
  ASTContext &Ctx = CGF.getContext();

  // Reset the array information.
  Info.clearArrayInfo();
  Info.NumberOfPtrs = CombinedInfo.BasePointers.size();

  if (Info.NumberOfPtrs) {
    // Detect if we have any capture size requiring runtime evaluation of the
    // size so that a constant array could be eventually used.
    bool hasRuntimeEvaluationCaptureSize = false;
    for (llvm::Value *S : CombinedInfo.Sizes)
      if (!isa<llvm::Constant>(S)) {
        hasRuntimeEvaluationCaptureSize = true;
        break;
      }

    // Array type `void *[NumberOfPtrs]`, shared by the base-pointer, pointer
    // and mapper temporaries below.
    llvm::APInt PointerNumAP(32, Info.NumberOfPtrs, /*isSigned=*/true);
    QualType PointerArrayType = Ctx.getConstantArrayType(
        Ctx.VoidPtrTy, PointerNumAP, nullptr, ArrayType::Normal,
        /*IndexTypeQuals=*/0);

    Info.BasePointersArray =
        CGF.CreateMemTemp(PointerArrayType, ".offload_baseptrs").getPointer();
    Info.PointersArray =
        CGF.CreateMemTemp(PointerArrayType, ".offload_ptrs").getPointer();
    // The Address is kept (not just the pointer) because the fill loop below
    // indexes it with CreateConstArrayGEP.
    Address MappersArray =
        CGF.CreateMemTemp(PointerArrayType, ".offload_mappers");
    Info.MappersArray = MappersArray.getPointer();

    // If we don't have any VLA types or other types that require runtime
    // evaluation, we can use a constant array for the map sizes, otherwise we
    // need to fill up the arrays as we do for the pointers.
    QualType Int64Ty =
        Ctx.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
    if (hasRuntimeEvaluationCaptureSize) {
      // Only the storage is created here; the values are stored by the fill
      // loop further down.
      QualType SizeArrayType = Ctx.getConstantArrayType(
          Int64Ty, PointerNumAP, nullptr, ArrayType::Normal,
          /*IndexTypeQuals=*/0);
      Info.SizesArray =
          CGF.CreateMemTemp(SizeArrayType, ".offload_sizes").getPointer();
    } else {
      // We expect all the sizes to be constant, so we collect them to create
      // a constant array.
      SmallVector<llvm::Constant *, 16> ConstSizes;
      for (unsigned I = 0, E = CombinedInfo.Sizes.size(); I < E; ++I) {
        // For entries flagged OMP_MAP_NON_CONTIG the size slot carries the
        // recorded dimension count instead of the size.
        if (IsNonContiguous &&
            (CombinedInfo.Types[I] & MappableExprsHandler::OMP_MAP_NON_CONTIG)) {
          ConstSizes.push_back(llvm::ConstantInt::get(
              CGF.Int64Ty, CombinedInfo.NonContigInfo.Dims[I]));
        } else {
          ConstSizes.push_back(cast<llvm::Constant>(CombinedInfo.Sizes[I]));
        }
      }

      // Emit the sizes as a private, constant, unnamed_addr global.
      auto *SizesArrayInit = llvm::ConstantArray::get(
          llvm::ArrayType::get(CGM.Int64Ty, ConstSizes.size()), ConstSizes);
      std::string Name = CGM.getOpenMPRuntime().getName({"offload_sizes"});
      auto *SizesArrayGbl = new llvm::GlobalVariable(
          CGM.getModule(), SizesArrayInit->getType(),
          /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
          SizesArrayInit, Name);
      SizesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
      Info.SizesArray = SizesArrayGbl;
    }

    // The map types are always constant so we don't need to generate code to
    // fill arrays. Instead, we create an array constant.
    SmallVector<uint64_t, 4> Mapping(CombinedInfo.Types.size(), 0);
    llvm::copy(CombinedInfo.Types, Mapping.begin());
    std::string MaptypesName =
        CGM.getOpenMPRuntime().getName({"offload_maptypes"});
    auto *MapTypesArrayGbl =
        OMPBuilder.createOffloadMaptypes(Mapping, MaptypesName);
    Info.MapTypesArray = MapTypesArrayGbl;

    // The information types are only built if there is debug information
    // requested.
    if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo) {
      // Without debug info the names array is a null i8* constant.
      Info.MapNamesArray = llvm::Constant::getNullValue(
          llvm::Type::getInt8Ty(CGF.Builder.getContext())->getPointerTo());
    } else {
      // One source-location string per mapped expression.
      auto fillInfoMap = [&](MappableExprsHandler::MappingExprInfo &MapExpr) {
        return emitMappingInformation(CGF, OMPBuilder, MapExpr);
      };
      SmallVector<llvm::Constant *, 4> InfoMap(CombinedInfo.Exprs.size());
      llvm::transform(CombinedInfo.Exprs, InfoMap.begin(), fillInfoMap);
      std::string MapnamesName =
          CGM.getOpenMPRuntime().getName({"offload_mapnames"});
      auto *MapNamesArrayGbl =
          OMPBuilder.createOffloadMapnames(InfoMap, MapnamesName);
      Info.MapNamesArray = MapNamesArrayGbl;
    }

    // If there's a present map type modifier, it must not be applied to the end
    // of a region, so generate a separate map type array in that case.
    if (Info.separateBeginEndCalls()) {
      bool EndMapTypesDiffer = false;
      // Mapping is modified in place; it was already consumed above to create
      // the begin-of-region map types array.
      for (uint64_t &Type : Mapping) {
        if (Type & MappableExprsHandler::OMP_MAP_PRESENT) {
          Type &= ~MappableExprsHandler::OMP_MAP_PRESENT;
          EndMapTypesDiffer = true;
        }
      }
      // MapTypesArrayEnd is only set when at least one entry changed.
      if (EndMapTypesDiffer) {
        MapTypesArrayGbl =
            OMPBuilder.createOffloadMaptypes(Mapping, MaptypesName);
        Info.MapTypesArrayEnd = MapTypesArrayGbl;
      }
    }

    // Fill the per-entry slots of the temporaries created above.
    for (unsigned I = 0; I < Info.NumberOfPtrs; ++I) {
      // Base pointer: the slot address is cast to "pointer to the value's
      // type" so the value can be stored without converting it.
      llvm::Value *BPVal = *CombinedInfo.BasePointers[I];
      llvm::Value *BP = CGF.Builder.CreateConstInBoundsGEP2_32(
          llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
          Info.BasePointersArray, 0, I);
      BP = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          BP, BPVal->getType()->getPointerTo(/*AddrSpace=*/0));
      Address BPAddr(BP, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
      CGF.Builder.CreateStore(BPVal, BPAddr);

      // Remember where the base pointer of a device-pointer declaration was
      // stored, when the caller asked for that information.
      if (Info.requiresDevicePointerInfo())
        if (const ValueDecl *DevVD =
                CombinedInfo.BasePointers[I].getDevicePtrDecl())
          Info.CaptureDeviceAddrMap.try_emplace(DevVD, BPAddr);

      // Pointer: same scheme as the base pointer.
      llvm::Value *PVal = CombinedInfo.Pointers[I];
      llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32(
          llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
          Info.PointersArray, 0, I);
      P = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          P, PVal->getType()->getPointerTo(/*AddrSpace=*/0));
      Address PAddr(P, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
      CGF.Builder.CreateStore(PVal, PAddr);

      // Sizes are stored here only when the sizes array is a temporary.
      // NOTE(review): unlike the constant path above, this path stores
      // CombinedInfo.Sizes[I] even for OMP_MAP_NON_CONTIG entries - confirm
      // that is intended.
      if (hasRuntimeEvaluationCaptureSize) {
        llvm::Value *S = CGF.Builder.CreateConstInBoundsGEP2_32(
            llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs),
            Info.SizesArray,
            /*Idx0=*/0,
            /*Idx1=*/I);
        Address SAddr(S, Ctx.getTypeAlignInChars(Int64Ty));
        CGF.Builder.CreateStore(CGF.Builder.CreateIntCast(CombinedInfo.Sizes[I],
                                                          CGM.Int64Ty,
                                                          /*isSigned=*/true),
                                SAddr);
      }

      // Fill up the mapper array.
      // A null pointer is stored unless the entry has a user-defined mapper.
      llvm::Value *MFunc = llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
      if (CombinedInfo.Mappers[I]) {
        MFunc = CGM.getOpenMPRuntime().getOrCreateUserDefinedMapperFunc(
            cast<OMPDeclareMapperDecl>(CombinedInfo.Mappers[I]));
        MFunc = CGF.Builder.CreatePointerCast(MFunc, CGM.VoidPtrTy);
        Info.HasMapper = true;
      }
      Address MAddr = CGF.Builder.CreateConstArrayGEP(MappersArray, I);
      CGF.Builder.CreateStore(MFunc, MAddr);
    }
  }

  // Descriptors are only needed when non-contiguous handling was requested,
  // offsets were recorded, and there is at least one pointer.
  if (!IsNonContiguous || CombinedInfo.NonContigInfo.Offsets.empty() ||
      Info.NumberOfPtrs == 0)
    return;

  emitNonContiguousDescriptor(CGF, CombinedInfo, Info);
}

namespace {
/// Additional arguments for emitOffloadingArraysArgument function.
struct ArgumentsOptions {
  /// True when the arguments are emitted for the end-of-region runtime call.
  bool ForEndCall = false;
  ArgumentsOptions() = default;
  ArgumentsOptions(bool ForEndCall) : ForEndCall(ForEndCall) {}
};
} // namespace

/// Emit the arguments to be passed to the runtime library based on the
/// arrays of base pointers, pointers, sizes, map types, map names, and
/// mappers. If ForEndCall, emit map types to be passed for the end of the
/// region instead of the beginning.
9773 static void emitOffloadingArraysArgument( 9774 CodeGenFunction &CGF, llvm::Value *&BasePointersArrayArg, 9775 llvm::Value *&PointersArrayArg, llvm::Value *&SizesArrayArg, 9776 llvm::Value *&MapTypesArrayArg, llvm::Value *&MapNamesArrayArg, 9777 llvm::Value *&MappersArrayArg, CGOpenMPRuntime::TargetDataInfo &Info, 9778 const ArgumentsOptions &Options = ArgumentsOptions()) { 9779 assert((!Options.ForEndCall || Info.separateBeginEndCalls()) && 9780 "expected region end call to runtime only when end call is separate"); 9781 CodeGenModule &CGM = CGF.CGM; 9782 if (Info.NumberOfPtrs) { 9783 BasePointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( 9784 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs), 9785 Info.BasePointersArray, 9786 /*Idx0=*/0, /*Idx1=*/0); 9787 PointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( 9788 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs), 9789 Info.PointersArray, 9790 /*Idx0=*/0, 9791 /*Idx1=*/0); 9792 SizesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( 9793 llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs), Info.SizesArray, 9794 /*Idx0=*/0, /*Idx1=*/0); 9795 MapTypesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( 9796 llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs), 9797 Options.ForEndCall && Info.MapTypesArrayEnd ? Info.MapTypesArrayEnd 9798 : Info.MapTypesArray, 9799 /*Idx0=*/0, 9800 /*Idx1=*/0); 9801 9802 // Only emit the mapper information arrays if debug information is 9803 // requested. 
9804 if (CGF.CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo) 9805 MapNamesArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy); 9806 else 9807 MapNamesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( 9808 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs), 9809 Info.MapNamesArray, 9810 /*Idx0=*/0, 9811 /*Idx1=*/0); 9812 // If there is no user-defined mapper, set the mapper array to nullptr to 9813 // avoid an unnecessary data privatization 9814 if (!Info.HasMapper) 9815 MappersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy); 9816 else 9817 MappersArrayArg = 9818 CGF.Builder.CreatePointerCast(Info.MappersArray, CGM.VoidPtrPtrTy); 9819 } else { 9820 BasePointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy); 9821 PointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy); 9822 SizesArrayArg = llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo()); 9823 MapTypesArrayArg = 9824 llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo()); 9825 MapNamesArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy); 9826 MappersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy); 9827 } 9828 } 9829 9830 /// Check for inner distribute directive. 
9831 static const OMPExecutableDirective * 9832 getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) { 9833 const auto *CS = D.getInnermostCapturedStmt(); 9834 const auto *Body = 9835 CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true); 9836 const Stmt *ChildStmt = 9837 CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body); 9838 9839 if (const auto *NestedDir = 9840 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) { 9841 OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind(); 9842 switch (D.getDirectiveKind()) { 9843 case OMPD_target: 9844 if (isOpenMPDistributeDirective(DKind)) 9845 return NestedDir; 9846 if (DKind == OMPD_teams) { 9847 Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers( 9848 /*IgnoreCaptured=*/true); 9849 if (!Body) 9850 return nullptr; 9851 ChildStmt = CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body); 9852 if (const auto *NND = 9853 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) { 9854 DKind = NND->getDirectiveKind(); 9855 if (isOpenMPDistributeDirective(DKind)) 9856 return NND; 9857 } 9858 } 9859 return nullptr; 9860 case OMPD_target_teams: 9861 if (isOpenMPDistributeDirective(DKind)) 9862 return NestedDir; 9863 return nullptr; 9864 case OMPD_target_parallel: 9865 case OMPD_target_simd: 9866 case OMPD_target_parallel_for: 9867 case OMPD_target_parallel_for_simd: 9868 return nullptr; 9869 case OMPD_target_teams_distribute: 9870 case OMPD_target_teams_distribute_simd: 9871 case OMPD_target_teams_distribute_parallel_for: 9872 case OMPD_target_teams_distribute_parallel_for_simd: 9873 case OMPD_parallel: 9874 case OMPD_for: 9875 case OMPD_parallel_for: 9876 case OMPD_parallel_master: 9877 case OMPD_parallel_sections: 9878 case OMPD_for_simd: 9879 case OMPD_parallel_for_simd: 9880 case OMPD_cancel: 9881 case OMPD_cancellation_point: 9882 case OMPD_ordered: 9883 case OMPD_threadprivate: 9884 case OMPD_allocate: 9885 case OMPD_task: 9886 case OMPD_simd: 9887 case OMPD_tile: 9888 
case OMPD_unroll: 9889 case OMPD_sections: 9890 case OMPD_section: 9891 case OMPD_single: 9892 case OMPD_master: 9893 case OMPD_critical: 9894 case OMPD_taskyield: 9895 case OMPD_barrier: 9896 case OMPD_taskwait: 9897 case OMPD_taskgroup: 9898 case OMPD_atomic: 9899 case OMPD_flush: 9900 case OMPD_depobj: 9901 case OMPD_scan: 9902 case OMPD_teams: 9903 case OMPD_target_data: 9904 case OMPD_target_exit_data: 9905 case OMPD_target_enter_data: 9906 case OMPD_distribute: 9907 case OMPD_distribute_simd: 9908 case OMPD_distribute_parallel_for: 9909 case OMPD_distribute_parallel_for_simd: 9910 case OMPD_teams_distribute: 9911 case OMPD_teams_distribute_simd: 9912 case OMPD_teams_distribute_parallel_for: 9913 case OMPD_teams_distribute_parallel_for_simd: 9914 case OMPD_target_update: 9915 case OMPD_declare_simd: 9916 case OMPD_declare_variant: 9917 case OMPD_begin_declare_variant: 9918 case OMPD_end_declare_variant: 9919 case OMPD_declare_target: 9920 case OMPD_end_declare_target: 9921 case OMPD_declare_reduction: 9922 case OMPD_declare_mapper: 9923 case OMPD_taskloop: 9924 case OMPD_taskloop_simd: 9925 case OMPD_master_taskloop: 9926 case OMPD_master_taskloop_simd: 9927 case OMPD_parallel_master_taskloop: 9928 case OMPD_parallel_master_taskloop_simd: 9929 case OMPD_requires: 9930 case OMPD_metadirective: 9931 case OMPD_unknown: 9932 default: 9933 llvm_unreachable("Unexpected directive."); 9934 } 9935 } 9936 9937 return nullptr; 9938 } 9939 9940 /// Emit the user-defined mapper function. The code generation follows the 9941 /// pattern in the example below. 9942 /// \code 9943 /// void .omp_mapper.<type_name>.<mapper_id>.(void *rt_mapper_handle, 9944 /// void *base, void *begin, 9945 /// int64_t size, int64_t type, 9946 /// void *name = nullptr) { 9947 /// // Allocate space for an array section first or add a base/begin for 9948 /// // pointer dereference. 
///   if ((size > 1 || (base != begin && maptype.IsPtrAndObj)) &&
///       !maptype.IsDelete)
///     __tgt_push_mapper_component(rt_mapper_handle, base, begin,
///                                 size*sizeof(Ty), clearToFromMember(type));
///   // Map members.
///   for (unsigned i = 0; i < size; i++) {
///     // For each component specified by this mapper:
///     for (auto c : begin[i]->all_components) {
///       if (c.hasMapper())
///         (*c.Mapper())(rt_mapper_handle, c.arg_base, c.arg_begin, c.arg_size,
///                       c.arg_type, c.arg_name);
///       else
///         __tgt_push_mapper_component(rt_mapper_handle, c.arg_base,
///                                     c.arg_begin, c.arg_size, c.arg_type,
///                                     c.arg_name);
///     }
///   }
///   // Delete the array section.
///   if (size > 1 && maptype.IsDelete)
///     __tgt_push_mapper_component(rt_mapper_handle, base, begin,
///                                 size*sizeof(Ty), clearToFromMember(type));
/// }
/// \endcode
void CGOpenMPRuntime::emitUserDefinedMapper(const OMPDeclareMapperDecl *D,
                                            CodeGenFunction *CGF) {
  // A mapper function is emitted at most once per declaration.
  if (UDMMap.count(D) > 0)
    return;
  ASTContext &C = CGM.getContext();
  QualType Ty = D->getType();
  QualType PtrTy = C.getPointerType(Ty).withRestrict();
  QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
  auto *MapperVarDecl =
      cast<VarDecl>(cast<DeclRefExpr>(D->getMapperVarRef())->getDecl());
  SourceLocation Loc = D->getLocation();
  CharUnits ElementSize = C.getTypeSizeInChars(Ty);

  // Prepare mapper function arguments and attributes.
  // Argument order: handle, base, begin, size, type, name.
  ImplicitParamDecl HandleArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                              C.VoidPtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl BaseArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl BeginArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                             C.VoidPtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl SizeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl TypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl NameArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                            ImplicitParamDecl::Other);
  FunctionArgList Args;
  Args.push_back(&HandleArg);
  Args.push_back(&BaseArg);
  Args.push_back(&BeginArg);
  Args.push_back(&SizeArg);
  Args.push_back(&TypeArg);
  Args.push_back(&NameArg);
  const CGFunctionInfo &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  // The function name is built from "omp_mapper", the mangled name of the
  // mapped type and the mapper's own name.
  SmallString<64> TyStr;
  llvm::raw_svector_ostream Out(TyStr);
  CGM.getCXXABI().getMangleContext().mangleTypeName(Ty, Out);
  std::string Name = getName({"omp_mapper", TyStr, D->getName()});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
  // Start the mapper function code generation.
  // A dedicated CodeGenFunction is used; the caller's CGF (if any) is only
  // consulted at the very end for bookkeeping.
  CodeGenFunction MapperCGF(CGM);
  MapperCGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  // Compute the starting and end addresses of array elements.
  // NOTE(review): the loads of SizeArg and TypeArg below are typed as
  // pointer-to-Int64Ty although both parameters are declared with Int64Ty -
  // confirm this is intended.
  llvm::Value *Size = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&SizeArg), /*Volatile=*/false,
      C.getPointerType(Int64Ty), Loc);
  // Prepare common arguments for array initiation and deletion.
  llvm::Value *Handle = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&HandleArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
  llvm::Value *BaseIn = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&BaseArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
  llvm::Value *BeginIn = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&BeginArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
  // Convert the size in bytes into the number of array elements.
  Size = MapperCGF.Builder.CreateExactUDiv(
      Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity()));
  // [PtrBegin, PtrEnd) is the element range walked by the loop below.
  llvm::Value *PtrBegin = MapperCGF.Builder.CreateBitCast(
      BeginIn, CGM.getTypes().ConvertTypeForMem(PtrTy));
  llvm::Value *PtrEnd = MapperCGF.Builder.CreateGEP(
      PtrBegin->getType()->getPointerElementType(), PtrBegin, Size);
  llvm::Value *MapType = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&TypeArg), /*Volatile=*/false,
      C.getPointerType(Int64Ty), Loc);
  llvm::Value *MapName = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&NameArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);

  // Emit array initiation if this is an array section and \p MapType indicates
  // that memory allocation is required.
  llvm::BasicBlock *HeadBB = MapperCGF.createBasicBlock("omp.arraymap.head");
  emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
                             MapName, ElementSize, HeadBB, /*IsInit=*/true);

  // Emit a for loop to iterate through SizeArg of elements and map all of them.

  // Emit the loop header block.
  MapperCGF.EmitBlock(HeadBB);
  llvm::BasicBlock *BodyBB = MapperCGF.createBasicBlock("omp.arraymap.body");
  llvm::BasicBlock *DoneBB = MapperCGF.createBasicBlock("omp.done");
  // Evaluate whether the initial condition is satisfied.
  llvm::Value *IsEmpty =
      MapperCGF.Builder.CreateICmpEQ(PtrBegin, PtrEnd, "omp.arraymap.isempty");
  MapperCGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
  // The block holding the branch above is the first predecessor of the PHI.
  llvm::BasicBlock *EntryBB = MapperCGF.Builder.GetInsertBlock();

  // Emit the loop body block.
  MapperCGF.EmitBlock(BodyBB);
  // LastBB tracks the block the back-edge will come from; it is updated as the
  // per-component code below appends new blocks.
  llvm::BasicBlock *LastBB = BodyBB;
  llvm::PHINode *PtrPHI = MapperCGF.Builder.CreatePHI(
      PtrBegin->getType(), 2, "omp.arraymap.ptrcurrent");
  PtrPHI->addIncoming(PtrBegin, EntryBB);
  Address PtrCurrent =
      Address(PtrPHI, MapperCGF.GetAddrOfLocalVar(&BeginArg)
                          .getAlignment()
                          .alignmentOfArrayElement(ElementSize));
  // Privatize the declared variable of mapper to be the current array element.
  CodeGenFunction::OMPPrivateScope Scope(MapperCGF);
  Scope.addPrivate(MapperVarDecl, [PtrCurrent]() { return PtrCurrent; });
  (void)Scope.Privatize();

  // Get map clause information. Fill up the arrays with all mapped variables.
  MappableExprsHandler::MapCombinedInfoTy Info;
  MappableExprsHandler MEHandler(*D, MapperCGF);
  MEHandler.generateAllInfoForMapper(Info);

  // Call the runtime API __tgt_mapper_num_components to get the number of
  // pre-existing components.
  llvm::Value *OffloadingArgs[] = {Handle};
  llvm::Value *PreviousSize = MapperCGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___tgt_mapper_num_components),
      OffloadingArgs);
  // Shift the component count to the position given by getFlagMemberOffset()
  // so it can be added to each component's map type below.
  llvm::Value *ShiftedPreviousSize = MapperCGF.Builder.CreateShl(
      PreviousSize,
      MapperCGF.Builder.getInt64(MappableExprsHandler::getFlagMemberOffset()));

  // Fill up the runtime mapper handle for all components.
  for (unsigned I = 0; I < Info.BasePointers.size(); ++I) {
    llvm::Value *CurBaseArg = MapperCGF.Builder.CreateBitCast(
        *Info.BasePointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
    llvm::Value *CurBeginArg = MapperCGF.Builder.CreateBitCast(
        Info.Pointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
    llvm::Value *CurSizeArg = Info.Sizes[I];
    // The component name is only materialized when debug info is requested.
    llvm::Value *CurNameArg =
        (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo)
            ? llvm::ConstantPointerNull::get(CGM.VoidPtrTy)
            : emitMappingInformation(MapperCGF, OMPBuilder, Info.Exprs[I]);

    // Extract the MEMBER_OF field from the map type.
    llvm::Value *OriMapType = MapperCGF.Builder.getInt64(Info.Types[I]);
    llvm::Value *MemberMapType =
        MapperCGF.Builder.CreateNUWAdd(OriMapType, ShiftedPreviousSize);

    // Combine the map type inherited from user-defined mapper with that
    // specified in the program. According to the OMP_MAP_TO and OMP_MAP_FROM
    // bits of the \a MapType, which is the input argument of the mapper
    // function, the following code will set the OMP_MAP_TO and OMP_MAP_FROM
    // bits of MemberMapType.
    // [OpenMP 5.0], 1.2.6. map-type decay.
    //        | alloc |  to   | from  | tofrom | release | delete
    // ----------------------------------------------------------
    // alloc  | alloc | alloc | alloc | alloc  | release | delete
    // to     | alloc |  to   | alloc |   to   | release | delete
    // from   | alloc | alloc | from  |  from  | release | delete
    // tofrom | alloc |  to   | from  | tofrom | release | delete
    llvm::Value *LeftToFrom = MapperCGF.Builder.CreateAnd(
        MapType,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO |
                                   MappableExprsHandler::OMP_MAP_FROM));
    llvm::BasicBlock *AllocBB = MapperCGF.createBasicBlock("omp.type.alloc");
    llvm::BasicBlock *AllocElseBB =
        MapperCGF.createBasicBlock("omp.type.alloc.else");
    llvm::BasicBlock *ToBB = MapperCGF.createBasicBlock("omp.type.to");
    llvm::BasicBlock *ToElseBB = MapperCGF.createBasicBlock("omp.type.to.else");
    llvm::BasicBlock *FromBB = MapperCGF.createBasicBlock("omp.type.from");
    llvm::BasicBlock *EndBB = MapperCGF.createBasicBlock("omp.type.end");
    llvm::Value *IsAlloc = MapperCGF.Builder.CreateIsNull(LeftToFrom);
    MapperCGF.Builder.CreateCondBr(IsAlloc, AllocBB, AllocElseBB);
    // In case of alloc, clear OMP_MAP_TO and OMP_MAP_FROM.
    MapperCGF.EmitBlock(AllocBB);
    llvm::Value *AllocMapType = MapperCGF.Builder.CreateAnd(
        MemberMapType,
        MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO |
                                     MappableExprsHandler::OMP_MAP_FROM)));
    MapperCGF.Builder.CreateBr(EndBB);
    MapperCGF.EmitBlock(AllocElseBB);
    llvm::Value *IsTo = MapperCGF.Builder.CreateICmpEQ(
        LeftToFrom,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO));
    MapperCGF.Builder.CreateCondBr(IsTo, ToBB, ToElseBB);
    // In case of to, clear OMP_MAP_FROM.
    MapperCGF.EmitBlock(ToBB);
    llvm::Value *ToMapType = MapperCGF.Builder.CreateAnd(
        MemberMapType,
        MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_FROM));
    MapperCGF.Builder.CreateBr(EndBB);
    MapperCGF.EmitBlock(ToElseBB);
    llvm::Value *IsFrom = MapperCGF.Builder.CreateICmpEQ(
        LeftToFrom,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_FROM));
    MapperCGF.Builder.CreateCondBr(IsFrom, FromBB, EndBB);
    // In case of from, clear OMP_MAP_TO.
    MapperCGF.EmitBlock(FromBB);
    llvm::Value *FromMapType = MapperCGF.Builder.CreateAnd(
        MemberMapType,
        MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_TO));
    // In case of tofrom, do nothing.
    MapperCGF.EmitBlock(EndBB);
    LastBB = EndBB;
    // Merge the four alternatives. The tofrom case arrives directly from
    // ToElseBB and carries MemberMapType unchanged.
    llvm::PHINode *CurMapType =
        MapperCGF.Builder.CreatePHI(CGM.Int64Ty, 4, "omp.maptype");
    CurMapType->addIncoming(AllocMapType, AllocBB);
    CurMapType->addIncoming(ToMapType, ToBB);
    CurMapType->addIncoming(FromMapType, FromBB);
    CurMapType->addIncoming(MemberMapType, ToElseBB);

    // This array intentionally shadows the single-element OffloadingArgs
    // declared before the loop.
    llvm::Value *OffloadingArgs[] = {Handle,     CurBaseArg, CurBeginArg,
                                     CurSizeArg, CurMapType, CurNameArg};
    if (Info.Mappers[I]) {
      // Call the corresponding mapper function.
      llvm::Function *MapperFunc = getOrCreateUserDefinedMapperFunc(
          cast<OMPDeclareMapperDecl>(Info.Mappers[I]));
      assert(MapperFunc && "Expect a valid mapper function is available.");
      MapperCGF.EmitNounwindRuntimeCall(MapperFunc, OffloadingArgs);
    } else {
      // Call the runtime API __tgt_push_mapper_component to fill up the runtime
      // data structure.
      MapperCGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___tgt_push_mapper_component),
          OffloadingArgs);
    }
  }

  // Update the pointer to point to the next element that needs to be mapped,
  // and check whether we have mapped all elements.
  llvm::Type *ElemTy = PtrPHI->getType()->getPointerElementType();
  llvm::Value *PtrNext = MapperCGF.Builder.CreateConstGEP1_32(
      ElemTy, PtrPHI, /*Idx0=*/1, "omp.arraymap.next");
  // Back-edge of the loop: comes from the last block emitted for the body.
  PtrPHI->addIncoming(PtrNext, LastBB);
  llvm::Value *IsDone =
      MapperCGF.Builder.CreateICmpEQ(PtrNext, PtrEnd, "omp.arraymap.isdone");
  llvm::BasicBlock *ExitBB = MapperCGF.createBasicBlock("omp.arraymap.exit");
  MapperCGF.Builder.CreateCondBr(IsDone, ExitBB, BodyBB);

  MapperCGF.EmitBlock(ExitBB);
  // Emit array deletion if this is an array section and \p MapType indicates
  // that deletion is required.
  emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
                             MapName, ElementSize, DoneBB, /*IsInit=*/false);

  // Emit the function exit block.
  MapperCGF.EmitBlock(DoneBB, /*IsFinished=*/true);
  MapperCGF.FinishFunction();
  // Record the function so later requests for this declaration reuse it.
  UDMMap.try_emplace(D, Fn);
  // When emitted on behalf of a function, remember the declaration under that
  // function as well.
  if (CGF) {
    auto &Decls = FunctionUDMMap.FindAndConstruct(CGF->CurFn);
    Decls.second.push_back(D);
  }
}

/// Emit the array initialization or deletion portion for user-defined mapper
/// code generation. First, it evaluates whether an array section is mapped and
/// whether the \a MapType instructs to delete this section. If \a IsInit is
/// true, and \a MapType indicates to not delete this array, array
/// initialization code is generated. If \a IsInit is false, and \a MapType
/// indicates to delete this array, array deletion code is generated.
10227 void CGOpenMPRuntime::emitUDMapperArrayInitOrDel( 10228 CodeGenFunction &MapperCGF, llvm::Value *Handle, llvm::Value *Base, 10229 llvm::Value *Begin, llvm::Value *Size, llvm::Value *MapType, 10230 llvm::Value *MapName, CharUnits ElementSize, llvm::BasicBlock *ExitBB, 10231 bool IsInit) { 10232 StringRef Prefix = IsInit ? ".init" : ".del"; 10233 10234 // Evaluate if this is an array section. 10235 llvm::BasicBlock *BodyBB = 10236 MapperCGF.createBasicBlock(getName({"omp.array", Prefix})); 10237 llvm::Value *IsArray = MapperCGF.Builder.CreateICmpSGT( 10238 Size, MapperCGF.Builder.getInt64(1), "omp.arrayinit.isarray"); 10239 llvm::Value *DeleteBit = MapperCGF.Builder.CreateAnd( 10240 MapType, 10241 MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_DELETE)); 10242 llvm::Value *DeleteCond; 10243 llvm::Value *Cond; 10244 if (IsInit) { 10245 // base != begin? 10246 llvm::Value *BaseIsBegin = MapperCGF.Builder.CreateIsNotNull( 10247 MapperCGF.Builder.CreatePtrDiff(Base, Begin)); 10248 // IsPtrAndObj? 10249 llvm::Value *PtrAndObjBit = MapperCGF.Builder.CreateAnd( 10250 MapType, 10251 MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_PTR_AND_OBJ)); 10252 PtrAndObjBit = MapperCGF.Builder.CreateIsNotNull(PtrAndObjBit); 10253 BaseIsBegin = MapperCGF.Builder.CreateAnd(BaseIsBegin, PtrAndObjBit); 10254 Cond = MapperCGF.Builder.CreateOr(IsArray, BaseIsBegin); 10255 DeleteCond = MapperCGF.Builder.CreateIsNull( 10256 DeleteBit, getName({"omp.array", Prefix, ".delete"})); 10257 } else { 10258 Cond = IsArray; 10259 DeleteCond = MapperCGF.Builder.CreateIsNotNull( 10260 DeleteBit, getName({"omp.array", Prefix, ".delete"})); 10261 } 10262 Cond = MapperCGF.Builder.CreateAnd(Cond, DeleteCond); 10263 MapperCGF.Builder.CreateCondBr(Cond, BodyBB, ExitBB); 10264 10265 MapperCGF.EmitBlock(BodyBB); 10266 // Get the array size by multiplying element size and element number (i.e., \p 10267 // Size). 
10268 llvm::Value *ArraySize = MapperCGF.Builder.CreateNUWMul( 10269 Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity())); 10270 // Remove OMP_MAP_TO and OMP_MAP_FROM from the map type, so that it achieves 10271 // memory allocation/deletion purpose only. 10272 llvm::Value *MapTypeArg = MapperCGF.Builder.CreateAnd( 10273 MapType, 10274 MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO | 10275 MappableExprsHandler::OMP_MAP_FROM))); 10276 MapTypeArg = MapperCGF.Builder.CreateOr( 10277 MapTypeArg, 10278 MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_IMPLICIT)); 10279 10280 // Call the runtime API __tgt_push_mapper_component to fill up the runtime 10281 // data structure. 10282 llvm::Value *OffloadingArgs[] = {Handle, Base, Begin, 10283 ArraySize, MapTypeArg, MapName}; 10284 MapperCGF.EmitRuntimeCall( 10285 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), 10286 OMPRTL___tgt_push_mapper_component), 10287 OffloadingArgs); 10288 } 10289 10290 llvm::Function *CGOpenMPRuntime::getOrCreateUserDefinedMapperFunc( 10291 const OMPDeclareMapperDecl *D) { 10292 auto I = UDMMap.find(D); 10293 if (I != UDMMap.end()) 10294 return I->second; 10295 emitUserDefinedMapper(D); 10296 return UDMMap.lookup(D); 10297 } 10298 10299 void CGOpenMPRuntime::emitTargetNumIterationsCall( 10300 CodeGenFunction &CGF, const OMPExecutableDirective &D, 10301 llvm::Value *DeviceID, 10302 llvm::function_ref<llvm::Value *(CodeGenFunction &CGF, 10303 const OMPLoopDirective &D)> 10304 SizeEmitter) { 10305 OpenMPDirectiveKind Kind = D.getDirectiveKind(); 10306 const OMPExecutableDirective *TD = &D; 10307 // Get nested teams distribute kind directive, if any. 
// D itself is kept only when its kind is both a distribute and a teams
// directive; in every other case a nested directive is looked up instead.
  if (!isOpenMPDistributeDirective(Kind) || !isOpenMPTeamsDirective(Kind))
    TD = getNestedDistributeDirective(CGM.getContext(), D);
  // No suitable loop directive found: no trip count is pushed.
  if (!TD)
    return;
  const auto *LD = cast<OMPLoopDirective>(TD);
  auto &&CodeGen = [LD, DeviceID, SizeEmitter, &D, this](CodeGenFunction &CGF,
                                                         PrePostActionTy &) {
    // The runtime call is emitted only when the size emitter yields a value.
    if (llvm::Value *NumIterations = SizeEmitter(CGF, *LD)) {
      llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());
      llvm::Value *Args[] = {RTLoc, DeviceID, NumIterations};
      CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___kmpc_push_target_tripcount_mapper),
          Args);
    }
  };
  emitInlinedDirective(CGF, OMPD_unknown, CodeGen);
}

/// Emit the host-side code that launches the target region of directive \p D.
///
/// When \p OutlinedFnID is non-null the offloading path is emitted (selected
/// at run time by \p IfCond when one is given): the captured variables are
/// turned into offloading arrays, one of the __tgt_target*_mapper runtime
/// entry points is called, and \p OutlinedFn is called on the host if that
/// call returns a non-zero value. When \p OutlinedFnID is null only the host
/// call to \p OutlinedFn is emitted.
///
/// If \p D carries a 'depend' or 'nowait' clause the generated code is routed
/// through EmitOMPTargetTaskBasedDirective instead of being emitted inline.
///
/// \param Device device clause expression together with its modifier; an
/// 'ancestor' modifier makes the region run on the host.
/// \param SizeEmitter callback forwarded to emitTargetNumIterationsCall to
/// obtain the loop trip count.
void CGOpenMPRuntime::emitTargetCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
    llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
    llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
                                     const OMPLoopDirective &D)>
        SizeEmitter) {
  if (!CGF.HaveInsertPoint())
    return;

  assert(OutlinedFn && "Invalid outlined function!");

  const bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>() ||
                                 D.hasClausesOfKind<OMPNowaitClause>();
  llvm::SmallVector<llvm::Value *, 16> CapturedVars;
  const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
  // Collect the captured variables once up front; the lambdas below clear and
  // regenerate them when RequiresOuterTask is set.
  auto &&ArgsCodegen = [&CS, &CapturedVars](CodeGenFunction &CGF,
                                            PrePostActionTy &) {
    CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
  };
  emitInlinedDirective(CGF, OMPD_unknown, ArgsCodegen);

  // Written by TargetThenGen and read by ThenGen (both capture by reference).
  CodeGenFunction::OMPTargetDataInfo InputInfo;
  llvm::Value *MapTypesArray = nullptr;
  llvm::Value *MapNamesArray = nullptr;
  // Fill up the pointer arrays and transfer execution to the device.
  auto &&ThenGen = [this, Device, OutlinedFn, OutlinedFnID, &D, &InputInfo,
                    &MapTypesArray, &MapNamesArray, &CS, RequiresOuterTask,
                    &CapturedVars,
                    SizeEmitter](CodeGenFunction &CGF, PrePostActionTy &) {
    if (Device.getInt() == OMPC_DEVICE_ancestor) {
      // Reverse offloading is not supported, so just execute on the host.
      if (RequiresOuterTask) {
        CapturedVars.clear();
        CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
      }
      emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
      return;
    }

    // On top of the arrays that were filled up, the target offloading call
    // takes as arguments the device id as well as the host pointer. The host
    // pointer is used by the runtime library to identify the current target
    // region, so it only has to be unique and not necessarily point to
    // anything. It could be the pointer to the outlined function that
    // implements the target region, but we aren't using that so that the
    // compiler doesn't need to keep that, and could therefore inline the host
    // function if proven worthwhile during optimization.

    // From this point on, we need to have an ID of the target region defined.
    assert(OutlinedFnID && "Invalid outlined function ID!");

    // Emit device ID if any.
    llvm::Value *DeviceID;
    if (Device.getPointer()) {
      assert((Device.getInt() == OMPC_DEVICE_unknown ||
              Device.getInt() == OMPC_DEVICE_device_num) &&
             "Expected device_num modifier.");
      llvm::Value *DevVal = CGF.EmitScalarExpr(Device.getPointer());
      DeviceID =
          CGF.Builder.CreateIntCast(DevVal, CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Value *PointerNum =
        CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);

    // Return value of the runtime offloading call.
    llvm::Value *Return;

    llvm::Value *NumTeams = emitNumTeamsForTargetDirective(CGF, D);
    llvm::Value *NumThreads = emitNumThreadsForTargetDirective(CGF, D);

    // Source location for the ident struct
    llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());

    // Emit tripcount for the target loop-based directive.
    emitTargetNumIterationsCall(CGF, D, DeviceID, SizeEmitter);

    bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
    // The target region is an outlined function launched by the runtime
    // via calls __tgt_target() or __tgt_target_teams().
    //
    // __tgt_target() launches a target region with one team and one thread,
    // executing a serial region. This master thread may in turn launch
    // more threads within its team upon encountering a parallel region,
    // however, no additional teams can be launched on the device.
    //
    // __tgt_target_teams() launches a target region with one or more teams,
    // each with one or more threads. This call is required for target
    // constructs such as:
    //  'target teams'
    //  'target' / 'teams'
    //  'target teams distribute parallel for'
    //  'target parallel'
    // and so on.
    //
    // Note that on the host and CPU targets, the runtime implementation of
    // these calls simply call the outlined function without forking threads.
    // The outlined functions themselves have runtime calls to
    // __kmpc_fork_teams() and __kmpc_fork() for this purpose, codegen'd by
    // the compiler in emitTeamsCall() and emitParallelCall().
    //
    // In contrast, on the NVPTX target, the implementation of
    // __tgt_target_teams() launches a GPU kernel with the requested number
    // of teams and threads so no additional calls to the runtime are required.
    if (NumTeams) {
      // If we have NumTeams defined this means that we have an enclosed teams
      // region. Therefore we also expect to have NumThreads defined. These two
      // values should be defined in the presence of a teams directive,
      // regardless of having any clauses associated. If the user is using teams
      // but no clauses, these two values will be the default that should be
      // passed to the runtime library - a 32-bit integer with the value zero.
      assert(NumThreads && "Thread limit expression should be available along "
                           "with number of teams.");
      SmallVector<llvm::Value *> OffloadingArgs = {
          RTLoc,
          DeviceID,
          OutlinedFnID,
          PointerNum,
          InputInfo.BasePointersArray.getPointer(),
          InputInfo.PointersArray.getPointer(),
          InputInfo.SizesArray.getPointer(),
          MapTypesArray,
          MapNamesArray,
          InputInfo.MappersArray.getPointer(),
          NumTeams,
          NumThreads};
      if (HasNowait) {
        // Add int32_t depNum = 0, void *depList = nullptr, int32_t
        // noAliasDepNum = 0, void *noAliasDepList = nullptr.
        OffloadingArgs.push_back(CGF.Builder.getInt32(0));
        OffloadingArgs.push_back(llvm::ConstantPointerNull::get(CGM.VoidPtrTy));
        OffloadingArgs.push_back(CGF.Builder.getInt32(0));
        OffloadingArgs.push_back(llvm::ConstantPointerNull::get(CGM.VoidPtrTy));
      }
      Return = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), HasNowait
                                   ? OMPRTL___tgt_target_teams_nowait_mapper
                                   : OMPRTL___tgt_target_teams_mapper),
          OffloadingArgs);
    } else {
      // No teams: same argument list minus NumTeams/NumThreads, passed to the
      // non-teams entry points.
      SmallVector<llvm::Value *> OffloadingArgs = {
          RTLoc,
          DeviceID,
          OutlinedFnID,
          PointerNum,
          InputInfo.BasePointersArray.getPointer(),
          InputInfo.PointersArray.getPointer(),
          InputInfo.SizesArray.getPointer(),
          MapTypesArray,
          MapNamesArray,
          InputInfo.MappersArray.getPointer()};
      if (HasNowait) {
        // Add int32_t depNum = 0, void *depList = nullptr, int32_t
        // noAliasDepNum = 0, void *noAliasDepList = nullptr.
        OffloadingArgs.push_back(CGF.Builder.getInt32(0));
        OffloadingArgs.push_back(llvm::ConstantPointerNull::get(CGM.VoidPtrTy));
        OffloadingArgs.push_back(CGF.Builder.getInt32(0));
        OffloadingArgs.push_back(llvm::ConstantPointerNull::get(CGM.VoidPtrTy));
      }
      Return = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), HasNowait ? OMPRTL___tgt_target_nowait_mapper
                                         : OMPRTL___tgt_target_mapper),
          OffloadingArgs);
    }

    // Check the error code and execute the host version if required.
    llvm::BasicBlock *OffloadFailedBlock =
        CGF.createBasicBlock("omp_offload.failed");
    llvm::BasicBlock *OffloadContBlock =
        CGF.createBasicBlock("omp_offload.cont");
    // A non-zero return value is treated as failure.
    llvm::Value *Failed = CGF.Builder.CreateIsNotNull(Return);
    CGF.Builder.CreateCondBr(Failed, OffloadFailedBlock, OffloadContBlock);

    CGF.EmitBlock(OffloadFailedBlock);
    if (RequiresOuterTask) {
      CapturedVars.clear();
      CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
    }
    emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
    CGF.EmitBranch(OffloadContBlock);

    CGF.EmitBlock(OffloadContBlock, /*IsFinished=*/true);
  };

  // Notify that the host version must be executed.
  auto &&ElseGen = [this, &D, OutlinedFn, &CS, &CapturedVars,
                    RequiresOuterTask](CodeGenFunction &CGF,
                                       PrePostActionTy &) {
    if (RequiresOuterTask) {
      CapturedVars.clear();
      CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
    }
    emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
  };

  // Builds the map information for every capture, materializes the offloading
  // arrays and then runs ThenGen (inside a task when RequiresOuterTask).
  auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray,
                          &MapNamesArray, &CapturedVars, RequiresOuterTask,
                          &CS](CodeGenFunction &CGF, PrePostActionTy &) {
    // Fill up the arrays with all the captured variables.
    MappableExprsHandler::MapCombinedInfoTy CombinedInfo;

    // Get mappable expression information.
    MappableExprsHandler MEHandler(D, CGF);
    llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers;
    llvm::DenseSet<CanonicalDeclPtr<const Decl>> MappedVarSet;

    // Captures, captured-record fields and captured values are walked in
    // lock step.
    auto RI = CS.getCapturedRecordDecl()->field_begin();
    auto *CV = CapturedVars.begin();
    for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(),
                                              CE = CS.capture_end();
         CI != CE; ++CI, ++RI, ++CV) {
      MappableExprsHandler::MapCombinedInfoTy CurInfo;
      MappableExprsHandler::StructRangeInfoTy PartialStruct;

      // VLA sizes are passed to the outlined region by copy and do not have map
      // information associated.
      if (CI->capturesVariableArrayType()) {
        CurInfo.Exprs.push_back(nullptr);
        CurInfo.BasePointers.push_back(*CV);
        CurInfo.Pointers.push_back(*CV);
        CurInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
            CGF.getTypeSize(RI->getType()), CGF.Int64Ty, /*isSigned=*/true));
        // Copy to the device as an argument. No need to retrieve it.
        CurInfo.Types.push_back(MappableExprsHandler::OMP_MAP_LITERAL |
                                MappableExprsHandler::OMP_MAP_TARGET_PARAM |
                                MappableExprsHandler::OMP_MAP_IMPLICIT);
        CurInfo.Mappers.push_back(nullptr);
      } else {
        // If we have any information in the map clause, we use it, otherwise we
        // just do a default mapping.
        MEHandler.generateInfoForCapture(CI, *CV, CurInfo, PartialStruct);
        // A capture of 'this' is recorded in the set as a null declaration.
        if (!CI->capturesThis())
          MappedVarSet.insert(CI->getCapturedVar());
        else
          MappedVarSet.insert(nullptr);
        if (CurInfo.BasePointers.empty() && !PartialStruct.Base.isValid())
          MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurInfo);
        // Generate correct mapping for variables captured by reference in
        // lambdas.
        if (CI->capturesVariable())
          MEHandler.generateInfoForLambdaCaptures(CI->getCapturedVar(), *CV,
                                                  CurInfo, LambdaPointers);
      }
      // We expect to have at least an element of information for this capture.
      assert((!CurInfo.BasePointers.empty() || PartialStruct.Base.isValid()) &&
             "Non-existing map pointer for capture!");
      assert(CurInfo.BasePointers.size() == CurInfo.Pointers.size() &&
             CurInfo.BasePointers.size() == CurInfo.Sizes.size() &&
             CurInfo.BasePointers.size() == CurInfo.Types.size() &&
             CurInfo.BasePointers.size() == CurInfo.Mappers.size() &&
             "Inconsistent map information sizes!");

      // If there is an entry in PartialStruct it means we have a struct with
      // individual members mapped. Emit an extra combined entry.
      if (PartialStruct.Base.isValid()) {
        CombinedInfo.append(PartialStruct.PreliminaryMapData);
        MEHandler.emitCombinedEntry(
            CombinedInfo, CurInfo.Types, PartialStruct, nullptr,
            !PartialStruct.PreliminaryMapData.BasePointers.empty());
      }

      // We need to append the results of this capture to what we already have.
      CombinedInfo.append(CurInfo);
    }
    // Adjust MEMBER_OF flags for the lambdas captures.
    MEHandler.adjustMemberOfForLambdaCaptures(
        LambdaPointers, CombinedInfo.BasePointers, CombinedInfo.Pointers,
        CombinedInfo.Types);
    // Map any list items in a map clause that were not captures because they
    // weren't referenced within the construct.
    MEHandler.generateAllInfo(CombinedInfo, MappedVarSet);

    TargetDataInfo Info;
    // Fill up the arrays and create the arguments.
    emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder);
    emitOffloadingArraysArgument(
        CGF, Info.BasePointersArray, Info.PointersArray, Info.SizesArray,
        Info.MapTypesArray, Info.MapNamesArray, Info.MappersArray, Info,
        {/*ForEndTask=*/false});

    // Publish the arrays through the by-reference captures so that ThenGen
    // can pass them to the runtime call.
    InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
    InputInfo.BasePointersArray =
        Address(Info.BasePointersArray, CGM.getPointerAlign());
    InputInfo.PointersArray =
        Address(Info.PointersArray, CGM.getPointerAlign());
    InputInfo.SizesArray = Address(Info.SizesArray, CGM.getPointerAlign());
    InputInfo.MappersArray = Address(Info.MappersArray, CGM.getPointerAlign());
    MapTypesArray = Info.MapTypesArray;
    MapNamesArray = Info.MapNamesArray;
    if (RequiresOuterTask)
      CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
    else
      emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
  };

  // Host-only counterpart of TargetThenGen: no map information is built.
  auto &&TargetElseGen = [this, &ElseGen, &D, RequiresOuterTask](
                             CodeGenFunction &CGF, PrePostActionTy &) {
    if (RequiresOuterTask) {
      CodeGenFunction::OMPTargetDataInfo InputInfo;
      CGF.EmitOMPTargetTaskBasedDirective(D, ElseGen, InputInfo);
    } else {
      emitInlinedDirective(CGF, D.getDirectiveKind(), ElseGen);
    }
  };

  // If we have a target function ID it means that we need to support
  // offloading, otherwise, just execute on the host. We need to execute on host
  // regardless of the conditional in the if clause if, e.g., the user does not
  // specify target triples.
  if (OutlinedFnID) {
    if (IfCond) {
      emitIfClause(CGF, IfCond, TargetThenGen, TargetElseGen);
    } else {
      RegionCodeGenTy ThenRCG(TargetThenGen);
      ThenRCG(CGF);
    }
  } else {
    RegionCodeGenTy ElseRCG(TargetElseGen);
    ElseRCG(CGF);
  }
}

/// Walk \p S looking for target execution directives and emit the matching
/// device function for each one found. A null \p S is ignored. \p ParentName
/// is forwarded to the device-function emitters and to recursive calls.
void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S,
                                                    StringRef ParentName) {
  if (!S)
    return;

  // Codegen OMP target directives that offload compute to the device.
  bool RequiresDeviceCodegen =
      isa<OMPExecutableDirective>(S) &&
      isOpenMPTargetExecutionDirective(
          cast<OMPExecutableDirective>(S)->getDirectiveKind());

  if (RequiresDeviceCodegen) {
    const auto &E = *cast<OMPExecutableDirective>(S);
    unsigned DeviceID;
    unsigned FileID;
    unsigned Line;
    getTargetEntryUniqueInfo(CGM.getContext(), E.getBeginLoc(), DeviceID,
                             FileID, Line);

    // Is this a target region that should not be emitted as an entry point? If
    // so just signal we are done with this target region.
10676 if (!OffloadEntriesInfoManager.hasTargetRegionEntryInfo(DeviceID, FileID, 10677 ParentName, Line)) 10678 return; 10679 10680 switch (E.getDirectiveKind()) { 10681 case OMPD_target: 10682 CodeGenFunction::EmitOMPTargetDeviceFunction(CGM, ParentName, 10683 cast<OMPTargetDirective>(E)); 10684 break; 10685 case OMPD_target_parallel: 10686 CodeGenFunction::EmitOMPTargetParallelDeviceFunction( 10687 CGM, ParentName, cast<OMPTargetParallelDirective>(E)); 10688 break; 10689 case OMPD_target_teams: 10690 CodeGenFunction::EmitOMPTargetTeamsDeviceFunction( 10691 CGM, ParentName, cast<OMPTargetTeamsDirective>(E)); 10692 break; 10693 case OMPD_target_teams_distribute: 10694 CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction( 10695 CGM, ParentName, cast<OMPTargetTeamsDistributeDirective>(E)); 10696 break; 10697 case OMPD_target_teams_distribute_simd: 10698 CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction( 10699 CGM, ParentName, cast<OMPTargetTeamsDistributeSimdDirective>(E)); 10700 break; 10701 case OMPD_target_parallel_for: 10702 CodeGenFunction::EmitOMPTargetParallelForDeviceFunction( 10703 CGM, ParentName, cast<OMPTargetParallelForDirective>(E)); 10704 break; 10705 case OMPD_target_parallel_for_simd: 10706 CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction( 10707 CGM, ParentName, cast<OMPTargetParallelForSimdDirective>(E)); 10708 break; 10709 case OMPD_target_simd: 10710 CodeGenFunction::EmitOMPTargetSimdDeviceFunction( 10711 CGM, ParentName, cast<OMPTargetSimdDirective>(E)); 10712 break; 10713 case OMPD_target_teams_distribute_parallel_for: 10714 CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction( 10715 CGM, ParentName, 10716 cast<OMPTargetTeamsDistributeParallelForDirective>(E)); 10717 break; 10718 case OMPD_target_teams_distribute_parallel_for_simd: 10719 CodeGenFunction:: 10720 EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction( 10721 CGM, ParentName, 10722 
cast<OMPTargetTeamsDistributeParallelForSimdDirective>(E)); 10723 break; 10724 case OMPD_parallel: 10725 case OMPD_for: 10726 case OMPD_parallel_for: 10727 case OMPD_parallel_master: 10728 case OMPD_parallel_sections: 10729 case OMPD_for_simd: 10730 case OMPD_parallel_for_simd: 10731 case OMPD_cancel: 10732 case OMPD_cancellation_point: 10733 case OMPD_ordered: 10734 case OMPD_threadprivate: 10735 case OMPD_allocate: 10736 case OMPD_task: 10737 case OMPD_simd: 10738 case OMPD_tile: 10739 case OMPD_unroll: 10740 case OMPD_sections: 10741 case OMPD_section: 10742 case OMPD_single: 10743 case OMPD_master: 10744 case OMPD_critical: 10745 case OMPD_taskyield: 10746 case OMPD_barrier: 10747 case OMPD_taskwait: 10748 case OMPD_taskgroup: 10749 case OMPD_atomic: 10750 case OMPD_flush: 10751 case OMPD_depobj: 10752 case OMPD_scan: 10753 case OMPD_teams: 10754 case OMPD_target_data: 10755 case OMPD_target_exit_data: 10756 case OMPD_target_enter_data: 10757 case OMPD_distribute: 10758 case OMPD_distribute_simd: 10759 case OMPD_distribute_parallel_for: 10760 case OMPD_distribute_parallel_for_simd: 10761 case OMPD_teams_distribute: 10762 case OMPD_teams_distribute_simd: 10763 case OMPD_teams_distribute_parallel_for: 10764 case OMPD_teams_distribute_parallel_for_simd: 10765 case OMPD_target_update: 10766 case OMPD_declare_simd: 10767 case OMPD_declare_variant: 10768 case OMPD_begin_declare_variant: 10769 case OMPD_end_declare_variant: 10770 case OMPD_declare_target: 10771 case OMPD_end_declare_target: 10772 case OMPD_declare_reduction: 10773 case OMPD_declare_mapper: 10774 case OMPD_taskloop: 10775 case OMPD_taskloop_simd: 10776 case OMPD_master_taskloop: 10777 case OMPD_master_taskloop_simd: 10778 case OMPD_parallel_master_taskloop: 10779 case OMPD_parallel_master_taskloop_simd: 10780 case OMPD_requires: 10781 case OMPD_metadirective: 10782 case OMPD_unknown: 10783 default: 10784 llvm_unreachable("Unknown target directive for OpenMP device codegen."); 10785 } 10786 return; 
10787 } 10788 10789 if (const auto *E = dyn_cast<OMPExecutableDirective>(S)) { 10790 if (!E->hasAssociatedStmt() || !E->getAssociatedStmt()) 10791 return; 10792 10793 scanForTargetRegionsFunctions(E->getRawStmt(), ParentName); 10794 return; 10795 } 10796 10797 // If this is a lambda function, look into its body. 10798 if (const auto *L = dyn_cast<LambdaExpr>(S)) 10799 S = L->getBody(); 10800 10801 // Keep looking for target regions recursively. 10802 for (const Stmt *II : S->children()) 10803 scanForTargetRegionsFunctions(II, ParentName); 10804 } 10805 10806 static bool isAssumedToBeNotEmitted(const ValueDecl *VD, bool IsDevice) { 10807 Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy = 10808 OMPDeclareTargetDeclAttr::getDeviceType(VD); 10809 if (!DevTy) 10810 return false; 10811 // Do not emit device_type(nohost) functions for the host. 10812 if (!IsDevice && DevTy == OMPDeclareTargetDeclAttr::DT_NoHost) 10813 return true; 10814 // Do not emit device_type(host) functions for the device. 10815 if (IsDevice && DevTy == OMPDeclareTargetDeclAttr::DT_Host) 10816 return true; 10817 return false; 10818 } 10819 10820 bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) { 10821 // If emitting code for the host, we do not process FD here. Instead we do 10822 // the normal code generation. 10823 if (!CGM.getLangOpts().OpenMPIsDevice) { 10824 if (const auto *FD = dyn_cast<FunctionDecl>(GD.getDecl())) 10825 if (isAssumedToBeNotEmitted(cast<ValueDecl>(FD), 10826 CGM.getLangOpts().OpenMPIsDevice)) 10827 return true; 10828 return false; 10829 } 10830 10831 const ValueDecl *VD = cast<ValueDecl>(GD.getDecl()); 10832 // Try to detect target regions in the function. 
10833 if (const auto *FD = dyn_cast<FunctionDecl>(VD)) { 10834 StringRef Name = CGM.getMangledName(GD); 10835 scanForTargetRegionsFunctions(FD->getBody(), Name); 10836 if (isAssumedToBeNotEmitted(cast<ValueDecl>(FD), 10837 CGM.getLangOpts().OpenMPIsDevice)) 10838 return true; 10839 } 10840 10841 // Do not to emit function if it is not marked as declare target. 10842 return !OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD) && 10843 AlreadyEmittedTargetDecls.count(VD) == 0; 10844 } 10845 10846 bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) { 10847 if (isAssumedToBeNotEmitted(cast<ValueDecl>(GD.getDecl()), 10848 CGM.getLangOpts().OpenMPIsDevice)) 10849 return true; 10850 10851 if (!CGM.getLangOpts().OpenMPIsDevice) 10852 return false; 10853 10854 // Check if there are Ctors/Dtors in this declaration and look for target 10855 // regions in it. We use the complete variant to produce the kernel name 10856 // mangling. 10857 QualType RDTy = cast<VarDecl>(GD.getDecl())->getType(); 10858 if (const auto *RD = RDTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl()) { 10859 for (const CXXConstructorDecl *Ctor : RD->ctors()) { 10860 StringRef ParentName = 10861 CGM.getMangledName(GlobalDecl(Ctor, Ctor_Complete)); 10862 scanForTargetRegionsFunctions(Ctor->getBody(), ParentName); 10863 } 10864 if (const CXXDestructorDecl *Dtor = RD->getDestructor()) { 10865 StringRef ParentName = 10866 CGM.getMangledName(GlobalDecl(Dtor, Dtor_Complete)); 10867 scanForTargetRegionsFunctions(Dtor->getBody(), ParentName); 10868 } 10869 } 10870 10871 // Do not to emit variable if it is not marked as declare target. 
10872 llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 10873 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration( 10874 cast<VarDecl>(GD.getDecl())); 10875 if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link || 10876 (*Res == OMPDeclareTargetDeclAttr::MT_To && 10877 HasRequiresUnifiedSharedMemory)) { 10878 DeferredGlobalVariables.insert(cast<VarDecl>(GD.getDecl())); 10879 return true; 10880 } 10881 return false; 10882 } 10883 10884 void CGOpenMPRuntime::registerTargetGlobalVariable(const VarDecl *VD, 10885 llvm::Constant *Addr) { 10886 if (CGM.getLangOpts().OMPTargetTriples.empty() && 10887 !CGM.getLangOpts().OpenMPIsDevice) 10888 return; 10889 10890 // If we have host/nohost variables, they do not need to be registered. 10891 Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy = 10892 OMPDeclareTargetDeclAttr::getDeviceType(VD); 10893 if (DevTy && DevTy.getValue() != OMPDeclareTargetDeclAttr::DT_Any) 10894 return; 10895 10896 llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 10897 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD); 10898 if (!Res) { 10899 if (CGM.getLangOpts().OpenMPIsDevice) { 10900 // Register non-target variables being emitted in device code (debug info 10901 // may cause this). 10902 StringRef VarName = CGM.getMangledName(VD); 10903 EmittedNonTargetVariables.try_emplace(VarName, Addr); 10904 } 10905 return; 10906 } 10907 // Register declare target variables. 
10908 OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags; 10909 StringRef VarName; 10910 CharUnits VarSize; 10911 llvm::GlobalValue::LinkageTypes Linkage; 10912 10913 if (*Res == OMPDeclareTargetDeclAttr::MT_To && 10914 !HasRequiresUnifiedSharedMemory) { 10915 Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo; 10916 VarName = CGM.getMangledName(VD); 10917 if (VD->hasDefinition(CGM.getContext()) != VarDecl::DeclarationOnly) { 10918 VarSize = CGM.getContext().getTypeSizeInChars(VD->getType()); 10919 assert(!VarSize.isZero() && "Expected non-zero size of the variable"); 10920 } else { 10921 VarSize = CharUnits::Zero(); 10922 } 10923 Linkage = CGM.getLLVMLinkageVarDefinition(VD, /*IsConstant=*/false); 10924 // Temp solution to prevent optimizations of the internal variables. 10925 if (CGM.getLangOpts().OpenMPIsDevice && !VD->isExternallyVisible()) { 10926 // Do not create a "ref-variable" if the original is not also available 10927 // on the host. 10928 if (!OffloadEntriesInfoManager.hasDeviceGlobalVarEntryInfo(VarName)) 10929 return; 10930 std::string RefName = getName({VarName, "ref"}); 10931 if (!CGM.GetGlobalValue(RefName)) { 10932 llvm::Constant *AddrRef = 10933 getOrCreateInternalVariable(Addr->getType(), RefName); 10934 auto *GVAddrRef = cast<llvm::GlobalVariable>(AddrRef); 10935 GVAddrRef->setConstant(/*Val=*/true); 10936 GVAddrRef->setLinkage(llvm::GlobalValue::InternalLinkage); 10937 GVAddrRef->setInitializer(Addr); 10938 CGM.addCompilerUsedGlobal(GVAddrRef); 10939 } 10940 } 10941 } else { 10942 assert(((*Res == OMPDeclareTargetDeclAttr::MT_Link) || 10943 (*Res == OMPDeclareTargetDeclAttr::MT_To && 10944 HasRequiresUnifiedSharedMemory)) && 10945 "Declare target attribute must link or to with unified memory."); 10946 if (*Res == OMPDeclareTargetDeclAttr::MT_Link) 10947 Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink; 10948 else 10949 Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo; 10950 10951 if 
(CGM.getLangOpts().OpenMPIsDevice) { 10952 VarName = Addr->getName(); 10953 Addr = nullptr; 10954 } else { 10955 VarName = getAddrOfDeclareTargetVar(VD).getName(); 10956 Addr = cast<llvm::Constant>(getAddrOfDeclareTargetVar(VD).getPointer()); 10957 } 10958 VarSize = CGM.getPointerSize(); 10959 Linkage = llvm::GlobalValue::WeakAnyLinkage; 10960 } 10961 10962 OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo( 10963 VarName, Addr, VarSize, Flags, Linkage); 10964 } 10965 10966 bool CGOpenMPRuntime::emitTargetGlobal(GlobalDecl GD) { 10967 if (isa<FunctionDecl>(GD.getDecl()) || 10968 isa<OMPDeclareReductionDecl>(GD.getDecl())) 10969 return emitTargetFunctions(GD); 10970 10971 return emitTargetGlobalVariable(GD); 10972 } 10973 10974 void CGOpenMPRuntime::emitDeferredTargetDecls() const { 10975 for (const VarDecl *VD : DeferredGlobalVariables) { 10976 llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 10977 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD); 10978 if (!Res) 10979 continue; 10980 if (*Res == OMPDeclareTargetDeclAttr::MT_To && 10981 !HasRequiresUnifiedSharedMemory) { 10982 CGM.EmitGlobal(VD); 10983 } else { 10984 assert((*Res == OMPDeclareTargetDeclAttr::MT_Link || 10985 (*Res == OMPDeclareTargetDeclAttr::MT_To && 10986 HasRequiresUnifiedSharedMemory)) && 10987 "Expected link clause or to clause with unified memory."); 10988 (void)CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD); 10989 } 10990 } 10991 } 10992 10993 void CGOpenMPRuntime::adjustTargetSpecificDataForLambdas( 10994 CodeGenFunction &CGF, const OMPExecutableDirective &D) const { 10995 assert(isOpenMPTargetExecutionDirective(D.getDirectiveKind()) && 10996 " Expected target-based directive."); 10997 } 10998 10999 void CGOpenMPRuntime::processRequiresDirective(const OMPRequiresDecl *D) { 11000 for (const OMPClause *Clause : D->clauselists()) { 11001 if (Clause->getClauseKind() == OMPC_unified_shared_memory) { 11002 HasRequiresUnifiedSharedMemory = true; 11003 } else if 
(const auto *AC =
                   dyn_cast<OMPAtomicDefaultMemOrderClause>(Clause)) {
      switch (AC->getAtomicDefaultMemOrderKind()) {
      case OMPC_ATOMIC_DEFAULT_MEM_ORDER_acq_rel:
        RequiresAtomicOrdering = llvm::AtomicOrdering::AcquireRelease;
        break;
      case OMPC_ATOMIC_DEFAULT_MEM_ORDER_seq_cst:
        RequiresAtomicOrdering = llvm::AtomicOrdering::SequentiallyConsistent;
        break;
      case OMPC_ATOMIC_DEFAULT_MEM_ORDER_relaxed:
        RequiresAtomicOrdering = llvm::AtomicOrdering::Monotonic;
        break;
      case OMPC_ATOMIC_DEFAULT_MEM_ORDER_unknown:
        break;
      }
    }
  }
}

/// Returns the atomic ordering currently stored in RequiresAtomicOrdering.
llvm::AtomicOrdering CGOpenMPRuntime::getDefaultMemoryOrdering() const {
  return RequiresAtomicOrdering;
}

/// Checks whether \p VD carries an OMPAllocateDeclAttr and, if so, reports the
/// address space to use for it.
///
/// \param VD Variable to inspect; may be null.
/// \param AS Set to LangAS::Default for every predefined allocator kind.
/// \return false when \p VD is null or has no OMPAllocateDeclAttr, true for
/// the predefined allocators. A user-defined allocator is treated as
/// unreachable here.
bool CGOpenMPRuntime::hasAllocateAttributeForGlobalVar(const VarDecl *VD,
                                                       LangAS &AS) {
  if (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())
    return false;
  const auto *A = VD->getAttr<OMPAllocateDeclAttr>();
  switch(A->getAllocatorType()) {
  case OMPAllocateDeclAttr::OMPNullMemAlloc:
  case OMPAllocateDeclAttr::OMPDefaultMemAlloc:
  // Not supported, fallback to the default mem space.
  case OMPAllocateDeclAttr::OMPLargeCapMemAlloc:
  case OMPAllocateDeclAttr::OMPCGroupMemAlloc:
  case OMPAllocateDeclAttr::OMPHighBWMemAlloc:
  case OMPAllocateDeclAttr::OMPLowLatMemAlloc:
  case OMPAllocateDeclAttr::OMPThreadMemAlloc:
  case OMPAllocateDeclAttr::OMPConstMemAlloc:
  case OMPAllocateDeclAttr::OMPPTeamMemAlloc:
    // All predefined allocators deliberately share this one outcome.
    AS = LangAS::Default;
    return true;
  case OMPAllocateDeclAttr::OMPUserDefinedMemAlloc:
    llvm_unreachable("Expected predefined allocator for the variables with the "
                     "static storage.");
  }
  return false;
}

/// Returns the value of the HasRequiresUnifiedSharedMemory flag.
bool CGOpenMPRuntime::hasRequiresUnifiedSharedMemory() const {
  return HasRequiresUnifiedSharedMemory;
}

/// When compiling for the device, remembers the runtime's current
/// ShouldMarkAsGlobal value and clears the flag; the destructor restores it.
/// On the host (OpenMPIsDevice is false) the constructor changes nothing.
CGOpenMPRuntime::DisableAutoDeclareTargetRAII::DisableAutoDeclareTargetRAII(
    CodeGenModule &CGM)
    : CGM(CGM) {
  if (CGM.getLangOpts().OpenMPIsDevice) {
    SavedShouldMarkAsGlobal = CGM.getOpenMPRuntime().ShouldMarkAsGlobal;
    CGM.getOpenMPRuntime().ShouldMarkAsGlobal = false;
  }
}

/// Restores the ShouldMarkAsGlobal value saved by the constructor (device
/// compilation only).
CGOpenMPRuntime::DisableAutoDeclareTargetRAII::~DisableAutoDeclareTargetRAII() {
  if (CGM.getLangOpts().OpenMPIsDevice)
    CGM.getOpenMPRuntime().ShouldMarkAsGlobal = SavedShouldMarkAsGlobal;
}

/// Records the function behind \p GD in AlreadyEmittedTargetDecls, unless it
/// is a declare target declaration, and reports whether it was already
/// handled.
///
/// \return true on the host or when ShouldMarkAsGlobal is off. For declare
/// target declarations the result depends on whether a body exists, whether
/// the declaration is already recorded and whether the module already holds a
/// definition. Otherwise true exactly when the declaration had been recorded
/// before this call.
bool CGOpenMPRuntime::markAsGlobalTarget(GlobalDecl GD) {
  if (!CGM.getLangOpts().OpenMPIsDevice || !ShouldMarkAsGlobal)
    return true;

  const auto *D = cast<FunctionDecl>(GD.getDecl());
  // Do not emit the function if it is marked as declare target, as it was
  // already emitted.
  if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(D)) {
    if (D->hasBody() && AlreadyEmittedTargetDecls.count(D) == 0) {
      // A function with this mangled name may already exist in the module;
      // report "already done" only if it is an actual definition.
      if (auto *F = dyn_cast_or_null<llvm::Function>(
              CGM.GetGlobalValue(CGM.getMangledName(GD))))
        return !F->isDeclaration();
      return false;
    }
    return true;
  }

  // insert().second is true for a first-time insertion, hence the negation.
  return !AlreadyEmittedTargetDecls.insert(D).second;
}

/// Creates the function that passes the 'requires' flags to the offloading
/// runtime via __tgt_register_requires.
///
/// \return nullptr when there are no offload target triples, in SIMD-only
/// mode, when compiling for the device, or when no offload entry / target
/// region / declare target region was emitted; otherwise the newly created
/// function.
llvm::Function *CGOpenMPRuntime::emitRequiresDirectiveRegFun() {
  // If we don't have entries or if we are emitting code for the device, we
  // don't need to do anything.
  if (CGM.getLangOpts().OMPTargetTriples.empty() ||
      CGM.getLangOpts().OpenMPSimd || CGM.getLangOpts().OpenMPIsDevice ||
      (OffloadEntriesInfoManager.empty() &&
       !HasEmittedDeclareTargetRegion &&
       !HasEmittedTargetRegion))
    return nullptr;

  // Create and register the function that handles the requires directives.
  ASTContext &C = CGM.getContext();

  llvm::Function *RequiresRegFn;
  {
    CodeGenFunction CGF(CGM);
    const auto &FI = CGM.getTypes().arrangeNullaryFunction();
    llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
    std::string ReqName = getName({"omp_offloading", "requires_reg"});
    RequiresRegFn = CGM.CreateGlobalInitOrCleanUpFunction(FTy, ReqName, FI);
    CGF.StartFunction(GlobalDecl(), C.VoidTy, RequiresRegFn, FI, {});
    OpenMPOffloadingRequiresDirFlags Flags = OMP_REQ_NONE;
    // TODO: check for other requires clauses.
    // The requires directive takes effect only when a target region is
    // present in the compilation unit. Otherwise it is ignored and not
    // passed to the runtime. This avoids the runtime from throwing an error
    // for mismatching requires clauses across compilation units that don't
    // contain at least 1 target region.
    assert((HasEmittedTargetRegion ||
            HasEmittedDeclareTargetRegion ||
            !OffloadEntriesInfoManager.empty()) &&
           "Target or declare target region expected.");
    if (HasRequiresUnifiedSharedMemory)
      Flags = OMP_REQ_UNIFIED_SHARED_MEMORY;
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___tgt_register_requires),
                        llvm::ConstantInt::get(CGM.Int64Ty, Flags));
    CGF.FinishFunction();
  }
  return RequiresRegFn;
}

/// Emits a call to __kmpc_fork_teams passing the source location, the number
/// of captured variables, the outlined function and the captured variables.
/// Does nothing when \p CGF has no insertion point.
void CGOpenMPRuntime::emitTeamsCall(CodeGenFunction &CGF,
                                    const OMPExecutableDirective &D,
                                    SourceLocation Loc,
                                    llvm::Function *OutlinedFn,
                                    ArrayRef<llvm::Value *> CapturedVars) {
  if (!CGF.HaveInsertPoint())
    return;

  llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
  CodeGenFunction::RunCleanupsScope Scope(CGF);

  // Build call __kmpc_fork_teams(loc, n, microtask, var1, .., varn);
  llvm::Value *Args[] = {
      RTLoc,
      CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
      CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy())};
  llvm::SmallVector<llvm::Value *, 16> RealArgs;
  RealArgs.append(std::begin(Args), std::end(Args));
  RealArgs.append(CapturedVars.begin(), CapturedVars.end());

  llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(
      CGM.getModule(), OMPRTL___kmpc_fork_teams);
  CGF.EmitRuntimeCall(RTLFn, RealArgs);
}

/// Emits a call to __kmpc_push_num_teams with the values of the num_teams and
/// thread_limit expressions.
///
/// \param NumTeams Expression for the number of teams; a null pointer is
/// emitted as the constant 0.
/// \param ThreadLimit Expression for the thread limit; a null pointer is
/// emitted as the constant 0.
void CGOpenMPRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
                                         const Expr *NumTeams,
                                         const Expr *ThreadLimit,
                                         SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;

  llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);

  llvm::Value *NumTeamsVal =
      NumTeams
          ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(NumTeams),
                                      CGF.CGM.Int32Ty, /* isSigned = */ true)
          : CGF.Builder.getInt32(0);

  llvm::Value *ThreadLimitVal =
      ThreadLimit
          ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit),
                                      CGF.CGM.Int32Ty, /* isSigned = */ true)
          : CGF.Builder.getInt32(0);

  // Build call __kmpc_push_num_teams(&loc, global_tid, num_teams, thread_limit)
  llvm::Value *PushNumTeamsArgs[] = {RTLoc, getThreadID(CGF, Loc), NumTeamsVal,
                                     ThreadLimitVal};
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_push_num_teams),
                      PushNumTeamsArgs);
}

/// Emits the code for a data region: a __tgt_target_data_begin_mapper call,
/// the region body produced by \p CodeGen, and a matching
/// __tgt_target_data_end_mapper call.
///
/// \param IfCond Optional 'if' clause condition; when present the runtime
/// calls are emitted through emitIfClause.
/// \param Device Optional device expression; OMP_DEVICEID_UNDEF is passed to
/// the runtime when it is null.
/// \param Info Receives the offloading arrays built for the opening call and
/// is read again for the closing call.
void CGOpenMPRuntime::emitTargetDataCalls(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) {
  if (!CGF.HaveInsertPoint())
    return;

  // Action used to replace the default codegen action and turn privatization
  // off.
  PrePostActionTy NoPrivAction;

  // Generate the code for the opening of the data environment. Capture all the
  // arguments of the runtime call by reference because they are used in the
  // closing of the region.
  auto &&BeginThenGen = [this, &D, Device, &Info,
                         &CodeGen](CodeGenFunction &CGF, PrePostActionTy &) {
    // Fill up the arrays with all the mapped variables.
    MappableExprsHandler::MapCombinedInfoTy CombinedInfo;

    // Get map clause information.
    MappableExprsHandler MEHandler(D, CGF);
    MEHandler.generateAllInfo(CombinedInfo);

    // Fill up the arrays and create the arguments.
    emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder,
                         /*IsNonContiguous=*/true);

    llvm::Value *BasePointersArrayArg = nullptr;
    llvm::Value *PointersArrayArg = nullptr;
    llvm::Value *SizesArrayArg = nullptr;
    llvm::Value *MapTypesArrayArg = nullptr;
    llvm::Value *MapNamesArrayArg = nullptr;
    llvm::Value *MappersArrayArg = nullptr;
    emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
                                 SizesArrayArg, MapTypesArrayArg,
                                 MapNamesArrayArg, MappersArrayArg, Info);

    // Emit device ID if any.
    llvm::Value *DeviceID = nullptr;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);

    // Source location for the ident struct
    llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());

    llvm::Value *OffloadingArgs[] = {RTLoc,
                                     DeviceID,
                                     PointerNum,
                                     BasePointersArrayArg,
                                     PointersArrayArg,
                                     SizesArrayArg,
                                     MapTypesArrayArg,
                                     MapNamesArrayArg,
                                     MappersArrayArg};
    CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___tgt_target_data_begin_mapper),
        OffloadingArgs);

    // If device pointer privatization is required, emit the body of the region
    // here. It will have to be duplicated: with and without privatization.
    if (!Info.CaptureDeviceAddrMap.empty())
      CodeGen(CGF);
  };

  // Generate code for the closing of the data region.
  auto &&EndThenGen = [this, Device, &Info, &D](CodeGenFunction &CGF,
                                                PrePostActionTy &) {
    assert(Info.isValid() && "Invalid data environment closing arguments.");

    llvm::Value *BasePointersArrayArg = nullptr;
    llvm::Value *PointersArrayArg = nullptr;
    llvm::Value *SizesArrayArg = nullptr;
    llvm::Value *MapTypesArrayArg = nullptr;
    llvm::Value *MapNamesArrayArg = nullptr;
    llvm::Value *MappersArrayArg = nullptr;
    emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
                                 SizesArrayArg, MapTypesArrayArg,
                                 MapNamesArrayArg, MappersArrayArg, Info,
                                 {/*ForEndCall=*/true});

    // Emit device ID if any.
    llvm::Value *DeviceID = nullptr;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);

    // Source location for the ident struct
    llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());

    llvm::Value *OffloadingArgs[] = {RTLoc,
                                     DeviceID,
                                     PointerNum,
                                     BasePointersArrayArg,
                                     PointersArrayArg,
                                     SizesArrayArg,
                                     MapTypesArrayArg,
                                     MapNamesArrayArg,
                                     MappersArrayArg};
    CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___tgt_target_data_end_mapper),
        OffloadingArgs);
  };

  // If we need device pointer privatization, we need to emit the body of the
  // region with no privatization in the 'else' branch of the conditional.
  // Otherwise, we don't have to do anything.
  auto &&BeginElseGen = [&Info, &CodeGen, &NoPrivAction](CodeGenFunction &CGF,
                                                         PrePostActionTy &) {
    if (!Info.CaptureDeviceAddrMap.empty()) {
      CodeGen.setAction(NoPrivAction);
      CodeGen(CGF);
    }
  };

  // We don't have to do anything to close the region if the if clause evaluates
  // to false.
  auto &&EndElseGen = [](CodeGenFunction &CGF, PrePostActionTy &) {};

  if (IfCond) {
    emitIfClause(CGF, IfCond, BeginThenGen, BeginElseGen);
  } else {
    RegionCodeGenTy RCG(BeginThenGen);
    RCG(CGF);
  }

  // If we don't require privatization of device pointers, we emit the body in
  // between the runtime calls. This avoids duplicating the body code.
  if (Info.CaptureDeviceAddrMap.empty()) {
    CodeGen.setAction(NoPrivAction);
    CodeGen(CGF);
  }

  if (IfCond) {
    emitIfClause(CGF, IfCond, EndThenGen, EndElseGen);
  } else {
    RegionCodeGenTy RCG(EndThenGen);
    RCG(CGF);
  }
}

/// Emits the runtime call for a standalone 'target enter data',
/// 'target exit data' or 'target update' directive.
///
/// The runtime entry point is chosen from the directive kind and from whether
/// a 'nowait' clause is present. When the directive has a 'depend' or
/// 'nowait' clause the call is emitted through
/// EmitOMPTargetTaskBasedDirective, otherwise through emitInlinedDirective.
///
/// \param IfCond Optional 'if' clause condition; nothing is emitted on its
/// false branch.
/// \param Device Optional device expression; OMP_DEVICEID_UNDEF is used when
/// it is null.
void CGOpenMPRuntime::emitTargetDataStandAloneCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device) {
  if (!CGF.HaveInsertPoint())
    return;

  assert((isa<OMPTargetEnterDataDirective>(D) ||
          isa<OMPTargetExitDataDirective>(D) ||
          isa<OMPTargetUpdateDirective>(D)) &&
         "Expecting either target enter, exit data, or update directives.");

  // Filled in by TargetThenGen below and read by ThenGen, which captures them
  // by reference.
  CodeGenFunction::OMPTargetDataInfo InputInfo;
  llvm::Value *MapTypesArray = nullptr;
  llvm::Value *MapNamesArray = nullptr;
  // Generate the code for the opening of the data environment.
  auto &&ThenGen = [this, &D, Device, &InputInfo, &MapTypesArray,
                    &MapNamesArray](CodeGenFunction &CGF, PrePostActionTy &) {
    // Emit device ID if any.
    llvm::Value *DeviceID = nullptr;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Constant *PointerNum =
        CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);

    // Source location for the ident struct
    llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());

    llvm::Value *OffloadingArgs[] = {RTLoc,
                                     DeviceID,
                                     PointerNum,
                                     InputInfo.BasePointersArray.getPointer(),
                                     InputInfo.PointersArray.getPointer(),
                                     InputInfo.SizesArray.getPointer(),
                                     MapTypesArray,
                                     MapNamesArray,
                                     InputInfo.MappersArray.getPointer()};

    // Select the right runtime function call for each standalone
    // directive.
    const bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
    RuntimeFunction RTLFn;
    switch (D.getDirectiveKind()) {
    case OMPD_target_enter_data:
      RTLFn = HasNowait ? OMPRTL___tgt_target_data_begin_nowait_mapper
                        : OMPRTL___tgt_target_data_begin_mapper;
      break;
    case OMPD_target_exit_data:
      RTLFn = HasNowait ? OMPRTL___tgt_target_data_end_nowait_mapper
                        : OMPRTL___tgt_target_data_end_mapper;
      break;
    case OMPD_target_update:
      RTLFn = HasNowait ? OMPRTL___tgt_target_data_update_nowait_mapper
                        : OMPRTL___tgt_target_data_update_mapper;
      break;
    // Every other directive kind is rejected below.
    case OMPD_parallel:
    case OMPD_for:
    case OMPD_parallel_for:
    case OMPD_parallel_master:
    case OMPD_parallel_sections:
    case OMPD_for_simd:
    case OMPD_parallel_for_simd:
    case OMPD_cancel:
    case OMPD_cancellation_point:
    case OMPD_ordered:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_task:
    case OMPD_simd:
    case OMPD_tile:
    case OMPD_unroll:
    case OMPD_sections:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_taskyield:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_atomic:
    case OMPD_flush:
    case OMPD_depobj:
    case OMPD_scan:
    case OMPD_teams:
    case OMPD_target_data:
    case OMPD_distribute:
    case OMPD_distribute_simd:
    case OMPD_distribute_parallel_for:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_declare_simd:
    case OMPD_declare_variant:
    case OMPD_begin_declare_variant:
    case OMPD_end_declare_variant:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_master_taskloop:
    case OMPD_master_taskloop_simd:
    case OMPD_parallel_master_taskloop:
    case OMPD_parallel_master_taskloop_simd:
    case OMPD_target:
    case OMPD_target_simd:
    case OMPD_target_teams_distribute:
    case OMPD_target_teams_distribute_simd:
    case OMPD_target_teams_distribute_parallel_for:
    case OMPD_target_teams_distribute_parallel_for_simd:
    case OMPD_target_teams:
    case OMPD_target_parallel:
    case OMPD_target_parallel_for:
    case OMPD_target_parallel_for_simd:
    case OMPD_requires:
    case OMPD_metadirective:
    case OMPD_unknown:
    default:
      llvm_unreachable("Unexpected standalone target data directive.");
      break;
    }
    CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), RTLFn),
        OffloadingArgs);
  };

  auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray,
                          &MapNamesArray](CodeGenFunction &CGF,
                                          PrePostActionTy &) {
    // Fill up the arrays with all the mapped variables.
    MappableExprsHandler::MapCombinedInfoTy CombinedInfo;

    // Get map clause information.
    MappableExprsHandler MEHandler(D, CGF);
    MEHandler.generateAllInfo(CombinedInfo);

    TargetDataInfo Info;
    // Fill up the arrays and create the arguments.
    emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder,
                         /*IsNonContiguous=*/true);
    bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>() ||
                             D.hasClausesOfKind<OMPNowaitClause>();
    emitOffloadingArraysArgument(
        CGF, Info.BasePointersArray, Info.PointersArray, Info.SizesArray,
        Info.MapTypesArray, Info.MapNamesArray, Info.MappersArray, Info,
        {/*ForEndCall=*/false});
    // Publish the arrays to the variables ThenGen reads.
    InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
    InputInfo.BasePointersArray =
        Address(Info.BasePointersArray, CGM.getPointerAlign());
    InputInfo.PointersArray =
        Address(Info.PointersArray, CGM.getPointerAlign());
    InputInfo.SizesArray =
        Address(Info.SizesArray, CGM.getPointerAlign());
    InputInfo.MappersArray = Address(Info.MappersArray, CGM.getPointerAlign());
    MapTypesArray = Info.MapTypesArray;
    MapNamesArray = Info.MapNamesArray;
    if (RequiresOuterTask)
      CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
    else
      emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
  };

  if (IfCond) {
    emitIfClause(CGF, IfCond, TargetThenGen,
                 [](CodeGenFunction &CGF, PrePostActionTy &) {});
  } else {
    RegionCodeGenTy ThenRCG(TargetThenGen);
    ThenRCG(CGF);
  }
}

namespace {
/// Kind of parameter in a function with 'declare simd' directive.
enum ParamKindTy { LinearWithVarStride, Linear, Uniform, Vector };
/// Attribute set of the parameter.
struct ParamAttrTy {
  /// Classification of the parameter; defaults to Vector.
  ParamKindTy Kind = Vector;
  /// Constant stride for Linear parameters, or the position of the stride
  /// parameter for LinearWithVarStride ones.
  llvm::APSInt StrideOrArg;
  /// Alignment of the parameter; a zero value means no alignment is mangled.
  llvm::APSInt Alignment;
};
} // namespace

/// Returns the size, as reported by ASTContext::getTypeSize, of the
/// "characteristic data type" (CDT) of \p FD, or 0 when the return type is
/// null.
///
/// \param ParamAttrs Per-parameter attributes; for C++ methods index 0 is
/// used for the implicit 'this' parameter.
static unsigned evaluateCDTSize(const FunctionDecl *FD,
                                ArrayRef<ParamAttrTy> ParamAttrs) {
  // Every vector variant of a SIMD-enabled function has a vector length (VLEN).
  // If OpenMP clause "simdlen" is used, the VLEN is the value of the argument
  // of that clause. The VLEN value must be power of 2.
  // In other case the notion of the function's "characteristic data type" (CDT)
  // is used to compute the vector length.
  // CDT is defined in the following order:
  //   a) For non-void function, the CDT is the return type.
  //   b) If the function has any non-uniform, non-linear parameters, then the
  //   CDT is the type of the first such parameter.
  //   c) If the CDT determined by a) or b) above is struct, union, or class
  //   type which is pass-by-value (except for the type that maps to the
  //   built-in complex data type), the characteristic data type is int.
  //   d) If none of the above three cases is applicable, the CDT is int.
  // The VLEN is then determined based on the CDT and the size of vector
  // register of that ISA for which current vector version is generated. The
  // VLEN is computed using the formula below:
  //   VLEN  = sizeof(vector_register) / sizeof(CDT),
  // where vector register size specified in section 3.2.1 Registers and the
  // Stack Frame of original AMD64 ABI document.
  QualType RetType = FD->getReturnType();
  if (RetType.isNull())
    return 0;
  ASTContext &C = FD->getASTContext();
  QualType CDT;
  if (!RetType.isNull() && !RetType->isVoidType()) {
    CDT = RetType;
  } else {
    unsigned Offset = 0;
    if (const auto *MD = dyn_cast<CXXMethodDecl>(FD)) {
      // For methods, slot 0 of ParamAttrs is 'this'; explicit parameters
      // start at Offset.
      if (ParamAttrs[Offset].Kind == Vector)
        CDT = C.getPointerType(C.getRecordType(MD->getParent()));
      ++Offset;
    }
    if (CDT.isNull()) {
      for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
        if (ParamAttrs[I + Offset].Kind == Vector) {
          CDT = FD->getParamDecl(I)->getType();
          break;
        }
      }
    }
  }
  if (CDT.isNull())
    CDT = C.IntTy;
  CDT = CDT->getCanonicalTypeUnqualified();
  if (CDT->isRecordType() || CDT->isUnionType())
    CDT = C.IntTy;
  return C.getTypeSize(CDT);
}

/// Adds one function attribute per (mask, ISA) combination to \p Fn, each
/// holding a "_ZGV..." vector variant name built from the ISA letter, the
/// mask letter, the vector length, the parameter kinds and \p Fn's name.
///
/// \param VLENVal User-provided simdlen; when zero the length is computed as
/// the ISA's VecRegSize divided by the result of evaluateCDTSize.
/// \param State Selects the mask letters: 'N' and 'M' for BS_Undefined, only
/// 'N' for BS_Notinbranch, only 'M' for BS_Inbranch.
static void
emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn,
                           const llvm::APSInt &VLENVal,
                           ArrayRef<ParamAttrTy> ParamAttrs,
                           OMPDeclareSimdDeclAttr::BranchStateTy State) {
  struct ISADataTy {
    char ISA;
    unsigned VecRegSize;
  };
  ISADataTy ISAData[] = {
      {
          'b', 128
      }, // SSE
      {
          'c', 256
      }, // AVX
      {
          'd', 256
      }, // AVX2
      {
          'e', 512
      }, // AVX512
  };
  llvm::SmallVector<char, 2> Masked;
  switch (State) {
  case OMPDeclareSimdDeclAttr::BS_Undefined:
    Masked.push_back('N');
    Masked.push_back('M');
    break;
  case OMPDeclareSimdDeclAttr::BS_Notinbranch:
    Masked.push_back('N');
    break;
  case OMPDeclareSimdDeclAttr::BS_Inbranch:
    Masked.push_back('M');
    break;
  }
  for (char Mask : Masked) {
    for (const ISADataTy &Data : ISAData) {
      SmallString<256> Buffer;
      llvm::raw_svector_ostream Out(Buffer);
      Out << "_ZGV" << Data.ISA << Mask;
      if (!VLENVal) {
        unsigned NumElts = evaluateCDTSize(FD, ParamAttrs);
        assert(NumElts && "Non-zero simdlen/cdtsize expected");
        Out << llvm::APSInt::getUnsigned(Data.VecRegSize / NumElts);
      } else {
        Out << VLENVal;
      }
      for (const ParamAttrTy &ParamAttr : ParamAttrs) {
        switch (ParamAttr.Kind){
        case LinearWithVarStride:
          Out << 's' << ParamAttr.StrideOrArg;
          break;
        case Linear:
          Out << 'l';
          // A stride of 1 is left implicit in the mangled name.
          if (ParamAttr.StrideOrArg != 1)
            Out << ParamAttr.StrideOrArg;
          break;
        case Uniform:
          Out << 'u';
          break;
        case Vector:
          Out << 'v';
          break;
        }
        if (!!ParamAttr.Alignment)
          Out << 'a' << ParamAttr.Alignment;
      }
      Out << '_' << Fn->getName();
      Fn->addFnAttr(Out.str());
    }
  }
}

// These are the functions that are needed to mangle the name of the
// vector functions generated by the compiler, according to the rules
// defined in the "Vector Function ABI specifications for AArch64",
// available at
// https://developer.arm.com/products/software-development-tools/hpc/arm-compiler-for-hpc/vector-function-abi.

/// Maps To Vector (MTV), as defined in 3.1.1 of the AAVFABI.
///
/// TODO: Need to implement the behavior for reference marked with a
/// var or no linear modifiers (1.b in the section). For this, we
/// need to extend ParamKindTy to support the linear modifiers.
11667 static bool getAArch64MTV(QualType QT, ParamKindTy Kind) { 11668 QT = QT.getCanonicalType(); 11669 11670 if (QT->isVoidType()) 11671 return false; 11672 11673 if (Kind == ParamKindTy::Uniform) 11674 return false; 11675 11676 if (Kind == ParamKindTy::Linear) 11677 return false; 11678 11679 // TODO: Handle linear references with modifiers 11680 11681 if (Kind == ParamKindTy::LinearWithVarStride) 11682 return false; 11683 11684 return true; 11685 } 11686 11687 /// Pass By Value (PBV), as defined in 3.1.2 of the AAVFABI. 11688 static bool getAArch64PBV(QualType QT, ASTContext &C) { 11689 QT = QT.getCanonicalType(); 11690 unsigned Size = C.getTypeSize(QT); 11691 11692 // Only scalars and complex within 16 bytes wide set PVB to true. 11693 if (Size != 8 && Size != 16 && Size != 32 && Size != 64 && Size != 128) 11694 return false; 11695 11696 if (QT->isFloatingType()) 11697 return true; 11698 11699 if (QT->isIntegerType()) 11700 return true; 11701 11702 if (QT->isPointerType()) 11703 return true; 11704 11705 // TODO: Add support for complex types (section 3.1.2, item 2). 11706 11707 return false; 11708 } 11709 11710 /// Computes the lane size (LS) of a return type or of an input parameter, 11711 /// as defined by `LS(P)` in 3.2.1 of the AAVFABI. 11712 /// TODO: Add support for references, section 3.2.1, item 1. 11713 static unsigned getAArch64LS(QualType QT, ParamKindTy Kind, ASTContext &C) { 11714 if (!getAArch64MTV(QT, Kind) && QT.getCanonicalType()->isPointerType()) { 11715 QualType PTy = QT.getCanonicalType()->getPointeeType(); 11716 if (getAArch64PBV(PTy, C)) 11717 return C.getTypeSize(PTy); 11718 } 11719 if (getAArch64PBV(QT, C)) 11720 return C.getTypeSize(QT); 11721 11722 return C.getTypeSize(C.getUIntPtrType()); 11723 } 11724 11725 // Get Narrowest Data Size (NDS) and Widest Data Size (WDS) from the 11726 // signature of the scalar function, as defined in 3.2.2 of the 11727 // AAVFABI. 
11728 static std::tuple<unsigned, unsigned, bool> 11729 getNDSWDS(const FunctionDecl *FD, ArrayRef<ParamAttrTy> ParamAttrs) { 11730 QualType RetType = FD->getReturnType().getCanonicalType(); 11731 11732 ASTContext &C = FD->getASTContext(); 11733 11734 bool OutputBecomesInput = false; 11735 11736 llvm::SmallVector<unsigned, 8> Sizes; 11737 if (!RetType->isVoidType()) { 11738 Sizes.push_back(getAArch64LS(RetType, ParamKindTy::Vector, C)); 11739 if (!getAArch64PBV(RetType, C) && getAArch64MTV(RetType, {})) 11740 OutputBecomesInput = true; 11741 } 11742 for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) { 11743 QualType QT = FD->getParamDecl(I)->getType().getCanonicalType(); 11744 Sizes.push_back(getAArch64LS(QT, ParamAttrs[I].Kind, C)); 11745 } 11746 11747 assert(!Sizes.empty() && "Unable to determine NDS and WDS."); 11748 // The LS of a function parameter / return value can only be a power 11749 // of 2, starting from 8 bits, up to 128. 11750 assert(llvm::all_of(Sizes, 11751 [](unsigned Size) { 11752 return Size == 8 || Size == 16 || Size == 32 || 11753 Size == 64 || Size == 128; 11754 }) && 11755 "Invalid size"); 11756 11757 return std::make_tuple(*std::min_element(std::begin(Sizes), std::end(Sizes)), 11758 *std::max_element(std::begin(Sizes), std::end(Sizes)), 11759 OutputBecomesInput); 11760 } 11761 11762 /// Mangle the parameter part of the vector function name according to 11763 /// their OpenMP classification. The mangling function is defined in 11764 /// section 3.5 of the AAVFABI. 11765 static std::string mangleVectorParameters(ArrayRef<ParamAttrTy> ParamAttrs) { 11766 SmallString<256> Buffer; 11767 llvm::raw_svector_ostream Out(Buffer); 11768 for (const auto &ParamAttr : ParamAttrs) { 11769 switch (ParamAttr.Kind) { 11770 case LinearWithVarStride: 11771 Out << "ls" << ParamAttr.StrideOrArg; 11772 break; 11773 case Linear: 11774 Out << 'l'; 11775 // Don't print the step value if it is not present or if it is 11776 // equal to 1. 
11777 if (ParamAttr.StrideOrArg != 1) 11778 Out << ParamAttr.StrideOrArg; 11779 break; 11780 case Uniform: 11781 Out << 'u'; 11782 break; 11783 case Vector: 11784 Out << 'v'; 11785 break; 11786 } 11787 11788 if (!!ParamAttr.Alignment) 11789 Out << 'a' << ParamAttr.Alignment; 11790 } 11791 11792 return std::string(Out.str()); 11793 } 11794 11795 // Function used to add the attribute. The parameter `VLEN` is 11796 // templated to allow the use of "x" when targeting scalable functions 11797 // for SVE. 11798 template <typename T> 11799 static void addAArch64VectorName(T VLEN, StringRef LMask, StringRef Prefix, 11800 char ISA, StringRef ParSeq, 11801 StringRef MangledName, bool OutputBecomesInput, 11802 llvm::Function *Fn) { 11803 SmallString<256> Buffer; 11804 llvm::raw_svector_ostream Out(Buffer); 11805 Out << Prefix << ISA << LMask << VLEN; 11806 if (OutputBecomesInput) 11807 Out << "v"; 11808 Out << ParSeq << "_" << MangledName; 11809 Fn->addFnAttr(Out.str()); 11810 } 11811 11812 // Helper function to generate the Advanced SIMD names depending on 11813 // the value of the NDS when simdlen is not present. 
// Adds the vector names for each vector length associated with \p NDS:
// 8 and 16 for NDS 8, 4 and 8 for NDS 16, 2 and 4 for NDS 32, and only 2 for
// NDS 64 or 128. Any other NDS value is treated as unreachable.
static void addAArch64AdvSIMDNDSNames(unsigned NDS, StringRef Mask,
                                      StringRef Prefix, char ISA,
                                      StringRef ParSeq, StringRef MangledName,
                                      bool OutputBecomesInput,
                                      llvm::Function *Fn) {
  switch (NDS) {
  case 8:
    addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
                         OutputBecomesInput, Fn);
    addAArch64VectorName(16, Mask, Prefix, ISA, ParSeq, MangledName,
                         OutputBecomesInput, Fn);
    break;
  case 16:
    addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
                         OutputBecomesInput, Fn);
    addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
                         OutputBecomesInput, Fn);
    break;
  case 32:
    addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
                         OutputBecomesInput, Fn);
    addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
                         OutputBecomesInput, Fn);
    break;
  case 64:
  case 128:
    addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
                         OutputBecomesInput, Fn);
    break;
  default:
    llvm_unreachable("Scalar type is too wide.");
  }
}

/// Emit vector function attributes for AArch64, as defined in the AAVFABI.
///
/// \param UserVLEN Value of the simdlen clause, or 0 when none was given.
/// \param State Selects unmasked ("N") and/or masked ("M") Advanced SIMD
/// names; SVE always gets a single masked name.
/// \param ISA 's' for SVE or 'n' for Advanced SIMD.
/// \param VecRegSize Not read by this function.
/// \param SLoc Location used for the warnings issued on a rejected simdlen.
///
/// A warning is reported and no attribute is added when simdlen is 1, when it
/// is not a power of 2 for Advanced SIMD, or when simdlen * WDS exceeds 2048
/// or is not a multiple of 128 for SVE.
static void emitAArch64DeclareSimdFunction(
    CodeGenModule &CGM, const FunctionDecl *FD, unsigned UserVLEN,
    ArrayRef<ParamAttrTy> ParamAttrs,
    OMPDeclareSimdDeclAttr::BranchStateTy State, StringRef MangledName,
    char ISA, unsigned VecRegSize, llvm::Function *Fn, SourceLocation SLoc) {

  // Get basic data for building the vector signature.
  const auto Data = getNDSWDS(FD, ParamAttrs);
  const unsigned NDS = std::get<0>(Data);
  const unsigned WDS = std::get<1>(Data);
  const bool OutputBecomesInput = std::get<2>(Data);

  // Check the values provided via `simdlen` by the user.
  // 1. A `simdlen(1)` doesn't produce vector signatures,
  if (UserVLEN == 1) {
    unsigned DiagID = CGM.getDiags().getCustomDiagID(
        DiagnosticsEngine::Warning,
        "The clause simdlen(1) has no effect when targeting aarch64.");
    CGM.getDiags().Report(SLoc, DiagID);
    return;
  }

  // 2. Section 3.3.1, item 1: user input must be a power of 2 for
  // Advanced SIMD output.
  if (ISA == 'n' && UserVLEN && !llvm::isPowerOf2_32(UserVLEN)) {
    unsigned DiagID = CGM.getDiags().getCustomDiagID(
        DiagnosticsEngine::Warning, "The value specified in simdlen must be a "
                                    "power of 2 when targeting Advanced SIMD.");
    CGM.getDiags().Report(SLoc, DiagID);
    return;
  }

  // 3. Section 3.4.1. SVE fixed length must obey the architectural
  // limits.
  if (ISA == 's' && UserVLEN != 0) {
    if ((UserVLEN * WDS > 2048) || (UserVLEN * WDS % 128 != 0)) {
      unsigned DiagID = CGM.getDiags().getCustomDiagID(
          DiagnosticsEngine::Warning, "The clause simdlen must fit the %0-bit "
                                      "lanes in the architectural constraints "
                                      "for SVE (min is 128-bit, max is "
                                      "2048-bit, by steps of 128-bit)");
      CGM.getDiags().Report(SLoc, DiagID) << WDS;
      return;
    }
  }

  // Sort out parameter sequence.
  const std::string ParSeq = mangleVectorParameters(ParamAttrs);
  StringRef Prefix = "_ZGV";
  // Generate simdlen from user input (if any).
  if (UserVLEN) {
    if (ISA == 's') {
      // SVE generates only a masked function.
      addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
                           OutputBecomesInput, Fn);
    } else {
      assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
      // Advanced SIMD generates one or two functions, depending on
      // the `[not]inbranch` clause.
      switch (State) {
      case OMPDeclareSimdDeclAttr::BS_Undefined:
        addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Notinbranch:
        addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Inbranch:
        addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        break;
      }
    }
  } else {
    // If no user simdlen is provided, follow the AAVFABI rules for
    // generating the vector length.
    if (ISA == 's') {
      // SVE, section 3.4.1, item 1.
      addAArch64VectorName("x", "M", Prefix, ISA, ParSeq, MangledName,
                           OutputBecomesInput, Fn);
    } else {
      assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
      // Advanced SIMD, Section 3.3.1 of the AAVFABI, generates one or
      // two vector names depending on the use of the clause
      // `[not]inbranch`.
      switch (State) {
      case OMPDeclareSimdDeclAttr::BS_Undefined:
        addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Notinbranch:
        addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Inbranch:
        addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        break;
      }
    }
  }
}

/// Translates every OMPDeclareSimdDeclAttr found on \p FD and on its previous
/// declarations into vector-variant function attributes on \p Fn.
///
/// For each attribute the uniform, aligned and linear clauses are turned into
/// one ParamAttrTy per parameter position (position 0 is the function itself,
/// standing for 'this', when \p FD is a C++ method). The attributes are then
/// emitted by the x86 helper on x86 triples, or by the AArch64 helper for
/// each of the "sve" and "neon" target features on aarch64; other targets get
/// nothing.
void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD,
                                              llvm::Function *Fn) {
  ASTContext &C = CGM.getContext();
  FD = FD->getMostRecentDecl();
  // Map params to their positions in function decl.
  llvm::DenseMap<const Decl *, unsigned> ParamPositions;
  if (isa<CXXMethodDecl>(FD))
    ParamPositions.try_emplace(FD, 0);
  unsigned ParamPos = ParamPositions.size();
  for (const ParmVarDecl *P : FD->parameters()) {
    ParamPositions.try_emplace(P->getCanonicalDecl(), ParamPos);
    ++ParamPos;
  }
  // Walk the redeclaration chain from the most recent declaration backwards.
  // NOTE(review): the ParamPositions[...] lookups below use operator[], which
  // inserts a zero entry for a missing key and so would grow
  // ParamPositions.size() for later iterations. Presumably every key is
  // expected to be present already; confirm for previous declarations.
  while (FD) {
    for (const auto *Attr : FD->specific_attrs<OMPDeclareSimdDeclAttr>()) {
      // Every position starts out as a default ParamAttrTy (kind Vector).
      llvm::SmallVector<ParamAttrTy, 8> ParamAttrs(ParamPositions.size());
      // Mark uniform parameters.
      for (const Expr *E : Attr->uniforms()) {
        E = E->IgnoreParenImpCasts();
        unsigned Pos;
        if (isa<CXXThisExpr>(E)) {
          Pos = ParamPositions[FD];
        } else {
          const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
                                ->getCanonicalDecl();
          Pos = ParamPositions[PVD];
        }
        ParamAttrs[Pos].Kind = Uniform;
      }
      // Get alignment info.
      // NI advances in step with the aligned expressions; a null entry means
      // the default SIMD alignment of the parameter type is used instead.
      auto NI = Attr->alignments_begin();
      for (const Expr *E : Attr->aligneds()) {
        E = E->IgnoreParenImpCasts();
        unsigned Pos;
        QualType ParmTy;
        if (isa<CXXThisExpr>(E)) {
          Pos = ParamPositions[FD];
          ParmTy = E->getType();
        } else {
          const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
                                ->getCanonicalDecl();
          Pos = ParamPositions[PVD];
          ParmTy = PVD->getType();
        }
        ParamAttrs[Pos].Alignment =
            (*NI)
                ? (*NI)->EvaluateKnownConstInt(C)
                : llvm::APSInt::getUnsigned(
                      C.toCharUnitsFromBits(C.getOpenMPDefaultSimdAlign(ParmTy))
                          .getQuantity());
        ++NI;
      }
      // Mark linear parameters.
      // SI walks the step expressions in step with the linear expressions.
      // MI is advanced alongside but is never dereferenced in this function.
      auto SI = Attr->steps_begin();
      auto MI = Attr->modifiers_begin();
      for (const Expr *E : Attr->linears()) {
        E = E->IgnoreParenImpCasts();
        unsigned Pos;
        // Rescaling factor needed to compute the linear parameter
        // value in the mangled name.
        unsigned PtrRescalingFactor = 1;
        if (isa<CXXThisExpr>(E)) {
          Pos = ParamPositions[FD];
        } else {
          const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
                                ->getCanonicalDecl();
          Pos = ParamPositions[PVD];
          if (auto *P = dyn_cast<PointerType>(PVD->getType()))
            PtrRescalingFactor = CGM.getContext()
                                     .getTypeSizeInChars(P->getPointeeType())
                                     .getQuantity();
        }
        ParamAttrTy &ParamAttr = ParamAttrs[Pos];
        ParamAttr.Kind = Linear;
        // Assuming a stride of 1, for `linear` without modifiers.
        ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(1);
        if (*SI) {
          Expr::EvalResult Result;
          if (!(*SI)->EvaluateAsInt(Result, C, Expr::SE_AllowSideEffects)) {
            // The step is not a constant: record the position of the
            // parameter that holds it.
            // NOTE(review): cast<> asserts on a type mismatch instead of
            // returning null, so both conditions below always hold when
            // reached; confirm whether dyn_cast<> was intended.
            if (const auto *DRE =
                    cast<DeclRefExpr>((*SI)->IgnoreParenImpCasts())) {
              if (const auto *StridePVD = cast<ParmVarDecl>(DRE->getDecl())) {
                ParamAttr.Kind = LinearWithVarStride;
                ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(
                    ParamPositions[StridePVD->getCanonicalDecl()]);
              }
            }
          } else {
            ParamAttr.StrideOrArg = Result.Val.getInt();
          }
        }
        // If we are using a linear clause on a pointer, we need to
        // rescale the value of linear_step with the byte size of the
        // pointee type.
        if (Linear == ParamAttr.Kind)
          ParamAttr.StrideOrArg = ParamAttr.StrideOrArg * PtrRescalingFactor;
        ++SI;
        ++MI;
      }
      // VLENVal keeps its default-constructed value when there is no simdlen
      // clause.
      llvm::APSInt VLENVal;
      SourceLocation ExprLoc;
      const Expr *VLENExpr = Attr->getSimdlen();
      if (VLENExpr) {
        VLENVal = VLENExpr->EvaluateKnownConstInt(C);
        ExprLoc = VLENExpr->getExprLoc();
      }
      OMPDeclareSimdDeclAttr::BranchStateTy State = Attr->getBranchState();
      if (CGM.getTriple().isX86()) {
        emitX86DeclareSimdFunction(FD, Fn, VLENVal, ParamAttrs, State);
      } else if (CGM.getTriple().getArch() == llvm::Triple::aarch64) {
        unsigned VLEN = VLENVal.getExtValue();
        StringRef MangledName = Fn->getName();
        if (CGM.getTarget().hasFeature("sve"))
          emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
                                         MangledName, 's', 128, Fn, ExprLoc);
        if (CGM.getTarget().hasFeature("neon"))
          emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
                                         MangledName, 'n', 128, Fn, ExprLoc);
      }
    }
    FD = FD->getPreviousDecl();
  }
}

namespace {
/// Cleanup action for doacross support.
12083 class DoacrossCleanupTy final : public EHScopeStack::Cleanup { 12084 public: 12085 static const int DoacrossFinArgs = 2; 12086 12087 private: 12088 llvm::FunctionCallee RTLFn; 12089 llvm::Value *Args[DoacrossFinArgs]; 12090 12091 public: 12092 DoacrossCleanupTy(llvm::FunctionCallee RTLFn, 12093 ArrayRef<llvm::Value *> CallArgs) 12094 : RTLFn(RTLFn) { 12095 assert(CallArgs.size() == DoacrossFinArgs); 12096 std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args)); 12097 } 12098 void Emit(CodeGenFunction &CGF, Flags /*flags*/) override { 12099 if (!CGF.HaveInsertPoint()) 12100 return; 12101 CGF.EmitRuntimeCall(RTLFn, Args); 12102 } 12103 }; 12104 } // namespace 12105 12106 void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF, 12107 const OMPLoopDirective &D, 12108 ArrayRef<Expr *> NumIterations) { 12109 if (!CGF.HaveInsertPoint()) 12110 return; 12111 12112 ASTContext &C = CGM.getContext(); 12113 QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true); 12114 RecordDecl *RD; 12115 if (KmpDimTy.isNull()) { 12116 // Build struct kmp_dim { // loop bounds info casted to kmp_int64 12117 // kmp_int64 lo; // lower 12118 // kmp_int64 up; // upper 12119 // kmp_int64 st; // stride 12120 // }; 12121 RD = C.buildImplicitRecord("kmp_dim"); 12122 RD->startDefinition(); 12123 addFieldToRecordDecl(C, RD, Int64Ty); 12124 addFieldToRecordDecl(C, RD, Int64Ty); 12125 addFieldToRecordDecl(C, RD, Int64Ty); 12126 RD->completeDefinition(); 12127 KmpDimTy = C.getRecordType(RD); 12128 } else { 12129 RD = cast<RecordDecl>(KmpDimTy->getAsTagDecl()); 12130 } 12131 llvm::APInt Size(/*numBits=*/32, NumIterations.size()); 12132 QualType ArrayTy = 12133 C.getConstantArrayType(KmpDimTy, Size, nullptr, ArrayType::Normal, 0); 12134 12135 Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims"); 12136 CGF.EmitNullInitialization(DimsAddr, ArrayTy); 12137 enum { LowerFD = 0, UpperFD, StrideFD }; 12138 // Fill dims with data. 
12139 for (unsigned I = 0, E = NumIterations.size(); I < E; ++I) { 12140 LValue DimsLVal = CGF.MakeAddrLValue( 12141 CGF.Builder.CreateConstArrayGEP(DimsAddr, I), KmpDimTy); 12142 // dims.upper = num_iterations; 12143 LValue UpperLVal = CGF.EmitLValueForField( 12144 DimsLVal, *std::next(RD->field_begin(), UpperFD)); 12145 llvm::Value *NumIterVal = CGF.EmitScalarConversion( 12146 CGF.EmitScalarExpr(NumIterations[I]), NumIterations[I]->getType(), 12147 Int64Ty, NumIterations[I]->getExprLoc()); 12148 CGF.EmitStoreOfScalar(NumIterVal, UpperLVal); 12149 // dims.stride = 1; 12150 LValue StrideLVal = CGF.EmitLValueForField( 12151 DimsLVal, *std::next(RD->field_begin(), StrideFD)); 12152 CGF.EmitStoreOfScalar(llvm::ConstantInt::getSigned(CGM.Int64Ty, /*V=*/1), 12153 StrideLVal); 12154 } 12155 12156 // Build call void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid, 12157 // kmp_int32 num_dims, struct kmp_dim * dims); 12158 llvm::Value *Args[] = { 12159 emitUpdateLocation(CGF, D.getBeginLoc()), 12160 getThreadID(CGF, D.getBeginLoc()), 12161 llvm::ConstantInt::getSigned(CGM.Int32Ty, NumIterations.size()), 12162 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 12163 CGF.Builder.CreateConstArrayGEP(DimsAddr, 0).getPointer(), 12164 CGM.VoidPtrTy)}; 12165 12166 llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction( 12167 CGM.getModule(), OMPRTL___kmpc_doacross_init); 12168 CGF.EmitRuntimeCall(RTLFn, Args); 12169 llvm::Value *FiniArgs[DoacrossCleanupTy::DoacrossFinArgs] = { 12170 emitUpdateLocation(CGF, D.getEndLoc()), getThreadID(CGF, D.getEndLoc())}; 12171 llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction( 12172 CGM.getModule(), OMPRTL___kmpc_doacross_fini); 12173 CGF.EHStack.pushCleanup<DoacrossCleanupTy>(NormalAndEHCleanup, FiniRTLFn, 12174 llvm::makeArrayRef(FiniArgs)); 12175 } 12176 12177 void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF, 12178 const OMPDependClause *C) { 12179 QualType Int64Ty = 12180 
CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1); 12181 llvm::APInt Size(/*numBits=*/32, C->getNumLoops()); 12182 QualType ArrayTy = CGM.getContext().getConstantArrayType( 12183 Int64Ty, Size, nullptr, ArrayType::Normal, 0); 12184 Address CntAddr = CGF.CreateMemTemp(ArrayTy, ".cnt.addr"); 12185 for (unsigned I = 0, E = C->getNumLoops(); I < E; ++I) { 12186 const Expr *CounterVal = C->getLoopData(I); 12187 assert(CounterVal); 12188 llvm::Value *CntVal = CGF.EmitScalarConversion( 12189 CGF.EmitScalarExpr(CounterVal), CounterVal->getType(), Int64Ty, 12190 CounterVal->getExprLoc()); 12191 CGF.EmitStoreOfScalar(CntVal, CGF.Builder.CreateConstArrayGEP(CntAddr, I), 12192 /*Volatile=*/false, Int64Ty); 12193 } 12194 llvm::Value *Args[] = { 12195 emitUpdateLocation(CGF, C->getBeginLoc()), 12196 getThreadID(CGF, C->getBeginLoc()), 12197 CGF.Builder.CreateConstArrayGEP(CntAddr, 0).getPointer()}; 12198 llvm::FunctionCallee RTLFn; 12199 if (C->getDependencyKind() == OMPC_DEPEND_source) { 12200 RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), 12201 OMPRTL___kmpc_doacross_post); 12202 } else { 12203 assert(C->getDependencyKind() == OMPC_DEPEND_sink); 12204 RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), 12205 OMPRTL___kmpc_doacross_wait); 12206 } 12207 CGF.EmitRuntimeCall(RTLFn, Args); 12208 } 12209 12210 void CGOpenMPRuntime::emitCall(CodeGenFunction &CGF, SourceLocation Loc, 12211 llvm::FunctionCallee Callee, 12212 ArrayRef<llvm::Value *> Args) const { 12213 assert(Loc.isValid() && "Outlined function call location must be valid."); 12214 auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc); 12215 12216 if (auto *Fn = dyn_cast<llvm::Function>(Callee.getCallee())) { 12217 if (Fn->doesNotThrow()) { 12218 CGF.EmitNounwindRuntimeCall(Fn, Args); 12219 return; 12220 } 12221 } 12222 CGF.EmitRuntimeCall(Callee, Args); 12223 } 12224 12225 void CGOpenMPRuntime::emitOutlinedFunctionCall( 12226 CodeGenFunction &CGF, SourceLocation 
Loc, llvm::FunctionCallee OutlinedFn, 12227 ArrayRef<llvm::Value *> Args) const { 12228 emitCall(CGF, Loc, OutlinedFn, Args); 12229 } 12230 12231 void CGOpenMPRuntime::emitFunctionProlog(CodeGenFunction &CGF, const Decl *D) { 12232 if (const auto *FD = dyn_cast<FunctionDecl>(D)) 12233 if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(FD)) 12234 HasEmittedDeclareTargetRegion = true; 12235 } 12236 12237 Address CGOpenMPRuntime::getParameterAddress(CodeGenFunction &CGF, 12238 const VarDecl *NativeParam, 12239 const VarDecl *TargetParam) const { 12240 return CGF.GetAddrOfLocalVar(NativeParam); 12241 } 12242 12243 Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF, 12244 const VarDecl *VD) { 12245 if (!VD) 12246 return Address::invalid(); 12247 Address UntiedAddr = Address::invalid(); 12248 Address UntiedRealAddr = Address::invalid(); 12249 auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn); 12250 if (It != FunctionToUntiedTaskStackMap.end()) { 12251 const UntiedLocalVarsAddressesMap &UntiedData = 12252 UntiedLocalVarsStack[It->second]; 12253 auto I = UntiedData.find(VD); 12254 if (I != UntiedData.end()) { 12255 UntiedAddr = I->second.first; 12256 UntiedRealAddr = I->second.second; 12257 } 12258 } 12259 const VarDecl *CVD = VD->getCanonicalDecl(); 12260 if (CVD->hasAttr<OMPAllocateDeclAttr>()) { 12261 // Use the default allocation. 
12262 if (!isAllocatableDecl(VD)) 12263 return UntiedAddr; 12264 llvm::Value *Size; 12265 CharUnits Align = CGM.getContext().getDeclAlign(CVD); 12266 if (CVD->getType()->isVariablyModifiedType()) { 12267 Size = CGF.getTypeSize(CVD->getType()); 12268 // Align the size: ((size + align - 1) / align) * align 12269 Size = CGF.Builder.CreateNUWAdd( 12270 Size, CGM.getSize(Align - CharUnits::fromQuantity(1))); 12271 Size = CGF.Builder.CreateUDiv(Size, CGM.getSize(Align)); 12272 Size = CGF.Builder.CreateNUWMul(Size, CGM.getSize(Align)); 12273 } else { 12274 CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType()); 12275 Size = CGM.getSize(Sz.alignTo(Align)); 12276 } 12277 llvm::Value *ThreadID = getThreadID(CGF, CVD->getBeginLoc()); 12278 const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>(); 12279 assert(AA->getAllocator() && 12280 "Expected allocator expression for non-default allocator."); 12281 llvm::Value *Allocator = CGF.EmitScalarExpr(AA->getAllocator()); 12282 // According to the standard, the original allocator type is a enum 12283 // (integer). Convert to pointer type, if required. 
12284 Allocator = CGF.EmitScalarConversion( 12285 Allocator, AA->getAllocator()->getType(), CGF.getContext().VoidPtrTy, 12286 AA->getAllocator()->getExprLoc()); 12287 llvm::Value *Args[] = {ThreadID, Size, Allocator}; 12288 12289 llvm::Value *Addr = 12290 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 12291 CGM.getModule(), OMPRTL___kmpc_alloc), 12292 Args, getName({CVD->getName(), ".void.addr"})); 12293 llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction( 12294 CGM.getModule(), OMPRTL___kmpc_free); 12295 QualType Ty = CGM.getContext().getPointerType(CVD->getType()); 12296 Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 12297 Addr, CGF.ConvertTypeForMem(Ty), getName({CVD->getName(), ".addr"})); 12298 if (UntiedAddr.isValid()) 12299 CGF.EmitStoreOfScalar(Addr, UntiedAddr, /*Volatile=*/false, Ty); 12300 12301 // Cleanup action for allocate support. 12302 class OMPAllocateCleanupTy final : public EHScopeStack::Cleanup { 12303 llvm::FunctionCallee RTLFn; 12304 SourceLocation::UIntTy LocEncoding; 12305 Address Addr; 12306 const Expr *Allocator; 12307 12308 public: 12309 OMPAllocateCleanupTy(llvm::FunctionCallee RTLFn, 12310 SourceLocation::UIntTy LocEncoding, Address Addr, 12311 const Expr *Allocator) 12312 : RTLFn(RTLFn), LocEncoding(LocEncoding), Addr(Addr), 12313 Allocator(Allocator) {} 12314 void Emit(CodeGenFunction &CGF, Flags /*flags*/) override { 12315 if (!CGF.HaveInsertPoint()) 12316 return; 12317 llvm::Value *Args[3]; 12318 Args[0] = CGF.CGM.getOpenMPRuntime().getThreadID( 12319 CGF, SourceLocation::getFromRawEncoding(LocEncoding)); 12320 Args[1] = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 12321 Addr.getPointer(), CGF.VoidPtrTy); 12322 llvm::Value *AllocVal = CGF.EmitScalarExpr(Allocator); 12323 // According to the standard, the original allocator type is a enum 12324 // (integer). Convert to pointer type, if required. 
12325 AllocVal = CGF.EmitScalarConversion(AllocVal, Allocator->getType(), 12326 CGF.getContext().VoidPtrTy, 12327 Allocator->getExprLoc()); 12328 Args[2] = AllocVal; 12329 12330 CGF.EmitRuntimeCall(RTLFn, Args); 12331 } 12332 }; 12333 Address VDAddr = 12334 UntiedRealAddr.isValid() ? UntiedRealAddr : Address(Addr, Align); 12335 CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>( 12336 NormalAndEHCleanup, FiniRTLFn, CVD->getLocation().getRawEncoding(), 12337 VDAddr, AA->getAllocator()); 12338 if (UntiedRealAddr.isValid()) 12339 if (auto *Region = 12340 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) 12341 Region->emitUntiedSwitch(CGF); 12342 return VDAddr; 12343 } 12344 return UntiedAddr; 12345 } 12346 12347 bool CGOpenMPRuntime::isLocalVarInUntiedTask(CodeGenFunction &CGF, 12348 const VarDecl *VD) const { 12349 auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn); 12350 if (It == FunctionToUntiedTaskStackMap.end()) 12351 return false; 12352 return UntiedLocalVarsStack[It->second].count(VD) > 0; 12353 } 12354 12355 CGOpenMPRuntime::NontemporalDeclsRAII::NontemporalDeclsRAII( 12356 CodeGenModule &CGM, const OMPLoopDirective &S) 12357 : CGM(CGM), NeedToPush(S.hasClausesOfKind<OMPNontemporalClause>()) { 12358 assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode."); 12359 if (!NeedToPush) 12360 return; 12361 NontemporalDeclsSet &DS = 12362 CGM.getOpenMPRuntime().NontemporalDeclsStack.emplace_back(); 12363 for (const auto *C : S.getClausesOfKind<OMPNontemporalClause>()) { 12364 for (const Stmt *Ref : C->private_refs()) { 12365 const auto *SimpleRefExpr = cast<Expr>(Ref)->IgnoreParenImpCasts(); 12366 const ValueDecl *VD; 12367 if (const auto *DRE = dyn_cast<DeclRefExpr>(SimpleRefExpr)) { 12368 VD = DRE->getDecl(); 12369 } else { 12370 const auto *ME = cast<MemberExpr>(SimpleRefExpr); 12371 assert((ME->isImplicitCXXThis() || 12372 isa<CXXThisExpr>(ME->getBase()->IgnoreParenImpCasts())) && 12373 "Expected member of current class."); 12374 VD = 
ME->getMemberDecl(); 12375 } 12376 DS.insert(VD); 12377 } 12378 } 12379 } 12380 12381 CGOpenMPRuntime::NontemporalDeclsRAII::~NontemporalDeclsRAII() { 12382 if (!NeedToPush) 12383 return; 12384 CGM.getOpenMPRuntime().NontemporalDeclsStack.pop_back(); 12385 } 12386 12387 CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::UntiedTaskLocalDeclsRAII( 12388 CodeGenFunction &CGF, 12389 const llvm::MapVector<CanonicalDeclPtr<const VarDecl>, 12390 std::pair<Address, Address>> &LocalVars) 12391 : CGM(CGF.CGM), NeedToPush(!LocalVars.empty()) { 12392 if (!NeedToPush) 12393 return; 12394 CGM.getOpenMPRuntime().FunctionToUntiedTaskStackMap.try_emplace( 12395 CGF.CurFn, CGM.getOpenMPRuntime().UntiedLocalVarsStack.size()); 12396 CGM.getOpenMPRuntime().UntiedLocalVarsStack.push_back(LocalVars); 12397 } 12398 12399 CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::~UntiedTaskLocalDeclsRAII() { 12400 if (!NeedToPush) 12401 return; 12402 CGM.getOpenMPRuntime().UntiedLocalVarsStack.pop_back(); 12403 } 12404 12405 bool CGOpenMPRuntime::isNontemporalDecl(const ValueDecl *VD) const { 12406 assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode."); 12407 12408 return llvm::any_of( 12409 CGM.getOpenMPRuntime().NontemporalDeclsStack, 12410 [VD](const NontemporalDeclsSet &Set) { return Set.contains(VD); }); 12411 } 12412 12413 void CGOpenMPRuntime::LastprivateConditionalRAII::tryToDisableInnerAnalysis( 12414 const OMPExecutableDirective &S, 12415 llvm::DenseSet<CanonicalDeclPtr<const Decl>> &NeedToAddForLPCsAsDisabled) 12416 const { 12417 llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToCheckForLPCs; 12418 // Vars in target/task regions must be excluded completely. 
12419 if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()) || 12420 isOpenMPTaskingDirective(S.getDirectiveKind())) { 12421 SmallVector<OpenMPDirectiveKind, 4> CaptureRegions; 12422 getOpenMPCaptureRegions(CaptureRegions, S.getDirectiveKind()); 12423 const CapturedStmt *CS = S.getCapturedStmt(CaptureRegions.front()); 12424 for (const CapturedStmt::Capture &Cap : CS->captures()) { 12425 if (Cap.capturesVariable() || Cap.capturesVariableByCopy()) 12426 NeedToCheckForLPCs.insert(Cap.getCapturedVar()); 12427 } 12428 } 12429 // Exclude vars in private clauses. 12430 for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) { 12431 for (const Expr *Ref : C->varlists()) { 12432 if (!Ref->getType()->isScalarType()) 12433 continue; 12434 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts()); 12435 if (!DRE) 12436 continue; 12437 NeedToCheckForLPCs.insert(DRE->getDecl()); 12438 } 12439 } 12440 for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) { 12441 for (const Expr *Ref : C->varlists()) { 12442 if (!Ref->getType()->isScalarType()) 12443 continue; 12444 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts()); 12445 if (!DRE) 12446 continue; 12447 NeedToCheckForLPCs.insert(DRE->getDecl()); 12448 } 12449 } 12450 for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) { 12451 for (const Expr *Ref : C->varlists()) { 12452 if (!Ref->getType()->isScalarType()) 12453 continue; 12454 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts()); 12455 if (!DRE) 12456 continue; 12457 NeedToCheckForLPCs.insert(DRE->getDecl()); 12458 } 12459 } 12460 for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) { 12461 for (const Expr *Ref : C->varlists()) { 12462 if (!Ref->getType()->isScalarType()) 12463 continue; 12464 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts()); 12465 if (!DRE) 12466 continue; 12467 NeedToCheckForLPCs.insert(DRE->getDecl()); 12468 } 12469 } 12470 for (const auto *C : 
S.getClausesOfKind<OMPLinearClause>()) { 12471 for (const Expr *Ref : C->varlists()) { 12472 if (!Ref->getType()->isScalarType()) 12473 continue; 12474 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts()); 12475 if (!DRE) 12476 continue; 12477 NeedToCheckForLPCs.insert(DRE->getDecl()); 12478 } 12479 } 12480 for (const Decl *VD : NeedToCheckForLPCs) { 12481 for (const LastprivateConditionalData &Data : 12482 llvm::reverse(CGM.getOpenMPRuntime().LastprivateConditionalStack)) { 12483 if (Data.DeclToUniqueName.count(VD) > 0) { 12484 if (!Data.Disabled) 12485 NeedToAddForLPCsAsDisabled.insert(VD); 12486 break; 12487 } 12488 } 12489 } 12490 } 12491 12492 CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII( 12493 CodeGenFunction &CGF, const OMPExecutableDirective &S, LValue IVLVal) 12494 : CGM(CGF.CGM), 12495 Action((CGM.getLangOpts().OpenMP >= 50 && 12496 llvm::any_of(S.getClausesOfKind<OMPLastprivateClause>(), 12497 [](const OMPLastprivateClause *C) { 12498 return C->getKind() == 12499 OMPC_LASTPRIVATE_conditional; 12500 })) 12501 ? 
ActionToDo::PushAsLastprivateConditional 12502 : ActionToDo::DoNotPush) { 12503 assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode."); 12504 if (CGM.getLangOpts().OpenMP < 50 || Action == ActionToDo::DoNotPush) 12505 return; 12506 assert(Action == ActionToDo::PushAsLastprivateConditional && 12507 "Expected a push action."); 12508 LastprivateConditionalData &Data = 12509 CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back(); 12510 for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) { 12511 if (C->getKind() != OMPC_LASTPRIVATE_conditional) 12512 continue; 12513 12514 for (const Expr *Ref : C->varlists()) { 12515 Data.DeclToUniqueName.insert(std::make_pair( 12516 cast<DeclRefExpr>(Ref->IgnoreParenImpCasts())->getDecl(), 12517 SmallString<16>(generateUniqueName(CGM, "pl_cond", Ref)))); 12518 } 12519 } 12520 Data.IVLVal = IVLVal; 12521 Data.Fn = CGF.CurFn; 12522 } 12523 12524 CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII( 12525 CodeGenFunction &CGF, const OMPExecutableDirective &S) 12526 : CGM(CGF.CGM), Action(ActionToDo::DoNotPush) { 12527 assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode."); 12528 if (CGM.getLangOpts().OpenMP < 50) 12529 return; 12530 llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToAddForLPCsAsDisabled; 12531 tryToDisableInnerAnalysis(S, NeedToAddForLPCsAsDisabled); 12532 if (!NeedToAddForLPCsAsDisabled.empty()) { 12533 Action = ActionToDo::DisableLastprivateConditional; 12534 LastprivateConditionalData &Data = 12535 CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back(); 12536 for (const Decl *VD : NeedToAddForLPCsAsDisabled) 12537 Data.DeclToUniqueName.insert(std::make_pair(VD, SmallString<16>())); 12538 Data.Fn = CGF.CurFn; 12539 Data.Disabled = true; 12540 } 12541 } 12542 12543 CGOpenMPRuntime::LastprivateConditionalRAII 12544 CGOpenMPRuntime::LastprivateConditionalRAII::disable( 12545 CodeGenFunction &CGF, const OMPExecutableDirective &S) { 12546 return 
LastprivateConditionalRAII(CGF, S); 12547 } 12548 12549 CGOpenMPRuntime::LastprivateConditionalRAII::~LastprivateConditionalRAII() { 12550 if (CGM.getLangOpts().OpenMP < 50) 12551 return; 12552 if (Action == ActionToDo::DisableLastprivateConditional) { 12553 assert(CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled && 12554 "Expected list of disabled private vars."); 12555 CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back(); 12556 } 12557 if (Action == ActionToDo::PushAsLastprivateConditional) { 12558 assert( 12559 !CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled && 12560 "Expected list of lastprivate conditional vars."); 12561 CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back(); 12562 } 12563 } 12564 12565 Address CGOpenMPRuntime::emitLastprivateConditionalInit(CodeGenFunction &CGF, 12566 const VarDecl *VD) { 12567 ASTContext &C = CGM.getContext(); 12568 auto I = LastprivateConditionalToTypes.find(CGF.CurFn); 12569 if (I == LastprivateConditionalToTypes.end()) 12570 I = LastprivateConditionalToTypes.try_emplace(CGF.CurFn).first; 12571 QualType NewType; 12572 const FieldDecl *VDField; 12573 const FieldDecl *FiredField; 12574 LValue BaseLVal; 12575 auto VI = I->getSecond().find(VD); 12576 if (VI == I->getSecond().end()) { 12577 RecordDecl *RD = C.buildImplicitRecord("lasprivate.conditional"); 12578 RD->startDefinition(); 12579 VDField = addFieldToRecordDecl(C, RD, VD->getType().getNonReferenceType()); 12580 FiredField = addFieldToRecordDecl(C, RD, C.CharTy); 12581 RD->completeDefinition(); 12582 NewType = C.getRecordType(RD); 12583 Address Addr = CGF.CreateMemTemp(NewType, C.getDeclAlign(VD), VD->getName()); 12584 BaseLVal = CGF.MakeAddrLValue(Addr, NewType, AlignmentSource::Decl); 12585 I->getSecond().try_emplace(VD, NewType, VDField, FiredField, BaseLVal); 12586 } else { 12587 NewType = std::get<0>(VI->getSecond()); 12588 VDField = std::get<1>(VI->getSecond()); 12589 FiredField = std::get<2>(VI->getSecond()); 
12590 BaseLVal = std::get<3>(VI->getSecond()); 12591 } 12592 LValue FiredLVal = 12593 CGF.EmitLValueForField(BaseLVal, FiredField); 12594 CGF.EmitStoreOfScalar( 12595 llvm::ConstantInt::getNullValue(CGF.ConvertTypeForMem(C.CharTy)), 12596 FiredLVal); 12597 return CGF.EmitLValueForField(BaseLVal, VDField).getAddress(CGF); 12598 } 12599 12600 namespace { 12601 /// Checks if the lastprivate conditional variable is referenced in LHS. 12602 class LastprivateConditionalRefChecker final 12603 : public ConstStmtVisitor<LastprivateConditionalRefChecker, bool> { 12604 ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM; 12605 const Expr *FoundE = nullptr; 12606 const Decl *FoundD = nullptr; 12607 StringRef UniqueDeclName; 12608 LValue IVLVal; 12609 llvm::Function *FoundFn = nullptr; 12610 SourceLocation Loc; 12611 12612 public: 12613 bool VisitDeclRefExpr(const DeclRefExpr *E) { 12614 for (const CGOpenMPRuntime::LastprivateConditionalData &D : 12615 llvm::reverse(LPM)) { 12616 auto It = D.DeclToUniqueName.find(E->getDecl()); 12617 if (It == D.DeclToUniqueName.end()) 12618 continue; 12619 if (D.Disabled) 12620 return false; 12621 FoundE = E; 12622 FoundD = E->getDecl()->getCanonicalDecl(); 12623 UniqueDeclName = It->second; 12624 IVLVal = D.IVLVal; 12625 FoundFn = D.Fn; 12626 break; 12627 } 12628 return FoundE == E; 12629 } 12630 bool VisitMemberExpr(const MemberExpr *E) { 12631 if (!CodeGenFunction::IsWrappedCXXThis(E->getBase())) 12632 return false; 12633 for (const CGOpenMPRuntime::LastprivateConditionalData &D : 12634 llvm::reverse(LPM)) { 12635 auto It = D.DeclToUniqueName.find(E->getMemberDecl()); 12636 if (It == D.DeclToUniqueName.end()) 12637 continue; 12638 if (D.Disabled) 12639 return false; 12640 FoundE = E; 12641 FoundD = E->getMemberDecl()->getCanonicalDecl(); 12642 UniqueDeclName = It->second; 12643 IVLVal = D.IVLVal; 12644 FoundFn = D.Fn; 12645 break; 12646 } 12647 return FoundE == E; 12648 } 12649 bool VisitStmt(const Stmt *S) { 12650 for (const Stmt 
*Child : S->children()) { 12651 if (!Child) 12652 continue; 12653 if (const auto *E = dyn_cast<Expr>(Child)) 12654 if (!E->isGLValue()) 12655 continue; 12656 if (Visit(Child)) 12657 return true; 12658 } 12659 return false; 12660 } 12661 explicit LastprivateConditionalRefChecker( 12662 ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM) 12663 : LPM(LPM) {} 12664 std::tuple<const Expr *, const Decl *, StringRef, LValue, llvm::Function *> 12665 getFoundData() const { 12666 return std::make_tuple(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn); 12667 } 12668 }; 12669 } // namespace 12670 12671 void CGOpenMPRuntime::emitLastprivateConditionalUpdate(CodeGenFunction &CGF, 12672 LValue IVLVal, 12673 StringRef UniqueDeclName, 12674 LValue LVal, 12675 SourceLocation Loc) { 12676 // Last updated loop counter for the lastprivate conditional var. 12677 // int<xx> last_iv = 0; 12678 llvm::Type *LLIVTy = CGF.ConvertTypeForMem(IVLVal.getType()); 12679 llvm::Constant *LastIV = 12680 getOrCreateInternalVariable(LLIVTy, getName({UniqueDeclName, "iv"})); 12681 cast<llvm::GlobalVariable>(LastIV)->setAlignment( 12682 IVLVal.getAlignment().getAsAlign()); 12683 LValue LastIVLVal = CGF.MakeNaturalAlignAddrLValue(LastIV, IVLVal.getType()); 12684 12685 // Last value of the lastprivate conditional. 12686 // decltype(priv_a) last_a; 12687 llvm::Constant *Last = getOrCreateInternalVariable( 12688 CGF.ConvertTypeForMem(LVal.getType()), UniqueDeclName); 12689 cast<llvm::GlobalVariable>(Last)->setAlignment( 12690 LVal.getAlignment().getAsAlign()); 12691 LValue LastLVal = 12692 CGF.MakeAddrLValue(Last, LVal.getType(), LVal.getAlignment()); 12693 12694 // Global loop counter. Required to handle inner parallel-for regions. 
12695 // iv 12696 llvm::Value *IVVal = CGF.EmitLoadOfScalar(IVLVal, Loc); 12697 12698 // #pragma omp critical(a) 12699 // if (last_iv <= iv) { 12700 // last_iv = iv; 12701 // last_a = priv_a; 12702 // } 12703 auto &&CodeGen = [&LastIVLVal, &IVLVal, IVVal, &LVal, &LastLVal, 12704 Loc](CodeGenFunction &CGF, PrePostActionTy &Action) { 12705 Action.Enter(CGF); 12706 llvm::Value *LastIVVal = CGF.EmitLoadOfScalar(LastIVLVal, Loc); 12707 // (last_iv <= iv) ? Check if the variable is updated and store new 12708 // value in global var. 12709 llvm::Value *CmpRes; 12710 if (IVLVal.getType()->isSignedIntegerType()) { 12711 CmpRes = CGF.Builder.CreateICmpSLE(LastIVVal, IVVal); 12712 } else { 12713 assert(IVLVal.getType()->isUnsignedIntegerType() && 12714 "Loop iteration variable must be integer."); 12715 CmpRes = CGF.Builder.CreateICmpULE(LastIVVal, IVVal); 12716 } 12717 llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lp_cond_then"); 12718 llvm::BasicBlock *ExitBB = CGF.createBasicBlock("lp_cond_exit"); 12719 CGF.Builder.CreateCondBr(CmpRes, ThenBB, ExitBB); 12720 // { 12721 CGF.EmitBlock(ThenBB); 12722 12723 // last_iv = iv; 12724 CGF.EmitStoreOfScalar(IVVal, LastIVLVal); 12725 12726 // last_a = priv_a; 12727 switch (CGF.getEvaluationKind(LVal.getType())) { 12728 case TEK_Scalar: { 12729 llvm::Value *PrivVal = CGF.EmitLoadOfScalar(LVal, Loc); 12730 CGF.EmitStoreOfScalar(PrivVal, LastLVal); 12731 break; 12732 } 12733 case TEK_Complex: { 12734 CodeGenFunction::ComplexPairTy PrivVal = CGF.EmitLoadOfComplex(LVal, Loc); 12735 CGF.EmitStoreOfComplex(PrivVal, LastLVal, /*isInit=*/false); 12736 break; 12737 } 12738 case TEK_Aggregate: 12739 llvm_unreachable( 12740 "Aggregates are not supported in lastprivate conditional."); 12741 } 12742 // } 12743 CGF.EmitBranch(ExitBB); 12744 // There is no need to emit line number for unconditional branch. 
12745 (void)ApplyDebugLocation::CreateEmpty(CGF); 12746 CGF.EmitBlock(ExitBB, /*IsFinished=*/true); 12747 }; 12748 12749 if (CGM.getLangOpts().OpenMPSimd) { 12750 // Do not emit as a critical region as no parallel region could be emitted. 12751 RegionCodeGenTy ThenRCG(CodeGen); 12752 ThenRCG(CGF); 12753 } else { 12754 emitCriticalRegion(CGF, UniqueDeclName, CodeGen, Loc); 12755 } 12756 } 12757 12758 void CGOpenMPRuntime::checkAndEmitLastprivateConditional(CodeGenFunction &CGF, 12759 const Expr *LHS) { 12760 if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty()) 12761 return; 12762 LastprivateConditionalRefChecker Checker(LastprivateConditionalStack); 12763 if (!Checker.Visit(LHS)) 12764 return; 12765 const Expr *FoundE; 12766 const Decl *FoundD; 12767 StringRef UniqueDeclName; 12768 LValue IVLVal; 12769 llvm::Function *FoundFn; 12770 std::tie(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn) = 12771 Checker.getFoundData(); 12772 if (FoundFn != CGF.CurFn) { 12773 // Special codegen for inner parallel regions. 
12774 // ((struct.lastprivate.conditional*)&priv_a)->Fired = 1; 12775 auto It = LastprivateConditionalToTypes[FoundFn].find(FoundD); 12776 assert(It != LastprivateConditionalToTypes[FoundFn].end() && 12777 "Lastprivate conditional is not found in outer region."); 12778 QualType StructTy = std::get<0>(It->getSecond()); 12779 const FieldDecl* FiredDecl = std::get<2>(It->getSecond()); 12780 LValue PrivLVal = CGF.EmitLValue(FoundE); 12781 Address StructAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 12782 PrivLVal.getAddress(CGF), 12783 CGF.ConvertTypeForMem(CGF.getContext().getPointerType(StructTy))); 12784 LValue BaseLVal = 12785 CGF.MakeAddrLValue(StructAddr, StructTy, AlignmentSource::Decl); 12786 LValue FiredLVal = CGF.EmitLValueForField(BaseLVal, FiredDecl); 12787 CGF.EmitAtomicStore(RValue::get(llvm::ConstantInt::get( 12788 CGF.ConvertTypeForMem(FiredDecl->getType()), 1)), 12789 FiredLVal, llvm::AtomicOrdering::Unordered, 12790 /*IsVolatile=*/true, /*isInit=*/false); 12791 return; 12792 } 12793 12794 // Private address of the lastprivate conditional in the current context. 
12795 // priv_a 12796 LValue LVal = CGF.EmitLValue(FoundE); 12797 emitLastprivateConditionalUpdate(CGF, IVLVal, UniqueDeclName, LVal, 12798 FoundE->getExprLoc()); 12799 } 12800 12801 void CGOpenMPRuntime::checkAndEmitSharedLastprivateConditional( 12802 CodeGenFunction &CGF, const OMPExecutableDirective &D, 12803 const llvm::DenseSet<CanonicalDeclPtr<const VarDecl>> &IgnoredDecls) { 12804 if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty()) 12805 return; 12806 auto Range = llvm::reverse(LastprivateConditionalStack); 12807 auto It = llvm::find_if( 12808 Range, [](const LastprivateConditionalData &D) { return !D.Disabled; }); 12809 if (It == Range.end() || It->Fn != CGF.CurFn) 12810 return; 12811 auto LPCI = LastprivateConditionalToTypes.find(It->Fn); 12812 assert(LPCI != LastprivateConditionalToTypes.end() && 12813 "Lastprivates must be registered already."); 12814 SmallVector<OpenMPDirectiveKind, 4> CaptureRegions; 12815 getOpenMPCaptureRegions(CaptureRegions, D.getDirectiveKind()); 12816 const CapturedStmt *CS = D.getCapturedStmt(CaptureRegions.back()); 12817 for (const auto &Pair : It->DeclToUniqueName) { 12818 const auto *VD = cast<VarDecl>(Pair.first->getCanonicalDecl()); 12819 if (!CS->capturesVariable(VD) || IgnoredDecls.count(VD) > 0) 12820 continue; 12821 auto I = LPCI->getSecond().find(Pair.first); 12822 assert(I != LPCI->getSecond().end() && 12823 "Lastprivate must be rehistered already."); 12824 // bool Cmp = priv_a.Fired != 0; 12825 LValue BaseLVal = std::get<3>(I->getSecond()); 12826 LValue FiredLVal = 12827 CGF.EmitLValueForField(BaseLVal, std::get<2>(I->getSecond())); 12828 llvm::Value *Res = CGF.EmitLoadOfScalar(FiredLVal, D.getBeginLoc()); 12829 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Res); 12830 llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lpc.then"); 12831 llvm::BasicBlock *DoneBB = CGF.createBasicBlock("lpc.done"); 12832 // if (Cmp) { 12833 CGF.Builder.CreateCondBr(Cmp, ThenBB, DoneBB); 12834 CGF.EmitBlock(ThenBB); 
12835 Address Addr = CGF.GetAddrOfLocalVar(VD); 12836 LValue LVal; 12837 if (VD->getType()->isReferenceType()) 12838 LVal = CGF.EmitLoadOfReferenceLValue(Addr, VD->getType(), 12839 AlignmentSource::Decl); 12840 else 12841 LVal = CGF.MakeAddrLValue(Addr, VD->getType().getNonReferenceType(), 12842 AlignmentSource::Decl); 12843 emitLastprivateConditionalUpdate(CGF, It->IVLVal, Pair.second, LVal, 12844 D.getBeginLoc()); 12845 auto AL = ApplyDebugLocation::CreateArtificial(CGF); 12846 CGF.EmitBlock(DoneBB, /*IsFinal=*/true); 12847 // } 12848 } 12849 } 12850 12851 void CGOpenMPRuntime::emitLastprivateConditionalFinalUpdate( 12852 CodeGenFunction &CGF, LValue PrivLVal, const VarDecl *VD, 12853 SourceLocation Loc) { 12854 if (CGF.getLangOpts().OpenMP < 50) 12855 return; 12856 auto It = LastprivateConditionalStack.back().DeclToUniqueName.find(VD); 12857 assert(It != LastprivateConditionalStack.back().DeclToUniqueName.end() && 12858 "Unknown lastprivate conditional variable."); 12859 StringRef UniqueName = It->second; 12860 llvm::GlobalVariable *GV = CGM.getModule().getNamedGlobal(UniqueName); 12861 // The variable was not updated in the region - exit. 
12862 if (!GV) 12863 return; 12864 LValue LPLVal = CGF.MakeAddrLValue( 12865 GV, PrivLVal.getType().getNonReferenceType(), PrivLVal.getAlignment()); 12866 llvm::Value *Res = CGF.EmitLoadOfScalar(LPLVal, Loc); 12867 CGF.EmitStoreOfScalar(Res, PrivLVal); 12868 } 12869 12870 llvm::Function *CGOpenMPSIMDRuntime::emitParallelOutlinedFunction( 12871 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, 12872 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { 12873 llvm_unreachable("Not supported in SIMD-only mode"); 12874 } 12875 12876 llvm::Function *CGOpenMPSIMDRuntime::emitTeamsOutlinedFunction( 12877 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, 12878 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { 12879 llvm_unreachable("Not supported in SIMD-only mode"); 12880 } 12881 12882 llvm::Function *CGOpenMPSIMDRuntime::emitTaskOutlinedFunction( 12883 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, 12884 const VarDecl *PartIDVar, const VarDecl *TaskTVar, 12885 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen, 12886 bool Tied, unsigned &NumberOfParts) { 12887 llvm_unreachable("Not supported in SIMD-only mode"); 12888 } 12889 12890 void CGOpenMPSIMDRuntime::emitParallelCall(CodeGenFunction &CGF, 12891 SourceLocation Loc, 12892 llvm::Function *OutlinedFn, 12893 ArrayRef<llvm::Value *> CapturedVars, 12894 const Expr *IfCond, 12895 llvm::Value *NumThreads) { 12896 llvm_unreachable("Not supported in SIMD-only mode"); 12897 } 12898 12899 void CGOpenMPSIMDRuntime::emitCriticalRegion( 12900 CodeGenFunction &CGF, StringRef CriticalName, 12901 const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc, 12902 const Expr *Hint) { 12903 llvm_unreachable("Not supported in SIMD-only mode"); 12904 } 12905 12906 void CGOpenMPSIMDRuntime::emitMasterRegion(CodeGenFunction &CGF, 12907 const RegionCodeGenTy &MasterOpGen, 12908 SourceLocation Loc) { 12909 llvm_unreachable("Not supported in SIMD-only mode"); 
12910 } 12911 12912 void CGOpenMPSIMDRuntime::emitMaskedRegion(CodeGenFunction &CGF, 12913 const RegionCodeGenTy &MasterOpGen, 12914 SourceLocation Loc, 12915 const Expr *Filter) { 12916 llvm_unreachable("Not supported in SIMD-only mode"); 12917 } 12918 12919 void CGOpenMPSIMDRuntime::emitTaskyieldCall(CodeGenFunction &CGF, 12920 SourceLocation Loc) { 12921 llvm_unreachable("Not supported in SIMD-only mode"); 12922 } 12923 12924 void CGOpenMPSIMDRuntime::emitTaskgroupRegion( 12925 CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen, 12926 SourceLocation Loc) { 12927 llvm_unreachable("Not supported in SIMD-only mode"); 12928 } 12929 12930 void CGOpenMPSIMDRuntime::emitSingleRegion( 12931 CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen, 12932 SourceLocation Loc, ArrayRef<const Expr *> CopyprivateVars, 12933 ArrayRef<const Expr *> DestExprs, ArrayRef<const Expr *> SrcExprs, 12934 ArrayRef<const Expr *> AssignmentOps) { 12935 llvm_unreachable("Not supported in SIMD-only mode"); 12936 } 12937 12938 void CGOpenMPSIMDRuntime::emitOrderedRegion(CodeGenFunction &CGF, 12939 const RegionCodeGenTy &OrderedOpGen, 12940 SourceLocation Loc, 12941 bool IsThreads) { 12942 llvm_unreachable("Not supported in SIMD-only mode"); 12943 } 12944 12945 void CGOpenMPSIMDRuntime::emitBarrierCall(CodeGenFunction &CGF, 12946 SourceLocation Loc, 12947 OpenMPDirectiveKind Kind, 12948 bool EmitChecks, 12949 bool ForceSimpleCall) { 12950 llvm_unreachable("Not supported in SIMD-only mode"); 12951 } 12952 12953 void CGOpenMPSIMDRuntime::emitForDispatchInit( 12954 CodeGenFunction &CGF, SourceLocation Loc, 12955 const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned, 12956 bool Ordered, const DispatchRTInput &DispatchValues) { 12957 llvm_unreachable("Not supported in SIMD-only mode"); 12958 } 12959 12960 void CGOpenMPSIMDRuntime::emitForStaticInit( 12961 CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind, 12962 const OpenMPScheduleTy &ScheduleKind, const 
StaticRTInput &Values) { 12963 llvm_unreachable("Not supported in SIMD-only mode"); 12964 } 12965 12966 void CGOpenMPSIMDRuntime::emitDistributeStaticInit( 12967 CodeGenFunction &CGF, SourceLocation Loc, 12968 OpenMPDistScheduleClauseKind SchedKind, const StaticRTInput &Values) { 12969 llvm_unreachable("Not supported in SIMD-only mode"); 12970 } 12971 12972 void CGOpenMPSIMDRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF, 12973 SourceLocation Loc, 12974 unsigned IVSize, 12975 bool IVSigned) { 12976 llvm_unreachable("Not supported in SIMD-only mode"); 12977 } 12978 12979 void CGOpenMPSIMDRuntime::emitForStaticFinish(CodeGenFunction &CGF, 12980 SourceLocation Loc, 12981 OpenMPDirectiveKind DKind) { 12982 llvm_unreachable("Not supported in SIMD-only mode"); 12983 } 12984 12985 llvm::Value *CGOpenMPSIMDRuntime::emitForNext(CodeGenFunction &CGF, 12986 SourceLocation Loc, 12987 unsigned IVSize, bool IVSigned, 12988 Address IL, Address LB, 12989 Address UB, Address ST) { 12990 llvm_unreachable("Not supported in SIMD-only mode"); 12991 } 12992 12993 void CGOpenMPSIMDRuntime::emitNumThreadsClause(CodeGenFunction &CGF, 12994 llvm::Value *NumThreads, 12995 SourceLocation Loc) { 12996 llvm_unreachable("Not supported in SIMD-only mode"); 12997 } 12998 12999 void CGOpenMPSIMDRuntime::emitProcBindClause(CodeGenFunction &CGF, 13000 ProcBindKind ProcBind, 13001 SourceLocation Loc) { 13002 llvm_unreachable("Not supported in SIMD-only mode"); 13003 } 13004 13005 Address CGOpenMPSIMDRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF, 13006 const VarDecl *VD, 13007 Address VDAddr, 13008 SourceLocation Loc) { 13009 llvm_unreachable("Not supported in SIMD-only mode"); 13010 } 13011 13012 llvm::Function *CGOpenMPSIMDRuntime::emitThreadPrivateVarDefinition( 13013 const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit, 13014 CodeGenFunction *CGF) { 13015 llvm_unreachable("Not supported in SIMD-only mode"); 13016 } 13017 13018 Address 
CGOpenMPSIMDRuntime::getAddrOfArtificialThreadPrivate( 13019 CodeGenFunction &CGF, QualType VarType, StringRef Name) { 13020 llvm_unreachable("Not supported in SIMD-only mode"); 13021 } 13022 13023 void CGOpenMPSIMDRuntime::emitFlush(CodeGenFunction &CGF, 13024 ArrayRef<const Expr *> Vars, 13025 SourceLocation Loc, 13026 llvm::AtomicOrdering AO) { 13027 llvm_unreachable("Not supported in SIMD-only mode"); 13028 } 13029 13030 void CGOpenMPSIMDRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc, 13031 const OMPExecutableDirective &D, 13032 llvm::Function *TaskFunction, 13033 QualType SharedsTy, Address Shareds, 13034 const Expr *IfCond, 13035 const OMPTaskDataTy &Data) { 13036 llvm_unreachable("Not supported in SIMD-only mode"); 13037 } 13038 13039 void CGOpenMPSIMDRuntime::emitTaskLoopCall( 13040 CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D, 13041 llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds, 13042 const Expr *IfCond, const OMPTaskDataTy &Data) { 13043 llvm_unreachable("Not supported in SIMD-only mode"); 13044 } 13045 13046 void CGOpenMPSIMDRuntime::emitReduction( 13047 CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> Privates, 13048 ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs, 13049 ArrayRef<const Expr *> ReductionOps, ReductionOptionsTy Options) { 13050 assert(Options.SimpleReduction && "Only simple reduction is expected."); 13051 CGOpenMPRuntime::emitReduction(CGF, Loc, Privates, LHSExprs, RHSExprs, 13052 ReductionOps, Options); 13053 } 13054 13055 llvm::Value *CGOpenMPSIMDRuntime::emitTaskReductionInit( 13056 CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs, 13057 ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) { 13058 llvm_unreachable("Not supported in SIMD-only mode"); 13059 } 13060 13061 void CGOpenMPSIMDRuntime::emitTaskReductionFini(CodeGenFunction &CGF, 13062 SourceLocation Loc, 13063 bool IsWorksharingReduction) { 13064 
llvm_unreachable("Not supported in SIMD-only mode"); 13065 } 13066 13067 void CGOpenMPSIMDRuntime::emitTaskReductionFixups(CodeGenFunction &CGF, 13068 SourceLocation Loc, 13069 ReductionCodeGen &RCG, 13070 unsigned N) { 13071 llvm_unreachable("Not supported in SIMD-only mode"); 13072 } 13073 13074 Address CGOpenMPSIMDRuntime::getTaskReductionItem(CodeGenFunction &CGF, 13075 SourceLocation Loc, 13076 llvm::Value *ReductionsPtr, 13077 LValue SharedLVal) { 13078 llvm_unreachable("Not supported in SIMD-only mode"); 13079 } 13080 13081 void CGOpenMPSIMDRuntime::emitTaskwaitCall(CodeGenFunction &CGF, 13082 SourceLocation Loc, 13083 const OMPTaskDataTy &Data) { 13084 llvm_unreachable("Not supported in SIMD-only mode"); 13085 } 13086 13087 void CGOpenMPSIMDRuntime::emitCancellationPointCall( 13088 CodeGenFunction &CGF, SourceLocation Loc, 13089 OpenMPDirectiveKind CancelRegion) { 13090 llvm_unreachable("Not supported in SIMD-only mode"); 13091 } 13092 13093 void CGOpenMPSIMDRuntime::emitCancelCall(CodeGenFunction &CGF, 13094 SourceLocation Loc, const Expr *IfCond, 13095 OpenMPDirectiveKind CancelRegion) { 13096 llvm_unreachable("Not supported in SIMD-only mode"); 13097 } 13098 13099 void CGOpenMPSIMDRuntime::emitTargetOutlinedFunction( 13100 const OMPExecutableDirective &D, StringRef ParentName, 13101 llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, 13102 bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) { 13103 llvm_unreachable("Not supported in SIMD-only mode"); 13104 } 13105 13106 void CGOpenMPSIMDRuntime::emitTargetCall( 13107 CodeGenFunction &CGF, const OMPExecutableDirective &D, 13108 llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond, 13109 llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device, 13110 llvm::function_ref<llvm::Value *(CodeGenFunction &CGF, 13111 const OMPLoopDirective &D)> 13112 SizeEmitter) { 13113 llvm_unreachable("Not supported in SIMD-only mode"); 13114 } 13115 13116 bool 
CGOpenMPSIMDRuntime::emitTargetFunctions(GlobalDecl GD) { 13117 llvm_unreachable("Not supported in SIMD-only mode"); 13118 } 13119 13120 bool CGOpenMPSIMDRuntime::emitTargetGlobalVariable(GlobalDecl GD) { 13121 llvm_unreachable("Not supported in SIMD-only mode"); 13122 } 13123 13124 bool CGOpenMPSIMDRuntime::emitTargetGlobal(GlobalDecl GD) { 13125 return false; 13126 } 13127 13128 void CGOpenMPSIMDRuntime::emitTeamsCall(CodeGenFunction &CGF, 13129 const OMPExecutableDirective &D, 13130 SourceLocation Loc, 13131 llvm::Function *OutlinedFn, 13132 ArrayRef<llvm::Value *> CapturedVars) { 13133 llvm_unreachable("Not supported in SIMD-only mode"); 13134 } 13135 13136 void CGOpenMPSIMDRuntime::emitNumTeamsClause(CodeGenFunction &CGF, 13137 const Expr *NumTeams, 13138 const Expr *ThreadLimit, 13139 SourceLocation Loc) { 13140 llvm_unreachable("Not supported in SIMD-only mode"); 13141 } 13142 13143 void CGOpenMPSIMDRuntime::emitTargetDataCalls( 13144 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, 13145 const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) { 13146 llvm_unreachable("Not supported in SIMD-only mode"); 13147 } 13148 13149 void CGOpenMPSIMDRuntime::emitTargetDataStandAloneCall( 13150 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, 13151 const Expr *Device) { 13152 llvm_unreachable("Not supported in SIMD-only mode"); 13153 } 13154 13155 void CGOpenMPSIMDRuntime::emitDoacrossInit(CodeGenFunction &CGF, 13156 const OMPLoopDirective &D, 13157 ArrayRef<Expr *> NumIterations) { 13158 llvm_unreachable("Not supported in SIMD-only mode"); 13159 } 13160 13161 void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF, 13162 const OMPDependClause *C) { 13163 llvm_unreachable("Not supported in SIMD-only mode"); 13164 } 13165 13166 const VarDecl * 13167 CGOpenMPSIMDRuntime::translateParameter(const FieldDecl *FD, 13168 const VarDecl *NativeParam) const { 13169 llvm_unreachable("Not supported in 
SIMD-only mode"); 13170 } 13171 13172 Address 13173 CGOpenMPSIMDRuntime::getParameterAddress(CodeGenFunction &CGF, 13174 const VarDecl *NativeParam, 13175 const VarDecl *TargetParam) const { 13176 llvm_unreachable("Not supported in SIMD-only mode"); 13177 } 13178