//===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This provides a class for OpenMP runtime code generation.
//
//===----------------------------------------------------------------------===//

#include "CGOpenMPRuntime.h"
#include "CGCXXABI.h"
#include "CGCleanup.h"
#include "CGRecordLayout.h"
#include "CodeGenFunction.h"
#include "TargetInfo.h"
#include "clang/AST/APValue.h"
#include "clang/AST/Attr.h"
#include "clang/AST/Decl.h"
#include "clang/AST/OpenMPClause.h"
#include "clang/AST/StmtOpenMP.h"
#include "clang/AST/StmtVisitor.h"
#include "clang/Basic/BitmaskEnum.h"
#include "clang/Basic/FileManager.h"
#include "clang/Basic/OpenMPKinds.h"
#include "clang/Basic/SourceManager.h"
#include "clang/CodeGen/ConstantInitBuilder.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SetOperations.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Bitcode/BitcodeReader.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>
#include <numeric>

using namespace clang;
using namespace CodeGen;
using namespace llvm::omp;

namespace {
/// Base class for handling code generation inside OpenMP regions.
class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
public:
  /// Kinds of OpenMP regions used in codegen.
  enum CGOpenMPRegionKind {
    /// Region with outlined function for standalone 'parallel'
    /// directive.
    ParallelOutlinedRegion,
    /// Region with outlined function for standalone 'task' directive.
    TaskOutlinedRegion,
    /// Region for constructs that do not require function outlining,
    /// like 'for', 'sections', 'atomic' etc. directives.
    InlinedRegion,
    /// Region with outlined function for standalone 'target' directive.
    TargetRegion,
  };

  CGOpenMPRegionInfo(const CapturedStmt &CS,
                     const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind),
        CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {}

  CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen),
        Kind(Kind), HasCancel(HasCancel) {}

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  virtual const VarDecl *getThreadIDVariable() const = 0;

  /// Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override;

  /// Get an LValue for the current ThreadID variable.
  /// \return LValue for thread id variable. This LValue always has type int32*.
  virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF);

  virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {}

  CGOpenMPRegionKind getRegionKind() const { return RegionKind; }

  OpenMPDirectiveKind getDirectiveKind() const { return Kind; }

  bool hasCancel() const { return HasCancel; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return Info->getKind() == CR_OpenMP;
  }

  ~CGOpenMPRegionInfo() override = default;

protected:
  CGOpenMPRegionKind RegionKind;
  RegionCodeGenTy CodeGen;
  OpenMPDirectiveKind Kind;
  bool HasCancel;
};

/// API for captured statement code generation in OpenMP constructs.
class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar,
                             const RegionCodeGenTy &CodeGen,
                             OpenMPDirectiveKind Kind, bool HasCancel,
                             StringRef HelperName)
      : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind,
                           HasCancel),
        ThreadIDVar(ThreadIDVar), HelperName(HelperName) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return HelperName; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               ParallelOutlinedRegion;
  }

private:
  /// A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  StringRef HelperName;
};

/// API for captured statement code generation in OpenMP constructs.
class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  class UntiedTaskActionTy final : public PrePostActionTy {
    bool Untied;
    const VarDecl *PartIDVar;
    const RegionCodeGenTy UntiedCodeGen;
    llvm::SwitchInst *UntiedSwitch = nullptr;

  public:
    UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar,
                       const RegionCodeGenTy &UntiedCodeGen)
        : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {}
    void Enter(CodeGenFunction &CGF) override {
      if (Untied) {
        // Emit task switching point.
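        // Illustrative shape of what is emitted here (a sketch, not
        // verbatim IR):
        //   %part = load i32 from the part id variable
        //   switch i32 %part, label %.untied.done. [ i32 0, %.untied.jmp. ...]
        // Each task part stores the index of the next switch case into the
        // part id before returning, so re-entry of the untied task resumes
        // at the following switching point.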
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        llvm::Value *Res =
            CGF.EmitLoadOfScalar(PartIdLVal, PartIDVar->getLocation());
        llvm::BasicBlock *DoneBB = CGF.createBasicBlock(".untied.done.");
        UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB);
        CGF.EmitBlock(DoneBB);
        CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        UntiedSwitch->addCase(CGF.Builder.getInt32(0),
                              CGF.Builder.GetInsertBlock());
        emitUntiedSwitch(CGF);
      }
    }
    void emitUntiedSwitch(CodeGenFunction &CGF) const {
      if (Untied) {
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              PartIdLVal);
        UntiedCodeGen(CGF);
        CodeGenFunction::JumpDest CurPoint =
            CGF.getJumpDestInCurrentScope(".untied.next.");
        CGF.EmitBranch(CGF.ReturnBlock.getBlock());
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              CGF.Builder.GetInsertBlock());
        CGF.EmitBranchThroughCleanup(CurPoint);
        CGF.EmitBlock(CurPoint.getBlock());
      }
    }
    unsigned getNumberOfParts() const { return UntiedSwitch->getNumCases(); }
  };
  CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS,
                                 const VarDecl *ThreadIDVar,
                                 const RegionCodeGenTy &CodeGen,
                                 OpenMPDirectiveKind Kind, bool HasCancel,
                                 const UntiedTaskActionTy &Action)
      : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel),
        ThreadIDVar(ThreadIDVar), Action(Action) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// Get an LValue for the current ThreadID variable.
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override;

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return ".omp_outlined."; }

  void emitUntiedSwitch(CodeGenFunction &CGF) override {
    Action.emitUntiedSwitch(CGF);
  }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               TaskOutlinedRegion;
  }

private:
  /// A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  /// Action for emitting code for untied tasks.
  const UntiedTaskActionTy &Action;
};

/// API for inlined captured statement code generation in OpenMP
/// constructs.
class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo {
public:
  CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI,
                            const RegionCodeGenTy &CodeGen,
                            OpenMPDirectiveKind Kind, bool HasCancel)
      : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel),
        OldCSI(OldCSI),
        OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {}

  // Retrieve the value of the context parameter.
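  // Context queries are delegated to the enclosing outlined region: an
  // inlined region has no captured-struct context of its own, so reaching
  // one of the llvm_unreachable paths below means the region was emitted
  // outside of any outlined function.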
  llvm::Value *getContextValue() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getContextValue();
    llvm_unreachable("No context value for inlined OpenMP region");
  }

  void setContextValue(llvm::Value *V) override {
    if (OuterRegionInfo) {
      OuterRegionInfo->setContextValue(V);
      return;
    }
    llvm_unreachable("No context value for inlined OpenMP region");
  }

  /// Lookup the captured field decl for a variable.
  const FieldDecl *lookup(const VarDecl *VD) const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->lookup(VD);
    // If there is no outer outlined region, there is no need to look up the
    // variable in a list of captured variables; we can use the original one.
    return nullptr;
  }

  FieldDecl *getThisFieldDecl() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThisFieldDecl();
    return nullptr;
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThreadIDVariable();
    return nullptr;
  }

  /// Get an LValue for the current ThreadID variable.
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThreadIDVariableLValue(CGF);
    llvm_unreachable("No LValue for inlined OpenMP construct");
  }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override {
    if (auto *OuterRegionInfo = getOldCSI())
      return OuterRegionInfo->getHelperName();
    llvm_unreachable("No helper name for inlined OpenMP construct");
  }

  void emitUntiedSwitch(CodeGenFunction &CGF) override {
    if (OuterRegionInfo)
      OuterRegionInfo->emitUntiedSwitch(CGF);
  }

  CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion;
  }

  ~CGOpenMPInlinedRegionInfo() override = default;

private:
  /// CodeGen info about outer OpenMP region.
  CodeGenFunction::CGCapturedStmtInfo *OldCSI;
  CGOpenMPRegionInfo *OuterRegionInfo;
};

/// API for captured statement code generation in OpenMP target
/// constructs. For these captures, implicit parameters are used instead of the
/// captured fields. The name of the target region has to be unique in a given
/// application, so it is provided by the client, because only the client has
/// the information to generate it.
class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo {
public:
  CGOpenMPTargetRegionInfo(const CapturedStmt &CS,
                           const RegionCodeGenTy &CodeGen, StringRef HelperName)
      : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target,
                           /*HasCancel=*/false),
        HelperName(HelperName) {}

  /// This is unused for target regions because each starts executing
  /// with a single thread.
  const VarDecl *getThreadIDVariable() const override { return nullptr; }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return HelperName; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion;
  }

private:
  StringRef HelperName;
};

static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) {
  llvm_unreachable("No codegen for expressions");
}
/// API for generation of expressions captured in an innermost OpenMP
/// region.
class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo {
public:
  CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS)
      : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen,
                                  OMPD_unknown,
                                  /*HasCancel=*/false),
        PrivScope(CGF) {
    // Make sure the globals captured in the provided statement are local by
    // using the privatization logic. We assume the same variable is not
    // captured more than once.
    for (const auto &C : CS.captures()) {
      if (!C.capturesVariable() && !C.capturesVariableByCopy())
        continue;

      const VarDecl *VD = C.getCapturedVar();
      if (VD->isLocalVarDeclOrParm())
        continue;

      DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD),
                      /*RefersToEnclosingVariableOrCapture=*/false,
                      VD->getType().getNonReferenceType(), VK_LValue,
                      C.getLocation());
      PrivScope.addPrivate(
          VD, [&CGF, &DRE]() { return CGF.EmitLValue(&DRE).getAddress(CGF); });
    }
    (void)PrivScope.Privatize();
  }

  /// Lookup the captured field decl for a variable.
  const FieldDecl *lookup(const VarDecl *VD) const override {
    if (const FieldDecl *FD = CGOpenMPInlinedRegionInfo::lookup(VD))
      return FD;
    return nullptr;
  }

  /// Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override {
    llvm_unreachable("No body for expressions");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override {
    llvm_unreachable("No thread id for expressions");
  }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override {
    llvm_unreachable("No helper name for expressions");
  }

  static bool classof(const CGCapturedStmtInfo *Info) { return false; }

private:
  /// Private scope to capture global variables.
  CodeGenFunction::OMPPrivateScope PrivScope;
};

/// RAII for emitting code of OpenMP constructs.
class InlinedOpenMPRegionRAII {
  CodeGenFunction &CGF;
  llvm::DenseMap<const VarDecl *, FieldDecl *> LambdaCaptureFields;
  FieldDecl *LambdaThisCaptureField = nullptr;
  const CodeGen::CGBlockInfo *BlockInfo = nullptr;
  bool NoInheritance = false;

public:
  /// Constructs region for combined constructs.
  /// \param CodeGen Code generation sequence for combined directives. Includes
  /// a list of functions used for code generation of implicitly inlined
  /// regions.
  InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen,
                          OpenMPDirectiveKind Kind, bool HasCancel,
                          bool NoInheritance = true)
      : CGF(CGF), NoInheritance(NoInheritance) {
    // Start emission for the construct.
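    // With NoInheritance set, the enclosing function's lambda-capture and
    // block state is stashed below and restored in the destructor, so the
    // inlined region does not accidentally resolve captures against the
    // enclosing lambda or block.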
    CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo(
        CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel);
    if (NoInheritance) {
      std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
      LambdaThisCaptureField = CGF.LambdaThisCaptureField;
      CGF.LambdaThisCaptureField = nullptr;
      BlockInfo = CGF.BlockInfo;
      CGF.BlockInfo = nullptr;
    }
  }

  ~InlinedOpenMPRegionRAII() {
    // Restore original CapturedStmtInfo only if we're done with code emission.
    auto *OldCSI =
        cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI();
    delete CGF.CapturedStmtInfo;
    CGF.CapturedStmtInfo = OldCSI;
    if (NoInheritance) {
      std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
      CGF.LambdaThisCaptureField = LambdaThisCaptureField;
      CGF.BlockInfo = BlockInfo;
    }
  }
};

/// Values for bit flags used in the ident_t to describe the fields.
/// All enumerated elements are named and described in accordance with the code
/// from https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h
enum OpenMPLocationFlags : unsigned {
  /// Use trampoline for internal microtask.
  OMP_IDENT_IMB = 0x01,
  /// Use c-style ident structure.
  OMP_IDENT_KMPC = 0x02,
  /// Atomic reduction option for kmpc_reduce.
  OMP_ATOMIC_REDUCE = 0x10,
  /// Explicit 'barrier' directive.
  OMP_IDENT_BARRIER_EXPL = 0x20,
  /// Implicit barrier in code.
  OMP_IDENT_BARRIER_IMPL = 0x40,
  /// Implicit barrier in 'for' directive.
  OMP_IDENT_BARRIER_IMPL_FOR = 0x40,
  /// Implicit barrier in 'sections' directive.
  OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0,
  /// Implicit barrier in 'single' directive.
  OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140,
  /// Call of __kmp_for_static_init for static loop.
  OMP_IDENT_WORK_LOOP = 0x200,
  /// Call of __kmp_for_static_init for sections.
  OMP_IDENT_WORK_SECTIONS = 0x400,
  /// Call of __kmp_for_static_init for distribute.
  OMP_IDENT_WORK_DISTRIBUTE = 0x800,
  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE)
};

namespace {
LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
/// Values for bit flags for marking which requires clauses have been used.
enum OpenMPOffloadingRequiresDirFlags : int64_t {
  /// flag undefined.
  OMP_REQ_UNDEFINED = 0x000,
  /// no requires clause present.
  OMP_REQ_NONE = 0x001,
  /// reverse_offload clause.
  OMP_REQ_REVERSE_OFFLOAD = 0x002,
  /// unified_address clause.
  OMP_REQ_UNIFIED_ADDRESS = 0x004,
  /// unified_shared_memory clause.
  OMP_REQ_UNIFIED_SHARED_MEMORY = 0x008,
  /// dynamic_allocators clause.
  OMP_REQ_DYNAMIC_ALLOCATORS = 0x010,
  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_REQ_DYNAMIC_ALLOCATORS)
};

enum OpenMPOffloadingReservedDeviceIDs {
  /// Device ID used when the device was not specified; the runtime should
  /// get it from environment variables, per the spec.
  OMP_DEVICEID_UNDEF = -1,
};
} // anonymous namespace

/// Describes ident structure that describes a source location.
/// All descriptions are taken from
/// https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h
/// Original structure:
/// typedef struct ident {
///    kmp_int32 reserved_1;   /**< might be used in Fortran;
///                                 see above */
///    kmp_int32 flags;        /**< also f.flags; KMP_IDENT_xxx flags;
///                                 KMP_IDENT_KMPC identifies this union
///                                 member */
///    kmp_int32 reserved_2;   /**< not really used in Fortran any more;
///                                 see above */
///#if USE_ITT_BUILD
///                            /* but currently used for storing
///                               region-specific ITT */
///                            /* contextual information. */
///#endif /* USE_ITT_BUILD */
///    kmp_int32 reserved_3;   /**< source[4] in Fortran, do not use for
///                                 C++ */
///    char const *psource;    /**< String describing the source location.
///                                 The string is composed of semi-colon separated
///                                 fields which describe the source file,
///                                 the function and a pair of line numbers that
///                                 delimit the construct.
///                             */
/// } ident_t;
enum IdentFieldIndex {
  /// might be used in Fortran
  IdentField_Reserved_1,
  /// OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member.
  IdentField_Flags,
  /// Not really used in Fortran any more
  IdentField_Reserved_2,
  /// Source[4] in Fortran, do not use for C++
  IdentField_Reserved_3,
  /// String describing the source location. The string is composed of
  /// semi-colon separated fields which describe the source file, the function
  /// and a pair of line numbers that delimit the construct.
  IdentField_PSource
};

/// Schedule types for 'omp for' loops (these enumerators are taken from
/// the enum sched_type in kmp.h).
enum OpenMPSchedType {
  /// Lower bound for default (unordered) versions.
  OMP_sch_lower = 32,
  OMP_sch_static_chunked = 33,
  OMP_sch_static = 34,
  OMP_sch_dynamic_chunked = 35,
  OMP_sch_guided_chunked = 36,
  OMP_sch_runtime = 37,
  OMP_sch_auto = 38,
  /// static with chunk adjustment (e.g., simd)
  OMP_sch_static_balanced_chunked = 45,
  /// Lower bound for 'ordered' versions.
  OMP_ord_lower = 64,
  OMP_ord_static_chunked = 65,
  OMP_ord_static = 66,
  OMP_ord_dynamic_chunked = 67,
  OMP_ord_guided_chunked = 68,
  OMP_ord_runtime = 69,
  OMP_ord_auto = 70,
  OMP_sch_default = OMP_sch_static,
  /// dist_schedule types
  OMP_dist_sch_static_chunked = 91,
  OMP_dist_sch_static = 92,
  /// Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers.
  /// Set if the monotonic schedule modifier was present.
  OMP_sch_modifier_monotonic = (1 << 29),
  /// Set if the nonmonotonic schedule modifier was present.
  OMP_sch_modifier_nonmonotonic = (1 << 30),
};

/// A basic class for pre/post-actions in advanced codegen sequences for
/// OpenMP regions.
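/// For example, when a RegionCodeGenTy carries a PrePostActionTy, pushing
/// this cleanup guarantees that the action's Exit hook runs on both the
/// normal and the exceptional path out of the region (see
/// RegionCodeGenTy::operator() below).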
class CleanupTy final : public EHScopeStack::Cleanup {
  PrePostActionTy *Action;

public:
  explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {}
  void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
    if (!CGF.HaveInsertPoint())
      return;
    Action->Exit(CGF);
  }
};

} // anonymous namespace

void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const {
  CodeGenFunction::RunCleanupsScope Scope(CGF);
  if (PrePostAction) {
    CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction);
    Callback(CodeGen, CGF, *PrePostAction);
  } else {
    PrePostActionTy Action;
    Callback(CodeGen, CGF, Action);
  }
}

/// Check if the combiner is a call to a UDR combiner and, if so, return the
/// UDR decl used for reduction.
static const OMPDeclareReductionDecl *
getReductionInit(const Expr *ReductionOp) {
  if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
    if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
      if (const auto *DRE =
              dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
        if (const auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl()))
          return DRD;
  return nullptr;
}

static void emitInitWithReductionInitializer(CodeGenFunction &CGF,
                                             const OMPDeclareReductionDecl *DRD,
                                             const Expr *InitOp,
                                             Address Private, Address Original,
                                             QualType Ty) {
  if (DRD->getInitializer()) {
    std::pair<llvm::Function *, llvm::Function *> Reduction =
        CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
    const auto *CE = cast<CallExpr>(InitOp);
    const auto *OVE = cast<OpaqueValueExpr>(CE->getCallee());
    const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts();
    const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts();
    const auto *LHSDRE =
        cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr());
    const auto *RHSDRE =
        cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr());
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()),
                            [=]() { return Private; });
    PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()),
                            [=]() { return Original; });
    (void)PrivateScope.Privatize();
    RValue Func = RValue::get(Reduction.second);
    CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
    CGF.EmitIgnoredExpr(InitOp);
  } else {
    llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty);
    std::string Name = CGF.CGM.getOpenMPRuntime().getName({"init"});
    auto *GV = new llvm::GlobalVariable(
        CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true,
        llvm::GlobalValue::PrivateLinkage, Init, Name);
    LValue LV = CGF.MakeNaturalAlignAddrLValue(GV, Ty);
    RValue InitRVal;
    switch (CGF.getEvaluationKind(Ty)) {
    case TEK_Scalar:
      InitRVal = CGF.EmitLoadOfLValue(LV, DRD->getLocation());
      break;
    case TEK_Complex:
      InitRVal =
          RValue::getComplex(CGF.EmitLoadOfComplex(LV, DRD->getLocation()));
      break;
    case TEK_Aggregate: {
      OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_LValue);
      CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, LV);
      CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
                           /*IsInitializer=*/false);
      return;
    }
    }
    OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_PRValue);
    CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal);
    CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
                         /*IsInitializer=*/false);
  }
}

/// Emit initialization of arrays of complex types.
/// \param DestAddr Address of the array.
/// \param Type Type of array.
/// \param Init Initial expression of array.
/// \param SrcAddr Address of the original array.
static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr,
                                 QualType Type, bool EmitDeclareReductionInit,
                                 const Expr *Init,
                                 const OMPDeclareReductionDecl *DRD,
                                 Address SrcAddr = Address::invalid()) {
  // Perform element-by-element initialization.
  QualType ElementTy;

  // Drill down to the base element type on both arrays.
  const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr);
  if (DRD)
    SrcAddr =
        CGF.Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType());

  llvm::Value *SrcBegin = nullptr;
  if (DRD)
    SrcBegin = SrcAddr.getPointer();
  llvm::Value *DestBegin = DestAddr.getPointer();
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *DestEnd =
      CGF.Builder.CreateGEP(DestAddr.getElementType(), DestBegin, NumElements);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arrayinit.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arrayinit.done");
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);

  llvm::PHINode *SrcElementPHI = nullptr;
  Address SrcElementCurrent = Address::invalid();
  if (DRD) {
    SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2,
                                          "omp.arraycpy.srcElementPast");
    SrcElementPHI->addIncoming(SrcBegin, EntryBB);
    SrcElementCurrent =
        Address(SrcElementPHI, SrcAddr.getElementType(),
                SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));
  }
  llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI(
      DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
  DestElementPHI->addIncoming(DestBegin, EntryBB);
  Address DestElementCurrent =
      Address(DestElementPHI, DestAddr.getElementType(),
              DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  {
    CodeGenFunction::RunCleanupsScope InitScope(CGF);
    if (EmitDeclareReductionInit) {
      emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent,
                                       SrcElementCurrent, ElementTy);
    } else
      CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(),
                           /*IsInitializer=*/false);
  }

  if (DRD) {
    // Shift the source address forward by one element.
    llvm::Value *SrcElementNext = CGF.Builder.CreateConstGEP1_32(
        SrcAddr.getElementType(), SrcElementPHI, /*Idx0=*/1,
        "omp.arraycpy.src.element");
    SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock());
  }

  // Shift the destination address forward by one element.
  llvm::Value *DestElementNext = CGF.Builder.CreateConstGEP1_32(
      DestAddr.getElementType(), DestElementPHI, /*Idx0=*/1,
      "omp.arraycpy.dest.element");
  // Check whether we've reached the end.
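  // Note that the back-edge incoming value for the destination PHI is added
  // after the branch below, once GetInsertBlock reflects the final block of
  // the loop body.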
  llvm::Value *Done =
      CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}

LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) {
  return CGF.EmitOMPSharedLValue(E);
}

LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF,
                                            const Expr *E) {
  if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E))
    return CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false);
  return LValue();
}

void ReductionCodeGen::emitAggregateInitialization(
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, Address SharedAddr,
    const OMPDeclareReductionDecl *DRD) {
  // Emit VarDecl with copy init for arrays.
  // Get the address of the original variable captured in current
  // captured region.
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  bool EmitDeclareReductionInit =
      DRD && (DRD->getInitializer() || !PrivateVD->hasInit());
  EmitOMPAggregateInit(CGF, PrivateAddr, PrivateVD->getType(),
                       EmitDeclareReductionInit,
                       EmitDeclareReductionInit ? ClausesData[N].ReductionOp
                                                : PrivateVD->getInit(),
                       DRD, SharedAddr);
}

ReductionCodeGen::ReductionCodeGen(ArrayRef<const Expr *> Shareds,
                                   ArrayRef<const Expr *> Origs,
                                   ArrayRef<const Expr *> Privates,
                                   ArrayRef<const Expr *> ReductionOps) {
  ClausesData.reserve(Shareds.size());
  SharedAddresses.reserve(Shareds.size());
  Sizes.reserve(Shareds.size());
  BaseDecls.reserve(Shareds.size());
  const auto *IOrig = Origs.begin();
  const auto *IPriv = Privates.begin();
  const auto *IRed = ReductionOps.begin();
  for (const Expr *Ref : Shareds) {
    ClausesData.emplace_back(Ref, *IOrig, *IPriv, *IRed);
    std::advance(IOrig, 1);
    std::advance(IPriv, 1);
    std::advance(IRed, 1);
  }
}

void ReductionCodeGen::emitSharedOrigLValue(CodeGenFunction &CGF, unsigned N) {
  assert(SharedAddresses.size() == N && OrigAddresses.size() == N &&
         "Number of generated lvalues must be exactly N.");
  LValue First = emitSharedLValue(CGF, ClausesData[N].Shared);
  LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Shared);
  SharedAddresses.emplace_back(First, Second);
  if (ClausesData[N].Shared == ClausesData[N].Ref) {
    OrigAddresses.emplace_back(First, Second);
  } else {
    LValue First = emitSharedLValue(CGF, ClausesData[N].Ref);
    LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Ref);
    OrigAddresses.emplace_back(First, Second);
  }
}

void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) {
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  QualType PrivateType = PrivateVD->getType();
  bool AsArraySection = isa<OMPArraySectionExpr>(ClausesData[N].Ref);
  if (!PrivateType->isVariablyModifiedType()) {
    Sizes.emplace_back(
        CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType()),
        nullptr);
    return;
  }
  llvm::Value *Size;
  llvm::Value *SizeInChars;
  auto *ElemType = OrigAddresses[N].first.getAddress(CGF).getElementType();
  auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType);
  if (AsArraySection) {
    Size = CGF.Builder.CreatePtrDiff(ElemType,
                                     OrigAddresses[N].second.getPointer(CGF),
                                     OrigAddresses[N].first.getPointer(CGF));
    Size = CGF.Builder.CreateNUWAdd(
        Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1));
    SizeInChars = CGF.Builder.CreateNUWMul(Size, ElemSizeOf);
  } else {
    SizeInChars =
        CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType());
    Size = CGF.Builder.CreateExactUDiv(SizeInChars, ElemSizeOf);
  }
  Sizes.emplace_back(SizeInChars, Size);
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}

void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N,
                                         llvm::Value *Size) {
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  QualType PrivateType = PrivateVD->getType();
  if (!PrivateType->isVariablyModifiedType()) {
    assert(!Size && !Sizes[N].second &&
           "Size should be nullptr for non-variably modified reduction "
           "items.");
    return;
  }
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}

void ReductionCodeGen::emitInitialization(
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, Address SharedAddr,
    llvm::function_ref<bool(CodeGenFunction &)> DefaultInit) {
  assert(SharedAddresses.size() > N && "No variable was generated");
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  const OMPDeclareReductionDecl *DRD =
      getReductionInit(ClausesData[N].ReductionOp);
  QualType PrivateType = PrivateVD->getType();
  PrivateAddr = CGF.Builder.CreateElementBitCast(
      PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
  if (CGF.getContext().getAsArrayType(PrivateVD->getType())) {
    if (DRD && DRD->getInitializer())
      (void)DefaultInit(CGF);
    emitAggregateInitialization(CGF, N, PrivateAddr, SharedAddr, DRD);
  } else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) {
    (void)DefaultInit(CGF);
    QualType SharedType = SharedAddresses[N].first.getType();
    emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp,
                                     PrivateAddr, SharedAddr, SharedType);
  } else if (!DefaultInit(CGF) && PrivateVD->hasInit() &&
             !CGF.isTrivialInitializer(PrivateVD->getInit())) {
    CGF.EmitAnyExprToMem(PrivateVD->getInit(), PrivateAddr,
                         PrivateVD->getType().getQualifiers(),
                         /*IsInitializer=*/false);
  }
}

bool ReductionCodeGen::needCleanups(unsigned N) {
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  QualType PrivateType = PrivateVD->getType();
  QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
  return DTorKind != QualType::DK_none;
}

void ReductionCodeGen::emitCleanups(CodeGenFunction &CGF, unsigned N,
                                    Address PrivateAddr) {
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  QualType PrivateType = PrivateVD->getType();
  QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
  if (needCleanups(N)) {
    PrivateAddr = CGF.Builder.CreateElementBitCast(
        PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
    CGF.pushDestroy(DTorKind, PrivateAddr, PrivateType);
  }
}

static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          LValue BaseLV) {
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    if (const auto *PtrTy = BaseTy->getAs<PointerType>()) {
      BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(CGF), PtrTy);
    } else {
      LValue RefLVal = CGF.MakeAddrLValue(BaseLV.getAddress(CGF), BaseTy);
      BaseLV = CGF.EmitLoadOfReferenceLValue(RefLVal);
    }
    BaseTy = BaseTy->getPointeeType();
  }
  return CGF.MakeAddrLValue(
      CGF.Builder.CreateElementBitCast(BaseLV.getAddress(CGF),
                                       CGF.ConvertTypeForMem(ElTy)),
      BaseLV.getType(), BaseLV.getBaseInfo(),
      CGF.CGM.getTBAAInfoForSubobject(BaseLV, BaseLV.getType()));
}

static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          llvm::Type *BaseLVType, CharUnits BaseLVAlignment,
                          llvm::Value *Addr) {
  Address Tmp = Address::invalid();
  Address TopTmp = Address::invalid();
  Address MostTopTmp = Address::invalid();
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    Tmp = CGF.CreateMemTemp(BaseTy);
    if (TopTmp.isValid())
      CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp);
    else
      MostTopTmp = Tmp;
    TopTmp = Tmp;
    BaseTy = BaseTy->getPointeeType();
  }
  llvm::Type *Ty = BaseLVType;
  if (Tmp.isValid())
    Ty = Tmp.getElementType();
  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Addr, Ty);
  if (Tmp.isValid()) {
    CGF.Builder.CreateStore(Addr, Tmp);
    return MostTopTmp;
  }
  return Address::deprecated(Addr, BaseLVAlignment);
}

static const VarDecl *getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE) {
  const VarDecl *OrigVD = nullptr;
  if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(Ref)) {
    const Expr *Base = OASE->getBase()->IgnoreParenImpCasts();
    while (const auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base))
      Base = TempOASE->getBase()->IgnoreParenImpCasts();
    while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
      Base = TempASE->getBase()->IgnoreParenImpCasts();
    DE = cast<DeclRefExpr>(Base);
    OrigVD = cast<VarDecl>(DE->getDecl());
  } else if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Ref)) {
    const Expr *Base = ASE->getBase()->IgnoreParenImpCasts();
    while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
      Base = TempASE->getBase()->IgnoreParenImpCasts();
    DE = cast<DeclRefExpr>(Base);
    OrigVD = cast<VarDecl>(DE->getDecl());
  }
  return OrigVD;
}

Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N,
                                               Address PrivateAddr) {
  const DeclRefExpr *DE;
  if (const VarDecl *OrigVD = ::getBaseDecl(ClausesData[N].Ref, DE)) {
    BaseDecls.emplace_back(OrigVD);
    LValue OriginalBaseLValue = CGF.EmitLValue(DE);
    LValue BaseLValue =
        loadToBegin(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(),
                    OriginalBaseLValue);
    Address SharedAddr = SharedAddresses[N].first.getAddress(CGF);
    llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff(
        SharedAddr.getElementType(), BaseLValue.getPointer(CGF),
        SharedAddr.getPointer());
    llvm::Value *PrivatePointer =
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            PrivateAddr.getPointer(), SharedAddr.getType());
    llvm::Value *Ptr = CGF.Builder.CreateGEP(
        SharedAddr.getElementType(), PrivatePointer, Adjustment);
    return castToBase(CGF, OrigVD->getType(),
                      SharedAddresses[N].first.getType(),
                      OriginalBaseLValue.getAddress(CGF).getType(),
                      OriginalBaseLValue.getAlignment(), Ptr);
  }
  BaseDecls.emplace_back(
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Ref)->getDecl()));
  return PrivateAddr;
}

bool ReductionCodeGen::usesReductionInitializer(unsigned N) const {
  const OMPDeclareReductionDecl *DRD =
      getReductionInit(ClausesData[N].ReductionOp);
  return DRD && DRD->getInitializer();
}

LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) {
  return CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(getThreadIDVariable()),
      getThreadIDVariable()->getType()->castAs<PointerType>());
}

void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt *S) {
  if (!CGF.HaveInsertPoint())
    return;
  // 1.2.2 OpenMP Language Terminology
  // Structured block - An executable statement with a single entry at the
  // top and a single exit at the bottom.
  // The point of exit cannot be a branch out of the structured block.
  // longjmp() and throw() must not violate the entry/exit criteria.
  CGF.EHStack.pushTerminate();
  if (S)
    CGF.incrementProfileCounter(S);
  CodeGen(CGF);
  CGF.EHStack.popTerminate();
}

LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue(
    CodeGenFunction &CGF) {
  return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()),
                            getThreadIDVariable()->getType(),
                            AlignmentSource::Decl);
}

static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC,
                                       QualType FieldTy) {
  auto *Field = FieldDecl::Create(
      C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy,
      C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()),
      /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit);
  Field->setAccess(AS_public);
  DC->addDecl(Field);
  return Field;
}

CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM, StringRef FirstSeparator,
                                 StringRef Separator)
    : CGM(CGM), FirstSeparator(FirstSeparator), Separator(Separator),
      OMPBuilder(CGM.getModule()), OffloadEntriesInfoManager(CGM) {
  KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8);

  // Initialize Types used in OpenMPIRBuilder from OMPKinds.def
  OMPBuilder.initialize();
  loadOffloadInfoMetadata();
}

void CGOpenMPRuntime::clear() {
  InternalVars.clear();
  // Clean non-target variable declarations possibly used only in debug info.
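  // Only unreferenced declarations that were emitted solely for debug info
  // are erased here; anything that is defined or still has uses must stay.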
  for (const auto &Data : EmittedNonTargetVariables) {
    if (!Data.getValue().pointsToAliveValue())
      continue;
    auto *GV = dyn_cast<llvm::GlobalVariable>(Data.getValue());
    if (!GV)
      continue;
    if (!GV->isDeclaration() || GV->getNumUses() > 0)
      continue;
    GV->eraseFromParent();
  }
}

std::string CGOpenMPRuntime::getName(ArrayRef<StringRef> Parts) const {
  SmallString<128> Buffer;
  llvm::raw_svector_ostream OS(Buffer);
  StringRef Sep = FirstSeparator;
  for (StringRef Part : Parts) {
    OS << Sep << Part;
    Sep = Separator;
  }
  return std::string(OS.str());
}

static llvm::Function *
emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty,
                          const Expr *CombinerInitializer, const VarDecl *In,
                          const VarDecl *Out, bool IsCombiner) {
  // void .omp_combiner.(Ty *in, Ty *out);
  ASTContext &C = CGM.getContext();
  QualType PtrTy = C.getPointerType(Ty).withRestrict();
  FunctionArgList Args;
  ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(),
                               /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(),
                              /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
  Args.push_back(&OmpOutParm);
  Args.push_back(&OmpInParm);
  const CGFunctionInfo &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName(
      {IsCombiner ? "omp_combiner" : "omp_initializer", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  if (CGM.getLangOpts().Optimize) {
    Fn->removeFnAttr(llvm::Attribute::NoInline);
    Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
    Fn->addFnAttr(llvm::Attribute::AlwaysInline);
  }
  CodeGenFunction CGF(CGM);
  // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions.
  // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions.
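  // For example, a user-defined reduction such as (hypothetical source):
  //   #pragma omp declare reduction(mymax : int : \
  //       omp_out = omp_in > omp_out ? omp_in : omp_out)
  // is lowered to a combiner helper roughly of the shape
  //   void .omp_combiner.(int *restrict omp_out, int *restrict omp_in);
  // whose body evaluates the combiner expression with omp_in/omp_out bound
  // to the dereferenced parameters (a sketch; the actual name mangling and
  // attributes are produced above).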
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, In->getLocation(),
                    Out->getLocation());
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm);
  Scope.addPrivate(In, [&CGF, AddrIn, PtrTy]() {
    return CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>())
        .getAddress(CGF);
  });
  Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm);
  Scope.addPrivate(Out, [&CGF, AddrOut, PtrTy]() {
    return CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>())
        .getAddress(CGF);
  });
  (void)Scope.Privatize();
  if (!IsCombiner && Out->hasInit() &&
      !CGF.isTrivialInitializer(Out->getInit())) {
    CGF.EmitAnyExprToMem(Out->getInit(), CGF.GetAddrOfLocalVar(Out),
                         Out->getType().getQualifiers(),
                         /*IsInitializer=*/true);
  }
  if (CombinerInitializer)
    CGF.EmitIgnoredExpr(CombinerInitializer);
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}

void CGOpenMPRuntime::emitUserDefinedReduction(
    CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) {
  if (UDRMap.count(D) > 0)
    return;
  llvm::Function *Combiner = emitCombinerOrInitializer(
      CGM, D->getType(), D->getCombiner(),
      cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerIn())->getDecl()),
      cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerOut())->getDecl()),
      /*IsCombiner=*/true);
  llvm::Function *Initializer = nullptr;
  if (const Expr *Init = D->getInitializer()) {
    Initializer = emitCombinerOrInitializer(
        CGM, D->getType(),
        D->getInitializerKind() == OMPDeclareReductionDecl::CallInit ? Init
                                                                     : nullptr,
        cast<VarDecl>(cast<DeclRefExpr>(D->getInitOrig())->getDecl()),
        cast<VarDecl>(cast<DeclRefExpr>(D->getInitPriv())->getDecl()),
        /*IsCombiner=*/false);
  }
  UDRMap.try_emplace(D, Combiner, Initializer);
  if (CGF) {
    auto &Decls = FunctionUDRMap.FindAndConstruct(CGF->CurFn);
    Decls.second.push_back(D);
  }
}

std::pair<llvm::Function *, llvm::Function *>
CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) {
  auto I = UDRMap.find(D);
  if (I != UDRMap.end())
    return I->second;
  emitUserDefinedReduction(/*CGF=*/nullptr, D);
  return UDRMap.lookup(D);
}

namespace {
// Temporary RAII solution to perform a push/pop stack event on the OpenMP IR
// Builder if one is present.
struct PushAndPopStackRAII {
  PushAndPopStackRAII(llvm::OpenMPIRBuilder *OMPBuilder, CodeGenFunction &CGF,
                      bool HasCancel, llvm::omp::Directive Kind)
      : OMPBuilder(OMPBuilder) {
    if (!OMPBuilder)
      return;

    // The following callback is the crucial part of clang's cleanup process.
    //
    // NOTE:
    // Once the OpenMPIRBuilder is used to create parallel regions (and
    // similar), the cancellation destination (Dest below) is determined via
    // IP. That means if we have variables to finalize, we split the block at
    // IP, use the new block (=BB) as the destination to build a JumpDest (via
    // getJumpDestInCurrentScope(BB)) which is then fed to
    // EmitBranchThroughCleanup. Furthermore, there will be no need
    // to push & pop a FinalizationInfo object.
    // The FiniCB will still be needed, but at the point where the
    // OpenMPIRBuilder is asked to construct a parallel (or similar) construct.
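    // The lambda below is such a finalization callback: it redirects control
    // flow to the cancellation destination through any active cleanups.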
    auto FiniCB = [&CGF](llvm::OpenMPIRBuilder::InsertPointTy IP) {
      assert(IP.getBlock()->end() == IP.getPoint() &&
             "Clang CG should cause non-terminated block!");
      CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
      CGF.Builder.restoreIP(IP);
      CodeGenFunction::JumpDest Dest =
          CGF.getOMPCancelDestination(OMPD_parallel);
      CGF.EmitBranchThroughCleanup(Dest);
    };

    // TODO: Remove this once we emit parallel regions through the
    // OpenMPIRBuilder as it can do this setup internally.
    llvm::OpenMPIRBuilder::FinalizationInfo FI({FiniCB, Kind, HasCancel});
    OMPBuilder->pushFinalizationCB(std::move(FI));
  }
  ~PushAndPopStackRAII() {
    if (OMPBuilder)
      OMPBuilder->popFinalizationCB();
  }
  llvm::OpenMPIRBuilder *OMPBuilder;
};
} // namespace

static llvm::Function *emitParallelOrTeamsOutlinedFunction(
    CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS,
    const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
    const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen) {
  assert(ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 *");
  CodeGenFunction CGF(CGM, true);
  bool HasCancel = false;
  if (const auto *OPD = dyn_cast<OMPParallelDirective>(&D))
    HasCancel = OPD->hasCancel();
  else if (const auto *OPD = dyn_cast<OMPTargetParallelDirective>(&D))
    HasCancel = OPD->hasCancel();
  else if (const auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D))
    HasCancel = OPSD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPTargetParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD =
               dyn_cast<OMPTeamsDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD =
               dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();

  // TODO: Temporarily inform the OpenMPIRBuilder, if any, about the new
  // parallel region to make cancellation barriers work properly.
  llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
  PushAndPopStackRAII PSR(&OMPBuilder, CGF, HasCancel, InnermostKind);
  CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind,
                                    HasCancel, OutlinedHelperName);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  return CGF.GenerateOpenMPCapturedStmtFunction(*CS, D.getBeginLoc());
}

llvm::Function *CGOpenMPRuntime::emitParallelOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  const CapturedStmt *CS = D.getCapturedStmt(OMPD_parallel);
  return emitParallelOrTeamsOutlinedFunction(
      CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
}

llvm::Function *CGOpenMPRuntime::emitTeamsOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  const CapturedStmt *CS = D.getCapturedStmt(OMPD_teams);
  return emitParallelOrTeamsOutlinedFunction(
      CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
}

llvm::Function *CGOpenMPRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF,
                                              PrePostActionTy &) {
    llvm::Value *ThreadID = getThreadID(CGF, D.getBeginLoc());
    llvm::Value *UpLoc = emitUpdateLocation(CGF, D.getBeginLoc());
    llvm::Value *TaskArgs[] = {
        UpLoc, ThreadID,
        CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar),
                                    TaskTVar->getType()->castAs<PointerType>())
            .getPointer(CGF)};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_omp_task),
                        TaskArgs);
  };
  CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar,
                                                            UntiedCodeGen);
  CodeGen.setAction(Action);
  assert(!ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 for tasks");
  const OpenMPDirectiveKind Region =
      isOpenMPTaskLoopDirective(D.getDirectiveKind()) ?
          OMPD_taskloop
          : OMPD_task;
  const CapturedStmt *CS = D.getCapturedStmt(Region);
  bool HasCancel = false;
  if (const auto *TD = dyn_cast<OMPTaskDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPMasterTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPParallelMasterTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();

  CodeGenFunction CGF(CGM, true);
  CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen,
                                        InnermostKind, HasCancel, Action);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  llvm::Function *Res = CGF.GenerateCapturedStmtFunction(*CS);
  if (!Tied)
    NumberOfParts = Action.getNumberOfParts();
  return Res;
}

static void buildStructValue(ConstantStructBuilder &Fields, CodeGenModule &CGM,
                             const RecordDecl *RD, const CGRecordLayout &RL,
                             ArrayRef<llvm::Constant *> Data) {
  llvm::StructType *StructTy = RL.getLLVMType();
  unsigned PrevIdx = 0;
  ConstantInitBuilder CIBuilder(CGM);
  auto DI = Data.begin();
  for (const FieldDecl *FD : RD->fields()) {
    unsigned Idx = RL.getLLVMFieldNo(FD);
    // Fill the alignment.
    for (unsigned I = PrevIdx; I < Idx; ++I)
      Fields.add(llvm::Constant::getNullValue(StructTy->getElementType(I)));
    PrevIdx = Idx + 1;
    Fields.add(*DI);
    ++DI;
  }
}

template <class... As>
static llvm::GlobalVariable *
createGlobalStruct(CodeGenModule &CGM, QualType Ty, bool IsConstant,
                   ArrayRef<llvm::Constant *> Data, const Twine &Name,
                   As &&... Args) {
  const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl());
  const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD);
  ConstantInitBuilder CIBuilder(CGM);
  ConstantStructBuilder Fields = CIBuilder.beginStruct(RL.getLLVMType());
  buildStructValue(Fields, CGM, RD, RL, Data);
  return Fields.finishAndCreateGlobal(
      Name, CGM.getContext().getAlignOfGlobalVarInChars(Ty), IsConstant,
      std::forward<As>(Args)...);
}

template <typename T>
static void
createConstantGlobalStructAndAddToParent(CodeGenModule &CGM, QualType Ty,
                                         ArrayRef<llvm::Constant *> Data,
                                         T &Parent) {
  const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl());
  const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD);
  ConstantStructBuilder Fields = Parent.beginStruct(RL.getLLVMType());
  buildStructValue(Fields, CGM, RD, RL, Data);
  Fields.finishAndAddTo(Parent);
}

void CGOpenMPRuntime::setLocThreadIdInsertPt(CodeGenFunction &CGF,
                                             bool AtCurrentPoint) {
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
  assert(!Elem.second.ServiceInsertPt && "Insert point is set already.");

  llvm::Value *Undef = llvm::UndefValue::get(CGF.Int32Ty);
  if (AtCurrentPoint) {
    Elem.second.ServiceInsertPt = new llvm::BitCastInst(
        Undef, CGF.Int32Ty, "svcpt", CGF.Builder.GetInsertBlock());
  } else {
    Elem.second.ServiceInsertPt =
        new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt");
    Elem.second.ServiceInsertPt->insertAfter(CGF.AllocaInsertPt);
  }
}

void CGOpenMPRuntime::clearLocThreadIdInsertPt(CodeGenFunction &CGF) {
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
  if (Elem.second.ServiceInsertPt) {
    llvm::Instruction *Ptr = Elem.second.ServiceInsertPt;
    Elem.second.ServiceInsertPt = nullptr;
    Ptr->eraseFromParent();
  }
}

static StringRef getIdentStringFromSourceLocation(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  SmallString<128> &Buffer) {
  llvm::raw_svector_ostream OS(Buffer);
  // Build debug location
  PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
  OS << ";" << PLoc.getFilename() << ";";
  if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
    OS << FD->getQualifiedNameAsString();
  OS << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;";
  return OS.str();
}

llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF,
                                                 SourceLocation Loc,
                                                 unsigned Flags) {
  uint32_t SrcLocStrSize;
  llvm::Constant *SrcLocStr;
  if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo ||
      Loc.isInvalid()) {
    SrcLocStr = OMPBuilder.getOrCreateDefaultSrcLocStr(SrcLocStrSize);
  } else {
    std::string FunctionName;
    if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
      FunctionName = FD->getQualifiedNameAsString();
    PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
    const char *FileName = PLoc.getFilename();
    unsigned Line = PLoc.getLine();
    unsigned Column = PLoc.getColumn();
    SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(FunctionName, FileName, Line,
                                                Column, SrcLocStrSize);
  }
  unsigned Reserved2Flags = getDefaultLocationReserved2Flags();
  return OMPBuilder.getOrCreateIdent(
      SrcLocStr, SrcLocStrSize, llvm::omp::IdentFlag(Flags), Reserved2Flags);
}

llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF,
                                          SourceLocation Loc) {
  assert(CGF.CurFn && "No function in current CodeGenFunction.");
  // If the OpenMPIRBuilder is used, we need to use it for all thread id calls
  // as the clang invariants used below might be broken.
  if (CGM.getLangOpts().OpenMPIRBuilder) {
    SmallString<128> Buffer;
    OMPBuilder.updateToLocation(CGF.Builder.saveIP());
    uint32_t SrcLocStrSize;
    auto *SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(
        getIdentStringFromSourceLocation(CGF, Loc, Buffer), SrcLocStrSize);
    return OMPBuilder.getOrCreateThreadID(
        OMPBuilder.getOrCreateIdent(SrcLocStr, SrcLocStrSize));
  }

  llvm::Value *ThreadID = nullptr;
  // Check whether we've already cached a load of the thread id in this
  // function.
  auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
  if (I != OpenMPLocThreadIDMap.end()) {
    ThreadID = I->second.ThreadID;
    if (ThreadID != nullptr)
      return ThreadID;
  }
  // If exceptions are enabled, do not use parameter to avoid possible crash.
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    if (OMPRegionInfo->getThreadIDVariable()) {
      // Check if this is an outlined function with thread id passed as
      // argument.
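      // Note: the load below is cached for reuse across the function only
      // when it is emitted in the entry block (see the check further down);
      // loads emitted in later blocks are used once and not cached.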
1482       LValue LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF);
1483       llvm::BasicBlock *TopBlock = CGF.AllocaInsertPt->getParent();
1484       if (!CGF.EHStack.requiresLandingPad() || !CGF.getLangOpts().Exceptions ||
1485           !CGF.getLangOpts().CXXExceptions ||
1486           CGF.Builder.GetInsertBlock() == TopBlock ||
1487           !isa<llvm::Instruction>(LVal.getPointer(CGF)) ||
1488           cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
1489               TopBlock ||
1490           cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
1491               CGF.Builder.GetInsertBlock()) {
1492         ThreadID = CGF.EmitLoadOfScalar(LVal, Loc);
1493         // If the value was loaded in the entry block, cache it and use it
1494         // everywhere in the function.
1495         if (CGF.Builder.GetInsertBlock() == TopBlock) {
1496           auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1497           Elem.second.ThreadID = ThreadID;
1498         }
1499         return ThreadID;
1500       }
1501     }
1502   }
1503
1504   // This is not an outlined function region - need to call kmp_int32
1505   // __kmpc_global_thread_num(ident_t *loc).
1506   // Generate the thread id value and cache it for use across the
1507   // function.
1508   auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1509   if (!Elem.second.ServiceInsertPt)
1510     setLocThreadIdInsertPt(CGF);
1511   CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
1512   CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt);
1513   llvm::CallInst *Call = CGF.Builder.CreateCall(
1514       OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
1515                                             OMPRTL___kmpc_global_thread_num),
1516       emitUpdateLocation(CGF, Loc));
1517   Call->setCallingConv(CGF.getRuntimeCC());
1518   Elem.second.ThreadID = Call;
1519   return Call;
1520 }
1521
1522 void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) {
1523   assert(CGF.CurFn && "No function in current CodeGenFunction.");
1524   if (OpenMPLocThreadIDMap.count(CGF.CurFn)) {
1525     clearLocThreadIdInsertPt(CGF);
1526     OpenMPLocThreadIDMap.erase(CGF.CurFn);
1527   }
1528   if (FunctionUDRMap.count(CGF.CurFn) > 0) {
1529     for (const auto *D : FunctionUDRMap[CGF.CurFn])
1530       UDRMap.erase(D);
1531     FunctionUDRMap.erase(CGF.CurFn);
1532   }
1533   auto I = FunctionUDMMap.find(CGF.CurFn);
1534   if (I != FunctionUDMMap.end()) {
1535     for (const auto *D : I->second)
1536       UDMMap.erase(D);
1537     FunctionUDMMap.erase(I);
1538   }
1539   LastprivateConditionalToTypes.erase(CGF.CurFn);
1540   FunctionToUntiedTaskStackMap.erase(CGF.CurFn);
1541 }
1542
1543 llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() {
1544   return OMPBuilder.IdentPtr;
1545 }
1546
1547 llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() {
1548   if (!Kmpc_MicroTy) {
1549     // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...)
1550     llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty),
1551                                  llvm::PointerType::getUnqual(CGM.Int32Ty)};
1552     Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true);
1553   }
1554   return llvm::PointerType::getUnqual(Kmpc_MicroTy);
1555 }
1556
1557 llvm::FunctionCallee
1558 CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize, bool IVSigned,
1559                                              bool IsGPUDistribute) {
1560   assert((IVSize == 32 || IVSize == 64) &&
1561          "IV size is not compatible with the omp runtime");
1562   StringRef Name;
1563   if (IsGPUDistribute)
1564     Name = IVSize == 32 ? (IVSigned ? "__kmpc_distribute_static_init_4"
1565                                     : "__kmpc_distribute_static_init_4u")
1566                         : (IVSigned ? "__kmpc_distribute_static_init_8"
1567                                     : "__kmpc_distribute_static_init_8u");
1568   else
1569     Name = IVSize == 32 ? (IVSigned ?
"__kmpc_for_static_init_4" 1570 : "__kmpc_for_static_init_4u") 1571 : (IVSigned ? "__kmpc_for_static_init_8" 1572 : "__kmpc_for_static_init_8u"); 1573 1574 llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty; 1575 auto *PtrTy = llvm::PointerType::getUnqual(ITy); 1576 llvm::Type *TypeParams[] = { 1577 getIdentTyPointerTy(), // loc 1578 CGM.Int32Ty, // tid 1579 CGM.Int32Ty, // schedtype 1580 llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter 1581 PtrTy, // p_lower 1582 PtrTy, // p_upper 1583 PtrTy, // p_stride 1584 ITy, // incr 1585 ITy // chunk 1586 }; 1587 auto *FnTy = 1588 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1589 return CGM.CreateRuntimeFunction(FnTy, Name); 1590 } 1591 1592 llvm::FunctionCallee 1593 CGOpenMPRuntime::createDispatchInitFunction(unsigned IVSize, bool IVSigned) { 1594 assert((IVSize == 32 || IVSize == 64) && 1595 "IV size is not compatible with the omp runtime"); 1596 StringRef Name = 1597 IVSize == 32 1598 ? (IVSigned ? "__kmpc_dispatch_init_4" : "__kmpc_dispatch_init_4u") 1599 : (IVSigned ? "__kmpc_dispatch_init_8" : "__kmpc_dispatch_init_8u"); 1600 llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty; 1601 llvm::Type *TypeParams[] = { getIdentTyPointerTy(), // loc 1602 CGM.Int32Ty, // tid 1603 CGM.Int32Ty, // schedtype 1604 ITy, // lower 1605 ITy, // upper 1606 ITy, // stride 1607 ITy // chunk 1608 }; 1609 auto *FnTy = 1610 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1611 return CGM.CreateRuntimeFunction(FnTy, Name); 1612 } 1613 1614 llvm::FunctionCallee 1615 CGOpenMPRuntime::createDispatchFiniFunction(unsigned IVSize, bool IVSigned) { 1616 assert((IVSize == 32 || IVSize == 64) && 1617 "IV size is not compatible with the omp runtime"); 1618 StringRef Name = 1619 IVSize == 32 1620 ? (IVSigned ? "__kmpc_dispatch_fini_4" : "__kmpc_dispatch_fini_4u") 1621 : (IVSigned ? "__kmpc_dispatch_fini_8" : "__kmpc_dispatch_fini_8u"); 1622 llvm::Type *TypeParams[] = { 1623 getIdentTyPointerTy(), // loc 1624 CGM.Int32Ty, // tid 1625 }; 1626 auto *FnTy = 1627 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 1628 return CGM.CreateRuntimeFunction(FnTy, Name); 1629 } 1630 1631 llvm::FunctionCallee 1632 CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize, bool IVSigned) { 1633 assert((IVSize == 32 || IVSize == 64) && 1634 "IV size is not compatible with the omp runtime"); 1635 StringRef Name = 1636 IVSize == 32 1637 ? (IVSigned ? "__kmpc_dispatch_next_4" : "__kmpc_dispatch_next_4u") 1638 : (IVSigned ? "__kmpc_dispatch_next_8" : "__kmpc_dispatch_next_8u"); 1639 llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty; 1640 auto *PtrTy = llvm::PointerType::getUnqual(ITy); 1641 llvm::Type *TypeParams[] = { 1642 getIdentTyPointerTy(), // loc 1643 CGM.Int32Ty, // tid 1644 llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter 1645 PtrTy, // p_lower 1646 PtrTy, // p_upper 1647 PtrTy // p_stride 1648 }; 1649 auto *FnTy = 1650 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 1651 return CGM.CreateRuntimeFunction(FnTy, Name); 1652 } 1653 1654 /// Obtain information that uniquely identifies a target entry. This 1655 /// consists of the file and device IDs as well as line number associated with 1656 /// the relevant entry source location. 
1657 static void getTargetEntryUniqueInfo(ASTContext &C, SourceLocation Loc,
1658                                      unsigned &DeviceID, unsigned &FileID,
1659                                      unsigned &LineNum) {
1660   SourceManager &SM = C.getSourceManager();
1661
1662   // The loc should always be valid and have a file ID (the user cannot use
1663   // #pragma directives in macros).
1664
1665   assert(Loc.isValid() && "Source location is expected to be always valid.");
1666
1667   PresumedLoc PLoc = SM.getPresumedLoc(Loc);
1668   assert(PLoc.isValid() && "Source location is expected to be always valid.");
1669
1670   llvm::sys::fs::UniqueID ID;
1671   if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID)) {
1672     PLoc = SM.getPresumedLoc(Loc, /*UseLineDirectives=*/false);
1673     assert(PLoc.isValid() && "Source location is expected to be always valid.");
1674     if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID))
1675       SM.getDiagnostics().Report(diag::err_cannot_open_file)
1676           << PLoc.getFilename() << EC.message();
1677   }
1678
1679   DeviceID = ID.getDevice();
1680   FileID = ID.getFile();
1681   LineNum = PLoc.getLine();
1682 }
1683
1684 Address CGOpenMPRuntime::getAddrOfDeclareTargetVar(const VarDecl *VD) {
1685   if (CGM.getLangOpts().OpenMPSimd)
1686     return Address::invalid();
1687   llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
1688       OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
1689   if (Res && (*Res == OMPDeclareTargetDeclAttr::MT_Link ||
1690               (*Res == OMPDeclareTargetDeclAttr::MT_To &&
1691                HasRequiresUnifiedSharedMemory))) {
1692     SmallString<64> PtrName;
1693     {
1694       llvm::raw_svector_ostream OS(PtrName);
1695       OS << CGM.getMangledName(GlobalDecl(VD));
1696       if (!VD->isExternallyVisible()) {
1697         unsigned DeviceID, FileID, Line;
1698         getTargetEntryUniqueInfo(CGM.getContext(),
1699                                  VD->getCanonicalDecl()->getBeginLoc(),
1700                                  DeviceID, FileID, Line);
1701         OS << llvm::format("_%x", FileID);
1702       }
1703       OS << "_decl_tgt_ref_ptr";
1704     }
1705     llvm::Value *Ptr = CGM.getModule().getNamedValue(PtrName);
1706     if (!Ptr) {
1707       QualType PtrTy = CGM.getContext().getPointerType(VD->getType());
1708       Ptr = getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(PtrTy),
1709                                         PtrName);
1710
1711       auto *GV = cast<llvm::GlobalVariable>(Ptr);
1712       GV->setLinkage(llvm::GlobalValue::WeakAnyLinkage);
1713
1714       if (!CGM.getLangOpts().OpenMPIsDevice)
1715         GV->setInitializer(CGM.GetAddrOfGlobal(VD));
1716       registerTargetGlobalVariable(VD, cast<llvm::Constant>(Ptr));
1717     }
1718     return Address::deprecated(Ptr, CGM.getContext().getDeclAlign(VD));
1719   }
1720   return Address::invalid();
1721 }
1722
1723 llvm::Constant *
1724 CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) {
1725   assert(!CGM.getLangOpts().OpenMPUseTLS ||
1726          !CGM.getContext().getTargetInfo().isTLSSupported());
1727   // Lookup the entry, lazily creating it if necessary.
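  // With the default separators this yields an internal global named roughly
  // "<mangled-name>.cache."; the exact spelling depends on getName().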
1728 std::string Suffix = getName({"cache", ""}); 1729 return getOrCreateInternalVariable( 1730 CGM.Int8PtrPtrTy, Twine(CGM.getMangledName(VD)).concat(Suffix)); 1731 } 1732 1733 Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF, 1734 const VarDecl *VD, 1735 Address VDAddr, 1736 SourceLocation Loc) { 1737 if (CGM.getLangOpts().OpenMPUseTLS && 1738 CGM.getContext().getTargetInfo().isTLSSupported()) 1739 return VDAddr; 1740 1741 llvm::Type *VarTy = VDAddr.getElementType(); 1742 llvm::Value *Args[] = { 1743 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 1744 CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.Int8PtrTy), 1745 CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)), 1746 getOrCreateThreadPrivateCache(VD)}; 1747 return Address::deprecated( 1748 CGF.EmitRuntimeCall( 1749 OMPBuilder.getOrCreateRuntimeFunction( 1750 CGM.getModule(), OMPRTL___kmpc_threadprivate_cached), 1751 Args), 1752 VDAddr.getAlignment()); 1753 } 1754 1755 void CGOpenMPRuntime::emitThreadPrivateVarInit( 1756 CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor, 1757 llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) { 1758 // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime 1759 // library. 1760 llvm::Value *OMPLoc = emitUpdateLocation(CGF, Loc); 1761 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 1762 CGM.getModule(), OMPRTL___kmpc_global_thread_num), 1763 OMPLoc); 1764 // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor) 1765 // to register constructor/destructor for variable. 1766 llvm::Value *Args[] = { 1767 OMPLoc, CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.VoidPtrTy), 1768 Ctor, CopyCtor, Dtor}; 1769 CGF.EmitRuntimeCall( 1770 OMPBuilder.getOrCreateRuntimeFunction( 1771 CGM.getModule(), OMPRTL___kmpc_threadprivate_register), 1772 Args); 1773 } 1774 1775 llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition( 1776 const VarDecl *VD, Address VDAddr, SourceLocation Loc, 1777 bool PerformInit, CodeGenFunction *CGF) { 1778 if (CGM.getLangOpts().OpenMPUseTLS && 1779 CGM.getContext().getTargetInfo().isTLSSupported()) 1780 return nullptr; 1781 1782 VD = VD->getDefinition(CGM.getContext()); 1783 if (VD && ThreadPrivateWithDefinition.insert(CGM.getMangledName(VD)).second) { 1784 QualType ASTTy = VD->getType(); 1785 1786 llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr; 1787 const Expr *Init = VD->getAnyInitializer(); 1788 if (CGM.getLangOpts().CPlusPlus && PerformInit) { 1789 // Generate function that re-emits the declaration's initializer into the 1790 // threadprivate copy of the variable VD 1791 CodeGenFunction CtorCGF(CGM); 1792 FunctionArgList Args; 1793 ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc, 1794 /*Id=*/nullptr, CGM.getContext().VoidPtrTy, 1795 ImplicitParamDecl::Other); 1796 Args.push_back(&Dst); 1797 1798 const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration( 1799 CGM.getContext().VoidPtrTy, Args); 1800 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); 1801 std::string Name = getName({"__kmpc_global_ctor_", ""}); 1802 llvm::Function *Fn = 1803 CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc); 1804 CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI, 1805 Args, Loc, Loc); 1806 llvm::Value *ArgVal = CtorCGF.EmitLoadOfScalar( 1807 CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false, 1808 CGM.getContext().VoidPtrTy, Dst.getLocation()); 1809 Address Arg = Address::deprecated(ArgVal, VDAddr.getAlignment()); 1810 Arg = 
CtorCGF.Builder.CreateElementBitCast(
1811         Arg, CtorCGF.ConvertTypeForMem(ASTTy));
1812     CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(),
1813                              /*IsInitializer=*/true);
1814     ArgVal = CtorCGF.EmitLoadOfScalar(
1815         CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
1816         CGM.getContext().VoidPtrTy, Dst.getLocation());
1817     CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue);
1818     CtorCGF.FinishFunction();
1819     Ctor = Fn;
1820   }
1821   if (VD->getType().isDestructedType() != QualType::DK_none) {
1822     // Generate a function that emits a destructor call for the threadprivate
1823     // copy of the variable VD.
1824     CodeGenFunction DtorCGF(CGM);
1825     FunctionArgList Args;
1826     ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
1827                           /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
1828                           ImplicitParamDecl::Other);
1829     Args.push_back(&Dst);
1830
1831     const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
1832         CGM.getContext().VoidTy, Args);
1833     llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
1834     std::string Name = getName({"__kmpc_global_dtor_", ""});
1835     llvm::Function *Fn =
1836         CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
1837     auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
1838     DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args,
1839                           Loc, Loc);
1840     // Create a scope with an artificial location for the body of this function.
1841     auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
1842     llvm::Value *ArgVal = DtorCGF.EmitLoadOfScalar(
1843         DtorCGF.GetAddrOfLocalVar(&Dst),
1844         /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation());
1845     DtorCGF.emitDestroy(Address::deprecated(ArgVal, VDAddr.getAlignment()),
1846                         ASTTy, DtorCGF.getDestroyer(ASTTy.isDestructedType()),
1847                         DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
1848     DtorCGF.FinishFunction();
1849     Dtor = Fn;
1850   }
1851   // Do not emit an init function if it is not required.
1852   if (!Ctor && !Dtor)
1853     return nullptr;
1854
1855   llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
1856   auto *CopyCtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs,
1857                                              /*isVarArg=*/false)
1858                          ->getPointerTo();
1859   // Copying constructor for the threadprivate variable.
1860   // Must be NULL: this parameter is reserved by the runtime, which currently
1861   // requires it to always be NULL; anything else fires an assertion.
1862 CopyCtor = llvm::Constant::getNullValue(CopyCtorTy); 1863 if (Ctor == nullptr) { 1864 auto *CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy, 1865 /*isVarArg=*/false) 1866 ->getPointerTo(); 1867 Ctor = llvm::Constant::getNullValue(CtorTy); 1868 } 1869 if (Dtor == nullptr) { 1870 auto *DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy, 1871 /*isVarArg=*/false) 1872 ->getPointerTo(); 1873 Dtor = llvm::Constant::getNullValue(DtorTy); 1874 } 1875 if (!CGF) { 1876 auto *InitFunctionTy = 1877 llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false); 1878 std::string Name = getName({"__omp_threadprivate_init_", ""}); 1879 llvm::Function *InitFunction = CGM.CreateGlobalInitOrCleanUpFunction( 1880 InitFunctionTy, Name, CGM.getTypes().arrangeNullaryFunction()); 1881 CodeGenFunction InitCGF(CGM); 1882 FunctionArgList ArgList; 1883 InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction, 1884 CGM.getTypes().arrangeNullaryFunction(), ArgList, 1885 Loc, Loc); 1886 emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc); 1887 InitCGF.FinishFunction(); 1888 return InitFunction; 1889 } 1890 emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc); 1891 } 1892 return nullptr; 1893 } 1894 1895 bool CGOpenMPRuntime::emitDeclareTargetVarDefinition(const VarDecl *VD, 1896 llvm::GlobalVariable *Addr, 1897 bool PerformInit) { 1898 if (CGM.getLangOpts().OMPTargetTriples.empty() && 1899 !CGM.getLangOpts().OpenMPIsDevice) 1900 return false; 1901 Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 1902 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD); 1903 if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link || 1904 (*Res == OMPDeclareTargetDeclAttr::MT_To && 1905 HasRequiresUnifiedSharedMemory)) 1906 return CGM.getLangOpts().OpenMPIsDevice; 1907 VD = VD->getDefinition(CGM.getContext()); 1908 assert(VD && "Unknown VarDecl"); 1909 1910 if (!DeclareTargetWithDefinition.insert(CGM.getMangledName(VD)).second) 1911 return CGM.getLangOpts().OpenMPIsDevice; 1912 1913 QualType ASTTy = VD->getType(); 1914 SourceLocation Loc = VD->getCanonicalDecl()->getBeginLoc(); 1915 1916 // Produce the unique prefix to identify the new target regions. We use 1917 // the source location of the variable declaration which we know to not 1918 // conflict with any target region. 
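  // (The prefix built below has the shape
  // __omp_offloading_<device-id>_<file-id>_<variable-name>_l<line>, with the
  // IDs printed in hex; any concrete values shown are illustrative only.)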
1919 unsigned DeviceID; 1920 unsigned FileID; 1921 unsigned Line; 1922 getTargetEntryUniqueInfo(CGM.getContext(), Loc, DeviceID, FileID, Line); 1923 SmallString<128> Buffer, Out; 1924 { 1925 llvm::raw_svector_ostream OS(Buffer); 1926 OS << "__omp_offloading_" << llvm::format("_%x", DeviceID) 1927 << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line; 1928 } 1929 1930 const Expr *Init = VD->getAnyInitializer(); 1931 if (CGM.getLangOpts().CPlusPlus && PerformInit) { 1932 llvm::Constant *Ctor; 1933 llvm::Constant *ID; 1934 if (CGM.getLangOpts().OpenMPIsDevice) { 1935 // Generate function that re-emits the declaration's initializer into 1936 // the threadprivate copy of the variable VD 1937 CodeGenFunction CtorCGF(CGM); 1938 1939 const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction(); 1940 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); 1941 llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction( 1942 FTy, Twine(Buffer, "_ctor"), FI, Loc); 1943 auto NL = ApplyDebugLocation::CreateEmpty(CtorCGF); 1944 CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, 1945 FunctionArgList(), Loc, Loc); 1946 auto AL = ApplyDebugLocation::CreateArtificial(CtorCGF); 1947 CtorCGF.EmitAnyExprToMem( 1948 Init, Address::deprecated(Addr, CGM.getContext().getDeclAlign(VD)), 1949 Init->getType().getQualifiers(), 1950 /*IsInitializer=*/true); 1951 CtorCGF.FinishFunction(); 1952 Ctor = Fn; 1953 ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy); 1954 CGM.addUsedGlobal(cast<llvm::GlobalValue>(Ctor)); 1955 } else { 1956 Ctor = new llvm::GlobalVariable( 1957 CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true, 1958 llvm::GlobalValue::PrivateLinkage, 1959 llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_ctor")); 1960 ID = Ctor; 1961 } 1962 1963 // Register the information for the entry associated with the constructor. 1964 Out.clear(); 1965 OffloadEntriesInfoManager.registerTargetRegionEntryInfo( 1966 DeviceID, FileID, Twine(Buffer, "_ctor").toStringRef(Out), Line, Ctor, 1967 ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryCtor); 1968 } 1969 if (VD->getType().isDestructedType() != QualType::DK_none) { 1970 llvm::Constant *Dtor; 1971 llvm::Constant *ID; 1972 if (CGM.getLangOpts().OpenMPIsDevice) { 1973 // Generate function that emits destructor call for the threadprivate 1974 // copy of the variable VD 1975 CodeGenFunction DtorCGF(CGM); 1976 1977 const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction(); 1978 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); 1979 llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction( 1980 FTy, Twine(Buffer, "_dtor"), FI, Loc); 1981 auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF); 1982 DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, 1983 FunctionArgList(), Loc, Loc); 1984 // Create a scope with an artificial location for the body of this 1985 // function. 
1986 auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF); 1987 DtorCGF.emitDestroy( 1988 Address::deprecated(Addr, CGM.getContext().getDeclAlign(VD)), ASTTy, 1989 DtorCGF.getDestroyer(ASTTy.isDestructedType()), 1990 DtorCGF.needsEHCleanup(ASTTy.isDestructedType())); 1991 DtorCGF.FinishFunction(); 1992 Dtor = Fn; 1993 ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy); 1994 CGM.addUsedGlobal(cast<llvm::GlobalValue>(Dtor)); 1995 } else { 1996 Dtor = new llvm::GlobalVariable( 1997 CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true, 1998 llvm::GlobalValue::PrivateLinkage, 1999 llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_dtor")); 2000 ID = Dtor; 2001 } 2002 // Register the information for the entry associated with the destructor. 2003 Out.clear(); 2004 OffloadEntriesInfoManager.registerTargetRegionEntryInfo( 2005 DeviceID, FileID, Twine(Buffer, "_dtor").toStringRef(Out), Line, Dtor, 2006 ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryDtor); 2007 } 2008 return CGM.getLangOpts().OpenMPIsDevice; 2009 } 2010 2011 Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF, 2012 QualType VarType, 2013 StringRef Name) { 2014 std::string Suffix = getName({"artificial", ""}); 2015 llvm::Type *VarLVType = CGF.ConvertTypeForMem(VarType); 2016 llvm::GlobalVariable *GAddr = 2017 getOrCreateInternalVariable(VarLVType, Twine(Name).concat(Suffix)); 2018 if (CGM.getLangOpts().OpenMP && CGM.getLangOpts().OpenMPUseTLS && 2019 CGM.getTarget().isTLSSupported()) { 2020 GAddr->setThreadLocal(/*Val=*/true); 2021 return Address(GAddr, GAddr->getValueType(), 2022 CGM.getContext().getTypeAlignInChars(VarType)); 2023 } 2024 std::string CacheSuffix = getName({"cache", ""}); 2025 llvm::Value *Args[] = { 2026 emitUpdateLocation(CGF, SourceLocation()), 2027 getThreadID(CGF, SourceLocation()), 2028 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(GAddr, CGM.VoidPtrTy), 2029 CGF.Builder.CreateIntCast(CGF.getTypeSize(VarType), CGM.SizeTy, 2030 /*isSigned=*/false), 2031 getOrCreateInternalVariable( 2032 CGM.VoidPtrPtrTy, Twine(Name).concat(Suffix).concat(CacheSuffix))}; 2033 return Address( 2034 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 2035 CGF.EmitRuntimeCall( 2036 OMPBuilder.getOrCreateRuntimeFunction( 2037 CGM.getModule(), OMPRTL___kmpc_threadprivate_cached), 2038 Args), 2039 VarLVType->getPointerTo(/*AddrSpace=*/0)), 2040 VarLVType, CGM.getContext().getTypeAlignInChars(VarType)); 2041 } 2042 2043 void CGOpenMPRuntime::emitIfClause(CodeGenFunction &CGF, const Expr *Cond, 2044 const RegionCodeGenTy &ThenGen, 2045 const RegionCodeGenTy &ElseGen) { 2046 CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange()); 2047 2048 // If the condition constant folds and can be elided, try to avoid emitting 2049 // the condition and the dead arm of the if/else. 2050 bool CondConstant; 2051 if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) { 2052 if (CondConstant) 2053 ThenGen(CGF); 2054 else 2055 ElseGen(CGF); 2056 return; 2057 } 2058 2059 // Otherwise, the condition did not fold, or we couldn't elide it. Just 2060 // emit the conditional branch. 2061 llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("omp_if.then"); 2062 llvm::BasicBlock *ElseBlock = CGF.createBasicBlock("omp_if.else"); 2063 llvm::BasicBlock *ContBlock = CGF.createBasicBlock("omp_if.end"); 2064 CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0); 2065 2066 // Emit the 'then' code. 
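  // (Both arms branch to a common continuation block, emitted last.)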
2067   CGF.EmitBlock(ThenBlock);
2068   ThenGen(CGF);
2069   CGF.EmitBranch(ContBlock);
2070   // Emit the 'else' code if present.
2071   // There is no need to emit a line number for an unconditional branch.
2072   (void)ApplyDebugLocation::CreateEmpty(CGF);
2073   CGF.EmitBlock(ElseBlock);
2074   ElseGen(CGF);
2075   // There is no need to emit a line number for an unconditional branch.
2076   (void)ApplyDebugLocation::CreateEmpty(CGF);
2077   CGF.EmitBranch(ContBlock);
2078   // Emit the continuation block for code after the if.
2079   CGF.EmitBlock(ContBlock, /*IsFinished=*/true);
2080 }
2081
2082 void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
2083                                        llvm::Function *OutlinedFn,
2084                                        ArrayRef<llvm::Value *> CapturedVars,
2085                                        const Expr *IfCond,
2086                                        llvm::Value *NumThreads) {
2087   if (!CGF.HaveInsertPoint())
2088     return;
2089   llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
2090   auto &M = CGM.getModule();
2091   auto &&ThenGen = [&M, OutlinedFn, CapturedVars, RTLoc,
2092                     this](CodeGenFunction &CGF, PrePostActionTy &) {
2093     // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn);
2094     CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
2095     llvm::Value *Args[] = {
2096         RTLoc,
2097         CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
2098         CGF.Builder.CreateBitCast(OutlinedFn, RT.getKmpc_MicroPointerTy())};
2099     llvm::SmallVector<llvm::Value *, 16> RealArgs;
2100     RealArgs.append(std::begin(Args), std::end(Args));
2101     RealArgs.append(CapturedVars.begin(), CapturedVars.end());
2102
2103     llvm::FunctionCallee RTLFn =
2104         OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_fork_call);
2105     CGF.EmitRuntimeCall(RTLFn, RealArgs);
2106   };
2107   auto &&ElseGen = [&M, OutlinedFn, CapturedVars, RTLoc, Loc,
2108                     this](CodeGenFunction &CGF, PrePostActionTy &) {
2109     CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
2110     llvm::Value *ThreadID = RT.getThreadID(CGF, Loc);
2111     // Build calls:
2112     // __kmpc_serialized_parallel(&Loc, GTid);
2113     llvm::Value *Args[] = {RTLoc, ThreadID};
2114     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2115                             M, OMPRTL___kmpc_serialized_parallel),
2116                         Args);
2117
2118     // OutlinedFn(&gtid, &zero_bound, CapturedStruct);
2119     Address ThreadIDAddr = RT.emitThreadIDAddress(CGF, Loc);
2120     Address ZeroAddrBound =
2121         CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty,
2122                                          /*Name=*/".bound.zero.addr");
2123     CGF.Builder.CreateStore(CGF.Builder.getInt32(/*C*/ 0), ZeroAddrBound);
2124     llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
2125     // ThreadId for serialized parallels is 0.
2126     OutlinedFnArgs.push_back(ThreadIDAddr.getPointer());
2127     OutlinedFnArgs.push_back(ZeroAddrBound.getPointer());
2128     OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());
2129
2130     // Ensure we do not inline the function. This is trivially true for the ones
2131     // passed to __kmpc_fork_call but the ones called in serialized regions
2132     // could be inlined. This is not perfect, but it is closer to the invariant
2133     // we want, namely, every data environment starts with a new function.
2134     // TODO: We should pass the if condition to the runtime function and do the
2135     // handling there. Much cleaner code.
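    // Hence, drop any AlwaysInline attribute and pin the outlined function as
    // NoInline before emitting the direct call below.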
2136     OutlinedFn->removeFnAttr(llvm::Attribute::AlwaysInline);
2137     OutlinedFn->addFnAttr(llvm::Attribute::NoInline);
2138     RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs);
2139
2140     // __kmpc_end_serialized_parallel(&Loc, GTid);
2141     llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID};
2142     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2143                             M, OMPRTL___kmpc_end_serialized_parallel),
2144                         EndArgs);
2145   };
2146   if (IfCond) {
2147     emitIfClause(CGF, IfCond, ThenGen, ElseGen);
2148   } else {
2149     RegionCodeGenTy ThenRCG(ThenGen);
2150     ThenRCG(CGF);
2151   }
2152 }
2153
2154 // If we're inside an (outlined) parallel region, use the region info's
2155 // thread-ID variable (it is passed as the first argument of the outlined function
2156 // as "kmp_int32 *gtid"). Otherwise, if we're not inside a parallel region but in
2157 // a regular serial code region, get the thread ID by calling kmp_int32
2158 // __kmpc_global_thread_num(ident_t *loc), stash this thread ID in a temporary and
2159 // return the address of that temp.
2160 Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF,
2161                                              SourceLocation Loc) {
2162   if (auto *OMPRegionInfo =
2163           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
2164     if (OMPRegionInfo->getThreadIDVariable())
2165       return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress(CGF);
2166
2167   llvm::Value *ThreadID = getThreadID(CGF, Loc);
2168   QualType Int32Ty =
2169       CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true);
2170   Address ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp.");
2171   CGF.EmitStoreOfScalar(ThreadID,
2172                         CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty));
2173
2174   return ThreadIDTemp;
2175 }
2176
2177 llvm::GlobalVariable *CGOpenMPRuntime::getOrCreateInternalVariable(
2178     llvm::Type *Ty, const llvm::Twine &Name, unsigned AddressSpace) {
2179   SmallString<256> Buffer;
2180   llvm::raw_svector_ostream Out(Buffer);
2181   Out << Name;
2182   StringRef RuntimeName = Out.str();
2183   auto &Elem = *InternalVars.try_emplace(RuntimeName, nullptr).first;
2184   if (Elem.second) {
2185     assert(Elem.second->getType()->isOpaqueOrPointeeTypeMatches(Ty) &&
2186            "OMP internal variable has different type than requested");
2187     return &*Elem.second;
2188   }
2189
2190   return Elem.second = new llvm::GlobalVariable(
2191              CGM.getModule(), Ty, /*IsConstant*/ false,
2192              llvm::GlobalValue::CommonLinkage, llvm::Constant::getNullValue(Ty),
2193              Elem.first(), /*InsertBefore=*/nullptr,
2194              llvm::GlobalValue::NotThreadLocal, AddressSpace);
2195 }
2196
2197 llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) {
2198   std::string Prefix = Twine("gomp_critical_user_", CriticalName).str();
2199   std::string Name = getName({Prefix, "var"});
2200   return getOrCreateInternalVariable(KmpCriticalNameTy, Name);
2201 }
2202
2203 namespace {
2204 /// Common pre(post)-action for different OpenMP constructs.
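/// Emits an "enter" runtime call before the region and an "exit" call after
/// it; with Conditional=true the region body is only emitted when the enter
/// call returns a non-zero value (as for __kmpc_master or __kmpc_single).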
2205 class CommonActionTy final : public PrePostActionTy { 2206 llvm::FunctionCallee EnterCallee; 2207 ArrayRef<llvm::Value *> EnterArgs; 2208 llvm::FunctionCallee ExitCallee; 2209 ArrayRef<llvm::Value *> ExitArgs; 2210 bool Conditional; 2211 llvm::BasicBlock *ContBlock = nullptr; 2212 2213 public: 2214 CommonActionTy(llvm::FunctionCallee EnterCallee, 2215 ArrayRef<llvm::Value *> EnterArgs, 2216 llvm::FunctionCallee ExitCallee, 2217 ArrayRef<llvm::Value *> ExitArgs, bool Conditional = false) 2218 : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee), 2219 ExitArgs(ExitArgs), Conditional(Conditional) {} 2220 void Enter(CodeGenFunction &CGF) override { 2221 llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs); 2222 if (Conditional) { 2223 llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes); 2224 auto *ThenBlock = CGF.createBasicBlock("omp_if.then"); 2225 ContBlock = CGF.createBasicBlock("omp_if.end"); 2226 // Generate the branch (If-stmt) 2227 CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock); 2228 CGF.EmitBlock(ThenBlock); 2229 } 2230 } 2231 void Done(CodeGenFunction &CGF) { 2232 // Emit the rest of blocks/branches 2233 CGF.EmitBranch(ContBlock); 2234 CGF.EmitBlock(ContBlock, true); 2235 } 2236 void Exit(CodeGenFunction &CGF) override { 2237 CGF.EmitRuntimeCall(ExitCallee, ExitArgs); 2238 } 2239 }; 2240 } // anonymous namespace 2241 2242 void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF, 2243 StringRef CriticalName, 2244 const RegionCodeGenTy &CriticalOpGen, 2245 SourceLocation Loc, const Expr *Hint) { 2246 // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]); 2247 // CriticalOpGen(); 2248 // __kmpc_end_critical(ident_t *, gtid, Lock); 2249 // Prepare arguments and build a call to __kmpc_critical 2250 if (!CGF.HaveInsertPoint()) 2251 return; 2252 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 2253 getCriticalRegionLock(CriticalName)}; 2254 llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args), 2255 std::end(Args)); 2256 if (Hint) { 2257 EnterArgs.push_back(CGF.Builder.CreateIntCast( 2258 CGF.EmitScalarExpr(Hint), CGM.Int32Ty, /*isSigned=*/false)); 2259 } 2260 CommonActionTy Action( 2261 OMPBuilder.getOrCreateRuntimeFunction( 2262 CGM.getModule(), 2263 Hint ? 
OMPRTL___kmpc_critical_with_hint : OMPRTL___kmpc_critical),
2264       EnterArgs,
2265       OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
2266                                             OMPRTL___kmpc_end_critical),
2267       Args);
2268   CriticalOpGen.setAction(Action);
2269   emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen);
2270 }
2271
2272 void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF,
2273                                        const RegionCodeGenTy &MasterOpGen,
2274                                        SourceLocation Loc) {
2275   if (!CGF.HaveInsertPoint())
2276     return;
2277   // if(__kmpc_master(ident_t *, gtid)) {
2278   //   MasterOpGen();
2279   //   __kmpc_end_master(ident_t *, gtid);
2280   // }
2281   // Prepare arguments and build a call to __kmpc_master
2282   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2283   CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2284                             CGM.getModule(), OMPRTL___kmpc_master),
2285                         Args,
2286                         OMPBuilder.getOrCreateRuntimeFunction(
2287                             CGM.getModule(), OMPRTL___kmpc_end_master),
2288                         Args,
2289                         /*Conditional=*/true);
2290   MasterOpGen.setAction(Action);
2291   emitInlinedDirective(CGF, OMPD_master, MasterOpGen);
2292   Action.Done(CGF);
2293 }
2294
2295 void CGOpenMPRuntime::emitMaskedRegion(CodeGenFunction &CGF,
2296                                        const RegionCodeGenTy &MaskedOpGen,
2297                                        SourceLocation Loc, const Expr *Filter) {
2298   if (!CGF.HaveInsertPoint())
2299     return;
2300   // if(__kmpc_masked(ident_t *, gtid, filter)) {
2301   //   MaskedOpGen();
2302   //   __kmpc_end_masked(ident_t *, gtid);
2303   // }
2304   // Prepare arguments and build a call to __kmpc_masked
2305   llvm::Value *FilterVal = Filter
2306                                ? CGF.EmitScalarExpr(Filter, CGF.Int32Ty)
2307                                : llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/0);
2308   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2309                          FilterVal};
2310   llvm::Value *ArgsEnd[] = {emitUpdateLocation(CGF, Loc),
2311                             getThreadID(CGF, Loc)};
2312   CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2313                             CGM.getModule(), OMPRTL___kmpc_masked),
2314                         Args,
2315                         OMPBuilder.getOrCreateRuntimeFunction(
2316                             CGM.getModule(), OMPRTL___kmpc_end_masked),
2317                         ArgsEnd,
2318                         /*Conditional=*/true);
2319   MaskedOpGen.setAction(Action);
2320   emitInlinedDirective(CGF, OMPD_masked, MaskedOpGen);
2321   Action.Done(CGF);
2322 }
2323
2324 void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
2325                                         SourceLocation Loc) {
2326   if (!CGF.HaveInsertPoint())
2327     return;
2328   if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
2329     OMPBuilder.createTaskyield(CGF.Builder);
2330   } else {
2331     // Build call __kmpc_omp_taskyield(loc, thread_id, 0);
2332     llvm::Value *Args[] = {
2333         emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2334         llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)};
2335     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2336                             CGM.getModule(), OMPRTL___kmpc_omp_taskyield),
2337                         Args);
2338   }
2339
2340   if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
2341     Region->emitUntiedSwitch(CGF);
2342 }
2343
2344 void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF,
2345                                           const RegionCodeGenTy &TaskgroupOpGen,
2346                                           SourceLocation Loc) {
2347   if (!CGF.HaveInsertPoint())
2348     return;
2349   // __kmpc_taskgroup(ident_t *, gtid);
2350   // TaskgroupOpGen();
2351   // __kmpc_end_taskgroup(ident_t *, gtid);
2352   // Prepare arguments and build a call to __kmpc_taskgroup
2353   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2354   CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2355                             CGM.getModule(), OMPRTL___kmpc_taskgroup),
2356                         Args,
2357
OMPBuilder.getOrCreateRuntimeFunction( 2358 CGM.getModule(), OMPRTL___kmpc_end_taskgroup), 2359 Args); 2360 TaskgroupOpGen.setAction(Action); 2361 emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen); 2362 } 2363 2364 /// Given an array of pointers to variables, project the address of a 2365 /// given variable. 2366 static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array, 2367 unsigned Index, const VarDecl *Var) { 2368 // Pull out the pointer to the variable. 2369 Address PtrAddr = CGF.Builder.CreateConstArrayGEP(Array, Index); 2370 llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr); 2371 2372 Address Addr = Address::deprecated(Ptr, CGF.getContext().getDeclAlign(Var)); 2373 Addr = CGF.Builder.CreateElementBitCast( 2374 Addr, CGF.ConvertTypeForMem(Var->getType())); 2375 return Addr; 2376 } 2377 2378 static llvm::Value *emitCopyprivateCopyFunction( 2379 CodeGenModule &CGM, llvm::Type *ArgsType, 2380 ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs, 2381 ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps, 2382 SourceLocation Loc) { 2383 ASTContext &C = CGM.getContext(); 2384 // void copy_func(void *LHSArg, void *RHSArg); 2385 FunctionArgList Args; 2386 ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 2387 ImplicitParamDecl::Other); 2388 ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 2389 ImplicitParamDecl::Other); 2390 Args.push_back(&LHSArg); 2391 Args.push_back(&RHSArg); 2392 const auto &CGFI = 2393 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 2394 std::string Name = 2395 CGM.getOpenMPRuntime().getName({"omp", "copyprivate", "copy_func"}); 2396 auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI), 2397 llvm::GlobalValue::InternalLinkage, Name, 2398 &CGM.getModule()); 2399 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI); 2400 Fn->setDoesNotRecurse(); 2401 CodeGenFunction CGF(CGM); 2402 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc); 2403 // Dest = (void*[n])(LHSArg); 2404 // Src = (void*[n])(RHSArg); 2405 Address LHS = Address::deprecated( 2406 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 2407 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)), ArgsType), 2408 CGF.getPointerAlign()); 2409 Address RHS = Address::deprecated( 2410 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 2411 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)), ArgsType), 2412 CGF.getPointerAlign()); 2413 // *(Type0*)Dst[0] = *(Type0*)Src[0]; 2414 // *(Type1*)Dst[1] = *(Type1*)Src[1]; 2415 // ... 
2416 // *(Typen*)Dst[n] = *(Typen*)Src[n]; 2417 for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) { 2418 const auto *DestVar = 2419 cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl()); 2420 Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar); 2421 2422 const auto *SrcVar = 2423 cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl()); 2424 Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar); 2425 2426 const auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl(); 2427 QualType Type = VD->getType(); 2428 CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]); 2429 } 2430 CGF.FinishFunction(); 2431 return Fn; 2432 } 2433 2434 void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF, 2435 const RegionCodeGenTy &SingleOpGen, 2436 SourceLocation Loc, 2437 ArrayRef<const Expr *> CopyprivateVars, 2438 ArrayRef<const Expr *> SrcExprs, 2439 ArrayRef<const Expr *> DstExprs, 2440 ArrayRef<const Expr *> AssignmentOps) { 2441 if (!CGF.HaveInsertPoint()) 2442 return; 2443 assert(CopyprivateVars.size() == SrcExprs.size() && 2444 CopyprivateVars.size() == DstExprs.size() && 2445 CopyprivateVars.size() == AssignmentOps.size()); 2446 ASTContext &C = CGM.getContext(); 2447 // int32 did_it = 0; 2448 // if(__kmpc_single(ident_t *, gtid)) { 2449 // SingleOpGen(); 2450 // __kmpc_end_single(ident_t *, gtid); 2451 // did_it = 1; 2452 // } 2453 // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>, 2454 // <copy_func>, did_it); 2455 2456 Address DidIt = Address::invalid(); 2457 if (!CopyprivateVars.empty()) { 2458 // int32 did_it = 0; 2459 QualType KmpInt32Ty = 2460 C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1); 2461 DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it"); 2462 CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt); 2463 } 2464 // Prepare arguments and build a call to __kmpc_single 2465 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 2466 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction( 2467 CGM.getModule(), OMPRTL___kmpc_single), 2468 Args, 2469 OMPBuilder.getOrCreateRuntimeFunction( 2470 CGM.getModule(), OMPRTL___kmpc_end_single), 2471 Args, 2472 /*Conditional=*/true); 2473 SingleOpGen.setAction(Action); 2474 emitInlinedDirective(CGF, OMPD_single, SingleOpGen); 2475 if (DidIt.isValid()) { 2476 // did_it = 1; 2477 CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt); 2478 } 2479 Action.Done(CGF); 2480 // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>, 2481 // <copy_func>, did_it); 2482 if (DidIt.isValid()) { 2483 llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size()); 2484 QualType CopyprivateArrayTy = C.getConstantArrayType( 2485 C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal, 2486 /*IndexTypeQuals=*/0); 2487 // Create a list of all private variables for copyprivate. 2488 Address CopyprivateList = 2489 CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list"); 2490 for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) { 2491 Address Elem = CGF.Builder.CreateConstArrayGEP(CopyprivateList, I); 2492 CGF.Builder.CreateStore( 2493 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 2494 CGF.EmitLValue(CopyprivateVars[I]).getPointer(CGF), 2495 CGF.VoidPtrTy), 2496 Elem); 2497 } 2498 // Build function that copies private values from single region to all other 2499 // threads in the corresponding parallel region. 
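    // (did_it is non-zero only in the thread that executed the single region;
    // __kmpc_copyprivate uses it to pick the broadcast source.)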
2500 llvm::Value *CpyFn = emitCopyprivateCopyFunction( 2501 CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy)->getPointerTo(), 2502 CopyprivateVars, SrcExprs, DstExprs, AssignmentOps, Loc); 2503 llvm::Value *BufSize = CGF.getTypeSize(CopyprivateArrayTy); 2504 Address CL = 2505 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(CopyprivateList, 2506 CGF.VoidPtrTy); 2507 llvm::Value *DidItVal = CGF.Builder.CreateLoad(DidIt); 2508 llvm::Value *Args[] = { 2509 emitUpdateLocation(CGF, Loc), // ident_t *<loc> 2510 getThreadID(CGF, Loc), // i32 <gtid> 2511 BufSize, // size_t <buf_size> 2512 CL.getPointer(), // void *<copyprivate list> 2513 CpyFn, // void (*) (void *, void *) <copy_func> 2514 DidItVal // i32 did_it 2515 }; 2516 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 2517 CGM.getModule(), OMPRTL___kmpc_copyprivate), 2518 Args); 2519 } 2520 } 2521 2522 void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF, 2523 const RegionCodeGenTy &OrderedOpGen, 2524 SourceLocation Loc, bool IsThreads) { 2525 if (!CGF.HaveInsertPoint()) 2526 return; 2527 // __kmpc_ordered(ident_t *, gtid); 2528 // OrderedOpGen(); 2529 // __kmpc_end_ordered(ident_t *, gtid); 2530 // Prepare arguments and build a call to __kmpc_ordered 2531 if (IsThreads) { 2532 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 2533 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction( 2534 CGM.getModule(), OMPRTL___kmpc_ordered), 2535 Args, 2536 OMPBuilder.getOrCreateRuntimeFunction( 2537 CGM.getModule(), OMPRTL___kmpc_end_ordered), 2538 Args); 2539 OrderedOpGen.setAction(Action); 2540 emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen); 2541 return; 2542 } 2543 emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen); 2544 } 2545 2546 unsigned CGOpenMPRuntime::getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind) { 2547 unsigned Flags; 2548 if (Kind == OMPD_for) 2549 Flags = OMP_IDENT_BARRIER_IMPL_FOR; 2550 else if (Kind == OMPD_sections) 2551 Flags = OMP_IDENT_BARRIER_IMPL_SECTIONS; 2552 else if (Kind == OMPD_single) 2553 Flags = OMP_IDENT_BARRIER_IMPL_SINGLE; 2554 else if (Kind == OMPD_barrier) 2555 Flags = OMP_IDENT_BARRIER_EXPL; 2556 else 2557 Flags = OMP_IDENT_BARRIER_IMPL; 2558 return Flags; 2559 } 2560 2561 void CGOpenMPRuntime::getDefaultScheduleAndChunk( 2562 CodeGenFunction &CGF, const OMPLoopDirective &S, 2563 OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const { 2564 // Check if the loop directive is actually a doacross loop directive. In this 2565 // case choose static, 1 schedule. 2566 if (llvm::any_of( 2567 S.getClausesOfKind<OMPOrderedClause>(), 2568 [](const OMPOrderedClause *C) { return C->getNumForLoops(); })) { 2569 ScheduleKind = OMPC_SCHEDULE_static; 2570 // Chunk size is 1 in this case. 
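    // That is, behave as if schedule(static, 1) had been written on the
    // directive.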
2571 llvm::APInt ChunkSize(32, 1); 2572 ChunkExpr = IntegerLiteral::Create( 2573 CGF.getContext(), ChunkSize, 2574 CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/0), 2575 SourceLocation()); 2576 } 2577 } 2578 2579 void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc, 2580 OpenMPDirectiveKind Kind, bool EmitChecks, 2581 bool ForceSimpleCall) { 2582 // Check if we should use the OMPBuilder 2583 auto *OMPRegionInfo = 2584 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo); 2585 if (CGF.CGM.getLangOpts().OpenMPIRBuilder) { 2586 CGF.Builder.restoreIP(OMPBuilder.createBarrier( 2587 CGF.Builder, Kind, ForceSimpleCall, EmitChecks)); 2588 return; 2589 } 2590 2591 if (!CGF.HaveInsertPoint()) 2592 return; 2593 // Build call __kmpc_cancel_barrier(loc, thread_id); 2594 // Build call __kmpc_barrier(loc, thread_id); 2595 unsigned Flags = getDefaultFlagsForBarriers(Kind); 2596 // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc, 2597 // thread_id); 2598 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags), 2599 getThreadID(CGF, Loc)}; 2600 if (OMPRegionInfo) { 2601 if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) { 2602 llvm::Value *Result = CGF.EmitRuntimeCall( 2603 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), 2604 OMPRTL___kmpc_cancel_barrier), 2605 Args); 2606 if (EmitChecks) { 2607 // if (__kmpc_cancel_barrier()) { 2608 // exit from construct; 2609 // } 2610 llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit"); 2611 llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue"); 2612 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result); 2613 CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB); 2614 CGF.EmitBlock(ExitBB); 2615 // exit from construct; 2616 CodeGenFunction::JumpDest CancelDestination = 2617 CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind()); 2618 CGF.EmitBranchThroughCleanup(CancelDestination); 2619 CGF.EmitBlock(ContBB, /*IsFinished=*/true); 2620 } 2621 return; 2622 } 2623 } 2624 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 2625 CGM.getModule(), OMPRTL___kmpc_barrier), 2626 Args); 2627 } 2628 2629 /// Map the OpenMP loop schedule to the runtime enumeration. 2630 static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind, 2631 bool Chunked, bool Ordered) { 2632 switch (ScheduleKind) { 2633 case OMPC_SCHEDULE_static: 2634 return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked) 2635 : (Ordered ? OMP_ord_static : OMP_sch_static); 2636 case OMPC_SCHEDULE_dynamic: 2637 return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked; 2638 case OMPC_SCHEDULE_guided: 2639 return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked; 2640 case OMPC_SCHEDULE_runtime: 2641 return Ordered ? OMP_ord_runtime : OMP_sch_runtime; 2642 case OMPC_SCHEDULE_auto: 2643 return Ordered ? OMP_ord_auto : OMP_sch_auto; 2644 case OMPC_SCHEDULE_unknown: 2645 assert(!Chunked && "chunk was specified but schedule kind not known"); 2646 return Ordered ? OMP_ord_static : OMP_sch_static; 2647 } 2648 llvm_unreachable("Unexpected runtime schedule"); 2649 } 2650 2651 /// Map the OpenMP distribute schedule to the runtime enumeration. 2652 static OpenMPSchedType 2653 getRuntimeSchedule(OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) { 2654 // only static is allowed for dist_schedule 2655 return Chunked ? 
OMP_dist_sch_static_chunked : OMP_dist_sch_static;
2656 }
2657
2658 bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind,
2659                                          bool Chunked) const {
2660   OpenMPSchedType Schedule =
2661       getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
2662   return Schedule == OMP_sch_static;
2663 }
2664
2665 bool CGOpenMPRuntime::isStaticNonchunked(
2666     OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
2667   OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
2668   return Schedule == OMP_dist_sch_static;
2669 }
2670
2671 bool CGOpenMPRuntime::isStaticChunked(OpenMPScheduleClauseKind ScheduleKind,
2672                                       bool Chunked) const {
2673   OpenMPSchedType Schedule =
2674       getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
2675   return Schedule == OMP_sch_static_chunked;
2676 }
2677
2678 bool CGOpenMPRuntime::isStaticChunked(
2679     OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
2680   OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
2681   return Schedule == OMP_dist_sch_static_chunked;
2682 }
2683
2684 bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const {
2685   OpenMPSchedType Schedule =
2686       getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false);
2687   assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here");
2688   return Schedule != OMP_sch_static;
2689 }
2690
2691 static int addMonoNonMonoModifier(CodeGenModule &CGM, OpenMPSchedType Schedule,
2692                                   OpenMPScheduleClauseModifier M1,
2693                                   OpenMPScheduleClauseModifier M2) {
2694   int Modifier = 0;
2695   switch (M1) {
2696   case OMPC_SCHEDULE_MODIFIER_monotonic:
2697     Modifier = OMP_sch_modifier_monotonic;
2698     break;
2699   case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
2700     Modifier = OMP_sch_modifier_nonmonotonic;
2701     break;
2702   case OMPC_SCHEDULE_MODIFIER_simd:
2703     if (Schedule == OMP_sch_static_chunked)
2704       Schedule = OMP_sch_static_balanced_chunked;
2705     break;
2706   case OMPC_SCHEDULE_MODIFIER_last:
2707   case OMPC_SCHEDULE_MODIFIER_unknown:
2708     break;
2709   }
2710   switch (M2) {
2711   case OMPC_SCHEDULE_MODIFIER_monotonic:
2712     Modifier = OMP_sch_modifier_monotonic;
2713     break;
2714   case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
2715     Modifier = OMP_sch_modifier_nonmonotonic;
2716     break;
2717   case OMPC_SCHEDULE_MODIFIER_simd:
2718     if (Schedule == OMP_sch_static_chunked)
2719       Schedule = OMP_sch_static_balanced_chunked;
2720     break;
2721   case OMPC_SCHEDULE_MODIFIER_last:
2722   case OMPC_SCHEDULE_MODIFIER_unknown:
2723     break;
2724   }
2725   // OpenMP 5.0, 2.9.2 Worksharing-Loop Construct, Description.
2726   // If the static schedule kind is specified or if the ordered clause is
2727   // specified, and if the nonmonotonic modifier is not specified, the effect is
2728   // as if the monotonic modifier is specified. Otherwise, unless the monotonic
2729   // modifier is specified, the effect is as if the nonmonotonic modifier is
2730   // specified.
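  // For example, under OpenMP >= 5.0 a plain schedule(dynamic) is emitted as
  // OMP_sch_dynamic_chunked | OMP_sch_modifier_nonmonotonic, while a plain
  // schedule(static) keeps Modifier == 0 (monotonic is implied, not encoded).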
2731 if (CGM.getLangOpts().OpenMP >= 50 && Modifier == 0) { 2732 if (!(Schedule == OMP_sch_static_chunked || Schedule == OMP_sch_static || 2733 Schedule == OMP_sch_static_balanced_chunked || 2734 Schedule == OMP_ord_static_chunked || Schedule == OMP_ord_static || 2735 Schedule == OMP_dist_sch_static_chunked || 2736 Schedule == OMP_dist_sch_static)) 2737 Modifier = OMP_sch_modifier_nonmonotonic; 2738 } 2739 return Schedule | Modifier; 2740 } 2741 2742 void CGOpenMPRuntime::emitForDispatchInit( 2743 CodeGenFunction &CGF, SourceLocation Loc, 2744 const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned, 2745 bool Ordered, const DispatchRTInput &DispatchValues) { 2746 if (!CGF.HaveInsertPoint()) 2747 return; 2748 OpenMPSchedType Schedule = getRuntimeSchedule( 2749 ScheduleKind.Schedule, DispatchValues.Chunk != nullptr, Ordered); 2750 assert(Ordered || 2751 (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked && 2752 Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked && 2753 Schedule != OMP_sch_static_balanced_chunked)); 2754 // Call __kmpc_dispatch_init( 2755 // ident_t *loc, kmp_int32 tid, kmp_int32 schedule, 2756 // kmp_int[32|64] lower, kmp_int[32|64] upper, 2757 // kmp_int[32|64] stride, kmp_int[32|64] chunk); 2758 2759 // If the Chunk was not specified in the clause - use default value 1. 2760 llvm::Value *Chunk = DispatchValues.Chunk ? DispatchValues.Chunk 2761 : CGF.Builder.getIntN(IVSize, 1); 2762 llvm::Value *Args[] = { 2763 emitUpdateLocation(CGF, Loc), 2764 getThreadID(CGF, Loc), 2765 CGF.Builder.getInt32(addMonoNonMonoModifier( 2766 CGM, Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type 2767 DispatchValues.LB, // Lower 2768 DispatchValues.UB, // Upper 2769 CGF.Builder.getIntN(IVSize, 1), // Stride 2770 Chunk // Chunk 2771 }; 2772 CGF.EmitRuntimeCall(createDispatchInitFunction(IVSize, IVSigned), Args); 2773 } 2774 2775 static void emitForStaticInitCall( 2776 CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId, 2777 llvm::FunctionCallee ForStaticInitFunction, OpenMPSchedType Schedule, 2778 OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2, 2779 const CGOpenMPRuntime::StaticRTInput &Values) { 2780 if (!CGF.HaveInsertPoint()) 2781 return; 2782 2783 assert(!Values.Ordered); 2784 assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked || 2785 Schedule == OMP_sch_static_balanced_chunked || 2786 Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked || 2787 Schedule == OMP_dist_sch_static || 2788 Schedule == OMP_dist_sch_static_chunked); 2789 2790 // Call __kmpc_for_static_init( 2791 // ident_t *loc, kmp_int32 tid, kmp_int32 schedtype, 2792 // kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower, 2793 // kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride, 2794 // kmp_int[32|64] incr, kmp_int[32|64] chunk); 2795 llvm::Value *Chunk = Values.Chunk; 2796 if (Chunk == nullptr) { 2797 assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static || 2798 Schedule == OMP_dist_sch_static) && 2799 "expected static non-chunked schedule"); 2800 // If the Chunk was not specified in the clause - use default value 1. 
2801 Chunk = CGF.Builder.getIntN(Values.IVSize, 1); 2802 } else { 2803 assert((Schedule == OMP_sch_static_chunked || 2804 Schedule == OMP_sch_static_balanced_chunked || 2805 Schedule == OMP_ord_static_chunked || 2806 Schedule == OMP_dist_sch_static_chunked) && 2807 "expected static chunked schedule"); 2808 } 2809 llvm::Value *Args[] = { 2810 UpdateLocation, 2811 ThreadId, 2812 CGF.Builder.getInt32(addMonoNonMonoModifier(CGF.CGM, Schedule, M1, 2813 M2)), // Schedule type 2814 Values.IL.getPointer(), // &isLastIter 2815 Values.LB.getPointer(), // &LB 2816 Values.UB.getPointer(), // &UB 2817 Values.ST.getPointer(), // &Stride 2818 CGF.Builder.getIntN(Values.IVSize, 1), // Incr 2819 Chunk // Chunk 2820 }; 2821 CGF.EmitRuntimeCall(ForStaticInitFunction, Args); 2822 } 2823 2824 void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF, 2825 SourceLocation Loc, 2826 OpenMPDirectiveKind DKind, 2827 const OpenMPScheduleTy &ScheduleKind, 2828 const StaticRTInput &Values) { 2829 OpenMPSchedType ScheduleNum = getRuntimeSchedule( 2830 ScheduleKind.Schedule, Values.Chunk != nullptr, Values.Ordered); 2831 assert(isOpenMPWorksharingDirective(DKind) && 2832 "Expected loop-based or sections-based directive."); 2833 llvm::Value *UpdatedLocation = emitUpdateLocation(CGF, Loc, 2834 isOpenMPLoopDirective(DKind) 2835 ? OMP_IDENT_WORK_LOOP 2836 : OMP_IDENT_WORK_SECTIONS); 2837 llvm::Value *ThreadId = getThreadID(CGF, Loc); 2838 llvm::FunctionCallee StaticInitFunction = 2839 createForStaticInitFunction(Values.IVSize, Values.IVSigned, false); 2840 auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc); 2841 emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction, 2842 ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, Values); 2843 } 2844 2845 void CGOpenMPRuntime::emitDistributeStaticInit( 2846 CodeGenFunction &CGF, SourceLocation Loc, 2847 OpenMPDistScheduleClauseKind SchedKind, 2848 const CGOpenMPRuntime::StaticRTInput &Values) { 2849 OpenMPSchedType ScheduleNum = 2850 getRuntimeSchedule(SchedKind, Values.Chunk != nullptr); 2851 llvm::Value *UpdatedLocation = 2852 emitUpdateLocation(CGF, Loc, OMP_IDENT_WORK_DISTRIBUTE); 2853 llvm::Value *ThreadId = getThreadID(CGF, Loc); 2854 llvm::FunctionCallee StaticInitFunction; 2855 bool isGPUDistribute = 2856 CGM.getLangOpts().OpenMPIsDevice && 2857 (CGM.getTriple().isAMDGCN() || CGM.getTriple().isNVPTX()); 2858 StaticInitFunction = createForStaticInitFunction( 2859 Values.IVSize, Values.IVSigned, isGPUDistribute); 2860 2861 emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction, 2862 ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown, 2863 OMPC_SCHEDULE_MODIFIER_unknown, Values); 2864 } 2865 2866 void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF, 2867 SourceLocation Loc, 2868 OpenMPDirectiveKind DKind) { 2869 if (!CGF.HaveInsertPoint()) 2870 return; 2871 // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid); 2872 llvm::Value *Args[] = { 2873 emitUpdateLocation(CGF, Loc, 2874 isOpenMPDistributeDirective(DKind) 2875 ? OMP_IDENT_WORK_DISTRIBUTE 2876 : isOpenMPLoopDirective(DKind) 2877 ? 
OMP_IDENT_WORK_LOOP
2878 : OMP_IDENT_WORK_SECTIONS),
2879 getThreadID(CGF, Loc)};
2880 auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
2881 if (isOpenMPDistributeDirective(DKind) && CGM.getLangOpts().OpenMPIsDevice &&
2882 (CGM.getTriple().isAMDGCN() || CGM.getTriple().isNVPTX()))
2883 CGF.EmitRuntimeCall(
2884 OMPBuilder.getOrCreateRuntimeFunction(
2885 CGM.getModule(), OMPRTL___kmpc_distribute_static_fini),
2886 Args);
2887 else
2888 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2889 CGM.getModule(), OMPRTL___kmpc_for_static_fini),
2890 Args);
2891 }
2892
2893 void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
2894 SourceLocation Loc,
2895 unsigned IVSize,
2896 bool IVSigned) {
2897 if (!CGF.HaveInsertPoint())
2898 return;
2899 // Call __kmpc_dispatch_fini_(4|8)[u](ident_t *loc, kmp_int32 tid);
2900 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2901 CGF.EmitRuntimeCall(createDispatchFiniFunction(IVSize, IVSigned), Args);
2902 }
2903
2904 llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF,
2905 SourceLocation Loc, unsigned IVSize,
2906 bool IVSigned, Address IL,
2907 Address LB, Address UB,
2908 Address ST) {
2909 // Call __kmpc_dispatch_next(
2910 // ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter,
2911 // kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper,
2912 // kmp_int[32|64] *p_stride);
2913 llvm::Value *Args[] = {
2914 emitUpdateLocation(CGF, Loc),
2915 getThreadID(CGF, Loc),
2916 IL.getPointer(), // &isLastIter
2917 LB.getPointer(), // &Lower
2918 UB.getPointer(), // &Upper
2919 ST.getPointer() // &Stride
2920 };
2921 llvm::Value *Call =
2922 CGF.EmitRuntimeCall(createDispatchNextFunction(IVSize, IVSigned), Args);
2923 return CGF.EmitScalarConversion(
2924 Call, CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/1),
2925 CGF.getContext().BoolTy, Loc);
2926 }
2927
2928 void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
2929 llvm::Value *NumThreads,
2930 SourceLocation Loc) {
2931 if (!CGF.HaveInsertPoint())
2932 return;
2933 // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads)
2934 llvm::Value *Args[] = {
2935 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2936 CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)};
2937 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2938 CGM.getModule(), OMPRTL___kmpc_push_num_threads),
2939 Args);
2940 }
2941
2942 void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF,
2943 ProcBindKind ProcBind,
2944 SourceLocation Loc) {
2945 if (!CGF.HaveInsertPoint())
2946 return;
2947 assert(ProcBind != OMP_PROC_BIND_unknown && "Unsupported proc_bind value.");
2948 // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind)
2949 llvm::Value *Args[] = {
2950 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2951 llvm::ConstantInt::get(CGM.IntTy, unsigned(ProcBind), /*isSigned=*/true)};
2952 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2953 CGM.getModule(), OMPRTL___kmpc_push_proc_bind),
2954 Args);
2955 }
2956
2957 void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>,
2958 SourceLocation Loc, llvm::AtomicOrdering AO) {
2959 if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
2960 OMPBuilder.createFlush(CGF.Builder);
2961 } else {
2962 if (!CGF.HaveInsertPoint())
2963 return;
2964 // Build call void __kmpc_flush(ident_t *loc)
2965 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2966 CGM.getModule(), OMPRTL___kmpc_flush),
2967
emitUpdateLocation(CGF, Loc));
2968 }
2969 }
2970
2971 namespace {
2972 /// Indexes of fields for type kmp_task_t.
2973 enum KmpTaskTFields {
2974 /// List of shared variables.
2975 KmpTaskTShareds,
2976 /// Task routine.
2977 KmpTaskTRoutine,
2978 /// Partition id for the untied tasks.
2979 KmpTaskTPartId,
2980 /// Function that calls destructors for private variables.
2981 Data1,
2982 /// Task priority.
2983 Data2,
2984 /// (Taskloops only) Lower bound.
2985 KmpTaskTLowerBound,
2986 /// (Taskloops only) Upper bound.
2987 KmpTaskTUpperBound,
2988 /// (Taskloops only) Stride.
2989 KmpTaskTStride,
2990 /// (Taskloops only) Is last iteration flag.
2991 KmpTaskTLastIter,
2992 /// (Taskloops only) Reduction data.
2993 KmpTaskTReductions,
2994 };
2995 } // anonymous namespace
2996
2997 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::empty() const {
2998 return OffloadEntriesTargetRegion.empty() &&
2999 OffloadEntriesDeviceGlobalVar.empty();
3000 }
3001
3002 /// Initialize target region entry.
3003 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3004 initializeTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
3005 StringRef ParentName, unsigned LineNum,
3006 unsigned Order) {
3007 assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
3008 "only required for the device "
3009 "code generation.");
3010 OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] =
3011 OffloadEntryInfoTargetRegion(Order, /*Addr=*/nullptr, /*ID=*/nullptr,
3012 OMPTargetRegionEntryTargetRegion);
3013 ++OffloadingEntriesNum;
3014 }
3015
3016 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3017 registerTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
3018 StringRef ParentName, unsigned LineNum,
3019 llvm::Constant *Addr, llvm::Constant *ID,
3020 OMPTargetRegionEntryKind Flags) {
3021 // If we are emitting code for a target, the entry is already initialized;
3022 // it only has to be registered.
3023 if (CGM.getLangOpts().OpenMPIsDevice) {
3024 // This could happen if the device compilation is invoked standalone.
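// (e.g. a device-only compilation invoked without a host IR file). In that
// case the entry was never pre-initialized from host metadata, so it is
// silently dropped instead of being registered.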
3025 if (!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum)) 3026 return; 3027 auto &Entry = 3028 OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum]; 3029 Entry.setAddress(Addr); 3030 Entry.setID(ID); 3031 Entry.setFlags(Flags); 3032 } else { 3033 if (Flags == 3034 OffloadEntriesInfoManagerTy::OMPTargetRegionEntryTargetRegion && 3035 hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum, 3036 /*IgnoreAddressId*/ true)) 3037 return; 3038 assert(!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum) && 3039 "Target region entry already registered!"); 3040 OffloadEntryInfoTargetRegion Entry(OffloadingEntriesNum, Addr, ID, Flags); 3041 OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = Entry; 3042 ++OffloadingEntriesNum; 3043 } 3044 } 3045 3046 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::hasTargetRegionEntryInfo( 3047 unsigned DeviceID, unsigned FileID, StringRef ParentName, unsigned LineNum, 3048 bool IgnoreAddressId) const { 3049 auto PerDevice = OffloadEntriesTargetRegion.find(DeviceID); 3050 if (PerDevice == OffloadEntriesTargetRegion.end()) 3051 return false; 3052 auto PerFile = PerDevice->second.find(FileID); 3053 if (PerFile == PerDevice->second.end()) 3054 return false; 3055 auto PerParentName = PerFile->second.find(ParentName); 3056 if (PerParentName == PerFile->second.end()) 3057 return false; 3058 auto PerLine = PerParentName->second.find(LineNum); 3059 if (PerLine == PerParentName->second.end()) 3060 return false; 3061 // Fail if this entry is already registered. 3062 if (!IgnoreAddressId && 3063 (PerLine->second.getAddress() || PerLine->second.getID())) 3064 return false; 3065 return true; 3066 } 3067 3068 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::actOnTargetRegionEntriesInfo( 3069 const OffloadTargetRegionEntryInfoActTy &Action) { 3070 // Scan all target region entries and perform the provided action. 3071 for (const auto &D : OffloadEntriesTargetRegion) 3072 for (const auto &F : D.second) 3073 for (const auto &P : F.second) 3074 for (const auto &L : P.second) 3075 Action(D.first, F.first, P.first(), L.first, L.second); 3076 } 3077 3078 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: 3079 initializeDeviceGlobalVarEntryInfo(StringRef Name, 3080 OMPTargetGlobalVarEntryKind Flags, 3081 unsigned Order) { 3082 assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is " 3083 "only required for the device " 3084 "code generation."); 3085 OffloadEntriesDeviceGlobalVar.try_emplace(Name, Order, Flags); 3086 ++OffloadingEntriesNum; 3087 } 3088 3089 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: 3090 registerDeviceGlobalVarEntryInfo(StringRef VarName, llvm::Constant *Addr, 3091 CharUnits VarSize, 3092 OMPTargetGlobalVarEntryKind Flags, 3093 llvm::GlobalValue::LinkageTypes Linkage) { 3094 if (CGM.getLangOpts().OpenMPIsDevice) { 3095 // This could happen if the device compilation is invoked standalone. 
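// Same reasoning as for target regions above: without preloaded host
// metadata there is no initialized slot to fill in, so skip the variable.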
3096 if (!hasDeviceGlobalVarEntryInfo(VarName))
3097 return;
3098 auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
3099 if (Entry.getAddress() && hasDeviceGlobalVarEntryInfo(VarName)) {
3100 if (Entry.getVarSize().isZero()) {
3101 Entry.setVarSize(VarSize);
3102 Entry.setLinkage(Linkage);
3103 }
3104 return;
3105 }
3106 Entry.setVarSize(VarSize);
3107 Entry.setLinkage(Linkage);
3108 Entry.setAddress(Addr);
3109 } else {
3110 if (hasDeviceGlobalVarEntryInfo(VarName)) {
3111 auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
3112 assert(Entry.isValid() && Entry.getFlags() == Flags &&
3113 "Entry not initialized!");
3114 if (Entry.getVarSize().isZero()) {
3115 Entry.setVarSize(VarSize);
3116 Entry.setLinkage(Linkage);
3117 }
3118 return;
3119 }
3120 OffloadEntriesDeviceGlobalVar.try_emplace(
3121 VarName, OffloadingEntriesNum, Addr, VarSize, Flags, Linkage);
3122 ++OffloadingEntriesNum;
3123 }
3124 }
3125
3126 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3127 actOnDeviceGlobalVarEntriesInfo(
3128 const OffloadDeviceGlobalVarEntryInfoActTy &Action) {
3129 // Scan all device global variable entries and perform the provided action.
3130 for (const auto &E : OffloadEntriesDeviceGlobalVar)
3131 Action(E.getKey(), E.getValue());
3132 }
3133
3134 void CGOpenMPRuntime::createOffloadEntry(
3135 llvm::Constant *ID, llvm::Constant *Addr, uint64_t Size, int32_t Flags,
3136 llvm::GlobalValue::LinkageTypes Linkage) {
3137 StringRef Name = Addr->getName();
3138 llvm::Module &M = CGM.getModule();
3139 llvm::LLVMContext &C = M.getContext();
3140
3141 // Create constant string with the name.
3142 llvm::Constant *StrPtrInit = llvm::ConstantDataArray::getString(C, Name);
3143
3144 std::string StringName = getName({"omp_offloading", "entry_name"});
3145 auto *Str = new llvm::GlobalVariable(
3146 M, StrPtrInit->getType(), /*isConstant=*/true,
3147 llvm::GlobalValue::InternalLinkage, StrPtrInit, StringName);
3148 Str->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
3149
3150 llvm::Constant *Data[] = {
3151 llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast(ID, CGM.VoidPtrTy),
3152 llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast(Str, CGM.Int8PtrTy),
3153 llvm::ConstantInt::get(CGM.SizeTy, Size),
3154 llvm::ConstantInt::get(CGM.Int32Ty, Flags),
3155 llvm::ConstantInt::get(CGM.Int32Ty, 0)};
3156 std::string EntryName = getName({"omp_offloading", "entry", ""});
3157 llvm::GlobalVariable *Entry = createGlobalStruct(
3158 CGM, getTgtOffloadEntryQTy(), /*IsConstant=*/true, Data,
3159 Twine(EntryName).concat(Name), llvm::GlobalValue::WeakAnyLinkage);
3160
3161 // The entry has to be created in the section the linker expects it to be in.
3162 Entry->setSection("omp_offloading_entries");
3163 }
3164
3165 void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() {
3166 // Emit the offloading entries and metadata so that the device codegen side
3167 // can easily figure out what to emit. The produced metadata looks like
3168 // this:
3169 //
3170 // !omp_offload.info = !{!1, ...}
3171 //
3172 // Right now we only generate metadata for functions that contain target
3173 // regions.
3174
3175 // If we are in simd mode or there are no entries, we don't need to do
3176 // anything.
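// (Under -fopenmp-simd only 'simd' constructs are honored, so no offload
// entries are ever created in that mode.)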
3177 if (CGM.getLangOpts().OpenMPSimd || OffloadEntriesInfoManager.empty()) 3178 return; 3179 3180 llvm::Module &M = CGM.getModule(); 3181 llvm::LLVMContext &C = M.getContext(); 3182 SmallVector<std::tuple<const OffloadEntriesInfoManagerTy::OffloadEntryInfo *, 3183 SourceLocation, StringRef>, 3184 16> 3185 OrderedEntries(OffloadEntriesInfoManager.size()); 3186 llvm::SmallVector<StringRef, 16> ParentFunctions( 3187 OffloadEntriesInfoManager.size()); 3188 3189 // Auxiliary methods to create metadata values and strings. 3190 auto &&GetMDInt = [this](unsigned V) { 3191 return llvm::ConstantAsMetadata::get( 3192 llvm::ConstantInt::get(CGM.Int32Ty, V)); 3193 }; 3194 3195 auto &&GetMDString = [&C](StringRef V) { return llvm::MDString::get(C, V); }; 3196 3197 // Create the offloading info metadata node. 3198 llvm::NamedMDNode *MD = M.getOrInsertNamedMetadata("omp_offload.info"); 3199 3200 // Create function that emits metadata for each target region entry; 3201 auto &&TargetRegionMetadataEmitter = 3202 [this, &C, MD, &OrderedEntries, &ParentFunctions, &GetMDInt, 3203 &GetMDString]( 3204 unsigned DeviceID, unsigned FileID, StringRef ParentName, 3205 unsigned Line, 3206 const OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion &E) { 3207 // Generate metadata for target regions. Each entry of this metadata 3208 // contains: 3209 // - Entry 0 -> Kind of this type of metadata (0). 3210 // - Entry 1 -> Device ID of the file where the entry was identified. 3211 // - Entry 2 -> File ID of the file where the entry was identified. 3212 // - Entry 3 -> Mangled name of the function where the entry was 3213 // identified. 3214 // - Entry 4 -> Line in the file where the entry was identified. 3215 // - Entry 5 -> Order the entry was created. 3216 // The first element of the metadata node is the kind. 3217 llvm::Metadata *Ops[] = {GetMDInt(E.getKind()), GetMDInt(DeviceID), 3218 GetMDInt(FileID), GetMDString(ParentName), 3219 GetMDInt(Line), GetMDInt(E.getOrder())}; 3220 3221 SourceLocation Loc; 3222 for (auto I = CGM.getContext().getSourceManager().fileinfo_begin(), 3223 E = CGM.getContext().getSourceManager().fileinfo_end(); 3224 I != E; ++I) { 3225 if (I->getFirst()->getUniqueID().getDevice() == DeviceID && 3226 I->getFirst()->getUniqueID().getFile() == FileID) { 3227 Loc = CGM.getContext().getSourceManager().translateFileLineCol( 3228 I->getFirst(), Line, 1); 3229 break; 3230 } 3231 } 3232 // Save this entry in the right position of the ordered entries array. 3233 OrderedEntries[E.getOrder()] = std::make_tuple(&E, Loc, ParentName); 3234 ParentFunctions[E.getOrder()] = ParentName; 3235 3236 // Add metadata to the named metadata node. 3237 MD->addOperand(llvm::MDNode::get(C, Ops)); 3238 }; 3239 3240 OffloadEntriesInfoManager.actOnTargetRegionEntriesInfo( 3241 TargetRegionMetadataEmitter); 3242 3243 // Create function that emits metadata for each device global variable entry; 3244 auto &&DeviceGlobalVarMetadataEmitter = 3245 [&C, &OrderedEntries, &GetMDInt, &GetMDString, 3246 MD](StringRef MangledName, 3247 const OffloadEntriesInfoManagerTy::OffloadEntryInfoDeviceGlobalVar 3248 &E) { 3249 // Generate metadata for global variables. Each entry of this metadata 3250 // contains: 3251 // - Entry 0 -> Kind of this type of metadata (1). 3252 // - Entry 1 -> Mangled name of the variable. 3253 // - Entry 2 -> Declare target kind. 3254 // - Entry 3 -> Order the entry was created. 3255 // The first element of the metadata node is the kind. 
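// A hypothetical rendering of one such operand (names and numbers are
// placeholders):
//   !{i32 0, i32 <device-id>, i32 <file-id>, !"_Z3foov", i32 42, i32 0}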
3256 llvm::Metadata *Ops[] = {
3257 GetMDInt(E.getKind()), GetMDString(MangledName),
3258 GetMDInt(E.getFlags()), GetMDInt(E.getOrder())};
3259
3260 // Save this entry in the right position of the ordered entries array.
3261 OrderedEntries[E.getOrder()] =
3262 std::make_tuple(&E, SourceLocation(), MangledName);
3263
3264 // Add metadata to the named metadata node.
3265 MD->addOperand(llvm::MDNode::get(C, Ops));
3266 };
3267
3268 OffloadEntriesInfoManager.actOnDeviceGlobalVarEntriesInfo(
3269 DeviceGlobalVarMetadataEmitter);
3270
3271 for (const auto &E : OrderedEntries) {
3272 assert(std::get<0>(E) && "All ordered entries must exist!");
3273 if (const auto *CE =
3274 dyn_cast<OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion>(
3275 std::get<0>(E))) {
3276 if (!CE->getID() || !CE->getAddress()) {
3277 // Do not blame the entry if the parent function is not emitted.
3278 StringRef FnName = ParentFunctions[CE->getOrder()];
3279 if (!CGM.GetGlobalValue(FnName))
3280 continue;
3281 unsigned DiagID = CGM.getDiags().getCustomDiagID(
3282 DiagnosticsEngine::Error,
3283 "Offloading entry for target region in %0 is incorrect: either the "
3284 "address or the ID is invalid.");
3285 CGM.getDiags().Report(std::get<1>(E), DiagID) << FnName;
3286 continue;
3287 }
3288 createOffloadEntry(CE->getID(), CE->getAddress(), /*Size=*/0,
3289 CE->getFlags(), llvm::GlobalValue::WeakAnyLinkage);
3290 } else if (const auto *CE = dyn_cast<OffloadEntriesInfoManagerTy::
3291 OffloadEntryInfoDeviceGlobalVar>(
3292 std::get<0>(E))) {
3293 OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags =
3294 static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>(
3295 CE->getFlags());
3296 switch (Flags) {
3297 case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo: {
3298 if (CGM.getLangOpts().OpenMPIsDevice &&
3299 CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())
3300 continue;
3301 if (!CE->getAddress()) {
3302 unsigned DiagID = CGM.getDiags().getCustomDiagID(
3303 DiagnosticsEngine::Error, "Offloading entry for declare target "
3304 "variable %0 is incorrect: the "
3305 "address is invalid.");
3306 CGM.getDiags().Report(std::get<1>(E), DiagID) << std::get<2>(E);
3307 continue;
3308 }
3309 // The variable has no definition - no need to add the entry.
3310 if (CE->getVarSize().isZero())
3311 continue;
3312 break;
3313 }
3314 case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink:
3315 assert(((CGM.getLangOpts().OpenMPIsDevice && !CE->getAddress()) ||
3316 (!CGM.getLangOpts().OpenMPIsDevice && CE->getAddress())) &&
3317 "Declare target link address is set.");
3318 if (CGM.getLangOpts().OpenMPIsDevice)
3319 continue;
3320 if (!CE->getAddress()) {
3321 unsigned DiagID = CGM.getDiags().getCustomDiagID(
3322 DiagnosticsEngine::Error,
3323 "Offloading entry for declare target variable is incorrect: the "
3324 "address is invalid.");
3325 CGM.getDiags().Report(DiagID);
3326 continue;
3327 }
3328 break;
3329 }
3330 createOffloadEntry(CE->getAddress(), CE->getAddress(),
3331 CE->getVarSize().getQuantity(), Flags,
3332 CE->getLinkage());
3333 } else {
3334 llvm_unreachable("Unsupported entry kind.");
3335 }
3336 }
3337 }
3338
3339 /// Loads all the offload entries information from the host IR
3340 /// metadata.
3341 void CGOpenMPRuntime::loadOffloadInfoMetadata() {
3342 // If we are in target mode, load the metadata from the host IR. This code has
3343 // to match the metadata creation in createOffloadEntriesAndInfoMetadata().
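// A hedged sketch of the two-pass flow (cc1 flag spellings may differ
// between clang versions):
//   host pass:   clang -cc1 -fopenmp ...             // emits omp_offload.info
//   device pass: clang -cc1 -fopenmp -fopenmp-is-device
//                -fopenmp-host-ir-file-path host.bc  // reads it back here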
3344
3345 if (!CGM.getLangOpts().OpenMPIsDevice)
3346 return;
3347
3348 if (CGM.getLangOpts().OMPHostIRFile.empty())
3349 return;
3350
3351 auto Buf = llvm::MemoryBuffer::getFile(CGM.getLangOpts().OMPHostIRFile);
3352 if (auto EC = Buf.getError()) {
3353 CGM.getDiags().Report(diag::err_cannot_open_file)
3354 << CGM.getLangOpts().OMPHostIRFile << EC.message();
3355 return;
3356 }
3357
3358 llvm::LLVMContext C;
3359 auto ME = expectedToErrorOrAndEmitErrors(
3360 C, llvm::parseBitcodeFile(Buf.get()->getMemBufferRef(), C));
3361
3362 if (auto EC = ME.getError()) {
3363 unsigned DiagID = CGM.getDiags().getCustomDiagID(
3364 DiagnosticsEngine::Error, "Unable to parse host IR file '%0':'%1'");
3365 CGM.getDiags().Report(DiagID)
3366 << CGM.getLangOpts().OMPHostIRFile << EC.message();
3367 return;
3368 }
3369
3370 llvm::NamedMDNode *MD = ME.get()->getNamedMetadata("omp_offload.info");
3371 if (!MD)
3372 return;
3373
3374 for (llvm::MDNode *MN : MD->operands()) {
3375 auto &&GetMDInt = [MN](unsigned Idx) {
3376 auto *V = cast<llvm::ConstantAsMetadata>(MN->getOperand(Idx));
3377 return cast<llvm::ConstantInt>(V->getValue())->getZExtValue();
3378 };
3379
3380 auto &&GetMDString = [MN](unsigned Idx) {
3381 auto *V = cast<llvm::MDString>(MN->getOperand(Idx));
3382 return V->getString();
3383 };
3384
3385 switch (GetMDInt(0)) {
3386 default:
3387 llvm_unreachable("Unexpected metadata!");
3388 break;
3389 case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
3390 OffloadingEntryInfoTargetRegion:
3391 OffloadEntriesInfoManager.initializeTargetRegionEntryInfo(
3392 /*DeviceID=*/GetMDInt(1), /*FileID=*/GetMDInt(2),
3393 /*ParentName=*/GetMDString(3), /*Line=*/GetMDInt(4),
3394 /*Order=*/GetMDInt(5));
3395 break;
3396 case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
3397 OffloadingEntryInfoDeviceGlobalVar:
3398 OffloadEntriesInfoManager.initializeDeviceGlobalVarEntryInfo(
3399 /*MangledName=*/GetMDString(1),
3400 static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>(
3401 /*Flags=*/GetMDInt(2)),
3402 /*Order=*/GetMDInt(3));
3403 break;
3404 }
3405 }
3406 }
3407
3408 void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) {
3409 if (!KmpRoutineEntryPtrTy) {
3410 // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type.
3411 ASTContext &C = CGM.getContext();
3412 QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy};
3413 FunctionProtoType::ExtProtoInfo EPI;
3414 KmpRoutineEntryPtrQTy = C.getPointerType(
3415 C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI));
3416 KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy);
3417 }
3418 }
3419
3420 QualType CGOpenMPRuntime::getTgtOffloadEntryQTy() {
3421 // Make sure the type of the entry is already created. This is the type we
3422 // have to create:
3423 // struct __tgt_offload_entry{
3424 // void *addr; // Pointer to the offload entry info.
3425 // // (function or global)
3426 // char *name; // Name of the function or global.
3427 // size_t size; // Size of the entry info (0 if it is a function).
3428 // int32_t flags; // Flags associated with the entry, e.g. 'link'.
3429 // int32_t reserved; // Reserved, to be used by the runtime library.
3430 // }; 3431 if (TgtOffloadEntryQTy.isNull()) { 3432 ASTContext &C = CGM.getContext(); 3433 RecordDecl *RD = C.buildImplicitRecord("__tgt_offload_entry"); 3434 RD->startDefinition(); 3435 addFieldToRecordDecl(C, RD, C.VoidPtrTy); 3436 addFieldToRecordDecl(C, RD, C.getPointerType(C.CharTy)); 3437 addFieldToRecordDecl(C, RD, C.getSizeType()); 3438 addFieldToRecordDecl( 3439 C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true)); 3440 addFieldToRecordDecl( 3441 C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true)); 3442 RD->completeDefinition(); 3443 RD->addAttr(PackedAttr::CreateImplicit(C)); 3444 TgtOffloadEntryQTy = C.getRecordType(RD); 3445 } 3446 return TgtOffloadEntryQTy; 3447 } 3448 3449 namespace { 3450 struct PrivateHelpersTy { 3451 PrivateHelpersTy(const Expr *OriginalRef, const VarDecl *Original, 3452 const VarDecl *PrivateCopy, const VarDecl *PrivateElemInit) 3453 : OriginalRef(OriginalRef), Original(Original), PrivateCopy(PrivateCopy), 3454 PrivateElemInit(PrivateElemInit) {} 3455 PrivateHelpersTy(const VarDecl *Original) : Original(Original) {} 3456 const Expr *OriginalRef = nullptr; 3457 const VarDecl *Original = nullptr; 3458 const VarDecl *PrivateCopy = nullptr; 3459 const VarDecl *PrivateElemInit = nullptr; 3460 bool isLocalPrivate() const { 3461 return !OriginalRef && !PrivateCopy && !PrivateElemInit; 3462 } 3463 }; 3464 typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy; 3465 } // anonymous namespace 3466 3467 static bool isAllocatableDecl(const VarDecl *VD) { 3468 const VarDecl *CVD = VD->getCanonicalDecl(); 3469 if (!CVD->hasAttr<OMPAllocateDeclAttr>()) 3470 return false; 3471 const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>(); 3472 // Use the default allocation. 3473 return !(AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc && 3474 !AA->getAllocator()); 3475 } 3476 3477 static RecordDecl * 3478 createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) { 3479 if (!Privates.empty()) { 3480 ASTContext &C = CGM.getContext(); 3481 // Build struct .kmp_privates_t. { 3482 // /* private vars */ 3483 // }; 3484 RecordDecl *RD = C.buildImplicitRecord(".kmp_privates.t"); 3485 RD->startDefinition(); 3486 for (const auto &Pair : Privates) { 3487 const VarDecl *VD = Pair.second.Original; 3488 QualType Type = VD->getType().getNonReferenceType(); 3489 // If the private variable is a local variable with lvalue ref type, 3490 // allocate the pointer instead of the pointee type. 
3491 if (Pair.second.isLocalPrivate()) { 3492 if (VD->getType()->isLValueReferenceType()) 3493 Type = C.getPointerType(Type); 3494 if (isAllocatableDecl(VD)) 3495 Type = C.getPointerType(Type); 3496 } 3497 FieldDecl *FD = addFieldToRecordDecl(C, RD, Type); 3498 if (VD->hasAttrs()) { 3499 for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()), 3500 E(VD->getAttrs().end()); 3501 I != E; ++I) 3502 FD->addAttr(*I); 3503 } 3504 } 3505 RD->completeDefinition(); 3506 return RD; 3507 } 3508 return nullptr; 3509 } 3510 3511 static RecordDecl * 3512 createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind, 3513 QualType KmpInt32Ty, 3514 QualType KmpRoutineEntryPointerQTy) { 3515 ASTContext &C = CGM.getContext(); 3516 // Build struct kmp_task_t { 3517 // void * shareds; 3518 // kmp_routine_entry_t routine; 3519 // kmp_int32 part_id; 3520 // kmp_cmplrdata_t data1; 3521 // kmp_cmplrdata_t data2; 3522 // For taskloops additional fields: 3523 // kmp_uint64 lb; 3524 // kmp_uint64 ub; 3525 // kmp_int64 st; 3526 // kmp_int32 liter; 3527 // void * reductions; 3528 // }; 3529 RecordDecl *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TTK_Union); 3530 UD->startDefinition(); 3531 addFieldToRecordDecl(C, UD, KmpInt32Ty); 3532 addFieldToRecordDecl(C, UD, KmpRoutineEntryPointerQTy); 3533 UD->completeDefinition(); 3534 QualType KmpCmplrdataTy = C.getRecordType(UD); 3535 RecordDecl *RD = C.buildImplicitRecord("kmp_task_t"); 3536 RD->startDefinition(); 3537 addFieldToRecordDecl(C, RD, C.VoidPtrTy); 3538 addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy); 3539 addFieldToRecordDecl(C, RD, KmpInt32Ty); 3540 addFieldToRecordDecl(C, RD, KmpCmplrdataTy); 3541 addFieldToRecordDecl(C, RD, KmpCmplrdataTy); 3542 if (isOpenMPTaskLoopDirective(Kind)) { 3543 QualType KmpUInt64Ty = 3544 CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0); 3545 QualType KmpInt64Ty = 3546 CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1); 3547 addFieldToRecordDecl(C, RD, KmpUInt64Ty); 3548 addFieldToRecordDecl(C, RD, KmpUInt64Ty); 3549 addFieldToRecordDecl(C, RD, KmpInt64Ty); 3550 addFieldToRecordDecl(C, RD, KmpInt32Ty); 3551 addFieldToRecordDecl(C, RD, C.VoidPtrTy); 3552 } 3553 RD->completeDefinition(); 3554 return RD; 3555 } 3556 3557 static RecordDecl * 3558 createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy, 3559 ArrayRef<PrivateDataTy> Privates) { 3560 ASTContext &C = CGM.getContext(); 3561 // Build struct kmp_task_t_with_privates { 3562 // kmp_task_t task_data; 3563 // .kmp_privates_t. privates; 3564 // }; 3565 RecordDecl *RD = C.buildImplicitRecord("kmp_task_t_with_privates"); 3566 RD->startDefinition(); 3567 addFieldToRecordDecl(C, RD, KmpTaskTQTy); 3568 if (const RecordDecl *PrivateRD = createPrivatesRecordDecl(CGM, Privates)) 3569 addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD)); 3570 RD->completeDefinition(); 3571 return RD; 3572 } 3573 3574 /// Emit a proxy function which accepts kmp_task_t as the second 3575 /// argument. 
3576 /// \code 3577 /// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) { 3578 /// TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, tt, 3579 /// For taskloops: 3580 /// tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter, 3581 /// tt->reductions, tt->shareds); 3582 /// return 0; 3583 /// } 3584 /// \endcode 3585 static llvm::Function * 3586 emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc, 3587 OpenMPDirectiveKind Kind, QualType KmpInt32Ty, 3588 QualType KmpTaskTWithPrivatesPtrQTy, 3589 QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy, 3590 QualType SharedsPtrTy, llvm::Function *TaskFunction, 3591 llvm::Value *TaskPrivatesMap) { 3592 ASTContext &C = CGM.getContext(); 3593 FunctionArgList Args; 3594 ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty, 3595 ImplicitParamDecl::Other); 3596 ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3597 KmpTaskTWithPrivatesPtrQTy.withRestrict(), 3598 ImplicitParamDecl::Other); 3599 Args.push_back(&GtidArg); 3600 Args.push_back(&TaskTypeArg); 3601 const auto &TaskEntryFnInfo = 3602 CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args); 3603 llvm::FunctionType *TaskEntryTy = 3604 CGM.getTypes().GetFunctionType(TaskEntryFnInfo); 3605 std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_entry", ""}); 3606 auto *TaskEntry = llvm::Function::Create( 3607 TaskEntryTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule()); 3608 CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskEntry, TaskEntryFnInfo); 3609 TaskEntry->setDoesNotRecurse(); 3610 CodeGenFunction CGF(CGM); 3611 CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args, 3612 Loc, Loc); 3613 3614 // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map, 3615 // tt, 3616 // For taskloops: 3617 // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter, 3618 // tt->task_data.shareds); 3619 llvm::Value *GtidParam = CGF.EmitLoadOfScalar( 3620 CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc); 3621 LValue TDBase = CGF.EmitLoadOfPointerLValue( 3622 CGF.GetAddrOfLocalVar(&TaskTypeArg), 3623 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>()); 3624 const auto *KmpTaskTWithPrivatesQTyRD = 3625 cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl()); 3626 LValue Base = 3627 CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin()); 3628 const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl()); 3629 auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId); 3630 LValue PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI); 3631 llvm::Value *PartidParam = PartIdLVal.getPointer(CGF); 3632 3633 auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds); 3634 LValue SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI); 3635 llvm::Value *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 3636 CGF.EmitLoadOfScalar(SharedsLVal, Loc), 3637 CGF.ConvertTypeForMem(SharedsPtrTy)); 3638 3639 auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1); 3640 llvm::Value *PrivatesParam; 3641 if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) { 3642 LValue PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI); 3643 PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 3644 PrivatesLVal.getPointer(CGF), CGF.VoidPtrTy); 3645 } else { 3646 PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); 
3647 } 3648 3649 llvm::Value *CommonArgs[] = {GtidParam, PartidParam, PrivatesParam, 3650 TaskPrivatesMap, 3651 CGF.Builder 3652 .CreatePointerBitCastOrAddrSpaceCast( 3653 TDBase.getAddress(CGF), CGF.VoidPtrTy) 3654 .getPointer()}; 3655 SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs), 3656 std::end(CommonArgs)); 3657 if (isOpenMPTaskLoopDirective(Kind)) { 3658 auto LBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound); 3659 LValue LBLVal = CGF.EmitLValueForField(Base, *LBFI); 3660 llvm::Value *LBParam = CGF.EmitLoadOfScalar(LBLVal, Loc); 3661 auto UBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound); 3662 LValue UBLVal = CGF.EmitLValueForField(Base, *UBFI); 3663 llvm::Value *UBParam = CGF.EmitLoadOfScalar(UBLVal, Loc); 3664 auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride); 3665 LValue StLVal = CGF.EmitLValueForField(Base, *StFI); 3666 llvm::Value *StParam = CGF.EmitLoadOfScalar(StLVal, Loc); 3667 auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter); 3668 LValue LILVal = CGF.EmitLValueForField(Base, *LIFI); 3669 llvm::Value *LIParam = CGF.EmitLoadOfScalar(LILVal, Loc); 3670 auto RFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTReductions); 3671 LValue RLVal = CGF.EmitLValueForField(Base, *RFI); 3672 llvm::Value *RParam = CGF.EmitLoadOfScalar(RLVal, Loc); 3673 CallArgs.push_back(LBParam); 3674 CallArgs.push_back(UBParam); 3675 CallArgs.push_back(StParam); 3676 CallArgs.push_back(LIParam); 3677 CallArgs.push_back(RParam); 3678 } 3679 CallArgs.push_back(SharedsParam); 3680 3681 CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskFunction, 3682 CallArgs); 3683 CGF.EmitStoreThroughLValue(RValue::get(CGF.Builder.getInt32(/*C=*/0)), 3684 CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty)); 3685 CGF.FinishFunction(); 3686 return TaskEntry; 3687 } 3688 3689 static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM, 3690 SourceLocation Loc, 3691 QualType KmpInt32Ty, 3692 QualType KmpTaskTWithPrivatesPtrQTy, 3693 QualType KmpTaskTWithPrivatesQTy) { 3694 ASTContext &C = CGM.getContext(); 3695 FunctionArgList Args; 3696 ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty, 3697 ImplicitParamDecl::Other); 3698 ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3699 KmpTaskTWithPrivatesPtrQTy.withRestrict(), 3700 ImplicitParamDecl::Other); 3701 Args.push_back(&GtidArg); 3702 Args.push_back(&TaskTypeArg); 3703 const auto &DestructorFnInfo = 3704 CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args); 3705 llvm::FunctionType *DestructorFnTy = 3706 CGM.getTypes().GetFunctionType(DestructorFnInfo); 3707 std::string Name = 3708 CGM.getOpenMPRuntime().getName({"omp_task_destructor", ""}); 3709 auto *DestructorFn = 3710 llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage, 3711 Name, &CGM.getModule()); 3712 CGM.SetInternalFunctionAttributes(GlobalDecl(), DestructorFn, 3713 DestructorFnInfo); 3714 DestructorFn->setDoesNotRecurse(); 3715 CodeGenFunction CGF(CGM); 3716 CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo, 3717 Args, Loc, Loc); 3718 3719 LValue Base = CGF.EmitLoadOfPointerLValue( 3720 CGF.GetAddrOfLocalVar(&TaskTypeArg), 3721 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>()); 3722 const auto *KmpTaskTWithPrivatesQTyRD = 3723 cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl()); 3724 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin()); 3725 Base = CGF.EmitLValueForField(Base, *FI); 3726 for 
(const auto *Field : 3727 cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) { 3728 if (QualType::DestructionKind DtorKind = 3729 Field->getType().isDestructedType()) { 3730 LValue FieldLValue = CGF.EmitLValueForField(Base, Field); 3731 CGF.pushDestroy(DtorKind, FieldLValue.getAddress(CGF), Field->getType()); 3732 } 3733 } 3734 CGF.FinishFunction(); 3735 return DestructorFn; 3736 } 3737 3738 /// Emit a privates mapping function for correct handling of private and 3739 /// firstprivate variables. 3740 /// \code 3741 /// void .omp_task_privates_map.(const .privates. *noalias privs, <ty1> 3742 /// **noalias priv1,..., <tyn> **noalias privn) { 3743 /// *priv1 = &.privates.priv1; 3744 /// ...; 3745 /// *privn = &.privates.privn; 3746 /// } 3747 /// \endcode 3748 static llvm::Value * 3749 emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc, 3750 const OMPTaskDataTy &Data, QualType PrivatesQTy, 3751 ArrayRef<PrivateDataTy> Privates) { 3752 ASTContext &C = CGM.getContext(); 3753 FunctionArgList Args; 3754 ImplicitParamDecl TaskPrivatesArg( 3755 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3756 C.getPointerType(PrivatesQTy).withConst().withRestrict(), 3757 ImplicitParamDecl::Other); 3758 Args.push_back(&TaskPrivatesArg); 3759 llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, unsigned> PrivateVarsPos; 3760 unsigned Counter = 1; 3761 for (const Expr *E : Data.PrivateVars) { 3762 Args.push_back(ImplicitParamDecl::Create( 3763 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3764 C.getPointerType(C.getPointerType(E->getType())) 3765 .withConst() 3766 .withRestrict(), 3767 ImplicitParamDecl::Other)); 3768 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 3769 PrivateVarsPos[VD] = Counter; 3770 ++Counter; 3771 } 3772 for (const Expr *E : Data.FirstprivateVars) { 3773 Args.push_back(ImplicitParamDecl::Create( 3774 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3775 C.getPointerType(C.getPointerType(E->getType())) 3776 .withConst() 3777 .withRestrict(), 3778 ImplicitParamDecl::Other)); 3779 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 3780 PrivateVarsPos[VD] = Counter; 3781 ++Counter; 3782 } 3783 for (const Expr *E : Data.LastprivateVars) { 3784 Args.push_back(ImplicitParamDecl::Create( 3785 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3786 C.getPointerType(C.getPointerType(E->getType())) 3787 .withConst() 3788 .withRestrict(), 3789 ImplicitParamDecl::Other)); 3790 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 3791 PrivateVarsPos[VD] = Counter; 3792 ++Counter; 3793 } 3794 for (const VarDecl *VD : Data.PrivateLocals) { 3795 QualType Ty = VD->getType().getNonReferenceType(); 3796 if (VD->getType()->isLValueReferenceType()) 3797 Ty = C.getPointerType(Ty); 3798 if (isAllocatableDecl(VD)) 3799 Ty = C.getPointerType(Ty); 3800 Args.push_back(ImplicitParamDecl::Create( 3801 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3802 C.getPointerType(C.getPointerType(Ty)).withConst().withRestrict(), 3803 ImplicitParamDecl::Other)); 3804 PrivateVarsPos[VD] = Counter; 3805 ++Counter; 3806 } 3807 const auto &TaskPrivatesMapFnInfo = 3808 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 3809 llvm::FunctionType *TaskPrivatesMapTy = 3810 CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo); 3811 std::string Name = 3812 CGM.getOpenMPRuntime().getName({"omp_task_privates_map", ""}); 3813 auto *TaskPrivatesMap = llvm::Function::Create( 3814 TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage, Name, 3815 &CGM.getModule()); 3816 
CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskPrivatesMap, 3817 TaskPrivatesMapFnInfo); 3818 if (CGM.getLangOpts().Optimize) { 3819 TaskPrivatesMap->removeFnAttr(llvm::Attribute::NoInline); 3820 TaskPrivatesMap->removeFnAttr(llvm::Attribute::OptimizeNone); 3821 TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline); 3822 } 3823 CodeGenFunction CGF(CGM); 3824 CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap, 3825 TaskPrivatesMapFnInfo, Args, Loc, Loc); 3826 3827 // *privi = &.privates.privi; 3828 LValue Base = CGF.EmitLoadOfPointerLValue( 3829 CGF.GetAddrOfLocalVar(&TaskPrivatesArg), 3830 TaskPrivatesArg.getType()->castAs<PointerType>()); 3831 const auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl()); 3832 Counter = 0; 3833 for (const FieldDecl *Field : PrivatesQTyRD->fields()) { 3834 LValue FieldLVal = CGF.EmitLValueForField(Base, Field); 3835 const VarDecl *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]]; 3836 LValue RefLVal = 3837 CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType()); 3838 LValue RefLoadLVal = CGF.EmitLoadOfPointerLValue( 3839 RefLVal.getAddress(CGF), RefLVal.getType()->castAs<PointerType>()); 3840 CGF.EmitStoreOfScalar(FieldLVal.getPointer(CGF), RefLoadLVal); 3841 ++Counter; 3842 } 3843 CGF.FinishFunction(); 3844 return TaskPrivatesMap; 3845 } 3846 3847 /// Emit initialization for private variables in task-based directives. 3848 static void emitPrivatesInit(CodeGenFunction &CGF, 3849 const OMPExecutableDirective &D, 3850 Address KmpTaskSharedsPtr, LValue TDBase, 3851 const RecordDecl *KmpTaskTWithPrivatesQTyRD, 3852 QualType SharedsTy, QualType SharedsPtrTy, 3853 const OMPTaskDataTy &Data, 3854 ArrayRef<PrivateDataTy> Privates, bool ForDup) { 3855 ASTContext &C = CGF.getContext(); 3856 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin()); 3857 LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI); 3858 OpenMPDirectiveKind Kind = isOpenMPTaskLoopDirective(D.getDirectiveKind()) 3859 ? OMPD_taskloop 3860 : OMPD_task; 3861 const CapturedStmt &CS = *D.getCapturedStmt(Kind); 3862 CodeGenFunction::CGCapturedStmtInfo CapturesInfo(CS); 3863 LValue SrcBase; 3864 bool IsTargetTask = 3865 isOpenMPTargetDataManagementDirective(D.getDirectiveKind()) || 3866 isOpenMPTargetExecutionDirective(D.getDirectiveKind()); 3867 // For target-based directives skip 4 firstprivate arrays BasePointersArray, 3868 // PointersArray, SizesArray, and MappersArray. The original variables for 3869 // these arrays are not captured and we get their addresses explicitly. 3870 if ((!IsTargetTask && !Data.FirstprivateVars.empty() && ForDup) || 3871 (IsTargetTask && KmpTaskSharedsPtr.isValid())) { 3872 SrcBase = CGF.MakeAddrLValue( 3873 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 3874 KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy)), 3875 SharedsTy); 3876 } 3877 FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin(); 3878 for (const PrivateDataTy &Pair : Privates) { 3879 // Do not initialize private locals. 
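// (their declarations, including initializers, are emitted inside the task
// body itself, so there is no value to copy at task-creation time).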
3880 if (Pair.second.isLocalPrivate()) { 3881 ++FI; 3882 continue; 3883 } 3884 const VarDecl *VD = Pair.second.PrivateCopy; 3885 const Expr *Init = VD->getAnyInitializer(); 3886 if (Init && (!ForDup || (isa<CXXConstructExpr>(Init) && 3887 !CGF.isTrivialInitializer(Init)))) { 3888 LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI); 3889 if (const VarDecl *Elem = Pair.second.PrivateElemInit) { 3890 const VarDecl *OriginalVD = Pair.second.Original; 3891 // Check if the variable is the target-based BasePointersArray, 3892 // PointersArray, SizesArray, or MappersArray. 3893 LValue SharedRefLValue; 3894 QualType Type = PrivateLValue.getType(); 3895 const FieldDecl *SharedField = CapturesInfo.lookup(OriginalVD); 3896 if (IsTargetTask && !SharedField) { 3897 assert(isa<ImplicitParamDecl>(OriginalVD) && 3898 isa<CapturedDecl>(OriginalVD->getDeclContext()) && 3899 cast<CapturedDecl>(OriginalVD->getDeclContext()) 3900 ->getNumParams() == 0 && 3901 isa<TranslationUnitDecl>( 3902 cast<CapturedDecl>(OriginalVD->getDeclContext()) 3903 ->getDeclContext()) && 3904 "Expected artificial target data variable."); 3905 SharedRefLValue = 3906 CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(OriginalVD), Type); 3907 } else if (ForDup) { 3908 SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField); 3909 SharedRefLValue = CGF.MakeAddrLValue( 3910 SharedRefLValue.getAddress(CGF).withAlignment( 3911 C.getDeclAlign(OriginalVD)), 3912 SharedRefLValue.getType(), LValueBaseInfo(AlignmentSource::Decl), 3913 SharedRefLValue.getTBAAInfo()); 3914 } else if (CGF.LambdaCaptureFields.count( 3915 Pair.second.Original->getCanonicalDecl()) > 0 || 3916 isa_and_nonnull<BlockDecl>(CGF.CurCodeDecl)) { 3917 SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef); 3918 } else { 3919 // Processing for implicitly captured variables. 3920 InlinedOpenMPRegionRAII Region( 3921 CGF, [](CodeGenFunction &, PrePostActionTy &) {}, OMPD_unknown, 3922 /*HasCancel=*/false, /*NoInheritance=*/true); 3923 SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef); 3924 } 3925 if (Type->isArrayType()) { 3926 // Initialize firstprivate array. 3927 if (!isa<CXXConstructExpr>(Init) || CGF.isTrivialInitializer(Init)) { 3928 // Perform simple memcpy. 3929 CGF.EmitAggregateAssign(PrivateLValue, SharedRefLValue, Type); 3930 } else { 3931 // Initialize firstprivate array using element-by-element 3932 // initialization. 3933 CGF.EmitOMPAggregateAssign( 3934 PrivateLValue.getAddress(CGF), SharedRefLValue.getAddress(CGF), 3935 Type, 3936 [&CGF, Elem, Init, &CapturesInfo](Address DestElement, 3937 Address SrcElement) { 3938 // Clean up any temporaries needed by the initialization. 3939 CodeGenFunction::OMPPrivateScope InitScope(CGF); 3940 InitScope.addPrivate( 3941 Elem, [SrcElement]() -> Address { return SrcElement; }); 3942 (void)InitScope.Privatize(); 3943 // Emit initialization for single element. 
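// Temporarily install CapturesInfo so that references to captured
// variables inside Init resolve through the captured-statement frame.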
3944 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(
3945 CGF, &CapturesInfo);
3946 CGF.EmitAnyExprToMem(Init, DestElement,
3947 Init->getType().getQualifiers(),
3948 /*IsInitializer=*/false);
3949 });
3950 }
3951 } else {
3952 CodeGenFunction::OMPPrivateScope InitScope(CGF);
3953 InitScope.addPrivate(Elem, [SharedRefLValue, &CGF]() -> Address {
3954 return SharedRefLValue.getAddress(CGF);
3955 });
3956 (void)InitScope.Privatize();
3957 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo);
3958 CGF.EmitExprAsInit(Init, VD, PrivateLValue,
3959 /*capturedByInit=*/false);
3960 }
3961 } else {
3962 CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false);
3963 }
3964 }
3965 ++FI;
3966 }
3967 }
3968
3969 /// Check if a task duplication function is required for taskloops, i.e. if
3970 /// any private copy needs non-trivial initialization.
static bool checkInitIsRequired(CodeGenFunction &CGF,
3971 ArrayRef<PrivateDataTy> Privates) {
3972 bool InitRequired = false;
3973 for (const PrivateDataTy &Pair : Privates) {
3974 if (Pair.second.isLocalPrivate())
3975 continue;
3976 const VarDecl *VD = Pair.second.PrivateCopy;
3977 const Expr *Init = VD->getAnyInitializer();
3978 InitRequired = InitRequired || (Init && isa<CXXConstructExpr>(Init) &&
3979 !CGF.isTrivialInitializer(Init));
3980 if (InitRequired)
3981 break;
3982 }
3983 return InitRequired;
3984 }
3985
3986
3987 /// Emit task_dup function (for initialization of
3988 /// private/firstprivate/lastprivate vars and last_iter flag)
3989 /// \code
3990 /// void __task_dup_entry(kmp_task_t *task_dst, const kmp_task_t *task_src, int
3991 /// lastpriv) {
3992 /// // setup lastprivate flag
3993 /// task_dst->last = lastpriv;
3994 /// // could be constructor calls here...
3995 /// }
3996 /// \endcode
3997 static llvm::Value *
3998 emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc,
3999 const OMPExecutableDirective &D,
4000 QualType KmpTaskTWithPrivatesPtrQTy,
4001 const RecordDecl *KmpTaskTWithPrivatesQTyRD,
4002 const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy,
4003 QualType SharedsPtrTy, const OMPTaskDataTy &Data,
4004 ArrayRef<PrivateDataTy> Privates, bool WithLastIter) {
4005 ASTContext &C = CGM.getContext();
4006 FunctionArgList Args;
4007 ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4008 KmpTaskTWithPrivatesPtrQTy,
4009 ImplicitParamDecl::Other);
4010 ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4011 KmpTaskTWithPrivatesPtrQTy,
4012 ImplicitParamDecl::Other);
4013 ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy,
4014 ImplicitParamDecl::Other);
4015 Args.push_back(&DstArg);
4016 Args.push_back(&SrcArg);
4017 Args.push_back(&LastprivArg);
4018 const auto &TaskDupFnInfo =
4019 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
4020 llvm::FunctionType *TaskDupTy = CGM.getTypes().GetFunctionType(TaskDupFnInfo);
4021 std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_dup", ""});
4022 auto *TaskDup = llvm::Function::Create(
4023 TaskDupTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
4024 CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskDup, TaskDupFnInfo);
4025 TaskDup->setDoesNotRecurse();
4026 CodeGenFunction CGF(CGM);
4027 CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskDup, TaskDupFnInfo, Args, Loc,
4028 Loc);
4029
4030 LValue TDBase = CGF.EmitLoadOfPointerLValue(
4031 CGF.GetAddrOfLocalVar(&DstArg),
4032 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
4033 // task_dst->liter = lastpriv;
4034 if (WithLastIter) {
4035 auto LIFI =
std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter); 4036 LValue Base = CGF.EmitLValueForField( 4037 TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin()); 4038 LValue LILVal = CGF.EmitLValueForField(Base, *LIFI); 4039 llvm::Value *Lastpriv = CGF.EmitLoadOfScalar( 4040 CGF.GetAddrOfLocalVar(&LastprivArg), /*Volatile=*/false, C.IntTy, Loc); 4041 CGF.EmitStoreOfScalar(Lastpriv, LILVal); 4042 } 4043 4044 // Emit initial values for private copies (if any). 4045 assert(!Privates.empty()); 4046 Address KmpTaskSharedsPtr = Address::invalid(); 4047 if (!Data.FirstprivateVars.empty()) { 4048 LValue TDBase = CGF.EmitLoadOfPointerLValue( 4049 CGF.GetAddrOfLocalVar(&SrcArg), 4050 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>()); 4051 LValue Base = CGF.EmitLValueForField( 4052 TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin()); 4053 KmpTaskSharedsPtr = Address::deprecated( 4054 CGF.EmitLoadOfScalar(CGF.EmitLValueForField( 4055 Base, *std::next(KmpTaskTQTyRD->field_begin(), 4056 KmpTaskTShareds)), 4057 Loc), 4058 CGM.getNaturalTypeAlignment(SharedsTy)); 4059 } 4060 emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase, KmpTaskTWithPrivatesQTyRD, 4061 SharedsTy, SharedsPtrTy, Data, Privates, /*ForDup=*/true); 4062 CGF.FinishFunction(); 4063 return TaskDup; 4064 } 4065 4066 /// Checks if destructor function is required to be generated. 4067 /// \return true if cleanups are required, false otherwise. 4068 static bool 4069 checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD, 4070 ArrayRef<PrivateDataTy> Privates) { 4071 for (const PrivateDataTy &P : Privates) { 4072 if (P.second.isLocalPrivate()) 4073 continue; 4074 QualType Ty = P.second.Original->getType().getNonReferenceType(); 4075 if (Ty.isDestructedType()) 4076 return true; 4077 } 4078 return false; 4079 } 4080 4081 namespace { 4082 /// Loop generator for OpenMP iterator expression. 
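/// Generates code roughly equivalent to the following sketch for a
/// hypothetical 'iterator(i = il:iu:istep, j = jl:ju:jstep)' modifier
/// (illustrative only; the real lowering uses explicit blocks and branches):
/// \code
/// for (counter0 = 0; counter0 < <upper0>; ++counter0) {
///   i = il + counter0 * istep;
///   for (counter1 = 0; counter1 < <upper1>; ++counter1) {
///     j = jl + counter1 * jstep;
///     <code emitted while this scope is active>
///   }
/// }
/// \endcode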
4083 class OMPIteratorGeneratorScope final 4084 : public CodeGenFunction::OMPPrivateScope { 4085 CodeGenFunction &CGF; 4086 const OMPIteratorExpr *E = nullptr; 4087 SmallVector<CodeGenFunction::JumpDest, 4> ContDests; 4088 SmallVector<CodeGenFunction::JumpDest, 4> ExitDests; 4089 OMPIteratorGeneratorScope() = delete; 4090 OMPIteratorGeneratorScope(OMPIteratorGeneratorScope &) = delete; 4091 4092 public: 4093 OMPIteratorGeneratorScope(CodeGenFunction &CGF, const OMPIteratorExpr *E) 4094 : CodeGenFunction::OMPPrivateScope(CGF), CGF(CGF), E(E) { 4095 if (!E) 4096 return; 4097 SmallVector<llvm::Value *, 4> Uppers; 4098 for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) { 4099 Uppers.push_back(CGF.EmitScalarExpr(E->getHelper(I).Upper)); 4100 const auto *VD = cast<VarDecl>(E->getIteratorDecl(I)); 4101 addPrivate(VD, [&CGF, VD]() { 4102 return CGF.CreateMemTemp(VD->getType(), VD->getName()); 4103 }); 4104 const OMPIteratorHelperData &HelperData = E->getHelper(I); 4105 addPrivate(HelperData.CounterVD, [&CGF, &HelperData]() { 4106 return CGF.CreateMemTemp(HelperData.CounterVD->getType(), 4107 "counter.addr"); 4108 }); 4109 } 4110 Privatize(); 4111 4112 for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) { 4113 const OMPIteratorHelperData &HelperData = E->getHelper(I); 4114 LValue CLVal = 4115 CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(HelperData.CounterVD), 4116 HelperData.CounterVD->getType()); 4117 // Counter = 0; 4118 CGF.EmitStoreOfScalar( 4119 llvm::ConstantInt::get(CLVal.getAddress(CGF).getElementType(), 0), 4120 CLVal); 4121 CodeGenFunction::JumpDest &ContDest = 4122 ContDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.cont")); 4123 CodeGenFunction::JumpDest &ExitDest = 4124 ExitDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.exit")); 4125 // N = <number-of_iterations>; 4126 llvm::Value *N = Uppers[I]; 4127 // cont: 4128 // if (Counter < N) goto body; else goto exit; 4129 CGF.EmitBlock(ContDest.getBlock()); 4130 auto *CVal = 4131 CGF.EmitLoadOfScalar(CLVal, HelperData.CounterVD->getLocation()); 4132 llvm::Value *Cmp = 4133 HelperData.CounterVD->getType()->isSignedIntegerOrEnumerationType() 4134 ? 
CGF.Builder.CreateICmpSLT(CVal, N)
4135 : CGF.Builder.CreateICmpULT(CVal, N);
4136 llvm::BasicBlock *BodyBB = CGF.createBasicBlock("iter.body");
4137 CGF.Builder.CreateCondBr(Cmp, BodyBB, ExitDest.getBlock());
4138 // body:
4139 CGF.EmitBlock(BodyBB);
4140 // Iteri = Begini + Counter * Stepi;
4141 CGF.EmitIgnoredExpr(HelperData.Update);
4142 }
4143 }
4144 ~OMPIteratorGeneratorScope() {
4145 if (!E)
4146 return;
4147 for (unsigned I = E->numOfIterators(); I > 0; --I) {
4148 // Counter = Counter + 1;
4149 const OMPIteratorHelperData &HelperData = E->getHelper(I - 1);
4150 CGF.EmitIgnoredExpr(HelperData.CounterUpdate);
4151 // goto cont;
4152 CGF.EmitBranchThroughCleanup(ContDests[I - 1]);
4153 // exit:
4154 CGF.EmitBlock(ExitDests[I - 1].getBlock(), /*IsFinished=*/I == 1);
4155 }
4156 }
4157 };
4158 } // namespace
4159
4160 static std::pair<llvm::Value *, llvm::Value *>
4161 getPointerAndSize(CodeGenFunction &CGF, const Expr *E) {
4162 const auto *OASE = dyn_cast<OMPArrayShapingExpr>(E);
4163 llvm::Value *Addr;
4164 if (OASE) {
4165 const Expr *Base = OASE->getBase();
4166 Addr = CGF.EmitScalarExpr(Base);
4167 } else {
4168 Addr = CGF.EmitLValue(E).getPointer(CGF);
4169 }
4170 llvm::Value *SizeVal;
4171 QualType Ty = E->getType();
4172 if (OASE) {
4173 SizeVal = CGF.getTypeSize(OASE->getBase()->getType()->getPointeeType());
4174 for (const Expr *SE : OASE->getDimensions()) {
4175 llvm::Value *Sz = CGF.EmitScalarExpr(SE);
4176 Sz = CGF.EmitScalarConversion(
4177 Sz, SE->getType(), CGF.getContext().getSizeType(), SE->getExprLoc());
4178 SizeVal = CGF.Builder.CreateNUWMul(SizeVal, Sz);
4179 }
4180 } else if (const auto *ASE =
4181 dyn_cast<OMPArraySectionExpr>(E->IgnoreParenImpCasts())) {
4182 LValue UpAddrLVal =
4183 CGF.EmitOMPArraySectionExpr(ASE, /*IsLowerBound=*/false);
4184 Address UpAddrAddress = UpAddrLVal.getAddress(CGF);
4185 llvm::Value *UpAddr = CGF.Builder.CreateConstGEP1_32(
4186 UpAddrAddress.getElementType(), UpAddrAddress.getPointer(), /*Idx0=*/1);
4187 llvm::Value *LowIntPtr = CGF.Builder.CreatePtrToInt(Addr, CGF.SizeTy);
4188 llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGF.SizeTy);
4189 SizeVal = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr);
4190 } else {
4191 SizeVal = CGF.getTypeSize(Ty);
4192 }
4193 return std::make_pair(Addr, SizeVal);
4194 }
4195
4196 /// Builds kmp_task_affinity_info_t, if it is not built yet, and builds flags
/// type.
4197 static void getKmpAffinityType(ASTContext &C, QualType &KmpTaskAffinityInfoTy) {
4198 QualType FlagsTy = C.getIntTypeForBitwidth(32, /*Signed=*/false);
4199 if (KmpTaskAffinityInfoTy.isNull()) {
4200 RecordDecl *KmpAffinityInfoRD =
4201 C.buildImplicitRecord("kmp_task_affinity_info_t");
4202 KmpAffinityInfoRD->startDefinition();
4203 addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getIntPtrType());
4204 addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getSizeType());
4205 addFieldToRecordDecl(C, KmpAffinityInfoRD, FlagsTy);
4206 KmpAffinityInfoRD->completeDefinition();
4207 KmpTaskAffinityInfoTy = C.getRecordType(KmpAffinityInfoRD);
4208 }
4209 }
4210
4211 CGOpenMPRuntime::TaskResultTy
4212 CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,
4213 const OMPExecutableDirective &D,
4214 llvm::Function *TaskFunction, QualType SharedsTy,
4215 Address Shareds, const OMPTaskDataTy &Data) {
4216 ASTContext &C = CGM.getContext();
4217 llvm::SmallVector<PrivateDataTy, 4> Privates;
4218 // Aggregate privates and sort them by alignment, in descending order.
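// Sorting by decreasing alignment lets .kmp_privates.t be laid out without
// internal padding; e.g. (hypothetically) an over-aligned double copy is
// placed before an int copy regardless of the clause order.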
4219 const auto *I = Data.PrivateCopies.begin(); 4220 for (const Expr *E : Data.PrivateVars) { 4221 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 4222 Privates.emplace_back( 4223 C.getDeclAlign(VD), 4224 PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()), 4225 /*PrivateElemInit=*/nullptr)); 4226 ++I; 4227 } 4228 I = Data.FirstprivateCopies.begin(); 4229 const auto *IElemInitRef = Data.FirstprivateInits.begin(); 4230 for (const Expr *E : Data.FirstprivateVars) { 4231 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 4232 Privates.emplace_back( 4233 C.getDeclAlign(VD), 4234 PrivateHelpersTy( 4235 E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()), 4236 cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl()))); 4237 ++I; 4238 ++IElemInitRef; 4239 } 4240 I = Data.LastprivateCopies.begin(); 4241 for (const Expr *E : Data.LastprivateVars) { 4242 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 4243 Privates.emplace_back( 4244 C.getDeclAlign(VD), 4245 PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()), 4246 /*PrivateElemInit=*/nullptr)); 4247 ++I; 4248 } 4249 for (const VarDecl *VD : Data.PrivateLocals) { 4250 if (isAllocatableDecl(VD)) 4251 Privates.emplace_back(CGM.getPointerAlign(), PrivateHelpersTy(VD)); 4252 else 4253 Privates.emplace_back(C.getDeclAlign(VD), PrivateHelpersTy(VD)); 4254 } 4255 llvm::stable_sort(Privates, 4256 [](const PrivateDataTy &L, const PrivateDataTy &R) { 4257 return L.first > R.first; 4258 }); 4259 QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1); 4260 // Build type kmp_routine_entry_t (if not built yet). 4261 emitKmpRoutineEntryT(KmpInt32Ty); 4262 // Build type kmp_task_t (if not built yet). 4263 if (isOpenMPTaskLoopDirective(D.getDirectiveKind())) { 4264 if (SavedKmpTaskloopTQTy.isNull()) { 4265 SavedKmpTaskloopTQTy = C.getRecordType(createKmpTaskTRecordDecl( 4266 CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy)); 4267 } 4268 KmpTaskTQTy = SavedKmpTaskloopTQTy; 4269 } else { 4270 assert((D.getDirectiveKind() == OMPD_task || 4271 isOpenMPTargetExecutionDirective(D.getDirectiveKind()) || 4272 isOpenMPTargetDataManagementDirective(D.getDirectiveKind())) && 4273 "Expected taskloop, task or target directive"); 4274 if (SavedKmpTaskTQTy.isNull()) { 4275 SavedKmpTaskTQTy = C.getRecordType(createKmpTaskTRecordDecl( 4276 CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy)); 4277 } 4278 KmpTaskTQTy = SavedKmpTaskTQTy; 4279 } 4280 const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl()); 4281 // Build particular struct kmp_task_t for the given task. 4282 const RecordDecl *KmpTaskTWithPrivatesQTyRD = 4283 createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates); 4284 QualType KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD); 4285 QualType KmpTaskTWithPrivatesPtrQTy = 4286 C.getPointerType(KmpTaskTWithPrivatesQTy); 4287 llvm::Type *KmpTaskTWithPrivatesTy = CGF.ConvertType(KmpTaskTWithPrivatesQTy); 4288 llvm::Type *KmpTaskTWithPrivatesPtrTy = 4289 KmpTaskTWithPrivatesTy->getPointerTo(); 4290 llvm::Value *KmpTaskTWithPrivatesTySize = 4291 CGF.getTypeSize(KmpTaskTWithPrivatesQTy); 4292 QualType SharedsPtrTy = C.getPointerType(SharedsTy); 4293 4294 // Emit initial values for private copies (if any). 
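// The privates mapping function built below is how those values are later
// reached from the task entry; when there are no privates at all, a null
// pointer is passed to the task entry in place of the map function.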
4295 llvm::Value *TaskPrivatesMap = nullptr; 4296 llvm::Type *TaskPrivatesMapTy = 4297 std::next(TaskFunction->arg_begin(), 3)->getType(); 4298 if (!Privates.empty()) { 4299 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin()); 4300 TaskPrivatesMap = 4301 emitTaskPrivateMappingFunction(CGM, Loc, Data, FI->getType(), Privates); 4302 TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4303 TaskPrivatesMap, TaskPrivatesMapTy); 4304 } else { 4305 TaskPrivatesMap = llvm::ConstantPointerNull::get( 4306 cast<llvm::PointerType>(TaskPrivatesMapTy)); 4307 } 4308 // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid, 4309 // kmp_task_t *tt); 4310 llvm::Function *TaskEntry = emitProxyTaskFunction( 4311 CGM, Loc, D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy, 4312 KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction, 4313 TaskPrivatesMap); 4314 4315 // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid, 4316 // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, 4317 // kmp_routine_entry_t *task_entry); 4318 // Task flags. Format is taken from 4319 // https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h, 4320 // description of kmp_tasking_flags struct. 4321 enum { 4322 TiedFlag = 0x1, 4323 FinalFlag = 0x2, 4324 DestructorsFlag = 0x8, 4325 PriorityFlag = 0x20, 4326 DetachableFlag = 0x40, 4327 }; 4328 unsigned Flags = Data.Tied ? TiedFlag : 0; 4329 bool NeedsCleanup = false; 4330 if (!Privates.empty()) { 4331 NeedsCleanup = 4332 checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD, Privates); 4333 if (NeedsCleanup) 4334 Flags = Flags | DestructorsFlag; 4335 } 4336 if (Data.Priority.getInt()) 4337 Flags = Flags | PriorityFlag; 4338 if (D.hasClausesOfKind<OMPDetachClause>()) 4339 Flags = Flags | DetachableFlag; 4340 llvm::Value *TaskFlags = 4341 Data.Final.getPointer() 4342 ? CGF.Builder.CreateSelect(Data.Final.getPointer(), 4343 CGF.Builder.getInt32(FinalFlag), 4344 CGF.Builder.getInt32(/*C=*/0)) 4345 : CGF.Builder.getInt32(Data.Final.getInt() ? FinalFlag : 0); 4346 TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags)); 4347 llvm::Value *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy)); 4348 SmallVector<llvm::Value *, 8> AllocArgs = {emitUpdateLocation(CGF, Loc), 4349 getThreadID(CGF, Loc), TaskFlags, KmpTaskTWithPrivatesTySize, 4350 SharedsSize, CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4351 TaskEntry, KmpRoutineEntryPtrTy)}; 4352 llvm::Value *NewTask; 4353 if (D.hasClausesOfKind<OMPNowaitClause>()) { 4354 // Check if we have any device clause associated with the directive. 4355 const Expr *Device = nullptr; 4356 if (auto *C = D.getSingleClause<OMPDeviceClause>()) 4357 Device = C->getDevice(); 4358 // Emit device ID if any, otherwise use the default value. 4359 llvm::Value *DeviceID; 4360 if (Device) 4361 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device), 4362 CGF.Int64Ty, /*isSigned=*/true); 4363 else 4364 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF); 4365 AllocArgs.push_back(DeviceID); 4366 NewTask = CGF.EmitRuntimeCall( 4367 OMPBuilder.getOrCreateRuntimeFunction( 4368 CGM.getModule(), OMPRTL___kmpc_omp_target_task_alloc), 4369 AllocArgs); 4370 } else { 4371 NewTask = 4372 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 4373 CGM.getModule(), OMPRTL___kmpc_omp_task_alloc), 4374 AllocArgs); 4375 } 4376 // Emit detach clause initialization.
4377 // evt = (typeof(evt))__kmpc_task_allow_completion_event(loc, tid, 4378 // task_descriptor); 4379 if (const auto *DC = D.getSingleClause<OMPDetachClause>()) { 4380 const Expr *Evt = DC->getEventHandler()->IgnoreParenImpCasts(); 4381 LValue EvtLVal = CGF.EmitLValue(Evt); 4382 4383 // Build kmp_event_t *__kmpc_task_allow_completion_event(ident_t *loc_ref, 4384 // int gtid, kmp_task_t *task); 4385 llvm::Value *Loc = emitUpdateLocation(CGF, DC->getBeginLoc()); 4386 llvm::Value *Tid = getThreadID(CGF, DC->getBeginLoc()); 4387 Tid = CGF.Builder.CreateIntCast(Tid, CGF.IntTy, /*isSigned=*/false); 4388 llvm::Value *EvtVal = CGF.EmitRuntimeCall( 4389 OMPBuilder.getOrCreateRuntimeFunction( 4390 CGM.getModule(), OMPRTL___kmpc_task_allow_completion_event), 4391 {Loc, Tid, NewTask}); 4392 EvtVal = CGF.EmitScalarConversion(EvtVal, C.VoidPtrTy, Evt->getType(), 4393 Evt->getExprLoc()); 4394 CGF.EmitStoreOfScalar(EvtVal, EvtLVal); 4395 } 4396 // Process affinity clauses. 4397 if (D.hasClausesOfKind<OMPAffinityClause>()) { 4398 // Process list of affinity data. 4399 ASTContext &C = CGM.getContext(); 4400 Address AffinitiesArray = Address::invalid(); 4401 // Calculate number of elements to form the array of affinity data. 4402 llvm::Value *NumOfElements = nullptr; 4403 unsigned NumAffinities = 0; 4404 for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) { 4405 if (const Expr *Modifier = C->getModifier()) { 4406 const auto *IE = cast<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts()); 4407 for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) { 4408 llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper); 4409 Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false); 4410 NumOfElements = 4411 NumOfElements ? CGF.Builder.CreateNUWMul(NumOfElements, Sz) : Sz; 4412 } 4413 } else { 4414 NumAffinities += C->varlist_size(); 4415 } 4416 } 4417 getKmpAffinityType(CGM.getContext(), KmpTaskAffinityInfoTy); 4418 // Fields ids in kmp_task_affinity_info record. 4419 enum RTLAffinityInfoFieldsTy { BaseAddr, Len, Flags }; 4420 4421 QualType KmpTaskAffinityInfoArrayTy; 4422 if (NumOfElements) { 4423 NumOfElements = CGF.Builder.CreateNUWAdd( 4424 llvm::ConstantInt::get(CGF.SizeTy, NumAffinities), NumOfElements); 4425 auto *OVE = new (C) OpaqueValueExpr( 4426 Loc, 4427 C.getIntTypeForBitwidth(C.getTypeSize(C.getSizeType()), /*Signed=*/0), 4428 VK_PRValue); 4429 CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, OVE, 4430 RValue::get(NumOfElements)); 4431 KmpTaskAffinityInfoArrayTy = 4432 C.getVariableArrayType(KmpTaskAffinityInfoTy, OVE, ArrayType::Normal, 4433 /*IndexTypeQuals=*/0, SourceRange(Loc, Loc)); 4434 // Properly emit variable-sized array. 
4435 auto *PD = ImplicitParamDecl::Create(C, KmpTaskAffinityInfoArrayTy, 4436 ImplicitParamDecl::Other); 4437 CGF.EmitVarDecl(*PD); 4438 AffinitiesArray = CGF.GetAddrOfLocalVar(PD); 4439 NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty, 4440 /*isSigned=*/false); 4441 } else { 4442 KmpTaskAffinityInfoArrayTy = C.getConstantArrayType( 4443 KmpTaskAffinityInfoTy, 4444 llvm::APInt(C.getTypeSize(C.getSizeType()), NumAffinities), nullptr, 4445 ArrayType::Normal, /*IndexTypeQuals=*/0); 4446 AffinitiesArray = 4447 CGF.CreateMemTemp(KmpTaskAffinityInfoArrayTy, ".affs.arr.addr"); 4448 AffinitiesArray = CGF.Builder.CreateConstArrayGEP(AffinitiesArray, 0); 4449 NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumAffinities, 4450 /*isSigned=*/false); 4451 } 4452 4453 const auto *KmpAffinityInfoRD = KmpTaskAffinityInfoTy->getAsRecordDecl(); 4454 // Fill array by elements without iterators. 4455 unsigned Pos = 0; 4456 bool HasIterator = false; 4457 for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) { 4458 if (C->getModifier()) { 4459 HasIterator = true; 4460 continue; 4461 } 4462 for (const Expr *E : C->varlists()) { 4463 llvm::Value *Addr; 4464 llvm::Value *Size; 4465 std::tie(Addr, Size) = getPointerAndSize(CGF, E); 4466 LValue Base = 4467 CGF.MakeAddrLValue(CGF.Builder.CreateConstGEP(AffinitiesArray, Pos), 4468 KmpTaskAffinityInfoTy); 4469 // affs[i].base_addr = &<Affinities[i].second>; 4470 LValue BaseAddrLVal = CGF.EmitLValueForField( 4471 Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr)); 4472 CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy), 4473 BaseAddrLVal); 4474 // affs[i].len = sizeof(<Affinities[i].second>); 4475 LValue LenLVal = CGF.EmitLValueForField( 4476 Base, *std::next(KmpAffinityInfoRD->field_begin(), Len)); 4477 CGF.EmitStoreOfScalar(Size, LenLVal); 4478 ++Pos; 4479 } 4480 } 4481 LValue PosLVal; 4482 if (HasIterator) { 4483 PosLVal = CGF.MakeAddrLValue( 4484 CGF.CreateMemTemp(C.getSizeType(), "affs.counter.addr"), 4485 C.getSizeType()); 4486 CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal); 4487 } 4488 // Process elements with iterators. 
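  // For example, a clause of the form
  //   affinity(iterator(i = 0 : n) : a[i])
  // reaches this loop: the iterator scope re-executes the stores once per
  // iteration, with affs.counter.addr holding the running array index.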
4489 for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) { 4490 const Expr *Modifier = C->getModifier(); 4491 if (!Modifier) 4492 continue; 4493 OMPIteratorGeneratorScope IteratorScope( 4494 CGF, cast_or_null<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts())); 4495 for (const Expr *E : C->varlists()) { 4496 llvm::Value *Addr; 4497 llvm::Value *Size; 4498 std::tie(Addr, Size) = getPointerAndSize(CGF, E); 4499 llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc()); 4500 LValue Base = CGF.MakeAddrLValue( 4501 CGF.Builder.CreateGEP(AffinitiesArray, Idx), KmpTaskAffinityInfoTy); 4502 // affs[i].base_addr = &<Affinities[i].second>; 4503 LValue BaseAddrLVal = CGF.EmitLValueForField( 4504 Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr)); 4505 CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy), 4506 BaseAddrLVal); 4507 // affs[i].len = sizeof(<Affinities[i].second>); 4508 LValue LenLVal = CGF.EmitLValueForField( 4509 Base, *std::next(KmpAffinityInfoRD->field_begin(), Len)); 4510 CGF.EmitStoreOfScalar(Size, LenLVal); 4511 Idx = CGF.Builder.CreateNUWAdd( 4512 Idx, llvm::ConstantInt::get(Idx->getType(), 1)); 4513 CGF.EmitStoreOfScalar(Idx, PosLVal); 4514 } 4515 } 4516 // Call to kmp_int32 __kmpc_omp_reg_task_with_affinity(ident_t *loc_ref, 4517 // kmp_int32 gtid, kmp_task_t *new_task, kmp_int32 4518 // naffins, kmp_task_affinity_info_t *affin_list); 4519 llvm::Value *LocRef = emitUpdateLocation(CGF, Loc); 4520 llvm::Value *GTid = getThreadID(CGF, Loc); 4521 llvm::Value *AffinListPtr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4522 AffinitiesArray.getPointer(), CGM.VoidPtrTy); 4523 // FIXME: Emit the function and ignore its result for now unless the 4524 // runtime function is properly implemented. 4525 (void)CGF.EmitRuntimeCall( 4526 OMPBuilder.getOrCreateRuntimeFunction( 4527 CGM.getModule(), OMPRTL___kmpc_omp_reg_task_with_affinity), 4528 {LocRef, GTid, NewTask, NumOfElements, AffinListPtr}); 4529 } 4530 llvm::Value *NewTaskNewTaskTTy = 4531 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4532 NewTask, KmpTaskTWithPrivatesPtrTy); 4533 LValue Base = CGF.MakeNaturalAlignAddrLValue(NewTaskNewTaskTTy, 4534 KmpTaskTWithPrivatesQTy); 4535 LValue TDBase = 4536 CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin()); 4537 // Fill the data in the resulting kmp_task_t record. 4538 // Copy shareds if there are any. 4539 Address KmpTaskSharedsPtr = Address::invalid(); 4540 if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) { 4541 KmpTaskSharedsPtr = Address::deprecated( 4542 CGF.EmitLoadOfScalar( 4543 CGF.EmitLValueForField( 4544 TDBase, 4545 *std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds)), 4546 Loc), 4547 CGM.getNaturalTypeAlignment(SharedsTy)); 4548 LValue Dest = CGF.MakeAddrLValue(KmpTaskSharedsPtr, SharedsTy); 4549 LValue Src = CGF.MakeAddrLValue(Shareds, SharedsTy); 4550 CGF.EmitAggregateCopy(Dest, Src, SharedsTy, AggValueSlot::DoesNotOverlap); 4551 } 4552 // Emit initial values for private copies (if any). 
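  // (For taskloop, the runtime may clone the task descriptor per generated
  // task; the task-dup function emitted below re-runs the firstprivate
  // initialization and, when lastprivates are present, forwards the
  // last-iteration flag. This is only a summary, see emitTaskDupFunction.)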
4553 TaskResultTy Result; 4554 if (!Privates.empty()) { 4555 emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, Base, KmpTaskTWithPrivatesQTyRD, 4556 SharedsTy, SharedsPtrTy, Data, Privates, 4557 /*ForDup=*/false); 4558 if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) && 4559 (!Data.LastprivateVars.empty() || checkInitIsRequired(CGF, Privates))) { 4560 Result.TaskDupFn = emitTaskDupFunction( 4561 CGM, Loc, D, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTyRD, 4562 KmpTaskTQTyRD, SharedsTy, SharedsPtrTy, Data, Privates, 4563 /*WithLastIter=*/!Data.LastprivateVars.empty()); 4564 } 4565 } 4566 // Fields of union "kmp_cmplrdata_t" for destructors and priority. 4567 enum { Priority = 0, Destructors = 1 }; 4568 // Provide pointer to function with destructors for privates. 4569 auto FI = std::next(KmpTaskTQTyRD->field_begin(), Data1); 4570 const RecordDecl *KmpCmplrdataUD = 4571 (*FI)->getType()->getAsUnionType()->getDecl(); 4572 if (NeedsCleanup) { 4573 llvm::Value *DestructorFn = emitDestructorsFunction( 4574 CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy, 4575 KmpTaskTWithPrivatesQTy); 4576 LValue Data1LV = CGF.EmitLValueForField(TDBase, *FI); 4577 LValue DestructorsLV = CGF.EmitLValueForField( 4578 Data1LV, *std::next(KmpCmplrdataUD->field_begin(), Destructors)); 4579 CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4580 DestructorFn, KmpRoutineEntryPtrTy), 4581 DestructorsLV); 4582 } 4583 // Set priority. 4584 if (Data.Priority.getInt()) { 4585 LValue Data2LV = CGF.EmitLValueForField( 4586 TDBase, *std::next(KmpTaskTQTyRD->field_begin(), Data2)); 4587 LValue PriorityLV = CGF.EmitLValueForField( 4588 Data2LV, *std::next(KmpCmplrdataUD->field_begin(), Priority)); 4589 CGF.EmitStoreOfScalar(Data.Priority.getPointer(), PriorityLV); 4590 } 4591 Result.NewTask = NewTask; 4592 Result.TaskEntry = TaskEntry; 4593 Result.NewTaskNewTaskTTy = NewTaskNewTaskTTy; 4594 Result.TDBase = TDBase; 4595 Result.KmpTaskTQTyRD = KmpTaskTQTyRD; 4596 return Result; 4597 } 4598 4599 namespace { 4600 /// Dependence kind for RTL. 4601 enum RTLDependenceKindTy { 4602 DepIn = 0x01, 4603 DepInOut = 0x3, 4604 DepMutexInOutSet = 0x4, 4605 DepInOutSet = 0x8 4606 }; 4607 /// Fields ids in kmp_depend_info record. 4608 enum RTLDependInfoFieldsTy { BaseAddr, Len, Flags }; 4609 } // namespace 4610 4611 /// Translates internal dependency kind into the runtime kind. 4612 static RTLDependenceKindTy translateDependencyKind(OpenMPDependClauseKind K) { 4613 RTLDependenceKindTy DepKind; 4614 switch (K) { 4615 case OMPC_DEPEND_in: 4616 DepKind = DepIn; 4617 break; 4618 // Out and InOut dependencies must use the same code. 4619 case OMPC_DEPEND_out: 4620 case OMPC_DEPEND_inout: 4621 DepKind = DepInOut; 4622 break; 4623 case OMPC_DEPEND_mutexinoutset: 4624 DepKind = DepMutexInOutSet; 4625 break; 4626 case OMPC_DEPEND_inoutset: 4627 DepKind = DepInOutSet; 4628 break; 4629 case OMPC_DEPEND_source: 4630 case OMPC_DEPEND_sink: 4631 case OMPC_DEPEND_depobj: 4632 case OMPC_DEPEND_unknown: 4633 llvm_unreachable("Unknown task dependence type"); 4634 } 4635 return DepKind; 4636 } 4637 4638 /// Builds kmp_depend_info, if it is not built yet, and builds flags type. 
4639 static void getDependTypes(ASTContext &C, QualType &KmpDependInfoTy, 4640 QualType &FlagsTy) { 4641 FlagsTy = C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false); 4642 if (KmpDependInfoTy.isNull()) { 4643 RecordDecl *KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info"); 4644 KmpDependInfoRD->startDefinition(); 4645 addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType()); 4646 addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType()); 4647 addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy); 4648 KmpDependInfoRD->completeDefinition(); 4649 KmpDependInfoTy = C.getRecordType(KmpDependInfoRD); 4650 } 4651 } 4652 4653 std::pair<llvm::Value *, LValue> 4654 CGOpenMPRuntime::getDepobjElements(CodeGenFunction &CGF, LValue DepobjLVal, 4655 SourceLocation Loc) { 4656 ASTContext &C = CGM.getContext(); 4657 QualType FlagsTy; 4658 getDependTypes(C, KmpDependInfoTy, FlagsTy); 4659 RecordDecl *KmpDependInfoRD = 4660 cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl()); 4661 LValue Base = CGF.EmitLoadOfPointerLValue( 4662 DepobjLVal.getAddress(CGF), 4663 C.getPointerType(C.VoidPtrTy).castAs<PointerType>()); 4664 QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy); 4665 Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4666 Base.getAddress(CGF), CGF.ConvertTypeForMem(KmpDependInfoPtrTy)); 4667 Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(), 4668 Base.getTBAAInfo()); 4669 Address DepObjAddr = CGF.Builder.CreateGEP( 4670 Addr, llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true)); 4671 LValue NumDepsBase = CGF.MakeAddrLValue( 4672 DepObjAddr, KmpDependInfoTy, Base.getBaseInfo(), Base.getTBAAInfo()); 4673 // NumDeps = deps[i].base_addr; 4674 LValue BaseAddrLVal = CGF.EmitLValueForField( 4675 NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr)); 4676 llvm::Value *NumDeps = CGF.EmitLoadOfScalar(BaseAddrLVal, Loc); 4677 return std::make_pair(NumDeps, Base); 4678 } 4679 4680 static void emitDependData(CodeGenFunction &CGF, QualType &KmpDependInfoTy, 4681 llvm::PointerUnion<unsigned *, LValue *> Pos, 4682 const OMPTaskDataTy::DependData &Data, 4683 Address DependenciesArray) { 4684 CodeGenModule &CGM = CGF.CGM; 4685 ASTContext &C = CGM.getContext(); 4686 QualType FlagsTy; 4687 getDependTypes(C, KmpDependInfoTy, FlagsTy); 4688 RecordDecl *KmpDependInfoRD = 4689 cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl()); 4690 llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy); 4691 4692 OMPIteratorGeneratorScope IteratorScope( 4693 CGF, cast_or_null<OMPIteratorExpr>( 4694 Data.IteratorExpr ? 
Data.IteratorExpr->IgnoreParenImpCasts() 4695 : nullptr)); 4696 for (const Expr *E : Data.DepExprs) { 4697 llvm::Value *Addr; 4698 llvm::Value *Size; 4699 std::tie(Addr, Size) = getPointerAndSize(CGF, E); 4700 LValue Base; 4701 if (unsigned *P = Pos.dyn_cast<unsigned *>()) { 4702 Base = CGF.MakeAddrLValue( 4703 CGF.Builder.CreateConstGEP(DependenciesArray, *P), KmpDependInfoTy); 4704 } else { 4705 LValue &PosLVal = *Pos.get<LValue *>(); 4706 llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc()); 4707 Base = CGF.MakeAddrLValue( 4708 CGF.Builder.CreateGEP(DependenciesArray, Idx), KmpDependInfoTy); 4709 } 4710 // deps[i].base_addr = &<Dependencies[i].second>; 4711 LValue BaseAddrLVal = CGF.EmitLValueForField( 4712 Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr)); 4713 CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy), 4714 BaseAddrLVal); 4715 // deps[i].len = sizeof(<Dependencies[i].second>); 4716 LValue LenLVal = CGF.EmitLValueForField( 4717 Base, *std::next(KmpDependInfoRD->field_begin(), Len)); 4718 CGF.EmitStoreOfScalar(Size, LenLVal); 4719 // deps[i].flags = <Dependencies[i].first>; 4720 RTLDependenceKindTy DepKind = translateDependencyKind(Data.DepKind); 4721 LValue FlagsLVal = CGF.EmitLValueForField( 4722 Base, *std::next(KmpDependInfoRD->field_begin(), Flags)); 4723 CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind), 4724 FlagsLVal); 4725 if (unsigned *P = Pos.dyn_cast<unsigned *>()) { 4726 ++(*P); 4727 } else { 4728 LValue &PosLVal = *Pos.get<LValue *>(); 4729 llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc()); 4730 Idx = CGF.Builder.CreateNUWAdd(Idx, 4731 llvm::ConstantInt::get(Idx->getType(), 1)); 4732 CGF.EmitStoreOfScalar(Idx, PosLVal); 4733 } 4734 } 4735 } 4736 4737 static SmallVector<llvm::Value *, 4> 4738 emitDepobjElementsSizes(CodeGenFunction &CGF, QualType &KmpDependInfoTy, 4739 const OMPTaskDataTy::DependData &Data) { 4740 assert(Data.DepKind == OMPC_DEPEND_depobj && 4741 "Expected depobj dependency kind."); 4742 SmallVector<llvm::Value *, 4> Sizes; 4743 SmallVector<LValue, 4> SizeLVals; 4744 ASTContext &C = CGF.getContext(); 4745 QualType FlagsTy; 4746 getDependTypes(C, KmpDependInfoTy, FlagsTy); 4747 RecordDecl *KmpDependInfoRD = 4748 cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl()); 4749 QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy); 4750 llvm::Type *KmpDependInfoPtrT = CGF.ConvertTypeForMem(KmpDependInfoPtrTy); 4751 { 4752 OMPIteratorGeneratorScope IteratorScope( 4753 CGF, cast_or_null<OMPIteratorExpr>( 4754 Data.IteratorExpr ? 
Data.IteratorExpr->IgnoreParenImpCasts() 4755 : nullptr)); 4756 for (const Expr *E : Data.DepExprs) { 4757 LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts()); 4758 LValue Base = CGF.EmitLoadOfPointerLValue( 4759 DepobjLVal.getAddress(CGF), 4760 C.getPointerType(C.VoidPtrTy).castAs<PointerType>()); 4761 Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4762 Base.getAddress(CGF), KmpDependInfoPtrT); 4763 Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(), 4764 Base.getTBAAInfo()); 4765 Address DepObjAddr = CGF.Builder.CreateGEP( 4766 Addr, llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true)); 4767 LValue NumDepsBase = CGF.MakeAddrLValue( 4768 DepObjAddr, KmpDependInfoTy, Base.getBaseInfo(), Base.getTBAAInfo()); 4769 // NumDeps = deps[i].base_addr; 4770 LValue BaseAddrLVal = CGF.EmitLValueForField( 4771 NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr)); 4772 llvm::Value *NumDeps = 4773 CGF.EmitLoadOfScalar(BaseAddrLVal, E->getExprLoc()); 4774 LValue NumLVal = CGF.MakeAddrLValue( 4775 CGF.CreateMemTemp(C.getUIntPtrType(), "depobj.size.addr"), 4776 C.getUIntPtrType()); 4777 CGF.Builder.CreateStore(llvm::ConstantInt::get(CGF.IntPtrTy, 0), 4778 NumLVal.getAddress(CGF)); 4779 llvm::Value *PrevVal = CGF.EmitLoadOfScalar(NumLVal, E->getExprLoc()); 4780 llvm::Value *Add = CGF.Builder.CreateNUWAdd(PrevVal, NumDeps); 4781 CGF.EmitStoreOfScalar(Add, NumLVal); 4782 SizeLVals.push_back(NumLVal); 4783 } 4784 } 4785 for (unsigned I = 0, E = SizeLVals.size(); I < E; ++I) { 4786 llvm::Value *Size = 4787 CGF.EmitLoadOfScalar(SizeLVals[I], Data.DepExprs[I]->getExprLoc()); 4788 Sizes.push_back(Size); 4789 } 4790 return Sizes; 4791 } 4792 4793 static void emitDepobjElements(CodeGenFunction &CGF, QualType &KmpDependInfoTy, 4794 LValue PosLVal, 4795 const OMPTaskDataTy::DependData &Data, 4796 Address DependenciesArray) { 4797 assert(Data.DepKind == OMPC_DEPEND_depobj && 4798 "Expected depobj dependency kind."); 4799 ASTContext &C = CGF.getContext(); 4800 QualType FlagsTy; 4801 getDependTypes(C, KmpDependInfoTy, FlagsTy); 4802 RecordDecl *KmpDependInfoRD = 4803 cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl()); 4804 QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy); 4805 llvm::Type *KmpDependInfoPtrT = CGF.ConvertTypeForMem(KmpDependInfoPtrTy); 4806 llvm::Value *ElSize = CGF.getTypeSize(KmpDependInfoTy); 4807 { 4808 OMPIteratorGeneratorScope IteratorScope( 4809 CGF, cast_or_null<OMPIteratorExpr>( 4810 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts() 4811 : nullptr)); 4812 for (unsigned I = 0, End = Data.DepExprs.size(); I < End; ++I) { 4813 const Expr *E = Data.DepExprs[I]; 4814 LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts()); 4815 LValue Base = CGF.EmitLoadOfPointerLValue( 4816 DepobjLVal.getAddress(CGF), 4817 C.getPointerType(C.VoidPtrTy).castAs<PointerType>()); 4818 Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4819 Base.getAddress(CGF), KmpDependInfoPtrT); 4820 Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(), 4821 Base.getTBAAInfo()); 4822 4823 // Get number of elements in a single depobj.
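      // Layout assumed for a depobj allocation (see emitDepobjDependClause):
      //   [ count ][ dep_0 ][ dep_1 ] ... [ dep_{count-1} ]
      // The user-visible pointer refers to dep_0, so the element count is
      // read from the base_addr field of the record at index -1.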
4824 Address DepObjAddr = CGF.Builder.CreateGEP( 4825 Addr, llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true)); 4826 LValue NumDepsBase = CGF.MakeAddrLValue( 4827 DepObjAddr, KmpDependInfoTy, Base.getBaseInfo(), Base.getTBAAInfo()); 4828 // NumDeps = deps[i].base_addr; 4829 LValue BaseAddrLVal = CGF.EmitLValueForField( 4830 NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr)); 4831 llvm::Value *NumDeps = 4832 CGF.EmitLoadOfScalar(BaseAddrLVal, E->getExprLoc()); 4833 4834 // memcpy dependency data. 4835 llvm::Value *Size = CGF.Builder.CreateNUWMul( 4836 ElSize, 4837 CGF.Builder.CreateIntCast(NumDeps, CGF.SizeTy, /*isSigned=*/false)); 4838 llvm::Value *Pos = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc()); 4839 Address DepAddr = CGF.Builder.CreateGEP(DependenciesArray, Pos); 4840 CGF.Builder.CreateMemCpy(DepAddr, Base.getAddress(CGF), Size); 4841 4842 // Increase pos. 4843 // pos += size; 4844 llvm::Value *Add = CGF.Builder.CreateNUWAdd(Pos, NumDeps); 4845 CGF.EmitStoreOfScalar(Add, PosLVal); 4846 } 4847 } 4848 } 4849 4850 std::pair<llvm::Value *, Address> CGOpenMPRuntime::emitDependClause( 4851 CodeGenFunction &CGF, ArrayRef<OMPTaskDataTy::DependData> Dependencies, 4852 SourceLocation Loc) { 4853 if (llvm::all_of(Dependencies, [](const OMPTaskDataTy::DependData &D) { 4854 return D.DepExprs.empty(); 4855 })) 4856 return std::make_pair(nullptr, Address::invalid()); 4857 // Process list of dependencies. 4858 ASTContext &C = CGM.getContext(); 4859 Address DependenciesArray = Address::invalid(); 4860 llvm::Value *NumOfElements = nullptr; 4861 unsigned NumDependencies = std::accumulate( 4862 Dependencies.begin(), Dependencies.end(), 0, 4863 [](unsigned V, const OMPTaskDataTy::DependData &D) { 4864 return D.DepKind == OMPC_DEPEND_depobj 4865 ? V 4866 : (V + (D.IteratorExpr ? 0 : D.DepExprs.size())); 4867 }); 4868 QualType FlagsTy; 4869 getDependTypes(C, KmpDependInfoTy, FlagsTy); 4870 bool HasDepobjDeps = false; 4871 bool HasRegularWithIterators = false; 4872 llvm::Value *NumOfDepobjElements = llvm::ConstantInt::get(CGF.IntPtrTy, 0); 4873 llvm::Value *NumOfRegularWithIterators = 4874 llvm::ConstantInt::get(CGF.IntPtrTy, 0); 4875 // Calculate number of depobj dependencies and regular deps with the iterators. 4876 for (const OMPTaskDataTy::DependData &D : Dependencies) { 4877 if (D.DepKind == OMPC_DEPEND_depobj) { 4878 SmallVector<llvm::Value *, 4> Sizes = 4879 emitDepobjElementsSizes(CGF, KmpDependInfoTy, D); 4880 for (llvm::Value *Size : Sizes) { 4881 NumOfDepobjElements = 4882 CGF.Builder.CreateNUWAdd(NumOfDepobjElements, Size); 4883 } 4884 HasDepobjDeps = true; 4885 continue; 4886 } 4887 // Include number of iterations, if any.
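    // For example, depend(iterator(i = 0 : n), in : a[i]) contributes
    // n * <number of list items> entries: the multiplication below scales
    // each iterator's upper bound by the size of the clause's list.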
4888 4889 if (const auto *IE = cast_or_null<OMPIteratorExpr>(D.IteratorExpr)) { 4890 for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) { 4891 llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper); 4892 Sz = CGF.Builder.CreateIntCast(Sz, CGF.IntPtrTy, /*isSigned=*/false); 4893 llvm::Value *NumClauseDeps = CGF.Builder.CreateNUWMul( 4894 Sz, llvm::ConstantInt::get(CGF.IntPtrTy, D.DepExprs.size())); 4895 NumOfRegularWithIterators = 4896 CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumClauseDeps); 4897 } 4898 HasRegularWithIterators = true; 4899 continue; 4900 } 4901 } 4902 4903 QualType KmpDependInfoArrayTy; 4904 if (HasDepobjDeps || HasRegularWithIterators) { 4905 NumOfElements = llvm::ConstantInt::get(CGM.IntPtrTy, NumDependencies, 4906 /*isSigned=*/false); 4907 if (HasDepobjDeps) { 4908 NumOfElements = 4909 CGF.Builder.CreateNUWAdd(NumOfDepobjElements, NumOfElements); 4910 } 4911 if (HasRegularWithIterators) { 4912 NumOfElements = 4913 CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumOfElements); 4914 } 4915 auto *OVE = new (C) OpaqueValueExpr( 4916 Loc, C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0), 4917 VK_PRValue); 4918 CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, OVE, 4919 RValue::get(NumOfElements)); 4920 KmpDependInfoArrayTy = 4921 C.getVariableArrayType(KmpDependInfoTy, OVE, ArrayType::Normal, 4922 /*IndexTypeQuals=*/0, SourceRange(Loc, Loc)); 4923 // CGF.EmitVariablyModifiedType(KmpDependInfoArrayTy); 4924 // Properly emit variable-sized array. 4925 auto *PD = ImplicitParamDecl::Create(C, KmpDependInfoArrayTy, 4926 ImplicitParamDecl::Other); 4927 CGF.EmitVarDecl(*PD); 4928 DependenciesArray = CGF.GetAddrOfLocalVar(PD); 4929 NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty, 4930 /*isSigned=*/false); 4931 } else { 4932 KmpDependInfoArrayTy = C.getConstantArrayType( 4933 KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies), nullptr, 4934 ArrayType::Normal, /*IndexTypeQuals=*/0); 4935 DependenciesArray = 4936 CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr"); 4937 DependenciesArray = CGF.Builder.CreateConstArrayGEP(DependenciesArray, 0); 4938 NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumDependencies, 4939 /*isSigned=*/false); 4940 } 4941 unsigned Pos = 0; 4942 for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) { 4943 if (Dependencies[I].DepKind == OMPC_DEPEND_depobj || 4944 Dependencies[I].IteratorExpr) 4945 continue; 4946 emitDependData(CGF, KmpDependInfoTy, &Pos, Dependencies[I], 4947 DependenciesArray); 4948 } 4949 // Copy regular dependencies with iterators. 4950 LValue PosLVal = CGF.MakeAddrLValue( 4951 CGF.CreateMemTemp(C.getSizeType(), "dep.counter.addr"), C.getSizeType()); 4952 CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal); 4953 for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) { 4954 if (Dependencies[I].DepKind == OMPC_DEPEND_depobj || 4955 !Dependencies[I].IteratorExpr) 4956 continue; 4957 emitDependData(CGF, KmpDependInfoTy, &PosLVal, Dependencies[I], 4958 DependenciesArray); 4959 } 4960 // Copy final depobj arrays without iterators.
4961 if (HasDepobjDeps) { 4962 for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) { 4963 if (Dependencies[I].DepKind != OMPC_DEPEND_depobj) 4964 continue; 4965 emitDepobjElements(CGF, KmpDependInfoTy, PosLVal, Dependencies[I], 4966 DependenciesArray); 4967 } 4968 } 4969 DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4970 DependenciesArray, CGF.VoidPtrTy); 4971 return std::make_pair(NumOfElements, DependenciesArray); 4972 } 4973 4974 Address CGOpenMPRuntime::emitDepobjDependClause( 4975 CodeGenFunction &CGF, const OMPTaskDataTy::DependData &Dependencies, 4976 SourceLocation Loc) { 4977 if (Dependencies.DepExprs.empty()) 4978 return Address::invalid(); 4979 // Process list of dependencies. 4980 ASTContext &C = CGM.getContext(); 4981 Address DependenciesArray = Address::invalid(); 4982 unsigned NumDependencies = Dependencies.DepExprs.size(); 4983 QualType FlagsTy; 4984 getDependTypes(C, KmpDependInfoTy, FlagsTy); 4985 RecordDecl *KmpDependInfoRD = 4986 cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl()); 4987 4988 llvm::Value *Size; 4989 // Define type kmp_depend_info[<Dependencies.size()>]; 4990 // For depobj reserve one extra element to store the number of elements. 4991 // It is required to handle depobj(x) update(in) construct. 4992 // kmp_depend_info[<Dependencies.size()>] deps; 4993 llvm::Value *NumDepsVal; 4994 CharUnits Align = C.getTypeAlignInChars(KmpDependInfoTy); 4995 if (const auto *IE = 4996 cast_or_null<OMPIteratorExpr>(Dependencies.IteratorExpr)) { 4997 NumDepsVal = llvm::ConstantInt::get(CGF.SizeTy, 1); 4998 for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) { 4999 llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper); 5000 Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false); 5001 NumDepsVal = CGF.Builder.CreateNUWMul(NumDepsVal, Sz); 5002 } 5003 Size = CGF.Builder.CreateNUWAdd(llvm::ConstantInt::get(CGF.SizeTy, 1), 5004 NumDepsVal); 5005 CharUnits SizeInBytes = 5006 C.getTypeSizeInChars(KmpDependInfoTy).alignTo(Align); 5007 llvm::Value *RecSize = CGM.getSize(SizeInBytes); 5008 Size = CGF.Builder.CreateNUWMul(Size, RecSize); 5009 NumDepsVal = 5010 CGF.Builder.CreateIntCast(NumDepsVal, CGF.IntPtrTy, /*isSigned=*/false); 5011 } else { 5012 QualType KmpDependInfoArrayTy = C.getConstantArrayType( 5013 KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies + 1), 5014 nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0); 5015 CharUnits Sz = C.getTypeSizeInChars(KmpDependInfoArrayTy); 5016 Size = CGM.getSize(Sz.alignTo(Align)); 5017 NumDepsVal = llvm::ConstantInt::get(CGF.IntPtrTy, NumDependencies); 5018 } 5019 // Need to allocate on the dynamic memory. 5020 llvm::Value *ThreadID = getThreadID(CGF, Loc); 5021 // Use default allocator. 5022 llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); 5023 llvm::Value *Args[] = {ThreadID, Size, Allocator}; 5024 5025 llvm::Value *Addr = 5026 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 5027 CGM.getModule(), OMPRTL___kmpc_alloc), 5028 Args, ".dep.arr.addr"); 5029 Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5030 Addr, CGF.ConvertTypeForMem(KmpDependInfoTy)->getPointerTo()); 5031 DependenciesArray = Address::deprecated(Addr, Align); 5032 // Write number of elements in the first element of array for depobj. 
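  // The count occupies the base_addr field of element 0; the pointer
  // returned to the program (below) is advanced past that header element,
  // so consumers see only the dependency records themselves.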
5033 LValue Base = CGF.MakeAddrLValue(DependenciesArray, KmpDependInfoTy); 5034 // deps[i].base_addr = NumDependencies; 5035 LValue BaseAddrLVal = CGF.EmitLValueForField( 5036 Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr)); 5037 CGF.EmitStoreOfScalar(NumDepsVal, BaseAddrLVal); 5038 llvm::PointerUnion<unsigned *, LValue *> Pos; 5039 unsigned Idx = 1; 5040 LValue PosLVal; 5041 if (Dependencies.IteratorExpr) { 5042 PosLVal = CGF.MakeAddrLValue( 5043 CGF.CreateMemTemp(C.getSizeType(), "iterator.counter.addr"), 5044 C.getSizeType()); 5045 CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Idx), PosLVal, 5046 /*IsInit=*/true); 5047 Pos = &PosLVal; 5048 } else { 5049 Pos = &Idx; 5050 } 5051 emitDependData(CGF, KmpDependInfoTy, Pos, Dependencies, DependenciesArray); 5052 DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5053 CGF.Builder.CreateConstGEP(DependenciesArray, 1), CGF.VoidPtrTy); 5054 return DependenciesArray; 5055 } 5056 5057 void CGOpenMPRuntime::emitDestroyClause(CodeGenFunction &CGF, LValue DepobjLVal, 5058 SourceLocation Loc) { 5059 ASTContext &C = CGM.getContext(); 5060 QualType FlagsTy; 5061 getDependTypes(C, KmpDependInfoTy, FlagsTy); 5062 LValue Base = CGF.EmitLoadOfPointerLValue( 5063 DepobjLVal.getAddress(CGF), 5064 C.getPointerType(C.VoidPtrTy).castAs<PointerType>()); 5065 QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy); 5066 Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5067 Base.getAddress(CGF), CGF.ConvertTypeForMem(KmpDependInfoPtrTy)); 5068 llvm::Value *DepObjAddr = CGF.Builder.CreateGEP( 5069 Addr.getElementType(), Addr.getPointer(), 5070 llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true)); 5071 DepObjAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(DepObjAddr, 5072 CGF.VoidPtrTy); 5073 llvm::Value *ThreadID = getThreadID(CGF, Loc); 5074 // Use default allocator. 5075 llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); 5076 llvm::Value *Args[] = {ThreadID, DepObjAddr, Allocator}; 5077 5078 // __kmpc_free(gtid, addr, nullptr); 5079 (void)CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 5080 CGM.getModule(), OMPRTL___kmpc_free), 5081 Args); 5082 } 5083 5084 void CGOpenMPRuntime::emitUpdateClause(CodeGenFunction &CGF, LValue DepobjLVal, 5085 OpenMPDependClauseKind NewDepKind, 5086 SourceLocation Loc) { 5087 ASTContext &C = CGM.getContext(); 5088 QualType FlagsTy; 5089 getDependTypes(C, KmpDependInfoTy, FlagsTy); 5090 RecordDecl *KmpDependInfoRD = 5091 cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl()); 5092 llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy); 5093 llvm::Value *NumDeps; 5094 LValue Base; 5095 std::tie(NumDeps, Base) = getDepobjElements(CGF, DepobjLVal, Loc); 5096 5097 Address Begin = Base.getAddress(CGF); 5098 // Cast from pointer to array type to pointer to single element. 5099 llvm::Value *End = CGF.Builder.CreateGEP( 5100 Begin.getElementType(), Begin.getPointer(), NumDeps); 5101 // The basic structure here is a while-do loop.
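  // In pseudo-code, the loop emitted below is roughly:
  //   el = &deps[0];
  //   do { el->flags = new_kind; ++el; } while (el != &deps[ndeps]);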
5102 llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.body"); 5103 llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.done"); 5104 llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock(); 5105 CGF.EmitBlock(BodyBB); 5106 llvm::PHINode *ElementPHI = 5107 CGF.Builder.CreatePHI(Begin.getType(), 2, "omp.elementPast"); 5108 ElementPHI->addIncoming(Begin.getPointer(), EntryBB); 5109 Begin = Begin.withPointer(ElementPHI); 5110 Base = CGF.MakeAddrLValue(Begin, KmpDependInfoTy, Base.getBaseInfo(), 5111 Base.getTBAAInfo()); 5112 // deps[i].flags = NewDepKind; 5113 RTLDependenceKindTy DepKind = translateDependencyKind(NewDepKind); 5114 LValue FlagsLVal = CGF.EmitLValueForField( 5115 Base, *std::next(KmpDependInfoRD->field_begin(), Flags)); 5116 CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind), 5117 FlagsLVal); 5118 5119 // Shift the address forward by one element. 5120 Address ElementNext = 5121 CGF.Builder.CreateConstGEP(Begin, /*Index=*/1, "omp.elementNext"); 5122 ElementPHI->addIncoming(ElementNext.getPointer(), 5123 CGF.Builder.GetInsertBlock()); 5124 llvm::Value *IsEmpty = 5125 CGF.Builder.CreateICmpEQ(ElementNext.getPointer(), End, "omp.isempty"); 5126 CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB); 5127 // Done. 5128 CGF.EmitBlock(DoneBB, /*IsFinished=*/true); 5129 } 5130 5131 void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc, 5132 const OMPExecutableDirective &D, 5133 llvm::Function *TaskFunction, 5134 QualType SharedsTy, Address Shareds, 5135 const Expr *IfCond, 5136 const OMPTaskDataTy &Data) { 5137 if (!CGF.HaveInsertPoint()) 5138 return; 5139 5140 TaskResultTy Result = 5141 emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data); 5142 llvm::Value *NewTask = Result.NewTask; 5143 llvm::Function *TaskEntry = Result.TaskEntry; 5144 llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy; 5145 LValue TDBase = Result.TDBase; 5146 const RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD; 5147 // Process list of dependences. 5148 Address DependenciesArray = Address::invalid(); 5149 llvm::Value *NumOfElements; 5150 std::tie(NumOfElements, DependenciesArray) = 5151 emitDependClause(CGF, Data.Dependences, Loc); 5152 5153 // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc() 5154 // libcall. 
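  // Rough shape of what is emitted for, e.g.,
  //   #pragma omp task depend(in : x) if(cond)
  // (illustrative pseudo-code only):
  //   if (cond) {
  //     __kmpc_omp_task_with_deps(loc, gtid, new_task, ndeps, deps, 0, null);
  //   } else {
  //     __kmpc_omp_wait_deps(loc, gtid, ndeps, deps, 0, null);
  //     __kmpc_omp_task_begin_if0(loc, gtid, new_task);
  //     proxy_task_entry(gtid, new_task);
  //     __kmpc_omp_task_complete_if0(loc, gtid, new_task);
  //   }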
5155 // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid, 5156 // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list, 5157 // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if dependence 5158 // list is not empty 5159 llvm::Value *ThreadID = getThreadID(CGF, Loc); 5160 llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc); 5161 llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask }; 5162 llvm::Value *DepTaskArgs[7]; 5163 if (!Data.Dependences.empty()) { 5164 DepTaskArgs[0] = UpLoc; 5165 DepTaskArgs[1] = ThreadID; 5166 DepTaskArgs[2] = NewTask; 5167 DepTaskArgs[3] = NumOfElements; 5168 DepTaskArgs[4] = DependenciesArray.getPointer(); 5169 DepTaskArgs[5] = CGF.Builder.getInt32(0); 5170 DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); 5171 } 5172 auto &&ThenCodeGen = [this, &Data, TDBase, KmpTaskTQTyRD, &TaskArgs, 5173 &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) { 5174 if (!Data.Tied) { 5175 auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId); 5176 LValue PartIdLVal = CGF.EmitLValueForField(TDBase, *PartIdFI); 5177 CGF.EmitStoreOfScalar(CGF.Builder.getInt32(0), PartIdLVal); 5178 } 5179 if (!Data.Dependences.empty()) { 5180 CGF.EmitRuntimeCall( 5181 OMPBuilder.getOrCreateRuntimeFunction( 5182 CGM.getModule(), OMPRTL___kmpc_omp_task_with_deps), 5183 DepTaskArgs); 5184 } else { 5185 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 5186 CGM.getModule(), OMPRTL___kmpc_omp_task), 5187 TaskArgs); 5188 } 5189 // Check if parent region is untied and build return for untied task; 5190 if (auto *Region = 5191 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) 5192 Region->emitUntiedSwitch(CGF); 5193 }; 5194 5195 llvm::Value *DepWaitTaskArgs[6]; 5196 if (!Data.Dependences.empty()) { 5197 DepWaitTaskArgs[0] = UpLoc; 5198 DepWaitTaskArgs[1] = ThreadID; 5199 DepWaitTaskArgs[2] = NumOfElements; 5200 DepWaitTaskArgs[3] = DependenciesArray.getPointer(); 5201 DepWaitTaskArgs[4] = CGF.Builder.getInt32(0); 5202 DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); 5203 } 5204 auto &M = CGM.getModule(); 5205 auto &&ElseCodeGen = [this, &M, &TaskArgs, ThreadID, NewTaskNewTaskTTy, 5206 TaskEntry, &Data, &DepWaitTaskArgs, 5207 Loc](CodeGenFunction &CGF, PrePostActionTy &) { 5208 CodeGenFunction::RunCleanupsScope LocalScope(CGF); 5209 // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid, 5210 // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 5211 // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info 5212 // is specified. 
5213 if (!Data.Dependences.empty()) 5214 CGF.EmitRuntimeCall( 5215 OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_omp_wait_deps), 5216 DepWaitTaskArgs); 5217 // Call proxy_task_entry(gtid, new_task); 5218 auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy, 5219 Loc](CodeGenFunction &CGF, PrePostActionTy &Action) { 5220 Action.Enter(CGF); 5221 llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy}; 5222 CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskEntry, 5223 OutlinedFnArgs); 5224 }; 5225 5226 // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid, 5227 // kmp_task_t *new_task); 5228 // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid, 5229 // kmp_task_t *new_task); 5230 RegionCodeGenTy RCG(CodeGen); 5231 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction( 5232 M, OMPRTL___kmpc_omp_task_begin_if0), 5233 TaskArgs, 5234 OMPBuilder.getOrCreateRuntimeFunction( 5235 M, OMPRTL___kmpc_omp_task_complete_if0), 5236 TaskArgs); 5237 RCG.setAction(Action); 5238 RCG(CGF); 5239 }; 5240 5241 if (IfCond) { 5242 emitIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen); 5243 } else { 5244 RegionCodeGenTy ThenRCG(ThenCodeGen); 5245 ThenRCG(CGF); 5246 } 5247 } 5248 5249 void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc, 5250 const OMPLoopDirective &D, 5251 llvm::Function *TaskFunction, 5252 QualType SharedsTy, Address Shareds, 5253 const Expr *IfCond, 5254 const OMPTaskDataTy &Data) { 5255 if (!CGF.HaveInsertPoint()) 5256 return; 5257 TaskResultTy Result = 5258 emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data); 5259 // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc() 5260 // libcall. 5261 // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int 5262 // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int 5263 // sched, kmp_uint64 grainsize, void *task_dup); 5264 llvm::Value *ThreadID = getThreadID(CGF, Loc); 5265 llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc); 5266 llvm::Value *IfVal; 5267 if (IfCond) { 5268 IfVal = CGF.Builder.CreateIntCast(CGF.EvaluateExprAsBool(IfCond), CGF.IntTy, 5269 /*isSigned=*/true); 5270 } else { 5271 IfVal = llvm::ConstantInt::getSigned(CGF.IntTy, /*V=*/1); 5272 } 5273 5274 LValue LBLVal = CGF.EmitLValueForField( 5275 Result.TDBase, 5276 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound)); 5277 const auto *LBVar = 5278 cast<VarDecl>(cast<DeclRefExpr>(D.getLowerBoundVariable())->getDecl()); 5279 CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(CGF), 5280 LBLVal.getQuals(), 5281 /*IsInitializer=*/true); 5282 LValue UBLVal = CGF.EmitLValueForField( 5283 Result.TDBase, 5284 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound)); 5285 const auto *UBVar = 5286 cast<VarDecl>(cast<DeclRefExpr>(D.getUpperBoundVariable())->getDecl()); 5287 CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(CGF), 5288 UBLVal.getQuals(), 5289 /*IsInitializer=*/true); 5290 LValue StLVal = CGF.EmitLValueForField( 5291 Result.TDBase, 5292 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTStride)); 5293 const auto *StVar = 5294 cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl()); 5295 CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(CGF), 5296 StLVal.getQuals(), 5297 /*IsInitializer=*/true); 5298 // Store reductions address. 
5299 LValue RedLVal = CGF.EmitLValueForField( 5300 Result.TDBase, 5301 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTReductions)); 5302 if (Data.Reductions) { 5303 CGF.EmitStoreOfScalar(Data.Reductions, RedLVal); 5304 } else { 5305 CGF.EmitNullInitialization(RedLVal.getAddress(CGF), 5306 CGF.getContext().VoidPtrTy); 5307 } 5308 enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 }; 5309 llvm::Value *TaskArgs[] = { 5310 UpLoc, 5311 ThreadID, 5312 Result.NewTask, 5313 IfVal, 5314 LBLVal.getPointer(CGF), 5315 UBLVal.getPointer(CGF), 5316 CGF.EmitLoadOfScalar(StLVal, Loc), 5317 llvm::ConstantInt::getSigned( 5318 CGF.IntTy, 1), // Always 1 because taskgroup emitted by the compiler 5319 llvm::ConstantInt::getSigned( 5320 CGF.IntTy, Data.Schedule.getPointer() 5321 ? Data.Schedule.getInt() ? NumTasks : Grainsize 5322 : NoSchedule), 5323 Data.Schedule.getPointer() 5324 ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty, 5325 /*isSigned=*/false) 5326 : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0), 5327 Result.TaskDupFn ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5328 Result.TaskDupFn, CGF.VoidPtrTy) 5329 : llvm::ConstantPointerNull::get(CGF.VoidPtrTy)}; 5330 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 5331 CGM.getModule(), OMPRTL___kmpc_taskloop), 5332 TaskArgs); 5333 } 5334 5335 /// Emit reduction operation for each element of array (required for 5336 /// array sections) LHS op = RHS. 5337 /// \param Type Type of array. 5338 /// \param LHSVar Variable on the left side of the reduction operation 5339 /// (references element of array in original variable). 5340 /// \param RHSVar Variable on the right side of the reduction operation 5341 /// (references element of array in original variable). 5342 /// \param RedOpGen Generator of reduction operation with use of LHSVar and 5343 /// RHSVar. 5344 static void EmitOMPAggregateReduction( 5345 CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar, 5346 const VarDecl *RHSVar, 5347 const llvm::function_ref<void(CodeGenFunction &CGF, const Expr *, 5348 const Expr *, const Expr *)> &RedOpGen, 5349 const Expr *XExpr = nullptr, const Expr *EExpr = nullptr, 5350 const Expr *UpExpr = nullptr) { 5351 // Perform element-by-element initialization. 5352 QualType ElementTy; 5353 Address LHSAddr = CGF.GetAddrOfLocalVar(LHSVar); 5354 Address RHSAddr = CGF.GetAddrOfLocalVar(RHSVar); 5355 5356 // Drill down to the base element type on both arrays. 5357 const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe(); 5358 llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, LHSAddr); 5359 5360 llvm::Value *RHSBegin = RHSAddr.getPointer(); 5361 llvm::Value *LHSBegin = LHSAddr.getPointer(); 5362 // Cast from pointer to array type to pointer to single element. 5363 llvm::Value *LHSEnd = 5364 CGF.Builder.CreateGEP(LHSAddr.getElementType(), LHSBegin, NumElements); 5365 // The basic structure here is a while-do loop. 5366 llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arraycpy.body"); 5367 llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arraycpy.done"); 5368 llvm::Value *IsEmpty = 5369 CGF.Builder.CreateICmpEQ(LHSBegin, LHSEnd, "omp.arraycpy.isempty"); 5370 CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB); 5371 5372 // Enter the loop body, making that address the current address. 
5373 llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock(); 5374 CGF.EmitBlock(BodyBB); 5375 5376 CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy); 5377 5378 llvm::PHINode *RHSElementPHI = CGF.Builder.CreatePHI( 5379 RHSBegin->getType(), 2, "omp.arraycpy.srcElementPast"); 5380 RHSElementPHI->addIncoming(RHSBegin, EntryBB); 5381 Address RHSElementCurrent = Address::deprecated( 5382 RHSElementPHI, 5383 RHSAddr.getAlignment().alignmentOfArrayElement(ElementSize)); 5384 5385 llvm::PHINode *LHSElementPHI = CGF.Builder.CreatePHI( 5386 LHSBegin->getType(), 2, "omp.arraycpy.destElementPast"); 5387 LHSElementPHI->addIncoming(LHSBegin, EntryBB); 5388 Address LHSElementCurrent = Address::deprecated( 5389 LHSElementPHI, 5390 LHSAddr.getAlignment().alignmentOfArrayElement(ElementSize)); 5391 5392 // Emit copy. 5393 CodeGenFunction::OMPPrivateScope Scope(CGF); 5394 Scope.addPrivate(LHSVar, [=]() { return LHSElementCurrent; }); 5395 Scope.addPrivate(RHSVar, [=]() { return RHSElementCurrent; }); 5396 Scope.Privatize(); 5397 RedOpGen(CGF, XExpr, EExpr, UpExpr); 5398 Scope.ForceCleanup(); 5399 5400 // Shift the address forward by one element. 5401 llvm::Value *LHSElementNext = CGF.Builder.CreateConstGEP1_32( 5402 LHSAddr.getElementType(), LHSElementPHI, /*Idx0=*/1, 5403 "omp.arraycpy.dest.element"); 5404 llvm::Value *RHSElementNext = CGF.Builder.CreateConstGEP1_32( 5405 RHSAddr.getElementType(), RHSElementPHI, /*Idx0=*/1, 5406 "omp.arraycpy.src.element"); 5407 // Check whether we've reached the end. 5408 llvm::Value *Done = 5409 CGF.Builder.CreateICmpEQ(LHSElementNext, LHSEnd, "omp.arraycpy.done"); 5410 CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB); 5411 LHSElementPHI->addIncoming(LHSElementNext, CGF.Builder.GetInsertBlock()); 5412 RHSElementPHI->addIncoming(RHSElementNext, CGF.Builder.GetInsertBlock()); 5413 5414 // Done. 5415 CGF.EmitBlock(DoneBB, /*IsFinished=*/true); 5416 } 5417 5418 /// Emit reduction combiner. If the combiner is a simple expression emit it as 5419 /// is, otherwise consider it as combiner of UDR decl and emit it as a call of 5420 /// UDR combiner function. 
5421 static void emitReductionCombiner(CodeGenFunction &CGF, 5422 const Expr *ReductionOp) { 5423 if (const auto *CE = dyn_cast<CallExpr>(ReductionOp)) 5424 if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee())) 5425 if (const auto *DRE = 5426 dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts())) 5427 if (const auto *DRD = 5428 dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) { 5429 std::pair<llvm::Function *, llvm::Function *> Reduction = 5430 CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD); 5431 RValue Func = RValue::get(Reduction.first); 5432 CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func); 5433 CGF.EmitIgnoredExpr(ReductionOp); 5434 return; 5435 } 5436 CGF.EmitIgnoredExpr(ReductionOp); 5437 } 5438 5439 llvm::Function *CGOpenMPRuntime::emitReductionFunction( 5440 SourceLocation Loc, llvm::Type *ArgsType, ArrayRef<const Expr *> Privates, 5441 ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs, 5442 ArrayRef<const Expr *> ReductionOps) { 5443 ASTContext &C = CGM.getContext(); 5444 5445 // void reduction_func(void *LHSArg, void *RHSArg); 5446 FunctionArgList Args; 5447 ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 5448 ImplicitParamDecl::Other); 5449 ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 5450 ImplicitParamDecl::Other); 5451 Args.push_back(&LHSArg); 5452 Args.push_back(&RHSArg); 5453 const auto &CGFI = 5454 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 5455 std::string Name = getName({"omp", "reduction", "reduction_func"}); 5456 auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI), 5457 llvm::GlobalValue::InternalLinkage, Name, 5458 &CGM.getModule()); 5459 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI); 5460 Fn->setDoesNotRecurse(); 5461 CodeGenFunction CGF(CGM); 5462 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc); 5463 5464 // Dst = (void*[n])(LHSArg); 5465 // Src = (void*[n])(RHSArg); 5466 Address LHS = Address::deprecated( 5467 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5468 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)), ArgsType), 5469 CGF.getPointerAlign()); 5470 Address RHS = Address::deprecated( 5471 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5472 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)), ArgsType), 5473 CGF.getPointerAlign()); 5474 5475 // ... 5476 // *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]); 5477 // ... 5478 CodeGenFunction::OMPPrivateScope Scope(CGF); 5479 auto IPriv = Privates.begin(); 5480 unsigned Idx = 0; 5481 for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) { 5482 const auto *RHSVar = 5483 cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl()); 5484 Scope.addPrivate(RHSVar, [&CGF, RHS, Idx, RHSVar]() { 5485 return emitAddrOfVarFromArray(CGF, RHS, Idx, RHSVar); 5486 }); 5487 const auto *LHSVar = 5488 cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl()); 5489 Scope.addPrivate(LHSVar, [&CGF, LHS, Idx, LHSVar]() { 5490 return emitAddrOfVarFromArray(CGF, LHS, Idx, LHSVar); 5491 }); 5492 QualType PrivTy = (*IPriv)->getType(); 5493 if (PrivTy->isVariablyModifiedType()) { 5494 // Get array size and emit VLA type. 
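      // (A variably-modified private occupies two consecutive slots in the
      // argument array: the element pointer itself and, in the following
      // slot, the element count passed through a void*; hence the extra
      // ++Idx here.)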
5495 ++Idx; 5496 Address Elem = CGF.Builder.CreateConstArrayGEP(LHS, Idx); 5497 llvm::Value *Ptr = CGF.Builder.CreateLoad(Elem); 5498 const VariableArrayType *VLA = 5499 CGF.getContext().getAsVariableArrayType(PrivTy); 5500 const auto *OVE = cast<OpaqueValueExpr>(VLA->getSizeExpr()); 5501 CodeGenFunction::OpaqueValueMapping OpaqueMap( 5502 CGF, OVE, RValue::get(CGF.Builder.CreatePtrToInt(Ptr, CGF.SizeTy))); 5503 CGF.EmitVariablyModifiedType(PrivTy); 5504 } 5505 } 5506 Scope.Privatize(); 5507 IPriv = Privates.begin(); 5508 auto ILHS = LHSExprs.begin(); 5509 auto IRHS = RHSExprs.begin(); 5510 for (const Expr *E : ReductionOps) { 5511 if ((*IPriv)->getType()->isArrayType()) { 5512 // Emit reduction for array section. 5513 const auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl()); 5514 const auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl()); 5515 EmitOMPAggregateReduction( 5516 CGF, (*IPriv)->getType(), LHSVar, RHSVar, 5517 [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) { 5518 emitReductionCombiner(CGF, E); 5519 }); 5520 } else { 5521 // Emit reduction for array subscript or single variable. 5522 emitReductionCombiner(CGF, E); 5523 } 5524 ++IPriv; 5525 ++ILHS; 5526 ++IRHS; 5527 } 5528 Scope.ForceCleanup(); 5529 CGF.FinishFunction(); 5530 return Fn; 5531 } 5532 5533 void CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF, 5534 const Expr *ReductionOp, 5535 const Expr *PrivateRef, 5536 const DeclRefExpr *LHS, 5537 const DeclRefExpr *RHS) { 5538 if (PrivateRef->getType()->isArrayType()) { 5539 // Emit reduction for array section. 5540 const auto *LHSVar = cast<VarDecl>(LHS->getDecl()); 5541 const auto *RHSVar = cast<VarDecl>(RHS->getDecl()); 5542 EmitOMPAggregateReduction( 5543 CGF, PrivateRef->getType(), LHSVar, RHSVar, 5544 [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) { 5545 emitReductionCombiner(CGF, ReductionOp); 5546 }); 5547 } else { 5548 // Emit reduction for array subscript or single variable. 5549 emitReductionCombiner(CGF, ReductionOp); 5550 } 5551 } 5552 5553 void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc, 5554 ArrayRef<const Expr *> Privates, 5555 ArrayRef<const Expr *> LHSExprs, 5556 ArrayRef<const Expr *> RHSExprs, 5557 ArrayRef<const Expr *> ReductionOps, 5558 ReductionOptionsTy Options) { 5559 if (!CGF.HaveInsertPoint()) 5560 return; 5561 5562 bool WithNowait = Options.WithNowait; 5563 bool SimpleReduction = Options.SimpleReduction; 5564 5565 // Next code should be emitted for reduction: 5566 // 5567 // static kmp_critical_name lock = { 0 }; 5568 // 5569 // void reduce_func(void *lhs[<n>], void *rhs[<n>]) { 5570 // *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]); 5571 // ... 5572 // *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1], 5573 // *(Type<n>-1*)rhs[<n>-1]); 5574 // } 5575 // 5576 // ... 5577 // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]}; 5578 // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList), 5579 // RedList, reduce_func, &<lock>)) { 5580 // case 1: 5581 // ... 5582 // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]); 5583 // ... 5584 // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>); 5585 // break; 5586 // case 2: 5587 // ... 5588 // Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i])); 5589 // ... 
5590 // [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);] 5591 // break; 5592 // default:; 5593 // } 5594 // 5595 // if SimpleReduction is true, only the next code is generated: 5596 // ... 5597 // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]); 5598 // ... 5599 5600 ASTContext &C = CGM.getContext(); 5601 5602 if (SimpleReduction) { 5603 CodeGenFunction::RunCleanupsScope Scope(CGF); 5604 auto IPriv = Privates.begin(); 5605 auto ILHS = LHSExprs.begin(); 5606 auto IRHS = RHSExprs.begin(); 5607 for (const Expr *E : ReductionOps) { 5608 emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS), 5609 cast<DeclRefExpr>(*IRHS)); 5610 ++IPriv; 5611 ++ILHS; 5612 ++IRHS; 5613 } 5614 return; 5615 } 5616 5617 // 1. Build a list of reduction variables. 5618 // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]}; 5619 auto Size = RHSExprs.size(); 5620 for (const Expr *E : Privates) { 5621 if (E->getType()->isVariablyModifiedType()) 5622 // Reserve place for array size. 5623 ++Size; 5624 } 5625 llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size); 5626 QualType ReductionArrayTy = 5627 C.getConstantArrayType(C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal, 5628 /*IndexTypeQuals=*/0); 5629 Address ReductionList = 5630 CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list"); 5631 auto IPriv = Privates.begin(); 5632 unsigned Idx = 0; 5633 for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) { 5634 Address Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx); 5635 CGF.Builder.CreateStore( 5636 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5637 CGF.EmitLValue(RHSExprs[I]).getPointer(CGF), CGF.VoidPtrTy), 5638 Elem); 5639 if ((*IPriv)->getType()->isVariablyModifiedType()) { 5640 // Store array size. 5641 ++Idx; 5642 Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx); 5643 llvm::Value *Size = CGF.Builder.CreateIntCast( 5644 CGF.getVLASize( 5645 CGF.getContext().getAsVariableArrayType((*IPriv)->getType())) 5646 .NumElts, 5647 CGF.SizeTy, /*isSigned=*/false); 5648 CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy), 5649 Elem); 5650 } 5651 } 5652 5653 // 2. Emit reduce_func(). 5654 llvm::Function *ReductionFn = emitReductionFunction( 5655 Loc, CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo(), Privates, 5656 LHSExprs, RHSExprs, ReductionOps); 5657 5658 // 3. Create static kmp_critical_name lock = { 0 }; 5659 std::string Name = getName({"reduction"}); 5660 llvm::Value *Lock = getCriticalRegionLock(Name); 5661 5662 // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList), 5663 // RedList, reduce_func, &<lock>); 5664 llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE); 5665 llvm::Value *ThreadId = getThreadID(CGF, Loc); 5666 llvm::Value *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy); 5667 llvm::Value *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5668 ReductionList.getPointer(), CGF.VoidPtrTy); 5669 llvm::Value *Args[] = { 5670 IdentTLoc, // ident_t *<loc> 5671 ThreadId, // i32 <gtid> 5672 CGF.Builder.getInt32(RHSExprs.size()), // i32 <n> 5673 ReductionArrayTySize, // size_type sizeof(RedList) 5674 RL, // void *RedList 5675 ReductionFn, // void (*) (void *, void *) <reduce_func> 5676 Lock // kmp_critical_name *&<lock> 5677 }; 5678 llvm::Value *Res = CGF.EmitRuntimeCall( 5679 OMPBuilder.getOrCreateRuntimeFunction( 5680 CGM.getModule(), 5681 WithNowait ? OMPRTL___kmpc_reduce_nowait : OMPRTL___kmpc_reduce), 5682 Args); 5683 5684 // 5. 
Build switch(res) 5685 llvm::BasicBlock *DefaultBB = CGF.createBasicBlock(".omp.reduction.default"); 5686 llvm::SwitchInst *SwInst = 5687 CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2); 5688 5689 // 6. Build case 1: 5690 // ... 5691 // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]); 5692 // ... 5693 // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>); 5694 // break; 5695 llvm::BasicBlock *Case1BB = CGF.createBasicBlock(".omp.reduction.case1"); 5696 SwInst->addCase(CGF.Builder.getInt32(1), Case1BB); 5697 CGF.EmitBlock(Case1BB); 5698 5699 // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>); 5700 llvm::Value *EndArgs[] = { 5701 IdentTLoc, // ident_t *<loc> 5702 ThreadId, // i32 <gtid> 5703 Lock // kmp_critical_name *&<lock> 5704 }; 5705 auto &&CodeGen = [Privates, LHSExprs, RHSExprs, ReductionOps]( 5706 CodeGenFunction &CGF, PrePostActionTy &Action) { 5707 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime(); 5708 auto IPriv = Privates.begin(); 5709 auto ILHS = LHSExprs.begin(); 5710 auto IRHS = RHSExprs.begin(); 5711 for (const Expr *E : ReductionOps) { 5712 RT.emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS), 5713 cast<DeclRefExpr>(*IRHS)); 5714 ++IPriv; 5715 ++ILHS; 5716 ++IRHS; 5717 } 5718 }; 5719 RegionCodeGenTy RCG(CodeGen); 5720 CommonActionTy Action( 5721 nullptr, llvm::None, 5722 OMPBuilder.getOrCreateRuntimeFunction( 5723 CGM.getModule(), WithNowait ? OMPRTL___kmpc_end_reduce_nowait 5724 : OMPRTL___kmpc_end_reduce), 5725 EndArgs); 5726 RCG.setAction(Action); 5727 RCG(CGF); 5728 5729 CGF.EmitBranch(DefaultBB); 5730 5731 // 7. Build case 2: 5732 // ... 5733 // Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i])); 5734 // ... 5735 // break; 5736 llvm::BasicBlock *Case2BB = CGF.createBasicBlock(".omp.reduction.case2"); 5737 SwInst->addCase(CGF.Builder.getInt32(2), Case2BB); 5738 CGF.EmitBlock(Case2BB); 5739 5740 auto &&AtomicCodeGen = [Loc, Privates, LHSExprs, RHSExprs, ReductionOps]( 5741 CodeGenFunction &CGF, PrePostActionTy &Action) { 5742 auto ILHS = LHSExprs.begin(); 5743 auto IRHS = RHSExprs.begin(); 5744 auto IPriv = Privates.begin(); 5745 for (const Expr *E : ReductionOps) { 5746 const Expr *XExpr = nullptr; 5747 const Expr *EExpr = nullptr; 5748 const Expr *UpExpr = nullptr; 5749 BinaryOperatorKind BO = BO_Comma; 5750 if (const auto *BO = dyn_cast<BinaryOperator>(E)) { 5751 if (BO->getOpcode() == BO_Assign) { 5752 XExpr = BO->getLHS(); 5753 UpExpr = BO->getRHS(); 5754 } 5755 } 5756 // Try to emit update expression as a simple atomic. 5757 const Expr *RHSExpr = UpExpr; 5758 if (RHSExpr) { 5759 // Analyze RHS part of the whole expression. 5760 if (const auto *ACO = dyn_cast<AbstractConditionalOperator>( 5761 RHSExpr->IgnoreParenImpCasts())) { 5762 // If this is a conditional operator, analyze its condition for 5763 // min/max reduction operator. 
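// (Sketch of the expected AST shape: for 'reduction(min : x)' the combiner is roughly 'x = x < rhs ? x : rhs', so taking the condition of the conditional operator exposes the '<' comparison that the atomic min/max lowering below relies on; this is illustrative, not an exhaustive list of combiner forms.)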
5764 RHSExpr = ACO->getCond(); 5765 } 5766 if (const auto *BORHS = 5767 dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) { 5768 EExpr = BORHS->getRHS(); 5769 BO = BORHS->getOpcode(); 5770 } 5771 } 5772 if (XExpr) { 5773 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl()); 5774 auto &&AtomicRedGen = [BO, VD, 5775 Loc](CodeGenFunction &CGF, const Expr *XExpr, 5776 const Expr *EExpr, const Expr *UpExpr) { 5777 LValue X = CGF.EmitLValue(XExpr); 5778 RValue E; 5779 if (EExpr) 5780 E = CGF.EmitAnyExpr(EExpr); 5781 CGF.EmitOMPAtomicSimpleUpdateExpr( 5782 X, E, BO, /*IsXLHSInRHSPart=*/true, 5783 llvm::AtomicOrdering::Monotonic, Loc, 5784 [&CGF, UpExpr, VD, Loc](RValue XRValue) { 5785 CodeGenFunction::OMPPrivateScope PrivateScope(CGF); 5786 PrivateScope.addPrivate( 5787 VD, [&CGF, VD, XRValue, Loc]() { 5788 Address LHSTemp = CGF.CreateMemTemp(VD->getType()); 5789 CGF.emitOMPSimpleStore( 5790 CGF.MakeAddrLValue(LHSTemp, VD->getType()), XRValue, 5791 VD->getType().getNonReferenceType(), Loc); 5792 return LHSTemp; 5793 }); 5794 (void)PrivateScope.Privatize(); 5795 return CGF.EmitAnyExpr(UpExpr); 5796 }); 5797 }; 5798 if ((*IPriv)->getType()->isArrayType()) { 5799 // Emit atomic reduction for array section. 5800 const auto *RHSVar = 5801 cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl()); 5802 EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar, 5803 AtomicRedGen, XExpr, EExpr, UpExpr); 5804 } else { 5805 // Emit atomic reduction for array subscript or single variable. 5806 AtomicRedGen(CGF, XExpr, EExpr, UpExpr); 5807 } 5808 } else { 5809 // Emit as a critical region. 5810 auto &&CritRedGen = [E, Loc](CodeGenFunction &CGF, const Expr *, 5811 const Expr *, const Expr *) { 5812 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime(); 5813 std::string Name = RT.getName({"atomic_reduction"}); 5814 RT.emitCriticalRegion( 5815 CGF, Name, 5816 [=](CodeGenFunction &CGF, PrePostActionTy &Action) { 5817 Action.Enter(CGF); 5818 emitReductionCombiner(CGF, E); 5819 }, 5820 Loc); 5821 }; 5822 if ((*IPriv)->getType()->isArrayType()) { 5823 const auto *LHSVar = 5824 cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl()); 5825 const auto *RHSVar = 5826 cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl()); 5827 EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar, 5828 CritRedGen); 5829 } else { 5830 CritRedGen(CGF, nullptr, nullptr, nullptr); 5831 } 5832 } 5833 ++ILHS; 5834 ++IRHS; 5835 ++IPriv; 5836 } 5837 }; 5838 RegionCodeGenTy AtomicRCG(AtomicCodeGen); 5839 if (!WithNowait) { 5840 // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>); 5841 llvm::Value *EndArgs[] = { 5842 IdentTLoc, // ident_t *<loc> 5843 ThreadId, // i32 <gtid> 5844 Lock // kmp_critical_name *&<lock> 5845 }; 5846 CommonActionTy Action(nullptr, llvm::None, 5847 OMPBuilder.getOrCreateRuntimeFunction( 5848 CGM.getModule(), OMPRTL___kmpc_end_reduce), 5849 EndArgs); 5850 AtomicRCG.setAction(Action); 5851 AtomicRCG(CGF); 5852 } else { 5853 AtomicRCG(CGF); 5854 } 5855 5856 CGF.EmitBranch(DefaultBB); 5857 CGF.EmitBlock(DefaultBB, /*IsFinished=*/true); 5858 } 5859 5860 /// Generates unique name for artificial threadprivate variables. 5861 /// Format is: <Prefix> "." 
<Decl_mangled_name> "_" "<Decl_start_loc_raw_enc>" 5862 static std::string generateUniqueName(CodeGenModule &CGM, StringRef Prefix, 5863 const Expr *Ref) { 5864 SmallString<256> Buffer; 5865 llvm::raw_svector_ostream Out(Buffer); 5866 const clang::DeclRefExpr *DE; 5867 const VarDecl *D = ::getBaseDecl(Ref, DE); 5868 if (!D) 5869 D = cast<VarDecl>(cast<DeclRefExpr>(Ref)->getDecl()); 5870 D = D->getCanonicalDecl(); 5871 std::string Name = CGM.getOpenMPRuntime().getName( 5872 {D->isLocalVarDeclOrParm() ? D->getName() : CGM.getMangledName(D)}); 5873 Out << Prefix << Name << "_" 5874 << D->getCanonicalDecl()->getBeginLoc().getRawEncoding(); 5875 return std::string(Out.str()); 5876 } 5877 5878 /// Emits reduction initializer function: 5879 /// \code 5880 /// void @.red_init(void* %arg, void* %orig) { 5881 /// %0 = bitcast void* %arg to <type>* 5882 /// store <type> <init>, <type>* %0 5883 /// ret void 5884 /// } 5885 /// \endcode 5886 static llvm::Value *emitReduceInitFunction(CodeGenModule &CGM, 5887 SourceLocation Loc, 5888 ReductionCodeGen &RCG, unsigned N) { 5889 ASTContext &C = CGM.getContext(); 5890 QualType VoidPtrTy = C.VoidPtrTy; 5891 VoidPtrTy.addRestrict(); 5892 FunctionArgList Args; 5893 ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy, 5894 ImplicitParamDecl::Other); 5895 ImplicitParamDecl ParamOrig(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy, 5896 ImplicitParamDecl::Other); 5897 Args.emplace_back(&Param); 5898 Args.emplace_back(&ParamOrig); 5899 const auto &FnInfo = 5900 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 5901 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo); 5902 std::string Name = CGM.getOpenMPRuntime().getName({"red_init", ""}); 5903 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage, 5904 Name, &CGM.getModule()); 5905 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo); 5906 Fn->setDoesNotRecurse(); 5907 CodeGenFunction CGF(CGM); 5908 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc); 5909 Address PrivateAddr = CGF.EmitLoadOfPointer( 5910 CGF.GetAddrOfLocalVar(&Param), 5911 C.getPointerType(C.VoidPtrTy).castAs<PointerType>()); 5912 llvm::Value *Size = nullptr; 5913 // If the size of the reduction item is non-constant, load it from global 5914 // threadprivate variable. 
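// (Sketch of the mechanism: for a VLA reduction item the size is unknown at compile time, so emitTaskReductionFixups() below stores it into an artificial threadprivate variable named via generateUniqueName() with the "reduction_size" prefix; it is re-loaded here because the runtime invokes this function without passing any size information.)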
5915 if (RCG.getSizes(N).second) { 5916 Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate( 5917 CGF, CGM.getContext().getSizeType(), 5918 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N))); 5919 Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false, 5920 CGM.getContext().getSizeType(), Loc); 5921 } 5922 RCG.emitAggregateType(CGF, N, Size); 5923 Address OrigAddr = Address::invalid(); 5924 // If the initializer uses the initializer from the 'declare reduction' 5925 // construct, emit a pointer to the address of the original reduction item 5926 // (required by the reduction initializer). 5927 if (RCG.usesReductionInitializer(N)) { 5928 Address SharedAddr = CGF.GetAddrOfLocalVar(&ParamOrig); 5929 OrigAddr = CGF.EmitLoadOfPointer( 5930 SharedAddr, 5931 CGM.getContext().VoidPtrTy.castAs<PointerType>()->getTypePtr()); 5932 } 5933 // Emit the initializer: 5934 // %0 = bitcast void* %arg to <type>* 5935 // store <type> <init>, <type>* %0 5936 RCG.emitInitialization(CGF, N, PrivateAddr, OrigAddr, 5937 [](CodeGenFunction &) { return false; }); 5938 CGF.FinishFunction(); 5939 return Fn; 5940 } 5941 5942 /// Emits reduction combiner function: 5943 /// \code 5944 /// void @.red_comb(void* %arg0, void* %arg1) { 5945 /// %lhs = bitcast void* %arg0 to <type>* 5946 /// %rhs = bitcast void* %arg1 to <type>* 5947 /// %2 = <ReductionOp>(<type>* %lhs, <type>* %rhs) 5948 /// store <type> %2, <type>* %lhs 5949 /// ret void 5950 /// } 5951 /// \endcode 5952 static llvm::Value *emitReduceCombFunction(CodeGenModule &CGM, 5953 SourceLocation Loc, 5954 ReductionCodeGen &RCG, unsigned N, 5955 const Expr *ReductionOp, 5956 const Expr *LHS, const Expr *RHS, 5957 const Expr *PrivateRef) { 5958 ASTContext &C = CGM.getContext(); 5959 const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(LHS)->getDecl()); 5960 const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(RHS)->getDecl()); 5961 FunctionArgList Args; 5962 ImplicitParamDecl ParamInOut(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 5963 C.VoidPtrTy, ImplicitParamDecl::Other); 5964 ImplicitParamDecl ParamIn(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 5965 ImplicitParamDecl::Other); 5966 Args.emplace_back(&ParamInOut); 5967 Args.emplace_back(&ParamIn); 5968 const auto &FnInfo = 5969 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 5970 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo); 5971 std::string Name = CGM.getOpenMPRuntime().getName({"red_comb", ""}); 5972 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage, 5973 Name, &CGM.getModule()); 5974 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo); 5975 Fn->setDoesNotRecurse(); 5976 CodeGenFunction CGF(CGM); 5977 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc); 5978 llvm::Value *Size = nullptr; 5979 // If the size of the reduction item is non-constant, load it from global 5980 // threadprivate variable. 5981 if (RCG.getSizes(N).second) { 5982 Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate( 5983 CGF, CGM.getContext().getSizeType(), 5984 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N))); 5985 Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false, 5986 CGM.getContext().getSizeType(), Loc); 5987 } 5988 RCG.emitAggregateType(CGF, N, Size); 5989 // Remap lhs and rhs variables to the addresses of the function arguments.
5990 // %lhs = bitcast void* %arg0 to <type>* 5991 // %rhs = bitcast void* %arg1 to <type>* 5992 CodeGenFunction::OMPPrivateScope PrivateScope(CGF); 5993 PrivateScope.addPrivate(LHSVD, [&C, &CGF, &ParamInOut, LHSVD]() { 5994 // Pull out the pointer to the variable. 5995 Address PtrAddr = CGF.EmitLoadOfPointer( 5996 CGF.GetAddrOfLocalVar(&ParamInOut), 5997 C.getPointerType(C.VoidPtrTy).castAs<PointerType>()); 5998 return CGF.Builder.CreateElementBitCast( 5999 PtrAddr, CGF.ConvertTypeForMem(LHSVD->getType())); 6000 }); 6001 PrivateScope.addPrivate(RHSVD, [&C, &CGF, &ParamIn, RHSVD]() { 6002 // Pull out the pointer to the variable. 6003 Address PtrAddr = CGF.EmitLoadOfPointer( 6004 CGF.GetAddrOfLocalVar(&ParamIn), 6005 C.getPointerType(C.VoidPtrTy).castAs<PointerType>()); 6006 return CGF.Builder.CreateElementBitCast( 6007 PtrAddr, CGF.ConvertTypeForMem(RHSVD->getType())); 6008 }); 6009 PrivateScope.Privatize(); 6010 // Emit the combiner body: 6011 // %2 = <ReductionOp>(<type> *%lhs, <type> *%rhs) 6012 // store <type> %2, <type>* %lhs 6013 CGM.getOpenMPRuntime().emitSingleReductionCombiner( 6014 CGF, ReductionOp, PrivateRef, cast<DeclRefExpr>(LHS), 6015 cast<DeclRefExpr>(RHS)); 6016 CGF.FinishFunction(); 6017 return Fn; 6018 } 6019 6020 /// Emits reduction finalizer function: 6021 /// \code 6022 /// void @.red_fini(void* %arg) { 6023 /// %0 = bitcast void* %arg to <type>* 6024 /// <destroy>(<type>* %0) 6025 /// ret void 6026 /// } 6027 /// \endcode 6028 static llvm::Value *emitReduceFiniFunction(CodeGenModule &CGM, 6029 SourceLocation Loc, 6030 ReductionCodeGen &RCG, unsigned N) { 6031 if (!RCG.needCleanups(N)) 6032 return nullptr; 6033 ASTContext &C = CGM.getContext(); 6034 FunctionArgList Args; 6035 ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 6036 ImplicitParamDecl::Other); 6037 Args.emplace_back(&Param); 6038 const auto &FnInfo = 6039 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 6040 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo); 6041 std::string Name = CGM.getOpenMPRuntime().getName({"red_fini", ""}); 6042 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage, 6043 Name, &CGM.getModule()); 6044 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo); 6045 Fn->setDoesNotRecurse(); 6046 CodeGenFunction CGF(CGM); 6047 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc); 6048 Address PrivateAddr = CGF.EmitLoadOfPointer( 6049 CGF.GetAddrOfLocalVar(&Param), 6050 C.getPointerType(C.VoidPtrTy).castAs<PointerType>()); 6051 llvm::Value *Size = nullptr; 6052 // If the size of the reduction item is non-constant, load it from global 6053 // threadprivate variable. 
6054 if (RCG.getSizes(N).second) { 6055 Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate( 6056 CGF, CGM.getContext().getSizeType(), 6057 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N))); 6058 Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false, 6059 CGM.getContext().getSizeType(), Loc); 6060 } 6061 RCG.emitAggregateType(CGF, N, Size); 6062 // Emit the finalizer body: 6063 // <destroy>(<type>* %0) 6064 RCG.emitCleanups(CGF, N, PrivateAddr); 6065 CGF.FinishFunction(Loc); 6066 return Fn; 6067 } 6068 6069 llvm::Value *CGOpenMPRuntime::emitTaskReductionInit( 6070 CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs, 6071 ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) { 6072 if (!CGF.HaveInsertPoint() || Data.ReductionVars.empty()) 6073 return nullptr; 6074 6075 // Build typedef struct: 6076 // kmp_taskred_input { 6077 // void *reduce_shar; // shared reduction item 6078 // void *reduce_orig; // original reduction item used for initialization 6079 // size_t reduce_size; // size of data item 6080 // void *reduce_init; // data initialization routine 6081 // void *reduce_fini; // data finalization routine 6082 // void *reduce_comb; // data combiner routine 6083 // kmp_task_red_flags_t flags; // flags for additional info from compiler 6084 // } kmp_taskred_input_t; 6085 ASTContext &C = CGM.getContext(); 6086 RecordDecl *RD = C.buildImplicitRecord("kmp_taskred_input_t"); 6087 RD->startDefinition(); 6088 const FieldDecl *SharedFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy); 6089 const FieldDecl *OrigFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy); 6090 const FieldDecl *SizeFD = addFieldToRecordDecl(C, RD, C.getSizeType()); 6091 const FieldDecl *InitFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy); 6092 const FieldDecl *FiniFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy); 6093 const FieldDecl *CombFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy); 6094 const FieldDecl *FlagsFD = addFieldToRecordDecl( 6095 C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/false)); 6096 RD->completeDefinition(); 6097 QualType RDType = C.getRecordType(RD); 6098 unsigned Size = Data.ReductionVars.size(); 6099 llvm::APInt ArraySize(/*numBits=*/64, Size); 6100 QualType ArrayRDType = C.getConstantArrayType( 6101 RDType, ArraySize, nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0); 6102 // kmp_task_red_input_t .rd_input.[Size]; 6103 Address TaskRedInput = CGF.CreateMemTemp(ArrayRDType, ".rd_input."); 6104 ReductionCodeGen RCG(Data.ReductionVars, Data.ReductionOrigs, 6105 Data.ReductionCopies, Data.ReductionOps); 6106 for (unsigned Cnt = 0; Cnt < Size; ++Cnt) { 6107 // kmp_task_red_input_t &ElemLVal = .rd_input.[Cnt]; 6108 llvm::Value *Idxs[] = {llvm::ConstantInt::get(CGM.SizeTy, /*V=*/0), 6109 llvm::ConstantInt::get(CGM.SizeTy, Cnt)}; 6110 llvm::Value *GEP = CGF.EmitCheckedInBoundsGEP( 6111 TaskRedInput.getElementType(), TaskRedInput.getPointer(), Idxs, 6112 /*SignedIndices=*/false, /*IsSubtraction=*/false, Loc, 6113 ".rd_input.gep."); 6114 LValue ElemLVal = CGF.MakeNaturalAlignAddrLValue(GEP, RDType); 6115 // ElemLVal.reduce_shar = &Shareds[Cnt]; 6116 LValue SharedLVal = CGF.EmitLValueForField(ElemLVal, SharedFD); 6117 RCG.emitSharedOrigLValue(CGF, Cnt); 6118 llvm::Value *CastedShared = 6119 CGF.EmitCastToVoidPtr(RCG.getSharedLValue(Cnt).getPointer(CGF)); 6120 CGF.EmitStoreOfScalar(CastedShared, SharedLVal); 6121 // ElemLVal.reduce_orig = &Origs[Cnt]; 6122 LValue OrigLVal = CGF.EmitLValueForField(ElemLVal, OrigFD); 6123 llvm::Value *CastedOrig = 6124 
CGF.EmitCastToVoidPtr(RCG.getOrigLValue(Cnt).getPointer(CGF)); 6125 CGF.EmitStoreOfScalar(CastedOrig, OrigLVal); 6126 RCG.emitAggregateType(CGF, Cnt); 6127 llvm::Value *SizeValInChars; 6128 llvm::Value *SizeVal; 6129 std::tie(SizeValInChars, SizeVal) = RCG.getSizes(Cnt); 6130 // We use delayed creation/initialization for VLAs and array sections. It is 6131 // required because the runtime does not provide a way to pass the sizes of 6132 // VLAs/array sections to the initializer/combiner/finalizer functions. 6133 // Instead, threadprivate global variables are used to store these values and 6134 // make them available to those functions. 6135 bool DelayedCreation = !!SizeVal; 6136 SizeValInChars = CGF.Builder.CreateIntCast(SizeValInChars, CGM.SizeTy, 6137 /*isSigned=*/false); 6138 LValue SizeLVal = CGF.EmitLValueForField(ElemLVal, SizeFD); 6139 CGF.EmitStoreOfScalar(SizeValInChars, SizeLVal); 6140 // ElemLVal.reduce_init = init; 6141 LValue InitLVal = CGF.EmitLValueForField(ElemLVal, InitFD); 6142 llvm::Value *InitAddr = 6143 CGF.EmitCastToVoidPtr(emitReduceInitFunction(CGM, Loc, RCG, Cnt)); 6144 CGF.EmitStoreOfScalar(InitAddr, InitLVal); 6145 // ElemLVal.reduce_fini = fini; 6146 LValue FiniLVal = CGF.EmitLValueForField(ElemLVal, FiniFD); 6147 llvm::Value *Fini = emitReduceFiniFunction(CGM, Loc, RCG, Cnt); 6148 llvm::Value *FiniAddr = Fini 6149 ? CGF.EmitCastToVoidPtr(Fini) 6150 : llvm::ConstantPointerNull::get(CGM.VoidPtrTy); 6151 CGF.EmitStoreOfScalar(FiniAddr, FiniLVal); 6152 // ElemLVal.reduce_comb = comb; 6153 LValue CombLVal = CGF.EmitLValueForField(ElemLVal, CombFD); 6154 llvm::Value *CombAddr = CGF.EmitCastToVoidPtr(emitReduceCombFunction( 6155 CGM, Loc, RCG, Cnt, Data.ReductionOps[Cnt], LHSExprs[Cnt], 6156 RHSExprs[Cnt], Data.ReductionCopies[Cnt])); 6157 CGF.EmitStoreOfScalar(CombAddr, CombLVal); 6158 // ElemLVal.flags = DelayedCreation ? 1 : 0; 6159 LValue FlagsLVal = CGF.EmitLValueForField(ElemLVal, FlagsFD); 6160 if (DelayedCreation) { 6161 CGF.EmitStoreOfScalar( 6162 llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/1, /*isSigned=*/true), 6163 FlagsLVal); 6164 } else 6165 CGF.EmitNullInitialization(FlagsLVal.getAddress(CGF), 6166 FlagsLVal.getType()); 6167 } 6168 if (Data.IsReductionWithTaskMod) { 6169 // Build call void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int 6170 // is_ws, int num, void *data); 6171 llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc); 6172 llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), 6173 CGM.IntTy, /*isSigned=*/true); 6174 llvm::Value *Args[] = { 6175 IdentTLoc, GTid, 6176 llvm::ConstantInt::get(CGM.IntTy, Data.IsWorksharingReduction ?
1 : 0, 6177 /*isSigned=*/true), 6178 llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true), 6179 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 6180 TaskRedInput.getPointer(), CGM.VoidPtrTy)}; 6181 return CGF.EmitRuntimeCall( 6182 OMPBuilder.getOrCreateRuntimeFunction( 6183 CGM.getModule(), OMPRTL___kmpc_taskred_modifier_init), 6184 Args); 6185 } 6186 // Build call void *__kmpc_taskred_init(int gtid, int num_data, void *data); 6187 llvm::Value *Args[] = { 6188 CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy, 6189 /*isSigned=*/true), 6190 llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true), 6191 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TaskRedInput.getPointer(), 6192 CGM.VoidPtrTy)}; 6193 return CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 6194 CGM.getModule(), OMPRTL___kmpc_taskred_init), 6195 Args); 6196 } 6197 6198 void CGOpenMPRuntime::emitTaskReductionFini(CodeGenFunction &CGF, 6199 SourceLocation Loc, 6200 bool IsWorksharingReduction) { 6201 // Build call void __kmpc_task_reduction_modifier_fini(ident_t *loc, int 6202 // gtid, int is_ws); 6203 llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc); 6204 llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), 6205 CGM.IntTy, /*isSigned=*/true); 6206 llvm::Value *Args[] = {IdentTLoc, GTid, 6207 llvm::ConstantInt::get(CGM.IntTy, 6208 IsWorksharingReduction ? 1 : 0, 6209 /*isSigned=*/true)}; 6210 (void)CGF.EmitRuntimeCall( 6211 OMPBuilder.getOrCreateRuntimeFunction( 6212 CGM.getModule(), OMPRTL___kmpc_task_reduction_modifier_fini), 6213 Args); 6214 } 6215 6216 void CGOpenMPRuntime::emitTaskReductionFixups(CodeGenFunction &CGF, 6217 SourceLocation Loc, 6218 ReductionCodeGen &RCG, 6219 unsigned N) { 6220 auto Sizes = RCG.getSizes(N); 6221 // Emit a threadprivate global variable if the size of the reduction item is 6222 // non-constant (Sizes.second != nullptr). 6223 if (Sizes.second) { 6224 llvm::Value *SizeVal = CGF.Builder.CreateIntCast(Sizes.second, CGM.SizeTy, 6225 /*isSigned=*/false); 6226 Address SizeAddr = getAddrOfArtificialThreadPrivate( 6227 CGF, CGM.getContext().getSizeType(), 6228 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N))); 6229 CGF.Builder.CreateStore(SizeVal, SizeAddr, /*IsVolatile=*/false); 6230 } 6231 } 6232 6233 Address CGOpenMPRuntime::getTaskReductionItem(CodeGenFunction &CGF, 6234 SourceLocation Loc, 6235 llvm::Value *ReductionsPtr, 6236 LValue SharedLVal) { 6237 // Build call void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void 6238 // *d); 6239 llvm::Value *Args[] = {CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), 6240 CGM.IntTy, 6241 /*isSigned=*/true), 6242 ReductionsPtr, 6243 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 6244 SharedLVal.getPointer(CGF), CGM.VoidPtrTy)}; 6245 return Address::deprecated( 6246 CGF.EmitRuntimeCall( 6247 OMPBuilder.getOrCreateRuntimeFunction( 6248 CGM.getModule(), OMPRTL___kmpc_task_reduction_get_th_data), 6249 Args), 6250 SharedLVal.getAlignment()); 6251 } 6252 6253 void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF, SourceLocation Loc, 6254 const OMPTaskDataTy &Data) { 6255 if (!CGF.HaveInsertPoint()) 6256 return; 6257 6258 if (CGF.CGM.getLangOpts().OpenMPIRBuilder && Data.Dependences.empty()) { 6259 // TODO: Need to support taskwait with dependences in the OpenMPIRBuilder.
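// (Until then, any taskwait with dependences takes the else branch below, which emits __kmpc_omp_wait_deps directly.)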
6260 OMPBuilder.createTaskwait(CGF.Builder); 6261 } else { 6262 llvm::Value *ThreadID = getThreadID(CGF, Loc); 6263 llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc); 6264 auto &M = CGM.getModule(); 6265 Address DependenciesArray = Address::invalid(); 6266 llvm::Value *NumOfElements; 6267 std::tie(NumOfElements, DependenciesArray) = 6268 emitDependClause(CGF, Data.Dependences, Loc); 6269 llvm::Value *DepWaitTaskArgs[6]; 6270 if (!Data.Dependences.empty()) { 6271 DepWaitTaskArgs[0] = UpLoc; 6272 DepWaitTaskArgs[1] = ThreadID; 6273 DepWaitTaskArgs[2] = NumOfElements; 6274 DepWaitTaskArgs[3] = DependenciesArray.getPointer(); 6275 DepWaitTaskArgs[4] = CGF.Builder.getInt32(0); 6276 DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); 6277 6278 CodeGenFunction::RunCleanupsScope LocalScope(CGF); 6279 6280 // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid, 6281 // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 6282 // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info 6283 // is specified. 6284 CGF.EmitRuntimeCall( 6285 OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_omp_wait_deps), 6286 DepWaitTaskArgs); 6287 6288 } else { 6289 6290 // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32 6291 // global_tid); 6292 llvm::Value *Args[] = {UpLoc, ThreadID}; 6293 // Ignore return result until untied tasks are supported. 6294 CGF.EmitRuntimeCall( 6295 OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_omp_taskwait), 6296 Args); 6297 } 6298 } 6299 6300 if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) 6301 Region->emitUntiedSwitch(CGF); 6302 } 6303 6304 void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF, 6305 OpenMPDirectiveKind InnerKind, 6306 const RegionCodeGenTy &CodeGen, 6307 bool HasCancel) { 6308 if (!CGF.HaveInsertPoint()) 6309 return; 6310 InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel, 6311 InnerKind != OMPD_critical && 6312 InnerKind != OMPD_master && 6313 InnerKind != OMPD_masked); 6314 CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr); 6315 } 6316 6317 namespace { 6318 enum RTCancelKind { 6319 CancelNoreq = 0, 6320 CancelParallel = 1, 6321 CancelLoop = 2, 6322 CancelSections = 3, 6323 CancelTaskgroup = 4 6324 }; 6325 } // anonymous namespace 6326 6327 static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) { 6328 RTCancelKind CancelKind = CancelNoreq; 6329 if (CancelRegion == OMPD_parallel) 6330 CancelKind = CancelParallel; 6331 else if (CancelRegion == OMPD_for) 6332 CancelKind = CancelLoop; 6333 else if (CancelRegion == OMPD_sections) 6334 CancelKind = CancelSections; 6335 else { 6336 assert(CancelRegion == OMPD_taskgroup); 6337 CancelKind = CancelTaskgroup; 6338 } 6339 return CancelKind; 6340 } 6341 6342 void CGOpenMPRuntime::emitCancellationPointCall( 6343 CodeGenFunction &CGF, SourceLocation Loc, 6344 OpenMPDirectiveKind CancelRegion) { 6345 if (!CGF.HaveInsertPoint()) 6346 return; 6347 // Build call kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32 6348 // global_tid, kmp_int32 cncl_kind); 6349 if (auto *OMPRegionInfo = 6350 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) { 6351 // For 'cancellation point taskgroup', the task region info may not have a 6352 // cancel. This may instead happen in another adjacent task. 
6353 if (CancelRegion == OMPD_taskgroup || OMPRegionInfo->hasCancel()) { 6354 llvm::Value *Args[] = { 6355 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 6356 CGF.Builder.getInt32(getCancellationKind(CancelRegion))}; 6357 // A nonzero result means that cancellation was activated for this region. 6358 llvm::Value *Result = CGF.EmitRuntimeCall( 6359 OMPBuilder.getOrCreateRuntimeFunction( 6360 CGM.getModule(), OMPRTL___kmpc_cancellationpoint), 6361 Args); 6362 // if (__kmpc_cancellationpoint()) { 6363 // call i32 @__kmpc_cancel_barrier( // for parallel cancellation only 6364 // exit from construct; 6365 // } 6366 llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit"); 6367 llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue"); 6368 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result); 6369 CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB); 6370 CGF.EmitBlock(ExitBB); 6371 if (CancelRegion == OMPD_parallel) 6372 emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false); 6373 // exit from construct; 6374 CodeGenFunction::JumpDest CancelDest = 6375 CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind()); 6376 CGF.EmitBranchThroughCleanup(CancelDest); 6377 CGF.EmitBlock(ContBB, /*IsFinished=*/true); 6378 } 6379 } 6380 } 6381 6382 void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc, 6383 const Expr *IfCond, 6384 OpenMPDirectiveKind CancelRegion) { 6385 if (!CGF.HaveInsertPoint()) 6386 return; 6387 // Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid, 6388 // kmp_int32 cncl_kind); 6389 auto &M = CGM.getModule(); 6390 if (auto *OMPRegionInfo = 6391 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) { 6392 auto &&ThenGen = [this, &M, Loc, CancelRegion, 6393 OMPRegionInfo](CodeGenFunction &CGF, PrePostActionTy &) { 6394 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime(); 6395 llvm::Value *Args[] = { 6396 RT.emitUpdateLocation(CGF, Loc), RT.getThreadID(CGF, Loc), 6397 CGF.Builder.getInt32(getCancellationKind(CancelRegion))}; 6398 // A nonzero result means that cancellation was activated for this region. 6399 llvm::Value *Result = CGF.EmitRuntimeCall( 6400 OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_cancel), Args); 6401 // if (__kmpc_cancel()) { 6402 // call i32 @__kmpc_cancel_barrier( // for parallel cancellation only 6403 // exit from construct; 6404 // } 6405 llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit"); 6406 llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue"); 6407 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result); 6408 CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB); 6409 CGF.EmitBlock(ExitBB); 6410 if (CancelRegion == OMPD_parallel) 6411 RT.emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false); 6412 // exit from construct; 6413 CodeGenFunction::JumpDest CancelDest = 6414 CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind()); 6415 CGF.EmitBranchThroughCleanup(CancelDest); 6416 CGF.EmitBlock(ContBB, /*IsFinished=*/true); 6417 }; 6418 if (IfCond) { 6419 emitIfClause(CGF, IfCond, ThenGen, 6420 [](CodeGenFunction &, PrePostActionTy &) {}); 6421 } else { 6422 RegionCodeGenTy ThenRCG(ThenGen); 6423 ThenRCG(CGF); 6424 } 6425 } 6426 } 6427 6428 namespace { 6429 /// Cleanup action for uses_allocators support.
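/// On entry to the target region every allocator listed in the clause is /// initialized via __kmpc_init_allocator, and on exit each one is destroyed /// via __kmpc_destroy_allocator (see emitUsesAllocatorsInit and /// emitUsesAllocatorsFini below).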
6430 class OMPUsesAllocatorsActionTy final : public PrePostActionTy { 6431 ArrayRef<std::pair<const Expr *, const Expr *>> Allocators; 6432 6433 public: 6434 OMPUsesAllocatorsActionTy( 6435 ArrayRef<std::pair<const Expr *, const Expr *>> Allocators) 6436 : Allocators(Allocators) {} 6437 void Enter(CodeGenFunction &CGF) override { 6438 if (!CGF.HaveInsertPoint()) 6439 return; 6440 for (const auto &AllocatorData : Allocators) { 6441 CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsInit( 6442 CGF, AllocatorData.first, AllocatorData.second); 6443 } 6444 } 6445 void Exit(CodeGenFunction &CGF) override { 6446 if (!CGF.HaveInsertPoint()) 6447 return; 6448 for (const auto &AllocatorData : Allocators) { 6449 CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsFini(CGF, 6450 AllocatorData.first); 6451 } 6452 } 6453 }; 6454 } // namespace 6455 6456 void CGOpenMPRuntime::emitTargetOutlinedFunction( 6457 const OMPExecutableDirective &D, StringRef ParentName, 6458 llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, 6459 bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) { 6460 assert(!ParentName.empty() && "Invalid target region parent name!"); 6461 HasEmittedTargetRegion = true; 6462 SmallVector<std::pair<const Expr *, const Expr *>, 4> Allocators; 6463 for (const auto *C : D.getClausesOfKind<OMPUsesAllocatorsClause>()) { 6464 for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) { 6465 const OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I); 6466 if (!D.AllocatorTraits) 6467 continue; 6468 Allocators.emplace_back(D.Allocator, D.AllocatorTraits); 6469 } 6470 } 6471 OMPUsesAllocatorsActionTy UsesAllocatorAction(Allocators); 6472 CodeGen.setAction(UsesAllocatorAction); 6473 emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID, 6474 IsOffloadEntry, CodeGen); 6475 } 6476 6477 void CGOpenMPRuntime::emitUsesAllocatorsInit(CodeGenFunction &CGF, 6478 const Expr *Allocator, 6479 const Expr *AllocatorTraits) { 6480 llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc()); 6481 ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true); 6482 // Use default memspace handle. 6483 llvm::Value *MemSpaceHandle = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); 6484 llvm::Value *NumTraits = llvm::ConstantInt::get( 6485 CGF.IntTy, cast<ConstantArrayType>( 6486 AllocatorTraits->getType()->getAsArrayTypeUnsafe()) 6487 ->getSize() 6488 .getLimitedValue()); 6489 LValue AllocatorTraitsLVal = CGF.EmitLValue(AllocatorTraits); 6490 Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 6491 AllocatorTraitsLVal.getAddress(CGF), CGF.VoidPtrPtrTy); 6492 AllocatorTraitsLVal = CGF.MakeAddrLValue(Addr, CGF.getContext().VoidPtrTy, 6493 AllocatorTraitsLVal.getBaseInfo(), 6494 AllocatorTraitsLVal.getTBAAInfo()); 6495 llvm::Value *Traits = 6496 CGF.EmitLoadOfScalar(AllocatorTraitsLVal, AllocatorTraits->getExprLoc()); 6497 6498 llvm::Value *AllocatorVal = 6499 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 6500 CGM.getModule(), OMPRTL___kmpc_init_allocator), 6501 {ThreadId, MemSpaceHandle, NumTraits, Traits}); 6502 // Store to allocator. 
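// (Illustrative sketch: for 'uses_allocators(my_alloc(my_traits))', with 'my_alloc' and 'my_traits' hypothetical names, this amounts to 'my_alloc = (omp_allocator_handle_t)__kmpc_init_allocator(gtid, /*memspace=*/NULL, ntraits, traits);' - the allocator variable is declared and the runtime handle, converted back from void*, is stored into it.)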
6503 CGF.EmitVarDecl(*cast<VarDecl>( 6504 cast<DeclRefExpr>(Allocator->IgnoreParenImpCasts())->getDecl())); 6505 LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts()); 6506 AllocatorVal = 6507 CGF.EmitScalarConversion(AllocatorVal, CGF.getContext().VoidPtrTy, 6508 Allocator->getType(), Allocator->getExprLoc()); 6509 CGF.EmitStoreOfScalar(AllocatorVal, AllocatorLVal); 6510 } 6511 6512 void CGOpenMPRuntime::emitUsesAllocatorsFini(CodeGenFunction &CGF, 6513 const Expr *Allocator) { 6514 llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc()); 6515 ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true); 6516 LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts()); 6517 llvm::Value *AllocatorVal = 6518 CGF.EmitLoadOfScalar(AllocatorLVal, Allocator->getExprLoc()); 6519 AllocatorVal = CGF.EmitScalarConversion(AllocatorVal, Allocator->getType(), 6520 CGF.getContext().VoidPtrTy, 6521 Allocator->getExprLoc()); 6522 (void)CGF.EmitRuntimeCall( 6523 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), 6524 OMPRTL___kmpc_destroy_allocator), 6525 {ThreadId, AllocatorVal}); 6526 } 6527 6528 void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper( 6529 const OMPExecutableDirective &D, StringRef ParentName, 6530 llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, 6531 bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) { 6532 // Create a unique name for the entry function using the source location 6533 // information of the current target region. The name will be something like: 6534 // 6535 // __omp_offloading_DD_FFFF_PP_lBB 6536 // 6537 // where DD_FFFF is an ID unique to the file (device and file IDs), PP is the 6538 // mangled name of the function that encloses the target region and BB is the 6539 // line number of the target region. 6540 6541 unsigned DeviceID; 6542 unsigned FileID; 6543 unsigned Line; 6544 getTargetEntryUniqueInfo(CGM.getContext(), D.getBeginLoc(), DeviceID, FileID, 6545 Line); 6546 SmallString<64> EntryFnName; 6547 { 6548 llvm::raw_svector_ostream OS(EntryFnName); 6549 OS << "__omp_offloading" << llvm::format("_%x", DeviceID) 6550 << llvm::format("_%x_", FileID) << ParentName << "_l" << Line; 6551 } 6552 6553 const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target); 6554 6555 CodeGenFunction CGF(CGM, true); 6556 CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName); 6557 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 6558 6559 OutlinedFn = CGF.GenerateOpenMPCapturedStmtFunction(CS, D.getBeginLoc()); 6560 6561 // If this target outline function is not an offload entry, we don't need to 6562 // register it. 6563 if (!IsOffloadEntry) 6564 return; 6565 6566 // The target region ID is used by the runtime library to identify the current 6567 // target region, so it only has to be unique and not necessarily point to 6568 // anything. It could be the pointer to the outlined function that implements 6569 // the target region, but we do not use it, so that the compiler does not 6570 // need to keep it alive and can therefore inline the host function if proven 6571 // worthwhile during optimization. On the other hand, if emitting code for the 6572 // device, the ID has to be the function address so that it can be retrieved 6573 // from the offloading entry and launched by the runtime library. We also mark 6574 // the outlined function to have external linkage in case we are emitting code 6575 // for the device, because these functions will be entry points to the device.
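// Sketch of the two forms of the ID (names illustrative): on the host a zero-initialized global such as @"__omp_offloading_<DD_FFFF>_<PP>_l<BB>.region_id" = weak constant i8 0 serves as the ID; on the device the ID is the outlined function itself, bitcast to i8*.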
6576 6577 if (CGM.getLangOpts().OpenMPIsDevice) { 6578 OutlinedFnID = llvm::ConstantExpr::getBitCast(OutlinedFn, CGM.Int8PtrTy); 6579 OutlinedFn->setLinkage(llvm::GlobalValue::WeakAnyLinkage); 6580 OutlinedFn->setDSOLocal(false); 6581 if (CGM.getTriple().isAMDGCN()) 6582 OutlinedFn->setCallingConv(llvm::CallingConv::AMDGPU_KERNEL); 6583 } else { 6584 std::string Name = getName({EntryFnName, "region_id"}); 6585 OutlinedFnID = new llvm::GlobalVariable( 6586 CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true, 6587 llvm::GlobalValue::WeakAnyLinkage, 6588 llvm::Constant::getNullValue(CGM.Int8Ty), Name); 6589 } 6590 6591 // Register the information for the entry associated with this target region. 6592 OffloadEntriesInfoManager.registerTargetRegionEntryInfo( 6593 DeviceID, FileID, ParentName, Line, OutlinedFn, OutlinedFnID, 6594 OffloadEntriesInfoManagerTy::OMPTargetRegionEntryTargetRegion); 6595 6596 // Add NumTeams and ThreadLimit attributes to the outlined GPU function 6597 int32_t DefaultValTeams = -1; 6598 getNumTeamsExprForTargetDirective(CGF, D, DefaultValTeams); 6599 if (DefaultValTeams > 0) { 6600 OutlinedFn->addFnAttr("omp_target_num_teams", 6601 std::to_string(DefaultValTeams)); 6602 } 6603 int32_t DefaultValThreads = -1; 6604 getNumThreadsExprForTargetDirective(CGF, D, DefaultValThreads); 6605 if (DefaultValThreads > 0) { 6606 OutlinedFn->addFnAttr("omp_target_thread_limit", 6607 std::to_string(DefaultValThreads)); 6608 } 6609 6610 CGM.getTargetCodeGenInfo().setTargetAttributes(nullptr, OutlinedFn, CGM); 6611 } 6612 6613 /// Checks if the expression is constant or does not have non-trivial function 6614 /// calls. 6615 static bool isTrivial(ASTContext &Ctx, const Expr * E) { 6616 // We can skip constant expressions. 6617 // We can skip expressions with trivial calls or simple expressions. 6618 return (E->isEvaluatable(Ctx, Expr::SE_AllowUndefinedBehavior) || 6619 !E->hasNonTrivialCall(Ctx)) && 6620 !E->HasSideEffects(Ctx, /*IncludePossibleEffects=*/true); 6621 } 6622 6623 const Stmt *CGOpenMPRuntime::getSingleCompoundChild(ASTContext &Ctx, 6624 const Stmt *Body) { 6625 const Stmt *Child = Body->IgnoreContainers(); 6626 while (const auto *C = dyn_cast_or_null<CompoundStmt>(Child)) { 6627 Child = nullptr; 6628 for (const Stmt *S : C->body()) { 6629 if (const auto *E = dyn_cast<Expr>(S)) { 6630 if (isTrivial(Ctx, E)) 6631 continue; 6632 } 6633 // Some of the statements can be ignored. 6634 if (isa<AsmStmt>(S) || isa<NullStmt>(S) || isa<OMPFlushDirective>(S) || 6635 isa<OMPBarrierDirective>(S) || isa<OMPTaskyieldDirective>(S)) 6636 continue; 6637 // Analyze declarations. 6638 if (const auto *DS = dyn_cast<DeclStmt>(S)) { 6639 if (llvm::all_of(DS->decls(), [](const Decl *D) { 6640 if (isa<EmptyDecl>(D) || isa<DeclContext>(D) || 6641 isa<TypeDecl>(D) || isa<PragmaCommentDecl>(D) || 6642 isa<PragmaDetectMismatchDecl>(D) || isa<UsingDecl>(D) || 6643 isa<UsingDirectiveDecl>(D) || 6644 isa<OMPDeclareReductionDecl>(D) || 6645 isa<OMPThreadPrivateDecl>(D) || isa<OMPAllocateDecl>(D)) 6646 return true; 6647 const auto *VD = dyn_cast<VarDecl>(D); 6648 if (!VD) 6649 return false; 6650 return VD->hasGlobalStorage() || !VD->isUsed(); 6651 })) 6652 continue; 6653 } 6654 // Found multiple children - cannot get the one child only. 
6655 if (Child) 6656 return nullptr; 6657 Child = S; 6658 } 6659 if (Child) 6660 Child = Child->IgnoreContainers(); 6661 } 6662 return Child; 6663 } 6664 6665 const Expr *CGOpenMPRuntime::getNumTeamsExprForTargetDirective( 6666 CodeGenFunction &CGF, const OMPExecutableDirective &D, 6667 int32_t &DefaultVal) { 6668 6669 OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind(); 6670 assert(isOpenMPTargetExecutionDirective(DirectiveKind) && 6671 "Expected target-based executable directive."); 6672 switch (DirectiveKind) { 6673 case OMPD_target: { 6674 const auto *CS = D.getInnermostCapturedStmt(); 6675 const auto *Body = 6676 CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true); 6677 const Stmt *ChildStmt = 6678 CGOpenMPRuntime::getSingleCompoundChild(CGF.getContext(), Body); 6679 if (const auto *NestedDir = 6680 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) { 6681 if (isOpenMPTeamsDirective(NestedDir->getDirectiveKind())) { 6682 if (NestedDir->hasClausesOfKind<OMPNumTeamsClause>()) { 6683 const Expr *NumTeams = 6684 NestedDir->getSingleClause<OMPNumTeamsClause>()->getNumTeams(); 6685 if (NumTeams->isIntegerConstantExpr(CGF.getContext())) 6686 if (auto Constant = 6687 NumTeams->getIntegerConstantExpr(CGF.getContext())) 6688 DefaultVal = Constant->getExtValue(); 6689 return NumTeams; 6690 } 6691 DefaultVal = 0; 6692 return nullptr; 6693 } 6694 if (isOpenMPParallelDirective(NestedDir->getDirectiveKind()) || 6695 isOpenMPSimdDirective(NestedDir->getDirectiveKind())) { 6696 DefaultVal = 1; 6697 return nullptr; 6698 } 6699 DefaultVal = 1; 6700 return nullptr; 6701 } 6702 // A value of -1 is used to check if we need to emit no teams region 6703 DefaultVal = -1; 6704 return nullptr; 6705 } 6706 case OMPD_target_teams: 6707 case OMPD_target_teams_distribute: 6708 case OMPD_target_teams_distribute_simd: 6709 case OMPD_target_teams_distribute_parallel_for: 6710 case OMPD_target_teams_distribute_parallel_for_simd: { 6711 if (D.hasClausesOfKind<OMPNumTeamsClause>()) { 6712 const Expr *NumTeams = 6713 D.getSingleClause<OMPNumTeamsClause>()->getNumTeams(); 6714 if (NumTeams->isIntegerConstantExpr(CGF.getContext())) 6715 if (auto Constant = NumTeams->getIntegerConstantExpr(CGF.getContext())) 6716 DefaultVal = Constant->getExtValue(); 6717 return NumTeams; 6718 } 6719 DefaultVal = 0; 6720 return nullptr; 6721 } 6722 case OMPD_target_parallel: 6723 case OMPD_target_parallel_for: 6724 case OMPD_target_parallel_for_simd: 6725 case OMPD_target_simd: 6726 DefaultVal = 1; 6727 return nullptr; 6728 case OMPD_parallel: 6729 case OMPD_for: 6730 case OMPD_parallel_for: 6731 case OMPD_parallel_master: 6732 case OMPD_parallel_sections: 6733 case OMPD_for_simd: 6734 case OMPD_parallel_for_simd: 6735 case OMPD_cancel: 6736 case OMPD_cancellation_point: 6737 case OMPD_ordered: 6738 case OMPD_threadprivate: 6739 case OMPD_allocate: 6740 case OMPD_task: 6741 case OMPD_simd: 6742 case OMPD_tile: 6743 case OMPD_unroll: 6744 case OMPD_sections: 6745 case OMPD_section: 6746 case OMPD_single: 6747 case OMPD_master: 6748 case OMPD_critical: 6749 case OMPD_taskyield: 6750 case OMPD_barrier: 6751 case OMPD_taskwait: 6752 case OMPD_taskgroup: 6753 case OMPD_atomic: 6754 case OMPD_flush: 6755 case OMPD_depobj: 6756 case OMPD_scan: 6757 case OMPD_teams: 6758 case OMPD_target_data: 6759 case OMPD_target_exit_data: 6760 case OMPD_target_enter_data: 6761 case OMPD_distribute: 6762 case OMPD_distribute_simd: 6763 case OMPD_distribute_parallel_for: 6764 case OMPD_distribute_parallel_for_simd: 6765 case 
OMPD_teams_distribute: 6766 case OMPD_teams_distribute_simd: 6767 case OMPD_teams_distribute_parallel_for: 6768 case OMPD_teams_distribute_parallel_for_simd: 6769 case OMPD_target_update: 6770 case OMPD_declare_simd: 6771 case OMPD_declare_variant: 6772 case OMPD_begin_declare_variant: 6773 case OMPD_end_declare_variant: 6774 case OMPD_declare_target: 6775 case OMPD_end_declare_target: 6776 case OMPD_declare_reduction: 6777 case OMPD_declare_mapper: 6778 case OMPD_taskloop: 6779 case OMPD_taskloop_simd: 6780 case OMPD_master_taskloop: 6781 case OMPD_master_taskloop_simd: 6782 case OMPD_parallel_master_taskloop: 6783 case OMPD_parallel_master_taskloop_simd: 6784 case OMPD_requires: 6785 case OMPD_metadirective: 6786 case OMPD_unknown: 6787 break; 6788 default: 6789 break; 6790 } 6791 llvm_unreachable("Unexpected directive kind."); 6792 } 6793 6794 llvm::Value *CGOpenMPRuntime::emitNumTeamsForTargetDirective( 6795 CodeGenFunction &CGF, const OMPExecutableDirective &D) { 6796 assert(!CGF.getLangOpts().OpenMPIsDevice && 6797 "Clauses associated with the teams directive expected to be emitted " 6798 "only for the host!"); 6799 CGBuilderTy &Bld = CGF.Builder; 6800 int32_t DefaultNT = -1; 6801 const Expr *NumTeams = getNumTeamsExprForTargetDirective(CGF, D, DefaultNT); 6802 if (NumTeams != nullptr) { 6803 OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind(); 6804 6805 switch (DirectiveKind) { 6806 case OMPD_target: { 6807 const auto *CS = D.getInnermostCapturedStmt(); 6808 CGOpenMPInnerExprInfo CGInfo(CGF, *CS); 6809 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 6810 llvm::Value *NumTeamsVal = CGF.EmitScalarExpr(NumTeams, 6811 /*IgnoreResultAssign*/ true); 6812 return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty, 6813 /*isSigned=*/true); 6814 } 6815 case OMPD_target_teams: 6816 case OMPD_target_teams_distribute: 6817 case OMPD_target_teams_distribute_simd: 6818 case OMPD_target_teams_distribute_parallel_for: 6819 case OMPD_target_teams_distribute_parallel_for_simd: { 6820 CodeGenFunction::RunCleanupsScope NumTeamsScope(CGF); 6821 llvm::Value *NumTeamsVal = CGF.EmitScalarExpr(NumTeams, 6822 /*IgnoreResultAssign*/ true); 6823 return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty, 6824 /*isSigned=*/true); 6825 } 6826 default: 6827 break; 6828 } 6829 } else if (DefaultNT == -1) { 6830 return nullptr; 6831 } 6832 6833 return Bld.getInt32(DefaultNT); 6834 } 6835 6836 static llvm::Value *getNumThreads(CodeGenFunction &CGF, const CapturedStmt *CS, 6837 llvm::Value *DefaultThreadLimitVal) { 6838 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild( 6839 CGF.getContext(), CS->getCapturedStmt()); 6840 if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) { 6841 if (isOpenMPParallelDirective(Dir->getDirectiveKind())) { 6842 llvm::Value *NumThreads = nullptr; 6843 llvm::Value *CondVal = nullptr; 6844 // Handle if clause. If if clause present, the number of threads is 6845 // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1. 
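// (E.g. for a nested '#pragma omp parallel if(c) num_threads(n)' this yields 'c ? n : 1', and 'c ? 0 : 1' without a num_threads clause, where 0 lets the runtime pick the default; hypothetical pragma shown for illustration.)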
6846 if (Dir->hasClausesOfKind<OMPIfClause>()) { 6847 CGOpenMPInnerExprInfo CGInfo(CGF, *CS); 6848 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 6849 const OMPIfClause *IfClause = nullptr; 6850 for (const auto *C : Dir->getClausesOfKind<OMPIfClause>()) { 6851 if (C->getNameModifier() == OMPD_unknown || 6852 C->getNameModifier() == OMPD_parallel) { 6853 IfClause = C; 6854 break; 6855 } 6856 } 6857 if (IfClause) { 6858 const Expr *Cond = IfClause->getCondition(); 6859 bool Result; 6860 if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) { 6861 if (!Result) 6862 return CGF.Builder.getInt32(1); 6863 } else { 6864 CodeGenFunction::LexicalScope Scope(CGF, Cond->getSourceRange()); 6865 if (const auto *PreInit = 6866 cast_or_null<DeclStmt>(IfClause->getPreInitStmt())) { 6867 for (const auto *I : PreInit->decls()) { 6868 if (!I->hasAttr<OMPCaptureNoInitAttr>()) { 6869 CGF.EmitVarDecl(cast<VarDecl>(*I)); 6870 } else { 6871 CodeGenFunction::AutoVarEmission Emission = 6872 CGF.EmitAutoVarAlloca(cast<VarDecl>(*I)); 6873 CGF.EmitAutoVarCleanups(Emission); 6874 } 6875 } 6876 } 6877 CondVal = CGF.EvaluateExprAsBool(Cond); 6878 } 6879 } 6880 } 6881 // Check the value of num_threads clause iff if clause was not specified 6882 // or is not evaluated to false. 6883 if (Dir->hasClausesOfKind<OMPNumThreadsClause>()) { 6884 CGOpenMPInnerExprInfo CGInfo(CGF, *CS); 6885 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 6886 const auto *NumThreadsClause = 6887 Dir->getSingleClause<OMPNumThreadsClause>(); 6888 CodeGenFunction::LexicalScope Scope( 6889 CGF, NumThreadsClause->getNumThreads()->getSourceRange()); 6890 if (const auto *PreInit = 6891 cast_or_null<DeclStmt>(NumThreadsClause->getPreInitStmt())) { 6892 for (const auto *I : PreInit->decls()) { 6893 if (!I->hasAttr<OMPCaptureNoInitAttr>()) { 6894 CGF.EmitVarDecl(cast<VarDecl>(*I)); 6895 } else { 6896 CodeGenFunction::AutoVarEmission Emission = 6897 CGF.EmitAutoVarAlloca(cast<VarDecl>(*I)); 6898 CGF.EmitAutoVarCleanups(Emission); 6899 } 6900 } 6901 } 6902 NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads()); 6903 NumThreads = CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, 6904 /*isSigned=*/false); 6905 if (DefaultThreadLimitVal) 6906 NumThreads = CGF.Builder.CreateSelect( 6907 CGF.Builder.CreateICmpULT(DefaultThreadLimitVal, NumThreads), 6908 DefaultThreadLimitVal, NumThreads); 6909 } else { 6910 NumThreads = DefaultThreadLimitVal ? DefaultThreadLimitVal 6911 : CGF.Builder.getInt32(0); 6912 } 6913 // Process condition of the if clause. 6914 if (CondVal) { 6915 NumThreads = CGF.Builder.CreateSelect(CondVal, NumThreads, 6916 CGF.Builder.getInt32(1)); 6917 } 6918 return NumThreads; 6919 } 6920 if (isOpenMPSimdDirective(Dir->getDirectiveKind())) 6921 return CGF.Builder.getInt32(1); 6922 return DefaultThreadLimitVal; 6923 } 6924 return DefaultThreadLimitVal ? 
                                   DefaultThreadLimitVal
                               : CGF.Builder.getInt32(0);
}

const Expr *CGOpenMPRuntime::getNumThreadsExprForTargetDirective(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    int32_t &DefaultVal) {
  OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
  assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
         "Expected target-based executable directive.");

  switch (DirectiveKind) {
  case OMPD_target:
    // A 'target' construct by itself carries no thread_limit clause.
    return nullptr;
  case OMPD_target_teams:
  case OMPD_target_teams_distribute:
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      const Expr *ThreadLimit = ThreadLimitClause->getThreadLimit();
      if (ThreadLimit->isIntegerConstantExpr(CGF.getContext()))
        if (auto Constant =
                ThreadLimit->getIntegerConstantExpr(CGF.getContext()))
          DefaultVal = Constant->getExtValue();
      return ThreadLimit;
    }
    return nullptr;
  case OMPD_target_parallel:
  case OMPD_target_parallel_for:
  case OMPD_target_parallel_for_simd:
  case OMPD_target_teams_distribute_parallel_for:
  case OMPD_target_teams_distribute_parallel_for_simd: {
    Expr *ThreadLimit = nullptr;
    Expr *NumThreads = nullptr;
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      ThreadLimit = ThreadLimitClause->getThreadLimit();
      if (ThreadLimit->isIntegerConstantExpr(CGF.getContext()))
        if (auto Constant =
                ThreadLimit->getIntegerConstantExpr(CGF.getContext()))
          DefaultVal = Constant->getExtValue();
    }
    if (D.hasClausesOfKind<OMPNumThreadsClause>()) {
      const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>();
      NumThreads = NumThreadsClause->getNumThreads();
      if (NumThreads->isIntegerConstantExpr(CGF.getContext())) {
        if (auto Constant =
                NumThreads->getIntegerConstantExpr(CGF.getContext())) {
          if (Constant->getExtValue() < DefaultVal) {
            DefaultVal = Constant->getExtValue();
            ThreadLimit = NumThreads;
          }
        }
      }
    }
    return ThreadLimit;
  }
  case OMPD_target_teams_distribute_simd:
  case OMPD_target_simd:
    DefaultVal = 1;
    return nullptr;
  case OMPD_parallel:
  case OMPD_for:
  case OMPD_parallel_for:
  case OMPD_parallel_master:
  case OMPD_parallel_sections:
  case OMPD_for_simd:
  case OMPD_parallel_for_simd:
  case OMPD_cancel:
  case OMPD_cancellation_point:
  case OMPD_ordered:
  case OMPD_threadprivate:
  case OMPD_allocate:
  case OMPD_task:
  case OMPD_simd:
  case OMPD_tile:
  case OMPD_unroll:
  case OMPD_sections:
  case OMPD_section:
  case OMPD_single:
  case OMPD_master:
  case OMPD_critical:
  case OMPD_taskyield:
  case OMPD_barrier:
  case OMPD_taskwait:
  case OMPD_taskgroup:
  case OMPD_atomic:
  case OMPD_flush:
  case OMPD_depobj:
  case OMPD_scan:
  case OMPD_teams:
  case OMPD_target_data:
  case OMPD_target_exit_data:
  case OMPD_target_enter_data:
  case OMPD_distribute:
  case OMPD_distribute_simd:
  case OMPD_distribute_parallel_for:
  case OMPD_distribute_parallel_for_simd:
  case OMPD_teams_distribute:
  case OMPD_teams_distribute_simd:
  case OMPD_teams_distribute_parallel_for:
  case OMPD_teams_distribute_parallel_for_simd:
  case OMPD_target_update:
  case OMPD_declare_simd:
  case OMPD_declare_variant:
  case OMPD_begin_declare_variant:
  case OMPD_end_declare_variant:
  case OMPD_declare_target:
  case OMPD_end_declare_target:
  case OMPD_declare_reduction:
  case OMPD_declare_mapper:
  case OMPD_taskloop:
  case OMPD_taskloop_simd:
  case OMPD_master_taskloop:
  case OMPD_master_taskloop_simd:
  case OMPD_parallel_master_taskloop:
  case OMPD_parallel_master_taskloop_simd:
  case OMPD_requires:
  case OMPD_unknown:
    break;
  default:
    break;
  }
  llvm_unreachable("Unsupported directive kind.");
}

llvm::Value *CGOpenMPRuntime::emitNumThreadsForTargetDirective(
    CodeGenFunction &CGF, const OMPExecutableDirective &D) {
  assert(!CGF.getLangOpts().OpenMPIsDevice &&
         "Clauses associated with the teams directive expected to be emitted "
         "only for the host!");
  OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
  assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
         "Expected target-based executable directive.");
  CGBuilderTy &Bld = CGF.Builder;
  llvm::Value *ThreadLimitVal = nullptr;
  llvm::Value *NumThreadsVal = nullptr;
  switch (DirectiveKind) {
  case OMPD_target: {
    const CapturedStmt *CS = D.getInnermostCapturedStmt();
    if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
      return NumThreads;
    const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
        CGF.getContext(), CS->getCapturedStmt());
    if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
      if (Dir->hasClausesOfKind<OMPThreadLimitClause>()) {
        CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
        const auto *ThreadLimitClause =
            Dir->getSingleClause<OMPThreadLimitClause>();
        CodeGenFunction::LexicalScope Scope(
            CGF, ThreadLimitClause->getThreadLimit()->getSourceRange());
        if (const auto *PreInit =
                cast_or_null<DeclStmt>(ThreadLimitClause->getPreInitStmt())) {
          for (const auto *I : PreInit->decls()) {
            if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
              CGF.EmitVarDecl(cast<VarDecl>(*I));
            } else {
              CodeGenFunction::AutoVarEmission Emission =
                  CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
              CGF.EmitAutoVarCleanups(Emission);
            }
          }
        }
        llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
            ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
        ThreadLimitVal =
            Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
      }
      if (isOpenMPTeamsDirective(Dir->getDirectiveKind()) &&
          !isOpenMPDistributeDirective(Dir->getDirectiveKind())) {
        CS = Dir->getInnermostCapturedStmt();
        const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
            CGF.getContext(), CS->getCapturedStmt());
        Dir = dyn_cast_or_null<OMPExecutableDirective>(Child);
      }
      if (Dir && isOpenMPDistributeDirective(Dir->getDirectiveKind()) &&
          !isOpenMPSimdDirective(Dir->getDirectiveKind())) {
        CS = Dir->getInnermostCapturedStmt();
        if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
          return NumThreads;
      }
      if (Dir && isOpenMPSimdDirective(Dir->getDirectiveKind()))
        return Bld.getInt32(1);
    }
    return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0);
  }
  case OMPD_target_teams: {
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
    }
    const CapturedStmt *CS = D.getInnermostCapturedStmt();
    if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
      return NumThreads;
    const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
        CGF.getContext(), CS->getCapturedStmt());
    if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
      if (Dir->getDirectiveKind() == OMPD_distribute) {
        CS = Dir->getInnermostCapturedStmt();
        if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
          return NumThreads;
      }
    }
    return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0);
  }
  case OMPD_target_teams_distribute:
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
    }
    return getNumThreads(CGF, D.getInnermostCapturedStmt(), ThreadLimitVal);
  case OMPD_target_parallel:
  case OMPD_target_parallel_for:
  case OMPD_target_parallel_for_simd:
  case OMPD_target_teams_distribute_parallel_for:
  case OMPD_target_teams_distribute_parallel_for_simd: {
    llvm::Value *CondVal = nullptr;
    // Handle the if clause. If it is present, the number of threads is
    // calculated as <cond> ? (<numthreads> ? <numthreads> : 0) : 1.
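    // Illustrative example (not from the original source): for
    //   #pragma omp target parallel if(parallel: n > 1) num_threads(4)
    // the code below emits 'n > 1 ? 4 : 1', with the 4 first clamped by any
    // thread_limit clause present on the directive.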
    if (D.hasClausesOfKind<OMPIfClause>()) {
      const OMPIfClause *IfClause = nullptr;
      for (const auto *C : D.getClausesOfKind<OMPIfClause>()) {
        if (C->getNameModifier() == OMPD_unknown ||
            C->getNameModifier() == OMPD_parallel) {
          IfClause = C;
          break;
        }
      }
      if (IfClause) {
        const Expr *Cond = IfClause->getCondition();
        bool Result;
        if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
          if (!Result)
            return Bld.getInt32(1);
        } else {
          CodeGenFunction::RunCleanupsScope Scope(CGF);
          CondVal = CGF.EvaluateExprAsBool(Cond);
        }
      }
    }
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
    }
    if (D.hasClausesOfKind<OMPNumThreadsClause>()) {
      CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
      const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>();
      llvm::Value *NumThreads = CGF.EmitScalarExpr(
          NumThreadsClause->getNumThreads(), /*IgnoreResultAssign=*/true);
      NumThreadsVal =
          Bld.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned=*/false);
      ThreadLimitVal =
          ThreadLimitVal
              ? Bld.CreateSelect(
                    Bld.CreateICmpULT(NumThreadsVal, ThreadLimitVal),
                    NumThreadsVal, ThreadLimitVal)
              : NumThreadsVal;
    }
    if (!ThreadLimitVal)
      ThreadLimitVal = Bld.getInt32(0);
    if (CondVal)
      return Bld.CreateSelect(CondVal, ThreadLimitVal, Bld.getInt32(1));
    return ThreadLimitVal;
  }
  case OMPD_target_teams_distribute_simd:
  case OMPD_target_simd:
    return Bld.getInt32(1);
  case OMPD_parallel:
  case OMPD_for:
  case OMPD_parallel_for:
  case OMPD_parallel_master:
  case OMPD_parallel_sections:
  case OMPD_for_simd:
  case OMPD_parallel_for_simd:
  case OMPD_cancel:
  case OMPD_cancellation_point:
  case OMPD_ordered:
  case OMPD_threadprivate:
  case OMPD_allocate:
  case OMPD_task:
  case OMPD_simd:
  case OMPD_tile:
  case OMPD_unroll:
  case OMPD_sections:
  case OMPD_section:
  case OMPD_single:
  case OMPD_master:
  case OMPD_critical:
  case OMPD_taskyield:
  case OMPD_barrier:
  case OMPD_taskwait:
  case OMPD_taskgroup:
  case OMPD_atomic:
  case OMPD_flush:
  case OMPD_depobj:
  case OMPD_scan:
  case OMPD_teams:
  case OMPD_target_data:
  case OMPD_target_exit_data:
  case OMPD_target_enter_data:
  case OMPD_distribute:
  case OMPD_distribute_simd:
  case OMPD_distribute_parallel_for:
  case OMPD_distribute_parallel_for_simd:
  case OMPD_teams_distribute:
  case OMPD_teams_distribute_simd:
  case OMPD_teams_distribute_parallel_for:
  case OMPD_teams_distribute_parallel_for_simd:
  case OMPD_target_update:
  case OMPD_declare_simd:
  case OMPD_declare_variant:
  case OMPD_begin_declare_variant:
  case OMPD_end_declare_variant:
  case OMPD_declare_target:
  case OMPD_end_declare_target:
  case OMPD_declare_reduction:
  case OMPD_declare_mapper:
  case OMPD_taskloop:
  case OMPD_taskloop_simd:
  case OMPD_master_taskloop:
  case OMPD_master_taskloop_simd:
  case OMPD_parallel_master_taskloop:
  case OMPD_parallel_master_taskloop_simd:
  case OMPD_requires:
  case OMPD_metadirective:
  case OMPD_unknown:
    break;
  default:
    break;
  }
  llvm_unreachable("Unsupported directive kind.");
}

namespace {
LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();

// Utility to handle information from clauses associated with a given
// construct that use mappable expressions (e.g. 'map' clause, 'to' clause).
// It provides a convenient interface to obtain the information and generate
// code for that information.
class MappableExprsHandler {
public:
  /// Values for bit flags used to specify the mapping type for
  /// offloading.
  enum OpenMPOffloadMappingFlags : uint64_t {
    /// No flags
    OMP_MAP_NONE = 0x0,
    /// Allocate memory on the device and move data from host to device.
    OMP_MAP_TO = 0x01,
    /// Allocate memory on the device and move data from device to host.
    OMP_MAP_FROM = 0x02,
    /// Always perform the requested mapping action on the element, even
    /// if it was already mapped before.
    OMP_MAP_ALWAYS = 0x04,
    /// Delete the element from the device environment, ignoring the
    /// current reference count associated with the element.
    OMP_MAP_DELETE = 0x08,
    /// The element being mapped is a pointer-pointee pair; both the
    /// pointer and the pointee should be mapped.
    OMP_MAP_PTR_AND_OBJ = 0x10,
    /// This flag signals that the base address of an entry should be
    /// passed to the target kernel as an argument.
    OMP_MAP_TARGET_PARAM = 0x20,
    /// Signal that the runtime library has to return the device pointer
    /// in the current position for the data being mapped. Used when we have
    /// the use_device_ptr or use_device_addr clause.
    OMP_MAP_RETURN_PARAM = 0x40,
    /// This flag signals that the reference being passed is a pointer to
    /// private data.
    OMP_MAP_PRIVATE = 0x80,
    /// Pass the element to the device by value.
    OMP_MAP_LITERAL = 0x100,
    /// Implicit map
    OMP_MAP_IMPLICIT = 0x200,
    /// Close is a hint to the runtime to allocate memory close to
    /// the target device.
    OMP_MAP_CLOSE = 0x400,
    /// 0x800 is reserved for compatibility with XLC.
    /// Produce a runtime error if the data is not already allocated.
    OMP_MAP_PRESENT = 0x1000,
    /// Increment and decrement a separate reference counter so that the data
    /// cannot be unmapped within the associated region. Thus, this flag is
    /// intended to be used on 'target' and 'target data' directives because
    /// they are inherently structured. It is not intended to be used on
    /// 'target enter data' and 'target exit data' directives because they are
    /// inherently dynamic.
    /// This is an OpenMP extension for the sake of OpenACC support.
    OMP_MAP_OMPX_HOLD = 0x2000,
    /// Signal that the runtime library should use args as an array of
    /// descriptor_dim pointers and use args_size as dims. Used when we have
    /// non-contiguous list items in the target update directive.
    OMP_MAP_NON_CONTIG = 0x100000000000,
    /// The 16 MSBs of the flags indicate whether the entry is a member of
    /// some struct/class.
    OMP_MAP_MEMBER_OF = 0xffff000000000000,
    LLVM_MARK_AS_BITMASK_ENUM(/* LargestFlag = */ OMP_MAP_MEMBER_OF),
  };
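
  // Illustrative reading of the flags above (not from the original source):
  // a 'tofrom' map that is also a kernel argument carries
  // OMP_MAP_TO | OMP_MAP_FROM | OMP_MAP_TARGET_PARAM
  // (0x1 | 0x2 | 0x20 = 0x23), while the MEMBER_OF ordinal lives in the top
  // 16 bits (a shift of 48, as computed by getFlagMemberOffset() below).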

  /// Get the offset of the OMP_MAP_MEMBER_OF field.
  static unsigned getFlagMemberOffset() {
    unsigned Offset = 0;
    for (uint64_t Remain = OMP_MAP_MEMBER_OF; !(Remain & 1);
         Remain = Remain >> 1)
      Offset++;
    return Offset;
  }

  /// Class that holds debugging information for a data mapping to be passed
  /// to the runtime library.
  class MappingExprInfo {
    /// The variable declaration used for the data mapping.
    const ValueDecl *MapDecl = nullptr;
    /// The original expression used in the map clause, or null if there is
    /// none.
    const Expr *MapExpr = nullptr;

  public:
    MappingExprInfo(const ValueDecl *MapDecl, const Expr *MapExpr = nullptr)
        : MapDecl(MapDecl), MapExpr(MapExpr) {}

    const ValueDecl *getMapDecl() const { return MapDecl; }
    const Expr *getMapExpr() const { return MapExpr; }
  };

  /// Class that associates information with a base pointer to be passed to
  /// the runtime library.
  class BasePointerInfo {
    /// The base pointer.
    llvm::Value *Ptr = nullptr;
    /// The base declaration that refers to this device pointer, or null if
    /// there is none.
    const ValueDecl *DevPtrDecl = nullptr;

  public:
    BasePointerInfo(llvm::Value *Ptr, const ValueDecl *DevPtrDecl = nullptr)
        : Ptr(Ptr), DevPtrDecl(DevPtrDecl) {}
    llvm::Value *operator*() const { return Ptr; }
    const ValueDecl *getDevicePtrDecl() const { return DevPtrDecl; }
    void setDevicePtrDecl(const ValueDecl *D) { DevPtrDecl = D; }
  };

  using MapExprsArrayTy = SmallVector<MappingExprInfo, 4>;
  using MapBaseValuesArrayTy = SmallVector<BasePointerInfo, 4>;
  using MapValuesArrayTy = SmallVector<llvm::Value *, 4>;
  using MapFlagsArrayTy = SmallVector<OpenMPOffloadMappingFlags, 4>;
  using MapMappersArrayTy = SmallVector<const ValueDecl *, 4>;
  using MapDimArrayTy = SmallVector<uint64_t, 4>;
  using MapNonContiguousArrayTy = SmallVector<MapValuesArrayTy, 4>;

  /// This structure contains combined information generated for mappable
  /// clauses, including base pointers, pointers, sizes, map types,
  /// user-defined mappers, and non-contiguous information.
  struct MapCombinedInfoTy {
    struct StructNonContiguousInfo {
      bool IsNonContiguous = false;
      MapDimArrayTy Dims;
      MapNonContiguousArrayTy Offsets;
      MapNonContiguousArrayTy Counts;
      MapNonContiguousArrayTy Strides;
    };
    MapExprsArrayTy Exprs;
    MapBaseValuesArrayTy BasePointers;
    MapValuesArrayTy Pointers;
    MapValuesArrayTy Sizes;
    MapFlagsArrayTy Types;
    MapMappersArrayTy Mappers;
    StructNonContiguousInfo NonContigInfo;

    /// Append arrays in \a CurInfo.
    void append(MapCombinedInfoTy &CurInfo) {
      Exprs.append(CurInfo.Exprs.begin(), CurInfo.Exprs.end());
      BasePointers.append(CurInfo.BasePointers.begin(),
                          CurInfo.BasePointers.end());
      Pointers.append(CurInfo.Pointers.begin(), CurInfo.Pointers.end());
      Sizes.append(CurInfo.Sizes.begin(), CurInfo.Sizes.end());
      Types.append(CurInfo.Types.begin(), CurInfo.Types.end());
      Mappers.append(CurInfo.Mappers.begin(), CurInfo.Mappers.end());
      NonContigInfo.Dims.append(CurInfo.NonContigInfo.Dims.begin(),
                                CurInfo.NonContigInfo.Dims.end());
      NonContigInfo.Offsets.append(CurInfo.NonContigInfo.Offsets.begin(),
                                   CurInfo.NonContigInfo.Offsets.end());
      NonContigInfo.Counts.append(CurInfo.NonContigInfo.Counts.begin(),
                                  CurInfo.NonContigInfo.Counts.end());
      NonContigInfo.Strides.append(CurInfo.NonContigInfo.Strides.begin(),
                                   CurInfo.NonContigInfo.Strides.end());
    }
  };

  /// Map between a struct and its lowest & highest elements which have been
  /// mapped.
  /// [ValueDecl *] --> {LE(FieldIndex, Pointer),
  ///                    HE(FieldIndex, Pointer)}
  struct StructRangeInfoTy {
    MapCombinedInfoTy PreliminaryMapData;
    std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> LowestElem = {
        0, Address::invalid()};
    std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> HighestElem = {
        0, Address::invalid()};
    Address Base = Address::invalid();
    Address LB = Address::invalid();
    bool IsArraySection = false;
    bool HasCompleteRecord = false;
  };

private:
  /// Information about a single mappable-expression component list,
  /// including how the associated device pointer, if any, has to be
  /// returned.
  struct MapInfo {
    OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
    OpenMPMapClauseKind MapType = OMPC_MAP_unknown;
    ArrayRef<OpenMPMapModifierKind> MapModifiers;
    ArrayRef<OpenMPMotionModifierKind> MotionModifiers;
    bool ReturnDevicePointer = false;
    bool IsImplicit = false;
    const ValueDecl *Mapper = nullptr;
    const Expr *VarRef = nullptr;
    bool ForDeviceAddr = false;

    MapInfo() = default;
    MapInfo(
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
        OpenMPMapClauseKind MapType,
        ArrayRef<OpenMPMapModifierKind> MapModifiers,
        ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
        bool ReturnDevicePointer, bool IsImplicit,
        const ValueDecl *Mapper = nullptr, const Expr *VarRef = nullptr,
        bool ForDeviceAddr = false)
        : Components(Components), MapType(MapType), MapModifiers(MapModifiers),
          MotionModifiers(MotionModifiers),
          ReturnDevicePointer(ReturnDevicePointer), IsImplicit(IsImplicit),
          Mapper(Mapper), VarRef(VarRef), ForDeviceAddr(ForDeviceAddr) {}
  };

  /// If use_device_ptr or use_device_addr is used on a decl which is a struct
  /// member and there is no map information about it, then emission of that
  /// entry is deferred until the whole struct has been processed.
  struct DeferredDevicePtrEntryTy {
    const Expr *IE = nullptr;
    const ValueDecl *VD = nullptr;
    bool ForDeviceAddr = false;

    DeferredDevicePtrEntryTy(const Expr *IE, const ValueDecl *VD,
                             bool ForDeviceAddr)
        : IE(IE), VD(VD), ForDeviceAddr(ForDeviceAddr) {}
  };

  /// The target directive from where the mappable clauses were extracted. It
  /// is either an executable directive or a user-defined mapper directive.
  llvm::PointerUnion<const OMPExecutableDirective *,
                     const OMPDeclareMapperDecl *>
      CurDir;

  /// Function the directive is being generated for.
  CodeGenFunction &CGF;

  /// Set of all first private variables in the current directive.
  /// bool data is set to true if the variable is implicitly marked as
  /// firstprivate, false otherwise.
  llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, bool> FirstPrivateDecls;

  /// Map between device pointer declarations and their expression components.
  /// The key value for declarations in 'this' is null.
  llvm::DenseMap<
      const ValueDecl *,
      SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>>
      DevPointersMap;

  /// Map between lambda declarations and their map type.
  llvm::DenseMap<const ValueDecl *, const OMPMapClause *> LambdasMap;

  llvm::Value *getExprTypeSize(const Expr *E) const {
    QualType ExprTy = E->getType().getCanonicalType();

    // Calculate the size for an array shaping expression.
    if (const auto *OAE = dyn_cast<OMPArrayShapingExpr>(E)) {
      llvm::Value *Size =
          CGF.getTypeSize(OAE->getBase()->getType()->getPointeeType());
      for (const Expr *SE : OAE->getDimensions()) {
        llvm::Value *Sz = CGF.EmitScalarExpr(SE);
        Sz = CGF.EmitScalarConversion(Sz, SE->getType(),
                                      CGF.getContext().getSizeType(),
                                      SE->getExprLoc());
        Size = CGF.Builder.CreateNUWMul(Size, Sz);
      }
      return Size;
    }

    // Reference types are ignored for mapping purposes.
    if (const auto *RefTy = ExprTy->getAs<ReferenceType>())
      ExprTy = RefTy->getPointeeType().getCanonicalType();

    // Given that an array section is considered a built-in type, we need to
    // do the calculation based on the length of the section instead of
    // relying on CGF.getTypeSize(E->getType()).
    if (const auto *OAE = dyn_cast<OMPArraySectionExpr>(E)) {
      QualType BaseTy = OMPArraySectionExpr::getBaseOriginalType(
                            OAE->getBase()->IgnoreParenImpCasts())
                            .getCanonicalType();

      // If there is no length associated with the expression and the lower
      // bound is not specified either, we are using the whole length of the
      // base.
      if (!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
          !OAE->getLowerBound())
        return CGF.getTypeSize(BaseTy);

      llvm::Value *ElemSize;
      if (const auto *PTy = BaseTy->getAs<PointerType>()) {
        ElemSize = CGF.getTypeSize(PTy->getPointeeType().getCanonicalType());
      } else {
        const auto *ATy = cast<ArrayType>(BaseTy.getTypePtr());
        assert(ATy && "Expecting array type if not a pointer type.");
        ElemSize = CGF.getTypeSize(ATy->getElementType().getCanonicalType());
      }

      // If we don't have a length at this point, that is because we have an
      // array section with a single element.
      if (!OAE->getLength() && OAE->getColonLocFirst().isInvalid())
        return ElemSize;

      if (const Expr *LenExpr = OAE->getLength()) {
        llvm::Value *LengthVal = CGF.EmitScalarExpr(LenExpr);
        LengthVal = CGF.EmitScalarConversion(LengthVal, LenExpr->getType(),
                                             CGF.getContext().getSizeType(),
                                             LenExpr->getExprLoc());
        return CGF.Builder.CreateNUWMul(LengthVal, ElemSize);
      }
      assert(!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
             OAE->getLowerBound() && "expected array_section[lb:].");
      // Size = sizeof(base type) - lb * sizeof(element), clamped to zero
      // below.
      llvm::Value *LengthVal = CGF.getTypeSize(BaseTy);
      llvm::Value *LBVal = CGF.EmitScalarExpr(OAE->getLowerBound());
      LBVal = CGF.EmitScalarConversion(LBVal, OAE->getLowerBound()->getType(),
                                       CGF.getContext().getSizeType(),
                                       OAE->getLowerBound()->getExprLoc());
      LBVal = CGF.Builder.CreateNUWMul(LBVal, ElemSize);
      llvm::Value *Cmp = CGF.Builder.CreateICmpUGT(LengthVal, LBVal);
      llvm::Value *TrueVal = CGF.Builder.CreateNUWSub(LengthVal, LBVal);
      LengthVal = CGF.Builder.CreateSelect(
          Cmp, TrueVal, llvm::ConstantInt::get(CGF.SizeTy, 0));
      return LengthVal;
    }
    return CGF.getTypeSize(ExprTy);
  }

  /// Return the corresponding bits for a given map clause modifier. Add
  /// a flag marking the map as a pointer if requested. Add a flag marking the
  /// map as the first one of a series of maps that relate to the same map
  /// expression.
  OpenMPOffloadMappingFlags getMapTypeBits(
      OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
      ArrayRef<OpenMPMotionModifierKind> MotionModifiers, bool IsImplicit,
      bool AddPtrFlag, bool AddIsTargetParamFlag, bool IsNonContiguous) const {
    OpenMPOffloadMappingFlags Bits =
        IsImplicit ? OMP_MAP_IMPLICIT : OMP_MAP_NONE;
    switch (MapType) {
    case OMPC_MAP_alloc:
    case OMPC_MAP_release:
      // alloc and release are the default behavior in the runtime library;
      // i.e., if we don't pass any bits, alloc/release is what the runtime
      // is going to do. Therefore, we don't need to signal anything for
      // these two type modifiers.
      break;
    case OMPC_MAP_to:
      Bits |= OMP_MAP_TO;
      break;
    case OMPC_MAP_from:
      Bits |= OMP_MAP_FROM;
      break;
    case OMPC_MAP_tofrom:
      Bits |= OMP_MAP_TO | OMP_MAP_FROM;
      break;
    case OMPC_MAP_delete:
      Bits |= OMP_MAP_DELETE;
      break;
    case OMPC_MAP_unknown:
      llvm_unreachable("Unexpected map type!");
    }
    if (AddPtrFlag)
      Bits |= OMP_MAP_PTR_AND_OBJ;
    if (AddIsTargetParamFlag)
      Bits |= OMP_MAP_TARGET_PARAM;
    if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_always))
      Bits |= OMP_MAP_ALWAYS;
    if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_close))
      Bits |= OMP_MAP_CLOSE;
    if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_present) ||
        llvm::is_contained(MotionModifiers, OMPC_MOTION_MODIFIER_present))
      Bits |= OMP_MAP_PRESENT;
    if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_ompx_hold))
      Bits |= OMP_MAP_OMPX_HOLD;
    if (IsNonContiguous)
      Bits |= OMP_MAP_NON_CONTIG;
    return Bits;
  }
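
  // Illustrative (not from the original source): for
  // 'map(always, close, tofrom: x)' the function above yields
  // OMP_MAP_TO | OMP_MAP_FROM | OMP_MAP_ALWAYS | OMP_MAP_CLOSE
  // (0x1 | 0x2 | 0x4 | 0x400 = 0x407), before any PTR_AND_OBJ or
  // TARGET_PARAM bits requested by the caller are added.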

  /// Return true if the provided expression is a final array section. A
  /// final array section is one whose length can't be proved to be one.
  bool isFinalArraySectionExpression(const Expr *E) const {
    const auto *OASE = dyn_cast<OMPArraySectionExpr>(E);

    // It is not an array section and therefore not a unity-size one.
    if (!OASE)
      return false;

    // An array section with no colon always refers to a single element.
    if (OASE->getColonLocFirst().isInvalid())
      return false;

    const Expr *Length = OASE->getLength();

    // If we don't have a length we have to check if the array has size 1
    // for this dimension. Also, we should always expect a length if the
    // base type is pointer.
    if (!Length) {
      QualType BaseQTy = OMPArraySectionExpr::getBaseOriginalType(
                             OASE->getBase()->IgnoreParenImpCasts())
                             .getCanonicalType();
      if (const auto *ATy = dyn_cast<ConstantArrayType>(BaseQTy.getTypePtr()))
        return ATy->getSize().getSExtValue() != 1;
      // If we don't have a constant dimension length, we have to consider
      // the current section as having any size, so it is not necessarily
      // unitary. If it happens to be unity size, that's the user's fault.
      return true;
    }

    // Check if the length evaluates to 1.
    Expr::EvalResult Result;
    if (!Length->EvaluateAsInt(Result, CGF.getContext()))
      return true; // Can have a size greater than 1.

    llvm::APSInt ConstLength = Result.Val.getInt();
    return ConstLength.getSExtValue() != 1;
  }

  /// Generate the base pointers, section pointers, sizes, map type bits, and
  /// user-defined mappers (all included in \a CombinedInfo) for the provided
  /// map type, map or motion modifiers, and expression components.
  /// \a IsFirstComponentList should be set to true if the provided set of
  /// components is the first associated with a capture.
  void generateInfoForComponentList(
      OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
      ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
      MapCombinedInfoTy &CombinedInfo, StructRangeInfoTy &PartialStruct,
      bool IsFirstComponentList, bool IsImplicit,
      const ValueDecl *Mapper = nullptr, bool ForDeviceAddr = false,
      const ValueDecl *BaseDecl = nullptr, const Expr *MapExpr = nullptr,
      ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
          OverlappedElements = llvm::None) const {
    // The following summarizes what has to be generated for each map and the
    // types below. The generated information is expressed in this order:
    // base pointer, section pointer, size, flags
    // (to add to the ones that come from the map type and modifier).
    //
    // double d;
    // int i[100];
    // float *p;
    //
    // struct S1 {
    //   int i;
    //   float f[50];
    // }
    // struct S2 {
    //   int i;
    //   float f[50];
    //   S1 s;
    //   double *p;
    //   struct S2 *ps;
    //   int &ref;
    // }
    // S2 s;
    // S2 *ps;
    //
    // map(d)
    // &d, &d, sizeof(double), TARGET_PARAM | TO | FROM
    //
    // map(i)
    // &i, &i, 100*sizeof(int), TARGET_PARAM | TO | FROM
    //
    // map(i[1:23])
    // &i(=&i[0]), &i[1], 23*sizeof(int), TARGET_PARAM | TO | FROM
    //
    // map(p)
    // &p, &p, sizeof(float*), TARGET_PARAM | TO | FROM
    //
    // map(p[1:24])
    // &p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM | PTR_AND_OBJ
    // in unified shared memory mode or for local pointers
    // p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM
    //
    // map(s)
    // &s, &s, sizeof(S2), TARGET_PARAM | TO | FROM
    //
    // map(s.i)
    // &s, &(s.i), sizeof(int), TARGET_PARAM | TO | FROM
    //
    // map(s.s.f)
    // &s, &(s.s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
    //
    // map(s.p)
    // &s, &(s.p), sizeof(double*), TARGET_PARAM | TO | FROM
    //
    // map(to: s.p[:22])
    // &s, &(s.p), sizeof(double*), TARGET_PARAM (*)
    // &s, &(s.p), sizeof(double*), MEMBER_OF(1) (**)
    // &(s.p), &(s.p[0]), 22*sizeof(double),
    //     MEMBER_OF(1) | PTR_AND_OBJ | TO (***)
    // (*) alloc space for struct members, only this is a target parameter
    // (**) map the pointer (nothing to be mapped in this example) (the
    //      compiler optimizes this entry out, same in the examples below)
    // (***) map the pointee (map: to)
    //
    // map(to: s.ref)
    // &s, &(s.ref), sizeof(int*), TARGET_PARAM (*)
    // &s, &(s.ref), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | TO (***)
    // (*) alloc space for struct members, only this is a target parameter
    // (**) map the pointer (nothing to be mapped in this example) (the
    //      compiler optimizes this entry out, same in the examples below)
    // (***) map the pointee (map: to)
    //
    // map(s.ps)
    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM | TO | FROM
    //
    // map(from: s.ps->s.i)
    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
    // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
    // &(s.ps), &(s.ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM
    //
    // map(to: s.ps->ps)
    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
    // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
    // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | TO
    //
    // map(s.ps->ps->ps)
    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
    // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
    // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
    // &(s.ps->ps), &(s.ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
    //
    // map(to: s.ps->ps->s.f[:22])
    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
    // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
    // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
    // &(s.ps->ps), &(s.ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
    //
    // map(ps)
    // &ps, &ps, sizeof(S2*), TARGET_PARAM | TO | FROM
    //
    // map(ps->i)
    // ps, &(ps->i), sizeof(int), TARGET_PARAM | TO | FROM
    //
    // map(ps->s.f)
    // ps, &(ps->s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
    //
    // map(from: ps->p)
    // ps, &(ps->p), sizeof(double*), TARGET_PARAM | FROM
    //
    // map(to: ps->p[:22])
    // ps, &(ps->p), sizeof(double*), TARGET_PARAM
    // ps, &(ps->p), sizeof(double*), MEMBER_OF(1)
    // &(ps->p), &(ps->p[0]), 22*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ
    //     | TO
    //
    // map(ps->ps)
    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM | TO | FROM
    //
    // map(from: ps->ps->s.i)
    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
    // &(ps->ps), &(ps->ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ
    //     | FROM
    //
    // map(from: ps->ps->ps)
    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
    // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
    //     | FROM
    //
    // map(ps->ps->ps->ps)
    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
    // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
    // &(ps->ps->ps), &(ps->ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
    //
    // map(to: ps->ps->ps->s.f[:22])
    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
    // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
    // &(ps->ps->ps), &(ps->ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ
    //     | TO
    //
    // map(to: s.f[:22]) map(from: s.p[:33])
    // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1) +
    //     sizeof(double*) (*), TARGET_PARAM
    // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | TO
    // &s, &(s.p), sizeof(double*), MEMBER_OF(1)
    // &(s.p), &(s.p[0]), 33*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | FROM
    // (*) allocate contiguous space needed to fit all mapped members even if
    //     we allocate space for members not mapped (in this example,
    //     s.f[22..49] and s.s are not mapped, yet we must allocate space for
    //     them as well because they fall between &s.f[0] and &s.p)
    //
    // map(from: s.f[:22]) map(to: ps->p[:33])
    // &s, &(s.f[0]), 22*sizeof(float), TARGET_PARAM | FROM
    // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->p), sizeof(double*), MEMBER_OF(2) (*)
    // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(2) | PTR_AND_OBJ
    //     | TO
    // (*) the struct this entry pertains to is the 2nd element in the list of
    //     arguments, hence MEMBER_OF(2)
    //
    // map(from: s.f[:22], s.s) map(to: ps->p[:33])
    // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1), TARGET_PARAM
    // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | FROM
    // &s, &(s.s), sizeof(struct S1), MEMBER_OF(1) | FROM
    // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->p), sizeof(double*), MEMBER_OF(4) (*)
    // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(4) | PTR_AND_OBJ
    //     | TO
    // (*) the struct this entry pertains to is the 4th element in the list
    //     of arguments, hence MEMBER_OF(4)

    // Track if the map information being generated is the first for a
    // capture.
    bool IsCaptureFirstInfo = IsFirstComponentList;
    // When the variable is on a declare target link or in a to clause with
    // unified memory, a reference is needed to hold the host/device address
    // of the variable.
    bool RequiresReference = false;

    // Scan the components from the base to the complete expression.
    auto CI = Components.rbegin();
    auto CE = Components.rend();
    auto I = CI;
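
    // Illustrative (not from the original source): for 'map(s.ps->i)' this
    // reversed walk visits the components base-first: 's', then the member
    // pointer 'ps', then 'i'.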

    // Track if the map information being generated is the first for a list
    // of components.
    bool IsExpressionFirstInfo = true;
    bool FirstPointerInComplexData = false;
    Address BP = Address::invalid();
    const Expr *AssocExpr = I->getAssociatedExpression();
    const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr);
    const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);
    const auto *OAShE = dyn_cast<OMPArrayShapingExpr>(AssocExpr);

    if (isa<MemberExpr>(AssocExpr)) {
      // The base is the 'this' pointer. The content of the pointer is going
      // to be the base of the field being mapped.
      BP = CGF.LoadCXXThisAddress();
    } else if ((AE && isa<CXXThisExpr>(AE->getBase()->IgnoreParenImpCasts())) ||
               (OASE &&
                isa<CXXThisExpr>(OASE->getBase()->IgnoreParenImpCasts()))) {
      BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF);
    } else if (OAShE &&
               isa<CXXThisExpr>(OAShE->getBase()->IgnoreParenCasts())) {
      BP = Address::deprecated(
          CGF.EmitScalarExpr(OAShE->getBase()),
          CGF.getContext().getTypeAlignInChars(OAShE->getBase()->getType()));
    } else {
      // The base is the reference to the variable.
      // BP = &Var.
      BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF);
      if (const auto *VD =
              dyn_cast_or_null<VarDecl>(I->getAssociatedDeclaration())) {
        if (llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
                OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD)) {
          if ((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
              (*Res == OMPDeclareTargetDeclAttr::MT_To &&
               CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())) {
            RequiresReference = true;
            BP = CGF.CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
          }
        }
      }

      // If the variable is a pointer and is being dereferenced (i.e. is not
      // the last component), the base has to be the pointer itself, not its
      // reference. References are ignored for mapping purposes.
      QualType Ty =
          I->getAssociatedDeclaration()->getType().getNonReferenceType();
      if (Ty->isAnyPointerType() && std::next(I) != CE) {
        // No need to generate individual map information for the pointer, it
        // can be associated with the combined storage if shared memory mode
        // is active or the base declaration is not a global variable.
        const auto *VD = dyn_cast<VarDecl>(I->getAssociatedDeclaration());
        if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
            !VD || VD->hasLocalStorage())
          BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
        else
          FirstPointerInComplexData = true;
        ++I;
      }
    }
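
    // Illustrative note on the declare-target path above (not from the
    // original source): given
    //   int *p;
    //   #pragma omp declare target link(p)
    // 'p' is accessed through the runtime-managed reference returned by
    // getAddrOfDeclareTargetVar rather than through 'p' itself.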

    // Track whether a component of the list should be marked as MEMBER_OF
    // some combined entry (for partial structs). Only the first PTR_AND_OBJ
    // entry in a component list should be marked as MEMBER_OF; all subsequent
    // entries do not belong to the base struct. E.g.
    // struct S2 s;
    // s.ps->ps->ps->f[:]
    //   (1)  (2)  (3) (4)
    // ps(1) is a member pointer, ps(2) is a pointee of ps(1), so it is a
    // PTR_AND_OBJ entry; the PTR is ps(1), so MEMBER_OF the base struct.
    // ps(3) is the pointee of ps(2), which is not a member of struct s, so
    // it should not be marked as such (it is still PTR_AND_OBJ).
    // The variable is initialized to false so that PTR_AND_OBJ entries which
    // are not struct members are not considered (e.g. array of pointers to
    // data).
    bool ShouldBeMemberOf = false;

    // Variable keeping track of whether or not we have encountered a
    // component in the component list which is a member expression. Useful
    // when we have a pointer or a final array section, in which case it is
    // the previous component in the list which tells us whether we have a
    // member expression. E.g. X.f[:]
    // While processing the final array section "[:]" it is "f" which tells
    // us whether we are dealing with a member of a declared struct.
    const MemberExpr *EncounteredME = nullptr;

    // Track the total number of dimensions. Start from one for the dummy
    // dimension.
    uint64_t DimSize = 1;

    bool IsNonContiguous = CombinedInfo.NonContigInfo.IsNonContiguous;
    bool IsPrevMemberReference = false;

    for (; I != CE; ++I) {
      // If the current component is a member of a struct (parent struct),
      // mark it.
      if (!EncounteredME) {
        EncounteredME = dyn_cast<MemberExpr>(I->getAssociatedExpression());
        // If we encounter a PTR_AND_OBJ entry from now on it should be marked
        // as MEMBER_OF the parent struct.
        if (EncounteredME) {
          ShouldBeMemberOf = true;
          // Do not emit as a complex pointer if this is actually not an
          // array-like expression.
          if (FirstPointerInComplexData) {
            QualType Ty = std::prev(I)
                              ->getAssociatedDeclaration()
                              ->getType()
                              .getNonReferenceType();
            BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
            FirstPointerInComplexData = false;
          }
        }
      }

      auto Next = std::next(I);

      // We need to generate the addresses and sizes if this is the last
      // component, if the component is a pointer, or if it is an array
      // section whose length can't be proved to be one. If this is a pointer,
      // it becomes the base address for the following components.

      // A final array section is one whose length can't be proved to be one.
      // If the map item is non-contiguous then we don't treat any array
      // section as a final array section.
      bool IsFinalArraySection =
          !IsNonContiguous &&
          isFinalArraySectionExpression(I->getAssociatedExpression());

      // If we have a declaration for the mapping use that, otherwise use
      // the base declaration of the map clause.
      const ValueDecl *MapDecl = (I->getAssociatedDeclaration())
                                     ? I->getAssociatedDeclaration()
                                     : BaseDecl;
      MapExpr = (I->getAssociatedExpression()) ? I->getAssociatedExpression()
                                               : MapExpr;
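
      // Illustrative (not from the original source): for 'map(to: s.p[:22])'
      // the address/size emission below triggers twice: once when the walk
      // reaches the pointer member 's.p' (which then becomes the base) and
      // once for the final array section.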

      // Get information on whether the element is a pointer. Have to do a
      // special treatment for array sections given that they are built-in
      // types.
      const auto *OASE =
          dyn_cast<OMPArraySectionExpr>(I->getAssociatedExpression());
      const auto *OAShE =
          dyn_cast<OMPArrayShapingExpr>(I->getAssociatedExpression());
      const auto *UO = dyn_cast<UnaryOperator>(I->getAssociatedExpression());
      const auto *BO = dyn_cast<BinaryOperator>(I->getAssociatedExpression());
      bool IsPointer =
          OAShE ||
          (OASE && OMPArraySectionExpr::getBaseOriginalType(OASE)
                       .getCanonicalType()
                       ->isAnyPointerType()) ||
          I->getAssociatedExpression()->getType()->isAnyPointerType();
      bool IsMemberReference = isa<MemberExpr>(I->getAssociatedExpression()) &&
                               MapDecl &&
                               MapDecl->getType()->isLValueReferenceType();
      bool IsNonDerefPointer = IsPointer && !UO && !BO && !IsNonContiguous;

      if (OASE)
        ++DimSize;

      if (Next == CE || IsMemberReference || IsNonDerefPointer ||
          IsFinalArraySection) {
        // If this is not the last component, we expect the pointer to be
        // associated with an array expression or member expression.
        assert((Next == CE ||
                isa<MemberExpr>(Next->getAssociatedExpression()) ||
                isa<ArraySubscriptExpr>(Next->getAssociatedExpression()) ||
                isa<OMPArraySectionExpr>(Next->getAssociatedExpression()) ||
                isa<OMPArrayShapingExpr>(Next->getAssociatedExpression()) ||
                isa<UnaryOperator>(Next->getAssociatedExpression()) ||
                isa<BinaryOperator>(Next->getAssociatedExpression())) &&
               "Unexpected expression");

        Address LB = Address::invalid();
        Address LowestElem = Address::invalid();
        auto &&EmitMemberExprBase = [](CodeGenFunction &CGF,
                                       const MemberExpr *E) {
          const Expr *BaseExpr = E->getBase();
          // If this is s.x, emit s as an lvalue. If it is s->x, emit s as a
          // scalar.
          LValue BaseLV;
          if (E->isArrow()) {
            LValueBaseInfo BaseInfo;
            TBAAAccessInfo TBAAInfo;
            Address Addr =
                CGF.EmitPointerWithAlignment(BaseExpr, &BaseInfo, &TBAAInfo);
            QualType PtrTy = BaseExpr->getType()->getPointeeType();
            BaseLV = CGF.MakeAddrLValue(Addr, PtrTy, BaseInfo, TBAAInfo);
          } else {
            BaseLV = CGF.EmitOMPSharedLValue(BaseExpr);
          }
          return BaseLV;
        };
        if (OAShE) {
          LowestElem = LB =
              Address::deprecated(CGF.EmitScalarExpr(OAShE->getBase()),
                                  CGF.getContext().getTypeAlignInChars(
                                      OAShE->getBase()->getType()));
        } else if (IsMemberReference) {
          const auto *ME = cast<MemberExpr>(I->getAssociatedExpression());
          LValue BaseLVal = EmitMemberExprBase(CGF, ME);
          LowestElem = CGF.EmitLValueForFieldInitialization(
                              BaseLVal, cast<FieldDecl>(MapDecl))
                           .getAddress(CGF);
          LB = CGF.EmitLoadOfReferenceLValue(LowestElem, MapDecl->getType())
                   .getAddress(CGF);
        } else {
          LowestElem = LB =
              CGF.EmitOMPSharedLValue(I->getAssociatedExpression())
                  .getAddress(CGF);
        }

        // If this component is a pointer inside the base struct then we don't
        // need to create any entry for it - it will be combined with the
        // object it is pointing to into a single PTR_AND_OBJ entry.
        bool IsMemberPointerOrAddr =
            EncounteredME &&
            (((IsPointer || ForDeviceAddr) &&
              I->getAssociatedExpression() == EncounteredME) ||
             (IsPrevMemberReference && !IsPointer) ||
             (IsMemberReference && Next != CE &&
              !Next->getAssociatedExpression()->getType()->isPointerType()));
        if (!OverlappedElements.empty() && Next == CE) {
          // Handle base element with the info for overlapped elements.
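          // (Overlapped elements arise when a struct and some of its members
          // appear in different map clauses, e.g. 'map(s) map(from: s.p[:10])'
          // -- illustrative, not from the original source; the non-overlapped
          // bytes are then bit-copied piecewise below.)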
          assert(!PartialStruct.Base.isValid() && "The base element is set.");
          assert(!IsPointer &&
                 "Unexpected base element with the pointer type.");
          // Mark the whole struct as the struct that requires allocation on
          // the device.
          PartialStruct.LowestElem = {0, LowestElem};
          CharUnits TypeSize = CGF.getContext().getTypeSizeInChars(
              I->getAssociatedExpression()->getType());
          Address HB = CGF.Builder.CreateConstGEP(
              CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(LowestElem,
                                                              CGF.VoidPtrTy),
              TypeSize.getQuantity() - 1);
          PartialStruct.HighestElem = {
              std::numeric_limits<decltype(
                  PartialStruct.HighestElem.first)>::max(),
              HB};
          PartialStruct.Base = BP;
          PartialStruct.LB = LB;
          assert(
              PartialStruct.PreliminaryMapData.BasePointers.empty() &&
              "Overlapped elements must be used only once for the variable.");
          std::swap(PartialStruct.PreliminaryMapData, CombinedInfo);
          // Emit data for non-overlapped data.
          OpenMPOffloadMappingFlags Flags =
              OMP_MAP_MEMBER_OF |
              getMapTypeBits(MapType, MapModifiers, MotionModifiers,
                             IsImplicit,
                             /*AddPtrFlag=*/false,
                             /*AddIsTargetParamFlag=*/false, IsNonContiguous);
          llvm::Value *Size = nullptr;
          // Do bitcopy of all non-overlapped structure elements.
          for (OMPClauseMappableExprCommon::MappableExprComponentListRef
                   Component : OverlappedElements) {
            Address ComponentLB = Address::invalid();
            for (const OMPClauseMappableExprCommon::MappableComponent &MC :
                 Component) {
              if (const ValueDecl *VD = MC.getAssociatedDeclaration()) {
                const auto *FD = dyn_cast<FieldDecl>(VD);
                if (FD && FD->getType()->isLValueReferenceType()) {
                  const auto *ME =
                      cast<MemberExpr>(MC.getAssociatedExpression());
                  LValue BaseLVal = EmitMemberExprBase(CGF, ME);
                  ComponentLB =
                      CGF.EmitLValueForFieldInitialization(BaseLVal, FD)
                          .getAddress(CGF);
                } else {
                  ComponentLB =
                      CGF.EmitOMPSharedLValue(MC.getAssociatedExpression())
                          .getAddress(CGF);
                }
                Size = CGF.Builder.CreatePtrDiff(
                    CGF.Int8Ty,
                    CGF.EmitCastToVoidPtr(ComponentLB.getPointer()),
                    CGF.EmitCastToVoidPtr(LB.getPointer()));
                break;
              }
            }
            assert(Size && "Failed to determine structure size");
            CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
            CombinedInfo.BasePointers.push_back(BP.getPointer());
            CombinedInfo.Pointers.push_back(LB.getPointer());
            CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
                Size, CGF.Int64Ty, /*isSigned=*/true));
            CombinedInfo.Types.push_back(Flags);
            CombinedInfo.Mappers.push_back(nullptr);
            CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
                                                                      : 1);
            LB = CGF.Builder.CreateConstGEP(ComponentLB, 1);
          }
          CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
          CombinedInfo.BasePointers.push_back(BP.getPointer());
          CombinedInfo.Pointers.push_back(LB.getPointer());
          Size = CGF.Builder.CreatePtrDiff(
              CGF.Int8Ty, CGF.Builder.CreateConstGEP(HB, 1).getPointer(),
              CGF.EmitCastToVoidPtr(LB.getPointer()));
          CombinedInfo.Sizes.push_back(
              CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));
          CombinedInfo.Types.push_back(Flags);
          CombinedInfo.Mappers.push_back(nullptr);
          CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
                                                                    : 1);
          break;
        }
        llvm::Value *Size = getExprTypeSize(I->getAssociatedExpression());
        if (!IsMemberPointerOrAddr ||
            (Next == CE && MapType != OMPC_MAP_unknown)) {
          CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
          CombinedInfo.BasePointers.push_back(BP.getPointer());
          CombinedInfo.Pointers.push_back(LB.getPointer());
          CombinedInfo.Sizes.push_back(
              CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));
          CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
                                                                    : 1);

          // If Mapper is valid, the last component inherits the mapper.
          bool HasMapper = Mapper && Next == CE;
          CombinedInfo.Mappers.push_back(HasMapper ? Mapper : nullptr);

          // We need to add a pointer flag for each map that comes from the
          // same expression except for the first one. We also need to signal
          // this map is the first one that relates with the current capture
          // (there is a set of entries for each capture).
          OpenMPOffloadMappingFlags Flags = getMapTypeBits(
              MapType, MapModifiers, MotionModifiers, IsImplicit,
              !IsExpressionFirstInfo || RequiresReference ||
                  FirstPointerInComplexData || IsMemberReference,
              IsCaptureFirstInfo && !RequiresReference, IsNonContiguous);

          if (!IsExpressionFirstInfo || IsMemberReference) {
            // If we have a PTR_AND_OBJ pair where the OBJ is a pointer as
            // well, then we reset the TO/FROM/ALWAYS/DELETE/CLOSE flags.
            if (IsPointer || (IsMemberReference && Next != CE))
              Flags &= ~(OMP_MAP_TO | OMP_MAP_FROM | OMP_MAP_ALWAYS |
                         OMP_MAP_DELETE | OMP_MAP_CLOSE);

            if (ShouldBeMemberOf) {
              // Set placeholder value MEMBER_OF=FFFF to indicate that the
              // flag should be later updated with the correct value of
              // MEMBER_OF.
              Flags |= OMP_MAP_MEMBER_OF;
              // From now on, all subsequent PTR_AND_OBJ entries should not be
              // marked as MEMBER_OF.
              ShouldBeMemberOf = false;
            }
          }

          CombinedInfo.Types.push_back(Flags);
        }

        // If we have encountered a member expression so far, keep track of
        // the mapped member. If the parent is "*this", then the value
        // declaration is nullptr.
        if (EncounteredME) {
          const auto *FD = cast<FieldDecl>(EncounteredME->getMemberDecl());
          unsigned FieldIndex = FD->getFieldIndex();

          // Update info about the lowest and highest elements for this
          // struct.
          if (!PartialStruct.Base.isValid()) {
            PartialStruct.LowestElem = {FieldIndex, LowestElem};
            if (IsFinalArraySection) {
              Address HB =
                  CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false)
                      .getAddress(CGF);
              PartialStruct.HighestElem = {FieldIndex, HB};
            } else {
              PartialStruct.HighestElem = {FieldIndex, LowestElem};
            }
            PartialStruct.Base = BP;
            PartialStruct.LB = BP;
          } else if (FieldIndex < PartialStruct.LowestElem.first) {
            PartialStruct.LowestElem = {FieldIndex, LowestElem};
          } else if (FieldIndex > PartialStruct.HighestElem.first) {
            PartialStruct.HighestElem = {FieldIndex, LowestElem};
          }
        }

        // Need to emit combined struct for array sections.
        if (IsFinalArraySection || IsNonContiguous)
          PartialStruct.IsArraySection = true;

        // If we have a final array section, we are done with this expression.
        if (IsFinalArraySection)
          break;

        // The pointer becomes the base for the next element.
        if (Next != CE)
          BP = IsMemberReference ? LowestElem : LB;

        IsExpressionFirstInfo = false;
        IsCaptureFirstInfo = false;
        FirstPointerInComplexData = false;
        IsPrevMemberReference = IsMemberReference;
      } else if (FirstPointerInComplexData) {
        QualType Ty = Components.rbegin()
                          ->getAssociatedDeclaration()
                          ->getType()
                          .getNonReferenceType();
        BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
        FirstPointerInComplexData = false;
      }
    }
    // If we ran over the whole component list, allocate space for the whole
    // record.
    if (!EncounteredME)
      PartialStruct.HasCompleteRecord = true;

    if (!IsNonContiguous)
      return;

    const ASTContext &Context = CGF.getContext();

    // To support strides in array sections, initialize the first dimension
    // size as 1, the first offset as 0, and the first count as 1.
    MapValuesArrayTy CurOffsets = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 0)};
    MapValuesArrayTy CurCounts = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)};
    MapValuesArrayTy CurStrides;
    MapValuesArrayTy DimSizes{llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)};
    uint64_t ElementTypeSize;

    // Collect size information for each dimension and get the element size
    // as the first stride. For example, for `int arr[10][10]`, DimSizes ends
    // up as [1, 10] (the leading dummy dimension plus the recorded extent)
    // and the first stride is 4 bytes.
    for (const OMPClauseMappableExprCommon::MappableComponent &Component :
         Components) {
      const Expr *AssocExpr = Component.getAssociatedExpression();
      const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);

      if (!OASE)
        continue;

      QualType Ty = OMPArraySectionExpr::getBaseOriginalType(OASE->getBase());
      auto *CAT = Context.getAsConstantArrayType(Ty);
      auto *VAT = Context.getAsVariableArrayType(Ty);

      // We need all the dimension sizes except for the last dimension.
      assert((VAT || CAT || &Component == &*Components.begin()) &&
             "Should be either ConstantArray or VariableArray if not the "
             "first Component");

      // Get element size if CurStrides is empty.
      if (CurStrides.empty()) {
        const Type *ElementType = nullptr;
        if (CAT)
          ElementType = CAT->getElementType().getTypePtr();
        else if (VAT)
          ElementType = VAT->getElementType().getTypePtr();
        else
          assert(&Component == &*Components.begin() &&
                 "Only expect pointer (non CAT or VAT) when this is the "
                 "first Component");
        // If ElementType is null, then it means the base is a pointer
        // (neither CAT nor VAT) and we'll attempt to get ElementType again
        // for the next iteration.
        if (ElementType) {
          // For the case of having a pointer as the base, we need to remove
          // one level of indirection.
          if (&Component != &*Components.begin())
            ElementType = ElementType->getPointeeOrArrayElementType();
          ElementTypeSize =
              Context.getTypeSizeInChars(ElementType).getQuantity();
          CurStrides.push_back(
              llvm::ConstantInt::get(CGF.Int64Ty, ElementTypeSize));
        }
      }
      // Get the dimension value except for the last dimension since we don't
      // need it.
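      // (Illustrative, not from the original source: for `int arr[4][10]`
      // sliced in both dimensions, only the extent 10 is recorded here; the
      // last dimension visited contributes no stride product below.)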
8328 if (DimSizes.size() < Components.size() - 1) { 8329 if (CAT) 8330 DimSizes.push_back(llvm::ConstantInt::get( 8331 CGF.Int64Ty, CAT->getSize().getZExtValue())); 8332 else if (VAT) 8333 DimSizes.push_back(CGF.Builder.CreateIntCast( 8334 CGF.EmitScalarExpr(VAT->getSizeExpr()), CGF.Int64Ty, 8335 /*IsSigned=*/false)); 8336 } 8337 } 8338 8339 // Skip the dummy dimension since we have already have its information. 8340 auto DI = DimSizes.begin() + 1; 8341 // Product of dimension. 8342 llvm::Value *DimProd = 8343 llvm::ConstantInt::get(CGF.CGM.Int64Ty, ElementTypeSize); 8344 8345 // Collect info for non-contiguous. Notice that offset, count, and stride 8346 // are only meaningful for array-section, so we insert a null for anything 8347 // other than array-section. 8348 // Also, the size of offset, count, and stride are not the same as 8349 // pointers, base_pointers, sizes, or dims. Instead, the size of offset, 8350 // count, and stride are the same as the number of non-contiguous 8351 // declaration in target update to/from clause. 8352 for (const OMPClauseMappableExprCommon::MappableComponent &Component : 8353 Components) { 8354 const Expr *AssocExpr = Component.getAssociatedExpression(); 8355 8356 if (const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr)) { 8357 llvm::Value *Offset = CGF.Builder.CreateIntCast( 8358 CGF.EmitScalarExpr(AE->getIdx()), CGF.Int64Ty, 8359 /*isSigned=*/false); 8360 CurOffsets.push_back(Offset); 8361 CurCounts.push_back(llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/1)); 8362 CurStrides.push_back(CurStrides.back()); 8363 continue; 8364 } 8365 8366 const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr); 8367 8368 if (!OASE) 8369 continue; 8370 8371 // Offset 8372 const Expr *OffsetExpr = OASE->getLowerBound(); 8373 llvm::Value *Offset = nullptr; 8374 if (!OffsetExpr) { 8375 // If offset is absent, then we just set it to zero. 8376 Offset = llvm::ConstantInt::get(CGF.Int64Ty, 0); 8377 } else { 8378 Offset = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(OffsetExpr), 8379 CGF.Int64Ty, 8380 /*isSigned=*/false); 8381 } 8382 CurOffsets.push_back(Offset); 8383 8384 // Count 8385 const Expr *CountExpr = OASE->getLength(); 8386 llvm::Value *Count = nullptr; 8387 if (!CountExpr) { 8388 // In Clang, once a high dimension is an array section, we construct all 8389 // the lower dimension as array section, however, for case like 8390 // arr[0:2][2], Clang construct the inner dimension as an array section 8391 // but it actually is not in an array section form according to spec. 8392 if (!OASE->getColonLocFirst().isValid() && 8393 !OASE->getColonLocSecond().isValid()) { 8394 Count = llvm::ConstantInt::get(CGF.Int64Ty, 1); 8395 } else { 8396 // OpenMP 5.0, 2.1.5 Array Sections, Description. 8397 // When the length is absent it defaults to ⌈(size − 8398 // lower-bound)/stride⌉, where size is the size of the array 8399 // dimension. 8400 const Expr *StrideExpr = OASE->getStride(); 8401 llvm::Value *Stride = 8402 StrideExpr 8403 ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr), 8404 CGF.Int64Ty, /*isSigned=*/false) 8405 : nullptr; 8406 if (Stride) 8407 Count = CGF.Builder.CreateUDiv( 8408 CGF.Builder.CreateNUWSub(*DI, Offset), Stride); 8409 else 8410 Count = CGF.Builder.CreateNUWSub(*DI, Offset); 8411 } 8412 } else { 8413 Count = CGF.EmitScalarExpr(CountExpr); 8414 } 8415 Count = CGF.Builder.CreateIntCast(Count, CGF.Int64Ty, /*isSigned=*/false); 8416 CurCounts.push_back(Count); 8417 8418 // Stride_n' = Stride_n * (D_0 * D_1 ... 
* D_n-1) * Unit size 8419 // Take `int arr[5][5][5]` and `arr[0:2:2][1:2:1][0:2:2]` as an example: 8420 // Offset Count Stride 8421 // D0 0 1 4 (int) <- dummy dimension 8422 // D1 0 2 8 (2 * (1) * 4) 8423 // D2 1 2 20 (1 * (1 * 5) * 4) 8424 // D3 0 2 200 (2 * (1 * 5 * 4) * 4) 8425 const Expr *StrideExpr = OASE->getStride(); 8426 llvm::Value *Stride = 8427 StrideExpr 8428 ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr), 8429 CGF.Int64Ty, /*isSigned=*/false) 8430 : nullptr; 8431 DimProd = CGF.Builder.CreateNUWMul(DimProd, *(DI - 1)); 8432 if (Stride) 8433 CurStrides.push_back(CGF.Builder.CreateNUWMul(DimProd, Stride)); 8434 else 8435 CurStrides.push_back(DimProd); 8436 if (DI != DimSizes.end()) 8437 ++DI; 8438 } 8439 8440 CombinedInfo.NonContigInfo.Offsets.push_back(CurOffsets); 8441 CombinedInfo.NonContigInfo.Counts.push_back(CurCounts); 8442 CombinedInfo.NonContigInfo.Strides.push_back(CurStrides); 8443 } 8444 8445 /// Return the adjusted map modifiers if the declaration a capture refers to 8446 /// appears in a first-private clause. This is expected to be used only with 8447 /// directives that start with 'target'. 8448 MappableExprsHandler::OpenMPOffloadMappingFlags 8449 getMapModifiersForPrivateClauses(const CapturedStmt::Capture &Cap) const { 8450 assert(Cap.capturesVariable() && "Expected capture by reference only!"); 8451 8452 // A first private variable captured by reference will use only the 8453 // 'private ptr' and 'map to' flag. Return the right flags if the captured 8454 // declaration is known as first-private in this handler. 8455 if (FirstPrivateDecls.count(Cap.getCapturedVar())) { 8456 if (Cap.getCapturedVar()->getType()->isAnyPointerType()) 8457 return MappableExprsHandler::OMP_MAP_TO | 8458 MappableExprsHandler::OMP_MAP_PTR_AND_OBJ; 8459 return MappableExprsHandler::OMP_MAP_PRIVATE | 8460 MappableExprsHandler::OMP_MAP_TO; 8461 } 8462 auto I = LambdasMap.find(Cap.getCapturedVar()->getCanonicalDecl()); 8463 if (I != LambdasMap.end()) 8464 // for map(to: lambda): using user specified map type. 8465 return getMapTypeBits( 8466 I->getSecond()->getMapType(), I->getSecond()->getMapTypeModifiers(), 8467 /*MotionModifiers=*/llvm::None, I->getSecond()->isImplicit(), 8468 /*AddPtrFlag=*/false, 8469 /*AddIsTargetParamFlag=*/false, 8470 /*isNonContiguous=*/false); 8471 return MappableExprsHandler::OMP_MAP_TO | 8472 MappableExprsHandler::OMP_MAP_FROM; 8473 } 8474 8475 static OpenMPOffloadMappingFlags getMemberOfFlag(unsigned Position) { 8476 // Rotate by getFlagMemberOffset() bits. 8477 return static_cast<OpenMPOffloadMappingFlags>(((uint64_t)Position + 1) 8478 << getFlagMemberOffset()); 8479 } 8480 8481 static void setCorrectMemberOfFlag(OpenMPOffloadMappingFlags &Flags, 8482 OpenMPOffloadMappingFlags MemberOfFlag) { 8483 // If the entry is PTR_AND_OBJ but has not been marked with the special 8484 // placeholder value 0xFFFF in the MEMBER_OF field, then it should not be 8485 // marked as MEMBER_OF. 8486 if ((Flags & OMP_MAP_PTR_AND_OBJ) && 8487 ((Flags & OMP_MAP_MEMBER_OF) != OMP_MAP_MEMBER_OF)) 8488 return; 8489 8490 // Reset the placeholder value to prepare the flag for the assignment of the 8491 // proper MEMBER_OF value. 8492 Flags &= ~OMP_MAP_MEMBER_OF; 8493 Flags |= MemberOfFlag; 8494 } 8495 8496 void getPlainLayout(const CXXRecordDecl *RD, 8497 llvm::SmallVectorImpl<const FieldDecl *> &Layout, 8498 bool AsBase) const { 8499 const CGRecordLayout &RL = CGF.getTypes().getCGRecordLayout(RD); 8500 8501 llvm::StructType *St = 8502 AsBase ? 
RL.getBaseSubobjectLLVMType() : RL.getLLVMType(); 8503 8504 unsigned NumElements = St->getNumElements(); 8505 llvm::SmallVector< 8506 llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>, 4> 8507 RecordLayout(NumElements); 8508 8509 // Fill bases. 8510 for (const auto &I : RD->bases()) { 8511 if (I.isVirtual()) 8512 continue; 8513 const auto *Base = I.getType()->getAsCXXRecordDecl(); 8514 // Ignore empty bases. 8515 if (Base->isEmpty() || CGF.getContext() 8516 .getASTRecordLayout(Base) 8517 .getNonVirtualSize() 8518 .isZero()) 8519 continue; 8520 8521 unsigned FieldIndex = RL.getNonVirtualBaseLLVMFieldNo(Base); 8522 RecordLayout[FieldIndex] = Base; 8523 } 8524 // Fill in virtual bases. 8525 for (const auto &I : RD->vbases()) { 8526 const auto *Base = I.getType()->getAsCXXRecordDecl(); 8527 // Ignore empty bases. 8528 if (Base->isEmpty()) 8529 continue; 8530 unsigned FieldIndex = RL.getVirtualBaseIndex(Base); 8531 if (RecordLayout[FieldIndex]) 8532 continue; 8533 RecordLayout[FieldIndex] = Base; 8534 } 8535 // Fill in all the fields. 8536 assert(!RD->isUnion() && "Unexpected union."); 8537 for (const auto *Field : RD->fields()) { 8538 // Fill in non-bitfields. (Bitfields always use a zero pattern, which we 8539 // will fill in later.) 8540 if (!Field->isBitField() && !Field->isZeroSize(CGF.getContext())) { 8541 unsigned FieldIndex = RL.getLLVMFieldNo(Field); 8542 RecordLayout[FieldIndex] = Field; 8543 } 8544 } 8545 for (const llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *> 8546 &Data : RecordLayout) { 8547 if (Data.isNull()) 8548 continue; 8549 if (const auto *Base = Data.dyn_cast<const CXXRecordDecl *>()) 8550 getPlainLayout(Base, Layout, /*AsBase=*/true); 8551 else 8552 Layout.push_back(Data.get<const FieldDecl *>()); 8553 } 8554 } 8555 8556 /// Generate all the base pointers, section pointers, sizes, map types, and 8557 /// mappers for the extracted mappable expressions (all included in \a 8558 /// CombinedInfo). Also, for each item that relates with a device pointer, a 8559 /// pair of the relevant declaration and index where it occurs is appended to 8560 /// the device pointers info array. 8561 void generateAllInfoForClauses( 8562 ArrayRef<const OMPClause *> Clauses, MapCombinedInfoTy &CombinedInfo, 8563 const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet = 8564 llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const { 8565 // We have to process the component lists that relate with the same 8566 // declaration in a single chunk so that we can generate the map flags 8567 // correctly. Therefore, we organize all lists in a map. 8568 enum MapKind { Present, Allocs, Other, Total }; 8569 llvm::MapVector<CanonicalDeclPtr<const Decl>, 8570 SmallVector<SmallVector<MapInfo, 8>, 4>> 8571 Info; 8572 8573 // Helper function to fill the information map for the different supported 8574 // clauses. 
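// For instance (illustrative only), on
//   #pragma omp target update to(present: a) to(b)
// the component list for 'a' is filed under the Present bucket and the one
// for 'b' under Other, so the per-declaration walk below can emit the
// 'present' entries first.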
8575 auto &&InfoGen =
8576 [&Info, &SkipVarSet](
8577 const ValueDecl *D, MapKind Kind,
8578 OMPClauseMappableExprCommon::MappableExprComponentListRef L,
8579 OpenMPMapClauseKind MapType,
8580 ArrayRef<OpenMPMapModifierKind> MapModifiers,
8581 ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
8582 bool ReturnDevicePointer, bool IsImplicit, const ValueDecl *Mapper,
8583 const Expr *VarRef = nullptr, bool ForDeviceAddr = false) {
8584 if (SkipVarSet.contains(D))
8585 return;
8586 auto It = Info.find(D);
8587 if (It == Info.end())
8588 It = Info
8589 .insert(std::make_pair(
8590 D, SmallVector<SmallVector<MapInfo, 8>, 4>(Total)))
8591 .first;
8592 It->second[Kind].emplace_back(
8593 L, MapType, MapModifiers, MotionModifiers, ReturnDevicePointer,
8594 IsImplicit, Mapper, VarRef, ForDeviceAddr);
8595 };
8596
8597 for (const auto *Cl : Clauses) {
8598 const auto *C = dyn_cast<OMPMapClause>(Cl);
8599 if (!C)
8600 continue;
8601 MapKind Kind = Other;
8602 if (llvm::is_contained(C->getMapTypeModifiers(),
8603 OMPC_MAP_MODIFIER_present))
8604 Kind = Present;
8605 else if (C->getMapType() == OMPC_MAP_alloc)
8606 Kind = Allocs;
8607 const auto *EI = C->getVarRefs().begin();
8608 for (const auto L : C->component_lists()) {
8609 const Expr *E = (C->getMapLoc().isValid()) ? *EI : nullptr;
8610 InfoGen(std::get<0>(L), Kind, std::get<1>(L), C->getMapType(),
8611 C->getMapTypeModifiers(), llvm::None,
8612 /*ReturnDevicePointer=*/false, C->isImplicit(), std::get<2>(L),
8613 E);
8614 ++EI;
8615 }
8616 }
8617 for (const auto *Cl : Clauses) {
8618 const auto *C = dyn_cast<OMPToClause>(Cl);
8619 if (!C)
8620 continue;
8621 MapKind Kind = Other;
8622 if (llvm::is_contained(C->getMotionModifiers(),
8623 OMPC_MOTION_MODIFIER_present))
8624 Kind = Present;
8625 const auto *EI = C->getVarRefs().begin();
8626 for (const auto L : C->component_lists()) {
8627 InfoGen(std::get<0>(L), Kind, std::get<1>(L), OMPC_MAP_to, llvm::None,
8628 C->getMotionModifiers(), /*ReturnDevicePointer=*/false,
8629 C->isImplicit(), std::get<2>(L), *EI);
8630 ++EI;
8631 }
8632 }
8633 for (const auto *Cl : Clauses) {
8634 const auto *C = dyn_cast<OMPFromClause>(Cl);
8635 if (!C)
8636 continue;
8637 MapKind Kind = Other;
8638 if (llvm::is_contained(C->getMotionModifiers(),
8639 OMPC_MOTION_MODIFIER_present))
8640 Kind = Present;
8641 const auto *EI = C->getVarRefs().begin();
8642 for (const auto L : C->component_lists()) {
8643 InfoGen(std::get<0>(L), Kind, std::get<1>(L), OMPC_MAP_from, llvm::None,
8644 C->getMotionModifiers(), /*ReturnDevicePointer=*/false,
8645 C->isImplicit(), std::get<2>(L), *EI);
8646 ++EI;
8647 }
8648 }
8649
8650 // Look at the use_device_ptr clause information and mark the existing map
8651 // entries as such. If there is no map information for an entry in the
8652 // use_device_ptr list, we create one with map type 'alloc' and a zero-size
8653 // section. It is the user's fault if that was not mapped before. If there is
8654 // no map information and the pointer is a struct member, then we defer the
8655 // emission of that entry until the whole struct has been processed.
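// For example (illustrative, assuming a function-local pointer 'p'): with
//   #pragma omp target data map(to: p[0:64]) use_device_ptr(p)
// the existing entry for 'p' is simply marked ReturnDevicePointer, whereas
// a use_device_ptr(p) with no matching map clause gets the zero-size
// RETURN_PARAM entry built below.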
8656 llvm::MapVector<CanonicalDeclPtr<const Decl>, 8657 SmallVector<DeferredDevicePtrEntryTy, 4>> 8658 DeferredInfo; 8659 MapCombinedInfoTy UseDevicePtrCombinedInfo; 8660 8661 for (const auto *Cl : Clauses) { 8662 const auto *C = dyn_cast<OMPUseDevicePtrClause>(Cl); 8663 if (!C) 8664 continue; 8665 for (const auto L : C->component_lists()) { 8666 OMPClauseMappableExprCommon::MappableExprComponentListRef Components = 8667 std::get<1>(L); 8668 assert(!Components.empty() && 8669 "Not expecting empty list of components!"); 8670 const ValueDecl *VD = Components.back().getAssociatedDeclaration(); 8671 VD = cast<ValueDecl>(VD->getCanonicalDecl()); 8672 const Expr *IE = Components.back().getAssociatedExpression(); 8673 // If the first component is a member expression, we have to look into 8674 // 'this', which maps to null in the map of map information. Otherwise 8675 // look directly for the information. 8676 auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD); 8677 8678 // We potentially have map information for this declaration already. 8679 // Look for the first set of components that refer to it. 8680 if (It != Info.end()) { 8681 bool Found = false; 8682 for (auto &Data : It->second) { 8683 auto *CI = llvm::find_if(Data, [VD](const MapInfo &MI) { 8684 return MI.Components.back().getAssociatedDeclaration() == VD; 8685 }); 8686 // If we found a map entry, signal that the pointer has to be 8687 // returned and move on to the next declaration. Exclude cases where 8688 // the base pointer is mapped as array subscript, array section or 8689 // array shaping. The base address is passed as a pointer to base in 8690 // this case and cannot be used as a base for use_device_ptr list 8691 // item. 8692 if (CI != Data.end()) { 8693 auto PrevCI = std::next(CI->Components.rbegin()); 8694 const auto *VarD = dyn_cast<VarDecl>(VD); 8695 if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() || 8696 isa<MemberExpr>(IE) || 8697 !VD->getType().getNonReferenceType()->isPointerType() || 8698 PrevCI == CI->Components.rend() || 8699 isa<MemberExpr>(PrevCI->getAssociatedExpression()) || !VarD || 8700 VarD->hasLocalStorage()) { 8701 CI->ReturnDevicePointer = true; 8702 Found = true; 8703 break; 8704 } 8705 } 8706 } 8707 if (Found) 8708 continue; 8709 } 8710 8711 // We didn't find any match in our map information - generate a zero 8712 // size array section - if the pointer is a struct member we defer this 8713 // action until the whole struct has been processed. 8714 if (isa<MemberExpr>(IE)) { 8715 // Insert the pointer into Info to be processed by 8716 // generateInfoForComponentList. Because it is a member pointer 8717 // without a pointee, no entry will be generated for it, therefore 8718 // we need to generate one after the whole struct has been processed. 8719 // Nonetheless, generateInfoForComponentList must be called to take 8720 // the pointer into account for the calculation of the range of the 8721 // partial struct. 
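// (Illustrative case: given `struct S { int *p; } s;` with
// map(tofrom: s) use_device_ptr(s.p), the entry for 's.p' is deferred
// here and emitted only after the combined entry for 's' exists.)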
8722 InfoGen(nullptr, Other, Components, OMPC_MAP_unknown, llvm::None,
8723 llvm::None, /*ReturnDevicePointer=*/false, C->isImplicit(),
8724 nullptr);
8725 DeferredInfo[nullptr].emplace_back(IE, VD, /*ForDeviceAddr=*/false);
8726 } else {
8727 llvm::Value *Ptr =
8728 CGF.EmitLoadOfScalar(CGF.EmitLValue(IE), IE->getExprLoc());
8729 UseDevicePtrCombinedInfo.Exprs.push_back(VD);
8730 UseDevicePtrCombinedInfo.BasePointers.emplace_back(Ptr, VD);
8731 UseDevicePtrCombinedInfo.Pointers.push_back(Ptr);
8732 UseDevicePtrCombinedInfo.Sizes.push_back(
8733 llvm::Constant::getNullValue(CGF.Int64Ty));
8734 UseDevicePtrCombinedInfo.Types.push_back(OMP_MAP_RETURN_PARAM);
8735 UseDevicePtrCombinedInfo.Mappers.push_back(nullptr);
8736 }
8737 }
8738 }
8739
8740 // Look at the use_device_addr clause information and mark the existing map
8741 // entries as such. If there is no map information for an entry in the
8742 // use_device_addr list, we create one with map type 'alloc' and a zero-size
8743 // section. It is the user's fault if that was not mapped before. If there is
8744 // no map information and the pointer is a struct member, then we defer the
8745 // emission of that entry until the whole struct has been processed.
8746 llvm::SmallDenseSet<CanonicalDeclPtr<const Decl>, 4> Processed;
8747 for (const auto *Cl : Clauses) {
8748 const auto *C = dyn_cast<OMPUseDeviceAddrClause>(Cl);
8749 if (!C)
8750 continue;
8751 for (const auto L : C->component_lists()) {
8752 assert(!std::get<1>(L).empty() &&
8753 "Not expecting empty list of components!");
8754 const ValueDecl *VD = std::get<1>(L).back().getAssociatedDeclaration();
8755 if (!Processed.insert(VD).second)
8756 continue;
8757 VD = cast<ValueDecl>(VD->getCanonicalDecl());
8758 const Expr *IE = std::get<1>(L).back().getAssociatedExpression();
8759 // If the first component is a member expression, we have to look into
8760 // 'this', which maps to null in the map of map information. Otherwise
8761 // look directly for the information.
8762 auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);
8763
8764 // We potentially have map information for this declaration already.
8765 // Look for the first set of components that refer to it.
8766 if (It != Info.end()) {
8767 bool Found = false;
8768 for (auto &Data : It->second) {
8769 auto *CI = llvm::find_if(Data, [VD](const MapInfo &MI) {
8770 return MI.Components.back().getAssociatedDeclaration() == VD;
8771 });
8772 // If we found a map entry, signal that the pointer has to be
8773 // returned and move on to the next declaration.
8774 if (CI != Data.end()) {
8775 CI->ReturnDevicePointer = true;
8776 Found = true;
8777 break;
8778 }
8779 }
8780 if (Found)
8781 continue;
8782 }
8783
8784 // We didn't find any match in our map information - generate a zero
8785 // size array section - if the pointer is a struct member we defer this
8786 // action until the whole struct has been processed.
8787 if (isa<MemberExpr>(IE)) {
8788 // Insert the pointer into Info to be processed by
8789 // generateInfoForComponentList. Because it is a member pointer
8790 // without a pointee, no entry will be generated for it, therefore
8791 // we need to generate one after the whole struct has been processed.
8792 // Nonetheless, generateInfoForComponentList must be called to take
8793 // the pointer into account for the calculation of the range of the
8794 // partial struct.
8795 InfoGen(nullptr, Other, std::get<1>(L), OMPC_MAP_unknown, llvm::None, 8796 llvm::None, /*ReturnDevicePointer=*/false, C->isImplicit(), 8797 nullptr, nullptr, /*ForDeviceAddr=*/true); 8798 DeferredInfo[nullptr].emplace_back(IE, VD, /*ForDeviceAddr=*/true); 8799 } else { 8800 llvm::Value *Ptr; 8801 if (IE->isGLValue()) 8802 Ptr = CGF.EmitLValue(IE).getPointer(CGF); 8803 else 8804 Ptr = CGF.EmitScalarExpr(IE); 8805 CombinedInfo.Exprs.push_back(VD); 8806 CombinedInfo.BasePointers.emplace_back(Ptr, VD); 8807 CombinedInfo.Pointers.push_back(Ptr); 8808 CombinedInfo.Sizes.push_back( 8809 llvm::Constant::getNullValue(CGF.Int64Ty)); 8810 CombinedInfo.Types.push_back(OMP_MAP_RETURN_PARAM); 8811 CombinedInfo.Mappers.push_back(nullptr); 8812 } 8813 } 8814 } 8815 8816 for (const auto &Data : Info) { 8817 StructRangeInfoTy PartialStruct; 8818 // Temporary generated information. 8819 MapCombinedInfoTy CurInfo; 8820 const Decl *D = Data.first; 8821 const ValueDecl *VD = cast_or_null<ValueDecl>(D); 8822 for (const auto &M : Data.second) { 8823 for (const MapInfo &L : M) { 8824 assert(!L.Components.empty() && 8825 "Not expecting declaration with no component lists."); 8826 8827 // Remember the current base pointer index. 8828 unsigned CurrentBasePointersIdx = CurInfo.BasePointers.size(); 8829 CurInfo.NonContigInfo.IsNonContiguous = 8830 L.Components.back().isNonContiguous(); 8831 generateInfoForComponentList( 8832 L.MapType, L.MapModifiers, L.MotionModifiers, L.Components, 8833 CurInfo, PartialStruct, /*IsFirstComponentList=*/false, 8834 L.IsImplicit, L.Mapper, L.ForDeviceAddr, VD, L.VarRef); 8835 8836 // If this entry relates with a device pointer, set the relevant 8837 // declaration and add the 'return pointer' flag. 8838 if (L.ReturnDevicePointer) { 8839 assert(CurInfo.BasePointers.size() > CurrentBasePointersIdx && 8840 "Unexpected number of mapped base pointers."); 8841 8842 const ValueDecl *RelevantVD = 8843 L.Components.back().getAssociatedDeclaration(); 8844 assert(RelevantVD && 8845 "No relevant declaration related with device pointer??"); 8846 8847 CurInfo.BasePointers[CurrentBasePointersIdx].setDevicePtrDecl( 8848 RelevantVD); 8849 CurInfo.Types[CurrentBasePointersIdx] |= OMP_MAP_RETURN_PARAM; 8850 } 8851 } 8852 } 8853 8854 // Append any pending zero-length pointers which are struct members and 8855 // used with use_device_ptr or use_device_addr. 8856 auto CI = DeferredInfo.find(Data.first); 8857 if (CI != DeferredInfo.end()) { 8858 for (const DeferredDevicePtrEntryTy &L : CI->second) { 8859 llvm::Value *BasePtr; 8860 llvm::Value *Ptr; 8861 if (L.ForDeviceAddr) { 8862 if (L.IE->isGLValue()) 8863 Ptr = this->CGF.EmitLValue(L.IE).getPointer(CGF); 8864 else 8865 Ptr = this->CGF.EmitScalarExpr(L.IE); 8866 BasePtr = Ptr; 8867 // Entry is RETURN_PARAM. Also, set the placeholder value 8868 // MEMBER_OF=FFFF so that the entry is later updated with the 8869 // correct value of MEMBER_OF. 8870 CurInfo.Types.push_back(OMP_MAP_RETURN_PARAM | OMP_MAP_MEMBER_OF); 8871 } else { 8872 BasePtr = this->CGF.EmitLValue(L.IE).getPointer(CGF); 8873 Ptr = this->CGF.EmitLoadOfScalar(this->CGF.EmitLValue(L.IE), 8874 L.IE->getExprLoc()); 8875 // Entry is PTR_AND_OBJ and RETURN_PARAM. Also, set the 8876 // placeholder value MEMBER_OF=FFFF so that the entry is later 8877 // updated with the correct value of MEMBER_OF. 
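// (MEMBER_OF occupies the high bits of the flag word; assuming the usual
// encoding, OMP_MAP_MEMBER_OF with all position bits set is the 0xFFFF
// placeholder that setCorrectMemberOfFlag later overwrites with the real
// member index.)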
8878 CurInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_RETURN_PARAM | 8879 OMP_MAP_MEMBER_OF); 8880 } 8881 CurInfo.Exprs.push_back(L.VD); 8882 CurInfo.BasePointers.emplace_back(BasePtr, L.VD); 8883 CurInfo.Pointers.push_back(Ptr); 8884 CurInfo.Sizes.push_back( 8885 llvm::Constant::getNullValue(this->CGF.Int64Ty)); 8886 CurInfo.Mappers.push_back(nullptr); 8887 } 8888 } 8889 // If there is an entry in PartialStruct it means we have a struct with 8890 // individual members mapped. Emit an extra combined entry. 8891 if (PartialStruct.Base.isValid()) { 8892 CurInfo.NonContigInfo.Dims.push_back(0); 8893 emitCombinedEntry(CombinedInfo, CurInfo.Types, PartialStruct, VD); 8894 } 8895 8896 // We need to append the results of this capture to what we already 8897 // have. 8898 CombinedInfo.append(CurInfo); 8899 } 8900 // Append data for use_device_ptr clauses. 8901 CombinedInfo.append(UseDevicePtrCombinedInfo); 8902 } 8903 8904 public: 8905 MappableExprsHandler(const OMPExecutableDirective &Dir, CodeGenFunction &CGF) 8906 : CurDir(&Dir), CGF(CGF) { 8907 // Extract firstprivate clause information. 8908 for (const auto *C : Dir.getClausesOfKind<OMPFirstprivateClause>()) 8909 for (const auto *D : C->varlists()) 8910 FirstPrivateDecls.try_emplace( 8911 cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl()), C->isImplicit()); 8912 // Extract implicit firstprivates from uses_allocators clauses. 8913 for (const auto *C : Dir.getClausesOfKind<OMPUsesAllocatorsClause>()) { 8914 for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) { 8915 OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I); 8916 if (const auto *DRE = dyn_cast_or_null<DeclRefExpr>(D.AllocatorTraits)) 8917 FirstPrivateDecls.try_emplace(cast<VarDecl>(DRE->getDecl()), 8918 /*Implicit=*/true); 8919 else if (const auto *VD = dyn_cast<VarDecl>( 8920 cast<DeclRefExpr>(D.Allocator->IgnoreParenImpCasts()) 8921 ->getDecl())) 8922 FirstPrivateDecls.try_emplace(VD, /*Implicit=*/true); 8923 } 8924 } 8925 // Extract device pointer clause information. 8926 for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>()) 8927 for (auto L : C->component_lists()) 8928 DevPointersMap[std::get<0>(L)].push_back(std::get<1>(L)); 8929 // Extract map information. 8930 for (const auto *C : Dir.getClausesOfKind<OMPMapClause>()) { 8931 if (C->getMapType() != OMPC_MAP_to) 8932 continue; 8933 for (auto L : C->component_lists()) { 8934 const ValueDecl *VD = std::get<0>(L); 8935 const auto *RD = VD ? VD->getType() 8936 .getCanonicalType() 8937 .getNonReferenceType() 8938 ->getAsCXXRecordDecl() 8939 : nullptr; 8940 if (RD && RD->isLambda()) 8941 LambdasMap.try_emplace(std::get<0>(L), C); 8942 } 8943 } 8944 } 8945 8946 /// Constructor for the declare mapper directive. 8947 MappableExprsHandler(const OMPDeclareMapperDecl &Dir, CodeGenFunction &CGF) 8948 : CurDir(&Dir), CGF(CGF) {} 8949 8950 /// Generate code for the combined entry if we have a partially mapped struct 8951 /// and take care of the mapping flags of the arguments corresponding to 8952 /// individual struct members. 
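/// For example (illustrative), given `struct S { int a; int b; } s;` and
/// `map(tofrom: s.a, s.b)`, the combined entry uses `&s` as its base and a
/// size that spans from the lowest to one past the highest mapped member,
/// i.e. (char *)(&s.b + 1) - (char *)&s.a.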
8953 void emitCombinedEntry(MapCombinedInfoTy &CombinedInfo,
8954 MapFlagsArrayTy &CurTypes,
8955 const StructRangeInfoTy &PartialStruct,
8956 const ValueDecl *VD = nullptr,
8957 bool NotTargetParams = true) const {
8958 if (CurTypes.size() == 1 &&
8959 ((CurTypes.back() & OMP_MAP_MEMBER_OF) != OMP_MAP_MEMBER_OF) &&
8960 !PartialStruct.IsArraySection)
8961 return;
8962 Address LBAddr = PartialStruct.LowestElem.second;
8963 Address HBAddr = PartialStruct.HighestElem.second;
8964 if (PartialStruct.HasCompleteRecord) {
8965 LBAddr = PartialStruct.LB;
8966 HBAddr = PartialStruct.LB;
8967 }
8968 CombinedInfo.Exprs.push_back(VD);
8969 // Base is the base of the struct
8970 CombinedInfo.BasePointers.push_back(PartialStruct.Base.getPointer());
8971 // Pointer is the address of the lowest element
8972 llvm::Value *LB = LBAddr.getPointer();
8973 CombinedInfo.Pointers.push_back(LB);
8974 // There should not be a mapper for a combined entry.
8975 CombinedInfo.Mappers.push_back(nullptr);
8976 // Size is (addr of {highest+1} element) - (addr of lowest element)
8977 llvm::Value *HB = HBAddr.getPointer();
8978 llvm::Value *HAddr =
8979 CGF.Builder.CreateConstGEP1_32(HBAddr.getElementType(), HB, /*Idx0=*/1);
8980 llvm::Value *CLAddr = CGF.Builder.CreatePointerCast(LB, CGF.VoidPtrTy);
8981 llvm::Value *CHAddr = CGF.Builder.CreatePointerCast(HAddr, CGF.VoidPtrTy);
8982 llvm::Value *Diff = CGF.Builder.CreatePtrDiff(CGF.Int8Ty, CHAddr, CLAddr);
8983 llvm::Value *Size = CGF.Builder.CreateIntCast(Diff, CGF.Int64Ty,
8984 /*isSigned=*/false);
8985 CombinedInfo.Sizes.push_back(Size);
8986 // The map type is always TARGET_PARAM when generating info for captures.
8987 CombinedInfo.Types.push_back(NotTargetParams ? OMP_MAP_NONE
8988 : OMP_MAP_TARGET_PARAM);
8989 // If any element has the present modifier, then make sure the runtime
8990 // doesn't attempt to allocate the struct.
8991 if (CurTypes.end() !=
8992 llvm::find_if(CurTypes, [](OpenMPOffloadMappingFlags Type) {
8993 return Type & OMP_MAP_PRESENT;
8994 }))
8995 CombinedInfo.Types.back() |= OMP_MAP_PRESENT;
8996 // Remove the TARGET_PARAM flag from the first element.
8997 (*CurTypes.begin()) &= ~OMP_MAP_TARGET_PARAM;
8998 // If any element has the ompx_hold modifier, then make sure the runtime
8999 // uses the hold reference count for the struct as a whole so that it won't
9000 // be unmapped by an extra dynamic reference count decrement. Add it to all
9001 // elements as well so the runtime knows which reference count to check
9002 // when determining whether it's time for device-to-host transfers of
9003 // individual elements.
9004 if (CurTypes.end() !=
9005 llvm::find_if(CurTypes, [](OpenMPOffloadMappingFlags Type) {
9006 return Type & OMP_MAP_OMPX_HOLD;
9007 })) {
9008 CombinedInfo.Types.back() |= OMP_MAP_OMPX_HOLD;
9009 for (auto &M : CurTypes)
9010 M |= OMP_MAP_OMPX_HOLD;
9011 }
9012
9013 // All other current entries will be MEMBER_OF the combined entry
9014 // (except for PTR_AND_OBJ entries which do not have a placeholder value
9015 // 0xFFFF in the MEMBER_OF field).
9016 OpenMPOffloadMappingFlags MemberOfFlag =
9017 getMemberOfFlag(CombinedInfo.BasePointers.size() - 1);
9018 for (auto &M : CurTypes)
9019 setCorrectMemberOfFlag(M, MemberOfFlag);
9020 }
9021
9022 /// Generate all the base pointers, section pointers, sizes, map types, and
9023 /// mappers for the extracted mappable expressions (all included in \a
9024 /// CombinedInfo). Also, for each item that relates to a device pointer, a
9025 /// pair of the relevant declaration and index where it occurs is appended to
9026 /// the device pointers info array.
9027 void generateAllInfo(
9028 MapCombinedInfoTy &CombinedInfo,
9029 const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet =
9030 llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const {
9031 assert(CurDir.is<const OMPExecutableDirective *>() &&
9032 "Expect an executable directive");
9033 const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
9034 generateAllInfoForClauses(CurExecDir->clauses(), CombinedInfo, SkipVarSet);
9035 }
9036
9037 /// Generate all the base pointers, section pointers, sizes, map types, and
9038 /// mappers for the extracted map clauses of a user-defined mapper (all
9039 /// included in \a CombinedInfo).
9040 void generateAllInfoForMapper(MapCombinedInfoTy &CombinedInfo) const {
9041 assert(CurDir.is<const OMPDeclareMapperDecl *>() &&
9042 "Expect a declare mapper directive");
9043 const auto *CurMapperDir = CurDir.get<const OMPDeclareMapperDecl *>();
9044 generateAllInfoForClauses(CurMapperDir->clauses(), CombinedInfo);
9045 }
9046
9047 /// Emit capture info for lambdas for variables captured by reference.
9048 void generateInfoForLambdaCaptures(
9049 const ValueDecl *VD, llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo,
9050 llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers) const {
9051 const auto *RD = VD->getType()
9052 .getCanonicalType()
9053 .getNonReferenceType()
9054 ->getAsCXXRecordDecl();
9055 if (!RD || !RD->isLambda())
9056 return;
9057 Address VDAddr =
9058 Address::deprecated(Arg, CGF.getContext().getDeclAlign(VD));
9059 LValue VDLVal = CGF.MakeAddrLValue(
9060 VDAddr, VD->getType().getCanonicalType().getNonReferenceType());
9061 llvm::DenseMap<const VarDecl *, FieldDecl *> Captures;
9062 FieldDecl *ThisCapture = nullptr;
9063 RD->getCaptureFields(Captures, ThisCapture);
9064 if (ThisCapture) {
9065 LValue ThisLVal =
9066 CGF.EmitLValueForFieldInitialization(VDLVal, ThisCapture);
9067 LValue ThisLValVal = CGF.EmitLValueForField(VDLVal, ThisCapture);
9068 LambdaPointers.try_emplace(ThisLVal.getPointer(CGF),
9069 VDLVal.getPointer(CGF));
9070 CombinedInfo.Exprs.push_back(VD);
9071 CombinedInfo.BasePointers.push_back(ThisLVal.getPointer(CGF));
9072 CombinedInfo.Pointers.push_back(ThisLValVal.getPointer(CGF));
9073 CombinedInfo.Sizes.push_back(
9074 CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy),
9075 CGF.Int64Ty, /*isSigned=*/true));
9076 CombinedInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
9077 OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
9078 CombinedInfo.Mappers.push_back(nullptr);
9079 }
9080 for (const LambdaCapture &LC : RD->captures()) {
9081 if (!LC.capturesVariable())
9082 continue;
9083 const VarDecl *VD = LC.getCapturedVar();
9084 if (LC.getCaptureKind() != LCK_ByRef && !VD->getType()->isPointerType())
9085 continue;
9086 auto It = Captures.find(VD);
9087 assert(It != Captures.end() && "Found lambda capture without field.");
9088 LValue VarLVal = CGF.EmitLValueForFieldInitialization(VDLVal, It->second);
9089 if (LC.getCaptureKind() == LCK_ByRef) {
9090 LValue VarLValVal = CGF.EmitLValueForField(VDLVal, It->second);
9091 LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
9092 VDLVal.getPointer(CGF));
9093 CombinedInfo.Exprs.push_back(VD);
9094 CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF));
9095 CombinedInfo.Pointers.push_back(VarLValVal.getPointer(CGF));
9096 CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
9097 CGF.getTypeSize(
9098 VD->getType().getCanonicalType().getNonReferenceType()),
9099 CGF.Int64Ty, /*isSigned=*/true));
9100 } else {
9101 RValue VarRVal = CGF.EmitLoadOfLValue(VarLVal, RD->getLocation());
9102 LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
9103 VDLVal.getPointer(CGF));
9104 CombinedInfo.Exprs.push_back(VD);
9105 CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF));
9106 CombinedInfo.Pointers.push_back(VarRVal.getScalarVal());
9107 CombinedInfo.Sizes.push_back(llvm::ConstantInt::get(CGF.Int64Ty, 0));
9108 }
9109 CombinedInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
9110 OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
9111 CombinedInfo.Mappers.push_back(nullptr);
9112 }
9113 }
9114
9115 /// Set correct indices for lambda captures.
9116 void adjustMemberOfForLambdaCaptures(
9117 const llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers,
9118 MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
9119 MapFlagsArrayTy &Types) const {
9120 for (unsigned I = 0, E = Types.size(); I < E; ++I) {
9121 // Set the correct member_of idx for all implicit lambda captures.
9122 if (Types[I] != (OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
9123 OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT))
9124 continue;
9125 llvm::Value *BasePtr = LambdaPointers.lookup(*BasePointers[I]);
9126 assert(BasePtr && "Unable to find base lambda address.");
9127 int TgtIdx = -1;
9128 for (unsigned J = I; J > 0; --J) {
9129 unsigned Idx = J - 1;
9130 if (Pointers[Idx] != BasePtr)
9131 continue;
9132 TgtIdx = Idx;
9133 break;
9134 }
9135 assert(TgtIdx != -1 && "Unable to find parent lambda.");
9136 // All other current entries will be MEMBER_OF the combined entry
9137 // (except for PTR_AND_OBJ entries which do not have a placeholder value
9138 // 0xFFFF in the MEMBER_OF field).
9139 OpenMPOffloadMappingFlags MemberOfFlag = getMemberOfFlag(TgtIdx);
9140 setCorrectMemberOfFlag(Types[I], MemberOfFlag);
9141 }
9142 }
9143
9144 /// Generate the base pointers, section pointers, sizes, map types, and
9145 /// mappers associated to a given capture (all included in \a CombinedInfo).
9146 void generateInfoForCapture(const CapturedStmt::Capture *Cap,
9147 llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo,
9148 StructRangeInfoTy &PartialStruct) const {
9149 assert(!Cap->capturesVariableArrayType() &&
9150 "Not expecting to generate map info for a variable array type!");
9151
9152 // We need to know when we are generating information for the first component.
9153 const ValueDecl *VD = Cap->capturesThis()
9154 ? nullptr
9155 : Cap->getCapturedVar()->getCanonicalDecl();
9156
9157 // For map(to: lambda): skip it here; it is processed in
9158 // generateDefaultMapInfo.
9159 if (LambdasMap.count(VD))
9160 return;
9161
9162 // If this declaration appears in an is_device_ptr clause we just have to
9163 // pass the pointer by value. If it is a reference to a declaration, we just
9164 // pass its value.
9165 if (DevPointersMap.count(VD)) {
9166 CombinedInfo.Exprs.push_back(VD);
9167 CombinedInfo.BasePointers.emplace_back(Arg, VD);
9168 CombinedInfo.Pointers.push_back(Arg);
9169 CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
9170 CGF.getTypeSize(CGF.getContext().VoidPtrTy), CGF.Int64Ty,
9171 /*isSigned=*/true));
9172 CombinedInfo.Types.push_back(
9173 (Cap->capturesVariable() ? OMP_MAP_TO : OMP_MAP_LITERAL) |
9174 OMP_MAP_TARGET_PARAM);
9175 CombinedInfo.Mappers.push_back(nullptr);
9176 return;
9177 }
9178
9179 using MapData =
9180 std::tuple<OMPClauseMappableExprCommon::MappableExprComponentListRef,
9181 OpenMPMapClauseKind, ArrayRef<OpenMPMapModifierKind>, bool,
9182 const ValueDecl *, const Expr *>;
9183 SmallVector<MapData, 4> DeclComponentLists;
9184 assert(CurDir.is<const OMPExecutableDirective *>() &&
9185 "Expect an executable directive");
9186 const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
9187 for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) {
9188 const auto *EI = C->getVarRefs().begin();
9189 for (const auto L : C->decl_component_lists(VD)) {
9190 const ValueDecl *VDecl, *Mapper;
9191 // The expression is not correct if the mapping is implicit.
9192 const Expr *E = (C->getMapLoc().isValid()) ? *EI : nullptr;
9193 OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
9194 std::tie(VDecl, Components, Mapper) = L;
9195 assert(VDecl == VD && "We got information for the wrong declaration??");
9196 assert(!Components.empty() &&
9197 "Not expecting declaration with no component lists.");
9198 DeclComponentLists.emplace_back(Components, C->getMapType(),
9199 C->getMapTypeModifiers(),
9200 C->isImplicit(), Mapper, E);
9201 ++EI;
9202 }
9203 }
9204 llvm::stable_sort(DeclComponentLists, [](const MapData &LHS,
9205 const MapData &RHS) {
9206 ArrayRef<OpenMPMapModifierKind> MapModifiers = std::get<2>(LHS);
9207 OpenMPMapClauseKind MapType = std::get<1>(RHS);
9208 bool HasPresent =
9209 llvm::is_contained(MapModifiers, clang::OMPC_MAP_MODIFIER_present);
9210 bool HasAllocs = MapType == OMPC_MAP_alloc;
9211 MapModifiers = std::get<2>(RHS);
9212 MapType = std::get<1>(LHS);
9213 bool HasPresentR =
9214 llvm::is_contained(MapModifiers, clang::OMPC_MAP_MODIFIER_present);
9215 bool HasAllocsR = MapType == OMPC_MAP_alloc;
9216 return (HasPresent && !HasPresentR) || (HasAllocs && !HasAllocsR);
9217 });
9218
9219 // Find overlapping elements (including the offset from the base element).
9220 llvm::SmallDenseMap<
9221 const MapData *,
9222 llvm::SmallVector<
9223 OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>,
9224 4>
9225 OverlappedData;
9226 size_t Count = 0;
9227 for (const MapData &L : DeclComponentLists) {
9228 OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
9229 OpenMPMapClauseKind MapType;
9230 ArrayRef<OpenMPMapModifierKind> MapModifiers;
9231 bool IsImplicit;
9232 const ValueDecl *Mapper;
9233 const Expr *VarRef;
9234 std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
9235 L;
9236 ++Count;
9237 for (const MapData &L1 : makeArrayRef(DeclComponentLists).slice(Count)) {
9238 OMPClauseMappableExprCommon::MappableExprComponentListRef Components1;
9239 std::tie(Components1, MapType, MapModifiers, IsImplicit, Mapper,
9240 VarRef) = L1;
9241 auto CI = Components.rbegin();
9242 auto CE = Components.rend();
9243 auto SI = Components1.rbegin();
9244 auto SE = Components1.rend();
9245 for (; CI != CE && SI != SE; ++CI, ++SI) {
9246 if (CI->getAssociatedExpression()->getStmtClass() !=
9247 SI->getAssociatedExpression()->getStmtClass())
9248 break;
9249 // Are we dealing with different variables/fields?
9250 if (CI->getAssociatedDeclaration() != SI->getAssociatedDeclaration())
9251 break;
9252 }
9253 // We found an overlap if, for at least one of the component lists, we
9254 // reached the head of the list.
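// (E.g., illustratively, map(to: s) together with map(from: s.x): walking
// both lists from the base, the list for 's' is exhausted first, so 's'
// becomes the base mapping and 's.x' is recorded as overlapping it.)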
9255 if (CI == CE || SI == SE) {
9256 // Ignore it if it is the same component.
9257 if (CI == CE && SI == SE)
9258 continue;
9259 const auto It = (SI == SE) ? CI : SI;
9260 // If one component is a pointer and another one is a kind of
9261 // dereference of this pointer (array subscript, section, dereference,
9262 // etc.), it is not an overlap.
9263 // The same holds if one component is a base and another component is a
9264 // dereferenced pointer memberexpr with the same base.
9265 if (!isa<MemberExpr>(It->getAssociatedExpression()) ||
9266 (std::prev(It)->getAssociatedDeclaration() &&
9267 std::prev(It)
9268 ->getAssociatedDeclaration()
9269 ->getType()
9270 ->isPointerType()) ||
9271 (It->getAssociatedDeclaration() &&
9272 It->getAssociatedDeclaration()->getType()->isPointerType() &&
9273 std::next(It) != CE && std::next(It) != SE))
9274 continue;
9275 const MapData &BaseData = CI == CE ? L : L1;
9276 OMPClauseMappableExprCommon::MappableExprComponentListRef SubData =
9277 SI == SE ? Components : Components1;
9278 auto &OverlappedElements = OverlappedData.FindAndConstruct(&BaseData);
9279 OverlappedElements.getSecond().push_back(SubData);
9280 }
9281 }
9282 }
9283 // Sort the overlapped elements for each item.
9284 llvm::SmallVector<const FieldDecl *, 4> Layout;
9285 if (!OverlappedData.empty()) {
9286 const Type *BaseType = VD->getType().getCanonicalType().getTypePtr();
9287 const Type *OrigType = BaseType->getPointeeOrArrayElementType();
9288 while (BaseType != OrigType) {
9289 BaseType = OrigType->getCanonicalTypeInternal().getTypePtr();
9290 OrigType = BaseType->getPointeeOrArrayElementType();
9291 }
9292
9293 if (const auto *CRD = BaseType->getAsCXXRecordDecl())
9294 getPlainLayout(CRD, Layout, /*AsBase=*/false);
9295 else {
9296 const auto *RD = BaseType->getAsRecordDecl();
9297 Layout.append(RD->field_begin(), RD->field_end());
9298 }
9299 }
9300 for (auto &Pair : OverlappedData) {
9301 llvm::stable_sort(
9302 Pair.getSecond(),
9303 [&Layout](
9304 OMPClauseMappableExprCommon::MappableExprComponentListRef First,
9305 OMPClauseMappableExprCommon::MappableExprComponentListRef
9306 Second) {
9307 auto CI = First.rbegin();
9308 auto CE = First.rend();
9309 auto SI = Second.rbegin();
9310 auto SE = Second.rend();
9311 for (; CI != CE && SI != SE; ++CI, ++SI) {
9312 if (CI->getAssociatedExpression()->getStmtClass() !=
9313 SI->getAssociatedExpression()->getStmtClass())
9314 break;
9315 // Are we dealing with different variables/fields?
9316 if (CI->getAssociatedDeclaration() !=
9317 SI->getAssociatedDeclaration())
9318 break;
9319 }
9320
9321 // Lists contain the same elements.
9322 if (CI == CE && SI == SE)
9323 return false;
9324
9325 // A list with fewer elements is less than a list with more elements.
9326 if (CI == CE || SI == SE)
9327 return CI == CE;
9328
9329 const auto *FD1 = cast<FieldDecl>(CI->getAssociatedDeclaration());
9330 const auto *FD2 = cast<FieldDecl>(SI->getAssociatedDeclaration());
9331 if (FD1->getParent() == FD2->getParent())
9332 return FD1->getFieldIndex() < FD2->getFieldIndex();
9333 const auto *It =
9334 llvm::find_if(Layout, [FD1, FD2](const FieldDecl *FD) {
9335 return FD == FD1 || FD == FD2;
9336 });
9337 return *It == FD1;
9338 });
9339 }
9340
9341 // Associated with a capture, because the mapping flags depend on it.
9342 // Go through all of the elements with the overlapped elements.
9343 bool IsFirstComponentList = true; 9344 for (const auto &Pair : OverlappedData) { 9345 const MapData &L = *Pair.getFirst(); 9346 OMPClauseMappableExprCommon::MappableExprComponentListRef Components; 9347 OpenMPMapClauseKind MapType; 9348 ArrayRef<OpenMPMapModifierKind> MapModifiers; 9349 bool IsImplicit; 9350 const ValueDecl *Mapper; 9351 const Expr *VarRef; 9352 std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) = 9353 L; 9354 ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef> 9355 OverlappedComponents = Pair.getSecond(); 9356 generateInfoForComponentList( 9357 MapType, MapModifiers, llvm::None, Components, CombinedInfo, 9358 PartialStruct, IsFirstComponentList, IsImplicit, Mapper, 9359 /*ForDeviceAddr=*/false, VD, VarRef, OverlappedComponents); 9360 IsFirstComponentList = false; 9361 } 9362 // Go through other elements without overlapped elements. 9363 for (const MapData &L : DeclComponentLists) { 9364 OMPClauseMappableExprCommon::MappableExprComponentListRef Components; 9365 OpenMPMapClauseKind MapType; 9366 ArrayRef<OpenMPMapModifierKind> MapModifiers; 9367 bool IsImplicit; 9368 const ValueDecl *Mapper; 9369 const Expr *VarRef; 9370 std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) = 9371 L; 9372 auto It = OverlappedData.find(&L); 9373 if (It == OverlappedData.end()) 9374 generateInfoForComponentList(MapType, MapModifiers, llvm::None, 9375 Components, CombinedInfo, PartialStruct, 9376 IsFirstComponentList, IsImplicit, Mapper, 9377 /*ForDeviceAddr=*/false, VD, VarRef); 9378 IsFirstComponentList = false; 9379 } 9380 } 9381 9382 /// Generate the default map information for a given capture \a CI, 9383 /// record field declaration \a RI and captured value \a CV. 9384 void generateDefaultMapInfo(const CapturedStmt::Capture &CI, 9385 const FieldDecl &RI, llvm::Value *CV, 9386 MapCombinedInfoTy &CombinedInfo) const { 9387 bool IsImplicit = true; 9388 // Do the default mapping. 9389 if (CI.capturesThis()) { 9390 CombinedInfo.Exprs.push_back(nullptr); 9391 CombinedInfo.BasePointers.push_back(CV); 9392 CombinedInfo.Pointers.push_back(CV); 9393 const auto *PtrTy = cast<PointerType>(RI.getType().getTypePtr()); 9394 CombinedInfo.Sizes.push_back( 9395 CGF.Builder.CreateIntCast(CGF.getTypeSize(PtrTy->getPointeeType()), 9396 CGF.Int64Ty, /*isSigned=*/true)); 9397 // Default map type. 9398 CombinedInfo.Types.push_back(OMP_MAP_TO | OMP_MAP_FROM); 9399 } else if (CI.capturesVariableByCopy()) { 9400 const VarDecl *VD = CI.getCapturedVar(); 9401 CombinedInfo.Exprs.push_back(VD->getCanonicalDecl()); 9402 CombinedInfo.BasePointers.push_back(CV); 9403 CombinedInfo.Pointers.push_back(CV); 9404 if (!RI.getType()->isAnyPointerType()) { 9405 // We have to signal to the runtime captures passed by value that are 9406 // not pointers. 9407 CombinedInfo.Types.push_back(OMP_MAP_LITERAL); 9408 CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast( 9409 CGF.getTypeSize(RI.getType()), CGF.Int64Ty, /*isSigned=*/true)); 9410 } else { 9411 // Pointers are implicitly mapped with a zero size and no flags 9412 // (other than first map that is added for all implicit maps). 
9413 CombinedInfo.Types.push_back(OMP_MAP_NONE);
9414 CombinedInfo.Sizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty));
9415 }
9416 auto I = FirstPrivateDecls.find(VD);
9417 if (I != FirstPrivateDecls.end())
9418 IsImplicit = I->getSecond();
9419 } else {
9420 assert(CI.capturesVariable() && "Expected captured reference.");
9421 const auto *PtrTy = cast<ReferenceType>(RI.getType().getTypePtr());
9422 QualType ElementType = PtrTy->getPointeeType();
9423 CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
9424 CGF.getTypeSize(ElementType), CGF.Int64Ty, /*isSigned=*/true));
9425 // The default map type for a scalar/complex type is 'to' because by
9426 // default the value doesn't have to be retrieved. For an aggregate
9427 // type, the default is 'tofrom'.
9428 CombinedInfo.Types.push_back(getMapModifiersForPrivateClauses(CI));
9429 const VarDecl *VD = CI.getCapturedVar();
9430 auto I = FirstPrivateDecls.find(VD);
9431 CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
9432 CombinedInfo.BasePointers.push_back(CV);
9433 if (I != FirstPrivateDecls.end() && ElementType->isAnyPointerType()) {
9434 Address PtrAddr = CGF.EmitLoadOfReference(CGF.MakeAddrLValue(
9435 CV, ElementType, CGF.getContext().getDeclAlign(VD),
9436 AlignmentSource::Decl));
9437 CombinedInfo.Pointers.push_back(PtrAddr.getPointer());
9438 } else {
9439 CombinedInfo.Pointers.push_back(CV);
9440 }
9441 if (I != FirstPrivateDecls.end())
9442 IsImplicit = I->getSecond();
9443 }
9444 // Every default map produces a single argument which is a target parameter.
9445 CombinedInfo.Types.back() |= OMP_MAP_TARGET_PARAM;
9446
9447 // Add a flag stating that this is an implicit map.
9448 if (IsImplicit)
9449 CombinedInfo.Types.back() |= OMP_MAP_IMPLICIT;
9450
9451 // No user-defined mapper for default mapping.
9452 CombinedInfo.Mappers.push_back(nullptr);
9453 }
9454 };
9455 } // anonymous namespace
9456
9457 static void emitNonContiguousDescriptor(
9458 CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo,
9459 CGOpenMPRuntime::TargetDataInfo &Info) {
9460 CodeGenModule &CGM = CGF.CGM;
9461 MappableExprsHandler::MapCombinedInfoTy::StructNonContiguousInfo
9462 &NonContigInfo = CombinedInfo.NonContigInfo;
9463
9464 // Build an array of struct descriptor_dim and then assign it to
9465 // offload_args.
9466 //
9467 // struct descriptor_dim {
9468 // uint64_t offset;
9469 // uint64_t count;
9470 // uint64_t stride;
9471 // };
9472 ASTContext &C = CGF.getContext();
9473 QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
9474 RecordDecl *RD;
9475 RD = C.buildImplicitRecord("descriptor_dim");
9476 RD->startDefinition();
9477 addFieldToRecordDecl(C, RD, Int64Ty);
9478 addFieldToRecordDecl(C, RD, Int64Ty);
9479 addFieldToRecordDecl(C, RD, Int64Ty);
9480 RD->completeDefinition();
9481 QualType DimTy = C.getRecordType(RD);
9482
9483 enum { OffsetFD = 0, CountFD, StrideFD };
9484 // We need two index variables here since the size of "Dims" is the same as
9485 // the size of Components; however, the sizes of offset, count, and stride
9486 // equal the number of non-contiguous base declarations.
9487 for (unsigned I = 0, L = 0, E = NonContigInfo.Dims.size(); I < E; ++I) {
9488 // Skip emitting IR if the dimension size is 1 since it cannot be
9489 // non-contiguous.
9490 if (NonContigInfo.Dims[I] == 1) 9491 continue; 9492 llvm::APInt Size(/*numBits=*/32, NonContigInfo.Dims[I]); 9493 QualType ArrayTy = 9494 C.getConstantArrayType(DimTy, Size, nullptr, ArrayType::Normal, 0); 9495 Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims"); 9496 for (unsigned II = 0, EE = NonContigInfo.Dims[I]; II < EE; ++II) { 9497 unsigned RevIdx = EE - II - 1; 9498 LValue DimsLVal = CGF.MakeAddrLValue( 9499 CGF.Builder.CreateConstArrayGEP(DimsAddr, II), DimTy); 9500 // Offset 9501 LValue OffsetLVal = CGF.EmitLValueForField( 9502 DimsLVal, *std::next(RD->field_begin(), OffsetFD)); 9503 CGF.EmitStoreOfScalar(NonContigInfo.Offsets[L][RevIdx], OffsetLVal); 9504 // Count 9505 LValue CountLVal = CGF.EmitLValueForField( 9506 DimsLVal, *std::next(RD->field_begin(), CountFD)); 9507 CGF.EmitStoreOfScalar(NonContigInfo.Counts[L][RevIdx], CountLVal); 9508 // Stride 9509 LValue StrideLVal = CGF.EmitLValueForField( 9510 DimsLVal, *std::next(RD->field_begin(), StrideFD)); 9511 CGF.EmitStoreOfScalar(NonContigInfo.Strides[L][RevIdx], StrideLVal); 9512 } 9513 // args[I] = &dims 9514 Address DAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 9515 DimsAddr, CGM.Int8PtrTy); 9516 llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32( 9517 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs), 9518 Info.PointersArray, 0, I); 9519 Address PAddr = Address::deprecated(P, CGF.getPointerAlign()); 9520 CGF.Builder.CreateStore(DAddr.getPointer(), PAddr); 9521 ++L; 9522 } 9523 } 9524 9525 // Try to extract the base declaration from a `this->x` expression if possible. 9526 static ValueDecl *getDeclFromThisExpr(const Expr *E) { 9527 if (!E) 9528 return nullptr; 9529 9530 if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E->IgnoreParenCasts())) 9531 if (const MemberExpr *ME = 9532 dyn_cast<MemberExpr>(OASE->getBase()->IgnoreParenImpCasts())) 9533 return ME->getMemberDecl(); 9534 return nullptr; 9535 } 9536 9537 /// Emit a string constant containing the names of the values mapped to the 9538 /// offloading runtime library. 9539 llvm::Constant * 9540 emitMappingInformation(CodeGenFunction &CGF, llvm::OpenMPIRBuilder &OMPBuilder, 9541 MappableExprsHandler::MappingExprInfo &MapExprs) { 9542 9543 uint32_t SrcLocStrSize; 9544 if (!MapExprs.getMapDecl() && !MapExprs.getMapExpr()) 9545 return OMPBuilder.getOrCreateDefaultSrcLocStr(SrcLocStrSize); 9546 9547 SourceLocation Loc; 9548 if (!MapExprs.getMapDecl() && MapExprs.getMapExpr()) { 9549 if (const ValueDecl *VD = getDeclFromThisExpr(MapExprs.getMapExpr())) 9550 Loc = VD->getLocation(); 9551 else 9552 Loc = MapExprs.getMapExpr()->getExprLoc(); 9553 } else { 9554 Loc = MapExprs.getMapDecl()->getLocation(); 9555 } 9556 9557 std::string ExprName; 9558 if (MapExprs.getMapExpr()) { 9559 PrintingPolicy P(CGF.getContext().getLangOpts()); 9560 llvm::raw_string_ostream OS(ExprName); 9561 MapExprs.getMapExpr()->printPretty(OS, nullptr, P); 9562 OS.flush(); 9563 } else { 9564 ExprName = MapExprs.getMapDecl()->getNameAsString(); 9565 } 9566 9567 PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc); 9568 return OMPBuilder.getOrCreateSrcLocStr(PLoc.getFilename(), ExprName, 9569 PLoc.getLine(), PLoc.getColumn(), 9570 SrcLocStrSize); 9571 } 9572 9573 /// Emit the arrays used to pass the captures and map information to the 9574 /// offloading runtime library. If there is no map or capture information, 9575 /// return nullptr by reference. 
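/// A sketch of the typical output (names illustrative): each invocation
/// materializes the stack arrays `.offload_baseptrs`, `.offload_ptrs` and
/// `.offload_mappers`, and, when all sizes are compile-time constants, a
/// global such as
///   @.offload_sizes = private unnamed_addr constant [1 x i64] [i64 4]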
9576 static void emitOffloadingArrays( 9577 CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo, 9578 CGOpenMPRuntime::TargetDataInfo &Info, llvm::OpenMPIRBuilder &OMPBuilder, 9579 bool IsNonContiguous = false) { 9580 CodeGenModule &CGM = CGF.CGM; 9581 ASTContext &Ctx = CGF.getContext(); 9582 9583 // Reset the array information. 9584 Info.clearArrayInfo(); 9585 Info.NumberOfPtrs = CombinedInfo.BasePointers.size(); 9586 9587 if (Info.NumberOfPtrs) { 9588 // Detect if we have any capture size requiring runtime evaluation of the 9589 // size so that a constant array could be eventually used. 9590 bool hasRuntimeEvaluationCaptureSize = false; 9591 for (llvm::Value *S : CombinedInfo.Sizes) 9592 if (!isa<llvm::Constant>(S)) { 9593 hasRuntimeEvaluationCaptureSize = true; 9594 break; 9595 } 9596 9597 llvm::APInt PointerNumAP(32, Info.NumberOfPtrs, /*isSigned=*/true); 9598 QualType PointerArrayType = Ctx.getConstantArrayType( 9599 Ctx.VoidPtrTy, PointerNumAP, nullptr, ArrayType::Normal, 9600 /*IndexTypeQuals=*/0); 9601 9602 Info.BasePointersArray = 9603 CGF.CreateMemTemp(PointerArrayType, ".offload_baseptrs").getPointer(); 9604 Info.PointersArray = 9605 CGF.CreateMemTemp(PointerArrayType, ".offload_ptrs").getPointer(); 9606 Address MappersArray = 9607 CGF.CreateMemTemp(PointerArrayType, ".offload_mappers"); 9608 Info.MappersArray = MappersArray.getPointer(); 9609 9610 // If we don't have any VLA types or other types that require runtime 9611 // evaluation, we can use a constant array for the map sizes, otherwise we 9612 // need to fill up the arrays as we do for the pointers. 9613 QualType Int64Ty = 9614 Ctx.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1); 9615 if (hasRuntimeEvaluationCaptureSize) { 9616 QualType SizeArrayType = Ctx.getConstantArrayType( 9617 Int64Ty, PointerNumAP, nullptr, ArrayType::Normal, 9618 /*IndexTypeQuals=*/0); 9619 Info.SizesArray = 9620 CGF.CreateMemTemp(SizeArrayType, ".offload_sizes").getPointer(); 9621 } else { 9622 // We expect all the sizes to be constant, so we collect them to create 9623 // a constant array. 9624 SmallVector<llvm::Constant *, 16> ConstSizes; 9625 for (unsigned I = 0, E = CombinedInfo.Sizes.size(); I < E; ++I) { 9626 if (IsNonContiguous && 9627 (CombinedInfo.Types[I] & MappableExprsHandler::OMP_MAP_NON_CONTIG)) { 9628 ConstSizes.push_back(llvm::ConstantInt::get( 9629 CGF.Int64Ty, CombinedInfo.NonContigInfo.Dims[I])); 9630 } else { 9631 ConstSizes.push_back(cast<llvm::Constant>(CombinedInfo.Sizes[I])); 9632 } 9633 } 9634 9635 auto *SizesArrayInit = llvm::ConstantArray::get( 9636 llvm::ArrayType::get(CGM.Int64Ty, ConstSizes.size()), ConstSizes); 9637 std::string Name = CGM.getOpenMPRuntime().getName({"offload_sizes"}); 9638 auto *SizesArrayGbl = new llvm::GlobalVariable( 9639 CGM.getModule(), SizesArrayInit->getType(), 9640 /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage, 9641 SizesArrayInit, Name); 9642 SizesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); 9643 Info.SizesArray = SizesArrayGbl; 9644 } 9645 9646 // The map types are always constant so we don't need to generate code to 9647 // fill arrays. Instead, we create an array constant. 
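// For instance (illustrative), map(to: a) map(tofrom: b) becomes
//   @.offload_maptypes = private unnamed_addr constant [2 x i64]
//                        [i64 33, i64 35]
// where 33 = TO | TARGET_PARAM and 35 = TO | FROM | TARGET_PARAM.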
9648 SmallVector<uint64_t, 4> Mapping(CombinedInfo.Types.size(), 0); 9649 llvm::copy(CombinedInfo.Types, Mapping.begin()); 9650 std::string MaptypesName = 9651 CGM.getOpenMPRuntime().getName({"offload_maptypes"}); 9652 auto *MapTypesArrayGbl = 9653 OMPBuilder.createOffloadMaptypes(Mapping, MaptypesName); 9654 Info.MapTypesArray = MapTypesArrayGbl; 9655 9656 // The information types are only built if there is debug information 9657 // requested. 9658 if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo) { 9659 Info.MapNamesArray = llvm::Constant::getNullValue( 9660 llvm::Type::getInt8Ty(CGF.Builder.getContext())->getPointerTo()); 9661 } else { 9662 auto fillInfoMap = [&](MappableExprsHandler::MappingExprInfo &MapExpr) { 9663 return emitMappingInformation(CGF, OMPBuilder, MapExpr); 9664 }; 9665 SmallVector<llvm::Constant *, 4> InfoMap(CombinedInfo.Exprs.size()); 9666 llvm::transform(CombinedInfo.Exprs, InfoMap.begin(), fillInfoMap); 9667 std::string MapnamesName = 9668 CGM.getOpenMPRuntime().getName({"offload_mapnames"}); 9669 auto *MapNamesArrayGbl = 9670 OMPBuilder.createOffloadMapnames(InfoMap, MapnamesName); 9671 Info.MapNamesArray = MapNamesArrayGbl; 9672 } 9673 9674 // If there's a present map type modifier, it must not be applied to the end 9675 // of a region, so generate a separate map type array in that case. 9676 if (Info.separateBeginEndCalls()) { 9677 bool EndMapTypesDiffer = false; 9678 for (uint64_t &Type : Mapping) { 9679 if (Type & MappableExprsHandler::OMP_MAP_PRESENT) { 9680 Type &= ~MappableExprsHandler::OMP_MAP_PRESENT; 9681 EndMapTypesDiffer = true; 9682 } 9683 } 9684 if (EndMapTypesDiffer) { 9685 MapTypesArrayGbl = 9686 OMPBuilder.createOffloadMaptypes(Mapping, MaptypesName); 9687 Info.MapTypesArrayEnd = MapTypesArrayGbl; 9688 } 9689 } 9690 9691 for (unsigned I = 0; I < Info.NumberOfPtrs; ++I) { 9692 llvm::Value *BPVal = *CombinedInfo.BasePointers[I]; 9693 llvm::Value *BP = CGF.Builder.CreateConstInBoundsGEP2_32( 9694 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs), 9695 Info.BasePointersArray, 0, I); 9696 BP = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 9697 BP, BPVal->getType()->getPointerTo(/*AddrSpace=*/0)); 9698 Address BPAddr = 9699 Address::deprecated(BP, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy)); 9700 CGF.Builder.CreateStore(BPVal, BPAddr); 9701 9702 if (Info.requiresDevicePointerInfo()) 9703 if (const ValueDecl *DevVD = 9704 CombinedInfo.BasePointers[I].getDevicePtrDecl()) 9705 Info.CaptureDeviceAddrMap.try_emplace(DevVD, BPAddr); 9706 9707 llvm::Value *PVal = CombinedInfo.Pointers[I]; 9708 llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32( 9709 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs), 9710 Info.PointersArray, 0, I); 9711 P = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 9712 P, PVal->getType()->getPointerTo(/*AddrSpace=*/0)); 9713 Address PAddr = 9714 Address::deprecated(P, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy)); 9715 CGF.Builder.CreateStore(PVal, PAddr); 9716 9717 if (hasRuntimeEvaluationCaptureSize) { 9718 llvm::Value *S = CGF.Builder.CreateConstInBoundsGEP2_32( 9719 llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs), 9720 Info.SizesArray, 9721 /*Idx0=*/0, 9722 /*Idx1=*/I); 9723 Address SAddr = 9724 Address::deprecated(S, Ctx.getTypeAlignInChars(Int64Ty)); 9725 CGF.Builder.CreateStore(CGF.Builder.CreateIntCast(CombinedInfo.Sizes[I], 9726 CGM.Int64Ty, 9727 /*isSigned=*/true), 9728 SAddr); 9729 } 9730 9731 // Fill up the mapper array. 
9732 llvm::Value *MFunc = llvm::ConstantPointerNull::get(CGM.VoidPtrTy); 9733 if (CombinedInfo.Mappers[I]) { 9734 MFunc = CGM.getOpenMPRuntime().getOrCreateUserDefinedMapperFunc( 9735 cast<OMPDeclareMapperDecl>(CombinedInfo.Mappers[I])); 9736 MFunc = CGF.Builder.CreatePointerCast(MFunc, CGM.VoidPtrTy); 9737 Info.HasMapper = true; 9738 } 9739 Address MAddr = CGF.Builder.CreateConstArrayGEP(MappersArray, I); 9740 CGF.Builder.CreateStore(MFunc, MAddr); 9741 } 9742 } 9743 9744 if (!IsNonContiguous || CombinedInfo.NonContigInfo.Offsets.empty() || 9745 Info.NumberOfPtrs == 0) 9746 return; 9747 9748 emitNonContiguousDescriptor(CGF, CombinedInfo, Info); 9749 } 9750 9751 namespace { 9752 /// Additional arguments for emitOffloadingArraysArgument function. 9753 struct ArgumentsOptions { 9754 bool ForEndCall = false; 9755 ArgumentsOptions() = default; 9756 ArgumentsOptions(bool ForEndCall) : ForEndCall(ForEndCall) {} 9757 }; 9758 } // namespace 9759 9760 /// Emit the arguments to be passed to the runtime library based on the 9761 /// arrays of base pointers, pointers, sizes, map types, and mappers. If 9762 /// ForEndCall, emit map types to be passed for the end of the region instead of 9763 /// the beginning. 9764 static void emitOffloadingArraysArgument( 9765 CodeGenFunction &CGF, llvm::Value *&BasePointersArrayArg, 9766 llvm::Value *&PointersArrayArg, llvm::Value *&SizesArrayArg, 9767 llvm::Value *&MapTypesArrayArg, llvm::Value *&MapNamesArrayArg, 9768 llvm::Value *&MappersArrayArg, CGOpenMPRuntime::TargetDataInfo &Info, 9769 const ArgumentsOptions &Options = ArgumentsOptions()) { 9770 assert((!Options.ForEndCall || Info.separateBeginEndCalls()) && 9771 "expected region end call to runtime only when end call is separate"); 9772 CodeGenModule &CGM = CGF.CGM; 9773 if (Info.NumberOfPtrs) { 9774 BasePointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( 9775 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs), 9776 Info.BasePointersArray, 9777 /*Idx0=*/0, /*Idx1=*/0); 9778 PointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( 9779 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs), 9780 Info.PointersArray, 9781 /*Idx0=*/0, 9782 /*Idx1=*/0); 9783 SizesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( 9784 llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs), Info.SizesArray, 9785 /*Idx0=*/0, /*Idx1=*/0); 9786 MapTypesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( 9787 llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs), 9788 Options.ForEndCall && Info.MapTypesArrayEnd ? Info.MapTypesArrayEnd 9789 : Info.MapTypesArray, 9790 /*Idx0=*/0, 9791 /*Idx1=*/0); 9792 9793 // Only emit the mapper information arrays if debug information is 9794 // requested. 
9795 if (CGF.CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo) 9796 MapNamesArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy); 9797 else 9798 MapNamesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( 9799 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs), 9800 Info.MapNamesArray, 9801 /*Idx0=*/0, 9802 /*Idx1=*/0); 9803 // If there is no user-defined mapper, set the mapper array to nullptr to 9804 // avoid an unnecessary data privatization 9805 if (!Info.HasMapper) 9806 MappersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy); 9807 else 9808 MappersArrayArg = 9809 CGF.Builder.CreatePointerCast(Info.MappersArray, CGM.VoidPtrPtrTy); 9810 } else { 9811 BasePointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy); 9812 PointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy); 9813 SizesArrayArg = llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo()); 9814 MapTypesArrayArg = 9815 llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo()); 9816 MapNamesArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy); 9817 MappersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy); 9818 } 9819 } 9820 9821 /// Check for inner distribute directive. 9822 static const OMPExecutableDirective * 9823 getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) { 9824 const auto *CS = D.getInnermostCapturedStmt(); 9825 const auto *Body = 9826 CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true); 9827 const Stmt *ChildStmt = 9828 CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body); 9829 9830 if (const auto *NestedDir = 9831 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) { 9832 OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind(); 9833 switch (D.getDirectiveKind()) { 9834 case OMPD_target: 9835 if (isOpenMPDistributeDirective(DKind)) 9836 return NestedDir; 9837 if (DKind == OMPD_teams) { 9838 Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers( 9839 /*IgnoreCaptured=*/true); 9840 if (!Body) 9841 return nullptr; 9842 ChildStmt = CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body); 9843 if (const auto *NND = 9844 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) { 9845 DKind = NND->getDirectiveKind(); 9846 if (isOpenMPDistributeDirective(DKind)) 9847 return NND; 9848 } 9849 } 9850 return nullptr; 9851 case OMPD_target_teams: 9852 if (isOpenMPDistributeDirective(DKind)) 9853 return NestedDir; 9854 return nullptr; 9855 case OMPD_target_parallel: 9856 case OMPD_target_simd: 9857 case OMPD_target_parallel_for: 9858 case OMPD_target_parallel_for_simd: 9859 return nullptr; 9860 case OMPD_target_teams_distribute: 9861 case OMPD_target_teams_distribute_simd: 9862 case OMPD_target_teams_distribute_parallel_for: 9863 case OMPD_target_teams_distribute_parallel_for_simd: 9864 case OMPD_parallel: 9865 case OMPD_for: 9866 case OMPD_parallel_for: 9867 case OMPD_parallel_master: 9868 case OMPD_parallel_sections: 9869 case OMPD_for_simd: 9870 case OMPD_parallel_for_simd: 9871 case OMPD_cancel: 9872 case OMPD_cancellation_point: 9873 case OMPD_ordered: 9874 case OMPD_threadprivate: 9875 case OMPD_allocate: 9876 case OMPD_task: 9877 case OMPD_simd: 9878 case OMPD_tile: 9879 case OMPD_unroll: 9880 case OMPD_sections: 9881 case OMPD_section: 9882 case OMPD_single: 9883 case OMPD_master: 9884 case OMPD_critical: 9885 case OMPD_taskyield: 9886 case OMPD_barrier: 9887 case OMPD_taskwait: 9888 case OMPD_taskgroup: 9889 case OMPD_atomic: 9890 case OMPD_flush: 9891 case OMPD_depobj: 9892 case 
/// Check for inner distribute directive.
static const OMPExecutableDirective *
getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) {
  const auto *CS = D.getInnermostCapturedStmt();
  const auto *Body =
      CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
  const Stmt *ChildStmt =
      CGOpenMPRuntime::getSingleCompoundChild(Ctx, Body);

  if (const auto *NestedDir =
          dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
    OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind();
    switch (D.getDirectiveKind()) {
    case OMPD_target:
      if (isOpenMPDistributeDirective(DKind))
        return NestedDir;
      if (DKind == OMPD_teams) {
        Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers(
            /*IgnoreCaptured=*/true);
        if (!Body)
          return nullptr;
        ChildStmt = CGOpenMPRuntime::getSingleCompoundChild(Ctx, Body);
        if (const auto *NND =
                dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
          DKind = NND->getDirectiveKind();
          if (isOpenMPDistributeDirective(DKind))
            return NND;
        }
      }
      return nullptr;
    case OMPD_target_teams:
      if (isOpenMPDistributeDirective(DKind))
        return NestedDir;
      return nullptr;
    case OMPD_target_parallel:
    case OMPD_target_simd:
    case OMPD_target_parallel_for:
    case OMPD_target_parallel_for_simd:
      return nullptr;
    case OMPD_target_teams_distribute:
    case OMPD_target_teams_distribute_simd:
    case OMPD_target_teams_distribute_parallel_for:
    case OMPD_target_teams_distribute_parallel_for_simd:
    case OMPD_parallel:
    case OMPD_for:
    case OMPD_parallel_for:
    case OMPD_parallel_master:
    case OMPD_parallel_sections:
    case OMPD_for_simd:
    case OMPD_parallel_for_simd:
    case OMPD_cancel:
    case OMPD_cancellation_point:
    case OMPD_ordered:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_task:
    case OMPD_simd:
    case OMPD_tile:
    case OMPD_unroll:
    case OMPD_sections:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_taskyield:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_atomic:
    case OMPD_flush:
    case OMPD_depobj:
    case OMPD_scan:
    case OMPD_teams:
    case OMPD_target_data:
    case OMPD_target_exit_data:
    case OMPD_target_enter_data:
    case OMPD_distribute:
    case OMPD_distribute_simd:
    case OMPD_distribute_parallel_for:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_target_update:
    case OMPD_declare_simd:
    case OMPD_declare_variant:
    case OMPD_begin_declare_variant:
    case OMPD_end_declare_variant:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_master_taskloop:
    case OMPD_master_taskloop_simd:
    case OMPD_parallel_master_taskloop:
    case OMPD_parallel_master_taskloop_simd:
    case OMPD_requires:
    case OMPD_metadirective:
    case OMPD_unknown:
    default:
      llvm_unreachable("Unexpected directive.");
    }
  }

  return nullptr;
}
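
// For illustration only: the lookup above recognizes nestings such as
// \code
//   #pragma omp target
//   #pragma omp teams
//   #pragma omp distribute parallel for
//   for (int i = 0; i < N; ++i)
//     ;
// \endcode
// where the nested 'distribute' directive carries the loop whose trip count
// is later pushed to the runtime (a sketch, not emitted code).
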
/// Emit the user-defined mapper function. The code generation follows the
/// pattern in the example below.
/// \code
/// void .omp_mapper.<type_name>.<mapper_id>.(void *rt_mapper_handle,
///                                           void *base, void *begin,
///                                           int64_t size, int64_t type,
///                                           void *name = nullptr) {
///   // Allocate space for an array section first or add a base/begin for
///   // pointer dereference.
///   if ((size > 1 || (base != begin && maptype.IsPtrAndObj)) &&
///       !maptype.IsDelete)
///     __tgt_push_mapper_component(rt_mapper_handle, base, begin,
///                                 size*sizeof(Ty), clearToFromMember(type));
///   // Map members.
///   for (unsigned i = 0; i < size; i++) {
///     // For each component specified by this mapper:
///     for (auto c : begin[i]->all_components) {
///       if (c.hasMapper())
///         (*c.Mapper())(rt_mapper_handle, c.arg_base, c.arg_begin, c.arg_size,
///                       c.arg_type, c.arg_name);
///       else
///         __tgt_push_mapper_component(rt_mapper_handle, c.arg_base,
///                                     c.arg_begin, c.arg_size, c.arg_type,
///                                     c.arg_name);
///     }
///   }
///   // Delete the array section.
///   if (size > 1 && maptype.IsDelete)
///     __tgt_push_mapper_component(rt_mapper_handle, base, begin,
///                                 size*sizeof(Ty), clearToFromMember(type));
/// }
/// \endcode
void CGOpenMPRuntime::emitUserDefinedMapper(const OMPDeclareMapperDecl *D,
                                            CodeGenFunction *CGF) {
  if (UDMMap.count(D) > 0)
    return;
  ASTContext &C = CGM.getContext();
  QualType Ty = D->getType();
  QualType PtrTy = C.getPointerType(Ty).withRestrict();
  QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
  auto *MapperVarDecl =
      cast<VarDecl>(cast<DeclRefExpr>(D->getMapperVarRef())->getDecl());
  SourceLocation Loc = D->getLocation();
  CharUnits ElementSize = C.getTypeSizeInChars(Ty);

  // Prepare mapper function arguments and attributes.
  ImplicitParamDecl HandleArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                              C.VoidPtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl BaseArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl BeginArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                             C.VoidPtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl SizeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl TypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl NameArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                            ImplicitParamDecl::Other);
  FunctionArgList Args;
  Args.push_back(&HandleArg);
  Args.push_back(&BaseArg);
  Args.push_back(&BeginArg);
  Args.push_back(&SizeArg);
  Args.push_back(&TypeArg);
  Args.push_back(&NameArg);
  const CGFunctionInfo &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  SmallString<64> TyStr;
  llvm::raw_svector_ostream Out(TyStr);
  CGM.getCXXABI().getMangleContext().mangleTypeName(Ty, Out);
  std::string Name = getName({"omp_mapper", TyStr, D->getName()});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
  // Start the mapper function code generation.
  CodeGenFunction MapperCGF(CGM);
  MapperCGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  // Compute the starting and ending addresses of array elements.
  llvm::Value *Size = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&SizeArg), /*Volatile=*/false,
      C.getPointerType(Int64Ty), Loc);
  // Prepare common arguments for array initialization and deletion.
  llvm::Value *Handle = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&HandleArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
  llvm::Value *BaseIn = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&BaseArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
  llvm::Value *BeginIn = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&BeginArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
  // Convert the size in bytes into the number of array elements.
  Size = MapperCGF.Builder.CreateExactUDiv(
      Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity()));
  llvm::Value *PtrBegin = MapperCGF.Builder.CreateBitCast(
      BeginIn, CGM.getTypes().ConvertTypeForMem(PtrTy));
  llvm::Value *PtrEnd = MapperCGF.Builder.CreateGEP(
      PtrBegin->getType()->getPointerElementType(), PtrBegin, Size);
  llvm::Value *MapType = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&TypeArg), /*Volatile=*/false,
      C.getPointerType(Int64Ty), Loc);
  llvm::Value *MapName = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&NameArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
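
  // Illustrative arithmetic for the size conversion above (a sketch with
  // assumed values, not emitted code): for a 16-byte element type, a size
  // argument of 160 bytes becomes 160 / 16 = 10 elements, and PtrEnd points
  // one past the tenth element.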
  // Emit array initialization if this is an array section and \p MapType
  // indicates that memory allocation is required.
  llvm::BasicBlock *HeadBB = MapperCGF.createBasicBlock("omp.arraymap.head");
  emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
                             MapName, ElementSize, HeadBB, /*IsInit=*/true);

  // Emit a for loop to iterate through SizeArg elements and map all of them.

  // Emit the loop header block.
  MapperCGF.EmitBlock(HeadBB);
  llvm::BasicBlock *BodyBB = MapperCGF.createBasicBlock("omp.arraymap.body");
  llvm::BasicBlock *DoneBB = MapperCGF.createBasicBlock("omp.done");
  // Evaluate whether the initial condition is satisfied.
  llvm::Value *IsEmpty =
      MapperCGF.Builder.CreateICmpEQ(PtrBegin, PtrEnd, "omp.arraymap.isempty");
  MapperCGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
  llvm::BasicBlock *EntryBB = MapperCGF.Builder.GetInsertBlock();

  // Emit the loop body block.
  MapperCGF.EmitBlock(BodyBB);
  llvm::BasicBlock *LastBB = BodyBB;
  llvm::PHINode *PtrPHI = MapperCGF.Builder.CreatePHI(
      PtrBegin->getType(), 2, "omp.arraymap.ptrcurrent");
  PtrPHI->addIncoming(PtrBegin, EntryBB);
  Address PtrCurrent =
      Address::deprecated(PtrPHI, MapperCGF.GetAddrOfLocalVar(&BeginArg)
                                      .getAlignment()
                                      .alignmentOfArrayElement(ElementSize));
  // Privatize the declared variable of mapper to be the current array element.
  CodeGenFunction::OMPPrivateScope Scope(MapperCGF);
  Scope.addPrivate(MapperVarDecl, [PtrCurrent]() { return PtrCurrent; });
  (void)Scope.Privatize();

  // Get map clause information. Fill up the arrays with all mapped variables.
  MappableExprsHandler::MapCombinedInfoTy Info;
  MappableExprsHandler MEHandler(*D, MapperCGF);
  MEHandler.generateAllInfoForMapper(Info);

  // Call the runtime API __tgt_mapper_num_components to get the number of
  // pre-existing components.
  llvm::Value *OffloadingArgs[] = {Handle};
  llvm::Value *PreviousSize = MapperCGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___tgt_mapper_num_components),
      OffloadingArgs);
  llvm::Value *ShiftedPreviousSize = MapperCGF.Builder.CreateShl(
      PreviousSize,
      MapperCGF.Builder.getInt64(MappableExprsHandler::getFlagMemberOffset()));

  // Fill up the runtime mapper handle for all components.
  for (unsigned I = 0; I < Info.BasePointers.size(); ++I) {
    llvm::Value *CurBaseArg = MapperCGF.Builder.CreateBitCast(
        *Info.BasePointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
    llvm::Value *CurBeginArg = MapperCGF.Builder.CreateBitCast(
        Info.Pointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
    llvm::Value *CurSizeArg = Info.Sizes[I];
    llvm::Value *CurNameArg =
        (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo)
            ? llvm::ConstantPointerNull::get(CGM.VoidPtrTy)
            : emitMappingInformation(MapperCGF, OMPBuilder, Info.Exprs[I]);

    // Adjust the MEMBER_OF field of the map type to account for the
    // pre-existing components.
    llvm::Value *OriMapType = MapperCGF.Builder.getInt64(Info.Types[I]);
    llvm::Value *MemberMapType =
        MapperCGF.Builder.CreateNUWAdd(OriMapType, ShiftedPreviousSize);
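
    // Illustrative values (a sketch, assuming the MEMBER_OF field starts at
    // the bit computed by getFlagMemberOffset(), e.g. bit 48): with 3
    // pre-existing components, ShiftedPreviousSize is 3 << 48, so a component
    // recorded with MEMBER_OF = 1 is renumbered to MEMBER_OF = 4 by the
    // addition above.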
    // Combine the map type inherited from user-defined mapper with that
    // specified in the program. According to the OMP_MAP_TO and OMP_MAP_FROM
    // bits of the \a MapType, which is the input argument of the mapper
    // function, the following code will set the OMP_MAP_TO and OMP_MAP_FROM
    // bits of MemberMapType.
    // [OpenMP 5.0], 1.2.6. map-type decay.
    //        | alloc |  to   | from  | tofrom | release | delete
    // ----------------------------------------------------------
    // alloc  | alloc | alloc | alloc | alloc  | release | delete
    // to     | alloc |  to   | alloc |   to   | release | delete
    // from   | alloc | alloc | from  |  from  | release | delete
    // tofrom | alloc |  to   | from  | tofrom | release | delete
    llvm::Value *LeftToFrom = MapperCGF.Builder.CreateAnd(
        MapType,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO |
                                   MappableExprsHandler::OMP_MAP_FROM));
    llvm::BasicBlock *AllocBB = MapperCGF.createBasicBlock("omp.type.alloc");
    llvm::BasicBlock *AllocElseBB =
        MapperCGF.createBasicBlock("omp.type.alloc.else");
    llvm::BasicBlock *ToBB = MapperCGF.createBasicBlock("omp.type.to");
    llvm::BasicBlock *ToElseBB = MapperCGF.createBasicBlock("omp.type.to.else");
    llvm::BasicBlock *FromBB = MapperCGF.createBasicBlock("omp.type.from");
    llvm::BasicBlock *EndBB = MapperCGF.createBasicBlock("omp.type.end");
    llvm::Value *IsAlloc = MapperCGF.Builder.CreateIsNull(LeftToFrom);
    MapperCGF.Builder.CreateCondBr(IsAlloc, AllocBB, AllocElseBB);
    // In case of alloc, clear OMP_MAP_TO and OMP_MAP_FROM.
    MapperCGF.EmitBlock(AllocBB);
    llvm::Value *AllocMapType = MapperCGF.Builder.CreateAnd(
        MemberMapType,
        MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO |
                                     MappableExprsHandler::OMP_MAP_FROM)));
    MapperCGF.Builder.CreateBr(EndBB);
    MapperCGF.EmitBlock(AllocElseBB);
    llvm::Value *IsTo = MapperCGF.Builder.CreateICmpEQ(
        LeftToFrom,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO));
    MapperCGF.Builder.CreateCondBr(IsTo, ToBB, ToElseBB);
    // In case of to, clear OMP_MAP_FROM.
    MapperCGF.EmitBlock(ToBB);
    llvm::Value *ToMapType = MapperCGF.Builder.CreateAnd(
        MemberMapType,
        MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_FROM));
    MapperCGF.Builder.CreateBr(EndBB);
    MapperCGF.EmitBlock(ToElseBB);
    llvm::Value *IsFrom = MapperCGF.Builder.CreateICmpEQ(
        LeftToFrom,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_FROM));
    MapperCGF.Builder.CreateCondBr(IsFrom, FromBB, EndBB);
    // In case of from, clear OMP_MAP_TO.
    MapperCGF.EmitBlock(FromBB);
    llvm::Value *FromMapType = MapperCGF.Builder.CreateAnd(
        MemberMapType,
        MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_TO));
    // In case of tofrom, do nothing.
    MapperCGF.EmitBlock(EndBB);
    LastBB = EndBB;
    llvm::PHINode *CurMapType =
        MapperCGF.Builder.CreatePHI(CGM.Int64Ty, 4, "omp.maptype");
    CurMapType->addIncoming(AllocMapType, AllocBB);
    CurMapType->addIncoming(ToMapType, ToBB);
    CurMapType->addIncoming(FromMapType, FromBB);
    CurMapType->addIncoming(MemberMapType, ToElseBB);

    llvm::Value *OffloadingArgs[] = {Handle,     CurBaseArg, CurBeginArg,
                                     CurSizeArg, CurMapType, CurNameArg};
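
    // Worked example of the decay branches above (a sketch): if the enclosing
    // construct maps the parent 'from' (LeftToFrom == OMP_MAP_FROM) while this
    // member was declared 'to' in the mapper, the FromBB path clears
    // OMP_MAP_TO, leaving an 'alloc' mapping for the member, exactly as the
    // decay table prescribes.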
    if (Info.Mappers[I]) {
      // Call the corresponding mapper function.
      llvm::Function *MapperFunc = getOrCreateUserDefinedMapperFunc(
          cast<OMPDeclareMapperDecl>(Info.Mappers[I]));
      assert(MapperFunc && "Expected a valid mapper function to be available.");
      MapperCGF.EmitNounwindRuntimeCall(MapperFunc, OffloadingArgs);
    } else {
      // Call the runtime API __tgt_push_mapper_component to fill up the
      // runtime data structure.
      MapperCGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___tgt_push_mapper_component),
          OffloadingArgs);
    }
  }

  // Update the pointer to point to the next element that needs to be mapped,
  // and check whether we have mapped all elements.
  llvm::Type *ElemTy = PtrPHI->getType()->getPointerElementType();
  llvm::Value *PtrNext = MapperCGF.Builder.CreateConstGEP1_32(
      ElemTy, PtrPHI, /*Idx0=*/1, "omp.arraymap.next");
  PtrPHI->addIncoming(PtrNext, LastBB);
  llvm::Value *IsDone =
      MapperCGF.Builder.CreateICmpEQ(PtrNext, PtrEnd, "omp.arraymap.isdone");
  llvm::BasicBlock *ExitBB = MapperCGF.createBasicBlock("omp.arraymap.exit");
  MapperCGF.Builder.CreateCondBr(IsDone, ExitBB, BodyBB);

  MapperCGF.EmitBlock(ExitBB);
  // Emit array deletion if this is an array section and \p MapType indicates
  // that deletion is required.
  emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
                             MapName, ElementSize, DoneBB, /*IsInit=*/false);

  // Emit the function exit block.
  MapperCGF.EmitBlock(DoneBB, /*IsFinished=*/true);
  MapperCGF.FinishFunction();
  UDMMap.try_emplace(D, Fn);
  if (CGF) {
    auto &Decls = FunctionUDMMap.FindAndConstruct(CGF->CurFn);
    Decls.second.push_back(D);
  }
}
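
// For illustration only: a user-defined mapper such as
// \code
//   struct Vec { int Len; double *Data; };
//   #pragma omp declare mapper(id : Vec V) map(V.Len) map(V.Data[0 : V.Len])
// \endcode
// is lowered by emitUserDefinedMapper above into an '.omp_mapper.' function
// that, for each array element, pushes one component per map clause via
// __tgt_push_mapper_component ('Vec' and 'id' are hypothetical names).
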
/// Emit the array initialization or deletion portion for user-defined mapper
/// code generation. First, it evaluates whether an array section is mapped
/// and whether the \a MapType instructs to delete this section. If \a IsInit
/// is true, and \a MapType indicates to not delete this array, array
/// initialization code is generated. If \a IsInit is false, and \a MapType
/// indicates to delete this array, array deletion code is generated.
void CGOpenMPRuntime::emitUDMapperArrayInitOrDel(
    CodeGenFunction &MapperCGF, llvm::Value *Handle, llvm::Value *Base,
    llvm::Value *Begin, llvm::Value *Size, llvm::Value *MapType,
    llvm::Value *MapName, CharUnits ElementSize, llvm::BasicBlock *ExitBB,
    bool IsInit) {
  StringRef Prefix = IsInit ? ".init" : ".del";

  // Evaluate if this is an array section.
  llvm::BasicBlock *BodyBB =
      MapperCGF.createBasicBlock(getName({"omp.array", Prefix}));
  llvm::Value *IsArray = MapperCGF.Builder.CreateICmpSGT(
      Size, MapperCGF.Builder.getInt64(1), "omp.arrayinit.isarray");
  llvm::Value *DeleteBit = MapperCGF.Builder.CreateAnd(
      MapType,
      MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_DELETE));
  llvm::Value *DeleteCond;
  llvm::Value *Cond;
  if (IsInit) {
    // base != begin?
    llvm::Value *BaseIsBegin = MapperCGF.Builder.CreateICmpNE(Base, Begin);
    // IsPtrAndObj?
    llvm::Value *PtrAndObjBit = MapperCGF.Builder.CreateAnd(
        MapType,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_PTR_AND_OBJ));
    PtrAndObjBit = MapperCGF.Builder.CreateIsNotNull(PtrAndObjBit);
    BaseIsBegin = MapperCGF.Builder.CreateAnd(BaseIsBegin, PtrAndObjBit);
    Cond = MapperCGF.Builder.CreateOr(IsArray, BaseIsBegin);
    DeleteCond = MapperCGF.Builder.CreateIsNull(
        DeleteBit, getName({"omp.array", Prefix, ".delete"}));
  } else {
    Cond = IsArray;
    DeleteCond = MapperCGF.Builder.CreateIsNotNull(
        DeleteBit, getName({"omp.array", Prefix, ".delete"}));
  }
  Cond = MapperCGF.Builder.CreateAnd(Cond, DeleteCond);
  MapperCGF.Builder.CreateCondBr(Cond, BodyBB, ExitBB);

  MapperCGF.EmitBlock(BodyBB);
  // Get the array size by multiplying element size and element number (i.e.,
  // \p Size).
  llvm::Value *ArraySize = MapperCGF.Builder.CreateNUWMul(
      Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity()));
  // Remove OMP_MAP_TO and OMP_MAP_FROM from the map type, so that it achieves
  // memory allocation/deletion purposes only.
  llvm::Value *MapTypeArg = MapperCGF.Builder.CreateAnd(
      MapType,
      MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO |
                                   MappableExprsHandler::OMP_MAP_FROM)));
  MapTypeArg = MapperCGF.Builder.CreateOr(
      MapTypeArg,
      MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_IMPLICIT));

  // Call the runtime API __tgt_push_mapper_component to fill up the runtime
  // data structure.
  llvm::Value *OffloadingArgs[] = {Handle,    Base,       Begin,
                                   ArraySize, MapTypeArg, MapName};
  MapperCGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___tgt_push_mapper_component),
      OffloadingArgs);
}

llvm::Function *CGOpenMPRuntime::getOrCreateUserDefinedMapperFunc(
    const OMPDeclareMapperDecl *D) {
  auto I = UDMMap.find(D);
  if (I != UDMMap.end())
    return I->second;
  emitUserDefinedMapper(D);
  return UDMMap.lookup(D);
}
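
// For illustration only: given a construct such as
// \code
//   #pragma omp target teams distribute parallel for
//   for (int i = 0; i < N; ++i)
//     ;
// \endcode
// the function below hands N to the runtime through
// __kmpc_push_target_tripcount_mapper ahead of the target launch so the
// device plugin can choose launch bounds (a sketch of the intent, not of the
// exact IR).
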
void CGOpenMPRuntime::emitTargetNumIterationsCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    llvm::Value *DeviceID,
    llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
                                     const OMPLoopDirective &D)>
        SizeEmitter) {
  OpenMPDirectiveKind Kind = D.getDirectiveKind();
  const OMPExecutableDirective *TD = &D;
  // Get nested teams distribute kind directive, if any.
  if (!isOpenMPDistributeDirective(Kind) || !isOpenMPTeamsDirective(Kind))
    TD = getNestedDistributeDirective(CGM.getContext(), D);
  if (!TD)
    return;
  const auto *LD = cast<OMPLoopDirective>(TD);
  auto &&CodeGen = [LD, DeviceID, SizeEmitter, &D, this](CodeGenFunction &CGF,
                                                         PrePostActionTy &) {
    if (llvm::Value *NumIterations = SizeEmitter(CGF, *LD)) {
      llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());
      llvm::Value *Args[] = {RTLoc, DeviceID, NumIterations};
      CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___kmpc_push_target_tripcount_mapper),
          Args);
    }
  };
  emitInlinedDirective(CGF, OMPD_unknown, CodeGen);
}
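
// For illustration only: for '#pragma omp target if(c) device(d)', the
// emission below evaluates 'c' to guard the offload attempt, passes 'd' as
// the device id, and falls back to the host outlined function when the
// offload fails, when 'c' is false, or when reverse offloading via
// 'device(ancestor: ...)' is requested (a sketch of the control flow).
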
void CGOpenMPRuntime::emitTargetCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
    llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
    llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
                                     const OMPLoopDirective &D)>
        SizeEmitter) {
  if (!CGF.HaveInsertPoint())
    return;

  assert(OutlinedFn && "Invalid outlined function!");

  const bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>() ||
                                 D.hasClausesOfKind<OMPNowaitClause>();
  llvm::SmallVector<llvm::Value *, 16> CapturedVars;
  const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
  auto &&ArgsCodegen = [&CS, &CapturedVars](CodeGenFunction &CGF,
                                            PrePostActionTy &) {
    CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
  };
  emitInlinedDirective(CGF, OMPD_unknown, ArgsCodegen);

  CodeGenFunction::OMPTargetDataInfo InputInfo;
  llvm::Value *MapTypesArray = nullptr;
  llvm::Value *MapNamesArray = nullptr;
  // Fill up the pointer arrays and transfer execution to the device.
  auto &&ThenGen = [this, Device, OutlinedFn, OutlinedFnID, &D, &InputInfo,
                    &MapTypesArray, &MapNamesArray, &CS, RequiresOuterTask,
                    &CapturedVars,
                    SizeEmitter](CodeGenFunction &CGF, PrePostActionTy &) {
    if (Device.getInt() == OMPC_DEVICE_ancestor) {
      // Reverse offloading is not supported, so just execute on the host.
      if (RequiresOuterTask) {
        CapturedVars.clear();
        CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
      }
      emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
      return;
    }

    // On top of the arrays that were filled up, the target offloading call
    // takes as arguments the device id as well as the host pointer. The host
    // pointer is used by the runtime library to identify the current target
    // region, so it only has to be unique and not necessarily point to
    // anything. It could be the pointer to the outlined function that
    // implements the target region, but we aren't using that, so that the
    // compiler does not need to keep it alive and can inline the host
    // function if proven worthwhile during optimization.

    // From this point on, we need to have an ID of the target region defined.
    assert(OutlinedFnID && "Invalid outlined function ID!");

    // Emit device ID if any.
    llvm::Value *DeviceID;
    if (Device.getPointer()) {
      assert((Device.getInt() == OMPC_DEVICE_unknown ||
              Device.getInt() == OMPC_DEVICE_device_num) &&
             "Expected device_num modifier.");
      llvm::Value *DevVal = CGF.EmitScalarExpr(Device.getPointer());
      DeviceID =
          CGF.Builder.CreateIntCast(DevVal, CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Value *PointerNum =
        CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);

    // Return value of the runtime offloading call.
    llvm::Value *Return;

    llvm::Value *NumTeams = emitNumTeamsForTargetDirective(CGF, D);
    llvm::Value *NumThreads = emitNumThreadsForTargetDirective(CGF, D);

    // Source location for the ident struct.
    llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());

    // Emit the trip count for the target loop-based directive.
    emitTargetNumIterationsCall(CGF, D, DeviceID, SizeEmitter);

    bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
    // The target region is an outlined function launched by the runtime
    // via calls to __tgt_target() or __tgt_target_teams().
    //
    // __tgt_target() launches a target region with one team and one thread,
    // executing a serial region. This master thread may in turn launch
    // more threads within its team upon encountering a parallel region;
    // however, no additional teams can be launched on the device.
    //
    // __tgt_target_teams() launches a target region with one or more teams,
    // each with one or more threads. This call is required for target
    // constructs such as:
    //   'target teams'
    //   'target' / 'teams'
    //   'target teams distribute parallel for'
    //   'target parallel'
    // and so on.
    //
    // Note that on the host and CPU targets, the runtime implementation of
    // these calls simply calls the outlined function without forking threads.
    // The outlined functions themselves have runtime calls to
    // __kmpc_fork_teams() and __kmpc_fork() for this purpose, codegen'd by
    // the compiler in emitTeamsCall() and emitParallelCall().
    //
    // In contrast, on the NVPTX target, the implementation of
    // __tgt_target_teams() launches a GPU kernel with the requested number
    // of teams and threads, so no additional calls to the runtime are
    // required.
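
    // Illustrative call shapes, mirroring the argument lists assembled below
    // (a sketch; parameter names are descriptive only):
    // \code
    //   __tgt_target_mapper(Loc, Dev, HostFnID, N, Bases, Ptrs, Sizes, Types,
    //                       Names, Mappers);
    //   __tgt_target_teams_mapper(Loc, Dev, HostFnID, N, Bases, Ptrs, Sizes,
    //                             Types, Names, Mappers, NumTeams,
    //                             ThreadLimit);
    // \endcode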
    if (NumTeams) {
      // If we have NumTeams defined this means that we have an enclosed teams
      // region. Therefore we also expect to have NumThreads defined. These
      // two values should be defined in the presence of a teams directive,
      // regardless of having any clauses associated. If the user is using
      // teams but no clauses, these two values will be the default that
      // should be passed to the runtime library - a 32-bit integer with the
      // value zero.
      assert(NumThreads && "Thread limit expression should be available along "
                           "with number of teams.");
      SmallVector<llvm::Value *> OffloadingArgs = {
          RTLoc,
          DeviceID,
          OutlinedFnID,
          PointerNum,
          InputInfo.BasePointersArray.getPointer(),
          InputInfo.PointersArray.getPointer(),
          InputInfo.SizesArray.getPointer(),
          MapTypesArray,
          MapNamesArray,
          InputInfo.MappersArray.getPointer(),
          NumTeams,
          NumThreads};
      if (HasNowait) {
        // Add int32_t depNum = 0, void *depList = nullptr, int32_t
        // noAliasDepNum = 0, void *noAliasDepList = nullptr.
        OffloadingArgs.push_back(CGF.Builder.getInt32(0));
        OffloadingArgs.push_back(llvm::ConstantPointerNull::get(CGM.VoidPtrTy));
        OffloadingArgs.push_back(CGF.Builder.getInt32(0));
        OffloadingArgs.push_back(llvm::ConstantPointerNull::get(CGM.VoidPtrTy));
      }
      Return = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), HasNowait
                                   ? OMPRTL___tgt_target_teams_nowait_mapper
                                   : OMPRTL___tgt_target_teams_mapper),
          OffloadingArgs);
    } else {
      SmallVector<llvm::Value *> OffloadingArgs = {
          RTLoc,
          DeviceID,
          OutlinedFnID,
          PointerNum,
          InputInfo.BasePointersArray.getPointer(),
          InputInfo.PointersArray.getPointer(),
          InputInfo.SizesArray.getPointer(),
          MapTypesArray,
          MapNamesArray,
          InputInfo.MappersArray.getPointer()};
      if (HasNowait) {
        // Add int32_t depNum = 0, void *depList = nullptr, int32_t
        // noAliasDepNum = 0, void *noAliasDepList = nullptr.
        OffloadingArgs.push_back(CGF.Builder.getInt32(0));
        OffloadingArgs.push_back(llvm::ConstantPointerNull::get(CGM.VoidPtrTy));
        OffloadingArgs.push_back(CGF.Builder.getInt32(0));
        OffloadingArgs.push_back(llvm::ConstantPointerNull::get(CGM.VoidPtrTy));
      }
      Return = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), HasNowait ? OMPRTL___tgt_target_nowait_mapper
                                         : OMPRTL___tgt_target_mapper),
          OffloadingArgs);
    }

    // Check the error code and execute the host version if required.
    llvm::BasicBlock *OffloadFailedBlock =
        CGF.createBasicBlock("omp_offload.failed");
    llvm::BasicBlock *OffloadContBlock =
        CGF.createBasicBlock("omp_offload.cont");
    llvm::Value *Failed = CGF.Builder.CreateIsNotNull(Return);
    CGF.Builder.CreateCondBr(Failed, OffloadFailedBlock, OffloadContBlock);

    CGF.EmitBlock(OffloadFailedBlock);
    if (RequiresOuterTask) {
      CapturedVars.clear();
      CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
    }
    emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
    CGF.EmitBranch(OffloadContBlock);

    CGF.EmitBlock(OffloadContBlock, /*IsFinished=*/true);
  };

  // Notify that the host version must be executed.
  auto &&ElseGen = [this, &D, OutlinedFn, &CS, &CapturedVars,
                    RequiresOuterTask](CodeGenFunction &CGF,
                                       PrePostActionTy &) {
    if (RequiresOuterTask) {
      CapturedVars.clear();
      CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
    }
    emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
  };

  auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray,
                          &MapNamesArray, &CapturedVars, RequiresOuterTask,
                          &CS](CodeGenFunction &CGF, PrePostActionTy &) {
    // Fill up the arrays with all the captured variables.
    MappableExprsHandler::MapCombinedInfoTy CombinedInfo;

    // Get mappable expression information.
    MappableExprsHandler MEHandler(D, CGF);
    llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers;
    llvm::DenseSet<CanonicalDeclPtr<const Decl>> MappedVarSet;

    auto RI = CS.getCapturedRecordDecl()->field_begin();
    auto *CV = CapturedVars.begin();
    for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(),
                                              CE = CS.capture_end();
         CI != CE; ++CI, ++RI, ++CV) {
      MappableExprsHandler::MapCombinedInfoTy CurInfo;
      MappableExprsHandler::StructRangeInfoTy PartialStruct;

      // VLA sizes are passed to the outlined region by copy and do not have
      // map information associated.
      if (CI->capturesVariableArrayType()) {
        CurInfo.Exprs.push_back(nullptr);
        CurInfo.BasePointers.push_back(*CV);
        CurInfo.Pointers.push_back(*CV);
        CurInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
            CGF.getTypeSize(RI->getType()), CGF.Int64Ty, /*isSigned=*/true));
        // Copy to the device as an argument. No need to retrieve it.
        CurInfo.Types.push_back(MappableExprsHandler::OMP_MAP_LITERAL |
                                MappableExprsHandler::OMP_MAP_TARGET_PARAM |
                                MappableExprsHandler::OMP_MAP_IMPLICIT);
        CurInfo.Mappers.push_back(nullptr);
      } else {
        // If we have any information in the map clause, we use it, otherwise
        // we just do a default mapping.
        MEHandler.generateInfoForCapture(CI, *CV, CurInfo, PartialStruct);
        if (!CI->capturesThis())
          MappedVarSet.insert(CI->getCapturedVar());
        else
          MappedVarSet.insert(nullptr);
        if (CurInfo.BasePointers.empty() && !PartialStruct.Base.isValid())
          MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurInfo);
        // Generate correct mapping for variables captured by reference in
        // lambdas.
        if (CI->capturesVariable())
          MEHandler.generateInfoForLambdaCaptures(CI->getCapturedVar(), *CV,
                                                  CurInfo, LambdaPointers);
      }
      // We expect to have at least an element of information for this capture.
      assert((!CurInfo.BasePointers.empty() || PartialStruct.Base.isValid()) &&
             "Non-existing map pointer for capture!");
      assert(CurInfo.BasePointers.size() == CurInfo.Pointers.size() &&
             CurInfo.BasePointers.size() == CurInfo.Sizes.size() &&
             CurInfo.BasePointers.size() == CurInfo.Types.size() &&
             CurInfo.BasePointers.size() == CurInfo.Mappers.size() &&
             "Inconsistent map information sizes!");

      // If there is an entry in PartialStruct it means we have a struct with
      // individual members mapped. Emit an extra combined entry.
      if (PartialStruct.Base.isValid()) {
        CombinedInfo.append(PartialStruct.PreliminaryMapData);
        MEHandler.emitCombinedEntry(
            CombinedInfo, CurInfo.Types, PartialStruct, nullptr,
            !PartialStruct.PreliminaryMapData.BasePointers.empty());
      }

      // We need to append the results of this capture to what we already
      // have.
      CombinedInfo.append(CurInfo);
    }
    // Adjust MEMBER_OF flags for the lambdas captures.
    MEHandler.adjustMemberOfForLambdaCaptures(
        LambdaPointers, CombinedInfo.BasePointers, CombinedInfo.Pointers,
        CombinedInfo.Types);
    // Map any list items in a map clause that were not captures because they
    // weren't referenced within the construct.
    MEHandler.generateAllInfo(CombinedInfo, MappedVarSet);

    TargetDataInfo Info;
    // Fill up the arrays and create the arguments.
    emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder);
    emitOffloadingArraysArgument(
        CGF, Info.BasePointersArray, Info.PointersArray, Info.SizesArray,
        Info.MapTypesArray, Info.MapNamesArray, Info.MappersArray, Info,
        {/*ForEndCall=*/false});

    InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
    InputInfo.BasePointersArray =
        Address::deprecated(Info.BasePointersArray, CGM.getPointerAlign());
    InputInfo.PointersArray =
        Address::deprecated(Info.PointersArray, CGM.getPointerAlign());
    InputInfo.SizesArray =
        Address::deprecated(Info.SizesArray, CGM.getPointerAlign());
    InputInfo.MappersArray =
        Address::deprecated(Info.MappersArray, CGM.getPointerAlign());
    MapTypesArray = Info.MapTypesArray;
    MapNamesArray = Info.MapNamesArray;
    if (RequiresOuterTask)
      CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
    else
      emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
  };

  auto &&TargetElseGen = [this, &ElseGen, &D, RequiresOuterTask](
                             CodeGenFunction &CGF, PrePostActionTy &) {
    if (RequiresOuterTask) {
      CodeGenFunction::OMPTargetDataInfo InputInfo;
      CGF.EmitOMPTargetTaskBasedDirective(D, ElseGen, InputInfo);
    } else {
      emitInlinedDirective(CGF, D.getDirectiveKind(), ElseGen);
    }
  };

  // If we have a target function ID it means that we need to support
  // offloading; otherwise, just execute on the host. We need to execute on
  // the host regardless of the conditional in the if clause if, e.g., the
  // user does not specify target triples.
  if (OutlinedFnID) {
    if (IfCond) {
      emitIfClause(CGF, IfCond, TargetThenGen, TargetElseGen);
    } else {
      RegionCodeGenTy ThenRCG(TargetThenGen);
      ThenRCG(CGF);
    }
  } else {
    RegionCodeGenTy ElseRCG(TargetElseGen);
    ElseRCG(CGF);
  }
}
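
// For illustration only: scanning
// \code
//   void foo() {
//   #pragma omp target
//     { /* ... */ }
//   }
// \endcode
// with ParentName set to the mangled name of 'foo' registers one target
// region entry keyed by (device id, file id, parent name, line) and emits
// the corresponding device function ('foo' is a hypothetical example).
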
void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S,
                                                    StringRef ParentName) {
  if (!S)
    return;

  // Codegen OMP target directives that offload compute to the device.
  bool RequiresDeviceCodegen =
      isa<OMPExecutableDirective>(S) &&
      isOpenMPTargetExecutionDirective(
          cast<OMPExecutableDirective>(S)->getDirectiveKind());

  if (RequiresDeviceCodegen) {
    const auto &E = *cast<OMPExecutableDirective>(S);
    unsigned DeviceID;
    unsigned FileID;
    unsigned Line;
    getTargetEntryUniqueInfo(CGM.getContext(), E.getBeginLoc(), DeviceID,
                             FileID, Line);

    // Is this a target region that should not be emitted as an entry point?
    // If so just signal we are done with this target region.
    if (!OffloadEntriesInfoManager.hasTargetRegionEntryInfo(DeviceID, FileID,
                                                            ParentName, Line))
      return;

    switch (E.getDirectiveKind()) {
    case OMPD_target:
      CodeGenFunction::EmitOMPTargetDeviceFunction(CGM, ParentName,
                                                   cast<OMPTargetDirective>(E));
      break;
    case OMPD_target_parallel:
      CodeGenFunction::EmitOMPTargetParallelDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelDirective>(E));
      break;
    case OMPD_target_teams:
      CodeGenFunction::EmitOMPTargetTeamsDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDirective>(E));
      break;
    case OMPD_target_teams_distribute:
      CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDistributeDirective>(E));
      break;
    case OMPD_target_teams_distribute_simd:
      CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDistributeSimdDirective>(E));
      break;
    case OMPD_target_parallel_for:
      CodeGenFunction::EmitOMPTargetParallelForDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelForDirective>(E));
      break;
    case OMPD_target_parallel_for_simd:
      CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelForSimdDirective>(E));
      break;
    case OMPD_target_simd:
      CodeGenFunction::EmitOMPTargetSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetSimdDirective>(E));
      break;
    case OMPD_target_teams_distribute_parallel_for:
      CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction(
          CGM, ParentName,
          cast<OMPTargetTeamsDistributeParallelForDirective>(E));
      break;
    case OMPD_target_teams_distribute_parallel_for_simd:
      CodeGenFunction::
          EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction(
              CGM, ParentName,
              cast<OMPTargetTeamsDistributeParallelForSimdDirective>(E));
      break;
    case OMPD_parallel:
    case OMPD_for:
    case OMPD_parallel_for:
    case OMPD_parallel_master:
    case OMPD_parallel_sections:
    case OMPD_for_simd:
    case OMPD_parallel_for_simd:
    case OMPD_cancel:
    case OMPD_cancellation_point:
    case OMPD_ordered:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_task:
    case OMPD_simd:
    case OMPD_tile:
    case OMPD_unroll:
    case OMPD_sections:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_taskyield:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_atomic:
    case OMPD_flush:
    case OMPD_depobj:
    case OMPD_scan:
    case OMPD_teams:
    case OMPD_target_data:
    case OMPD_target_exit_data:
    case OMPD_target_enter_data:
    case OMPD_distribute:
    case OMPD_distribute_simd:
    case OMPD_distribute_parallel_for:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_target_update:
    case OMPD_declare_simd:
    case OMPD_declare_variant:
    case OMPD_begin_declare_variant:
    case OMPD_end_declare_variant:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_master_taskloop:
    case OMPD_master_taskloop_simd:
    case OMPD_parallel_master_taskloop:
    case OMPD_parallel_master_taskloop_simd:
    case OMPD_requires:
    case OMPD_metadirective:
    case OMPD_unknown:
    default:
      llvm_unreachable("Unknown target directive for OpenMP device codegen.");
    }
    return;
  }

  if (const auto *E = dyn_cast<OMPExecutableDirective>(S)) {
    if (!E->hasAssociatedStmt() || !E->getAssociatedStmt())
      return;

    scanForTargetRegionsFunctions(E->getRawStmt(), ParentName);
    return;
  }

  // If this is a lambda function, look into its body.
  if (const auto *L = dyn_cast<LambdaExpr>(S))
    S = L->getBody();

  // Keep looking for target regions recursively.
  for (const Stmt *II : S->children())
    scanForTargetRegionsFunctions(II, ParentName);
}

static bool isAssumedToBeNotEmitted(const ValueDecl *VD, bool IsDevice) {
  Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
      OMPDeclareTargetDeclAttr::getDeviceType(VD);
  if (!DevTy)
    return false;
  // Do not emit device_type(nohost) functions for the host.
  if (!IsDevice && DevTy == OMPDeclareTargetDeclAttr::DT_NoHost)
    return true;
  // Do not emit device_type(host) functions for the device.
  if (IsDevice && DevTy == OMPDeclareTargetDeclAttr::DT_Host)
    return true;
  return false;
}

bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) {
  // If emitting code for the host, we do not process FD here. Instead we do
  // the normal code generation.
  if (!CGM.getLangOpts().OpenMPIsDevice) {
    if (const auto *FD = dyn_cast<FunctionDecl>(GD.getDecl()))
      if (isAssumedToBeNotEmitted(cast<ValueDecl>(FD),
                                  CGM.getLangOpts().OpenMPIsDevice))
        return true;
    return false;
  }

  const ValueDecl *VD = cast<ValueDecl>(GD.getDecl());
  // Try to detect target regions in the function.
  if (const auto *FD = dyn_cast<FunctionDecl>(VD)) {
    StringRef Name = CGM.getMangledName(GD);
    scanForTargetRegionsFunctions(FD->getBody(), Name);
    if (isAssumedToBeNotEmitted(cast<ValueDecl>(FD),
                                CGM.getLangOpts().OpenMPIsDevice))
      return true;
  }

  // Do not emit the function if it is not marked as declare target.
  return !OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD) &&
         AlreadyEmittedTargetDecls.count(VD) == 0;
}
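
// For illustration only: given
// \code
//   #pragma omp declare target device_type(nohost)
//   void device_only();
//   #pragma omp end declare target
// \endcode
// isAssumedToBeNotEmitted() above suppresses 'device_only' during the host
// pass, and symmetrically suppresses device_type(host) functions during the
// device pass ('device_only' is a hypothetical example).
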
bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
  if (isAssumedToBeNotEmitted(cast<ValueDecl>(GD.getDecl()),
                              CGM.getLangOpts().OpenMPIsDevice))
    return true;

  if (!CGM.getLangOpts().OpenMPIsDevice)
    return false;

  // Check if there are Ctors/Dtors in this declaration and look for target
  // regions in it. We use the complete variant to produce the kernel name
  // mangling.
  QualType RDTy = cast<VarDecl>(GD.getDecl())->getType();
  if (const auto *RD = RDTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl()) {
    for (const CXXConstructorDecl *Ctor : RD->ctors()) {
      StringRef ParentName =
          CGM.getMangledName(GlobalDecl(Ctor, Ctor_Complete));
      scanForTargetRegionsFunctions(Ctor->getBody(), ParentName);
    }
    if (const CXXDestructorDecl *Dtor = RD->getDestructor()) {
      StringRef ParentName =
          CGM.getMangledName(GlobalDecl(Dtor, Dtor_Complete));
      scanForTargetRegionsFunctions(Dtor->getBody(), ParentName);
    }
  }

  // Do not emit the variable if it is not marked as declare target.
  llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(
          cast<VarDecl>(GD.getDecl()));
  if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
      (*Res == OMPDeclareTargetDeclAttr::MT_To &&
       HasRequiresUnifiedSharedMemory)) {
    DeferredGlobalVariables.insert(cast<VarDecl>(GD.getDecl()));
    return true;
  }
  return false;
}

void CGOpenMPRuntime::registerTargetGlobalVariable(const VarDecl *VD,
                                                   llvm::Constant *Addr) {
  if (CGM.getLangOpts().OMPTargetTriples.empty() &&
      !CGM.getLangOpts().OpenMPIsDevice)
    return;

  // If we have host/nohost variables, they do not need to be registered.
  Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
      OMPDeclareTargetDeclAttr::getDeviceType(VD);
  if (DevTy && DevTy.getValue() != OMPDeclareTargetDeclAttr::DT_Any)
    return;

  llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
  if (!Res) {
    if (CGM.getLangOpts().OpenMPIsDevice) {
      // Register non-target variables being emitted in device code (debug
      // info may cause this).
      StringRef VarName = CGM.getMangledName(VD);
      EmittedNonTargetVariables.try_emplace(VarName, Addr);
    }
    return;
  }
  // Register declare target variables.
  OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags;
  StringRef VarName;
  CharUnits VarSize;
  llvm::GlobalValue::LinkageTypes Linkage;

  if (*Res == OMPDeclareTargetDeclAttr::MT_To &&
      !HasRequiresUnifiedSharedMemory) {
    Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo;
    VarName = CGM.getMangledName(VD);
    if (VD->hasDefinition(CGM.getContext()) != VarDecl::DeclarationOnly) {
      VarSize = CGM.getContext().getTypeSizeInChars(VD->getType());
      assert(!VarSize.isZero() && "Expected non-zero size of the variable");
    } else {
      VarSize = CharUnits::Zero();
    }
    Linkage = CGM.getLLVMLinkageVarDefinition(VD, /*IsConstant=*/false);
    // Temporary solution to prevent optimizations of the internal variables.
    if (CGM.getLangOpts().OpenMPIsDevice && !VD->isExternallyVisible()) {
      // Do not create a "ref-variable" if the original is not also available
      // on the host.
      if (!OffloadEntriesInfoManager.hasDeviceGlobalVarEntryInfo(VarName))
        return;
      std::string RefName = getName({VarName, "ref"});
      if (!CGM.GetGlobalValue(RefName)) {
        llvm::Constant *AddrRef =
            getOrCreateInternalVariable(Addr->getType(), RefName);
        auto *GVAddrRef = cast<llvm::GlobalVariable>(AddrRef);
        GVAddrRef->setConstant(/*Val=*/true);
        GVAddrRef->setLinkage(llvm::GlobalValue::InternalLinkage);
        GVAddrRef->setInitializer(Addr);
        CGM.addCompilerUsedGlobal(GVAddrRef);
      }
    }
  } else {
    assert(((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
            (*Res == OMPDeclareTargetDeclAttr::MT_To &&
             HasRequiresUnifiedSharedMemory)) &&
           "Declare target attribute must be 'link' or be 'to' with unified "
           "memory.");
    if (*Res == OMPDeclareTargetDeclAttr::MT_Link)
      Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink;
    else
      Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo;

    if (CGM.getLangOpts().OpenMPIsDevice) {
      VarName = Addr->getName();
      Addr = nullptr;
    } else {
      VarName = getAddrOfDeclareTargetVar(VD).getName();
      Addr = cast<llvm::Constant>(getAddrOfDeclareTargetVar(VD).getPointer());
    }
    VarSize = CGM.getPointerSize();
    Linkage = llvm::GlobalValue::WeakAnyLinkage;
  }

  OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo(
      VarName, Addr, VarSize, Flags, Linkage);
}

bool CGOpenMPRuntime::emitTargetGlobal(GlobalDecl GD) {
  if (isa<FunctionDecl>(GD.getDecl()) ||
      isa<OMPDeclareReductionDecl>(GD.getDecl()))
    return emitTargetFunctions(GD);

  return emitTargetGlobalVariable(GD);
}

void CGOpenMPRuntime::emitDeferredTargetDecls() const {
  for (const VarDecl *VD : DeferredGlobalVariables) {
    llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
        OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
    if (!Res)
      continue;
    if (*Res == OMPDeclareTargetDeclAttr::MT_To &&
        !HasRequiresUnifiedSharedMemory) {
      CGM.EmitGlobal(VD);
    } else {
      assert((*Res == OMPDeclareTargetDeclAttr::MT_Link ||
              (*Res == OMPDeclareTargetDeclAttr::MT_To &&
               HasRequiresUnifiedSharedMemory)) &&
             "Expected link clause or to clause with unified memory.");
      (void)CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
    }
  }
}

void CGOpenMPRuntime::adjustTargetSpecificDataForLambdas(
    CodeGenFunction &CGF, const OMPExecutableDirective &D) const {
  assert(isOpenMPTargetExecutionDirective(D.getDirectiveKind()) &&
         "Expected target-based directive.");
}
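
// For illustration only: a translation unit containing
// \code
//   #pragma omp requires unified_shared_memory atomic_default_mem_order(seq_cst)
// \endcode
// makes the function below record HasRequiresUnifiedSharedMemory and switch
// RequiresAtomicOrdering to SequentiallyConsistent (a sketch of the effect).
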
void CGOpenMPRuntime::processRequiresDirective(const OMPRequiresDecl *D) {
  for (const OMPClause *Clause : D->clauselists()) {
    if (Clause->getClauseKind() == OMPC_unified_shared_memory) {
      HasRequiresUnifiedSharedMemory = true;
    } else if (const auto *AC =
                   dyn_cast<OMPAtomicDefaultMemOrderClause>(Clause)) {
      switch (AC->getAtomicDefaultMemOrderKind()) {
      case OMPC_ATOMIC_DEFAULT_MEM_ORDER_acq_rel:
        RequiresAtomicOrdering = llvm::AtomicOrdering::AcquireRelease;
        break;
      case OMPC_ATOMIC_DEFAULT_MEM_ORDER_seq_cst:
        RequiresAtomicOrdering = llvm::AtomicOrdering::SequentiallyConsistent;
        break;
      case OMPC_ATOMIC_DEFAULT_MEM_ORDER_relaxed:
        RequiresAtomicOrdering = llvm::AtomicOrdering::Monotonic;
        break;
      case OMPC_ATOMIC_DEFAULT_MEM_ORDER_unknown:
        break;
      }
    }
  }
}

llvm::AtomicOrdering CGOpenMPRuntime::getDefaultMemoryOrdering() const {
  return RequiresAtomicOrdering;
}

bool CGOpenMPRuntime::hasAllocateAttributeForGlobalVar(const VarDecl *VD,
                                                       LangAS &AS) {
  if (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())
    return false;
  const auto *A = VD->getAttr<OMPAllocateDeclAttr>();
  switch (A->getAllocatorType()) {
  case OMPAllocateDeclAttr::OMPNullMemAlloc:
  case OMPAllocateDeclAttr::OMPDefaultMemAlloc:
    // Not supported, fallback to the default mem space.
  case OMPAllocateDeclAttr::OMPLargeCapMemAlloc:
  case OMPAllocateDeclAttr::OMPCGroupMemAlloc:
  case OMPAllocateDeclAttr::OMPHighBWMemAlloc:
  case OMPAllocateDeclAttr::OMPLowLatMemAlloc:
  case OMPAllocateDeclAttr::OMPThreadMemAlloc:
  case OMPAllocateDeclAttr::OMPConstMemAlloc:
  case OMPAllocateDeclAttr::OMPPTeamMemAlloc:
    AS = LangAS::Default;
    return true;
  case OMPAllocateDeclAttr::OMPUserDefinedMemAlloc:
    llvm_unreachable("Expected predefined allocator for the variables with "
                     "the static storage.");
  }
  return false;
}

bool CGOpenMPRuntime::hasRequiresUnifiedSharedMemory() const {
  return HasRequiresUnifiedSharedMemory;
}

CGOpenMPRuntime::DisableAutoDeclareTargetRAII::DisableAutoDeclareTargetRAII(
    CodeGenModule &CGM)
    : CGM(CGM) {
  if (CGM.getLangOpts().OpenMPIsDevice) {
    SavedShouldMarkAsGlobal = CGM.getOpenMPRuntime().ShouldMarkAsGlobal;
    CGM.getOpenMPRuntime().ShouldMarkAsGlobal = false;
  }
}

CGOpenMPRuntime::DisableAutoDeclareTargetRAII::~DisableAutoDeclareTargetRAII() {
  if (CGM.getLangOpts().OpenMPIsDevice)
    CGM.getOpenMPRuntime().ShouldMarkAsGlobal = SavedShouldMarkAsGlobal;
}

bool CGOpenMPRuntime::markAsGlobalTarget(GlobalDecl GD) {
  if (!CGM.getLangOpts().OpenMPIsDevice || !ShouldMarkAsGlobal)
    return true;

  const auto *D = cast<FunctionDecl>(GD.getDecl());
  // Do not emit the function if it is marked as declare target, as it was
  // already emitted.
  if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(D)) {
    if (D->hasBody() && AlreadyEmittedTargetDecls.count(D) == 0) {
      if (auto *F = dyn_cast_or_null<llvm::Function>(
              CGM.GetGlobalValue(CGM.getMangledName(GD))))
        return !F->isDeclaration();
      return false;
    }
    return true;
  }

  return !AlreadyEmittedTargetDecls.insert(D).second;
}
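
// For illustration only: when registration is needed, the helper built below
// behaves roughly like
// \code
//   void omp_offloading_requires_reg() {
//     __tgt_register_requires(/*Flags=*/OMP_REQ_UNIFIED_SHARED_MEMORY);
//   }
// \endcode
// so that mismatched 'requires' clauses across translation units can be
// diagnosed by the runtime (a sketch; the actual symbol name and flag value
// depend on getName() and on the clauses seen).
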
llvm::Function *CGOpenMPRuntime::emitRequiresDirectiveRegFun() {
  // If we don't have entries or if we are emitting code for the device, we
  // don't need to do anything.
  if (CGM.getLangOpts().OMPTargetTriples.empty() ||
      CGM.getLangOpts().OpenMPSimd || CGM.getLangOpts().OpenMPIsDevice ||
      (OffloadEntriesInfoManager.empty() &&
       !HasEmittedDeclareTargetRegion && !HasEmittedTargetRegion))
    return nullptr;

  // Create and register the function that handles the requires directives.
  ASTContext &C = CGM.getContext();

  llvm::Function *RequiresRegFn;
  {
    CodeGenFunction CGF(CGM);
    const auto &FI = CGM.getTypes().arrangeNullaryFunction();
    llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
    std::string ReqName = getName({"omp_offloading", "requires_reg"});
    RequiresRegFn = CGM.CreateGlobalInitOrCleanUpFunction(FTy, ReqName, FI);
    CGF.StartFunction(GlobalDecl(), C.VoidTy, RequiresRegFn, FI, {});
    OpenMPOffloadingRequiresDirFlags Flags = OMP_REQ_NONE;
    // TODO: check for other requires clauses.
    // The requires directive takes effect only when a target region is
    // present in the compilation unit. Otherwise it is ignored and not
    // passed to the runtime. This prevents the runtime from throwing an
    // error for mismatching requires clauses across compilation units that
    // don't contain at least one target region.
    assert((HasEmittedTargetRegion || HasEmittedDeclareTargetRegion ||
            !OffloadEntriesInfoManager.empty()) &&
           "Target or declare target region expected.");
    if (HasRequiresUnifiedSharedMemory)
      Flags = OMP_REQ_UNIFIED_SHARED_MEMORY;
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___tgt_register_requires),
                        llvm::ConstantInt::get(CGM.Int64Ty, Flags));
    CGF.FinishFunction();
  }
  return RequiresRegFn;
}

void CGOpenMPRuntime::emitTeamsCall(CodeGenFunction &CGF,
                                    const OMPExecutableDirective &D,
                                    SourceLocation Loc,
                                    llvm::Function *OutlinedFn,
                                    ArrayRef<llvm::Value *> CapturedVars) {
  if (!CGF.HaveInsertPoint())
    return;

  llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
  CodeGenFunction::RunCleanupsScope Scope(CGF);

  // Build call __kmpc_fork_teams(loc, n, microtask, var1, .., varn);
  llvm::Value *Args[] = {
      RTLoc,
      CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
      CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy())};
  llvm::SmallVector<llvm::Value *, 16> RealArgs;
  RealArgs.append(std::begin(Args), std::end(Args));
  RealArgs.append(CapturedVars.begin(), CapturedVars.end());

  llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(
      CGM.getModule(), OMPRTL___kmpc_fork_teams);
  CGF.EmitRuntimeCall(RTLFn, RealArgs);
}
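
// For illustration only: for '#pragma omp teams num_teams(4) thread_limit(64)',
// the function below emits roughly
// \code
//   __kmpc_push_num_teams(&Loc, GTid, 4, 64);
// \endcode
// ahead of the __kmpc_fork_teams call produced by emitTeamsCall above
// (a sketch; 'Loc' and 'GTid' stand for the ident struct and thread id).
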
void CGOpenMPRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
                                         const Expr *NumTeams,
                                         const Expr *ThreadLimit,
                                         SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;

  llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);

  llvm::Value *NumTeamsVal =
      NumTeams
          ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(NumTeams),
                                      CGF.CGM.Int32Ty, /*isSigned=*/true)
          : CGF.Builder.getInt32(0);

  llvm::Value *ThreadLimitVal =
      ThreadLimit
          ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit),
                                      CGF.CGM.Int32Ty, /*isSigned=*/true)
          : CGF.Builder.getInt32(0);

  // Build call __kmpc_push_num_teams(&loc, global_tid, num_teams,
  // thread_limit).
  llvm::Value *PushNumTeamsArgs[] = {RTLoc, getThreadID(CGF, Loc), NumTeamsVal,
                                     ThreadLimitVal};
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_push_num_teams),
                      PushNumTeamsArgs);
}

void CGOpenMPRuntime::emitTargetDataCalls(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) {
  if (!CGF.HaveInsertPoint())
    return;

  // Action used to replace the default codegen action and turn privatization
  // off.
  PrePostActionTy NoPrivAction;

  // Generate the code for the opening of the data environment. Capture all
  // the arguments of the runtime call by reference because they are used in
  // the closing of the region.
  auto &&BeginThenGen = [this, &D, Device, &Info,
                         &CodeGen](CodeGenFunction &CGF, PrePostActionTy &) {
    // Fill up the arrays with all the mapped variables.
    MappableExprsHandler::MapCombinedInfoTy CombinedInfo;

    // Get map clause information.
    MappableExprsHandler MEHandler(D, CGF);
    MEHandler.generateAllInfo(CombinedInfo);

    // Fill up the arrays and create the arguments.
    emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder,
                         /*IsNonContiguous=*/true);

    llvm::Value *BasePointersArrayArg = nullptr;
    llvm::Value *PointersArrayArg = nullptr;
    llvm::Value *SizesArrayArg = nullptr;
    llvm::Value *MapTypesArrayArg = nullptr;
    llvm::Value *MapNamesArrayArg = nullptr;
    llvm::Value *MappersArrayArg = nullptr;
    emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
                                 SizesArrayArg, MapTypesArrayArg,
                                 MapNamesArrayArg, MappersArrayArg, Info);

    // Emit device ID if any.
    llvm::Value *DeviceID = nullptr;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);

    // Source location for the ident struct.
    llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());

    llvm::Value *OffloadingArgs[] = {RTLoc,
                                     DeviceID,
                                     PointerNum,
                                     BasePointersArrayArg,
                                     PointersArrayArg,
                                     SizesArrayArg,
                                     MapTypesArrayArg,
                                     MapNamesArrayArg,
                                     MappersArrayArg};
    CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___tgt_target_data_begin_mapper),
        OffloadingArgs);

    // If device pointer privatization is required, emit the body of the
    // region here. It will have to be duplicated: with and without
    // privatization.
    if (!Info.CaptureDeviceAddrMap.empty())
      CodeGen(CGF);
  };
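
  // For illustration only: for
  // \code
  //   #pragma omp target data map(tofrom : A[0:N])
  //   { /* body */ }
  // \endcode
  // BeginThenGen above emits __tgt_target_data_begin_mapper before the body
  // and EndThenGen below emits the matching __tgt_target_data_end_mapper
  // (a sketch of the begin/end pairing; 'A' and 'N' are hypothetical).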
11249 auto &&EndThenGen = [this, Device, &Info, &D](CodeGenFunction &CGF, 11250 PrePostActionTy &) { 11251 assert(Info.isValid() && "Invalid data environment closing arguments."); 11252 11253 llvm::Value *BasePointersArrayArg = nullptr; 11254 llvm::Value *PointersArrayArg = nullptr; 11255 llvm::Value *SizesArrayArg = nullptr; 11256 llvm::Value *MapTypesArrayArg = nullptr; 11257 llvm::Value *MapNamesArrayArg = nullptr; 11258 llvm::Value *MappersArrayArg = nullptr; 11259 emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg, 11260 SizesArrayArg, MapTypesArrayArg, 11261 MapNamesArrayArg, MappersArrayArg, Info, 11262 {/*ForEndCall=*/true}); 11263 11264 // Emit device ID if any. 11265 llvm::Value *DeviceID = nullptr; 11266 if (Device) { 11267 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device), 11268 CGF.Int64Ty, /*isSigned=*/true); 11269 } else { 11270 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF); 11271 } 11272 11273 // Emit the number of elements in the offloading arrays. 11274 llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs); 11275 11276 // Source location for the ident struct 11277 llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc()); 11278 11279 llvm::Value *OffloadingArgs[] = {RTLoc, 11280 DeviceID, 11281 PointerNum, 11282 BasePointersArrayArg, 11283 PointersArrayArg, 11284 SizesArrayArg, 11285 MapTypesArrayArg, 11286 MapNamesArrayArg, 11287 MappersArrayArg}; 11288 CGF.EmitRuntimeCall( 11289 OMPBuilder.getOrCreateRuntimeFunction( 11290 CGM.getModule(), OMPRTL___tgt_target_data_end_mapper), 11291 OffloadingArgs); 11292 }; 11293 11294 // If we need device pointer privatization, we need to emit the body of the 11295 // region with no privatization in the 'else' branch of the conditional. 11296 // Otherwise, we don't have to do anything. 11297 auto &&BeginElseGen = [&Info, &CodeGen, &NoPrivAction](CodeGenFunction &CGF, 11298 PrePostActionTy &) { 11299 if (!Info.CaptureDeviceAddrMap.empty()) { 11300 CodeGen.setAction(NoPrivAction); 11301 CodeGen(CGF); 11302 } 11303 }; 11304 11305 // We don't have to do anything to close the region if the if clause evaluates 11306 // to false. 11307 auto &&EndElseGen = [](CodeGenFunction &CGF, PrePostActionTy &) {}; 11308 11309 if (IfCond) { 11310 emitIfClause(CGF, IfCond, BeginThenGen, BeginElseGen); 11311 } else { 11312 RegionCodeGenTy RCG(BeginThenGen); 11313 RCG(CGF); 11314 } 11315 11316 // If we don't require privatization of device pointers, we emit the body in 11317 // between the runtime calls. This avoids duplicating the body code. 11318 if (Info.CaptureDeviceAddrMap.empty()) { 11319 CodeGen.setAction(NoPrivAction); 11320 CodeGen(CGF); 11321 } 11322 11323 if (IfCond) { 11324 emitIfClause(CGF, IfCond, EndThenGen, EndElseGen); 11325 } else { 11326 RegionCodeGenTy RCG(EndThenGen); 11327 RCG(CGF); 11328 } 11329 } 11330 11331 void CGOpenMPRuntime::emitTargetDataStandAloneCall( 11332 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, 11333 const Expr *Device) { 11334 if (!CGF.HaveInsertPoint()) 11335 return; 11336 11337 assert((isa<OMPTargetEnterDataDirective>(D) || 11338 isa<OMPTargetExitDataDirective>(D) || 11339 isa<OMPTargetUpdateDirective>(D)) && 11340 "Expecting either target enter, exit data, or update directives."); 11341 11342 CodeGenFunction::OMPTargetDataInfo InputInfo; 11343 llvm::Value *MapTypesArray = nullptr; 11344 llvm::Value *MapNamesArray = nullptr; 11345 // Generate the code for the opening of the data environment. 
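// For illustration only: '#pragma omp target enter data map(to: a)' selects
// __tgt_target_data_begin_mapper (or the _nowait variant when a 'nowait'
// clause is present); 'target exit data' and 'target update' select the
// corresponding end/update entry points in the switch below.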
11346 auto &&ThenGen = [this, &D, Device, &InputInfo, &MapTypesArray, 11347 &MapNamesArray](CodeGenFunction &CGF, PrePostActionTy &) { 11348 // Emit device ID if any. 11349 llvm::Value *DeviceID = nullptr; 11350 if (Device) { 11351 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device), 11352 CGF.Int64Ty, /*isSigned=*/true); 11353 } else { 11354 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF); 11355 } 11356 11357 // Emit the number of elements in the offloading arrays. 11358 llvm::Constant *PointerNum = 11359 CGF.Builder.getInt32(InputInfo.NumberOfTargetItems); 11360 11361 // Source location for the ident struct 11362 llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc()); 11363 11364 llvm::Value *OffloadingArgs[] = {RTLoc, 11365 DeviceID, 11366 PointerNum, 11367 InputInfo.BasePointersArray.getPointer(), 11368 InputInfo.PointersArray.getPointer(), 11369 InputInfo.SizesArray.getPointer(), 11370 MapTypesArray, 11371 MapNamesArray, 11372 InputInfo.MappersArray.getPointer()}; 11373 11374 // Select the right runtime function call for each standalone 11375 // directive. 11376 const bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>(); 11377 RuntimeFunction RTLFn; 11378 switch (D.getDirectiveKind()) { 11379 case OMPD_target_enter_data: 11380 RTLFn = HasNowait ? OMPRTL___tgt_target_data_begin_nowait_mapper 11381 : OMPRTL___tgt_target_data_begin_mapper; 11382 break; 11383 case OMPD_target_exit_data: 11384 RTLFn = HasNowait ? OMPRTL___tgt_target_data_end_nowait_mapper 11385 : OMPRTL___tgt_target_data_end_mapper; 11386 break; 11387 case OMPD_target_update: 11388 RTLFn = HasNowait ? OMPRTL___tgt_target_data_update_nowait_mapper 11389 : OMPRTL___tgt_target_data_update_mapper; 11390 break; 11391 case OMPD_parallel: 11392 case OMPD_for: 11393 case OMPD_parallel_for: 11394 case OMPD_parallel_master: 11395 case OMPD_parallel_sections: 11396 case OMPD_for_simd: 11397 case OMPD_parallel_for_simd: 11398 case OMPD_cancel: 11399 case OMPD_cancellation_point: 11400 case OMPD_ordered: 11401 case OMPD_threadprivate: 11402 case OMPD_allocate: 11403 case OMPD_task: 11404 case OMPD_simd: 11405 case OMPD_tile: 11406 case OMPD_unroll: 11407 case OMPD_sections: 11408 case OMPD_section: 11409 case OMPD_single: 11410 case OMPD_master: 11411 case OMPD_critical: 11412 case OMPD_taskyield: 11413 case OMPD_barrier: 11414 case OMPD_taskwait: 11415 case OMPD_taskgroup: 11416 case OMPD_atomic: 11417 case OMPD_flush: 11418 case OMPD_depobj: 11419 case OMPD_scan: 11420 case OMPD_teams: 11421 case OMPD_target_data: 11422 case OMPD_distribute: 11423 case OMPD_distribute_simd: 11424 case OMPD_distribute_parallel_for: 11425 case OMPD_distribute_parallel_for_simd: 11426 case OMPD_teams_distribute: 11427 case OMPD_teams_distribute_simd: 11428 case OMPD_teams_distribute_parallel_for: 11429 case OMPD_teams_distribute_parallel_for_simd: 11430 case OMPD_declare_simd: 11431 case OMPD_declare_variant: 11432 case OMPD_begin_declare_variant: 11433 case OMPD_end_declare_variant: 11434 case OMPD_declare_target: 11435 case OMPD_end_declare_target: 11436 case OMPD_declare_reduction: 11437 case OMPD_declare_mapper: 11438 case OMPD_taskloop: 11439 case OMPD_taskloop_simd: 11440 case OMPD_master_taskloop: 11441 case OMPD_master_taskloop_simd: 11442 case OMPD_parallel_master_taskloop: 11443 case OMPD_parallel_master_taskloop_simd: 11444 case OMPD_target: 11445 case OMPD_target_simd: 11446 case OMPD_target_teams_distribute: 11447 case OMPD_target_teams_distribute_simd: 11448 case OMPD_target_teams_distribute_parallel_for: 11449 
case OMPD_target_teams_distribute_parallel_for_simd:
11450 case OMPD_target_teams:
11451 case OMPD_target_parallel:
11452 case OMPD_target_parallel_for:
11453 case OMPD_target_parallel_for_simd:
11454 case OMPD_requires:
11455 case OMPD_metadirective:
11456 case OMPD_unknown:
11457 default:
11458 llvm_unreachable("Unexpected standalone target data directive.");
11459 break;
11460 }
11461 CGF.EmitRuntimeCall(
11462 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), RTLFn),
11463 OffloadingArgs);
11464 };
11465
11466 auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray,
11467 &MapNamesArray](CodeGenFunction &CGF,
11468 PrePostActionTy &) {
11469 // Fill up the arrays with all the mapped variables.
11470 MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
11471
11472 // Get map clause information.
11473 MappableExprsHandler MEHandler(D, CGF);
11474 MEHandler.generateAllInfo(CombinedInfo);
11475
11476 TargetDataInfo Info;
11477 // Fill up the arrays and create the arguments.
11478 emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder,
11479 /*IsNonContiguous=*/true);
11480 bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>() ||
11481 D.hasClausesOfKind<OMPNowaitClause>();
11482 emitOffloadingArraysArgument(
11483 CGF, Info.BasePointersArray, Info.PointersArray, Info.SizesArray,
11484 Info.MapTypesArray, Info.MapNamesArray, Info.MappersArray, Info,
11485 {/*ForEndCall=*/false});
11486 InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
11487 InputInfo.BasePointersArray =
11488 Address::deprecated(Info.BasePointersArray, CGM.getPointerAlign());
11489 InputInfo.PointersArray =
11490 Address::deprecated(Info.PointersArray, CGM.getPointerAlign());
11491 InputInfo.SizesArray =
11492 Address::deprecated(Info.SizesArray, CGM.getPointerAlign());
11493 InputInfo.MappersArray =
11494 Address::deprecated(Info.MappersArray, CGM.getPointerAlign());
11495 MapTypesArray = Info.MapTypesArray;
11496 MapNamesArray = Info.MapNamesArray;
11497 if (RequiresOuterTask)
11498 CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
11499 else
11500 emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
11501 };
11502
11503 if (IfCond) {
11504 emitIfClause(CGF, IfCond, TargetThenGen,
11505 [](CodeGenFunction &CGF, PrePostActionTy &) {});
11506 } else {
11507 RegionCodeGenTy ThenRCG(TargetThenGen);
11508 ThenRCG(CGF);
11509 }
11510 }
11511
11512 namespace {
11513 /// Kind of parameter in a function with 'declare simd' directive.
11514 enum ParamKindTy { LinearWithVarStride, Linear, Uniform, Vector };
11515 /// Attribute set of the parameter.
11516 struct ParamAttrTy {
11517 ParamKindTy Kind = Vector;
11518 llvm::APSInt StrideOrArg;
11519 llvm::APSInt Alignment;
11520 };
11521 } // namespace
11522
11523 static unsigned evaluateCDTSize(const FunctionDecl *FD,
11524 ArrayRef<ParamAttrTy> ParamAttrs) {
11525 // Every vector variant of a SIMD-enabled function has a vector length (VLEN).
11526 // If the OpenMP clause "simdlen" is used, the VLEN is the value of the
11527 // argument of that clause. The VLEN value must be a power of 2.
11528 // Otherwise, the notion of the function's "characteristic data type" (CDT)
11529 // is used to compute the vector length.
11530 // The CDT is defined in the following order:
11531 // a) For a non-void function, the CDT is the return type.
11532 // b) If the function has any non-uniform, non-linear parameters, then the
11533 // CDT is the type of the first such parameter.
11534 // c) If the CDT determined by a) or b) above is a struct, union, or class
11535 // type which is pass-by-value (except for the type that maps to the
11536 // built-in complex data type), the characteristic data type is int.
11537 // d) If none of the above three cases is applicable, the CDT is int.
11538 // The VLEN is then determined based on the CDT and the size of the vector
11539 // register of the ISA for which the current vector version is generated.
11540 // The VLEN is computed using the formula below:
11541 // VLEN = sizeof(vector_register) / sizeof(CDT),
11542 // where the vector register size is specified in section 3.2.1 "Registers
11543 // and the Stack Frame" of the original AMD64 ABI document.
11544 QualType RetType = FD->getReturnType();
11545 if (RetType.isNull())
11546 return 0;
11547 ASTContext &C = FD->getASTContext();
11548 QualType CDT;
11549 if (!RetType.isNull() && !RetType->isVoidType()) {
11550 CDT = RetType;
11551 } else {
11552 unsigned Offset = 0;
11553 if (const auto *MD = dyn_cast<CXXMethodDecl>(FD)) {
11554 if (ParamAttrs[Offset].Kind == Vector)
11555 CDT = C.getPointerType(C.getRecordType(MD->getParent()));
11556 ++Offset;
11557 }
11558 if (CDT.isNull()) {
11559 for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
11560 if (ParamAttrs[I + Offset].Kind == Vector) {
11561 CDT = FD->getParamDecl(I)->getType();
11562 break;
11563 }
11564 }
11565 }
11566 }
11567 if (CDT.isNull())
11568 CDT = C.IntTy;
11569 CDT = CDT->getCanonicalTypeUnqualified();
11570 if (CDT->isRecordType() || CDT->isUnionType())
11571 CDT = C.IntTy;
11572 return C.getTypeSize(CDT);
11573 }
11574
11575 static void
11576 emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn,
11577 const llvm::APSInt &VLENVal,
11578 ArrayRef<ParamAttrTy> ParamAttrs,
11579 OMPDeclareSimdDeclAttr::BranchStateTy State) {
11580 struct ISADataTy {
11581 char ISA;
11582 unsigned VecRegSize;
11583 };
11584 ISADataTy ISAData[] = {
11585 {
11586 'b', 128
11587 }, // SSE
11588 {
11589 'c', 256
11590 }, // AVX
11591 {
11592 'd', 256
11593 }, // AVX2
11594 {
11595 'e', 512
11596 }, // AVX512
11597 };
11598 llvm::SmallVector<char, 2> Masked;
11599 switch (State) {
11600 case OMPDeclareSimdDeclAttr::BS_Undefined:
11601 Masked.push_back('N');
11602 Masked.push_back('M');
11603 break;
11604 case OMPDeclareSimdDeclAttr::BS_Notinbranch:
11605 Masked.push_back('N');
11606 break;
11607 case OMPDeclareSimdDeclAttr::BS_Inbranch:
11608 Masked.push_back('M');
11609 break;
11610 }
11611 for (char Mask : Masked) {
11612 for (const ISADataTy &Data : ISAData) {
11613 SmallString<256> Buffer;
11614 llvm::raw_svector_ostream Out(Buffer);
11615 Out << "_ZGV" << Data.ISA << Mask;
11616 if (!VLENVal) {
11617 unsigned NumElts = evaluateCDTSize(FD, ParamAttrs);
11618 assert(NumElts && "Non-zero simdlen/cdtsize expected");
11619 Out << llvm::APSInt::getUnsigned(Data.VecRegSize / NumElts);
11620 } else {
11621 Out << VLENVal;
11622 }
11623 for (const ParamAttrTy &ParamAttr : ParamAttrs) {
11624 switch (ParamAttr.Kind) {
11625 case LinearWithVarStride:
11626 Out << 's' << ParamAttr.StrideOrArg;
11627 break;
11628 case Linear:
11629 Out << 'l';
11630 if (ParamAttr.StrideOrArg != 1)
11631 Out << ParamAttr.StrideOrArg;
11632 break;
11633 case Uniform:
11634 Out << 'u';
11635 break;
11636 case Vector:
11637 Out << 'v';
11638 break;
11639 }
11640 if (!!ParamAttr.Alignment)
11641 Out << 'a' << ParamAttr.Alignment;
11642 }
11643 Out << '_' << Fn->getName();
11644 Fn->addFnAttr(Out.str());
11645 }
11646 }
11647 }
11648
11649 // These are the functions
that are needed to mangle the name of the
11650 // vector functions generated by the compiler, according to the rules
11651 // defined in the "Vector Function ABI specifications for AArch64",
11652 // available at
11653 // https://developer.arm.com/products/software-development-tools/hpc/arm-compiler-for-hpc/vector-function-abi.
11654
11655 /// Maps To Vector (MTV), as defined in 3.1.1 of the AAVFABI.
11656 ///
11657 /// TODO: Need to implement the behavior for reference marked with a
11658 /// var or no linear modifiers (1.b in the section). For this, we
11659 /// need to extend ParamKindTy to support the linear modifiers.
11660 static bool getAArch64MTV(QualType QT, ParamKindTy Kind) {
11661 QT = QT.getCanonicalType();
11662
11663 if (QT->isVoidType())
11664 return false;
11665
11666 if (Kind == ParamKindTy::Uniform)
11667 return false;
11668
11669 if (Kind == ParamKindTy::Linear)
11670 return false;
11671
11672 // TODO: Handle linear references with modifiers
11673
11674 if (Kind == ParamKindTy::LinearWithVarStride)
11675 return false;
11676
11677 return true;
11678 }
11679
11680 /// Pass By Value (PBV), as defined in 3.1.2 of the AAVFABI.
11681 static bool getAArch64PBV(QualType QT, ASTContext &C) {
11682 QT = QT.getCanonicalType();
11683 unsigned Size = C.getTypeSize(QT);
11684
11685 // Only scalars and complex types at most 16 bytes wide set PBV to true.
11686 if (Size != 8 && Size != 16 && Size != 32 && Size != 64 && Size != 128)
11687 return false;
11688
11689 if (QT->isFloatingType())
11690 return true;
11691
11692 if (QT->isIntegerType())
11693 return true;
11694
11695 if (QT->isPointerType())
11696 return true;
11697
11698 // TODO: Add support for complex types (section 3.1.2, item 2).
11699
11700 return false;
11701 }
11702
11703 /// Computes the lane size (LS) of a return type or of an input parameter,
11704 /// as defined by `LS(P)` in 3.2.1 of the AAVFABI.
11705 /// TODO: Add support for references, section 3.2.1, item 1.
11706 static unsigned getAArch64LS(QualType QT, ParamKindTy Kind, ASTContext &C) {
11707 if (!getAArch64MTV(QT, Kind) && QT.getCanonicalType()->isPointerType()) {
11708 QualType PTy = QT.getCanonicalType()->getPointeeType();
11709 if (getAArch64PBV(PTy, C))
11710 return C.getTypeSize(PTy);
11711 }
11712 if (getAArch64PBV(QT, C))
11713 return C.getTypeSize(QT);
11714
11715 return C.getTypeSize(C.getUIntPtrType());
11716 }
11717
11718 // Get Narrowest Data Size (NDS) and Widest Data Size (WDS) from the
11719 // signature of the scalar function, as defined in 3.2.2 of the
11720 // AAVFABI.
11721 static std::tuple<unsigned, unsigned, bool>
11722 getNDSWDS(const FunctionDecl *FD, ArrayRef<ParamAttrTy> ParamAttrs) {
11723 QualType RetType = FD->getReturnType().getCanonicalType();
11724
11725 ASTContext &C = FD->getASTContext();
11726
11727 bool OutputBecomesInput = false;
11728
11729 llvm::SmallVector<unsigned, 8> Sizes;
11730 if (!RetType->isVoidType()) {
11731 Sizes.push_back(getAArch64LS(RetType, ParamKindTy::Vector, C));
11732 if (!getAArch64PBV(RetType, C) && getAArch64MTV(RetType, {}))
11733 OutputBecomesInput = true;
11734 }
11735 for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
11736 QualType QT = FD->getParamDecl(I)->getType().getCanonicalType();
11737 Sizes.push_back(getAArch64LS(QT, ParamAttrs[I].Kind, C));
11738 }
11739
11740 assert(!Sizes.empty() && "Unable to determine NDS and WDS.");
11741 // The LS of a function parameter / return value can only be a power
11742 // of 2, starting from 8 bits, up to 128.
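// For illustration only: for a scalar prototype 'double foo(float x)' the
// lane sizes are {64, 32}, so NDS = 32 and WDS = 64.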
11743 assert(llvm::all_of(Sizes, 11744 [](unsigned Size) { 11745 return Size == 8 || Size == 16 || Size == 32 || 11746 Size == 64 || Size == 128; 11747 }) && 11748 "Invalid size"); 11749 11750 return std::make_tuple(*std::min_element(std::begin(Sizes), std::end(Sizes)), 11751 *std::max_element(std::begin(Sizes), std::end(Sizes)), 11752 OutputBecomesInput); 11753 } 11754 11755 /// Mangle the parameter part of the vector function name according to 11756 /// their OpenMP classification. The mangling function is defined in 11757 /// section 3.5 of the AAVFABI. 11758 static std::string mangleVectorParameters(ArrayRef<ParamAttrTy> ParamAttrs) { 11759 SmallString<256> Buffer; 11760 llvm::raw_svector_ostream Out(Buffer); 11761 for (const auto &ParamAttr : ParamAttrs) { 11762 switch (ParamAttr.Kind) { 11763 case LinearWithVarStride: 11764 Out << "ls" << ParamAttr.StrideOrArg; 11765 break; 11766 case Linear: 11767 Out << 'l'; 11768 // Don't print the step value if it is not present or if it is 11769 // equal to 1. 11770 if (ParamAttr.StrideOrArg != 1) 11771 Out << ParamAttr.StrideOrArg; 11772 break; 11773 case Uniform: 11774 Out << 'u'; 11775 break; 11776 case Vector: 11777 Out << 'v'; 11778 break; 11779 } 11780 11781 if (!!ParamAttr.Alignment) 11782 Out << 'a' << ParamAttr.Alignment; 11783 } 11784 11785 return std::string(Out.str()); 11786 } 11787 11788 // Function used to add the attribute. The parameter `VLEN` is 11789 // templated to allow the use of "x" when targeting scalable functions 11790 // for SVE. 11791 template <typename T> 11792 static void addAArch64VectorName(T VLEN, StringRef LMask, StringRef Prefix, 11793 char ISA, StringRef ParSeq, 11794 StringRef MangledName, bool OutputBecomesInput, 11795 llvm::Function *Fn) { 11796 SmallString<256> Buffer; 11797 llvm::raw_svector_ostream Out(Buffer); 11798 Out << Prefix << ISA << LMask << VLEN; 11799 if (OutputBecomesInput) 11800 Out << "v"; 11801 Out << ParSeq << "_" << MangledName; 11802 Fn->addFnAttr(Out.str()); 11803 } 11804 11805 // Helper function to generate the Advanced SIMD names depending on 11806 // the value of the NDS when simdlen is not present. 11807 static void addAArch64AdvSIMDNDSNames(unsigned NDS, StringRef Mask, 11808 StringRef Prefix, char ISA, 11809 StringRef ParSeq, StringRef MangledName, 11810 bool OutputBecomesInput, 11811 llvm::Function *Fn) { 11812 switch (NDS) { 11813 case 8: 11814 addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName, 11815 OutputBecomesInput, Fn); 11816 addAArch64VectorName(16, Mask, Prefix, ISA, ParSeq, MangledName, 11817 OutputBecomesInput, Fn); 11818 break; 11819 case 16: 11820 addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName, 11821 OutputBecomesInput, Fn); 11822 addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName, 11823 OutputBecomesInput, Fn); 11824 break; 11825 case 32: 11826 addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName, 11827 OutputBecomesInput, Fn); 11828 addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName, 11829 OutputBecomesInput, Fn); 11830 break; 11831 case 64: 11832 case 128: 11833 addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName, 11834 OutputBecomesInput, Fn); 11835 break; 11836 default: 11837 llvm_unreachable("Scalar type is too wide."); 11838 } 11839 } 11840 11841 /// Emit vector function attributes for AArch64, as defined in the AAVFABI. 
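/// For illustration only: with '#pragma omp declare simd simdlen(2)' on
/// 'double foo(double x)' (assuming the scalar entry point is named "foo"),
/// the Advanced SIMD path adds the attribute "_ZGVnN2v_foo", plus the masked
/// variant "_ZGVnM2v_foo" when no [not]inbranch clause is given.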
11842 static void emitAArch64DeclareSimdFunction(
11843 CodeGenModule &CGM, const FunctionDecl *FD, unsigned UserVLEN,
11844 ArrayRef<ParamAttrTy> ParamAttrs,
11845 OMPDeclareSimdDeclAttr::BranchStateTy State, StringRef MangledName,
11846 char ISA, unsigned VecRegSize, llvm::Function *Fn, SourceLocation SLoc) {
11847
11848 // Get basic data for building the vector signature.
11849 const auto Data = getNDSWDS(FD, ParamAttrs);
11850 const unsigned NDS = std::get<0>(Data);
11851 const unsigned WDS = std::get<1>(Data);
11852 const bool OutputBecomesInput = std::get<2>(Data);
11853
11854 // Check the values provided via `simdlen` by the user.
11855 // 1. A `simdlen(1)` doesn't produce vector signatures.
11856 if (UserVLEN == 1) {
11857 unsigned DiagID = CGM.getDiags().getCustomDiagID(
11858 DiagnosticsEngine::Warning,
11859 "The clause simdlen(1) has no effect when targeting aarch64.");
11860 CGM.getDiags().Report(SLoc, DiagID);
11861 return;
11862 }
11863
11864 // 2. Section 3.3.1, item 1: user input must be a power of 2 for
11865 // Advanced SIMD output.
11866 if (ISA == 'n' && UserVLEN && !llvm::isPowerOf2_32(UserVLEN)) {
11867 unsigned DiagID = CGM.getDiags().getCustomDiagID(
11868 DiagnosticsEngine::Warning, "The value specified in simdlen must be a "
11869 "power of 2 when targeting Advanced SIMD.");
11870 CGM.getDiags().Report(SLoc, DiagID);
11871 return;
11872 }
11873
11874 // 3. Section 3.4.1. A fixed SVE length must obey the architectural
11875 // limits.
11876 if (ISA == 's' && UserVLEN != 0) {
11877 if ((UserVLEN * WDS > 2048) || (UserVLEN * WDS % 128 != 0)) {
11878 unsigned DiagID = CGM.getDiags().getCustomDiagID(
11879 DiagnosticsEngine::Warning, "The clause simdlen must fit the %0-bit "
11880 "lanes in the architectural constraints "
11881 "for SVE (min is 128-bit, max is "
11882 "2048-bit, by steps of 128-bit)");
11883 CGM.getDiags().Report(SLoc, DiagID) << WDS;
11884 return;
11885 }
11886 }
11887
11888 // Sort out the parameter sequence.
11889 const std::string ParSeq = mangleVectorParameters(ParamAttrs);
11890 StringRef Prefix = "_ZGV";
11891 // Generate simdlen from user input (if any).
11892 if (UserVLEN) {
11893 if (ISA == 's') {
11894 // SVE generates only a masked function.
11895 addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
11896 OutputBecomesInput, Fn);
11897 } else {
11898 assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
11899 // Advanced SIMD generates one or two functions, depending on
11900 // the `[not]inbranch` clause.
11901 switch (State) {
11902 case OMPDeclareSimdDeclAttr::BS_Undefined:
11903 addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
11904 OutputBecomesInput, Fn);
11905 addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
11906 OutputBecomesInput, Fn);
11907 break;
11908 case OMPDeclareSimdDeclAttr::BS_Notinbranch:
11909 addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
11910 OutputBecomesInput, Fn);
11911 break;
11912 case OMPDeclareSimdDeclAttr::BS_Inbranch:
11913 addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
11914 OutputBecomesInput, Fn);
11915 break;
11916 }
11917 }
11918 } else {
11919 // If no user simdlen is provided, follow the AAVFABI rules for
11920 // generating the vector length.
11921 if (ISA == 's') {
11922 // SVE, section 3.4.1, item 1.
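// For illustration only: the "x" token used below produces a
// vector-length-agnostic, masked name such as "_ZGVsMxv_foo" for a function
// with a single vector parameter.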
11923 addAArch64VectorName("x", "M", Prefix, ISA, ParSeq, MangledName, 11924 OutputBecomesInput, Fn); 11925 } else { 11926 assert(ISA == 'n' && "Expected ISA either 's' or 'n'."); 11927 // Advanced SIMD, Section 3.3.1 of the AAVFABI, generates one or 11928 // two vector names depending on the use of the clause 11929 // `[not]inbranch`. 11930 switch (State) { 11931 case OMPDeclareSimdDeclAttr::BS_Undefined: 11932 addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName, 11933 OutputBecomesInput, Fn); 11934 addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName, 11935 OutputBecomesInput, Fn); 11936 break; 11937 case OMPDeclareSimdDeclAttr::BS_Notinbranch: 11938 addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName, 11939 OutputBecomesInput, Fn); 11940 break; 11941 case OMPDeclareSimdDeclAttr::BS_Inbranch: 11942 addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName, 11943 OutputBecomesInput, Fn); 11944 break; 11945 } 11946 } 11947 } 11948 } 11949 11950 void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD, 11951 llvm::Function *Fn) { 11952 ASTContext &C = CGM.getContext(); 11953 FD = FD->getMostRecentDecl(); 11954 // Map params to their positions in function decl. 11955 llvm::DenseMap<const Decl *, unsigned> ParamPositions; 11956 if (isa<CXXMethodDecl>(FD)) 11957 ParamPositions.try_emplace(FD, 0); 11958 unsigned ParamPos = ParamPositions.size(); 11959 for (const ParmVarDecl *P : FD->parameters()) { 11960 ParamPositions.try_emplace(P->getCanonicalDecl(), ParamPos); 11961 ++ParamPos; 11962 } 11963 while (FD) { 11964 for (const auto *Attr : FD->specific_attrs<OMPDeclareSimdDeclAttr>()) { 11965 llvm::SmallVector<ParamAttrTy, 8> ParamAttrs(ParamPositions.size()); 11966 // Mark uniform parameters. 11967 for (const Expr *E : Attr->uniforms()) { 11968 E = E->IgnoreParenImpCasts(); 11969 unsigned Pos; 11970 if (isa<CXXThisExpr>(E)) { 11971 Pos = ParamPositions[FD]; 11972 } else { 11973 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl()) 11974 ->getCanonicalDecl(); 11975 Pos = ParamPositions[PVD]; 11976 } 11977 ParamAttrs[Pos].Kind = Uniform; 11978 } 11979 // Get alignment info. 11980 auto NI = Attr->alignments_begin(); 11981 for (const Expr *E : Attr->aligneds()) { 11982 E = E->IgnoreParenImpCasts(); 11983 unsigned Pos; 11984 QualType ParmTy; 11985 if (isa<CXXThisExpr>(E)) { 11986 Pos = ParamPositions[FD]; 11987 ParmTy = E->getType(); 11988 } else { 11989 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl()) 11990 ->getCanonicalDecl(); 11991 Pos = ParamPositions[PVD]; 11992 ParmTy = PVD->getType(); 11993 } 11994 ParamAttrs[Pos].Alignment = 11995 (*NI) 11996 ? (*NI)->EvaluateKnownConstInt(C) 11997 : llvm::APSInt::getUnsigned( 11998 C.toCharUnitsFromBits(C.getOpenMPDefaultSimdAlign(ParmTy)) 11999 .getQuantity()); 12000 ++NI; 12001 } 12002 // Mark linear parameters. 12003 auto SI = Attr->steps_begin(); 12004 auto MI = Attr->modifiers_begin(); 12005 for (const Expr *E : Attr->linears()) { 12006 E = E->IgnoreParenImpCasts(); 12007 unsigned Pos; 12008 // Rescaling factor needed to compute the linear parameter 12009 // value in the mangled name. 
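// For illustration only: for 'linear(p)' on an 'int *p' parameter with the
// default step of 1, the step is rescaled by sizeof(int), so the parameter
// is mangled as "l4" rather than "l" (assuming a 4-byte int).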
12010 unsigned PtrRescalingFactor = 1; 12011 if (isa<CXXThisExpr>(E)) { 12012 Pos = ParamPositions[FD]; 12013 } else { 12014 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl()) 12015 ->getCanonicalDecl(); 12016 Pos = ParamPositions[PVD]; 12017 if (auto *P = dyn_cast<PointerType>(PVD->getType())) 12018 PtrRescalingFactor = CGM.getContext() 12019 .getTypeSizeInChars(P->getPointeeType()) 12020 .getQuantity(); 12021 } 12022 ParamAttrTy &ParamAttr = ParamAttrs[Pos]; 12023 ParamAttr.Kind = Linear; 12024 // Assuming a stride of 1, for `linear` without modifiers. 12025 ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(1); 12026 if (*SI) { 12027 Expr::EvalResult Result; 12028 if (!(*SI)->EvaluateAsInt(Result, C, Expr::SE_AllowSideEffects)) { 12029 if (const auto *DRE = 12030 cast<DeclRefExpr>((*SI)->IgnoreParenImpCasts())) { 12031 if (const auto *StridePVD = cast<ParmVarDecl>(DRE->getDecl())) { 12032 ParamAttr.Kind = LinearWithVarStride; 12033 ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned( 12034 ParamPositions[StridePVD->getCanonicalDecl()]); 12035 } 12036 } 12037 } else { 12038 ParamAttr.StrideOrArg = Result.Val.getInt(); 12039 } 12040 } 12041 // If we are using a linear clause on a pointer, we need to 12042 // rescale the value of linear_step with the byte size of the 12043 // pointee type. 12044 if (Linear == ParamAttr.Kind) 12045 ParamAttr.StrideOrArg = ParamAttr.StrideOrArg * PtrRescalingFactor; 12046 ++SI; 12047 ++MI; 12048 } 12049 llvm::APSInt VLENVal; 12050 SourceLocation ExprLoc; 12051 const Expr *VLENExpr = Attr->getSimdlen(); 12052 if (VLENExpr) { 12053 VLENVal = VLENExpr->EvaluateKnownConstInt(C); 12054 ExprLoc = VLENExpr->getExprLoc(); 12055 } 12056 OMPDeclareSimdDeclAttr::BranchStateTy State = Attr->getBranchState(); 12057 if (CGM.getTriple().isX86()) { 12058 emitX86DeclareSimdFunction(FD, Fn, VLENVal, ParamAttrs, State); 12059 } else if (CGM.getTriple().getArch() == llvm::Triple::aarch64) { 12060 unsigned VLEN = VLENVal.getExtValue(); 12061 StringRef MangledName = Fn->getName(); 12062 if (CGM.getTarget().hasFeature("sve")) 12063 emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State, 12064 MangledName, 's', 128, Fn, ExprLoc); 12065 if (CGM.getTarget().hasFeature("neon")) 12066 emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State, 12067 MangledName, 'n', 128, Fn, ExprLoc); 12068 } 12069 } 12070 FD = FD->getPreviousDecl(); 12071 } 12072 } 12073 12074 namespace { 12075 /// Cleanup action for doacross support. 
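/// Pushed by emitDoacrossInit below so that __kmpc_doacross_fini is emitted
/// on both normal and exception exits from the region.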
12076 class DoacrossCleanupTy final : public EHScopeStack::Cleanup { 12077 public: 12078 static const int DoacrossFinArgs = 2; 12079 12080 private: 12081 llvm::FunctionCallee RTLFn; 12082 llvm::Value *Args[DoacrossFinArgs]; 12083 12084 public: 12085 DoacrossCleanupTy(llvm::FunctionCallee RTLFn, 12086 ArrayRef<llvm::Value *> CallArgs) 12087 : RTLFn(RTLFn) { 12088 assert(CallArgs.size() == DoacrossFinArgs); 12089 std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args)); 12090 } 12091 void Emit(CodeGenFunction &CGF, Flags /*flags*/) override { 12092 if (!CGF.HaveInsertPoint()) 12093 return; 12094 CGF.EmitRuntimeCall(RTLFn, Args); 12095 } 12096 }; 12097 } // namespace 12098 12099 void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF, 12100 const OMPLoopDirective &D, 12101 ArrayRef<Expr *> NumIterations) { 12102 if (!CGF.HaveInsertPoint()) 12103 return; 12104 12105 ASTContext &C = CGM.getContext(); 12106 QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true); 12107 RecordDecl *RD; 12108 if (KmpDimTy.isNull()) { 12109 // Build struct kmp_dim { // loop bounds info casted to kmp_int64 12110 // kmp_int64 lo; // lower 12111 // kmp_int64 up; // upper 12112 // kmp_int64 st; // stride 12113 // }; 12114 RD = C.buildImplicitRecord("kmp_dim"); 12115 RD->startDefinition(); 12116 addFieldToRecordDecl(C, RD, Int64Ty); 12117 addFieldToRecordDecl(C, RD, Int64Ty); 12118 addFieldToRecordDecl(C, RD, Int64Ty); 12119 RD->completeDefinition(); 12120 KmpDimTy = C.getRecordType(RD); 12121 } else { 12122 RD = cast<RecordDecl>(KmpDimTy->getAsTagDecl()); 12123 } 12124 llvm::APInt Size(/*numBits=*/32, NumIterations.size()); 12125 QualType ArrayTy = 12126 C.getConstantArrayType(KmpDimTy, Size, nullptr, ArrayType::Normal, 0); 12127 12128 Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims"); 12129 CGF.EmitNullInitialization(DimsAddr, ArrayTy); 12130 enum { LowerFD = 0, UpperFD, StrideFD }; 12131 // Fill dims with data. 
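// For illustration only: for '#pragma omp for ordered(2)' this loop fills two
// kmp_dim entries, one per associated loop, setting 'up' to the iteration
// count and 'st' to 1 ('lo' stays zero from the null-initialization above).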
12132 for (unsigned I = 0, E = NumIterations.size(); I < E; ++I) { 12133 LValue DimsLVal = CGF.MakeAddrLValue( 12134 CGF.Builder.CreateConstArrayGEP(DimsAddr, I), KmpDimTy); 12135 // dims.upper = num_iterations; 12136 LValue UpperLVal = CGF.EmitLValueForField( 12137 DimsLVal, *std::next(RD->field_begin(), UpperFD)); 12138 llvm::Value *NumIterVal = CGF.EmitScalarConversion( 12139 CGF.EmitScalarExpr(NumIterations[I]), NumIterations[I]->getType(), 12140 Int64Ty, NumIterations[I]->getExprLoc()); 12141 CGF.EmitStoreOfScalar(NumIterVal, UpperLVal); 12142 // dims.stride = 1; 12143 LValue StrideLVal = CGF.EmitLValueForField( 12144 DimsLVal, *std::next(RD->field_begin(), StrideFD)); 12145 CGF.EmitStoreOfScalar(llvm::ConstantInt::getSigned(CGM.Int64Ty, /*V=*/1), 12146 StrideLVal); 12147 } 12148 12149 // Build call void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid, 12150 // kmp_int32 num_dims, struct kmp_dim * dims); 12151 llvm::Value *Args[] = { 12152 emitUpdateLocation(CGF, D.getBeginLoc()), 12153 getThreadID(CGF, D.getBeginLoc()), 12154 llvm::ConstantInt::getSigned(CGM.Int32Ty, NumIterations.size()), 12155 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 12156 CGF.Builder.CreateConstArrayGEP(DimsAddr, 0).getPointer(), 12157 CGM.VoidPtrTy)}; 12158 12159 llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction( 12160 CGM.getModule(), OMPRTL___kmpc_doacross_init); 12161 CGF.EmitRuntimeCall(RTLFn, Args); 12162 llvm::Value *FiniArgs[DoacrossCleanupTy::DoacrossFinArgs] = { 12163 emitUpdateLocation(CGF, D.getEndLoc()), getThreadID(CGF, D.getEndLoc())}; 12164 llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction( 12165 CGM.getModule(), OMPRTL___kmpc_doacross_fini); 12166 CGF.EHStack.pushCleanup<DoacrossCleanupTy>(NormalAndEHCleanup, FiniRTLFn, 12167 llvm::makeArrayRef(FiniArgs)); 12168 } 12169 12170 void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF, 12171 const OMPDependClause *C) { 12172 QualType Int64Ty = 12173 CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1); 12174 llvm::APInt Size(/*numBits=*/32, C->getNumLoops()); 12175 QualType ArrayTy = CGM.getContext().getConstantArrayType( 12176 Int64Ty, Size, nullptr, ArrayType::Normal, 0); 12177 Address CntAddr = CGF.CreateMemTemp(ArrayTy, ".cnt.addr"); 12178 for (unsigned I = 0, E = C->getNumLoops(); I < E; ++I) { 12179 const Expr *CounterVal = C->getLoopData(I); 12180 assert(CounterVal); 12181 llvm::Value *CntVal = CGF.EmitScalarConversion( 12182 CGF.EmitScalarExpr(CounterVal), CounterVal->getType(), Int64Ty, 12183 CounterVal->getExprLoc()); 12184 CGF.EmitStoreOfScalar(CntVal, CGF.Builder.CreateConstArrayGEP(CntAddr, I), 12185 /*Volatile=*/false, Int64Ty); 12186 } 12187 llvm::Value *Args[] = { 12188 emitUpdateLocation(CGF, C->getBeginLoc()), 12189 getThreadID(CGF, C->getBeginLoc()), 12190 CGF.Builder.CreateConstArrayGEP(CntAddr, 0).getPointer()}; 12191 llvm::FunctionCallee RTLFn; 12192 if (C->getDependencyKind() == OMPC_DEPEND_source) { 12193 RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), 12194 OMPRTL___kmpc_doacross_post); 12195 } else { 12196 assert(C->getDependencyKind() == OMPC_DEPEND_sink); 12197 RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), 12198 OMPRTL___kmpc_doacross_wait); 12199 } 12200 CGF.EmitRuntimeCall(RTLFn, Args); 12201 } 12202 12203 void CGOpenMPRuntime::emitCall(CodeGenFunction &CGF, SourceLocation Loc, 12204 llvm::FunctionCallee Callee, 12205 ArrayRef<llvm::Value *> Args) const { 12206 assert(Loc.isValid() && "Outlined function call location 
must be valid."); 12207 auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc); 12208 12209 if (auto *Fn = dyn_cast<llvm::Function>(Callee.getCallee())) { 12210 if (Fn->doesNotThrow()) { 12211 CGF.EmitNounwindRuntimeCall(Fn, Args); 12212 return; 12213 } 12214 } 12215 CGF.EmitRuntimeCall(Callee, Args); 12216 } 12217 12218 void CGOpenMPRuntime::emitOutlinedFunctionCall( 12219 CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn, 12220 ArrayRef<llvm::Value *> Args) const { 12221 emitCall(CGF, Loc, OutlinedFn, Args); 12222 } 12223 12224 void CGOpenMPRuntime::emitFunctionProlog(CodeGenFunction &CGF, const Decl *D) { 12225 if (const auto *FD = dyn_cast<FunctionDecl>(D)) 12226 if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(FD)) 12227 HasEmittedDeclareTargetRegion = true; 12228 } 12229 12230 Address CGOpenMPRuntime::getParameterAddress(CodeGenFunction &CGF, 12231 const VarDecl *NativeParam, 12232 const VarDecl *TargetParam) const { 12233 return CGF.GetAddrOfLocalVar(NativeParam); 12234 } 12235 12236 /// Return allocator value from expression, or return a null allocator (default 12237 /// when no allocator specified). 12238 static llvm::Value *getAllocatorVal(CodeGenFunction &CGF, 12239 const Expr *Allocator) { 12240 llvm::Value *AllocVal; 12241 if (Allocator) { 12242 AllocVal = CGF.EmitScalarExpr(Allocator); 12243 // According to the standard, the original allocator type is a enum 12244 // (integer). Convert to pointer type, if required. 12245 AllocVal = CGF.EmitScalarConversion(AllocVal, Allocator->getType(), 12246 CGF.getContext().VoidPtrTy, 12247 Allocator->getExprLoc()); 12248 } else { 12249 // If no allocator specified, it defaults to the null allocator. 12250 AllocVal = llvm::Constant::getNullValue( 12251 CGF.CGM.getTypes().ConvertType(CGF.getContext().VoidPtrTy)); 12252 } 12253 return AllocVal; 12254 } 12255 12256 Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF, 12257 const VarDecl *VD) { 12258 if (!VD) 12259 return Address::invalid(); 12260 Address UntiedAddr = Address::invalid(); 12261 Address UntiedRealAddr = Address::invalid(); 12262 auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn); 12263 if (It != FunctionToUntiedTaskStackMap.end()) { 12264 const UntiedLocalVarsAddressesMap &UntiedData = 12265 UntiedLocalVarsStack[It->second]; 12266 auto I = UntiedData.find(VD); 12267 if (I != UntiedData.end()) { 12268 UntiedAddr = I->second.first; 12269 UntiedRealAddr = I->second.second; 12270 } 12271 } 12272 const VarDecl *CVD = VD->getCanonicalDecl(); 12273 if (CVD->hasAttr<OMPAllocateDeclAttr>()) { 12274 // Use the default allocation. 
12275 if (!isAllocatableDecl(VD)) 12276 return UntiedAddr; 12277 llvm::Value *Size; 12278 CharUnits Align = CGM.getContext().getDeclAlign(CVD); 12279 if (CVD->getType()->isVariablyModifiedType()) { 12280 Size = CGF.getTypeSize(CVD->getType()); 12281 // Align the size: ((size + align - 1) / align) * align 12282 Size = CGF.Builder.CreateNUWAdd( 12283 Size, CGM.getSize(Align - CharUnits::fromQuantity(1))); 12284 Size = CGF.Builder.CreateUDiv(Size, CGM.getSize(Align)); 12285 Size = CGF.Builder.CreateNUWMul(Size, CGM.getSize(Align)); 12286 } else { 12287 CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType()); 12288 Size = CGM.getSize(Sz.alignTo(Align)); 12289 } 12290 llvm::Value *ThreadID = getThreadID(CGF, CVD->getBeginLoc()); 12291 const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>(); 12292 const Expr *Allocator = AA->getAllocator(); 12293 llvm::Value *AllocVal = getAllocatorVal(CGF, Allocator); 12294 llvm::Value *Alignment = 12295 AA->getAlignment() 12296 ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(AA->getAlignment()), 12297 CGM.SizeTy, /*isSigned=*/false) 12298 : nullptr; 12299 SmallVector<llvm::Value *, 4> Args; 12300 Args.push_back(ThreadID); 12301 if (Alignment) 12302 Args.push_back(Alignment); 12303 Args.push_back(Size); 12304 Args.push_back(AllocVal); 12305 llvm::omp::RuntimeFunction FnID = 12306 Alignment ? OMPRTL___kmpc_aligned_alloc : OMPRTL___kmpc_alloc; 12307 llvm::Value *Addr = CGF.EmitRuntimeCall( 12308 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), FnID), Args, 12309 getName({CVD->getName(), ".void.addr"})); 12310 llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction( 12311 CGM.getModule(), OMPRTL___kmpc_free); 12312 QualType Ty = CGM.getContext().getPointerType(CVD->getType()); 12313 Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 12314 Addr, CGF.ConvertTypeForMem(Ty), getName({CVD->getName(), ".addr"})); 12315 if (UntiedAddr.isValid()) 12316 CGF.EmitStoreOfScalar(Addr, UntiedAddr, /*Volatile=*/false, Ty); 12317 12318 // Cleanup action for allocate support. 12319 class OMPAllocateCleanupTy final : public EHScopeStack::Cleanup { 12320 llvm::FunctionCallee RTLFn; 12321 SourceLocation::UIntTy LocEncoding; 12322 Address Addr; 12323 const Expr *AllocExpr; 12324 12325 public: 12326 OMPAllocateCleanupTy(llvm::FunctionCallee RTLFn, 12327 SourceLocation::UIntTy LocEncoding, Address Addr, 12328 const Expr *AllocExpr) 12329 : RTLFn(RTLFn), LocEncoding(LocEncoding), Addr(Addr), 12330 AllocExpr(AllocExpr) {} 12331 void Emit(CodeGenFunction &CGF, Flags /*flags*/) override { 12332 if (!CGF.HaveInsertPoint()) 12333 return; 12334 llvm::Value *Args[3]; 12335 Args[0] = CGF.CGM.getOpenMPRuntime().getThreadID( 12336 CGF, SourceLocation::getFromRawEncoding(LocEncoding)); 12337 Args[1] = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 12338 Addr.getPointer(), CGF.VoidPtrTy); 12339 llvm::Value *AllocVal = getAllocatorVal(CGF, AllocExpr); 12340 Args[2] = AllocVal; 12341 CGF.EmitRuntimeCall(RTLFn, Args); 12342 } 12343 }; 12344 Address VDAddr = UntiedRealAddr.isValid() 12345 ? 
UntiedRealAddr 12346 : Address::deprecated(Addr, Align); 12347 CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>( 12348 NormalAndEHCleanup, FiniRTLFn, CVD->getLocation().getRawEncoding(), 12349 VDAddr, Allocator); 12350 if (UntiedRealAddr.isValid()) 12351 if (auto *Region = 12352 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) 12353 Region->emitUntiedSwitch(CGF); 12354 return VDAddr; 12355 } 12356 return UntiedAddr; 12357 } 12358 12359 bool CGOpenMPRuntime::isLocalVarInUntiedTask(CodeGenFunction &CGF, 12360 const VarDecl *VD) const { 12361 auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn); 12362 if (It == FunctionToUntiedTaskStackMap.end()) 12363 return false; 12364 return UntiedLocalVarsStack[It->second].count(VD) > 0; 12365 } 12366 12367 CGOpenMPRuntime::NontemporalDeclsRAII::NontemporalDeclsRAII( 12368 CodeGenModule &CGM, const OMPLoopDirective &S) 12369 : CGM(CGM), NeedToPush(S.hasClausesOfKind<OMPNontemporalClause>()) { 12370 assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode."); 12371 if (!NeedToPush) 12372 return; 12373 NontemporalDeclsSet &DS = 12374 CGM.getOpenMPRuntime().NontemporalDeclsStack.emplace_back(); 12375 for (const auto *C : S.getClausesOfKind<OMPNontemporalClause>()) { 12376 for (const Stmt *Ref : C->private_refs()) { 12377 const auto *SimpleRefExpr = cast<Expr>(Ref)->IgnoreParenImpCasts(); 12378 const ValueDecl *VD; 12379 if (const auto *DRE = dyn_cast<DeclRefExpr>(SimpleRefExpr)) { 12380 VD = DRE->getDecl(); 12381 } else { 12382 const auto *ME = cast<MemberExpr>(SimpleRefExpr); 12383 assert((ME->isImplicitCXXThis() || 12384 isa<CXXThisExpr>(ME->getBase()->IgnoreParenImpCasts())) && 12385 "Expected member of current class."); 12386 VD = ME->getMemberDecl(); 12387 } 12388 DS.insert(VD); 12389 } 12390 } 12391 } 12392 12393 CGOpenMPRuntime::NontemporalDeclsRAII::~NontemporalDeclsRAII() { 12394 if (!NeedToPush) 12395 return; 12396 CGM.getOpenMPRuntime().NontemporalDeclsStack.pop_back(); 12397 } 12398 12399 CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::UntiedTaskLocalDeclsRAII( 12400 CodeGenFunction &CGF, 12401 const llvm::MapVector<CanonicalDeclPtr<const VarDecl>, 12402 std::pair<Address, Address>> &LocalVars) 12403 : CGM(CGF.CGM), NeedToPush(!LocalVars.empty()) { 12404 if (!NeedToPush) 12405 return; 12406 CGM.getOpenMPRuntime().FunctionToUntiedTaskStackMap.try_emplace( 12407 CGF.CurFn, CGM.getOpenMPRuntime().UntiedLocalVarsStack.size()); 12408 CGM.getOpenMPRuntime().UntiedLocalVarsStack.push_back(LocalVars); 12409 } 12410 12411 CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::~UntiedTaskLocalDeclsRAII() { 12412 if (!NeedToPush) 12413 return; 12414 CGM.getOpenMPRuntime().UntiedLocalVarsStack.pop_back(); 12415 } 12416 12417 bool CGOpenMPRuntime::isNontemporalDecl(const ValueDecl *VD) const { 12418 assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode."); 12419 12420 return llvm::any_of( 12421 CGM.getOpenMPRuntime().NontemporalDeclsStack, 12422 [VD](const NontemporalDeclsSet &Set) { return Set.contains(VD); }); 12423 } 12424 12425 void CGOpenMPRuntime::LastprivateConditionalRAII::tryToDisableInnerAnalysis( 12426 const OMPExecutableDirective &S, 12427 llvm::DenseSet<CanonicalDeclPtr<const Decl>> &NeedToAddForLPCsAsDisabled) 12428 const { 12429 llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToCheckForLPCs; 12430 // Vars in target/task regions must be excluded completely. 
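// (Roughly speaking, a variable captured by an inner 'task' or 'target'
// region is recorded here so that its lastprivate-conditional tracking can
// be disabled inside the outlined function.)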
12431 if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()) || 12432 isOpenMPTaskingDirective(S.getDirectiveKind())) { 12433 SmallVector<OpenMPDirectiveKind, 4> CaptureRegions; 12434 getOpenMPCaptureRegions(CaptureRegions, S.getDirectiveKind()); 12435 const CapturedStmt *CS = S.getCapturedStmt(CaptureRegions.front()); 12436 for (const CapturedStmt::Capture &Cap : CS->captures()) { 12437 if (Cap.capturesVariable() || Cap.capturesVariableByCopy()) 12438 NeedToCheckForLPCs.insert(Cap.getCapturedVar()); 12439 } 12440 } 12441 // Exclude vars in private clauses. 12442 for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) { 12443 for (const Expr *Ref : C->varlists()) { 12444 if (!Ref->getType()->isScalarType()) 12445 continue; 12446 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts()); 12447 if (!DRE) 12448 continue; 12449 NeedToCheckForLPCs.insert(DRE->getDecl()); 12450 } 12451 } 12452 for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) { 12453 for (const Expr *Ref : C->varlists()) { 12454 if (!Ref->getType()->isScalarType()) 12455 continue; 12456 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts()); 12457 if (!DRE) 12458 continue; 12459 NeedToCheckForLPCs.insert(DRE->getDecl()); 12460 } 12461 } 12462 for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) { 12463 for (const Expr *Ref : C->varlists()) { 12464 if (!Ref->getType()->isScalarType()) 12465 continue; 12466 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts()); 12467 if (!DRE) 12468 continue; 12469 NeedToCheckForLPCs.insert(DRE->getDecl()); 12470 } 12471 } 12472 for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) { 12473 for (const Expr *Ref : C->varlists()) { 12474 if (!Ref->getType()->isScalarType()) 12475 continue; 12476 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts()); 12477 if (!DRE) 12478 continue; 12479 NeedToCheckForLPCs.insert(DRE->getDecl()); 12480 } 12481 } 12482 for (const auto *C : S.getClausesOfKind<OMPLinearClause>()) { 12483 for (const Expr *Ref : C->varlists()) { 12484 if (!Ref->getType()->isScalarType()) 12485 continue; 12486 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts()); 12487 if (!DRE) 12488 continue; 12489 NeedToCheckForLPCs.insert(DRE->getDecl()); 12490 } 12491 } 12492 for (const Decl *VD : NeedToCheckForLPCs) { 12493 for (const LastprivateConditionalData &Data : 12494 llvm::reverse(CGM.getOpenMPRuntime().LastprivateConditionalStack)) { 12495 if (Data.DeclToUniqueName.count(VD) > 0) { 12496 if (!Data.Disabled) 12497 NeedToAddForLPCsAsDisabled.insert(VD); 12498 break; 12499 } 12500 } 12501 } 12502 } 12503 12504 CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII( 12505 CodeGenFunction &CGF, const OMPExecutableDirective &S, LValue IVLVal) 12506 : CGM(CGF.CGM), 12507 Action((CGM.getLangOpts().OpenMP >= 50 && 12508 llvm::any_of(S.getClausesOfKind<OMPLastprivateClause>(), 12509 [](const OMPLastprivateClause *C) { 12510 return C->getKind() == 12511 OMPC_LASTPRIVATE_conditional; 12512 })) 12513 ? 
ActionToDo::PushAsLastprivateConditional
12514 : ActionToDo::DoNotPush) {
12515 assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
12516 if (CGM.getLangOpts().OpenMP < 50 || Action == ActionToDo::DoNotPush)
12517 return;
12518 assert(Action == ActionToDo::PushAsLastprivateConditional &&
12519 "Expected a push action.");
12520 LastprivateConditionalData &Data =
12521 CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
12522 for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
12523 if (C->getKind() != OMPC_LASTPRIVATE_conditional)
12524 continue;
12525
12526 for (const Expr *Ref : C->varlists()) {
12527 Data.DeclToUniqueName.insert(std::make_pair(
12528 cast<DeclRefExpr>(Ref->IgnoreParenImpCasts())->getDecl(),
12529 SmallString<16>(generateUniqueName(CGM, "pl_cond", Ref))));
12530 }
12531 }
12532 Data.IVLVal = IVLVal;
12533 Data.Fn = CGF.CurFn;
12534 }
12535
12536 CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
12537 CodeGenFunction &CGF, const OMPExecutableDirective &S)
12538 : CGM(CGF.CGM), Action(ActionToDo::DoNotPush) {
12539 assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
12540 if (CGM.getLangOpts().OpenMP < 50)
12541 return;
12542 llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToAddForLPCsAsDisabled;
12543 tryToDisableInnerAnalysis(S, NeedToAddForLPCsAsDisabled);
12544 if (!NeedToAddForLPCsAsDisabled.empty()) {
12545 Action = ActionToDo::DisableLastprivateConditional;
12546 LastprivateConditionalData &Data =
12547 CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
12548 for (const Decl *VD : NeedToAddForLPCsAsDisabled)
12549 Data.DeclToUniqueName.insert(std::make_pair(VD, SmallString<16>()));
12550 Data.Fn = CGF.CurFn;
12551 Data.Disabled = true;
12552 }
12553 }
12554
12555 CGOpenMPRuntime::LastprivateConditionalRAII
12556 CGOpenMPRuntime::LastprivateConditionalRAII::disable(
12557 CodeGenFunction &CGF, const OMPExecutableDirective &S) {
12558 return LastprivateConditionalRAII(CGF, S);
12559 }
12560
12561 CGOpenMPRuntime::LastprivateConditionalRAII::~LastprivateConditionalRAII() {
12562 if (CGM.getLangOpts().OpenMP < 50)
12563 return;
12564 if (Action == ActionToDo::DisableLastprivateConditional) {
12565 assert(CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
12566 "Expected list of disabled private vars.");
12567 CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
12568 }
12569 if (Action == ActionToDo::PushAsLastprivateConditional) {
12570 assert(
12571 !CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
12572 "Expected list of lastprivate conditional vars.");
12573 CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
12574 }
12575 }
12576
12577 Address CGOpenMPRuntime::emitLastprivateConditionalInit(CodeGenFunction &CGF,
12578 const VarDecl *VD) {
12579 ASTContext &C = CGM.getContext();
12580 auto I = LastprivateConditionalToTypes.find(CGF.CurFn);
12581 if (I == LastprivateConditionalToTypes.end())
12582 I = LastprivateConditionalToTypes.try_emplace(CGF.CurFn).first;
12583 QualType NewType;
12584 const FieldDecl *VDField;
12585 const FieldDecl *FiredField;
12586 LValue BaseLVal;
12587 auto VI = I->getSecond().find(VD);
12588 if (VI == I->getSecond().end()) {
12589 RecordDecl *RD = C.buildImplicitRecord("lastprivate.conditional");
12590 RD->startDefinition();
12591 VDField = addFieldToRecordDecl(C, RD, VD->getType().getNonReferenceType());
12592 FiredField = addFieldToRecordDecl(C, RD, C.CharTy);
12593 RD->completeDefinition();
12594
NewType = C.getRecordType(RD); 12595 Address Addr = CGF.CreateMemTemp(NewType, C.getDeclAlign(VD), VD->getName()); 12596 BaseLVal = CGF.MakeAddrLValue(Addr, NewType, AlignmentSource::Decl); 12597 I->getSecond().try_emplace(VD, NewType, VDField, FiredField, BaseLVal); 12598 } else { 12599 NewType = std::get<0>(VI->getSecond()); 12600 VDField = std::get<1>(VI->getSecond()); 12601 FiredField = std::get<2>(VI->getSecond()); 12602 BaseLVal = std::get<3>(VI->getSecond()); 12603 } 12604 LValue FiredLVal = 12605 CGF.EmitLValueForField(BaseLVal, FiredField); 12606 CGF.EmitStoreOfScalar( 12607 llvm::ConstantInt::getNullValue(CGF.ConvertTypeForMem(C.CharTy)), 12608 FiredLVal); 12609 return CGF.EmitLValueForField(BaseLVal, VDField).getAddress(CGF); 12610 } 12611 12612 namespace { 12613 /// Checks if the lastprivate conditional variable is referenced in LHS. 12614 class LastprivateConditionalRefChecker final 12615 : public ConstStmtVisitor<LastprivateConditionalRefChecker, bool> { 12616 ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM; 12617 const Expr *FoundE = nullptr; 12618 const Decl *FoundD = nullptr; 12619 StringRef UniqueDeclName; 12620 LValue IVLVal; 12621 llvm::Function *FoundFn = nullptr; 12622 SourceLocation Loc; 12623 12624 public: 12625 bool VisitDeclRefExpr(const DeclRefExpr *E) { 12626 for (const CGOpenMPRuntime::LastprivateConditionalData &D : 12627 llvm::reverse(LPM)) { 12628 auto It = D.DeclToUniqueName.find(E->getDecl()); 12629 if (It == D.DeclToUniqueName.end()) 12630 continue; 12631 if (D.Disabled) 12632 return false; 12633 FoundE = E; 12634 FoundD = E->getDecl()->getCanonicalDecl(); 12635 UniqueDeclName = It->second; 12636 IVLVal = D.IVLVal; 12637 FoundFn = D.Fn; 12638 break; 12639 } 12640 return FoundE == E; 12641 } 12642 bool VisitMemberExpr(const MemberExpr *E) { 12643 if (!CodeGenFunction::IsWrappedCXXThis(E->getBase())) 12644 return false; 12645 for (const CGOpenMPRuntime::LastprivateConditionalData &D : 12646 llvm::reverse(LPM)) { 12647 auto It = D.DeclToUniqueName.find(E->getMemberDecl()); 12648 if (It == D.DeclToUniqueName.end()) 12649 continue; 12650 if (D.Disabled) 12651 return false; 12652 FoundE = E; 12653 FoundD = E->getMemberDecl()->getCanonicalDecl(); 12654 UniqueDeclName = It->second; 12655 IVLVal = D.IVLVal; 12656 FoundFn = D.Fn; 12657 break; 12658 } 12659 return FoundE == E; 12660 } 12661 bool VisitStmt(const Stmt *S) { 12662 for (const Stmt *Child : S->children()) { 12663 if (!Child) 12664 continue; 12665 if (const auto *E = dyn_cast<Expr>(Child)) 12666 if (!E->isGLValue()) 12667 continue; 12668 if (Visit(Child)) 12669 return true; 12670 } 12671 return false; 12672 } 12673 explicit LastprivateConditionalRefChecker( 12674 ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM) 12675 : LPM(LPM) {} 12676 std::tuple<const Expr *, const Decl *, StringRef, LValue, llvm::Function *> 12677 getFoundData() const { 12678 return std::make_tuple(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn); 12679 } 12680 }; 12681 } // namespace 12682 12683 void CGOpenMPRuntime::emitLastprivateConditionalUpdate(CodeGenFunction &CGF, 12684 LValue IVLVal, 12685 StringRef UniqueDeclName, 12686 LValue LVal, 12687 SourceLocation Loc) { 12688 // Last updated loop counter for the lastprivate conditional var. 
12689 // int<xx> last_iv = 0; 12690 llvm::Type *LLIVTy = CGF.ConvertTypeForMem(IVLVal.getType()); 12691 llvm::Constant *LastIV = 12692 getOrCreateInternalVariable(LLIVTy, getName({UniqueDeclName, "iv"})); 12693 cast<llvm::GlobalVariable>(LastIV)->setAlignment( 12694 IVLVal.getAlignment().getAsAlign()); 12695 LValue LastIVLVal = CGF.MakeNaturalAlignAddrLValue(LastIV, IVLVal.getType()); 12696 12697 // Last value of the lastprivate conditional. 12698 // decltype(priv_a) last_a; 12699 llvm::GlobalVariable *Last = getOrCreateInternalVariable( 12700 CGF.ConvertTypeForMem(LVal.getType()), UniqueDeclName); 12701 Last->setAlignment(LVal.getAlignment().getAsAlign()); 12702 LValue LastLVal = CGF.MakeAddrLValue( 12703 Address(Last, Last->getValueType(), LVal.getAlignment()), LVal.getType()); 12704 12705 // Global loop counter. Required to handle inner parallel-for regions. 12706 // iv 12707 llvm::Value *IVVal = CGF.EmitLoadOfScalar(IVLVal, Loc); 12708 12709 // #pragma omp critical(a) 12710 // if (last_iv <= iv) { 12711 // last_iv = iv; 12712 // last_a = priv_a; 12713 // } 12714 auto &&CodeGen = [&LastIVLVal, &IVLVal, IVVal, &LVal, &LastLVal, 12715 Loc](CodeGenFunction &CGF, PrePostActionTy &Action) { 12716 Action.Enter(CGF); 12717 llvm::Value *LastIVVal = CGF.EmitLoadOfScalar(LastIVLVal, Loc); 12718 // (last_iv <= iv) ? Check if the variable is updated and store new 12719 // value in global var. 12720 llvm::Value *CmpRes; 12721 if (IVLVal.getType()->isSignedIntegerType()) { 12722 CmpRes = CGF.Builder.CreateICmpSLE(LastIVVal, IVVal); 12723 } else { 12724 assert(IVLVal.getType()->isUnsignedIntegerType() && 12725 "Loop iteration variable must be integer."); 12726 CmpRes = CGF.Builder.CreateICmpULE(LastIVVal, IVVal); 12727 } 12728 llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lp_cond_then"); 12729 llvm::BasicBlock *ExitBB = CGF.createBasicBlock("lp_cond_exit"); 12730 CGF.Builder.CreateCondBr(CmpRes, ThenBB, ExitBB); 12731 // { 12732 CGF.EmitBlock(ThenBB); 12733 12734 // last_iv = iv; 12735 CGF.EmitStoreOfScalar(IVVal, LastIVLVal); 12736 12737 // last_a = priv_a; 12738 switch (CGF.getEvaluationKind(LVal.getType())) { 12739 case TEK_Scalar: { 12740 llvm::Value *PrivVal = CGF.EmitLoadOfScalar(LVal, Loc); 12741 CGF.EmitStoreOfScalar(PrivVal, LastLVal); 12742 break; 12743 } 12744 case TEK_Complex: { 12745 CodeGenFunction::ComplexPairTy PrivVal = CGF.EmitLoadOfComplex(LVal, Loc); 12746 CGF.EmitStoreOfComplex(PrivVal, LastLVal, /*isInit=*/false); 12747 break; 12748 } 12749 case TEK_Aggregate: 12750 llvm_unreachable( 12751 "Aggregates are not supported in lastprivate conditional."); 12752 } 12753 // } 12754 CGF.EmitBranch(ExitBB); 12755 // There is no need to emit line number for unconditional branch. 12756 (void)ApplyDebugLocation::CreateEmpty(CGF); 12757 CGF.EmitBlock(ExitBB, /*IsFinished=*/true); 12758 }; 12759 12760 if (CGM.getLangOpts().OpenMPSimd) { 12761 // Do not emit as a critical region as no parallel region could be emitted. 
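// (Under -fopenmp-simd only 'simd' constructs are honored, so the update
// cannot be executed concurrently and needs no guard.)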
void CGOpenMPRuntime::emitLastprivateConditionalUpdate(CodeGenFunction &CGF,
                                                       LValue IVLVal,
                                                       StringRef UniqueDeclName,
                                                       LValue LVal,
                                                       SourceLocation Loc) {
  // Last updated loop counter for the lastprivate conditional var.
  // int<xx> last_iv = 0;
  llvm::Type *LLIVTy = CGF.ConvertTypeForMem(IVLVal.getType());
  llvm::Constant *LastIV =
      getOrCreateInternalVariable(LLIVTy, getName({UniqueDeclName, "iv"}));
  cast<llvm::GlobalVariable>(LastIV)->setAlignment(
      IVLVal.getAlignment().getAsAlign());
  LValue LastIVLVal = CGF.MakeNaturalAlignAddrLValue(LastIV, IVLVal.getType());

  // Last value of the lastprivate conditional.
  // decltype(priv_a) last_a;
  llvm::GlobalVariable *Last = getOrCreateInternalVariable(
      CGF.ConvertTypeForMem(LVal.getType()), UniqueDeclName);
  Last->setAlignment(LVal.getAlignment().getAsAlign());
  LValue LastLVal = CGF.MakeAddrLValue(
      Address(Last, Last->getValueType(), LVal.getAlignment()), LVal.getType());

  // Global loop counter. Required to handle inner parallel-for regions.
  // iv
  llvm::Value *IVVal = CGF.EmitLoadOfScalar(IVLVal, Loc);

  // #pragma omp critical(a)
  // if (last_iv <= iv) {
  //   last_iv = iv;
  //   last_a = priv_a;
  // }
  auto &&CodeGen = [&LastIVLVal, &IVLVal, IVVal, &LVal, &LastLVal,
                    Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    llvm::Value *LastIVVal = CGF.EmitLoadOfScalar(LastIVLVal, Loc);
    // Check (last_iv <= iv): is this update at least as late as the last
    // recorded one? If so, store the new value into the global.
    llvm::Value *CmpRes;
    if (IVLVal.getType()->isSignedIntegerType()) {
      CmpRes = CGF.Builder.CreateICmpSLE(LastIVVal, IVVal);
    } else {
      assert(IVLVal.getType()->isUnsignedIntegerType() &&
             "Loop iteration variable must be integer.");
      CmpRes = CGF.Builder.CreateICmpULE(LastIVVal, IVVal);
    }
    llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lp_cond_then");
    llvm::BasicBlock *ExitBB = CGF.createBasicBlock("lp_cond_exit");
    CGF.Builder.CreateCondBr(CmpRes, ThenBB, ExitBB);
    // {
    CGF.EmitBlock(ThenBB);

    // last_iv = iv;
    CGF.EmitStoreOfScalar(IVVal, LastIVLVal);

    // last_a = priv_a;
    switch (CGF.getEvaluationKind(LVal.getType())) {
    case TEK_Scalar: {
      llvm::Value *PrivVal = CGF.EmitLoadOfScalar(LVal, Loc);
      CGF.EmitStoreOfScalar(PrivVal, LastLVal);
      break;
    }
    case TEK_Complex: {
      CodeGenFunction::ComplexPairTy PrivVal = CGF.EmitLoadOfComplex(LVal, Loc);
      CGF.EmitStoreOfComplex(PrivVal, LastLVal, /*isInit=*/false);
      break;
    }
    case TEK_Aggregate:
      llvm_unreachable(
          "Aggregates are not supported in lastprivate conditional.");
    }
    // }
    CGF.EmitBranch(ExitBB);
    // There is no need to emit a line number for the unconditional branch.
    (void)ApplyDebugLocation::CreateEmpty(CGF);
    CGF.EmitBlock(ExitBB, /*IsFinished=*/true);
  };

  if (CGM.getLangOpts().OpenMPSimd) {
    // Do not emit a critical region: in SIMD-only mode no parallel region can
    // be emitted, so there is nothing to serialize against.
    RegionCodeGenTy ThenRCG(CodeGen);
    ThenRCG(CGF);
  } else {
    emitCriticalRegion(CGF, UniqueDeclName, CodeGen, Loc);
  }
}
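
/// Instruments an assignment whose LHS may refer to a lastprivate conditional
/// variable. Two cases are handled: if the store happens in the function that
/// owns the region, the (last_iv, last_a) pair is updated directly; if it
/// happens in an outlined inner region (e.g. an inner 'parallel'), only the
/// 'Fired' flag of the wrapper struct is set atomically, and the owning
/// region picks it up later in checkAndEmitSharedLastprivateConditional.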
void CGOpenMPRuntime::checkAndEmitLastprivateConditional(CodeGenFunction &CGF,
                                                         const Expr *LHS) {
  if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
    return;
  LastprivateConditionalRefChecker Checker(LastprivateConditionalStack);
  if (!Checker.Visit(LHS))
    return;
  const Expr *FoundE;
  const Decl *FoundD;
  StringRef UniqueDeclName;
  LValue IVLVal;
  llvm::Function *FoundFn;
  std::tie(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn) =
      Checker.getFoundData();
  if (FoundFn != CGF.CurFn) {
    // Special codegen for inner parallel regions.
    // ((struct.lastprivate.conditional*)&priv_a)->Fired = 1;
    auto It = LastprivateConditionalToTypes[FoundFn].find(FoundD);
    assert(It != LastprivateConditionalToTypes[FoundFn].end() &&
           "Lastprivate conditional is not found in outer region.");
    QualType StructTy = std::get<0>(It->getSecond());
    const FieldDecl *FiredDecl = std::get<2>(It->getSecond());
    LValue PrivLVal = CGF.EmitLValue(FoundE);
    Address StructAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        PrivLVal.getAddress(CGF),
        CGF.ConvertTypeForMem(CGF.getContext().getPointerType(StructTy)));
    LValue BaseLVal =
        CGF.MakeAddrLValue(StructAddr, StructTy, AlignmentSource::Decl);
    LValue FiredLVal = CGF.EmitLValueForField(BaseLVal, FiredDecl);
    CGF.EmitAtomicStore(RValue::get(llvm::ConstantInt::get(
                            CGF.ConvertTypeForMem(FiredDecl->getType()), 1)),
                        FiredLVal, llvm::AtomicOrdering::Unordered,
                        /*IsVolatile=*/true, /*isInit=*/false);
    return;
  }

  // Private address of the lastprivate conditional in the current context.
  // priv_a
  LValue LVal = CGF.EmitLValue(FoundE);
  emitLastprivateConditionalUpdate(CGF, IVLVal, UniqueDeclName, LVal,
                                   FoundE->getExprLoc());
}

void CGOpenMPRuntime::checkAndEmitSharedLastprivateConditional(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    const llvm::DenseSet<CanonicalDeclPtr<const VarDecl>> &IgnoredDecls) {
  if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
    return;
  auto Range = llvm::reverse(LastprivateConditionalStack);
  auto It = llvm::find_if(
      Range, [](const LastprivateConditionalData &D) { return !D.Disabled; });
  if (It == Range.end() || It->Fn != CGF.CurFn)
    return;
  auto LPCI = LastprivateConditionalToTypes.find(It->Fn);
  assert(LPCI != LastprivateConditionalToTypes.end() &&
         "Lastprivates must be registered already.");
  SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
  getOpenMPCaptureRegions(CaptureRegions, D.getDirectiveKind());
  const CapturedStmt *CS = D.getCapturedStmt(CaptureRegions.back());
  for (const auto &Pair : It->DeclToUniqueName) {
    const auto *VD = cast<VarDecl>(Pair.first->getCanonicalDecl());
    if (!CS->capturesVariable(VD) || IgnoredDecls.contains(VD))
      continue;
    auto I = LPCI->getSecond().find(Pair.first);
    assert(I != LPCI->getSecond().end() &&
           "Lastprivate must be registered already.");
    // bool Cmp = priv_a.Fired != 0;
    LValue BaseLVal = std::get<3>(I->getSecond());
    LValue FiredLVal =
        CGF.EmitLValueForField(BaseLVal, std::get<2>(I->getSecond()));
    llvm::Value *Res = CGF.EmitLoadOfScalar(FiredLVal, D.getBeginLoc());
    llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Res);
    llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lpc.then");
    llvm::BasicBlock *DoneBB = CGF.createBasicBlock("lpc.done");
    // if (Cmp) {
    CGF.Builder.CreateCondBr(Cmp, ThenBB, DoneBB);
    CGF.EmitBlock(ThenBB);
    Address Addr = CGF.GetAddrOfLocalVar(VD);
    LValue LVal;
    if (VD->getType()->isReferenceType())
      LVal = CGF.EmitLoadOfReferenceLValue(Addr, VD->getType(),
                                           AlignmentSource::Decl);
    else
      LVal = CGF.MakeAddrLValue(Addr, VD->getType().getNonReferenceType(),
                                AlignmentSource::Decl);
    emitLastprivateConditionalUpdate(CGF, It->IVLVal, Pair.second, LVal,
                                     D.getBeginLoc());
    auto AL = ApplyDebugLocation::CreateArtificial(CGF);
    CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
    // }
  }
}
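
/// Final copy-back for a lastprivate conditional variable, in pseudo-code:
///   a = last_a;
/// If no internal global named after \p VD was ever emitted, no conditional
/// store fired anywhere in the region and the original value is kept.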
void CGOpenMPRuntime::emitLastprivateConditionalFinalUpdate(
    CodeGenFunction &CGF, LValue PrivLVal, const VarDecl *VD,
    SourceLocation Loc) {
  if (CGF.getLangOpts().OpenMP < 50)
    return;
  auto It = LastprivateConditionalStack.back().DeclToUniqueName.find(VD);
  assert(It != LastprivateConditionalStack.back().DeclToUniqueName.end() &&
         "Unknown lastprivate conditional variable.");
  StringRef UniqueName = It->second;
  llvm::GlobalVariable *GV = CGM.getModule().getNamedGlobal(UniqueName);
  // The variable was not updated in the region - exit.
  if (!GV)
    return;
  LValue LPLVal = CGF.MakeAddrLValue(
      Address(GV, GV->getValueType(), PrivLVal.getAlignment()),
      PrivLVal.getType().getNonReferenceType());
  llvm::Value *Res = CGF.EmitLoadOfScalar(LPLVal, Loc);
  CGF.EmitStoreOfScalar(Res, PrivLVal);
}
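
// Everything below overrides CGOpenMPRuntime for SIMD-only mode
// (-fopenmp-simd): only 'simd' constructs are code-generated and no calls
// into the OpenMP runtime may be emitted, so nearly every entry point traps
// with llvm_unreachable. The few that can legitimately be reached (e.g.
// simple reductions, target globals) delegate to the base class or report
// that there is nothing to do.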
llvm::Function *CGOpenMPSIMDRuntime::emitParallelOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Function *CGOpenMPSIMDRuntime::emitTeamsOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Function *CGOpenMPSIMDRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitParallelCall(CodeGenFunction &CGF,
                                           SourceLocation Loc,
                                           llvm::Function *OutlinedFn,
                                           ArrayRef<llvm::Value *> CapturedVars,
                                           const Expr *IfCond,
                                           llvm::Value *NumThreads) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitCriticalRegion(
    CodeGenFunction &CGF, StringRef CriticalName,
    const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc,
    const Expr *Hint) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitMasterRegion(CodeGenFunction &CGF,
                                           const RegionCodeGenTy &MasterOpGen,
                                           SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitMaskedRegion(CodeGenFunction &CGF,
                                           const RegionCodeGenTy &MasterOpGen,
                                           SourceLocation Loc,
                                           const Expr *Filter) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
                                            SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskgroupRegion(
    CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen,
    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitSingleRegion(
    CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen,
    SourceLocation Loc, ArrayRef<const Expr *> CopyprivateVars,
    ArrayRef<const Expr *> DestExprs, ArrayRef<const Expr *> SrcExprs,
    ArrayRef<const Expr *> AssignmentOps) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitOrderedRegion(CodeGenFunction &CGF,
                                            const RegionCodeGenTy &OrderedOpGen,
                                            SourceLocation Loc,
                                            bool IsThreads) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitBarrierCall(CodeGenFunction &CGF,
                                          SourceLocation Loc,
                                          OpenMPDirectiveKind Kind,
                                          bool EmitChecks,
                                          bool ForceSimpleCall) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForDispatchInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
    bool Ordered, const DispatchRTInput &DispatchValues) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind,
    const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitDistributeStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDistScheduleClauseKind SchedKind, const StaticRTInput &Values) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
                                                     SourceLocation Loc,
                                                     unsigned IVSize,
                                                     bool IVSigned) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForStaticFinish(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              OpenMPDirectiveKind DKind) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Value *CGOpenMPSIMDRuntime::emitForNext(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              unsigned IVSize, bool IVSigned,
                                              Address IL, Address LB,
                                              Address UB, Address ST) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
                                               llvm::Value *NumThreads,
                                               SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitProcBindClause(CodeGenFunction &CGF,
                                             ProcBindKind ProcBind,
                                             SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

Address CGOpenMPSIMDRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
                                                    const VarDecl *VD,
                                                    Address VDAddr,
                                                    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Function *CGOpenMPSIMDRuntime::emitThreadPrivateVarDefinition(
    const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit,
    CodeGenFunction *CGF) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

Address CGOpenMPSIMDRuntime::getAddrOfArtificialThreadPrivate(
    CodeGenFunction &CGF, QualType VarType, StringRef Name) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitFlush(CodeGenFunction &CGF,
                                    ArrayRef<const Expr *> Vars,
                                    SourceLocation Loc,
                                    llvm::AtomicOrdering AO) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPExecutableDirective &D,
                                       llvm::Function *TaskFunction,
                                       QualType SharedsTy, Address Shareds,
                                       const Expr *IfCond,
                                       const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskLoopCall(
    CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D,
    llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds,
    const Expr *IfCond, const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitReduction(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> Privates,
    ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
    ArrayRef<const Expr *> ReductionOps, ReductionOptionsTy Options) {
  assert(Options.SimpleReduction && "Only simple reduction is expected.");
  CGOpenMPRuntime::emitReduction(CGF, Loc, Privates, LHSExprs, RHSExprs,
                                 ReductionOps, Options);
}

llvm::Value *CGOpenMPSIMDRuntime::emitTaskReductionInit(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
    ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
                                                SourceLocation Loc,
                                                bool IsWorksharingReduction) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  ReductionCodeGen &RCG,
                                                  unsigned N) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

Address CGOpenMPSIMDRuntime::getTaskReductionItem(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  llvm::Value *ReductionsPtr,
                                                  LValue SharedLVal) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
                                           SourceLocation Loc,
                                           const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitCancellationPointCall(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDirectiveKind CancelRegion) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitCancelCall(CodeGenFunction &CGF,
                                         SourceLocation Loc, const Expr *IfCond,
                                         OpenMPDirectiveKind CancelRegion) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTargetOutlinedFunction(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTargetCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
    llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
    llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
                                     const OMPLoopDirective &D)>
        SizeEmitter) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

bool CGOpenMPSIMDRuntime::emitTargetFunctions(GlobalDecl GD) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

bool CGOpenMPSIMDRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

bool CGOpenMPSIMDRuntime::emitTargetGlobal(GlobalDecl GD) {
  return false;
}

void CGOpenMPSIMDRuntime::emitTeamsCall(CodeGenFunction &CGF,
                                        const OMPExecutableDirective &D,
                                        SourceLocation Loc,
                                        llvm::Function *OutlinedFn,
                                        ArrayRef<llvm::Value *> CapturedVars) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
                                             const Expr *NumTeams,
                                             const Expr *ThreadLimit,
                                             SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTargetDataCalls(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTargetDataStandAloneCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitDoacrossInit(CodeGenFunction &CGF,
                                           const OMPLoopDirective &D,
                                           ArrayRef<Expr *> NumIterations) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
                                              const OMPDependClause *C) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

const VarDecl *
CGOpenMPSIMDRuntime::translateParameter(const FieldDecl *FD,
                                        const VarDecl *NativeParam) const {
  llvm_unreachable("Not supported in SIMD-only mode");
}

Address
CGOpenMPSIMDRuntime::getParameterAddress(CodeGenFunction &CGF,
                                         const VarDecl *NativeParam,
                                         const VarDecl *TargetParam) const {
  llvm_unreachable("Not supported in SIMD-only mode");
}