//===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This provides a class for OpenMP runtime code generation.
//
//===----------------------------------------------------------------------===//

#include "CGOpenMPRuntime.h"
#include "CGCXXABI.h"
#include "CGCleanup.h"
#include "CGRecordLayout.h"
#include "CodeGenFunction.h"
#include "TargetInfo.h"
#include "clang/AST/APValue.h"
#include "clang/AST/Attr.h"
#include "clang/AST/Decl.h"
#include "clang/AST/OpenMPClause.h"
#include "clang/AST/StmtOpenMP.h"
#include "clang/AST/StmtVisitor.h"
#include "clang/Basic/BitmaskEnum.h"
#include "clang/Basic/FileManager.h"
#include "clang/Basic/OpenMPKinds.h"
#include "clang/Basic/SourceManager.h"
#include "clang/CodeGen/ConstantInitBuilder.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SetOperations.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Bitcode/BitcodeReader.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>
#include <numeric>

using namespace clang;
using namespace CodeGen;
using namespace llvm::omp;

namespace {
/// Base class for handling code generation inside OpenMP regions.
class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
public:
  /// Kinds of OpenMP regions used in codegen.
  enum CGOpenMPRegionKind {
    /// Region with outlined function for standalone 'parallel'
    /// directive.
    ParallelOutlinedRegion,
    /// Region with outlined function for standalone 'task' directive.
    TaskOutlinedRegion,
    /// Region for constructs that do not require function outlining,
    /// like 'for', 'sections', 'atomic' etc. directives.
    InlinedRegion,
    /// Region with outlined function for standalone 'target' directive.
    TargetRegion,
  };

  /// Build region info bound to the captured statement \p CS.
  /// \param CodeGen Callback that emits the body of the region.
  /// \param Kind OpenMP directive the region was created for.
  /// \param HasCancel True if the directive may contain a 'cancel'.
  CGOpenMPRegionInfo(const CapturedStmt &CS,
                     const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind),
        CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {}

  /// Build region info with no associated captured statement (used by the
  /// inlined-region subclass, which reuses the enclosing region's captures).
  CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen),
        Kind(Kind), HasCancel(HasCancel) {}

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  virtual const VarDecl *getThreadIDVariable() const = 0;

  /// Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override;

  /// Get an LValue for the current ThreadID variable.
  /// \return LValue for thread id variable. This LValue always has type int32*.
  virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF);

  /// Emit a switching point for an untied task; no-op for all regions except
  /// task-outlined ones (see CGOpenMPTaskOutlinedRegionInfo).
  virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {}

  CGOpenMPRegionKind getRegionKind() const { return RegionKind; }

  OpenMPDirectiveKind getDirectiveKind() const { return Kind; }

  bool hasCancel() const { return HasCancel; }

  /// LLVM-style RTTI: any captured-stmt info tagged CR_OpenMP is one of ours.
  static bool classof(const CGCapturedStmtInfo *Info) {
    return Info->getKind() == CR_OpenMP;
  }

  ~CGOpenMPRegionInfo() override = default;

protected:
  /// Kind of this region (outlined parallel/task, inlined, or target).
  CGOpenMPRegionKind RegionKind;
  /// Callback emitting the body of the region.
  RegionCodeGenTy CodeGen;
  /// OpenMP directive this region was created for.
  OpenMPDirectiveKind Kind;
  /// True if the directive may contain a 'cancel' construct.
  bool HasCancel;
};

/// API for captured statement code generation in OpenMP constructs.
class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar,
                             const RegionCodeGenTy &CodeGen,
                             OpenMPDirectiveKind Kind, bool HasCancel,
                             StringRef HelperName)
      : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind,
                           HasCancel),
        ThreadIDVar(ThreadIDVar), HelperName(HelperName) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return HelperName; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               ParallelOutlinedRegion;
  }

private:
  /// A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  /// Name for the generated outlined helper function.
  StringRef HelperName;
};

/// API for captured statement code generation in OpenMP constructs.
class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  /// Pre/post action that emits the part-id based switch used to resume an
  /// untied task at the point where it previously yielded.
  class UntiedTaskActionTy final : public PrePostActionTy {
    // True iff the task is untied (note: constructed from the *tied* flag).
    bool Untied;
    // Parameter holding a pointer to the task's part-id counter.
    const VarDecl *PartIDVar;
    // Extra codegen to run at each untied switching point.
    const RegionCodeGenTy UntiedCodeGen;
    // Switch over the part-id; one case is added per switching point.
    llvm::SwitchInst *UntiedSwitch = nullptr;

  public:
    UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar,
                       const RegionCodeGenTy &UntiedCodeGen)
        : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {}
    void Enter(CodeGenFunction &CGF) override {
      if (Untied) {
        // Emit task switching point.
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        llvm::Value *Res =
            CGF.EmitLoadOfScalar(PartIdLVal, PartIDVar->getLocation());
        llvm::BasicBlock *DoneBB = CGF.createBasicBlock(".untied.done.");
        UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB);
        CGF.EmitBlock(DoneBB);
        CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        // Part-id 0 resumes at the top of the task body.
        UntiedSwitch->addCase(CGF.Builder.getInt32(0),
                              CGF.Builder.GetInsertBlock());
        emitUntiedSwitch(CGF);
      }
    }
    /// Emit one switching point: store the next part-id, run the extra
    /// codegen, return from the task, and register the resume block as a
    /// new case of the switch.
    void emitUntiedSwitch(CodeGenFunction &CGF) const {
      if (Untied) {
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              PartIdLVal);
        UntiedCodeGen(CGF);
        CodeGenFunction::JumpDest CurPoint =
            CGF.getJumpDestInCurrentScope(".untied.next.");
        CGF.EmitBranch(CGF.ReturnBlock.getBlock());
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              CGF.Builder.GetInsertBlock());
        CGF.EmitBranchThroughCleanup(CurPoint);
        CGF.EmitBlock(CurPoint.getBlock());
      }
    }
    /// Number of task parts emitted so far (one per switch case).
    unsigned getNumberOfParts() const { return UntiedSwitch->getNumCases(); }
  };
  CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS,
                                 const VarDecl *ThreadIDVar,
                                 const RegionCodeGenTy &CodeGen,
                                 OpenMPDirectiveKind Kind, bool HasCancel,
                                 const UntiedTaskActionTy &Action)
      : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel),
        ThreadIDVar(ThreadIDVar), Action(Action) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// Get an LValue for the current ThreadID variable.
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override;

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return ".omp_outlined."; }

  void emitUntiedSwitch(CodeGenFunction &CGF) override {
    Action.emitUntiedSwitch(CGF);
  }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               TaskOutlinedRegion;
  }

private:
  /// A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  /// Action for emitting code for untied tasks.
  const UntiedTaskActionTy &Action;
};

/// API for inlined captured statement code generation in OpenMP
/// constructs.
class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo {
public:
  CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI,
                            const RegionCodeGenTy &CodeGen,
                            OpenMPDirectiveKind Kind, bool HasCancel)
      : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel),
        OldCSI(OldCSI),
        OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {}

  // Retrieve the value of the context parameter. All queries below delegate
  // to the enclosing (outer) region info, since an inlined region has no
  // captures of its own.
  llvm::Value *getContextValue() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getContextValue();
    llvm_unreachable("No context value for inlined OpenMP region");
  }

  void setContextValue(llvm::Value *V) override {
    if (OuterRegionInfo) {
      OuterRegionInfo->setContextValue(V);
      return;
    }
    llvm_unreachable("No context value for inlined OpenMP region");
  }

  /// Lookup the captured field decl for a variable.
  const FieldDecl *lookup(const VarDecl *VD) const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->lookup(VD);
    // If there is no outer outlined region, no need to lookup in a list of
    // captured variables, we can use the original one.
    return nullptr;
  }

  FieldDecl *getThisFieldDecl() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThisFieldDecl();
    return nullptr;
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThreadIDVariable();
    return nullptr;
  }

  /// Get an LValue for the current ThreadID variable.
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThreadIDVariableLValue(CGF);
    llvm_unreachable("No LValue for inlined OpenMP construct");
  }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override {
    if (auto *OuterRegionInfo = getOldCSI())
      return OuterRegionInfo->getHelperName();
    llvm_unreachable("No helper name for inlined OpenMP construct");
  }

  void emitUntiedSwitch(CodeGenFunction &CGF) override {
    if (OuterRegionInfo)
      OuterRegionInfo->emitUntiedSwitch(CGF);
  }

  CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion;
  }

  ~CGOpenMPInlinedRegionInfo() override = default;

private:
  /// CodeGen info about outer OpenMP region.
  CodeGenFunction::CGCapturedStmtInfo *OldCSI;
  /// OldCSI downcast to a CGOpenMPRegionInfo, or null if it is not one.
  CGOpenMPRegionInfo *OuterRegionInfo;
};

/// API for captured statement code generation in OpenMP target
/// constructs. For these captures, implicit parameters are used instead of the
/// captured fields. The name of the target region has to be unique in a given
/// application so it is provided by the client, because only the client has
/// the information to generate that.
class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo {
public:
  CGOpenMPTargetRegionInfo(const CapturedStmt &CS,
                           const RegionCodeGenTy &CodeGen, StringRef HelperName)
      : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target,
                           /*HasCancel=*/false),
        HelperName(HelperName) {}

  /// This is unused for target regions because each starts executing
  /// with a single thread.
  const VarDecl *getThreadIDVariable() const override { return nullptr; }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return HelperName; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion;
  }

private:
  StringRef HelperName;
};

/// Placeholder region-codegen callback; must never actually be invoked.
static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) {
  llvm_unreachable("No codegen for expressions");
}
/// API for generation of expressions captured in a innermost OpenMP
/// region.
class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo {
public:
  CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS)
      : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen,
                                  OMPD_unknown,
                                  /*HasCancel=*/false),
        PrivScope(CGF) {
    // Make sure the globals captured in the provided statement are local by
    // using the privatization logic. We assume the same variable is not
    // captured more than once.
    for (const auto &C : CS.captures()) {
      if (!C.capturesVariable() && !C.capturesVariableByCopy())
        continue;

      const VarDecl *VD = C.getCapturedVar();
      if (VD->isLocalVarDeclOrParm())
        continue;

      DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD),
                      /*RefersToEnclosingVariableOrCapture=*/false,
                      VD->getType().getNonReferenceType(), VK_LValue,
                      C.getLocation());
      PrivScope.addPrivate(VD, CGF.EmitLValue(&DRE).getAddress(CGF));
    }
    (void)PrivScope.Privatize();
  }

  /// Lookup the captured field decl for a variable.
  const FieldDecl *lookup(const VarDecl *VD) const override {
    if (const FieldDecl *FD = CGOpenMPInlinedRegionInfo::lookup(VD))
      return FD;
    return nullptr;
  }

  /// Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override {
    llvm_unreachable("No body for expressions");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override {
    llvm_unreachable("No thread id for expressions");
  }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override {
    llvm_unreachable("No helper name for expressions");
  }

  static bool classof(const CGCapturedStmtInfo *Info) { return false; }

private:
  /// Private scope to capture global variables.
  CodeGenFunction::OMPPrivateScope PrivScope;
};

/// RAII for emitting code of OpenMP constructs.
class InlinedOpenMPRegionRAII {
  CodeGenFunction &CGF;
  // Saved lambda/block capture state, restored on destruction when
  // NoInheritance is set.
  llvm::DenseMap<const VarDecl *, FieldDecl *> LambdaCaptureFields;
  FieldDecl *LambdaThisCaptureField = nullptr;
  const CodeGen::CGBlockInfo *BlockInfo = nullptr;
  bool NoInheritance = false;

public:
  /// Constructs region for combined constructs.
  /// \param CodeGen Code generation sequence for combined directives. Includes
  /// a list of functions used for code generation of implicitly inlined
  /// regions.
  /// \param NoInheritance If true, temporarily clears lambda/block capture
  /// state so the inlined region does not inherit it.
  InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen,
                          OpenMPDirectiveKind Kind, bool HasCancel,
                          bool NoInheritance = true)
      : CGF(CGF), NoInheritance(NoInheritance) {
    // Start emission for the construct.
    CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo(
        CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel);
    if (NoInheritance) {
      std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
      LambdaThisCaptureField = CGF.LambdaThisCaptureField;
      CGF.LambdaThisCaptureField = nullptr;
      BlockInfo = CGF.BlockInfo;
      CGF.BlockInfo = nullptr;
    }
  }

  ~InlinedOpenMPRegionRAII() {
    // Restore original CapturedStmtInfo only if we're done with code emission.
    auto *OldCSI =
        cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI();
    delete CGF.CapturedStmtInfo;
    CGF.CapturedStmtInfo = OldCSI;
    if (NoInheritance) {
      std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
      CGF.LambdaThisCaptureField = LambdaThisCaptureField;
      CGF.BlockInfo = BlockInfo;
    }
  }
};

/// Values for bit flags used in the ident_t to describe the fields.
/// All enumerated elements are named and described in accordance with the code
/// from https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h
enum OpenMPLocationFlags : unsigned {
  /// Use trampoline for internal microtask.
  OMP_IDENT_IMD = 0x01,
  /// Use c-style ident structure.
  OMP_IDENT_KMPC = 0x02,
  /// Atomic reduction option for kmpc_reduce.
  OMP_ATOMIC_REDUCE = 0x10,
  /// Explicit 'barrier' directive.
  OMP_IDENT_BARRIER_EXPL = 0x20,
  /// Implicit barrier in code.
  OMP_IDENT_BARRIER_IMPL = 0x40,
  /// Implicit barrier in 'for' directive.
  OMP_IDENT_BARRIER_IMPL_FOR = 0x40,
  /// Implicit barrier in 'sections' directive.
  OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0,
  /// Implicit barrier in 'single' directive.
  OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140,
  /// Call of __kmp_for_static_init for static loop.
  OMP_IDENT_WORK_LOOP = 0x200,
  /// Call of __kmp_for_static_init for sections.
  OMP_IDENT_WORK_SECTIONS = 0x400,
  /// Call of __kmp_for_static_init for distribute.
  OMP_IDENT_WORK_DISTRIBUTE = 0x800,
  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE)
};

namespace {
LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
/// Values for bit flags for marking which requires clauses have been used.
enum OpenMPOffloadingRequiresDirFlags : int64_t {
  /// flag undefined.
  OMP_REQ_UNDEFINED = 0x000,
  /// no requires clause present.
  OMP_REQ_NONE = 0x001,
  /// reverse_offload clause.
  OMP_REQ_REVERSE_OFFLOAD = 0x002,
  /// unified_address clause.
  OMP_REQ_UNIFIED_ADDRESS = 0x004,
  /// unified_shared_memory clause.
  OMP_REQ_UNIFIED_SHARED_MEMORY = 0x008,
  /// dynamic_allocators clause.
  OMP_REQ_DYNAMIC_ALLOCATORS = 0x010,
  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_REQ_DYNAMIC_ALLOCATORS)
};

enum OpenMPOffloadingReservedDeviceIDs {
  /// Device ID if the device was not defined, runtime should get it
  /// from environment variables in the spec.
  OMP_DEVICEID_UNDEF = -1,
};
} // anonymous namespace

/// Describes ident structure that describes a source location.
/// All descriptions are taken from
/// https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h
/// Original structure:
/// typedef struct ident {
///    kmp_int32 reserved_1;   /**<  might be used in Fortran;
///                                  see above  */
///    kmp_int32 flags;        /**<  also f.flags; KMP_IDENT_xxx flags;
///                                  KMP_IDENT_KMPC identifies this union
///                                  member  */
///    kmp_int32 reserved_2;   /**<  not really used in Fortran any more;
///                                  see above */
///#if USE_ITT_BUILD
///                            /*  but currently used for storing
///                                region-specific ITT */
///                            /*  contextual information. */
///#endif /* USE_ITT_BUILD */
///    kmp_int32 reserved_3;   /**< source[4] in Fortran, do not use for
///                                 C++  */
///    char const *psource;    /**< String describing the source location.
///                            The string is composed of semi-colon separated
///                            fields which describe the source file,
///                            the function and a pair of line numbers that
///                            delimit the construct.
///                            */
/// } ident_t;
enum IdentFieldIndex {
  /// might be used in Fortran
  IdentField_Reserved_1,
  /// OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member.
  IdentField_Flags,
  /// Not really used in Fortran any more
  IdentField_Reserved_2,
  /// Source[4] in Fortran, do not use for C++
  IdentField_Reserved_3,
  /// String describing the source location. The string is composed of
  /// semi-colon separated fields which describe the source file, the function
  /// and a pair of line numbers that delimit the construct.
  IdentField_PSource
};

/// Schedule types for 'omp for' loops (these enumerators are taken from
/// the enum sched_type in kmp.h).
enum OpenMPSchedType {
  /// Lower bound for default (unordered) versions.
  OMP_sch_lower = 32,
  OMP_sch_static_chunked = 33,
  OMP_sch_static = 34,
  OMP_sch_dynamic_chunked = 35,
  OMP_sch_guided_chunked = 36,
  OMP_sch_runtime = 37,
  OMP_sch_auto = 38,
  /// static with chunk adjustment (e.g., simd)
  OMP_sch_static_balanced_chunked = 45,
  /// Lower bound for 'ordered' versions.
  OMP_ord_lower = 64,
  OMP_ord_static_chunked = 65,
  OMP_ord_static = 66,
  OMP_ord_dynamic_chunked = 67,
  OMP_ord_guided_chunked = 68,
  OMP_ord_runtime = 69,
  OMP_ord_auto = 70,
  OMP_sch_default = OMP_sch_static,
  /// dist_schedule types
  OMP_dist_sch_static_chunked = 91,
  OMP_dist_sch_static = 92,
  /// Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers.
  /// Set if the monotonic schedule modifier was present.
  OMP_sch_modifier_monotonic = (1 << 29),
  /// Set if the nonmonotonic schedule modifier was present.
  OMP_sch_modifier_nonmonotonic = (1 << 30),
};

/// A basic class for pre|post-action for advanced codegen sequence for OpenMP
/// region.
class CleanupTy final : public EHScopeStack::Cleanup {
  PrePostActionTy *Action;

public:
  explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {}
  /// Run the action's Exit hook as part of cleanup emission; skipped when
  /// there is no insertion point (e.g. unreachable code).
  void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
    if (!CGF.HaveInsertPoint())
      return;
    Action->Exit(CGF);
  }
};

} // anonymous namespace

/// Invoke the stored codegen callback inside its own cleanups scope. If a
/// pre/post action is attached, its Exit is registered as an EH cleanup so it
/// runs on both normal and exceptional paths.
void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const {
  CodeGenFunction::RunCleanupsScope Scope(CGF);
  if (PrePostAction) {
    CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction);
    Callback(CodeGen, CGF, *PrePostAction);
  } else {
    PrePostActionTy Action;
    Callback(CodeGen, CGF, Action);
  }
}

/// Check if the combiner is a call to UDR combiner and if it is so return the
/// UDR decl used for reduction.
static const OMPDeclareReductionDecl *
getReductionInit(const Expr *ReductionOp) {
  // Pattern-match CallExpr(OpaqueValueExpr -> DeclRefExpr -> UDR decl).
  if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
    if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
      if (const auto *DRE =
              dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
        if (const auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl()))
          return DRD;
  return nullptr;
}

/// Initialize \p Private from \p Original using the user-defined reduction
/// \p DRD. If the UDR has an explicit initializer, emit it with the private
/// and original variables mapped in; otherwise zero-initialize from a private
/// global null constant of type \p Ty.
static void emitInitWithReductionInitializer(CodeGenFunction &CGF,
                                             const OMPDeclareReductionDecl *DRD,
                                             const Expr *InitOp,
                                             Address Private, Address Original,
                                             QualType Ty) {
  if (DRD->getInitializer()) {
    std::pair<llvm::Function *, llvm::Function *> Reduction =
        CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
    const auto *CE = cast<CallExpr>(InitOp);
    const auto *OVE = cast<OpaqueValueExpr>(CE->getCallee());
    const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts();
    const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts();
    const auto *LHSDRE =
        cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr());
    const auto *RHSDRE =
        cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr());
    // Map the UDR's 'priv' and 'orig' placeholder variables onto the actual
    // private and original storage before emitting the init expression.
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()), Private);
    PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()), Original);
    (void)PrivateScope.Privatize();
    RValue Func = RValue::get(Reduction.second);
    CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
    CGF.EmitIgnoredExpr(InitOp);
  } else {
    llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty);
    std::string Name = CGF.CGM.getOpenMPRuntime().getName({"init"});
    auto *GV = new llvm::GlobalVariable(
        CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true,
        llvm::GlobalValue::PrivateLinkage, Init, Name);
    LValue LV = CGF.MakeNaturalAlignAddrLValue(GV, Ty);
    RValue InitRVal;
    switch (CGF.getEvaluationKind(Ty)) {
    case TEK_Scalar:
      InitRVal = CGF.EmitLoadOfLValue(LV, DRD->getLocation());
      break;
    case TEK_Complex:
      InitRVal =
          RValue::getComplex(CGF.EmitLoadOfComplex(LV, DRD->getLocation()));
      break;
    case TEK_Aggregate: {
      // Aggregates are copied via an lvalue mapping; no rvalue is produced.
      OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_LValue);
      CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, LV);
      CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
                           /*IsInitializer=*/false);
      return;
    }
    }
    OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_PRValue);
    CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal);
    CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
                         /*IsInitializer=*/false);
  }
}

/// Emit initialization of arrays of complex types.
/// \param DestAddr Address of the array.
/// \param Type Type of array.
/// \param Init Initial expression of array.
/// \param SrcAddr Address of the original array.
static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr,
                                 QualType Type, bool EmitDeclareReductionInit,
                                 const Expr *Init,
                                 const OMPDeclareReductionDecl *DRD,
                                 Address SrcAddr = Address::invalid()) {
  // Perform element-by-element initialization.
  QualType ElementTy;

  // Drill down to the base element type on both arrays.
  const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr);
  if (DRD)
    SrcAddr =
        CGF.Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType());

  llvm::Value *SrcBegin = nullptr;
  if (DRD)
    SrcBegin = SrcAddr.getPointer();
  llvm::Value *DestBegin = DestAddr.getPointer();
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *DestEnd =
      CGF.Builder.CreateGEP(DestAddr.getElementType(), DestBegin, NumElements);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arrayinit.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arrayinit.done");
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);

  // PHIs track the current source/destination element across iterations.
  llvm::PHINode *SrcElementPHI = nullptr;
  Address SrcElementCurrent = Address::invalid();
  if (DRD) {
    SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2,
                                          "omp.arraycpy.srcElementPast");
    SrcElementPHI->addIncoming(SrcBegin, EntryBB);
    SrcElementCurrent =
        Address(SrcElementPHI, SrcAddr.getElementType(),
                SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));
  }
  llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI(
      DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
  DestElementPHI->addIncoming(DestBegin, EntryBB);
  Address DestElementCurrent =
      Address(DestElementPHI, DestAddr.getElementType(),
              DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  {
    CodeGenFunction::RunCleanupsScope InitScope(CGF);
    if (EmitDeclareReductionInit) {
      emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent,
                                       SrcElementCurrent, ElementTy);
    } else
      CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(),
                           /*IsInitializer=*/false);
  }

  if (DRD) {
    // Shift the address forward by one element.
    llvm::Value *SrcElementNext = CGF.Builder.CreateConstGEP1_32(
        SrcAddr.getElementType(), SrcElementPHI, /*Idx0=*/1,
        "omp.arraycpy.dest.element");
    SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock());
  }

  // Shift the address forward by one element.
  llvm::Value *DestElementNext = CGF.Builder.CreateConstGEP1_32(
      DestAddr.getElementType(), DestElementPHI, /*Idx0=*/1,
      "omp.arraycpy.dest.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  // The back-edge incoming value must come from the block that emitted the
  // init, which may differ from BodyBB.
  DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}

/// Emit an lvalue for the shared (original) reduction item \p E.
LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) {
  return CGF.EmitOMPSharedLValue(E);
}

/// Emit an lvalue for the upper bound of an array-section reduction item;
/// returns an empty LValue for non-section expressions.
LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF,
                                            const Expr *E) {
  if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E))
    return CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false);
  return LValue();
}

void ReductionCodeGen::emitAggregateInitialization(
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, Address SharedAddr,
    const OMPDeclareReductionDecl *DRD) {
  // Emit VarDecl with copy init for arrays.
  // Get the address of the original variable captured in current
  // captured region.
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  // Use the UDR initializer when one exists (or the private decl has no init
  // of its own); otherwise fall back to the private decl's initializer.
  bool EmitDeclareReductionInit =
      DRD && (DRD->getInitializer() || !PrivateVD->hasInit());
  EmitOMPAggregateInit(CGF, PrivateAddr, PrivateVD->getType(),
                       EmitDeclareReductionInit,
                       EmitDeclareReductionInit ? ClausesData[N].ReductionOp
                                                : PrivateVD->getInit(),
                       DRD, SharedAddr);
}

/// Collect per-clause reduction data. All four arrays are expected to be
/// parallel (same length), one entry per reduction item.
ReductionCodeGen::ReductionCodeGen(ArrayRef<const Expr *> Shareds,
                                   ArrayRef<const Expr *> Origs,
                                   ArrayRef<const Expr *> Privates,
                                   ArrayRef<const Expr *> ReductionOps) {
  ClausesData.reserve(Shareds.size());
  SharedAddresses.reserve(Shareds.size());
  Sizes.reserve(Shareds.size());
  BaseDecls.reserve(Shareds.size());
  const auto *IOrig = Origs.begin();
  const auto *IPriv = Privates.begin();
  const auto *IRed = ReductionOps.begin();
  for (const Expr *Ref : Shareds) {
    ClausesData.emplace_back(Ref, *IOrig, *IPriv, *IRed);
    std::advance(IOrig, 1);
    std::advance(IPriv, 1);
    std::advance(IRed, 1);
  }
}

/// Emit lvalues for the shared and original copies of reduction item \p N.
/// Must be called with items in order: exactly N lvalues emitted so far.
void ReductionCodeGen::emitSharedOrigLValue(CodeGenFunction &CGF, unsigned N) {
  assert(SharedAddresses.size() == N && OrigAddresses.size() == N &&
         "Number of generated lvalues must be exactly N.");
  LValue First = emitSharedLValue(CGF, ClausesData[N].Shared);
  LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Shared);
  SharedAddresses.emplace_back(First, Second);
  if (ClausesData[N].Shared == ClausesData[N].Ref) {
    OrigAddresses.emplace_back(First, Second);
  } else {
    LValue First = emitSharedLValue(CGF, ClausesData[N].Ref);
    LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Ref);
    OrigAddresses.emplace_back(First, Second);
  }
}

/// Compute and record the size (in chars and in elements) of reduction item
/// \p N, and emit its variably-modified type when the private copy is a VLA.
void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) {
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  QualType PrivateType = PrivateVD->getType();
  bool AsArraySection = isa<OMPArraySectionExpr>(ClausesData[N].Ref);
  if (!PrivateType->isVariablyModifiedType()) {
    // Fixed-size type: size is known statically; no element count needed.
    Sizes.emplace_back(
        CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType()),
        nullptr);
    return;
  }
  llvm::Value *Size;
  llvm::Value *SizeInChars;
  auto *ElemType = OrigAddresses[N].first.getAddress(CGF).getElementType();
  auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType);
  if (AsArraySection) {
    // Element count = (UB - LB) + 1, computed from the section bounds.
    Size = CGF.Builder.CreatePtrDiff(ElemType,
                                     OrigAddresses[N].second.getPointer(CGF),
                                     OrigAddresses[N].first.getPointer(CGF));
    Size = CGF.Builder.CreateNUWAdd(
        Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1));
    SizeInChars = CGF.Builder.CreateNUWMul(Size, ElemSizeOf);
  } else {
    SizeInChars =
        CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType());
    Size = CGF.Builder.CreateExactUDiv(SizeInChars, ElemSizeOf);
  }
  Sizes.emplace_back(SizeInChars, Size);
  // Bind the VLA size expression to the computed element count so the
  // variably-modified private type can be emitted.
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}

/// Re-emit the variably-modified type of reduction item \p N with an
/// externally supplied element count \p Size (e.g. on the task side).
void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N,
                                         llvm::Value *Size) {
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  QualType PrivateType = PrivateVD->getType();
  if (!PrivateType->isVariablyModifiedType()) {
    assert(!Size && !Sizes[N].second &&
           "Size should be nullptr for non-variably modified reduction "
           "items.");
    return;
  }
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}

void ReductionCodeGen::emitInitialization(
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, Address SharedAddr,
    llvm::function_ref<bool(CodeGenFunction &)> DefaultInit) {
  assert(SharedAddresses.size() > N && "No variable was generated");
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  const OMPDeclareReductionDecl
*DRD = 887 getReductionInit(ClausesData[N].ReductionOp); 888 QualType PrivateType = PrivateVD->getType(); 889 PrivateAddr = CGF.Builder.CreateElementBitCast( 890 PrivateAddr, CGF.ConvertTypeForMem(PrivateType)); 891 if (CGF.getContext().getAsArrayType(PrivateVD->getType())) { 892 if (DRD && DRD->getInitializer()) 893 (void)DefaultInit(CGF); 894 emitAggregateInitialization(CGF, N, PrivateAddr, SharedAddr, DRD); 895 } else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) { 896 (void)DefaultInit(CGF); 897 QualType SharedType = SharedAddresses[N].first.getType(); 898 emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp, 899 PrivateAddr, SharedAddr, SharedType); 900 } else if (!DefaultInit(CGF) && PrivateVD->hasInit() && 901 !CGF.isTrivialInitializer(PrivateVD->getInit())) { 902 CGF.EmitAnyExprToMem(PrivateVD->getInit(), PrivateAddr, 903 PrivateVD->getType().getQualifiers(), 904 /*IsInitializer=*/false); 905 } 906 } 907 908 bool ReductionCodeGen::needCleanups(unsigned N) { 909 const auto *PrivateVD = 910 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl()); 911 QualType PrivateType = PrivateVD->getType(); 912 QualType::DestructionKind DTorKind = PrivateType.isDestructedType(); 913 return DTorKind != QualType::DK_none; 914 } 915 916 void ReductionCodeGen::emitCleanups(CodeGenFunction &CGF, unsigned N, 917 Address PrivateAddr) { 918 const auto *PrivateVD = 919 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl()); 920 QualType PrivateType = PrivateVD->getType(); 921 QualType::DestructionKind DTorKind = PrivateType.isDestructedType(); 922 if (needCleanups(N)) { 923 PrivateAddr = CGF.Builder.CreateElementBitCast( 924 PrivateAddr, CGF.ConvertTypeForMem(PrivateType)); 925 CGF.pushDestroy(DTorKind, PrivateAddr, PrivateType); 926 } 927 } 928 929 static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy, 930 LValue BaseLV) { 931 BaseTy = BaseTy.getNonReferenceType(); 932 while 
((BaseTy->isPointerType() || BaseTy->isReferenceType()) && 933 !CGF.getContext().hasSameType(BaseTy, ElTy)) { 934 if (const auto *PtrTy = BaseTy->getAs<PointerType>()) { 935 BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(CGF), PtrTy); 936 } else { 937 LValue RefLVal = CGF.MakeAddrLValue(BaseLV.getAddress(CGF), BaseTy); 938 BaseLV = CGF.EmitLoadOfReferenceLValue(RefLVal); 939 } 940 BaseTy = BaseTy->getPointeeType(); 941 } 942 return CGF.MakeAddrLValue( 943 CGF.Builder.CreateElementBitCast(BaseLV.getAddress(CGF), 944 CGF.ConvertTypeForMem(ElTy)), 945 BaseLV.getType(), BaseLV.getBaseInfo(), 946 CGF.CGM.getTBAAInfoForSubobject(BaseLV, BaseLV.getType())); 947 } 948 949 static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy, 950 llvm::Type *BaseLVType, CharUnits BaseLVAlignment, 951 llvm::Value *Addr) { 952 Address Tmp = Address::invalid(); 953 Address TopTmp = Address::invalid(); 954 Address MostTopTmp = Address::invalid(); 955 BaseTy = BaseTy.getNonReferenceType(); 956 while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) && 957 !CGF.getContext().hasSameType(BaseTy, ElTy)) { 958 Tmp = CGF.CreateMemTemp(BaseTy); 959 if (TopTmp.isValid()) 960 CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp); 961 else 962 MostTopTmp = Tmp; 963 TopTmp = Tmp; 964 BaseTy = BaseTy->getPointeeType(); 965 } 966 llvm::Type *Ty = BaseLVType; 967 if (Tmp.isValid()) 968 Ty = Tmp.getElementType(); 969 Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Addr, Ty); 970 if (Tmp.isValid()) { 971 CGF.Builder.CreateStore(Addr, Tmp); 972 return MostTopTmp; 973 } 974 return Address::deprecated(Addr, BaseLVAlignment); 975 } 976 977 static const VarDecl *getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE) { 978 const VarDecl *OrigVD = nullptr; 979 if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(Ref)) { 980 const Expr *Base = OASE->getBase()->IgnoreParenImpCasts(); 981 while (const auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base)) 982 Base = 
TempOASE->getBase()->IgnoreParenImpCasts(); 983 while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base)) 984 Base = TempASE->getBase()->IgnoreParenImpCasts(); 985 DE = cast<DeclRefExpr>(Base); 986 OrigVD = cast<VarDecl>(DE->getDecl()); 987 } else if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Ref)) { 988 const Expr *Base = ASE->getBase()->IgnoreParenImpCasts(); 989 while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base)) 990 Base = TempASE->getBase()->IgnoreParenImpCasts(); 991 DE = cast<DeclRefExpr>(Base); 992 OrigVD = cast<VarDecl>(DE->getDecl()); 993 } 994 return OrigVD; 995 } 996 997 Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N, 998 Address PrivateAddr) { 999 const DeclRefExpr *DE; 1000 if (const VarDecl *OrigVD = ::getBaseDecl(ClausesData[N].Ref, DE)) { 1001 BaseDecls.emplace_back(OrigVD); 1002 LValue OriginalBaseLValue = CGF.EmitLValue(DE); 1003 LValue BaseLValue = 1004 loadToBegin(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(), 1005 OriginalBaseLValue); 1006 Address SharedAddr = SharedAddresses[N].first.getAddress(CGF); 1007 llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff( 1008 SharedAddr.getElementType(), BaseLValue.getPointer(CGF), 1009 SharedAddr.getPointer()); 1010 llvm::Value *PrivatePointer = 1011 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 1012 PrivateAddr.getPointer(), SharedAddr.getType()); 1013 llvm::Value *Ptr = CGF.Builder.CreateGEP( 1014 SharedAddr.getElementType(), PrivatePointer, Adjustment); 1015 return castToBase(CGF, OrigVD->getType(), 1016 SharedAddresses[N].first.getType(), 1017 OriginalBaseLValue.getAddress(CGF).getType(), 1018 OriginalBaseLValue.getAlignment(), Ptr); 1019 } 1020 BaseDecls.emplace_back( 1021 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Ref)->getDecl())); 1022 return PrivateAddr; 1023 } 1024 1025 bool ReductionCodeGen::usesReductionInitializer(unsigned N) const { 1026 const OMPDeclareReductionDecl *DRD = 1027 
getReductionInit(ClausesData[N].ReductionOp); 1028 return DRD && DRD->getInitializer(); 1029 } 1030 1031 LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) { 1032 return CGF.EmitLoadOfPointerLValue( 1033 CGF.GetAddrOfLocalVar(getThreadIDVariable()), 1034 getThreadIDVariable()->getType()->castAs<PointerType>()); 1035 } 1036 1037 void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt *S) { 1038 if (!CGF.HaveInsertPoint()) 1039 return; 1040 // 1.2.2 OpenMP Language Terminology 1041 // Structured block - An executable statement with a single entry at the 1042 // top and a single exit at the bottom. 1043 // The point of exit cannot be a branch out of the structured block. 1044 // longjmp() and throw() must not violate the entry/exit criteria. 1045 CGF.EHStack.pushTerminate(); 1046 if (S) 1047 CGF.incrementProfileCounter(S); 1048 CodeGen(CGF); 1049 CGF.EHStack.popTerminate(); 1050 } 1051 1052 LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue( 1053 CodeGenFunction &CGF) { 1054 return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()), 1055 getThreadIDVariable()->getType(), 1056 AlignmentSource::Decl); 1057 } 1058 1059 static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC, 1060 QualType FieldTy) { 1061 auto *Field = FieldDecl::Create( 1062 C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy, 1063 C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()), 1064 /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit); 1065 Field->setAccess(AS_public); 1066 DC->addDecl(Field); 1067 return Field; 1068 } 1069 1070 CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM, StringRef FirstSeparator, 1071 StringRef Separator) 1072 : CGM(CGM), FirstSeparator(FirstSeparator), Separator(Separator), 1073 OMPBuilder(CGM.getModule()), OffloadEntriesInfoManager(CGM) { 1074 KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8); 1075 1076 // Initialize Types used in OpenMPIRBuilder 
from OMPKinds.def 1077 OMPBuilder.initialize(); 1078 loadOffloadInfoMetadata(); 1079 } 1080 1081 void CGOpenMPRuntime::clear() { 1082 InternalVars.clear(); 1083 // Clean non-target variable declarations possibly used only in debug info. 1084 for (const auto &Data : EmittedNonTargetVariables) { 1085 if (!Data.getValue().pointsToAliveValue()) 1086 continue; 1087 auto *GV = dyn_cast<llvm::GlobalVariable>(Data.getValue()); 1088 if (!GV) 1089 continue; 1090 if (!GV->isDeclaration() || GV->getNumUses() > 0) 1091 continue; 1092 GV->eraseFromParent(); 1093 } 1094 } 1095 1096 std::string CGOpenMPRuntime::getName(ArrayRef<StringRef> Parts) const { 1097 SmallString<128> Buffer; 1098 llvm::raw_svector_ostream OS(Buffer); 1099 StringRef Sep = FirstSeparator; 1100 for (StringRef Part : Parts) { 1101 OS << Sep << Part; 1102 Sep = Separator; 1103 } 1104 return std::string(OS.str()); 1105 } 1106 1107 static llvm::Function * 1108 emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty, 1109 const Expr *CombinerInitializer, const VarDecl *In, 1110 const VarDecl *Out, bool IsCombiner) { 1111 // void .omp_combiner.(Ty *in, Ty *out); 1112 ASTContext &C = CGM.getContext(); 1113 QualType PtrTy = C.getPointerType(Ty).withRestrict(); 1114 FunctionArgList Args; 1115 ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(), 1116 /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other); 1117 ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(), 1118 /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other); 1119 Args.push_back(&OmpOutParm); 1120 Args.push_back(&OmpInParm); 1121 const CGFunctionInfo &FnInfo = 1122 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 1123 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo); 1124 std::string Name = CGM.getOpenMPRuntime().getName( 1125 {IsCombiner ? 
"omp_combiner" : "omp_initializer", ""}); 1126 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage, 1127 Name, &CGM.getModule()); 1128 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo); 1129 if (CGM.getLangOpts().Optimize) { 1130 Fn->removeFnAttr(llvm::Attribute::NoInline); 1131 Fn->removeFnAttr(llvm::Attribute::OptimizeNone); 1132 Fn->addFnAttr(llvm::Attribute::AlwaysInline); 1133 } 1134 CodeGenFunction CGF(CGM); 1135 // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions. 1136 // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions. 1137 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, In->getLocation(), 1138 Out->getLocation()); 1139 CodeGenFunction::OMPPrivateScope Scope(CGF); 1140 Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm); 1141 Scope.addPrivate( 1142 In, CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>()) 1143 .getAddress(CGF)); 1144 Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm); 1145 Scope.addPrivate( 1146 Out, CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>()) 1147 .getAddress(CGF)); 1148 (void)Scope.Privatize(); 1149 if (!IsCombiner && Out->hasInit() && 1150 !CGF.isTrivialInitializer(Out->getInit())) { 1151 CGF.EmitAnyExprToMem(Out->getInit(), CGF.GetAddrOfLocalVar(Out), 1152 Out->getType().getQualifiers(), 1153 /*IsInitializer=*/true); 1154 } 1155 if (CombinerInitializer) 1156 CGF.EmitIgnoredExpr(CombinerInitializer); 1157 Scope.ForceCleanup(); 1158 CGF.FinishFunction(); 1159 return Fn; 1160 } 1161 1162 void CGOpenMPRuntime::emitUserDefinedReduction( 1163 CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) { 1164 if (UDRMap.count(D) > 0) 1165 return; 1166 llvm::Function *Combiner = emitCombinerOrInitializer( 1167 CGM, D->getType(), D->getCombiner(), 1168 cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerIn())->getDecl()), 1169 cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerOut())->getDecl()), 1170 /*IsCombiner=*/true); 1171 
llvm::Function *Initializer = nullptr; 1172 if (const Expr *Init = D->getInitializer()) { 1173 Initializer = emitCombinerOrInitializer( 1174 CGM, D->getType(), 1175 D->getInitializerKind() == OMPDeclareReductionDecl::CallInit ? Init 1176 : nullptr, 1177 cast<VarDecl>(cast<DeclRefExpr>(D->getInitOrig())->getDecl()), 1178 cast<VarDecl>(cast<DeclRefExpr>(D->getInitPriv())->getDecl()), 1179 /*IsCombiner=*/false); 1180 } 1181 UDRMap.try_emplace(D, Combiner, Initializer); 1182 if (CGF) { 1183 auto &Decls = FunctionUDRMap.FindAndConstruct(CGF->CurFn); 1184 Decls.second.push_back(D); 1185 } 1186 } 1187 1188 std::pair<llvm::Function *, llvm::Function *> 1189 CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) { 1190 auto I = UDRMap.find(D); 1191 if (I != UDRMap.end()) 1192 return I->second; 1193 emitUserDefinedReduction(/*CGF=*/nullptr, D); 1194 return UDRMap.lookup(D); 1195 } 1196 1197 namespace { 1198 // Temporary RAII solution to perform a push/pop stack event on the OpenMP IR 1199 // Builder if one is present. 1200 struct PushAndPopStackRAII { 1201 PushAndPopStackRAII(llvm::OpenMPIRBuilder *OMPBuilder, CodeGenFunction &CGF, 1202 bool HasCancel, llvm::omp::Directive Kind) 1203 : OMPBuilder(OMPBuilder) { 1204 if (!OMPBuilder) 1205 return; 1206 1207 // The following callback is the crucial part of clangs cleanup process. 1208 // 1209 // NOTE: 1210 // Once the OpenMPIRBuilder is used to create parallel regions (and 1211 // similar), the cancellation destination (Dest below) is determined via 1212 // IP. That means if we have variables to finalize we split the block at IP, 1213 // use the new block (=BB) as destination to build a JumpDest (via 1214 // getJumpDestInCurrentScope(BB)) which then is fed to 1215 // EmitBranchThroughCleanup. Furthermore, there will not be the need 1216 // to push & pop an FinalizationInfo object. 
1217 // The FiniCB will still be needed but at the point where the 1218 // OpenMPIRBuilder is asked to construct a parallel (or similar) construct. 1219 auto FiniCB = [&CGF](llvm::OpenMPIRBuilder::InsertPointTy IP) { 1220 assert(IP.getBlock()->end() == IP.getPoint() && 1221 "Clang CG should cause non-terminated block!"); 1222 CGBuilderTy::InsertPointGuard IPG(CGF.Builder); 1223 CGF.Builder.restoreIP(IP); 1224 CodeGenFunction::JumpDest Dest = 1225 CGF.getOMPCancelDestination(OMPD_parallel); 1226 CGF.EmitBranchThroughCleanup(Dest); 1227 }; 1228 1229 // TODO: Remove this once we emit parallel regions through the 1230 // OpenMPIRBuilder as it can do this setup internally. 1231 llvm::OpenMPIRBuilder::FinalizationInfo FI({FiniCB, Kind, HasCancel}); 1232 OMPBuilder->pushFinalizationCB(std::move(FI)); 1233 } 1234 ~PushAndPopStackRAII() { 1235 if (OMPBuilder) 1236 OMPBuilder->popFinalizationCB(); 1237 } 1238 llvm::OpenMPIRBuilder *OMPBuilder; 1239 }; 1240 } // namespace 1241 1242 static llvm::Function *emitParallelOrTeamsOutlinedFunction( 1243 CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS, 1244 const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, 1245 const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen) { 1246 assert(ThreadIDVar->getType()->isPointerType() && 1247 "thread id variable must be of type kmp_int32 *"); 1248 CodeGenFunction CGF(CGM, true); 1249 bool HasCancel = false; 1250 if (const auto *OPD = dyn_cast<OMPParallelDirective>(&D)) 1251 HasCancel = OPD->hasCancel(); 1252 else if (const auto *OPD = dyn_cast<OMPTargetParallelDirective>(&D)) 1253 HasCancel = OPD->hasCancel(); 1254 else if (const auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D)) 1255 HasCancel = OPSD->hasCancel(); 1256 else if (const auto *OPFD = dyn_cast<OMPParallelForDirective>(&D)) 1257 HasCancel = OPFD->hasCancel(); 1258 else if (const auto *OPFD = dyn_cast<OMPTargetParallelForDirective>(&D)) 1259 HasCancel = OPFD->hasCancel(); 1260 else 
if (const auto *OPFD = dyn_cast<OMPDistributeParallelForDirective>(&D)) 1261 HasCancel = OPFD->hasCancel(); 1262 else if (const auto *OPFD = 1263 dyn_cast<OMPTeamsDistributeParallelForDirective>(&D)) 1264 HasCancel = OPFD->hasCancel(); 1265 else if (const auto *OPFD = 1266 dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&D)) 1267 HasCancel = OPFD->hasCancel(); 1268 1269 // TODO: Temporarily inform the OpenMPIRBuilder, if any, about the new 1270 // parallel region to make cancellation barriers work properly. 1271 llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder(); 1272 PushAndPopStackRAII PSR(&OMPBuilder, CGF, HasCancel, InnermostKind); 1273 CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind, 1274 HasCancel, OutlinedHelperName); 1275 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 1276 return CGF.GenerateOpenMPCapturedStmtFunction(*CS, D.getBeginLoc()); 1277 } 1278 1279 llvm::Function *CGOpenMPRuntime::emitParallelOutlinedFunction( 1280 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, 1281 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { 1282 const CapturedStmt *CS = D.getCapturedStmt(OMPD_parallel); 1283 return emitParallelOrTeamsOutlinedFunction( 1284 CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen); 1285 } 1286 1287 llvm::Function *CGOpenMPRuntime::emitTeamsOutlinedFunction( 1288 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, 1289 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { 1290 const CapturedStmt *CS = D.getCapturedStmt(OMPD_teams); 1291 return emitParallelOrTeamsOutlinedFunction( 1292 CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen); 1293 } 1294 1295 llvm::Function *CGOpenMPRuntime::emitTaskOutlinedFunction( 1296 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, 1297 const VarDecl *PartIDVar, const VarDecl *TaskTVar, 1298 OpenMPDirectiveKind InnermostKind, const 
RegionCodeGenTy &CodeGen, 1299 bool Tied, unsigned &NumberOfParts) { 1300 auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF, 1301 PrePostActionTy &) { 1302 llvm::Value *ThreadID = getThreadID(CGF, D.getBeginLoc()); 1303 llvm::Value *UpLoc = emitUpdateLocation(CGF, D.getBeginLoc()); 1304 llvm::Value *TaskArgs[] = { 1305 UpLoc, ThreadID, 1306 CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar), 1307 TaskTVar->getType()->castAs<PointerType>()) 1308 .getPointer(CGF)}; 1309 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 1310 CGM.getModule(), OMPRTL___kmpc_omp_task), 1311 TaskArgs); 1312 }; 1313 CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar, 1314 UntiedCodeGen); 1315 CodeGen.setAction(Action); 1316 assert(!ThreadIDVar->getType()->isPointerType() && 1317 "thread id variable must be of type kmp_int32 for tasks"); 1318 const OpenMPDirectiveKind Region = 1319 isOpenMPTaskLoopDirective(D.getDirectiveKind()) ? OMPD_taskloop 1320 : OMPD_task; 1321 const CapturedStmt *CS = D.getCapturedStmt(Region); 1322 bool HasCancel = false; 1323 if (const auto *TD = dyn_cast<OMPTaskDirective>(&D)) 1324 HasCancel = TD->hasCancel(); 1325 else if (const auto *TD = dyn_cast<OMPTaskLoopDirective>(&D)) 1326 HasCancel = TD->hasCancel(); 1327 else if (const auto *TD = dyn_cast<OMPMasterTaskLoopDirective>(&D)) 1328 HasCancel = TD->hasCancel(); 1329 else if (const auto *TD = dyn_cast<OMPParallelMasterTaskLoopDirective>(&D)) 1330 HasCancel = TD->hasCancel(); 1331 1332 CodeGenFunction CGF(CGM, true); 1333 CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, 1334 InnermostKind, HasCancel, Action); 1335 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 1336 llvm::Function *Res = CGF.GenerateCapturedStmtFunction(*CS); 1337 if (!Tied) 1338 NumberOfParts = Action.getNumberOfParts(); 1339 return Res; 1340 } 1341 1342 static void buildStructValue(ConstantStructBuilder &Fields, CodeGenModule &CGM, 1343 const RecordDecl *RD, 
const CGRecordLayout &RL, 1344 ArrayRef<llvm::Constant *> Data) { 1345 llvm::StructType *StructTy = RL.getLLVMType(); 1346 unsigned PrevIdx = 0; 1347 ConstantInitBuilder CIBuilder(CGM); 1348 const auto *DI = Data.begin(); 1349 for (const FieldDecl *FD : RD->fields()) { 1350 unsigned Idx = RL.getLLVMFieldNo(FD); 1351 // Fill the alignment. 1352 for (unsigned I = PrevIdx; I < Idx; ++I) 1353 Fields.add(llvm::Constant::getNullValue(StructTy->getElementType(I))); 1354 PrevIdx = Idx + 1; 1355 Fields.add(*DI); 1356 ++DI; 1357 } 1358 } 1359 1360 template <class... As> 1361 static llvm::GlobalVariable * 1362 createGlobalStruct(CodeGenModule &CGM, QualType Ty, bool IsConstant, 1363 ArrayRef<llvm::Constant *> Data, const Twine &Name, 1364 As &&... Args) { 1365 const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl()); 1366 const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD); 1367 ConstantInitBuilder CIBuilder(CGM); 1368 ConstantStructBuilder Fields = CIBuilder.beginStruct(RL.getLLVMType()); 1369 buildStructValue(Fields, CGM, RD, RL, Data); 1370 return Fields.finishAndCreateGlobal( 1371 Name, CGM.getContext().getAlignOfGlobalVarInChars(Ty), IsConstant, 1372 std::forward<As>(Args)...); 1373 } 1374 1375 template <typename T> 1376 static void 1377 createConstantGlobalStructAndAddToParent(CodeGenModule &CGM, QualType Ty, 1378 ArrayRef<llvm::Constant *> Data, 1379 T &Parent) { 1380 const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl()); 1381 const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD); 1382 ConstantStructBuilder Fields = Parent.beginStruct(RL.getLLVMType()); 1383 buildStructValue(Fields, CGM, RD, RL, Data); 1384 Fields.finishAndAddTo(Parent); 1385 } 1386 1387 void CGOpenMPRuntime::setLocThreadIdInsertPt(CodeGenFunction &CGF, 1388 bool AtCurrentPoint) { 1389 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); 1390 assert(!Elem.second.ServiceInsertPt && "Insert point is set already."); 1391 1392 llvm::Value *Undef = 
llvm::UndefValue::get(CGF.Int32Ty); 1393 if (AtCurrentPoint) { 1394 Elem.second.ServiceInsertPt = new llvm::BitCastInst( 1395 Undef, CGF.Int32Ty, "svcpt", CGF.Builder.GetInsertBlock()); 1396 } else { 1397 Elem.second.ServiceInsertPt = 1398 new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt"); 1399 Elem.second.ServiceInsertPt->insertAfter(CGF.AllocaInsertPt); 1400 } 1401 } 1402 1403 void CGOpenMPRuntime::clearLocThreadIdInsertPt(CodeGenFunction &CGF) { 1404 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); 1405 if (Elem.second.ServiceInsertPt) { 1406 llvm::Instruction *Ptr = Elem.second.ServiceInsertPt; 1407 Elem.second.ServiceInsertPt = nullptr; 1408 Ptr->eraseFromParent(); 1409 } 1410 } 1411 1412 static StringRef getIdentStringFromSourceLocation(CodeGenFunction &CGF, 1413 SourceLocation Loc, 1414 SmallString<128> &Buffer) { 1415 llvm::raw_svector_ostream OS(Buffer); 1416 // Build debug location 1417 PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc); 1418 OS << ";" << PLoc.getFilename() << ";"; 1419 if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl)) 1420 OS << FD->getQualifiedNameAsString(); 1421 OS << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;"; 1422 return OS.str(); 1423 } 1424 1425 llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF, 1426 SourceLocation Loc, 1427 unsigned Flags) { 1428 uint32_t SrcLocStrSize; 1429 llvm::Constant *SrcLocStr; 1430 if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo || 1431 Loc.isInvalid()) { 1432 SrcLocStr = OMPBuilder.getOrCreateDefaultSrcLocStr(SrcLocStrSize); 1433 } else { 1434 std::string FunctionName; 1435 if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl)) 1436 FunctionName = FD->getQualifiedNameAsString(); 1437 PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc); 1438 const char *FileName = PLoc.getFilename(); 1439 unsigned Line = PLoc.getLine(); 1440 unsigned Column = 
PLoc.getColumn(); 1441 SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(FunctionName, FileName, Line, 1442 Column, SrcLocStrSize); 1443 } 1444 unsigned Reserved2Flags = getDefaultLocationReserved2Flags(); 1445 return OMPBuilder.getOrCreateIdent( 1446 SrcLocStr, SrcLocStrSize, llvm::omp::IdentFlag(Flags), Reserved2Flags); 1447 } 1448 1449 llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF, 1450 SourceLocation Loc) { 1451 assert(CGF.CurFn && "No function in current CodeGenFunction."); 1452 // If the OpenMPIRBuilder is used we need to use it for all thread id calls as 1453 // the clang invariants used below might be broken. 1454 if (CGM.getLangOpts().OpenMPIRBuilder) { 1455 SmallString<128> Buffer; 1456 OMPBuilder.updateToLocation(CGF.Builder.saveIP()); 1457 uint32_t SrcLocStrSize; 1458 auto *SrcLocStr = OMPBuilder.getOrCreateSrcLocStr( 1459 getIdentStringFromSourceLocation(CGF, Loc, Buffer), SrcLocStrSize); 1460 return OMPBuilder.getOrCreateThreadID( 1461 OMPBuilder.getOrCreateIdent(SrcLocStr, SrcLocStrSize)); 1462 } 1463 1464 llvm::Value *ThreadID = nullptr; 1465 // Check whether we've already cached a load of the thread id in this 1466 // function. 1467 auto I = OpenMPLocThreadIDMap.find(CGF.CurFn); 1468 if (I != OpenMPLocThreadIDMap.end()) { 1469 ThreadID = I->second.ThreadID; 1470 if (ThreadID != nullptr) 1471 return ThreadID; 1472 } 1473 // If exceptions are enabled, do not use parameter to avoid possible crash. 1474 if (auto *OMPRegionInfo = 1475 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) { 1476 if (OMPRegionInfo->getThreadIDVariable()) { 1477 // Check if this an outlined function with thread id passed as argument. 
1478 LValue LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF); 1479 llvm::BasicBlock *TopBlock = CGF.AllocaInsertPt->getParent(); 1480 if (!CGF.EHStack.requiresLandingPad() || !CGF.getLangOpts().Exceptions || 1481 !CGF.getLangOpts().CXXExceptions || 1482 CGF.Builder.GetInsertBlock() == TopBlock || 1483 !isa<llvm::Instruction>(LVal.getPointer(CGF)) || 1484 cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() == 1485 TopBlock || 1486 cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() == 1487 CGF.Builder.GetInsertBlock()) { 1488 ThreadID = CGF.EmitLoadOfScalar(LVal, Loc); 1489 // If value loaded in entry block, cache it and use it everywhere in 1490 // function. 1491 if (CGF.Builder.GetInsertBlock() == TopBlock) { 1492 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); 1493 Elem.second.ThreadID = ThreadID; 1494 } 1495 return ThreadID; 1496 } 1497 } 1498 } 1499 1500 // This is not an outlined function region - need to call __kmpc_int32 1501 // kmpc_global_thread_num(ident_t *loc). 1502 // Generate thread id value and cache this value for use across the 1503 // function. 
1504 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); 1505 if (!Elem.second.ServiceInsertPt) 1506 setLocThreadIdInsertPt(CGF); 1507 CGBuilderTy::InsertPointGuard IPG(CGF.Builder); 1508 CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt); 1509 llvm::CallInst *Call = CGF.Builder.CreateCall( 1510 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), 1511 OMPRTL___kmpc_global_thread_num), 1512 emitUpdateLocation(CGF, Loc)); 1513 Call->setCallingConv(CGF.getRuntimeCC()); 1514 Elem.second.ThreadID = Call; 1515 return Call; 1516 } 1517 1518 void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) { 1519 assert(CGF.CurFn && "No function in current CodeGenFunction."); 1520 if (OpenMPLocThreadIDMap.count(CGF.CurFn)) { 1521 clearLocThreadIdInsertPt(CGF); 1522 OpenMPLocThreadIDMap.erase(CGF.CurFn); 1523 } 1524 if (FunctionUDRMap.count(CGF.CurFn) > 0) { 1525 for(const auto *D : FunctionUDRMap[CGF.CurFn]) 1526 UDRMap.erase(D); 1527 FunctionUDRMap.erase(CGF.CurFn); 1528 } 1529 auto I = FunctionUDMMap.find(CGF.CurFn); 1530 if (I != FunctionUDMMap.end()) { 1531 for(const auto *D : I->second) 1532 UDMMap.erase(D); 1533 FunctionUDMMap.erase(I); 1534 } 1535 LastprivateConditionalToTypes.erase(CGF.CurFn); 1536 FunctionToUntiedTaskStackMap.erase(CGF.CurFn); 1537 } 1538 1539 llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() { 1540 return OMPBuilder.IdentPtr; 1541 } 1542 1543 llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() { 1544 if (!Kmpc_MicroTy) { 1545 // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...) 
1546 llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty), 1547 llvm::PointerType::getUnqual(CGM.Int32Ty)}; 1548 Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true); 1549 } 1550 return llvm::PointerType::getUnqual(Kmpc_MicroTy); 1551 } 1552 1553 llvm::FunctionCallee 1554 CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize, bool IVSigned, 1555 bool IsGPUDistribute) { 1556 assert((IVSize == 32 || IVSize == 64) && 1557 "IV size is not compatible with the omp runtime"); 1558 StringRef Name; 1559 if (IsGPUDistribute) 1560 Name = IVSize == 32 ? (IVSigned ? "__kmpc_distribute_static_init_4" 1561 : "__kmpc_distribute_static_init_4u") 1562 : (IVSigned ? "__kmpc_distribute_static_init_8" 1563 : "__kmpc_distribute_static_init_8u"); 1564 else 1565 Name = IVSize == 32 ? (IVSigned ? "__kmpc_for_static_init_4" 1566 : "__kmpc_for_static_init_4u") 1567 : (IVSigned ? "__kmpc_for_static_init_8" 1568 : "__kmpc_for_static_init_8u"); 1569 1570 llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty; 1571 auto *PtrTy = llvm::PointerType::getUnqual(ITy); 1572 llvm::Type *TypeParams[] = { 1573 getIdentTyPointerTy(), // loc 1574 CGM.Int32Ty, // tid 1575 CGM.Int32Ty, // schedtype 1576 llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter 1577 PtrTy, // p_lower 1578 PtrTy, // p_upper 1579 PtrTy, // p_stride 1580 ITy, // incr 1581 ITy // chunk 1582 }; 1583 auto *FnTy = 1584 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1585 return CGM.CreateRuntimeFunction(FnTy, Name); 1586 } 1587 1588 llvm::FunctionCallee 1589 CGOpenMPRuntime::createDispatchInitFunction(unsigned IVSize, bool IVSigned) { 1590 assert((IVSize == 32 || IVSize == 64) && 1591 "IV size is not compatible with the omp runtime"); 1592 StringRef Name = 1593 IVSize == 32 1594 ? (IVSigned ? "__kmpc_dispatch_init_4" : "__kmpc_dispatch_init_4u") 1595 : (IVSigned ? "__kmpc_dispatch_init_8" : "__kmpc_dispatch_init_8u"); 1596 llvm::Type *ITy = IVSize == 32 ? 
CGM.Int32Ty : CGM.Int64Ty; 1597 llvm::Type *TypeParams[] = { getIdentTyPointerTy(), // loc 1598 CGM.Int32Ty, // tid 1599 CGM.Int32Ty, // schedtype 1600 ITy, // lower 1601 ITy, // upper 1602 ITy, // stride 1603 ITy // chunk 1604 }; 1605 auto *FnTy = 1606 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1607 return CGM.CreateRuntimeFunction(FnTy, Name); 1608 } 1609 1610 llvm::FunctionCallee 1611 CGOpenMPRuntime::createDispatchFiniFunction(unsigned IVSize, bool IVSigned) { 1612 assert((IVSize == 32 || IVSize == 64) && 1613 "IV size is not compatible with the omp runtime"); 1614 StringRef Name = 1615 IVSize == 32 1616 ? (IVSigned ? "__kmpc_dispatch_fini_4" : "__kmpc_dispatch_fini_4u") 1617 : (IVSigned ? "__kmpc_dispatch_fini_8" : "__kmpc_dispatch_fini_8u"); 1618 llvm::Type *TypeParams[] = { 1619 getIdentTyPointerTy(), // loc 1620 CGM.Int32Ty, // tid 1621 }; 1622 auto *FnTy = 1623 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 1624 return CGM.CreateRuntimeFunction(FnTy, Name); 1625 } 1626 1627 llvm::FunctionCallee 1628 CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize, bool IVSigned) { 1629 assert((IVSize == 32 || IVSize == 64) && 1630 "IV size is not compatible with the omp runtime"); 1631 StringRef Name = 1632 IVSize == 32 1633 ? (IVSigned ? "__kmpc_dispatch_next_4" : "__kmpc_dispatch_next_4u") 1634 : (IVSigned ? "__kmpc_dispatch_next_8" : "__kmpc_dispatch_next_8u"); 1635 llvm::Type *ITy = IVSize == 32 ? 
CGM.Int32Ty : CGM.Int64Ty;
  auto *PtrTy = llvm::PointerType::getUnqual(ITy);
  // dispatch_next writes the chunk bounds back through pointers and returns
  // an i32 (note the Int32Ty result type below, unlike the void init/fini
  // entries).
  llvm::Type *TypeParams[] = {
      getIdentTyPointerTy(),                     // loc
      CGM.Int32Ty,                               // tid
      llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
      PtrTy,                                     // p_lower
      PtrTy,                                     // p_upper
      PtrTy                                      // p_stride
  };
  auto *FnTy =
      llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
  return CGM.CreateRuntimeFunction(FnTy, Name);
}

/// Obtain information that uniquely identifies a target entry. This
/// consists of the file and device IDs as well as line number associated with
/// the relevant entry source location.
static void getTargetEntryUniqueInfo(ASTContext &C, SourceLocation Loc,
                                     unsigned &DeviceID, unsigned &FileID,
                                     unsigned &LineNum) {
  SourceManager &SM = C.getSourceManager();

  // The loc should be always valid and have a file ID (the user cannot use
  // #pragma directives in macros)

  assert(Loc.isValid() && "Source location is expected to be always valid.");

  PresumedLoc PLoc = SM.getPresumedLoc(Loc);
  assert(PLoc.isValid() && "Source location is expected to be always valid.");

  llvm::sys::fs::UniqueID ID;
  if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID)) {
    // The presumed filename may come from a #line directive and not exist on
    // disk; retry with line directives ignored before diagnosing.
    PLoc = SM.getPresumedLoc(Loc, /*UseLineDirectives=*/false);
    assert(PLoc.isValid() && "Source location is expected to be always valid.");
    if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID))
      SM.getDiagnostics().Report(diag::err_cannot_open_file)
          << PLoc.getFilename() << EC.message();
  }

  DeviceID = ID.getDevice();
  FileID = ID.getFile();
  LineNum = PLoc.getLine();
}

/// Returns the address of the "_decl_tgt_ref_ptr" indirection pointer for a
/// declare-target variable, or Address::invalid() when no indirection is
/// needed (simd-only mode, or the map type does not require it).
Address CGOpenMPRuntime::getAddrOfDeclareTargetVar(const VarDecl *VD) {
  if (CGM.getLangOpts().OpenMPSimd)
    return Address::invalid();
  llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
  // Indirection is required for 'link' variables, and for 'to' variables
  // when unified shared memory was requested via 'requires'.
  if (Res && (*Res == OMPDeclareTargetDeclAttr::MT_Link ||
              (*Res == OMPDeclareTargetDeclAttr::MT_To &&
               HasRequiresUnifiedSharedMemory))) {
    SmallString<64> PtrName;
    {
      llvm::raw_svector_ostream OS(PtrName);
      OS << CGM.getMangledName(GlobalDecl(VD));
      if (!VD->isExternallyVisible()) {
        // Internal-linkage variables embed the file ID in the pointer name
        // (NOTE(review): presumably to keep the name unique across TUs —
        // confirm).
        unsigned DeviceID, FileID, Line;
        getTargetEntryUniqueInfo(CGM.getContext(),
                                 VD->getCanonicalDecl()->getBeginLoc(),
                                 DeviceID, FileID, Line);
        OS << llvm::format("_%x", FileID);
      }
      OS << "_decl_tgt_ref_ptr";
    }
    // Create the pointer lazily on first use.
    llvm::Value *Ptr = CGM.getModule().getNamedValue(PtrName);
    if (!Ptr) {
      QualType PtrTy = CGM.getContext().getPointerType(VD->getType());
      Ptr = getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(PtrTy),
                                        PtrName);

      auto *GV = cast<llvm::GlobalVariable>(Ptr);
      GV->setLinkage(llvm::GlobalValue::WeakAnyLinkage);

      // On the host the pointer is statically initialized to the variable's
      // address; on the device it is filled in at runtime.
      if (!CGM.getLangOpts().OpenMPIsDevice)
        GV->setInitializer(CGM.GetAddrOfGlobal(VD));
      registerTargetGlobalVariable(VD, cast<llvm::Constant>(Ptr));
    }
    return Address::deprecated(Ptr, CGM.getContext().getDeclAlign(VD));
  }
  return Address::invalid();
}

/// Returns the per-variable cache global used by
/// __kmpc_threadprivate_cached; named "<mangled-name>.cache.". Only valid
/// when the TLS-based lowering is not in use (see assert).
llvm::Constant *
CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) {
  assert(!CGM.getLangOpts().OpenMPUseTLS ||
         !CGM.getContext().getTargetInfo().isTLSSupported());
  // Lookup the entry, lazily creating it if necessary.
  std::string Suffix = getName({"cache", ""});
  return getOrCreateInternalVariable(
      CGM.Int8PtrPtrTy, Twine(CGM.getMangledName(VD)).concat(Suffix));
}

/// Returns the address of the calling thread's copy of a threadprivate
/// variable. With TLS-based lowering the original address is already
/// per-thread; otherwise emit a call to __kmpc_threadprivate_cached.
Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
                                                const VarDecl *VD,
                                                Address VDAddr,
                                                SourceLocation Loc) {
  if (CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getContext().getTargetInfo().isTLSSupported())
    return VDAddr;

  llvm::Type *VarTy = VDAddr.getElementType();
  // __kmpc_threadprivate_cached(loc, tid, &var, size, &cache)
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
      CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.Int8PtrTy),
      CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)),
      getOrCreateThreadPrivateCache(VD)};
  return Address::deprecated(
      CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
          Args),
      VDAddr.getAlignment());
}

/// Emits the runtime calls that register the ctor/copy-ctor/dtor for a
/// threadprivate variable inside an initialization function.
void CGOpenMPRuntime::emitThreadPrivateVarInit(
    CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor,
    llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) {
  // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime
  // library.
  llvm::Value *OMPLoc = emitUpdateLocation(CGF, Loc);
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_global_thread_num),
                      OMPLoc);
  // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor)
  // to register constructor/destructor for variable.
  llvm::Value *Args[] = {
      OMPLoc, CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.VoidPtrTy),
      Ctor, CopyCtor, Dtor};
  CGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(), OMPRTL___kmpc_threadprivate_register),
      Args);
}

/// Emits, at most once per variable, the ctor/dtor helpers for a
/// threadprivate variable and the registration call. Returns a standalone
/// init function when \p CGF is null (registration must happen in a fresh
/// function); otherwise emits the registration into *CGF and returns null.
/// Does nothing when TLS-based lowering is used.
llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition(
    const VarDecl *VD, Address VDAddr, SourceLocation Loc,
    bool PerformInit, CodeGenFunction *CGF) {
  if (CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getContext().getTargetInfo().isTLSSupported())
    return nullptr;

  VD = VD->getDefinition(CGM.getContext());
  // ThreadPrivateWithDefinition guards against emitting the helpers twice
  // for the same mangled name.
  if (VD && ThreadPrivateWithDefinition.insert(CGM.getMangledName(VD)).second) {
    QualType ASTTy = VD->getType();

    llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr;
    const Expr *Init = VD->getAnyInitializer();
    if (CGM.getLangOpts().CPlusPlus && PerformInit) {
      // Generate function that re-emits the declaration's initializer into the
      // threadprivate copy of the variable VD
      CodeGenFunction CtorCGF(CGM);
      FunctionArgList Args;
      // The runtime passes the address of the thread's copy as a void*.
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
                            ImplicitParamDecl::Other);
      Args.push_back(&Dst);

      const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          CGM.getContext().VoidPtrTy, Args);
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      std::string Name = getName({"__kmpc_global_ctor_", ""});
      llvm::Function *Fn =
          CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
      CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI,
                            Args, Loc, Loc);
      llvm::Value *ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      Address Arg = Address::deprecated(ArgVal, VDAddr.getAlignment());
      Arg = CtorCGF.Builder.CreateElementBitCast(
          Arg, CtorCGF.ConvertTypeForMem(ASTTy));
      CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(),
                               /*IsInitializer=*/true);
      // The ctor returns the pointer it received.
      ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue);
      CtorCGF.FinishFunction();
      Ctor = Fn;
    }
    if (VD->getType().isDestructedType() != QualType::DK_none) {
      // Generate function that emits destructor call for the threadprivate copy
      // of the variable VD
      CodeGenFunction DtorCGF(CGM);
      FunctionArgList Args;
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
                            ImplicitParamDecl::Other);
      Args.push_back(&Dst);

      const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          CGM.getContext().VoidTy, Args);
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      std::string Name = getName({"__kmpc_global_dtor_", ""});
      llvm::Function *Fn =
          CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
      DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args,
                            Loc, Loc);
      // Create a scope with an artificial location for the body of this
      // function.
      auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
      llvm::Value *ArgVal = DtorCGF.EmitLoadOfScalar(
          DtorCGF.GetAddrOfLocalVar(&Dst),
          /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation());
      DtorCGF.emitDestroy(Address::deprecated(ArgVal, VDAddr.getAlignment()),
                          ASTTy, DtorCGF.getDestroyer(ASTTy.isDestructedType()),
                          DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
      DtorCGF.FinishFunction();
      Dtor = Fn;
    }
    // Do not emit init function if it is not required.
    if (!Ctor && !Dtor)
      return nullptr;

    llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
    auto *CopyCtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs,
                                               /*isVarArg=*/false)
                           ->getPointerTo();
    // Copying constructor for the threadprivate variable.
    // Must be NULL - reserved by runtime, but currently it requires that this
    // parameter is always NULL. Otherwise it fires assertion.
    CopyCtor = llvm::Constant::getNullValue(CopyCtorTy);
    // Missing ctor/dtor slots are filled with typed null pointers so the
    // registration call always has the full signature.
    if (Ctor == nullptr) {
      auto *CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
                                             /*isVarArg=*/false)
                         ->getPointerTo();
      Ctor = llvm::Constant::getNullValue(CtorTy);
    }
    if (Dtor == nullptr) {
      auto *DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy,
                                             /*isVarArg=*/false)
                         ->getPointerTo();
      Dtor = llvm::Constant::getNullValue(DtorTy);
    }
    if (!CGF) {
      // No current function: synthesize a dedicated init function and let the
      // caller schedule it (it is returned below).
      auto *InitFunctionTy =
          llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false);
      std::string Name = getName({"__omp_threadprivate_init_", ""});
      llvm::Function *InitFunction = CGM.CreateGlobalInitOrCleanUpFunction(
          InitFunctionTy, Name, CGM.getTypes().arrangeNullaryFunction());
      CodeGenFunction InitCGF(CGM);
      FunctionArgList ArgList;
      InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction,
                            CGM.getTypes().arrangeNullaryFunction(), ArgList,
                            Loc, Loc);
      emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
      InitCGF.FinishFunction();
      return InitFunction;
    }
    emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
  }
  return nullptr;
}

/// Emits ctor/dtor offload entries for a declare-target variable definition.
/// Returns whether the remaining (regular) emission of the variable should be
/// suppressed (true only on the device).
bool CGOpenMPRuntime::emitDeclareTargetVarDefinition(const VarDecl *VD,
                                                     llvm::GlobalVariable *Addr,
                                                     bool PerformInit) {
  if (CGM.getLangOpts().OMPTargetTriples.empty() &&
      !CGM.getLangOpts().OpenMPIsDevice)
    return false;
  Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
  // 'link' variables (and 'to' under unified shared memory) are handled via
  // the reference-pointer scheme instead; nothing to emit here.
  if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
      (*Res == OMPDeclareTargetDeclAttr::MT_To &&
       HasRequiresUnifiedSharedMemory))
    return CGM.getLangOpts().OpenMPIsDevice;
  VD = VD->getDefinition(CGM.getContext());
  assert(VD && "Unknown VarDecl");

  // Emit entries only once per mangled name.
  if (!DeclareTargetWithDefinition.insert(CGM.getMangledName(VD)).second)
    return CGM.getLangOpts().OpenMPIsDevice;

  QualType ASTTy = VD->getType();
  SourceLocation Loc = VD->getCanonicalDecl()->getBeginLoc();

  // Produce the unique prefix to identify the new target regions. We use
  // the source location of the variable declaration which we know to not
  // conflict with any target region.
  unsigned DeviceID;
  unsigned FileID;
  unsigned Line;
  getTargetEntryUniqueInfo(CGM.getContext(), Loc, DeviceID, FileID, Line);
  SmallString<128> Buffer, Out;
  {
    llvm::raw_svector_ostream OS(Buffer);
    OS << "__omp_offloading_" << llvm::format("_%x", DeviceID)
       << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line;
  }

  const Expr *Init = VD->getAnyInitializer();
  if (CGM.getLangOpts().CPlusPlus && PerformInit) {
    llvm::Constant *Ctor;
    llvm::Constant *ID;
    if (CGM.getLangOpts().OpenMPIsDevice) {
      // Generate function that re-emits the declaration's initializer into
      // the threadprivate copy of the variable VD
      CodeGenFunction CtorCGF(CGM);

      const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction(
          FTy, Twine(Buffer, "_ctor"), FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(CtorCGF);
      CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
                            FunctionArgList(), Loc, Loc);
      auto AL = ApplyDebugLocation::CreateArtificial(CtorCGF);
      CtorCGF.EmitAnyExprToMem(
          Init, Address::deprecated(Addr, CGM.getContext().getDeclAlign(VD)),
          Init->getType().getQualifiers(),
          /*IsInitializer=*/true);
      CtorCGF.FinishFunction();
      Ctor = Fn;
      ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
      // Keep the ctor alive even though nothing in the module references it.
      CGM.addUsedGlobal(cast<llvm::GlobalValue>(Ctor));
    } else {
      // On the host only a placeholder global is needed to act as the entry's
      // ID; no code is generated.
      Ctor = new llvm::GlobalVariable(
          CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
          llvm::GlobalValue::PrivateLinkage,
          llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_ctor"));
      ID = Ctor;
    }

    // Register the information for the entry associated with the constructor.
    Out.clear();
    OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
        DeviceID, FileID, Twine(Buffer, "_ctor").toStringRef(Out), Line, Ctor,
        ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryCtor);
  }
  if (VD->getType().isDestructedType() != QualType::DK_none) {
    llvm::Constant *Dtor;
    llvm::Constant *ID;
    if (CGM.getLangOpts().OpenMPIsDevice) {
      // Generate function that emits destructor call for the threadprivate
      // copy of the variable VD
      CodeGenFunction DtorCGF(CGM);

      const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction(
          FTy, Twine(Buffer, "_dtor"), FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
      DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
                            FunctionArgList(), Loc, Loc);
      // Create a scope with an artificial location for the body of this
      // function.
      auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
      DtorCGF.emitDestroy(
          Address::deprecated(Addr, CGM.getContext().getDeclAlign(VD)), ASTTy,
          DtorCGF.getDestroyer(ASTTy.isDestructedType()),
          DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
      DtorCGF.FinishFunction();
      Dtor = Fn;
      ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
      CGM.addUsedGlobal(cast<llvm::GlobalValue>(Dtor));
    } else {
      Dtor = new llvm::GlobalVariable(
          CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
          llvm::GlobalValue::PrivateLinkage,
          llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_dtor"));
      ID = Dtor;
    }
    // Register the information for the entry associated with the destructor.
    Out.clear();
    OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
        DeviceID, FileID, Twine(Buffer, "_dtor").toStringRef(Out), Line, Dtor,
        ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryDtor);
  }
  return CGM.getLangOpts().OpenMPIsDevice;
}

/// Returns the address of a compiler-generated ("artificial") threadprivate
/// variable identified by \p Name. Uses a real TLS global when available,
/// otherwise falls back to __kmpc_threadprivate_cached.
Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF,
                                                          QualType VarType,
                                                          StringRef Name) {
  std::string Suffix = getName({"artificial", ""});
  llvm::Type *VarLVType = CGF.ConvertTypeForMem(VarType);
  llvm::GlobalVariable *GAddr =
      getOrCreateInternalVariable(VarLVType, Twine(Name).concat(Suffix));
  if (CGM.getLangOpts().OpenMP && CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getTarget().isTLSSupported()) {
    GAddr->setThreadLocal(/*Val=*/true);
    return Address(GAddr, GAddr->getValueType(),
                   CGM.getContext().getTypeAlignInChars(VarType));
  }
  std::string CacheSuffix = getName({"cache", ""});
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, SourceLocation()),
      getThreadID(CGF, SourceLocation()),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(GAddr, CGM.VoidPtrTy),
      CGF.Builder.CreateIntCast(CGF.getTypeSize(VarType), CGM.SizeTy,
                                /*isSigned=*/false),
      getOrCreateInternalVariable(
          CGM.VoidPtrPtrTy, Twine(Name).concat(Suffix).concat(CacheSuffix))};
  return Address(
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          CGF.EmitRuntimeCall(
              OMPBuilder.getOrCreateRuntimeFunction(
                  CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
              Args),
          VarLVType->getPointerTo(/*AddrSpace=*/0)),
      VarLVType, CGM.getContext().getTypeAlignInChars(VarType));
}

/// Emits code for an OpenMP 'if' clause: runs \p ThenGen or \p ElseGen
/// depending on \p Cond, constant-folding the condition when possible so
/// only the live arm is emitted.
void CGOpenMPRuntime::emitIfClause(CodeGenFunction &CGF, const Expr *Cond,
                                   const RegionCodeGenTy &ThenGen,
                                   const RegionCodeGenTy &ElseGen) {
  CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange());

  // If the condition constant folds and can be elided, try to avoid emitting
  // the condition and the dead arm of the if/else.
  bool CondConstant;
  if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) {
    if (CondConstant)
      ThenGen(CGF);
    else
      ElseGen(CGF);
    return;
  }

  // Otherwise, the condition did not fold, or we couldn't elide it. Just
  // emit the conditional branch.
  llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("omp_if.then");
  llvm::BasicBlock *ElseBlock = CGF.createBasicBlock("omp_if.else");
  llvm::BasicBlock *ContBlock = CGF.createBasicBlock("omp_if.end");
  CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0);

  // Emit the 'then' code.
  CGF.EmitBlock(ThenBlock);
  ThenGen(CGF);
  CGF.EmitBranch(ContBlock);
  // Emit the 'else' code if present.
  // There is no need to emit line number for unconditional branch.
  (void)ApplyDebugLocation::CreateEmpty(CGF);
  CGF.EmitBlock(ElseBlock);
  ElseGen(CGF);
  // There is no need to emit line number for unconditional branch.
  (void)ApplyDebugLocation::CreateEmpty(CGF);
  CGF.EmitBranch(ContBlock);
  // Emit the continuation block for code after the if.
  CGF.EmitBlock(ContBlock, /*IsFinished=*/true);
}

/// Emits the call that launches a 'parallel' region: __kmpc_fork_call with
/// the outlined function when the region runs in parallel, or a serialized
/// fallback (__kmpc_serialized_parallel + direct call) when \p IfCond
/// evaluates to false.
/// NOTE(review): \p NumThreads is not referenced in this body — confirm it is
/// consumed by an override/another path.
void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       llvm::Function *OutlinedFn,
                                       ArrayRef<llvm::Value *> CapturedVars,
                                       const Expr *IfCond,
                                       llvm::Value *NumThreads) {
  if (!CGF.HaveInsertPoint())
    return;
  llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
  auto &M = CGM.getModule();
  auto &&ThenGen = [&M, OutlinedFn, CapturedVars, RTLoc,
                    this](CodeGenFunction &CGF, PrePostActionTy &) {
    // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn);
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    llvm::Value *Args[] = {
        RTLoc,
        CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
        CGF.Builder.CreateBitCast(OutlinedFn, RT.getKmpc_MicroPointerTy())};
    llvm::SmallVector<llvm::Value *, 16> RealArgs;
    RealArgs.append(std::begin(Args), std::end(Args));
    RealArgs.append(CapturedVars.begin(), CapturedVars.end());

    llvm::FunctionCallee RTLFn =
        OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_fork_call);
    CGF.EmitRuntimeCall(RTLFn, RealArgs);
  };
  auto &&ElseGen = [&M, OutlinedFn, CapturedVars, RTLoc, Loc,
                    this](CodeGenFunction &CGF, PrePostActionTy &) {
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    llvm::Value *ThreadID = RT.getThreadID(CGF, Loc);
    // Build calls:
    // __kmpc_serialized_parallel(&Loc, GTid);
    llvm::Value *Args[] = {RTLoc, ThreadID};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            M, OMPRTL___kmpc_serialized_parallel),
                        Args);

    // OutlinedFn(&gtid, &zero_bound, CapturedStruct);
    Address ThreadIDAddr = RT.emitThreadIDAddress(CGF, Loc);
    Address ZeroAddrBound =
        CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty,
                                         /*Name=*/".bound.zero.addr");
    CGF.Builder.CreateStore(CGF.Builder.getInt32(/*C*/ 0), ZeroAddrBound);
    llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
    // ThreadId for serialized parallels is 0.
    OutlinedFnArgs.push_back(ThreadIDAddr.getPointer());
    OutlinedFnArgs.push_back(ZeroAddrBound.getPointer());
    OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());

    // Ensure we do not inline the function. This is trivially true for the ones
    // passed to __kmpc_fork_call but the ones called in serialized regions
    // could be inlined. This is not a perfect but it is closer to the invariant
    // we want, namely, every data environment starts with a new function.
    // TODO: We should pass the if condition to the runtime function and do the
    // handling there. Much cleaner code.
    OutlinedFn->removeFnAttr(llvm::Attribute::AlwaysInline);
    OutlinedFn->addFnAttr(llvm::Attribute::NoInline);
    RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs);

    // __kmpc_end_serialized_parallel(&Loc, GTid);
    llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            M, OMPRTL___kmpc_end_serialized_parallel),
                        EndArgs);
  };
  if (IfCond) {
    emitIfClause(CGF, IfCond, ThenGen, ElseGen);
  } else {
    // Without an 'if' clause the region is unconditionally parallel.
    RegionCodeGenTy ThenRCG(ThenGen);
    ThenRCG(CGF);
  }
}

// If we're inside an (outlined) parallel region, use the region info's
// thread-ID variable (it is passed in a first argument of the outlined function
// as "kmp_int32 *gtid"). Otherwise, if we're not inside parallel region, but in
// regular serial code region, get thread ID by calling kmp_int32
// kmpc_global_thread_num(ident_t *loc), stash this thread ID in a temporary and
// return the address of that temp.
Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF,
                                             SourceLocation Loc) {
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
    if (OMPRegionInfo->getThreadIDVariable())
      return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress(CGF);

  // Serial code: materialize the thread ID into a temporary so an address
  // can be returned.
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  QualType Int32Ty =
      CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true);
  Address ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp.");
  CGF.EmitStoreOfScalar(ThreadID,
                        CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty));

  return ThreadIDTemp;
}

/// Returns (creating on first use) a zero-initialized common-linkage global
/// with the given name/type/address-space, memoized in InternalVars. Asserts
/// if a previously created variable is re-requested with a different type.
llvm::GlobalVariable *CGOpenMPRuntime::getOrCreateInternalVariable(
    llvm::Type *Ty, const llvm::Twine &Name, unsigned AddressSpace) {
  SmallString<256> Buffer;
  llvm::raw_svector_ostream Out(Buffer);
  Out << Name;
  StringRef RuntimeName = Out.str();
  auto &Elem = *InternalVars.try_emplace(RuntimeName, nullptr).first;
  if (Elem.second) {
    assert(Elem.second->getType()->isOpaqueOrPointeeTypeMatches(Ty) &&
           "OMP internal variable has different type than requested");
    return &*Elem.second;
  }

  return Elem.second = new llvm::GlobalVariable(
             CGM.getModule(), Ty, /*IsConstant*/ false,
             llvm::GlobalValue::CommonLinkage, llvm::Constant::getNullValue(Ty),
             Elem.first(), /*InsertBefore=*/nullptr,
             llvm::GlobalValue::NotThreadLocal, AddressSpace);
}

/// Returns the named lock variable ("gomp_critical_user_<name>.var") used by
/// __kmpc_critical for the given critical-section name.
llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) {
  std::string Prefix = Twine("gomp_critical_user_", CriticalName).str();
  std::string Name = getName({Prefix, "var"});
  return getOrCreateInternalVariable(KmpCriticalNameTy, Name);
}

namespace {
/// Common pre(post)-action for different OpenMP constructs.
/// Pairs an "enter" runtime call (emitted before the region) with an "exit"
/// runtime call (emitted after). When \p Conditional is set, Enter() treats
/// the enter call's result as a guard and opens an if-block; Done() must then
/// be called after the region to close it.
class CommonActionTy final : public PrePostActionTy {
  llvm::FunctionCallee EnterCallee;
  ArrayRef<llvm::Value *> EnterArgs;
  llvm::FunctionCallee ExitCallee;
  ArrayRef<llvm::Value *> ExitArgs;
  bool Conditional;
  llvm::BasicBlock *ContBlock = nullptr;

public:
  CommonActionTy(llvm::FunctionCallee EnterCallee,
                 ArrayRef<llvm::Value *> EnterArgs,
                 llvm::FunctionCallee ExitCallee,
                 ArrayRef<llvm::Value *> ExitArgs, bool Conditional = false)
      : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee),
        ExitArgs(ExitArgs), Conditional(Conditional) {}
  void Enter(CodeGenFunction &CGF) override {
    llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs);
    if (Conditional) {
      // Only execute the region when the runtime call returned non-zero.
      llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes);
      auto *ThenBlock = CGF.createBasicBlock("omp_if.then");
      ContBlock = CGF.createBasicBlock("omp_if.end");
      // Generate the branch (If-stmt)
      CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock);
      CGF.EmitBlock(ThenBlock);
    }
  }
  // Closes the conditional region opened by Enter(); only meaningful when
  // Conditional is true (ContBlock is set).
  void Done(CodeGenFunction &CGF) {
    // Emit the rest of blocks/branches
    CGF.EmitBranch(ContBlock);
    CGF.EmitBlock(ContBlock, true);
  }
  void Exit(CodeGenFunction &CGF) override {
    CGF.EmitRuntimeCall(ExitCallee, ExitArgs);
  }
};
} // anonymous namespace

void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF,
                                         StringRef CriticalName,
                                         const RegionCodeGenTy &CriticalOpGen,
                                         SourceLocation Loc, const Expr *Hint) {
  // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]);
  // CriticalOpGen();
  // __kmpc_end_critical(ident_t *, gtid, Lock);
  // Prepare arguments and build a call to __kmpc_critical
  if (!CGF.HaveInsertPoint())
    return;
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
                         getCriticalRegionLock(CriticalName)};
  // The enter call optionally takes a 'hint' argument; the exit call never
  // does, so it keeps the original three-element Args.
  llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args),
                                                std::end(Args));
  if (Hint) {
    EnterArgs.push_back(CGF.Builder.CreateIntCast(
        CGF.EmitScalarExpr(Hint), CGM.Int32Ty, /*isSigned=*/false));
  }
  CommonActionTy Action(
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(),
          Hint ? OMPRTL___kmpc_critical_with_hint : OMPRTL___kmpc_critical),
      EnterArgs,
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___kmpc_end_critical),
      Args);
  CriticalOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen);
}

void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF,
                                       const RegionCodeGenTy &MasterOpGen,
                                       SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // if(__kmpc_master(ident_t *, gtid)) {
  //   MasterOpGen();
  //   __kmpc_end_master(ident_t *, gtid);
  // }
  // Prepare arguments and build a call to __kmpc_master
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_master),
                        Args,
                        OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_end_master),
                        Args,
                        /*Conditional=*/true);
  MasterOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_master, MasterOpGen);
  // Close the if-block opened by the conditional enter call.
  Action.Done(CGF);
}

void CGOpenMPRuntime::emitMaskedRegion(CodeGenFunction &CGF,
                                       const RegionCodeGenTy &MaskedOpGen,
                                       SourceLocation Loc, const Expr *Filter) {
  if (!CGF.HaveInsertPoint())
    return;
  // if(__kmpc_masked(ident_t *, gtid, filter)) {
  //   MaskedOpGen();
  //   __kmpc_end_masked(iden_t *, gtid);
  // }
  // Prepare arguments and build a call to __kmpc_masked
  // Without a filter clause the mask defaults to thread 0.
  llvm::Value *FilterVal = Filter
                               ?
                               CGF.EmitScalarExpr(Filter, CGF.Int32Ty)
                               : llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/0);
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
                         FilterVal};
  // The end call takes no filter argument.
  llvm::Value *ArgsEnd[] = {emitUpdateLocation(CGF, Loc),
                            getThreadID(CGF, Loc)};
  CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_masked),
                        Args,
                        OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_end_masked),
                        ArgsEnd,
                        /*Conditional=*/true);
  MaskedOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_masked, MaskedOpGen);
  // Close the if-block opened by the conditional enter call.
  Action.Done(CGF);
}

void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
                                        SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // Delegate to the OpenMPIRBuilder when it is enabled.
  if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
    OMPBuilder.createTaskyield(CGF.Builder);
  } else {
    // Build call __kmpc_omp_taskyield(loc, thread_id, 0);
    llvm::Value *Args[] = {
        emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
        llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_omp_taskyield),
                        Args);
  }

  if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
    Region->emitUntiedSwitch(CGF);
}

void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF,
                                          const RegionCodeGenTy &TaskgroupOpGen,
                                          SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // __kmpc_taskgroup(ident_t *, gtid);
  // TaskgroupOpGen();
  // __kmpc_end_taskgroup(ident_t *, gtid);
  // Prepare arguments and build a call to __kmpc_taskgroup
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_taskgroup),
                        Args,
                        OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_end_taskgroup),
                        Args);
  TaskgroupOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen);
}

/// Given an array of pointers to variables, project the address of a
/// given variable.
static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array,
                                      unsigned Index, const VarDecl *Var) {
  // Pull out the pointer to the variable.
  Address PtrAddr = CGF.Builder.CreateConstArrayGEP(Array, Index);
  llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr);

  Address Addr = Address::deprecated(Ptr, CGF.getContext().getDeclAlign(Var));
  Addr = CGF.Builder.CreateElementBitCast(
      Addr, CGF.ConvertTypeForMem(Var->getType()));
  return Addr;
}

/// Emits the helper passed to __kmpc_copyprivate: an internal function that
/// takes two void* arrays (destination/source variable pointers) and performs
/// the copyprivate assignment for each variable.
static llvm::Value *emitCopyprivateCopyFunction(
    CodeGenModule &CGM, llvm::Type *ArgsType,
    ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs,
    ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps,
    SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  // void copy_func(void *LHSArg, void *RHSArg);
  FunctionArgList Args;
  ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  Args.push_back(&LHSArg);
  Args.push_back(&RHSArg);
  const auto &CGFI =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  std::string Name =
      CGM.getOpenMPRuntime().getName({"omp", "copyprivate", "copy_func"});
  auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
                                    llvm::GlobalValue::InternalLinkage, Name,
                                    &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF,
                                       const RegionCodeGenTy &SingleOpGen,
                                       SourceLocation Loc,
                                       ArrayRef<const Expr *> CopyprivateVars,
                                       ArrayRef<const Expr *> SrcExprs,
                                       ArrayRef<const Expr *> DstExprs,
                                       ArrayRef<const Expr *> AssignmentOps) {
  // Emit an OpenMP 'single' region, optionally followed by a copyprivate
  // broadcast of the listed variables to every other thread.
  if (!CGF.HaveInsertPoint())
    return;
  // The four arrays are parallel: entry I of each describes variable I.
  assert(CopyprivateVars.size() == SrcExprs.size() &&
         CopyprivateVars.size() == DstExprs.size() &&
         CopyprivateVars.size() == AssignmentOps.size());
  ASTContext &C = CGM.getContext();
  // Generated code shape:
  // int32 did_it = 0;
  // if(__kmpc_single(ident_t *, gtid)) {
  //   SingleOpGen();
  //   __kmpc_end_single(ident_t *, gtid);
  //   did_it = 1;
  // }
  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
  // <copy_func>, did_it);

  // did_it is only needed when there are copyprivate variables; it tells
  // __kmpc_copyprivate which thread executed the region.
  Address DidIt = Address::invalid();
  if (!CopyprivateVars.empty()) {
    // int32 did_it = 0;
    QualType KmpInt32Ty =
        C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
    DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it");
    CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt);
  }
  // Prepare arguments and build a call to __kmpc_single.
  // Conditional=true: the region body only runs when __kmpc_single returns
  // nonzero (i.e. on the single executing thread).
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_single),
                        Args,
                        OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_end_single),
                        Args,
                        /*Conditional=*/true);
  SingleOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_single, SingleOpGen);
  if (DidIt.isValid()) {
    // did_it = 1; (still inside the conditional region, before Action.Done
    // closes it, so only the executing thread stores 1)
    CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt);
  }
  Action.Done(CGF);
  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
  // <copy_func>, did_it);
  if (DidIt.isValid()) {
    llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size());
    QualType CopyprivateArrayTy = C.getConstantArrayType(
        C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal,
        /*IndexTypeQuals=*/0);
    // Create a list of all private variables for copyprivate.
    Address CopyprivateList =
        CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list");
    for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) {
      Address Elem = CGF.Builder.CreateConstArrayGEP(CopyprivateList, I);
      CGF.Builder.CreateStore(
          CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
              CGF.EmitLValue(CopyprivateVars[I]).getPointer(CGF),
              CGF.VoidPtrTy),
          Elem);
    }
    // Build function that copies private values from single region to all other
    // threads in the corresponding parallel region.
    llvm::Value *CpyFn = emitCopyprivateCopyFunction(
        CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy)->getPointerTo(),
        CopyprivateVars, SrcExprs, DstExprs, AssignmentOps, Loc);
    llvm::Value *BufSize = CGF.getTypeSize(CopyprivateArrayTy);
    Address CL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        CopyprivateList, CGF.VoidPtrTy, CGF.Int8Ty);
    llvm::Value *DidItVal = CGF.Builder.CreateLoad(DidIt);
    llvm::Value *Args[] = {
        emitUpdateLocation(CGF, Loc), // ident_t *<loc>
        getThreadID(CGF, Loc),        // i32 <gtid>
        BufSize,                      // size_t <buf_size>
        CL.getPointer(),              // void *<copyprivate list>
        CpyFn,                        // void (*) (void *, void *) <copy_func>
        DidItVal                      // i32 did_it
    };
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_copyprivate),
                        Args);
  }
}
void CGOpenMPRuntime::getDefaultScheduleAndChunk(
    CodeGenFunction &CGF, const OMPLoopDirective &S,
    OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const {
  // Check if the loop directive is actually a doacross loop directive
  // (i.e. it has an 'ordered' clause with a loop count). In this
  // case choose static, 1 schedule. Otherwise both out-parameters are left
  // untouched.
  if (llvm::any_of(
          S.getClausesOfKind<OMPOrderedClause>(),
          [](const OMPOrderedClause *C) { return C->getNumForLoops(); })) {
    ScheduleKind = OMPC_SCHEDULE_static;
    // Chunk size is 1 in this case; synthesize an unsigned 32-bit literal
    // with an invalid source location since it has no spelling in the source.
    llvm::APInt ChunkSize(32, 1);
    ChunkExpr = IntegerLiteral::Create(
        CGF.getContext(), ChunkSize,
        CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/0),
        SourceLocation());
  }
}
/// Map the OpenMP loop schedule to the runtime enumeration.
/// \param Chunked Whether a chunk size was specified on the clause.
/// \param Ordered Whether the loop has ordered semantics; selects the
///        OMP_ord_* variants over the OMP_sch_* ones.
static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind,
                                          bool Chunked, bool Ordered) {
  switch (ScheduleKind) {
  case OMPC_SCHEDULE_static:
    // Static is the only kind with distinct chunked/non-chunked encodings.
    return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked)
                   : (Ordered ? OMP_ord_static : OMP_sch_static);
  case OMPC_SCHEDULE_dynamic:
    return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked;
  case OMPC_SCHEDULE_guided:
    return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked;
  case OMPC_SCHEDULE_runtime:
    return Ordered ? OMP_ord_runtime : OMP_sch_runtime;
  case OMPC_SCHEDULE_auto:
    return Ordered ? OMP_ord_auto : OMP_sch_auto;
  case OMPC_SCHEDULE_unknown:
    // No schedule clause: default to (non-chunked) static.
    assert(!Chunked && "chunk was specified but schedule kind not known");
    return Ordered ? OMP_ord_static : OMP_sch_static;
  }
  llvm_unreachable("Unexpected runtime schedule");
}
OMP_dist_sch_static_chunked : OMP_dist_sch_static; 2651 } 2652 2653 bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind, 2654 bool Chunked) const { 2655 OpenMPSchedType Schedule = 2656 getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false); 2657 return Schedule == OMP_sch_static; 2658 } 2659 2660 bool CGOpenMPRuntime::isStaticNonchunked( 2661 OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const { 2662 OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked); 2663 return Schedule == OMP_dist_sch_static; 2664 } 2665 2666 bool CGOpenMPRuntime::isStaticChunked(OpenMPScheduleClauseKind ScheduleKind, 2667 bool Chunked) const { 2668 OpenMPSchedType Schedule = 2669 getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false); 2670 return Schedule == OMP_sch_static_chunked; 2671 } 2672 2673 bool CGOpenMPRuntime::isStaticChunked( 2674 OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const { 2675 OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked); 2676 return Schedule == OMP_dist_sch_static_chunked; 2677 } 2678 2679 bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const { 2680 OpenMPSchedType Schedule = 2681 getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false); 2682 assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here"); 2683 return Schedule != OMP_sch_static; 2684 } 2685 2686 static int addMonoNonMonoModifier(CodeGenModule &CGM, OpenMPSchedType Schedule, 2687 OpenMPScheduleClauseModifier M1, 2688 OpenMPScheduleClauseModifier M2) { 2689 int Modifier = 0; 2690 switch (M1) { 2691 case OMPC_SCHEDULE_MODIFIER_monotonic: 2692 Modifier = OMP_sch_modifier_monotonic; 2693 break; 2694 case OMPC_SCHEDULE_MODIFIER_nonmonotonic: 2695 Modifier = OMP_sch_modifier_nonmonotonic; 2696 break; 2697 case OMPC_SCHEDULE_MODIFIER_simd: 2698 if (Schedule == OMP_sch_static_chunked) 2699 Schedule = OMP_sch_static_balanced_chunked; 2700 break; 2701 case 
void CGOpenMPRuntime::emitForDispatchInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
    bool Ordered, const DispatchRTInput &DispatchValues) {
  // Emit the init call for a dynamically-scheduled worksharing loop.
  if (!CGF.HaveInsertPoint())
    return;
  OpenMPSchedType Schedule = getRuntimeSchedule(
      ScheduleKind.Schedule, DispatchValues.Chunk != nullptr, Ordered);
  // Static (non-ordered) schedules must go through the static-init path, not
  // the dispatch path.
  assert(Ordered ||
         (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked &&
          Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked &&
          Schedule != OMP_sch_static_balanced_chunked));
  // Call __kmpc_dispatch_init(
  //          ident_t *loc, kmp_int32 tid, kmp_int32 schedule,
  //          kmp_int[32|64] lower, kmp_int[32|64] upper,
  //          kmp_int[32|64] stride, kmp_int[32|64] chunk);

  // If the Chunk was not specified in the clause - use default value 1.
  llvm::Value *Chunk = DispatchValues.Chunk ? DispatchValues.Chunk
                                            : CGF.Builder.getIntN(IVSize, 1);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc),
      getThreadID(CGF, Loc),
      CGF.Builder.getInt32(addMonoNonMonoModifier(
          CGM, Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type
      DispatchValues.LB,                                     // Lower
      DispatchValues.UB,                                     // Upper
      CGF.Builder.getIntN(IVSize, 1),                        // Stride
      Chunk                                                  // Chunk
  };
  CGF.EmitRuntimeCall(createDispatchInitFunction(IVSize, IVSigned), Args);
}
void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF,
                                        SourceLocation Loc,
                                        OpenMPDirectiveKind DKind,
                                        const OpenMPScheduleTy &ScheduleKind,
                                        const StaticRTInput &Values) {
  // Emit the static-init runtime call for a worksharing loop or sections
  // construct (the 'distribute' variant goes through
  // emitDistributeStaticInit instead).
  OpenMPSchedType ScheduleNum = getRuntimeSchedule(
      ScheduleKind.Schedule, Values.Chunk != nullptr, Values.Ordered);
  assert(isOpenMPWorksharingDirective(DKind) &&
         "Expected loop-based or sections-based directive.");
  // Tag the location with the kind of worksharing region being initialized.
  llvm::Value *UpdatedLocation =
      emitUpdateLocation(CGF, Loc,
                         isOpenMPLoopDirective(DKind)
                             ? OMP_IDENT_WORK_LOOP
                             : OMP_IDENT_WORK_SECTIONS);
  llvm::Value *ThreadId = getThreadID(CGF, Loc);
  // Third argument is false: this path never uses the GPU-distribute entry.
  llvm::FunctionCallee StaticInitFunction =
      createForStaticInitFunction(Values.IVSize, Values.IVSigned, false);
  auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
  emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
                        ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, Values);
}
llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF,
                                          SourceLocation Loc, unsigned IVSize,
                                          bool IVSigned, Address IL,
                                          Address LB, Address UB,
                                          Address ST) {
  // Request the next chunk of a dynamically-scheduled loop.
  // Call __kmpc_dispatch_next(
  //          ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter,
  //          kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper,
  //          kmp_int[32|64] *p_stride);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc),
      getThreadID(CGF, Loc),
      IL.getPointer(), // &isLastIter
      LB.getPointer(), // &Lower
      UB.getPointer(), // &Upper
      ST.getPointer()  // &Stride
  };
  llvm::Value *Call =
      CGF.EmitRuntimeCall(createDispatchNextFunction(IVSize, IVSigned), Args);
  // The runtime returns an i32; convert it to a boolean "more work" flag.
  return CGF.EmitScalarConversion(
      Call, CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/1),
      CGF.getContext().BoolTy, Loc);
}
namespace {
/// Indexes of fields for type kmp_task_t. These must match the field order of
/// the kmp_task_t struct emitted by this file for task codegen.
enum KmpTaskTFields {
  /// List of shared variables.
  KmpTaskTShareds,
  /// Task routine.
  KmpTaskTRoutine,
  /// Partition id for the untied tasks.
  KmpTaskTPartId,
  /// Function with call of destructors for private variables.
  /// NOTE(review): name is historical; kept as Data1 to match the runtime's
  /// field naming.
  Data1,
  /// Task priority.
  /// NOTE(review): name is historical; kept as Data2 to match the runtime's
  /// field naming.
  Data2,
  /// (Taskloops only) Lower bound.
  KmpTaskTLowerBound,
  /// (Taskloops only) Upper bound.
  KmpTaskTUpperBound,
  /// (Taskloops only) Stride.
  KmpTaskTStride,
  /// (Taskloops only) Is last iteration flag.
  KmpTaskTLastIter,
  /// (Taskloops only) Reduction data.
  KmpTaskTReductions,
};
} // anonymous namespace
3020 if (!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum)) 3021 return; 3022 auto &Entry = 3023 OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum]; 3024 Entry.setAddress(Addr); 3025 Entry.setID(ID); 3026 Entry.setFlags(Flags); 3027 } else { 3028 if (Flags == 3029 OffloadEntriesInfoManagerTy::OMPTargetRegionEntryTargetRegion && 3030 hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum, 3031 /*IgnoreAddressId*/ true)) 3032 return; 3033 assert(!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum) && 3034 "Target region entry already registered!"); 3035 OffloadEntryInfoTargetRegion Entry(OffloadingEntriesNum, Addr, ID, Flags); 3036 OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = Entry; 3037 ++OffloadingEntriesNum; 3038 } 3039 } 3040 3041 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::hasTargetRegionEntryInfo( 3042 unsigned DeviceID, unsigned FileID, StringRef ParentName, unsigned LineNum, 3043 bool IgnoreAddressId) const { 3044 auto PerDevice = OffloadEntriesTargetRegion.find(DeviceID); 3045 if (PerDevice == OffloadEntriesTargetRegion.end()) 3046 return false; 3047 auto PerFile = PerDevice->second.find(FileID); 3048 if (PerFile == PerDevice->second.end()) 3049 return false; 3050 auto PerParentName = PerFile->second.find(ParentName); 3051 if (PerParentName == PerFile->second.end()) 3052 return false; 3053 auto PerLine = PerParentName->second.find(LineNum); 3054 if (PerLine == PerParentName->second.end()) 3055 return false; 3056 // Fail if this entry is already registered. 3057 if (!IgnoreAddressId && 3058 (PerLine->second.getAddress() || PerLine->second.getID())) 3059 return false; 3060 return true; 3061 } 3062 3063 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::actOnTargetRegionEntriesInfo( 3064 const OffloadTargetRegionEntryInfoActTy &Action) { 3065 // Scan all target region entries and perform the provided action. 
/// Create a placeholder entry for a declare-target global variable; the
/// address, size and linkage are filled in later by
/// registerDeviceGlobalVarEntryInfo.
void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
    initializeDeviceGlobalVarEntryInfo(StringRef Name,
                                       OMPTargetGlobalVarEntryKind Flags,
                                       unsigned Order) {
  assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
                                             "only required for the device "
                                             "code generation.");
  OffloadEntriesDeviceGlobalVar.try_emplace(Name, Order, Flags);
  ++OffloadingEntriesNum;
}
void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
    actOnDeviceGlobalVarEntriesInfo(
        const OffloadDeviceGlobalVarEntryInfoActTy &Action) {
  // Scan all device global variable entries and perform the provided action.
  // (The original comment said "target region entries", which was a
  // copy-paste error: this walks OffloadEntriesDeviceGlobalVar.)
  for (const auto &E : OffloadEntriesDeviceGlobalVar)
    Action(E.getKey(), E.getValue());
}
  llvm::Constant *StrPtrInit = llvm::ConstantDataArray::getString(C, Name);

  // Internal constant holding the entry's name; unnamed_addr so identical
  // strings can be merged by the linker.
  std::string StringName = getName({"omp_offloading", "entry_name"});
  auto *Str = new llvm::GlobalVariable(
      M, StrPtrInit->getType(), /*isConstant=*/true,
      llvm::GlobalValue::InternalLinkage, StrPtrInit, StringName);
  Str->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);

  // Fields of __tgt_offload_entry: addr, name, size, flags, reserved.
  llvm::Constant *Data[] = {
      llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast(ID, CGM.VoidPtrTy),
      llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast(Str, CGM.Int8PtrTy),
      llvm::ConstantInt::get(CGM.SizeTy, Size),
      llvm::ConstantInt::get(CGM.Int32Ty, Flags),
      llvm::ConstantInt::get(CGM.Int32Ty, 0)};
  std::string EntryName = getName({"omp_offloading", "entry", ""});
  llvm::GlobalVariable *Entry = createGlobalStruct(
      CGM, getTgtOffloadEntryQTy(), /*IsConstant=*/true, Data,
      Twine(EntryName).concat(Name), llvm::GlobalValue::WeakAnyLinkage);

  // The entry has to be created in the section the linker expects it to be.
  Entry->setSection("omp_offloading_entries");
}

void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() {
  // Emit the offloading entries and metadata so that the device codegen side
  // can easily figure out what to emit. The produced metadata looks like
  // this:
  //
  // !omp_offload.info = !{!1, ...}
  //
  // Right now we only generate metadata for function that contain target
  // regions.

  // If we are in simd mode or there are no entries, we don't need to do
  // anything.
  if (CGM.getLangOpts().OpenMPSimd || OffloadEntriesInfoManager.empty())
    return;

  llvm::Module &M = CGM.getModule();
  llvm::LLVMContext &C = M.getContext();
  // Entries indexed by creation order; each slot holds the entry, the source
  // location it was found at (if recoverable), and its parent/mangled name.
  SmallVector<std::tuple<const OffloadEntriesInfoManagerTy::OffloadEntryInfo *,
                         SourceLocation, StringRef>,
              16>
      OrderedEntries(OffloadEntriesInfoManager.size());
  llvm::SmallVector<StringRef, 16> ParentFunctions(
      OffloadEntriesInfoManager.size());

  // Auxiliary methods to create metadata values and strings.
  auto &&GetMDInt = [this](unsigned V) {
    return llvm::ConstantAsMetadata::get(
        llvm::ConstantInt::get(CGM.Int32Ty, V));
  };

  auto &&GetMDString = [&C](StringRef V) { return llvm::MDString::get(C, V); };

  // Create the offloading info metadata node.
  llvm::NamedMDNode *MD = M.getOrInsertNamedMetadata("omp_offload.info");

  // Create function that emits metadata for each target region entry;
  auto &&TargetRegionMetadataEmitter =
      [this, &C, MD, &OrderedEntries, &ParentFunctions, &GetMDInt,
       &GetMDString](
          unsigned DeviceID, unsigned FileID, StringRef ParentName,
          unsigned Line,
          const OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion &E) {
        // Generate metadata for target regions. Each entry of this metadata
        // contains:
        // - Entry 0 -> Kind of this type of metadata (0).
        // - Entry 1 -> Device ID of the file where the entry was identified.
        // - Entry 2 -> File ID of the file where the entry was identified.
        // - Entry 3 -> Mangled name of the function where the entry was
        // identified.
        // - Entry 4 -> Line in the file where the entry was identified.
        // - Entry 5 -> Order the entry was created.
        // The first element of the metadata node is the kind.
        llvm::Metadata *Ops[] = {GetMDInt(E.getKind()), GetMDInt(DeviceID),
                                 GetMDInt(FileID), GetMDString(ParentName),
                                 GetMDInt(Line), GetMDInt(E.getOrder())};

        // Try to recover a SourceLocation for diagnostics by matching the
        // (DeviceID, FileID) pair against the files known to the
        // SourceManager. Note: the loop variable E shadows the entry
        // parameter E inside the loop body only.
        SourceLocation Loc;
        for (auto I = CGM.getContext().getSourceManager().fileinfo_begin(),
                  E = CGM.getContext().getSourceManager().fileinfo_end();
             I != E; ++I) {
          if (I->getFirst()->getUniqueID().getDevice() == DeviceID &&
              I->getFirst()->getUniqueID().getFile() == FileID) {
            Loc = CGM.getContext().getSourceManager().translateFileLineCol(
                I->getFirst(), Line, 1);
            break;
          }
        }
        // Save this entry in the right position of the ordered entries array.
        OrderedEntries[E.getOrder()] = std::make_tuple(&E, Loc, ParentName);
        ParentFunctions[E.getOrder()] = ParentName;

        // Add metadata to the named metadata node.
        MD->addOperand(llvm::MDNode::get(C, Ops));
      };

  OffloadEntriesInfoManager.actOnTargetRegionEntriesInfo(
      TargetRegionMetadataEmitter);

  // Create function that emits metadata for each device global variable entry;
  auto &&DeviceGlobalVarMetadataEmitter =
      [&C, &OrderedEntries, &GetMDInt, &GetMDString,
       MD](StringRef MangledName,
           const OffloadEntriesInfoManagerTy::OffloadEntryInfoDeviceGlobalVar
               &E) {
        // Generate metadata for global variables. Each entry of this metadata
        // contains:
        // - Entry 0 -> Kind of this type of metadata (1).
        // - Entry 1 -> Mangled name of the variable.
        // - Entry 2 -> Declare target kind.
        // - Entry 3 -> Order the entry was created.
        // The first element of the metadata node is the kind.
        llvm::Metadata *Ops[] = {
            GetMDInt(E.getKind()), GetMDString(MangledName),
            GetMDInt(E.getFlags()), GetMDInt(E.getOrder())};

        // Save this entry in the right position of the ordered entries array.
        OrderedEntries[E.getOrder()] =
            std::make_tuple(&E, SourceLocation(), MangledName);

        // Add metadata to the named metadata node.
        MD->addOperand(llvm::MDNode::get(C, Ops));
      };

  OffloadEntriesInfoManager.actOnDeviceGlobalVarEntriesInfo(
      DeviceGlobalVarMetadataEmitter);

  // Walk the entries in creation order, diagnosing entries that are missing
  // their address/ID and emitting the __tgt_offload_entry globals for the
  // valid ones.
  for (const auto &E : OrderedEntries) {
    assert(std::get<0>(E) && "All ordered entries must exist!");
    if (const auto *CE =
            dyn_cast<OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion>(
                std::get<0>(E))) {
      if (!CE->getID() || !CE->getAddress()) {
        // Do not blame the entry if the parent function is not emitted.
        StringRef FnName = ParentFunctions[CE->getOrder()];
        if (!CGM.GetGlobalValue(FnName))
          continue;
        unsigned DiagID = CGM.getDiags().getCustomDiagID(
            DiagnosticsEngine::Error,
            "Offloading entry for target region in %0 is incorrect: either the "
            "address or the ID is invalid.");
        CGM.getDiags().Report(std::get<1>(E), DiagID) << FnName;
        continue;
      }
      createOffloadEntry(CE->getID(), CE->getAddress(), /*Size=*/0,
                         CE->getFlags(), llvm::GlobalValue::WeakAnyLinkage);
    } else if (const auto *CE = dyn_cast<OffloadEntriesInfoManagerTy::
                                             OffloadEntryInfoDeviceGlobalVar>(
                   std::get<0>(E))) {
      OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags =
          static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>(
              CE->getFlags());
      switch (Flags) {
      case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo: {
        // 'to' entries are skipped on the device under unified shared memory.
        if (CGM.getLangOpts().OpenMPIsDevice &&
            CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())
          continue;
        if (!CE->getAddress()) {
          unsigned DiagID = CGM.getDiags().getCustomDiagID(
              DiagnosticsEngine::Error, "Offloading entry for declare target "
                                        "variable %0 is incorrect: the "
                                        "address is invalid.");
          CGM.getDiags().Report(std::get<1>(E), DiagID) << std::get<2>(E);
          continue;
        }
        // The variable has no definition - no need to add the entry.
        if (CE->getVarSize().isZero())
          continue;
        break;
      }
      case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink:
        // NOTE: the assert message below is a compiled string literal and is
        // kept verbatim (including its existing spelling).
        assert(((CGM.getLangOpts().OpenMPIsDevice && !CE->getAddress()) ||
                (!CGM.getLangOpts().OpenMPIsDevice && CE->getAddress())) &&
               "Declaret target link address is set.");
        if (CGM.getLangOpts().OpenMPIsDevice)
          continue;
        if (!CE->getAddress()) {
          unsigned DiagID = CGM.getDiags().getCustomDiagID(
              DiagnosticsEngine::Error,
              "Offloading entry for declare target variable is incorrect: the "
              "address is invalid.");
          // NOTE(review): unlike the 'to' case above, this diagnostic is
          // emitted without a source location - confirm if intentional.
          CGM.getDiags().Report(DiagID);
          continue;
        }
        break;
      }
      // For globals the entry's address doubles as its ID.
      createOffloadEntry(CE->getAddress(), CE->getAddress(),
                         CE->getVarSize().getQuantity(), Flags,
                         CE->getLinkage());
    } else {
      llvm_unreachable("Unsupported entry kind.");
    }
  }
}

/// Loads all the offload entries information from the host IR
/// metadata.
void CGOpenMPRuntime::loadOffloadInfoMetadata() {
  // If we are in target mode, load the metadata from the host IR. This code has
  // to match the metadata creation in createOffloadEntriesAndInfoMetadata().

  // Only meaningful when compiling for the device.
  if (!CGM.getLangOpts().OpenMPIsDevice)
    return;

  // Without a host IR file there is nothing to load.
  if (CGM.getLangOpts().OMPHostIRFile.empty())
    return;

  auto Buf = llvm::MemoryBuffer::getFile(CGM.getLangOpts().OMPHostIRFile);
  if (auto EC = Buf.getError()) {
    CGM.getDiags().Report(diag::err_cannot_open_file)
        << CGM.getLangOpts().OMPHostIRFile << EC.message();
    return;
  }

  // Parse the host IR into a throwaway LLVM context; we only read metadata.
  llvm::LLVMContext C;
  auto ME = expectedToErrorOrAndEmitErrors(
      C, llvm::parseBitcodeFile(Buf.get()->getMemBufferRef(), C));

  if (auto EC = ME.getError()) {
    unsigned DiagID = CGM.getDiags().getCustomDiagID(
        DiagnosticsEngine::Error, "Unable to parse host IR file '%0':'%1'");
    CGM.getDiags().Report(DiagID)
        << CGM.getLangOpts().OMPHostIRFile << EC.message();
    return;
  }

  llvm::NamedMDNode *MD = ME.get()->getNamedMetadata("omp_offload.info");
  if (!MD)
    return;

  for (llvm::MDNode *MN : MD->operands()) {
    // Helpers to read the operands of the current metadata node; layout must
    // match what createOffloadEntriesAndInfoMetadata() produced.
    auto &&GetMDInt = [MN](unsigned Idx) {
      auto *V = cast<llvm::ConstantAsMetadata>(MN->getOperand(Idx));
      return cast<llvm::ConstantInt>(V->getValue())->getZExtValue();
    };

    auto &&GetMDString = [MN](unsigned Idx) {
      auto *V = cast<llvm::MDString>(MN->getOperand(Idx));
      return V->getString();
    };

    // Operand 0 is the entry kind.
    switch (GetMDInt(0)) {
    default:
      llvm_unreachable("Unexpected metadata!");
      break;
    case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
        OffloadingEntryInfoTargetRegion:
      OffloadEntriesInfoManager.initializeTargetRegionEntryInfo(
          /*DeviceID=*/GetMDInt(1), /*FileID=*/GetMDInt(2),
          /*ParentName=*/GetMDString(3), /*Line=*/GetMDInt(4),
          /*Order=*/GetMDInt(5));
      break;
    case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
        OffloadingEntryInfoDeviceGlobalVar:
      OffloadEntriesInfoManager.initializeDeviceGlobalVarEntryInfo(
          /*MangledName=*/GetMDString(1),
          static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>(
              /*Flags=*/GetMDInt(2)),
          /*Order=*/GetMDInt(3));
      break;
    }
  }
}

/// Lazily build the kmp_routine_entry_t function-pointer type used by task
/// entry points. Cached in KmpRoutineEntryPtrTy/KmpRoutineEntryPtrQTy.
void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) {
  if (!KmpRoutineEntryPtrTy) {
    // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type.
    ASTContext &C = CGM.getContext();
    QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy};
    FunctionProtoType::ExtProtoInfo EPI;
    KmpRoutineEntryPtrQTy = C.getPointerType(
        C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI));
    KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy);
  }
}

QualType CGOpenMPRuntime::getTgtOffloadEntryQTy() {
  // Make sure the type of the entry is already created. This is the type we
  // have to create:
  // struct __tgt_offload_entry{
  //   void *addr;       // Pointer to the offload entry info.
  //                     // (function or global)
  //   char *name;       // Name of the function or global.
  //   size_t size;      // Size of the entry info (0 if it a function).
  //   int32_t flags;    // Flags associated with the entry, e.g. 'link'.
  //   int32_t reserved; // Reserved, to use by the runtime library.
  // };
  if (TgtOffloadEntryQTy.isNull()) {
    ASTContext &C = CGM.getContext();
    RecordDecl *RD = C.buildImplicitRecord("__tgt_offload_entry");
    RD->startDefinition();
    addFieldToRecordDecl(C, RD, C.VoidPtrTy);
    addFieldToRecordDecl(C, RD, C.getPointerType(C.CharTy));
    addFieldToRecordDecl(C, RD, C.getSizeType());
    addFieldToRecordDecl(
        C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
    addFieldToRecordDecl(
        C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
    RD->completeDefinition();
    // The record is packed so its layout matches the runtime's struct exactly.
    RD->addAttr(PackedAttr::CreateImplicit(C));
    TgtOffloadEntryQTy = C.getRecordType(RD);
  }
  return TgtOffloadEntryQTy;
}

namespace {
/// Bundle describing one privatized variable of a task-based directive:
/// the referencing expression, the original variable, its private copy, and
/// the helper used to initialize one element of a firstprivate array.
struct PrivateHelpersTy {
  PrivateHelpersTy(const Expr *OriginalRef, const VarDecl *Original,
                   const VarDecl *PrivateCopy, const VarDecl *PrivateElemInit)
      : OriginalRef(OriginalRef), Original(Original), PrivateCopy(PrivateCopy),
        PrivateElemInit(PrivateElemInit) {}
  PrivateHelpersTy(const VarDecl *Original) : Original(Original) {}
  const Expr *OriginalRef = nullptr;
  const VarDecl *Original = nullptr;
  const VarDecl *PrivateCopy = nullptr;
  const VarDecl *PrivateElemInit = nullptr;
  // A "local private" was built via the single-argument constructor: only
  // Original is set.
  bool isLocalPrivate() const {
    return !OriginalRef && !PrivateCopy && !PrivateElemInit;
  }
};
typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy;
} // anonymous namespace

/// Returns true if \p VD carries an omp allocate attribute that requires a
/// non-default allocator (i.e. a real runtime allocation is needed).
static bool isAllocatableDecl(const VarDecl *VD) {
  const VarDecl *CVD = VD->getCanonicalDecl();
  if (!CVD->hasAttr<OMPAllocateDeclAttr>())
    return false;
  const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
  // Use the default allocation.
  return !(AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc &&
           !AA->getAllocator());
}

/// Build the implicit record .kmp_privates.t holding one field per
/// privatized variable, or return nullptr when there are no privates.
static RecordDecl *
createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) {
  if (!Privates.empty()) {
    ASTContext &C = CGM.getContext();
    // Build struct .kmp_privates_t. {
    //         /*  private vars  */
    //       };
    RecordDecl *RD = C.buildImplicitRecord(".kmp_privates.t");
    RD->startDefinition();
    for (const auto &Pair : Privates) {
      const VarDecl *VD = Pair.second.Original;
      QualType Type = VD->getType().getNonReferenceType();
      // If the private variable is a local variable with lvalue ref type,
      // allocate the pointer instead of the pointee type.
      if (Pair.second.isLocalPrivate()) {
        if (VD->getType()->isLValueReferenceType())
          Type = C.getPointerType(Type);
        if (isAllocatableDecl(VD))
          Type = C.getPointerType(Type);
      }
      FieldDecl *FD = addFieldToRecordDecl(C, RD, Type);
      // Propagate any explicit alignment attributes from the original
      // variable to the generated field.
      if (VD->hasAttrs()) {
        for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()),
             E(VD->getAttrs().end());
             I != E; ++I)
          FD->addAttr(*I);
      }
    }
    RD->completeDefinition();
    return RD;
  }
  return nullptr;
}

/// Build the implicit kmp_task_t record (plus the kmp_cmplrdata_t union it
/// contains). Taskloop directives get five extra trailing fields.
static RecordDecl *
createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind,
                         QualType KmpInt32Ty,
                         QualType KmpRoutineEntryPointerQTy) {
  ASTContext &C = CGM.getContext();
  // Build struct kmp_task_t {
  //         void *              shareds;
  //         kmp_routine_entry_t routine;
  //         kmp_int32           part_id;
  //         kmp_cmplrdata_t     data1;
  //         kmp_cmplrdata_t     data2;
  // For taskloops additional fields:
  //         kmp_uint64          lb;
  //         kmp_uint64          ub;
  //         kmp_int64           st;
  //         kmp_int32           liter;
  //         void *              reductions;
  //       };
  RecordDecl *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TTK_Union);
  UD->startDefinition();
  addFieldToRecordDecl(C, UD, KmpInt32Ty);
  addFieldToRecordDecl(C, UD, KmpRoutineEntryPointerQTy);
  UD->completeDefinition();
  QualType KmpCmplrdataTy = C.getRecordType(UD);
  RecordDecl *RD = C.buildImplicitRecord("kmp_task_t");
  RD->startDefinition();
  addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy);
  addFieldToRecordDecl(C, RD, KmpInt32Ty);
  addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
  addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
  if (isOpenMPTaskLoopDirective(Kind)) {
    QualType KmpUInt64Ty =
        CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
    QualType KmpInt64Ty =
        CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
    addFieldToRecordDecl(C, RD, KmpUInt64Ty);
    addFieldToRecordDecl(C, RD, KmpUInt64Ty);
    addFieldToRecordDecl(C, RD, KmpInt64Ty);
    addFieldToRecordDecl(C, RD, KmpInt32Ty);
    addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  }
  RD->completeDefinition();
  return RD;
}

/// Build kmp_task_t_with_privates: the kmp_task_t header followed by the
/// privates record (when any privates exist).
static RecordDecl *
createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy,
                                     ArrayRef<PrivateDataTy> Privates) {
  ASTContext &C = CGM.getContext();
  // Build struct kmp_task_t_with_privates {
  //         kmp_task_t task_data;
  //         .kmp_privates_t. privates;
  //       };
  RecordDecl *RD = C.buildImplicitRecord("kmp_task_t_with_privates");
  RD->startDefinition();
  addFieldToRecordDecl(C, RD, KmpTaskTQTy);
  if (const RecordDecl *PrivateRD = createPrivatesRecordDecl(CGM, Privates))
    addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD));
  RD->completeDefinition();
  return RD;
}

/// Emit a proxy function which accepts kmp_task_t as the second
/// argument.
/// \code
/// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
/// TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, tt,
/// For taskloops:
/// tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
/// tt->reductions, tt->shareds);
/// return 0;
/// }
/// \endcode
static llvm::Function *
emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc,
                      OpenMPDirectiveKind Kind, QualType KmpInt32Ty,
                      QualType KmpTaskTWithPrivatesPtrQTy,
                      QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy,
                      QualType SharedsPtrTy, llvm::Function *TaskFunction,
                      llvm::Value *TaskPrivatesMap) {
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  // Signature: kmp_int32 (kmp_int32 gtid, kmp_task_t_with_privates *restrict).
  ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                                KmpTaskTWithPrivatesPtrQTy.withRestrict(),
                                ImplicitParamDecl::Other);
  Args.push_back(&GtidArg);
  Args.push_back(&TaskTypeArg);
  const auto &TaskEntryFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
  llvm::FunctionType *TaskEntryTy =
      CGM.getTypes().GetFunctionType(TaskEntryFnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_entry", ""});
  auto *TaskEntry = llvm::Function::Create(
      TaskEntryTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskEntry, TaskEntryFnInfo);
  TaskEntry->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args,
                    Loc, Loc);

  // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map,
  // tt,
  // For taskloops:
  // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
  // tt->task_data.shareds);
  llvm::Value *GtidParam = CGF.EmitLoadOfScalar(
      CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc);
  LValue TDBase = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&TaskTypeArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  const auto *KmpTaskTWithPrivatesQTyRD =
      cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
  // Base points at the embedded kmp_task_t (first field of the record).
  LValue Base =
      CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
  const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
  auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
  LValue PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI);
  llvm::Value *PartidParam = PartIdLVal.getPointer(CGF);

  auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds);
  LValue SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI);
  llvm::Value *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.EmitLoadOfScalar(SharedsLVal, Loc),
      CGF.ConvertTypeForMem(SharedsPtrTy));

  // The privates field (index 1) only exists when there are privatized vars;
  // otherwise pass a null void*.
  auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
  llvm::Value *PrivatesParam;
  if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) {
    LValue PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI);
    PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        PrivatesLVal.getPointer(CGF), CGF.VoidPtrTy);
  } else {
    PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }

  llvm::Value *CommonArgs[] = {
      GtidParam, PartidParam, PrivatesParam, TaskPrivatesMap,
      CGF.Builder
          .CreatePointerBitCastOrAddrSpaceCast(TDBase.getAddress(CGF),
                                               CGF.VoidPtrTy, CGF.Int8Ty)
          .getPointer()};
  SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs),
                                          std::end(CommonArgs));
  // Taskloops additionally pass lb, ub, st, liter and reductions.
  if (isOpenMPTaskLoopDirective(Kind)) {
    auto LBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound);
    LValue LBLVal = CGF.EmitLValueForField(Base, *LBFI);
    llvm::Value *LBParam = CGF.EmitLoadOfScalar(LBLVal, Loc);
    auto UBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound);
    LValue UBLVal = CGF.EmitLValueForField(Base, *UBFI);
    llvm::Value *UBParam = CGF.EmitLoadOfScalar(UBLVal, Loc);
    auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride);
    LValue StLVal = CGF.EmitLValueForField(Base, *StFI);
    llvm::Value *StParam = CGF.EmitLoadOfScalar(StLVal, Loc);
    auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
    LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
    llvm::Value *LIParam = CGF.EmitLoadOfScalar(LILVal, Loc);
    auto RFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTReductions);
    LValue RLVal = CGF.EmitLValueForField(Base, *RFI);
    llvm::Value *RParam = CGF.EmitLoadOfScalar(RLVal, Loc);
    CallArgs.push_back(LBParam);
    CallArgs.push_back(UBParam);
    CallArgs.push_back(StParam);
    CallArgs.push_back(LIParam);
    CallArgs.push_back(RParam);
  }
  CallArgs.push_back(SharedsParam);

  CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskFunction,
                                                  CallArgs);
  // The proxy always returns 0.
  CGF.EmitStoreThroughLValue(RValue::get(CGF.Builder.getInt32(/*C=*/0)),
                             CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty));
  CGF.FinishFunction();
  return TaskEntry;
}

/// Emit the task destructor thunk: walks every field of the privates record
/// and pushes a destroy cleanup for each field whose type needs destruction.
static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM,
                                            SourceLocation Loc,
                                            QualType KmpInt32Ty,
                                            QualType KmpTaskTWithPrivatesPtrQTy,
                                            QualType KmpTaskTWithPrivatesQTy) {
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                                KmpTaskTWithPrivatesPtrQTy.withRestrict(),
                                ImplicitParamDecl::Other);
  Args.push_back(&GtidArg);
  Args.push_back(&TaskTypeArg);
  const auto &DestructorFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
  llvm::FunctionType *DestructorFnTy =
      CGM.getTypes().GetFunctionType(DestructorFnInfo);
  std::string Name =
      CGM.getOpenMPRuntime().getName({"omp_task_destructor", ""});
  auto *DestructorFn =
      llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage,
                             Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), DestructorFn,
                                    DestructorFnInfo);
  DestructorFn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo,
                    Args, Loc, Loc);

  LValue Base = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&TaskTypeArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  const auto *KmpTaskTWithPrivatesQTyRD =
      cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
  // Second field is the privates record.
  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
  Base = CGF.EmitLValueForField(Base, *FI);
  for (const auto *Field :
       cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) {
    if (QualType::DestructionKind DtorKind =
            Field->getType().isDestructedType()) {
      LValue FieldLValue = CGF.EmitLValueForField(Base, Field);
      CGF.pushDestroy(DtorKind, FieldLValue.getAddress(CGF), Field->getType());
    }
  }
  CGF.FinishFunction();
  return DestructorFn;
}

/// Emit a privates mapping function for correct handling of private and
/// firstprivate variables.
/// \code
/// void .omp_task_privates_map.(const .privates.
/// *noalias privs, <ty1>
/// **noalias priv1,..., <tyn> **noalias privn) {
/// *priv1 = &.privates.priv1;
/// ...;
/// *privn = &.privates.privn;
/// }
/// \endcode
static llvm::Value *
emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc,
                               const OMPTaskDataTy &Data, QualType PrivatesQTy,
                               ArrayRef<PrivateDataTy> Privates) {
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  // First parameter: pointer to the privates record itself.
  ImplicitParamDecl TaskPrivatesArg(
      C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
      C.getPointerType(PrivatesQTy).withConst().withRestrict(),
      ImplicitParamDecl::Other);
  Args.push_back(&TaskPrivatesArg);
  // Map each privatized variable to its 1-based position in Args so the field
  // loop below can find the matching out-parameter.
  llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, unsigned> PrivateVarsPos;
  unsigned Counter = 1;
  for (const Expr *E : Data.PrivateVars) {
    Args.push_back(ImplicitParamDecl::Create(
        C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
        C.getPointerType(C.getPointerType(E->getType()))
            .withConst()
            .withRestrict(),
        ImplicitParamDecl::Other));
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    PrivateVarsPos[VD] = Counter;
    ++Counter;
  }
  for (const Expr *E : Data.FirstprivateVars) {
    Args.push_back(ImplicitParamDecl::Create(
        C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
        C.getPointerType(C.getPointerType(E->getType()))
            .withConst()
            .withRestrict(),
        ImplicitParamDecl::Other));
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    PrivateVarsPos[VD] = Counter;
    ++Counter;
  }
  for (const Expr *E : Data.LastprivateVars) {
    Args.push_back(ImplicitParamDecl::Create(
        C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
        C.getPointerType(C.getPointerType(E->getType()))
            .withConst()
            .withRestrict(),
        ImplicitParamDecl::Other));
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    PrivateVarsPos[VD] = Counter;
    ++Counter;
  }
  for (const VarDecl *VD : Data.PrivateLocals) {
    // Mirror the type adjustments done in createPrivatesRecordDecl for local
    // privates (reference types and allocatable decls become pointers).
    QualType Ty = VD->getType().getNonReferenceType();
    if (VD->getType()->isLValueReferenceType())
      Ty = C.getPointerType(Ty);
    if (isAllocatableDecl(VD))
      Ty = C.getPointerType(Ty);
    Args.push_back(ImplicitParamDecl::Create(
        C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
        C.getPointerType(C.getPointerType(Ty)).withConst().withRestrict(),
        ImplicitParamDecl::Other));
    PrivateVarsPos[VD] = Counter;
    ++Counter;
  }
  const auto &TaskPrivatesMapFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *TaskPrivatesMapTy =
      CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo);
  std::string Name =
      CGM.getOpenMPRuntime().getName({"omp_task_privates_map", ""});
  auto *TaskPrivatesMap = llvm::Function::Create(
      TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage, Name,
      &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskPrivatesMap,
                                    TaskPrivatesMapFnInfo);
  // This trivial mapping function should always be inlined in optimized
  // builds.
  if (CGM.getLangOpts().Optimize) {
    TaskPrivatesMap->removeFnAttr(llvm::Attribute::NoInline);
    TaskPrivatesMap->removeFnAttr(llvm::Attribute::OptimizeNone);
    TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline);
  }
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap,
                    TaskPrivatesMapFnInfo, Args, Loc, Loc);

  // *privi = &.privates.privi;
  LValue Base = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&TaskPrivatesArg),
      TaskPrivatesArg.getType()->castAs<PointerType>());
  const auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl());
  Counter = 0;
  for (const FieldDecl *Field : PrivatesQTyRD->fields()) {
    LValue FieldLVal = CGF.EmitLValueForField(Base, Field);
    const VarDecl *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]];
    LValue RefLVal =
        CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType());
    LValue RefLoadLVal = CGF.EmitLoadOfPointerLValue(
        RefLVal.getAddress(CGF), RefLVal.getType()->castAs<PointerType>());
    CGF.EmitStoreOfScalar(FieldLVal.getPointer(CGF), RefLoadLVal);
    ++Counter;
  }
  CGF.FinishFunction();
  return TaskPrivatesMap;
}

/// Emit initialization for private variables in task-based directives.
static void emitPrivatesInit(CodeGenFunction &CGF,
                             const OMPExecutableDirective &D,
                             Address KmpTaskSharedsPtr, LValue TDBase,
                             const RecordDecl *KmpTaskTWithPrivatesQTyRD,
                             QualType SharedsTy, QualType SharedsPtrTy,
                             const OMPTaskDataTy &Data,
                             ArrayRef<PrivateDataTy> Privates, bool ForDup) {
  ASTContext &C = CGF.getContext();
  // Second field of kmp_task_t_with_privates is the privates record.
  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
  LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI);
  OpenMPDirectiveKind Kind = isOpenMPTaskLoopDirective(D.getDirectiveKind())
                                 ? OMPD_taskloop
                                 : OMPD_task;
  const CapturedStmt &CS = *D.getCapturedStmt(Kind);
  CodeGenFunction::CGCapturedStmtInfo CapturesInfo(CS);
  LValue SrcBase;
  bool IsTargetTask =
      isOpenMPTargetDataManagementDirective(D.getDirectiveKind()) ||
      isOpenMPTargetExecutionDirective(D.getDirectiveKind());
  // For target-based directives skip 4 firstprivate arrays BasePointersArray,
  // PointersArray, SizesArray, and MappersArray. The original variables for
  // these arrays are not captured and we get their addresses explicitly.
  // SrcBase (the shareds block reinterpreted as SharedsTy) is only needed
  // when copying firstprivates from the source task.
  if ((!IsTargetTask && !Data.FirstprivateVars.empty() && ForDup) ||
      (IsTargetTask && KmpTaskSharedsPtr.isValid())) {
    SrcBase = CGF.MakeAddrLValue(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy),
            CGF.ConvertTypeForMem(SharedsTy)),
        SharedsTy);
  }
  // Walk the privates record fields in lockstep with the Privates list.
  FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin();
  for (const PrivateDataTy &Pair : Privates) {
    // Do not initialize private locals.
    if (Pair.second.isLocalPrivate()) {
      ++FI;
      continue;
    }
    const VarDecl *VD = Pair.second.PrivateCopy;
    const Expr *Init = VD->getAnyInitializer();
    // For the dup function (ForDup) only non-trivial constructor inits need
    // to be (re)run; trivial inits were handled at task creation.
    if (Init && (!ForDup || (isa<CXXConstructExpr>(Init) &&
                             !CGF.isTrivialInitializer(Init)))) {
      LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI);
      if (const VarDecl *Elem = Pair.second.PrivateElemInit) {
        const VarDecl *OriginalVD = Pair.second.Original;
        // Check if the variable is the target-based BasePointersArray,
        // PointersArray, SizesArray, or MappersArray.
        LValue SharedRefLValue;
        QualType Type = PrivateLValue.getType();
        const FieldDecl *SharedField = CapturesInfo.lookup(OriginalVD);
        if (IsTargetTask && !SharedField) {
          assert(isa<ImplicitParamDecl>(OriginalVD) &&
                 isa<CapturedDecl>(OriginalVD->getDeclContext()) &&
                 cast<CapturedDecl>(OriginalVD->getDeclContext())
                         ->getNumParams() == 0 &&
                 isa<TranslationUnitDecl>(
                     cast<CapturedDecl>(OriginalVD->getDeclContext())
                         ->getDeclContext()) &&
                 "Expected artificial target data variable.");
          SharedRefLValue =
              CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(OriginalVD), Type);
        } else if (ForDup) {
          // Read the shared value out of the source task's shareds block,
          // re-aligned to the original declaration's alignment.
          SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField);
          SharedRefLValue = CGF.MakeAddrLValue(
              SharedRefLValue.getAddress(CGF).withAlignment(
                  C.getDeclAlign(OriginalVD)),
              SharedRefLValue.getType(), LValueBaseInfo(AlignmentSource::Decl),
              SharedRefLValue.getTBAAInfo());
        } else if (CGF.LambdaCaptureFields.count(
                       Pair.second.Original->getCanonicalDecl()) > 0 ||
                   isa_and_nonnull<BlockDecl>(CGF.CurCodeDecl)) {
          SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
        } else {
          // Processing for implicitly captured variables.
          InlinedOpenMPRegionRAII Region(
              CGF, [](CodeGenFunction &, PrePostActionTy &) {}, OMPD_unknown,
              /*HasCancel=*/false, /*NoInheritance=*/true);
          SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
        }
        if (Type->isArrayType()) {
          // Initialize firstprivate array.
          if (!isa<CXXConstructExpr>(Init) || CGF.isTrivialInitializer(Init)) {
            // Perform simple memcpy.
            CGF.EmitAggregateAssign(PrivateLValue, SharedRefLValue, Type);
          } else {
            // Initialize firstprivate array using element-by-element
            // initialization.
            CGF.EmitOMPAggregateAssign(
                PrivateLValue.getAddress(CGF), SharedRefLValue.getAddress(CGF),
                Type,
                [&CGF, Elem, Init, &CapturesInfo](Address DestElement,
                                                  Address SrcElement) {
                  // Clean up any temporaries needed by the initialization.
                  CodeGenFunction::OMPPrivateScope InitScope(CGF);
                  InitScope.addPrivate(Elem, SrcElement);
                  (void)InitScope.Privatize();
                  // Emit initialization for single element.
                  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(
                      CGF, &CapturesInfo);
                  CGF.EmitAnyExprToMem(Init, DestElement,
                                       Init->getType().getQualifiers(),
                                       /*IsInitializer=*/false);
                });
          }
        } else {
          // Scalar/record firstprivate: privatize the element helper to the
          // shared address and run the initializer.
          CodeGenFunction::OMPPrivateScope InitScope(CGF);
          InitScope.addPrivate(Elem, SharedRefLValue.getAddress(CGF));
          (void)InitScope.Privatize();
          CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo);
          CGF.EmitExprAsInit(Init, VD, PrivateLValue,
                             /*capturedByInit=*/false);
        }
      } else {
        // Plain private: just run the default initializer.
        CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false);
      }
    }
    ++FI;
  }
}

/// Check if duplication function is required for taskloops.
static bool checkInitIsRequired(CodeGenFunction &CGF,
                                ArrayRef<PrivateDataTy> Privates) {
  bool InitRequired = false;
  for (const PrivateDataTy &Pair : Privates) {
    // Local privates never carry a user-visible initializer to duplicate.
    if (Pair.second.isLocalPrivate())
      continue;
    const VarDecl *VD = Pair.second.PrivateCopy;
    const Expr *Init = VD->getAnyInitializer();
    // Only a non-trivial C++ constructor call forces generation of the
    // task duplication routine; trivial initializers are handled by memcpy.
    InitRequired = InitRequired || (isa_and_nonnull<CXXConstructExpr>(Init) &&
                                    !CGF.isTrivialInitializer(Init));
    if (InitRequired)
      break;
  }
  return InitRequired;
}

/// Emit task_dup function (for initialization of
/// private/firstprivate/lastprivate vars and last_iter flag)
/// \code
/// void __task_dup_entry(kmp_task_t *task_dst, const kmp_task_t *task_src, int
/// lastpriv) {
/// // setup lastprivate flag
/// task_dst->last = lastpriv;
/// // could be constructor calls here...
/// }
/// \endcode
static llvm::Value *
emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc,
                    const OMPExecutableDirective &D,
                    QualType KmpTaskTWithPrivatesPtrQTy,
                    const RecordDecl *KmpTaskTWithPrivatesQTyRD,
                    const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy,
                    QualType SharedsPtrTy, const OMPTaskDataTy &Data,
                    ArrayRef<PrivateDataTy> Privates, bool WithLastIter) {
  ASTContext &C = CGM.getContext();
  // Build the argument list (task_dst, task_src, lastpriv) for the
  // internal-linkage duplication entry point.
  FunctionArgList Args;
  ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                           KmpTaskTWithPrivatesPtrQTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                           KmpTaskTWithPrivatesPtrQTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy,
                                ImplicitParamDecl::Other);
  Args.push_back(&DstArg);
  Args.push_back(&SrcArg);
  Args.push_back(&LastprivArg);
  const auto &TaskDupFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *TaskDupTy = CGM.getTypes().GetFunctionType(TaskDupFnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_dup", ""});
  auto *TaskDup = llvm::Function::Create(
      TaskDupTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskDup, TaskDupFnInfo);
  TaskDup->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskDup, TaskDupFnInfo, Args, Loc,
                    Loc);

  LValue TDBase = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&DstArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  // task_dst->liter = lastpriv;
  if (WithLastIter) {
    auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
    LValue Base = CGF.EmitLValueForField(
        TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
    LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
    llvm::Value *Lastpriv = CGF.EmitLoadOfScalar(
        CGF.GetAddrOfLocalVar(&LastprivArg), /*Volatile=*/false, C.IntTy, Loc);
    CGF.EmitStoreOfScalar(Lastpriv, LILVal);
  }

  // Emit initial values for private copies (if any).
  assert(!Privates.empty());
  Address KmpTaskSharedsPtr = Address::invalid();
  // Firstprivates are copied from the *source* task's shareds, so load the
  // shareds pointer out of task_src rather than task_dst.
  if (!Data.FirstprivateVars.empty()) {
    LValue TDBase = CGF.EmitLoadOfPointerLValue(
        CGF.GetAddrOfLocalVar(&SrcArg),
        KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
    LValue Base = CGF.EmitLValueForField(
        TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
    KmpTaskSharedsPtr = Address::deprecated(
        CGF.EmitLoadOfScalar(CGF.EmitLValueForField(
                                 Base, *std::next(KmpTaskTQTyRD->field_begin(),
                                                  KmpTaskTShareds)),
                             Loc),
        CGM.getNaturalTypeAlignment(SharedsTy));
  }
  emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase, KmpTaskTWithPrivatesQTyRD,
                   SharedsTy, SharedsPtrTy, Data, Privates, /*ForDup=*/true);
  CGF.FinishFunction();
  return TaskDup;
}

/// Checks if destructor function is required to be generated.
/// \return true if cleanups are required, false otherwise.
static bool
checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD,
                         ArrayRef<PrivateDataTy> Privates) {
  for (const PrivateDataTy &P : Privates) {
    if (P.second.isLocalPrivate())
      continue;
    QualType Ty = P.second.Original->getType().getNonReferenceType();
    if (Ty.isDestructedType())
      return true;
  }
  return false;
}

namespace {
/// Loop generator for OpenMP iterator expression.
class OMPIteratorGeneratorScope final
    : public CodeGenFunction::OMPPrivateScope {
  CodeGenFunction &CGF;
  const OMPIteratorExpr *E = nullptr;
  // Per-iterator jump targets: "cont" re-tests the loop condition, "exit"
  // leaves the loop. The destructor emits the back-branches.
  SmallVector<CodeGenFunction::JumpDest, 4> ContDests;
  SmallVector<CodeGenFunction::JumpDest, 4> ExitDests;
  OMPIteratorGeneratorScope() = delete;
  OMPIteratorGeneratorScope(OMPIteratorGeneratorScope &) = delete;

public:
  /// Emits the loop headers for all iterators of \p E (privatized counters,
  /// zero-init, condition check and body entry). A null \p E makes the scope
  /// a no-op.
  OMPIteratorGeneratorScope(CodeGenFunction &CGF, const OMPIteratorExpr *E)
      : CodeGenFunction::OMPPrivateScope(CGF), CGF(CGF), E(E) {
    if (!E)
      return;
    SmallVector<llvm::Value *, 4> Uppers;
    for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
      Uppers.push_back(CGF.EmitScalarExpr(E->getHelper(I).Upper));
      const auto *VD = cast<VarDecl>(E->getIteratorDecl(I));
      addPrivate(VD, CGF.CreateMemTemp(VD->getType(), VD->getName()));
      const OMPIteratorHelperData &HelperData = E->getHelper(I);
      addPrivate(
          HelperData.CounterVD,
          CGF.CreateMemTemp(HelperData.CounterVD->getType(), "counter.addr"));
    }
    Privatize();

    for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
      const OMPIteratorHelperData &HelperData = E->getHelper(I);
      LValue CLVal =
          CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(HelperData.CounterVD),
                             HelperData.CounterVD->getType());
      // Counter = 0;
      CGF.EmitStoreOfScalar(
          llvm::ConstantInt::get(CLVal.getAddress(CGF).getElementType(), 0),
          CLVal);
      CodeGenFunction::JumpDest &ContDest =
          ContDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.cont"));
      CodeGenFunction::JumpDest &ExitDest =
          ExitDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.exit"));
      // N = <number-of_iterations>;
      llvm::Value *N = Uppers[I];
      // cont:
      // if (Counter < N) goto body; else goto exit;
      CGF.EmitBlock(ContDest.getBlock());
      auto *CVal =
          CGF.EmitLoadOfScalar(CLVal, HelperData.CounterVD->getLocation());
      // Use a signed/unsigned compare matching the counter's type.
      llvm::Value *Cmp =
          HelperData.CounterVD->getType()->isSignedIntegerOrEnumerationType()
              ? CGF.Builder.CreateICmpSLT(CVal, N)
              : CGF.Builder.CreateICmpULT(CVal, N);
      llvm::BasicBlock *BodyBB = CGF.createBasicBlock("iter.body");
      CGF.Builder.CreateCondBr(Cmp, BodyBB, ExitDest.getBlock());
      // body:
      CGF.EmitBlock(BodyBB);
      // Iteri = Begini + Counter * Stepi;
      CGF.EmitIgnoredExpr(HelperData.Update);
    }
  }
  /// Closes the loops in reverse (innermost-first) order: increments each
  /// counter, branches back to its "cont" block and emits the "exit" block.
  ~OMPIteratorGeneratorScope() {
    if (!E)
      return;
    for (unsigned I = E->numOfIterators(); I > 0; --I) {
      // Counter = Counter + 1;
      const OMPIteratorHelperData &HelperData = E->getHelper(I - 1);
      CGF.EmitIgnoredExpr(HelperData.CounterUpdate);
      // goto cont;
      CGF.EmitBranchThroughCleanup(ContDests[I - 1]);
      // exit:
      CGF.EmitBlock(ExitDests[I - 1].getBlock(), /*IsFinished=*/I == 1);
    }
  }
};
} // namespace

/// Returns the base address of \p E and its size in bytes: for an array
/// shaping expression the product of element size and all dimensions; for an
/// array section the byte distance between upper and lower bound addresses;
/// otherwise sizeof the expression's type.
static std::pair<llvm::Value *, llvm::Value *>
getPointerAndSize(CodeGenFunction &CGF, const Expr *E) {
  const auto *OASE = dyn_cast<OMPArrayShapingExpr>(E);
  llvm::Value *Addr;
  if (OASE) {
    const Expr *Base = OASE->getBase();
    Addr = CGF.EmitScalarExpr(Base);
  } else {
    Addr = CGF.EmitLValue(E).getPointer(CGF);
  }
  llvm::Value *SizeVal;
  QualType Ty = E->getType();
  if (OASE) {
    // size = sizeof(element) * dim0 * dim1 * ...
    SizeVal = CGF.getTypeSize(OASE->getBase()->getType()->getPointeeType());
    for (const Expr *SE : OASE->getDimensions()) {
      llvm::Value *Sz = CGF.EmitScalarExpr(SE);
      Sz = CGF.EmitScalarConversion(
          Sz, SE->getType(), CGF.getContext().getSizeType(), SE->getExprLoc());
      SizeVal = CGF.Builder.CreateNUWMul(SizeVal, Sz);
    }
  } else if (const auto *ASE =
                 dyn_cast<OMPArraySectionExpr>(E->IgnoreParenImpCasts())) {
    // size = (&section_end + 1) - &section_begin, in bytes.
    LValue UpAddrLVal =
        CGF.EmitOMPArraySectionExpr(ASE, /*IsLowerBound=*/false);
    Address UpAddrAddress = UpAddrLVal.getAddress(CGF);
    llvm::Value *UpAddr = CGF.Builder.CreateConstGEP1_32(
        UpAddrAddress.getElementType(), UpAddrAddress.getPointer(), /*Idx0=*/1);
    llvm::Value *LowIntPtr = CGF.Builder.CreatePtrToInt(Addr, CGF.SizeTy);
    llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGF.SizeTy);
    SizeVal = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr);
  } else {
    SizeVal = CGF.getTypeSize(Ty);
  }
  return std::make_pair(Addr, SizeVal);
}

/// Builds kmp_task_affinity_info_t, if it is not built yet, and builds flags
/// type. (The record has fields: base_addr (intptr), len (size_t), flags.)
static void getKmpAffinityType(ASTContext &C, QualType &KmpTaskAffinityInfoTy) {
  QualType FlagsTy = C.getIntTypeForBitwidth(32, /*Signed=*/false);
  if (KmpTaskAffinityInfoTy.isNull()) {
    RecordDecl *KmpAffinityInfoRD =
        C.buildImplicitRecord("kmp_task_affinity_info_t");
    KmpAffinityInfoRD->startDefinition();
    addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getIntPtrType());
    addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getSizeType());
    addFieldToRecordDecl(C, KmpAffinityInfoRD, FlagsTy);
    KmpAffinityInfoRD->completeDefinition();
    KmpTaskAffinityInfoTy = C.getRecordType(KmpAffinityInfoRD);
  }
}

CGOpenMPRuntime::TaskResultTy
CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,
                              const OMPExecutableDirective &D,
                              llvm::Function *TaskFunction, QualType SharedsTy,
                              Address Shareds, const OMPTaskDataTy &Data) {
  ASTContext &C = CGM.getContext();
  llvm::SmallVector<PrivateDataTy, 4> Privates;
  // Aggregate privates and sort them by the alignment.
  // Collect private/firstprivate/lastprivate copies together with their
  // alignments; the stable sort below lays larger-aligned privates first.
  const auto *I = Data.PrivateCopies.begin();
  for (const Expr *E : Data.PrivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
                         /*PrivateElemInit=*/nullptr));
    ++I;
  }
  I = Data.FirstprivateCopies.begin();
  const auto *IElemInitRef = Data.FirstprivateInits.begin();
  for (const Expr *E : Data.FirstprivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(
            E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
            cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl())));
    ++I;
    ++IElemInitRef;
  }
  I = Data.LastprivateCopies.begin();
  for (const Expr *E : Data.LastprivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
                         /*PrivateElemInit=*/nullptr));
    ++I;
  }
  for (const VarDecl *VD : Data.PrivateLocals) {
    // Allocatable locals are stored indirectly, so align as a pointer.
    if (isAllocatableDecl(VD))
      Privates.emplace_back(CGM.getPointerAlign(), PrivateHelpersTy(VD));
    else
      Privates.emplace_back(C.getDeclAlign(VD), PrivateHelpersTy(VD));
  }
  llvm::stable_sort(Privates,
                    [](const PrivateDataTy &L, const PrivateDataTy &R) {
                      return L.first > R.first;
                    });
  QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
  // Build type kmp_routine_entry_t (if not built yet).
  emitKmpRoutineEntryT(KmpInt32Ty);
  // Build type kmp_task_t (if not built yet).
  // Taskloops use a distinct kmp_task_t layout, cached separately.
  if (isOpenMPTaskLoopDirective(D.getDirectiveKind())) {
    if (SavedKmpTaskloopTQTy.isNull()) {
      SavedKmpTaskloopTQTy = C.getRecordType(createKmpTaskTRecordDecl(
          CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
    }
    KmpTaskTQTy = SavedKmpTaskloopTQTy;
  } else {
    assert((D.getDirectiveKind() == OMPD_task ||
            isOpenMPTargetExecutionDirective(D.getDirectiveKind()) ||
            isOpenMPTargetDataManagementDirective(D.getDirectiveKind())) &&
           "Expected taskloop, task or target directive");
    if (SavedKmpTaskTQTy.isNull()) {
      SavedKmpTaskTQTy = C.getRecordType(createKmpTaskTRecordDecl(
          CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
    }
    KmpTaskTQTy = SavedKmpTaskTQTy;
  }
  const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
  // Build particular struct kmp_task_t for the given task.
  const RecordDecl *KmpTaskTWithPrivatesQTyRD =
      createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates);
  QualType KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD);
  QualType KmpTaskTWithPrivatesPtrQTy =
      C.getPointerType(KmpTaskTWithPrivatesQTy);
  llvm::Type *KmpTaskTWithPrivatesTy = CGF.ConvertType(KmpTaskTWithPrivatesQTy);
  llvm::Type *KmpTaskTWithPrivatesPtrTy =
      KmpTaskTWithPrivatesTy->getPointerTo();
  llvm::Value *KmpTaskTWithPrivatesTySize =
      CGF.getTypeSize(KmpTaskTWithPrivatesQTy);
  QualType SharedsPtrTy = C.getPointerType(SharedsTy);

  // Emit initial values for private copies (if any).
  llvm::Value *TaskPrivatesMap = nullptr;
  llvm::Type *TaskPrivatesMapTy =
      std::next(TaskFunction->arg_begin(), 3)->getType();
  if (!Privates.empty()) {
    auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
    TaskPrivatesMap =
        emitTaskPrivateMappingFunction(CGM, Loc, Data, FI->getType(), Privates);
    TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        TaskPrivatesMap, TaskPrivatesMapTy);
  } else {
    TaskPrivatesMap = llvm::ConstantPointerNull::get(
        cast<llvm::PointerType>(TaskPrivatesMapTy));
  }
  // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid,
  // kmp_task_t *tt);
  llvm::Function *TaskEntry = emitProxyTaskFunction(
      CGM, Loc, D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
      KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction,
      TaskPrivatesMap);

  // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
  // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
  // kmp_routine_entry_t *task_entry);
  // Task flags. Format is taken from
  // https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h,
  // description of kmp_tasking_flags struct.
  enum {
    TiedFlag = 0x1,
    FinalFlag = 0x2,
    DestructorsFlag = 0x8,
    PriorityFlag = 0x20,
    DetachableFlag = 0x40,
  };
  unsigned Flags = Data.Tied ? TiedFlag : 0;
  bool NeedsCleanup = false;
  if (!Privates.empty()) {
    NeedsCleanup =
        checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD, Privates);
    if (NeedsCleanup)
      Flags = Flags | DestructorsFlag;
  }
  if (Data.Priority.getInt())
    Flags = Flags | PriorityFlag;
  if (D.hasClausesOfKind<OMPDetachClause>())
    Flags = Flags | DetachableFlag;
  // final(<cond>) may be a runtime value (select) or a compile-time constant.
  llvm::Value *TaskFlags =
      Data.Final.getPointer()
          ? CGF.Builder.CreateSelect(Data.Final.getPointer(),
                                     CGF.Builder.getInt32(FinalFlag),
                                     CGF.Builder.getInt32(/*C=*/0))
          : CGF.Builder.getInt32(Data.Final.getInt() ? FinalFlag : 0);
  TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags));
  llvm::Value *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy));
  SmallVector<llvm::Value *, 8> AllocArgs = {
      emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), TaskFlags,
      KmpTaskTWithPrivatesTySize, SharedsSize,
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TaskEntry,
                                                      KmpRoutineEntryPtrTy)};
  llvm::Value *NewTask;
  if (D.hasClausesOfKind<OMPNowaitClause>()) {
    // Check if we have any device clause associated with the directive.
    const Expr *Device = nullptr;
    if (auto *C = D.getSingleClause<OMPDeviceClause>())
      Device = C->getDevice();
    // Emit device ID if any otherwise use default value.
    llvm::Value *DeviceID;
    if (Device)
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    else
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    AllocArgs.push_back(DeviceID);
    NewTask = CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_omp_target_task_alloc),
        AllocArgs);
  } else {
    NewTask =
        CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                                CGM.getModule(), OMPRTL___kmpc_omp_task_alloc),
                            AllocArgs);
  }
  // Emit detach clause initialization.
  // evt = (typeof(evt))__kmpc_task_allow_completion_event(loc, tid,
  // task_descriptor);
  if (const auto *DC = D.getSingleClause<OMPDetachClause>()) {
    const Expr *Evt = DC->getEventHandler()->IgnoreParenImpCasts();
    LValue EvtLVal = CGF.EmitLValue(Evt);

    // Build kmp_event_t *__kmpc_task_allow_completion_event(ident_t *loc_ref,
    // int gtid, kmp_task_t *task);
    llvm::Value *Loc = emitUpdateLocation(CGF, DC->getBeginLoc());
    llvm::Value *Tid = getThreadID(CGF, DC->getBeginLoc());
    Tid = CGF.Builder.CreateIntCast(Tid, CGF.IntTy, /*isSigned=*/false);
    llvm::Value *EvtVal = CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_task_allow_completion_event),
        {Loc, Tid, NewTask});
    EvtVal = CGF.EmitScalarConversion(EvtVal, C.VoidPtrTy, Evt->getType(),
                                      Evt->getExprLoc());
    CGF.EmitStoreOfScalar(EvtVal, EvtLVal);
  }
  // Process affinity clauses.
  if (D.hasClausesOfKind<OMPAffinityClause>()) {
    // Process list of affinity data.
    ASTContext &C = CGM.getContext();
    Address AffinitiesArray = Address::invalid();
    // Calculate number of elements to form the array of affinity data.
    llvm::Value *NumOfElements = nullptr;
    unsigned NumAffinities = 0;
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      if (const Expr *Modifier = C->getModifier()) {
        // Iterator modifier: element count is the product of iterator uppers,
        // known only at runtime.
        const auto *IE = cast<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts());
        for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
          llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
          Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
          NumOfElements =
              NumOfElements ? CGF.Builder.CreateNUWMul(NumOfElements, Sz) : Sz;
        }
      } else {
        NumAffinities += C->varlist_size();
      }
    }
    getKmpAffinityType(CGM.getContext(), KmpTaskAffinityInfoTy);
    // Fields ids in kmp_task_affinity_info record.
    enum RTLAffinityInfoFieldsTy { BaseAddr, Len, Flags };

    QualType KmpTaskAffinityInfoArrayTy;
    if (NumOfElements) {
      // Runtime-sized: emit a VLA of affinity records.
      NumOfElements = CGF.Builder.CreateNUWAdd(
          llvm::ConstantInt::get(CGF.SizeTy, NumAffinities), NumOfElements);
      auto *OVE = new (C) OpaqueValueExpr(
          Loc,
          C.getIntTypeForBitwidth(C.getTypeSize(C.getSizeType()), /*Signed=*/0),
          VK_PRValue);
      CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, OVE,
                                                    RValue::get(NumOfElements));
      KmpTaskAffinityInfoArrayTy =
          C.getVariableArrayType(KmpTaskAffinityInfoTy, OVE, ArrayType::Normal,
                                 /*IndexTypeQuals=*/0, SourceRange(Loc, Loc));
      // Properly emit variable-sized array.
      auto *PD = ImplicitParamDecl::Create(C, KmpTaskAffinityInfoArrayTy,
                                           ImplicitParamDecl::Other);
      CGF.EmitVarDecl(*PD);
      AffinitiesArray = CGF.GetAddrOfLocalVar(PD);
      NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
                                                /*isSigned=*/false);
    } else {
      // Compile-time count: use a constant-sized local array.
      KmpTaskAffinityInfoArrayTy = C.getConstantArrayType(
          KmpTaskAffinityInfoTy,
          llvm::APInt(C.getTypeSize(C.getSizeType()), NumAffinities), nullptr,
          ArrayType::Normal, /*IndexTypeQuals=*/0);
      AffinitiesArray =
          CGF.CreateMemTemp(KmpTaskAffinityInfoArrayTy, ".affs.arr.addr");
      AffinitiesArray = CGF.Builder.CreateConstArrayGEP(AffinitiesArray, 0);
      NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumAffinities,
                                             /*isSigned=*/false);
    }

    const auto *KmpAffinityInfoRD = KmpTaskAffinityInfoTy->getAsRecordDecl();
    // Fill array by elements without iterators.
    unsigned Pos = 0;
    bool HasIterator = false;
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      if (C->getModifier()) {
        HasIterator = true;
        continue;
      }
      for (const Expr *E : C->varlists()) {
        llvm::Value *Addr;
        llvm::Value *Size;
        std::tie(Addr, Size) = getPointerAndSize(CGF, E);
        LValue Base =
            CGF.MakeAddrLValue(CGF.Builder.CreateConstGEP(AffinitiesArray, Pos),
                               KmpTaskAffinityInfoTy);
        // affs[i].base_addr = &<Affinities[i].second>;
        LValue BaseAddrLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
        CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
                              BaseAddrLVal);
        // affs[i].len = sizeof(<Affinities[i].second>);
        LValue LenLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
        CGF.EmitStoreOfScalar(Size, LenLVal);
        ++Pos;
      }
    }
    LValue PosLVal;
    if (HasIterator) {
      // Runtime position counter, continued from the statically-filled part.
      PosLVal = CGF.MakeAddrLValue(
          CGF.CreateMemTemp(C.getSizeType(), "affs.counter.addr"),
          C.getSizeType());
      CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
    }
    // Process elements with iterators.
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      const Expr *Modifier = C->getModifier();
      if (!Modifier)
        continue;
      OMPIteratorGeneratorScope IteratorScope(
          CGF, cast_or_null<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts()));
      for (const Expr *E : C->varlists()) {
        llvm::Value *Addr;
        llvm::Value *Size;
        std::tie(Addr, Size) = getPointerAndSize(CGF, E);
        llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
        LValue Base = CGF.MakeAddrLValue(
            CGF.Builder.CreateGEP(AffinitiesArray, Idx), KmpTaskAffinityInfoTy);
        // affs[i].base_addr = &<Affinities[i].second>;
        LValue BaseAddrLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
        CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
                              BaseAddrLVal);
        // affs[i].len = sizeof(<Affinities[i].second>);
        LValue LenLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
        CGF.EmitStoreOfScalar(Size, LenLVal);
        Idx = CGF.Builder.CreateNUWAdd(
            Idx, llvm::ConstantInt::get(Idx->getType(), 1));
        CGF.EmitStoreOfScalar(Idx, PosLVal);
      }
    }
    // Call to kmp_int32 __kmpc_omp_reg_task_with_affinity(ident_t *loc_ref,
    // kmp_int32 gtid, kmp_task_t *new_task, kmp_int32
    // naffins, kmp_task_affinity_info_t *affin_list);
    llvm::Value *LocRef = emitUpdateLocation(CGF, Loc);
    llvm::Value *GTid = getThreadID(CGF, Loc);
    llvm::Value *AffinListPtr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        AffinitiesArray.getPointer(), CGM.VoidPtrTy);
    // FIXME: Emit the function and ignore its result for now unless the
    // runtime function is properly implemented.
    (void)CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_omp_reg_task_with_affinity),
        {LocRef, GTid, NewTask, NumOfElements, AffinListPtr});
  }
  llvm::Value *NewTaskNewTaskTTy =
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          NewTask, KmpTaskTWithPrivatesPtrTy);
  LValue Base = CGF.MakeNaturalAlignAddrLValue(NewTaskNewTaskTTy,
                                               KmpTaskTWithPrivatesQTy);
  LValue TDBase =
      CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin());
  // Fill the data in the resulting kmp_task_t record.
  // Copy shareds if there are any.
  Address KmpTaskSharedsPtr = Address::invalid();
  if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) {
    KmpTaskSharedsPtr = Address::deprecated(
        CGF.EmitLoadOfScalar(
            CGF.EmitLValueForField(
                TDBase,
                *std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds)),
            Loc),
        CGM.getNaturalTypeAlignment(SharedsTy));
    LValue Dest = CGF.MakeAddrLValue(KmpTaskSharedsPtr, SharedsTy);
    LValue Src = CGF.MakeAddrLValue(Shareds, SharedsTy);
    CGF.EmitAggregateCopy(Dest, Src, SharedsTy, AggValueSlot::DoesNotOverlap);
  }
  // Emit initial values for private copies (if any).
  TaskResultTy Result;
  if (!Privates.empty()) {
    emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, Base, KmpTaskTWithPrivatesQTyRD,
                     SharedsTy, SharedsPtrTy, Data, Privates,
                     /*ForDup=*/false);
    // Taskloops additionally need a task_dup routine when lastprivates are
    // present or any private requires non-trivial initialization.
    if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) &&
        (!Data.LastprivateVars.empty() || checkInitIsRequired(CGF, Privates))) {
      Result.TaskDupFn = emitTaskDupFunction(
          CGM, Loc, D, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTyRD,
          KmpTaskTQTyRD, SharedsTy, SharedsPtrTy, Data, Privates,
          /*WithLastIter=*/!Data.LastprivateVars.empty());
    }
  }
  // Fields of union "kmp_cmplrdata_t" for destructors and priority.
  enum { Priority = 0, Destructors = 1 };
  // Provide pointer to function with destructors for privates.
  auto FI = std::next(KmpTaskTQTyRD->field_begin(), Data1);
  const RecordDecl *KmpCmplrdataUD =
      (*FI)->getType()->getAsUnionType()->getDecl();
  if (NeedsCleanup) {
    llvm::Value *DestructorFn = emitDestructorsFunction(
        CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
        KmpTaskTWithPrivatesQTy);
    LValue Data1LV = CGF.EmitLValueForField(TDBase, *FI);
    LValue DestructorsLV = CGF.EmitLValueForField(
        Data1LV, *std::next(KmpCmplrdataUD->field_begin(), Destructors));
    CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                              DestructorFn, KmpRoutineEntryPtrTy),
                          DestructorsLV);
  }
  // Set priority.
  if (Data.Priority.getInt()) {
    LValue Data2LV = CGF.EmitLValueForField(
        TDBase, *std::next(KmpTaskTQTyRD->field_begin(), Data2));
    LValue PriorityLV = CGF.EmitLValueForField(
        Data2LV, *std::next(KmpCmplrdataUD->field_begin(), Priority));
    CGF.EmitStoreOfScalar(Data.Priority.getPointer(), PriorityLV);
  }
  Result.NewTask = NewTask;
  Result.TaskEntry = TaskEntry;
  Result.NewTaskNewTaskTTy = NewTaskNewTaskTTy;
  Result.TDBase = TDBase;
  Result.KmpTaskTQTyRD = KmpTaskTQTyRD;
  return Result;
}

namespace {
/// Dependence kind for RTL.
enum RTLDependenceKindTy {
  DepIn = 0x01,
  DepInOut = 0x3,
  DepMutexInOutSet = 0x4,
  DepInOutSet = 0x8
};
/// Fields ids in kmp_depend_info record.
enum RTLDependInfoFieldsTy { BaseAddr, Len, Flags };
} // namespace

/// Translates internal dependency kind into the runtime kind.
static RTLDependenceKindTy translateDependencyKind(OpenMPDependClauseKind K) {
  RTLDependenceKindTy DepKind;
  switch (K) {
  case OMPC_DEPEND_in:
    DepKind = DepIn;
    break;
  // Out and InOut dependencies must use the same code.
4609 case OMPC_DEPEND_out: 4610 case OMPC_DEPEND_inout: 4611 DepKind = DepInOut; 4612 break; 4613 case OMPC_DEPEND_mutexinoutset: 4614 DepKind = DepMutexInOutSet; 4615 break; 4616 case OMPC_DEPEND_inoutset: 4617 DepKind = DepInOutSet; 4618 break; 4619 case OMPC_DEPEND_source: 4620 case OMPC_DEPEND_sink: 4621 case OMPC_DEPEND_depobj: 4622 case OMPC_DEPEND_unknown: 4623 llvm_unreachable("Unknown task dependence type"); 4624 } 4625 return DepKind; 4626 } 4627 4628 /// Builds kmp_depend_info, if it is not built yet, and builds flags type. 4629 static void getDependTypes(ASTContext &C, QualType &KmpDependInfoTy, 4630 QualType &FlagsTy) { 4631 FlagsTy = C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false); 4632 if (KmpDependInfoTy.isNull()) { 4633 RecordDecl *KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info"); 4634 KmpDependInfoRD->startDefinition(); 4635 addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType()); 4636 addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType()); 4637 addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy); 4638 KmpDependInfoRD->completeDefinition(); 4639 KmpDependInfoTy = C.getRecordType(KmpDependInfoRD); 4640 } 4641 } 4642 4643 std::pair<llvm::Value *, LValue> 4644 CGOpenMPRuntime::getDepobjElements(CodeGenFunction &CGF, LValue DepobjLVal, 4645 SourceLocation Loc) { 4646 ASTContext &C = CGM.getContext(); 4647 QualType FlagsTy; 4648 getDependTypes(C, KmpDependInfoTy, FlagsTy); 4649 RecordDecl *KmpDependInfoRD = 4650 cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl()); 4651 LValue Base = CGF.EmitLoadOfPointerLValue( 4652 DepobjLVal.getAddress(CGF), 4653 C.getPointerType(C.VoidPtrTy).castAs<PointerType>()); 4654 QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy); 4655 Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4656 Base.getAddress(CGF), CGF.ConvertTypeForMem(KmpDependInfoPtrTy), 4657 CGF.ConvertTypeForMem(KmpDependInfoTy)); 4658 Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, 
Base.getBaseInfo(), 4659 Base.getTBAAInfo()); 4660 Address DepObjAddr = CGF.Builder.CreateGEP( 4661 Addr, llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true)); 4662 LValue NumDepsBase = CGF.MakeAddrLValue( 4663 DepObjAddr, KmpDependInfoTy, Base.getBaseInfo(), Base.getTBAAInfo()); 4664 // NumDeps = deps[i].base_addr; 4665 LValue BaseAddrLVal = CGF.EmitLValueForField( 4666 NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr)); 4667 llvm::Value *NumDeps = CGF.EmitLoadOfScalar(BaseAddrLVal, Loc); 4668 return std::make_pair(NumDeps, Base); 4669 } 4670 4671 static void emitDependData(CodeGenFunction &CGF, QualType &KmpDependInfoTy, 4672 llvm::PointerUnion<unsigned *, LValue *> Pos, 4673 const OMPTaskDataTy::DependData &Data, 4674 Address DependenciesArray) { 4675 CodeGenModule &CGM = CGF.CGM; 4676 ASTContext &C = CGM.getContext(); 4677 QualType FlagsTy; 4678 getDependTypes(C, KmpDependInfoTy, FlagsTy); 4679 RecordDecl *KmpDependInfoRD = 4680 cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl()); 4681 llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy); 4682 4683 OMPIteratorGeneratorScope IteratorScope( 4684 CGF, cast_or_null<OMPIteratorExpr>( 4685 Data.IteratorExpr ? 
                 Data.IteratorExpr->IgnoreParenImpCasts()
                                  : nullptr));
  for (const Expr *E : Data.DepExprs) {
    llvm::Value *Addr;
    llvm::Value *Size;
    // Compute the address and size (in bytes) of this dependence item.
    std::tie(Addr, Size) = getPointerAndSize(CGF, E);
    LValue Base;
    // Pos is either a compile-time index (unsigned *) or, when the number of
    // dependencies is only known at runtime, a counter in memory (LValue *).
    if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
      Base = CGF.MakeAddrLValue(
          CGF.Builder.CreateConstGEP(DependenciesArray, *P), KmpDependInfoTy);
    } else {
      LValue &PosLVal = *Pos.get<LValue *>();
      llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
      Base = CGF.MakeAddrLValue(
          CGF.Builder.CreateGEP(DependenciesArray, Idx), KmpDependInfoTy);
    }
    // deps[i].base_addr = &<Dependencies[i].second>;
    LValue BaseAddrLVal = CGF.EmitLValueForField(
        Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
    CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
                          BaseAddrLVal);
    // deps[i].len = sizeof(<Dependencies[i].second>);
    LValue LenLVal = CGF.EmitLValueForField(
        Base, *std::next(KmpDependInfoRD->field_begin(), Len));
    CGF.EmitStoreOfScalar(Size, LenLVal);
    // deps[i].flags = <Dependencies[i].first>;
    RTLDependenceKindTy DepKind = translateDependencyKind(Data.DepKind);
    LValue FlagsLVal = CGF.EmitLValueForField(
        Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
    CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
                          FlagsLVal);
    // Advance the position: bump the static counter, or emit a runtime
    // increment of the in-memory counter.
    if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
      ++(*P);
    } else {
      LValue &PosLVal = *Pos.get<LValue *>();
      llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
      Idx = CGF.Builder.CreateNUWAdd(Idx,
                                     llvm::ConstantInt::get(Idx->getType(), 1));
      CGF.EmitStoreOfScalar(Idx, PosLVal);
    }
  }
}

/// Emit code that computes, for each 'depobj' dependency expression in
/// \p Data, the number of dependence records stored in that depobj object.
/// Returns one size value per dependence expression, in order.
static SmallVector<llvm::Value *, 4>
emitDepobjElementsSizes(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
                        const OMPTaskDataTy::DependData &Data) {
  assert(Data.DepKind == OMPC_DEPEND_depobj &&
         "Expected depobj dependecy kind.");
  SmallVector<llvm::Value *, 4> Sizes;
  SmallVector<LValue, 4> SizeLVals;
  ASTContext &C = CGF.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
  llvm::Type *KmpDependInfoPtrT = CGF.ConvertTypeForMem(KmpDependInfoPtrTy);
  {
    // Open the iterator scope (if any) so iterator variables are usable while
    // evaluating the dependence expressions.
    OMPIteratorGeneratorScope IteratorScope(
        CGF, cast_or_null<OMPIteratorExpr>(
                 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
                                   : nullptr));
    for (const Expr *E : Data.DepExprs) {
      LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
      LValue Base = CGF.EmitLoadOfPointerLValue(
          DepobjLVal.getAddress(CGF),
          C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
      Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          Base.getAddress(CGF), KmpDependInfoPtrT,
          CGF.ConvertTypeForMem(KmpDependInfoTy));
      Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(),
                                Base.getTBAAInfo());
      // The element count lives in the record immediately *before* the first
      // dependence record of the depobj (written by emitDepobjDependClause),
      // hence the GEP by -1.
      Address DepObjAddr = CGF.Builder.CreateGEP(
          Addr, llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
      LValue NumDepsBase = CGF.MakeAddrLValue(
          DepObjAddr, KmpDependInfoTy, Base.getBaseInfo(), Base.getTBAAInfo());
      // NumDeps = deps[i].base_addr;
      LValue BaseAddrLVal = CGF.EmitLValueForField(
          NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
      llvm::Value *NumDeps =
          CGF.EmitLoadOfScalar(BaseAddrLVal, E->getExprLoc());
      // Accumulate the count into a fresh memory temporary so the value
      // survives past the iterator scope.
      LValue NumLVal = CGF.MakeAddrLValue(
          CGF.CreateMemTemp(C.getUIntPtrType(), "depobj.size.addr"),
          C.getUIntPtrType());
      CGF.Builder.CreateStore(llvm::ConstantInt::get(CGF.IntPtrTy, 0),
                              NumLVal.getAddress(CGF));
      llvm::Value *PrevVal = CGF.EmitLoadOfScalar(NumLVal, E->getExprLoc());
      llvm::Value *Add = CGF.Builder.CreateNUWAdd(PrevVal, NumDeps);
      CGF.EmitStoreOfScalar(Add, NumLVal);
      SizeLVals.push_back(NumLVal);
    }
  }
  // Re-load the accumulated sizes after the iterator scope is closed.
  for (unsigned I = 0, E = SizeLVals.size(); I < E; ++I) {
    llvm::Value *Size =
        CGF.EmitLoadOfScalar(SizeLVals[I], Data.DepExprs[I]->getExprLoc());
    Sizes.push_back(Size);
  }
  return Sizes;
}

/// Copy the dependence records referenced by the 'depobj' expressions in
/// \p Data into \p DependenciesArray, starting at the runtime position
/// \p PosLVal; \p PosLVal is advanced past the copied records.
static void emitDepobjElements(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
                               LValue PosLVal,
                               const OMPTaskDataTy::DependData &Data,
                               Address DependenciesArray) {
  assert(Data.DepKind == OMPC_DEPEND_depobj &&
         "Expected depobj dependecy kind.");
  ASTContext &C = CGF.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
  llvm::Type *KmpDependInfoPtrT = CGF.ConvertTypeForMem(KmpDependInfoPtrTy);
  llvm::Value *ElSize = CGF.getTypeSize(KmpDependInfoTy);
  {
    OMPIteratorGeneratorScope IteratorScope(
        CGF, cast_or_null<OMPIteratorExpr>(
                 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
                                   : nullptr));
    for (unsigned I = 0, End = Data.DepExprs.size(); I < End; ++I) {
      const Expr *E = Data.DepExprs[I];
      LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
      LValue Base = CGF.EmitLoadOfPointerLValue(
          DepobjLVal.getAddress(CGF),
          C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
      Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          Base.getAddress(CGF), KmpDependInfoPtrT,
          CGF.ConvertTypeForMem(KmpDependInfoTy));
      Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(),
                                Base.getTBAAInfo());

      // Get number of elements in a single depobj.
      // It is stored in the record that precedes the first dependence record
      // of the depobj (see emitDepobjDependClause), hence the GEP by -1.
      Address DepObjAddr = CGF.Builder.CreateGEP(
          Addr, llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
      LValue NumDepsBase = CGF.MakeAddrLValue(
          DepObjAddr, KmpDependInfoTy, Base.getBaseInfo(), Base.getTBAAInfo());
      // NumDeps = deps[i].base_addr;
      LValue BaseAddrLVal = CGF.EmitLValueForField(
          NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
      llvm::Value *NumDeps =
          CGF.EmitLoadOfScalar(BaseAddrLVal, E->getExprLoc());

      // memcopy dependency data.
      llvm::Value *Size = CGF.Builder.CreateNUWMul(
          ElSize,
          CGF.Builder.CreateIntCast(NumDeps, CGF.SizeTy, /*isSigned=*/false));
      llvm::Value *Pos = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
      Address DepAddr = CGF.Builder.CreateGEP(DependenciesArray, Pos);
      CGF.Builder.CreateMemCpy(DepAddr, Base.getAddress(CGF), Size);

      // Increase pos.
      // pos += size;
      llvm::Value *Add = CGF.Builder.CreateNUWAdd(Pos, NumDeps);
      CGF.EmitStoreOfScalar(Add, PosLVal);
    }
  }
}

// Emit the dependence array for the 'depend' clauses of a task: allocate a
// (possibly variable-length) array of kmp_depend_info records, fill it from
// regular, iterator-based, and depobj dependencies, and return the total
// element count plus the array address (as void*/i8*).
std::pair<llvm::Value *, Address> CGOpenMPRuntime::emitDependClause(
    CodeGenFunction &CGF, ArrayRef<OMPTaskDataTy::DependData> Dependencies,
    SourceLocation Loc) {
  // No dependence expressions at all: nothing to emit.
  if (llvm::all_of(Dependencies, [](const OMPTaskDataTy::DependData &D) {
        return D.DepExprs.empty();
      }))
    return std::make_pair(nullptr, Address::invalid());
  // Process list of dependencies.
  ASTContext &C = CGM.getContext();
  Address DependenciesArray = Address::invalid();
  llvm::Value *NumOfElements = nullptr;
  // Count only the dependencies whose number is known at compile time
  // (neither depobj nor iterator-based).
  unsigned NumDependencies = std::accumulate(
      Dependencies.begin(), Dependencies.end(), 0,
      [](unsigned V, const OMPTaskDataTy::DependData &D) {
        return D.DepKind == OMPC_DEPEND_depobj
                   ? V
                   : (V + (D.IteratorExpr ? 0 : D.DepExprs.size()));
      });
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  bool HasDepobjDeps = false;
  bool HasRegularWithIterators = false;
  llvm::Value *NumOfDepobjElements = llvm::ConstantInt::get(CGF.IntPtrTy, 0);
  llvm::Value *NumOfRegularWithIterators =
      llvm::ConstantInt::get(CGF.IntPtrTy, 0);
  // Calculate number of depobj dependencies and regular deps with the
  // iterators.
  for (const OMPTaskDataTy::DependData &D : Dependencies) {
    if (D.DepKind == OMPC_DEPEND_depobj) {
      SmallVector<llvm::Value *, 4> Sizes =
          emitDepobjElementsSizes(CGF, KmpDependInfoTy, D);
      for (llvm::Value *Size : Sizes) {
        NumOfDepobjElements =
            CGF.Builder.CreateNUWAdd(NumOfDepobjElements, Size);
      }
      HasDepobjDeps = true;
      continue;
    }
    // Include number of iterations, if any.

    if (const auto *IE = cast_or_null<OMPIteratorExpr>(D.IteratorExpr)) {
      // Total records for this clause = product of iterator trip counts times
      // the number of dependence expressions.
      for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
        llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
        Sz = CGF.Builder.CreateIntCast(Sz, CGF.IntPtrTy, /*isSigned=*/false);
        llvm::Value *NumClauseDeps = CGF.Builder.CreateNUWMul(
            Sz, llvm::ConstantInt::get(CGF.IntPtrTy, D.DepExprs.size()));
        NumOfRegularWithIterators =
            CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumClauseDeps);
      }
      HasRegularWithIterators = true;
      continue;
    }
  }

  QualType KmpDependInfoArrayTy;
  if (HasDepobjDeps || HasRegularWithIterators) {
    // Runtime-sized array: sum the static count with the dynamic counts and
    // emit a VLA of kmp_depend_info.
    NumOfElements = llvm::ConstantInt::get(CGM.IntPtrTy, NumDependencies,
                                           /*isSigned=*/false);
    if (HasDepobjDeps) {
      NumOfElements =
          CGF.Builder.CreateNUWAdd(NumOfDepobjElements, NumOfElements);
    }
    if (HasRegularWithIterators) {
      NumOfElements =
          CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumOfElements);
    }
    auto *OVE = new (C) OpaqueValueExpr(
        Loc, C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0),
        VK_PRValue);
    CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, OVE,
                                                  RValue::get(NumOfElements));
    KmpDependInfoArrayTy =
        C.getVariableArrayType(KmpDependInfoTy, OVE, ArrayType::Normal,
                               /*IndexTypeQuals=*/0, SourceRange(Loc, Loc));
    // CGF.EmitVariablyModifiedType(KmpDependInfoArrayTy);
    // Properly emit variable-sized array.
    auto *PD = ImplicitParamDecl::Create(C, KmpDependInfoArrayTy,
                                         ImplicitParamDecl::Other);
    CGF.EmitVarDecl(*PD);
    DependenciesArray = CGF.GetAddrOfLocalVar(PD);
    NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
                                              /*isSigned=*/false);
  } else {
    // Fully static count: a plain constant-sized stack array suffices.
    KmpDependInfoArrayTy = C.getConstantArrayType(
        KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies), nullptr,
        ArrayType::Normal, /*IndexTypeQuals=*/0);
    DependenciesArray =
        CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr");
    DependenciesArray = CGF.Builder.CreateConstArrayGEP(DependenciesArray, 0);
    NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumDependencies,
                                           /*isSigned=*/false);
  }
  // First fill in regular (non-iterator, non-depobj) dependencies using a
  // compile-time position counter.
  unsigned Pos = 0;
  for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
    if (Dependencies[I].DepKind == OMPC_DEPEND_depobj ||
        Dependencies[I].IteratorExpr)
      continue;
    emitDependData(CGF, KmpDependInfoTy, &Pos, Dependencies[I],
                   DependenciesArray);
  }
  // Copy regular dependencies with iterators.
  LValue PosLVal = CGF.MakeAddrLValue(
      CGF.CreateMemTemp(C.getSizeType(), "dep.counter.addr"), C.getSizeType());
  CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
  for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
    if (Dependencies[I].DepKind == OMPC_DEPEND_depobj ||
        !Dependencies[I].IteratorExpr)
      continue;
    emitDependData(CGF, KmpDependInfoTy, &PosLVal, Dependencies[I],
                   DependenciesArray);
  }
  // Copy final depobj arrays without iterators.
  if (HasDepobjDeps) {
    for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
      if (Dependencies[I].DepKind != OMPC_DEPEND_depobj)
        continue;
      emitDepobjElements(CGF, KmpDependInfoTy, PosLVal, Dependencies[I],
                         DependenciesArray);
    }
  }
  DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      DependenciesArray, CGF.VoidPtrTy, CGF.Int8Ty);
  return std::make_pair(NumOfElements, DependenciesArray);
}

// Emit the dependence array backing an OpenMP 'depobj' object: allocate it on
// the heap via __kmpc_alloc, store the element count in a leading extra
// record, fill the records, and return a pointer to the first dependence
// record (one past the size record).
Address CGOpenMPRuntime::emitDepobjDependClause(
    CodeGenFunction &CGF, const OMPTaskDataTy::DependData &Dependencies,
    SourceLocation Loc) {
  if (Dependencies.DepExprs.empty())
    return Address::invalid();
  // Process list of dependencies.
  ASTContext &C = CGM.getContext();
  Address DependenciesArray = Address::invalid();
  unsigned NumDependencies = Dependencies.DepExprs.size();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());

  llvm::Value *Size;
  // Define type kmp_depend_info[<Dependencies.size()>];
  // For depobj reserve one extra element to store the number of elements.
  // It is required to handle depobj(x) update(in) construct.
  // kmp_depend_info[<Dependencies.size()>] deps;
  llvm::Value *NumDepsVal;
  CharUnits Align = C.getTypeAlignInChars(KmpDependInfoTy);
  if (const auto *IE =
          cast_or_null<OMPIteratorExpr>(Dependencies.IteratorExpr)) {
    // Dynamic element count: product of all iterator trip counts.
    NumDepsVal = llvm::ConstantInt::get(CGF.SizeTy, 1);
    for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
      llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
      Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
      NumDepsVal = CGF.Builder.CreateNUWMul(NumDepsVal, Sz);
    }
    // +1 for the leading record that stores the element count.
    Size = CGF.Builder.CreateNUWAdd(llvm::ConstantInt::get(CGF.SizeTy, 1),
                                    NumDepsVal);
    CharUnits SizeInBytes =
        C.getTypeSizeInChars(KmpDependInfoTy).alignTo(Align);
    llvm::Value *RecSize = CGM.getSize(SizeInBytes);
    Size = CGF.Builder.CreateNUWMul(Size, RecSize);
    NumDepsVal =
        CGF.Builder.CreateIntCast(NumDepsVal, CGF.IntPtrTy, /*isSigned=*/false);
  } else {
    // Static element count: size of kmp_depend_info[NumDependencies + 1]
    // (the +1 again reserves the leading size record).
    QualType KmpDependInfoArrayTy = C.getConstantArrayType(
        KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies + 1),
        nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0);
    CharUnits Sz = C.getTypeSizeInChars(KmpDependInfoArrayTy);
    Size = CGM.getSize(Sz.alignTo(Align));
    NumDepsVal = llvm::ConstantInt::get(CGF.IntPtrTy, NumDependencies);
  }
  // Need to allocate on the dynamic memory.
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  // Use default allocator.
  llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  llvm::Value *Args[] = {ThreadID, Size, Allocator};

  llvm::Value *Addr =
      CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_alloc),
                          Args, ".dep.arr.addr");
  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      Addr, CGF.ConvertTypeForMem(KmpDependInfoTy)->getPointerTo());
  DependenciesArray = Address::deprecated(Addr, Align);
  // Write number of elements in the first element of array for depobj.
  LValue Base = CGF.MakeAddrLValue(DependenciesArray, KmpDependInfoTy);
  // deps[i].base_addr = NumDependencies;
  LValue BaseAddrLVal = CGF.EmitLValueForField(
      Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
  CGF.EmitStoreOfScalar(NumDepsVal, BaseAddrLVal);
  // Start filling at index 1 (index 0 holds the size record); use an
  // in-memory counter when an iterator makes the count dynamic.
  llvm::PointerUnion<unsigned *, LValue *> Pos;
  unsigned Idx = 1;
  LValue PosLVal;
  if (Dependencies.IteratorExpr) {
    PosLVal = CGF.MakeAddrLValue(
        CGF.CreateMemTemp(C.getSizeType(), "iterator.counter.addr"),
        C.getSizeType());
    CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Idx), PosLVal,
                          /*IsInit=*/true);
    Pos = &PosLVal;
  } else {
    Pos = &Idx;
  }
  emitDependData(CGF, KmpDependInfoTy, Pos, Dependencies, DependenciesArray);
  // Return a pointer past the size record, i.e. to the first real
  // dependence entry.
  DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateConstGEP(DependenciesArray, 1), CGF.VoidPtrTy,
      CGF.Int8Ty);
  return DependenciesArray;
}

// Emit code for the 'destroy' clause of a 'depobj' directive: free the
// heap-allocated dependence array through __kmpc_free.
void CGOpenMPRuntime::emitDestroyClause(CodeGenFunction &CGF, LValue DepobjLVal,
                                        SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  LValue Base = CGF.EmitLoadOfPointerLValue(
      DepobjLVal.getAddress(CGF),
      C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
  QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
  Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      Base.getAddress(CGF), CGF.ConvertTypeForMem(KmpDependInfoPtrTy),
      CGF.ConvertTypeForMem(KmpDependInfoTy));
  // Step back to the leading size record: that is the pointer originally
  // returned by __kmpc_alloc, which must be the one passed to __kmpc_free.
  llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
      Addr.getElementType(), Addr.getPointer(),
      llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
  DepObjAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(DepObjAddr,
                                                               CGF.VoidPtrTy);
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  // Use default allocator.
  llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  llvm::Value *Args[] = {ThreadID, DepObjAddr, Allocator};

  // _kmpc_free(gtid, addr, nullptr);
  (void)CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                                CGM.getModule(), OMPRTL___kmpc_free),
                            Args);
}

// Emit code for the 'update' clause of a 'depobj' directive: rewrite the
// flags field of every dependence record in the depobj to \p NewDepKind.
void CGOpenMPRuntime::emitUpdateClause(CodeGenFunction &CGF, LValue DepobjLVal,
                                       OpenMPDependClauseKind NewDepKind,
                                       SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
  llvm::Value *NumDeps;
  LValue Base;
  std::tie(NumDeps, Base) = getDepobjElements(CGF, DepobjLVal, Loc);

  Address Begin = Base.getAddress(CGF);
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *End = CGF.Builder.CreateGEP(
      Begin.getElementType(), Begin.getPointer(), NumDeps);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.done");
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);
  // PHI over the current element pointer; second incoming edge is added
  // below once the loop latch block is known.
  llvm::PHINode *ElementPHI =
      CGF.Builder.CreatePHI(Begin.getType(), 2, "omp.elementPast");
  ElementPHI->addIncoming(Begin.getPointer(), EntryBB);
  Begin = Begin.withPointer(ElementPHI);
  Base = CGF.MakeAddrLValue(Begin, KmpDependInfoTy, Base.getBaseInfo(),
                            Base.getTBAAInfo());
  // deps[i].flags = NewDepKind;
  RTLDependenceKindTy DepKind = translateDependencyKind(NewDepKind);
  LValue FlagsLVal = CGF.EmitLValueForField(
      Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
  CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
                        FlagsLVal);

  // Shift the address forward by one element.
  Address ElementNext =
      CGF.Builder.CreateConstGEP(Begin, /*Index=*/1, "omp.elementNext");
  ElementPHI->addIncoming(ElementNext.getPointer(),
                          CGF.Builder.GetInsertBlock());
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(ElementNext.getPointer(), End, "omp.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}

// Emit the runtime call(s) that launch a previously initialized task:
// __kmpc_omp_task(_with_deps) on the 'then' path, and the
// begin_if0/entry/complete_if0 sequence (serialized task) on the 'else'
// path of an 'if' clause.
void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
                                   const OMPExecutableDirective &D,
                                   llvm::Function *TaskFunction,
                                   QualType SharedsTy, Address Shareds,
                                   const Expr *IfCond,
                                   const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint())
    return;

  TaskResultTy Result =
      emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
  llvm::Value *NewTask = Result.NewTask;
  llvm::Function *TaskEntry = Result.TaskEntry;
  llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy;
  LValue TDBase = Result.TDBase;
  const RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD;
  // Process list of dependences.
  Address DependenciesArray = Address::invalid();
  llvm::Value *NumOfElements;
  std::tie(NumOfElements, DependenciesArray) =
      emitDependClause(CGF, Data.Dependences, Loc);

  // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
  // libcall.
  // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
  // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
  // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if dependence
  // list is not empty
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
  llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask };
  llvm::Value *DepTaskArgs[7];
  if (!Data.Dependences.empty()) {
    DepTaskArgs[0] = UpLoc;
    DepTaskArgs[1] = ThreadID;
    DepTaskArgs[2] = NewTask;
    DepTaskArgs[3] = NumOfElements;
    DepTaskArgs[4] = DependenciesArray.getPointer();
    DepTaskArgs[5] = CGF.Builder.getInt32(0);
    DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }
  auto &&ThenCodeGen = [this, &Data, TDBase, KmpTaskTQTyRD, &TaskArgs,
                        &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) {
    // Untied tasks restart from part 0; initialize the part_id field.
    if (!Data.Tied) {
      auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
      LValue PartIdLVal = CGF.EmitLValueForField(TDBase, *PartIdFI);
      CGF.EmitStoreOfScalar(CGF.Builder.getInt32(0), PartIdLVal);
    }
    if (!Data.Dependences.empty()) {
      CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___kmpc_omp_task_with_deps),
          DepTaskArgs);
    } else {
      CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_omp_task),
                          TaskArgs);
    }
    // Check if parent region is untied and build return for untied task;
    if (auto *Region =
            dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
      Region->emitUntiedSwitch(CGF);
  };

  llvm::Value *DepWaitTaskArgs[6];
  if (!Data.Dependences.empty()) {
    DepWaitTaskArgs[0] = UpLoc;
    DepWaitTaskArgs[1] = ThreadID;
    DepWaitTaskArgs[2] = NumOfElements;
    DepWaitTaskArgs[3] = DependenciesArray.getPointer();
    DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
    DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }
  auto &M = CGM.getModule();
  auto &&ElseCodeGen = [this, &M, &TaskArgs, ThreadID, NewTaskNewTaskTTy,
                        TaskEntry, &Data, &DepWaitTaskArgs,
                        Loc](CodeGenFunction &CGF, PrePostActionTy &) {
    CodeGenFunction::RunCleanupsScope LocalScope(CGF);
    // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
    // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
    // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info
    // is specified.
    if (!Data.Dependences.empty())
      CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_omp_wait_deps),
          DepWaitTaskArgs);
    // Call proxy_task_entry(gtid, new_task);
    auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy,
                      Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
      Action.Enter(CGF);
      llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy};
      CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskEntry,
                                                          OutlinedFnArgs);
    };

    // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
    // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
    RegionCodeGenTy RCG(CodeGen);
    CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                              M, OMPRTL___kmpc_omp_task_begin_if0),
                          TaskArgs,
                          OMPBuilder.getOrCreateRuntimeFunction(
                              M, OMPRTL___kmpc_omp_task_complete_if0),
                          TaskArgs);
    RCG.setAction(Action);
    RCG(CGF);
  };

  if (IfCond) {
    emitIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen);
  } else {
    RegionCodeGenTy ThenRCG(ThenCodeGen);
    ThenRCG(CGF);
  }
}

void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPLoopDirective &D,
                                       llvm::Function *TaskFunction,
                                       QualType SharedsTy, Address Shareds,
                                       const Expr *IfCond,
                                       const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint())
    return;
  TaskResultTy Result =
      emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
  // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
  // libcall.
  // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
  // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
  // sched, kmp_uint64 grainsize, void *task_dup);
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
  llvm::Value *IfVal;
  if (IfCond) {
    IfVal = CGF.Builder.CreateIntCast(CGF.EvaluateExprAsBool(IfCond), CGF.IntTy,
                                      /*isSigned=*/true);
  } else {
    IfVal = llvm::ConstantInt::getSigned(CGF.IntTy, /*V=*/1);
  }

  // Initialize the lower-bound, upper-bound and stride fields of the task
  // record from the loop directive's helper variables.
  LValue LBLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound));
  const auto *LBVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getLowerBoundVariable())->getDecl());
  CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(CGF),
                       LBLVal.getQuals(),
                       /*IsInitializer=*/true);
  LValue UBLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound));
  const auto *UBVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getUpperBoundVariable())->getDecl());
  CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(CGF),
                       UBLVal.getQuals(),
                       /*IsInitializer=*/true);
  LValue StLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTStride));
  const auto *StVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl());
  CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(CGF),
                       StLVal.getQuals(),
                       /*IsInitializer=*/true);
  // Store reductions address.
  LValue RedLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTReductions));
  if (Data.Reductions) {
    CGF.EmitStoreOfScalar(Data.Reductions, RedLVal);
  } else {
    CGF.EmitNullInitialization(RedLVal.getAddress(CGF),
                               CGF.getContext().VoidPtrTy);
  }
  // Values for the 'sched' argument of __kmpc_taskloop.
  enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 };
  llvm::Value *TaskArgs[] = {
      UpLoc,
      ThreadID,
      Result.NewTask,
      IfVal,
      LBLVal.getPointer(CGF),
      UBLVal.getPointer(CGF),
      CGF.EmitLoadOfScalar(StLVal, Loc),
      llvm::ConstantInt::getSigned(
          CGF.IntTy, 1), // Always 1 because taskgroup emitted by the compiler
      llvm::ConstantInt::getSigned(
          CGF.IntTy, Data.Schedule.getPointer()
                         ? Data.Schedule.getInt() ? NumTasks : Grainsize
                         : NoSchedule),
      Data.Schedule.getPointer()
          ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty,
                                      /*isSigned=*/false)
          : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0),
      Result.TaskDupFn ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                             Result.TaskDupFn, CGF.VoidPtrTy)
                       : llvm::ConstantPointerNull::get(CGF.VoidPtrTy)};
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_taskloop),
                      TaskArgs);
}

/// Emit reduction operation for each element of array (required for
/// array sections) LHS op = RHS.
/// \param Type Type of array.
/// \param LHSVar Variable on the left side of the reduction operation
/// (references element of array in original variable).
/// \param RHSVar Variable on the right side of the reduction operation
/// (references element of array in original variable).
/// \param RedOpGen Generator of reduction operation with use of LHSVar and
/// RHSVar.
static void EmitOMPAggregateReduction(
    CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar,
    const VarDecl *RHSVar,
    const llvm::function_ref<void(CodeGenFunction &CGF, const Expr *,
                                  const Expr *, const Expr *)> &RedOpGen,
    const Expr *XExpr = nullptr, const Expr *EExpr = nullptr,
    const Expr *UpExpr = nullptr) {
  // Perform element-by-element initialization.
  QualType ElementTy;
  Address LHSAddr = CGF.GetAddrOfLocalVar(LHSVar);
  Address RHSAddr = CGF.GetAddrOfLocalVar(RHSVar);

  // Drill down to the base element type on both arrays.
  const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, LHSAddr);

  llvm::Value *RHSBegin = RHSAddr.getPointer();
  llvm::Value *LHSBegin = LHSAddr.getPointer();
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *LHSEnd =
      CGF.Builder.CreateGEP(LHSAddr.getElementType(), LHSBegin, NumElements);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arraycpy.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arraycpy.done");
  // Skip the loop entirely for zero-length arrays.
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(LHSBegin, LHSEnd, "omp.arraycpy.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);

  // PHIs track the current source/destination element; the latch incoming
  // edges are added after the loop body is emitted.
  llvm::PHINode *RHSElementPHI = CGF.Builder.CreatePHI(
      RHSBegin->getType(), 2, "omp.arraycpy.srcElementPast");
  RHSElementPHI->addIncoming(RHSBegin, EntryBB);
  Address RHSElementCurrent = Address::deprecated(
      RHSElementPHI,
      RHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  llvm::PHINode *LHSElementPHI = CGF.Builder.CreatePHI(
      LHSBegin->getType(), 2, "omp.arraycpy.destElementPast");
  LHSElementPHI->addIncoming(LHSBegin, EntryBB);
  Address LHSElementCurrent = Address::deprecated(
      LHSElementPHI,
      LHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  // Temporarily remap LHSVar/RHSVar to the current elements so RedOpGen
  // operates element-wise.
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Scope.addPrivate(LHSVar, LHSElementCurrent);
  Scope.addPrivate(RHSVar, RHSElementCurrent);
  Scope.Privatize();
  RedOpGen(CGF, XExpr, EExpr, UpExpr);
  Scope.ForceCleanup();

  // Shift the address forward by one element.
  llvm::Value *LHSElementNext = CGF.Builder.CreateConstGEP1_32(
      LHSAddr.getElementType(), LHSElementPHI, /*Idx0=*/1,
      "omp.arraycpy.dest.element");
  llvm::Value *RHSElementNext = CGF.Builder.CreateConstGEP1_32(
      RHSAddr.getElementType(), RHSElementPHI, /*Idx0=*/1,
      "omp.arraycpy.src.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      CGF.Builder.CreateICmpEQ(LHSElementNext, LHSEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  LHSElementPHI->addIncoming(LHSElementNext, CGF.Builder.GetInsertBlock());
  RHSElementPHI->addIncoming(RHSElementNext, CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}

/// Emit reduction combiner.
/// If the combiner is a simple expression emit it as
/// is, otherwise consider it as combiner of UDR decl and emit it as a call of
/// UDR combiner function.
static void emitReductionCombiner(CodeGenFunction &CGF,
                                  const Expr *ReductionOp) {
  // A UDR combiner is a CallExpr whose callee is an OpaqueValueExpr referring
  // to an OMPDeclareReductionDecl; bind the real combiner function into the
  // opaque value and emit the call.
  if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
    if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
      if (const auto *DRE =
              dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
        if (const auto *DRD =
                dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) {
          std::pair<llvm::Function *, llvm::Function *> Reduction =
              CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
          RValue Func = RValue::get(Reduction.first);
          CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
          CGF.EmitIgnoredExpr(ReductionOp);
          return;
        }
  // Plain (non-UDR) combiner: just emit the expression.
  CGF.EmitIgnoredExpr(ReductionOp);
}

// Emit the internal 'reduce_func' passed to __kmpc_reduce{_nowait}: it takes
// two void* arrays of element pointers and applies each reduction operation
// element-wise, lhs[i] = RedOp<i>(lhs[i], rhs[i]).
llvm::Function *CGOpenMPRuntime::emitReductionFunction(
    SourceLocation Loc, llvm::Type *ArgsType, ArrayRef<const Expr *> Privates,
    ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
    ArrayRef<const Expr *> ReductionOps) {
  ASTContext &C = CGM.getContext();

  // void reduction_func(void *LHSArg, void *RHSArg);
  FunctionArgList Args;
  ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  Args.push_back(&LHSArg);
  Args.push_back(&RHSArg);
  const auto &CGFI =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  std::string Name = getName({"omp", "reduction", "reduction_func"});
  auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
                                    llvm::GlobalValue::InternalLinkage, Name,
                                    &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);

  // Dst = (void*[n])(LHSArg);
  // Src = (void*[n])(RHSArg);
  Address LHS = Address::deprecated(
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)), ArgsType),
      CGF.getPointerAlign());
  Address RHS = Address::deprecated(
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)), ArgsType),
      CGF.getPointerAlign());

  // ...
  // *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]);
  // ...
  // Map each LHS/RHS helper variable onto the corresponding array slot.
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  const auto *IPriv = Privates.begin();
  unsigned Idx = 0;
  for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) {
    const auto *RHSVar =
        cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl());
    Scope.addPrivate(RHSVar, emitAddrOfVarFromArray(CGF, RHS, Idx, RHSVar));
    const auto *LHSVar =
        cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl());
    Scope.addPrivate(LHSVar, emitAddrOfVarFromArray(CGF, LHS, Idx, LHSVar));
    QualType PrivTy = (*IPriv)->getType();
    if (PrivTy->isVariablyModifiedType()) {
      // Get array size and emit VLA type.
      // VLA privates occupy an extra slot in the array that carries the
      // runtime size.
      ++Idx;
      Address Elem = CGF.Builder.CreateConstArrayGEP(LHS, Idx);
      llvm::Value *Ptr = CGF.Builder.CreateLoad(Elem);
      const VariableArrayType *VLA =
          CGF.getContext().getAsVariableArrayType(PrivTy);
      const auto *OVE = cast<OpaqueValueExpr>(VLA->getSizeExpr());
      CodeGenFunction::OpaqueValueMapping OpaqueMap(
          CGF, OVE, RValue::get(CGF.Builder.CreatePtrToInt(Ptr, CGF.SizeTy)));
      CGF.EmitVariablyModifiedType(PrivTy);
    }
  }
  Scope.Privatize();
  IPriv = Privates.begin();
  const auto *ILHS = LHSExprs.begin();
  const auto *IRHS = RHSExprs.begin();
  for (const Expr *E : ReductionOps) {
    if ((*IPriv)->getType()->isArrayType()) {
      // Emit reduction for array section.
      const auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
      const auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
      EmitOMPAggregateReduction(
          CGF, (*IPriv)->getType(), LHSVar, RHSVar,
          [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
            emitReductionCombiner(CGF, E);
          });
    } else {
      // Emit reduction for array subscript or single variable.
      emitReductionCombiner(CGF, E);
    }
    ++IPriv;
    ++ILHS;
    ++IRHS;
  }
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}

// Emit a single reduction combination: element-wise for array sections,
// a direct combiner call otherwise.
void CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF,
                                                  const Expr *ReductionOp,
                                                  const Expr *PrivateRef,
                                                  const DeclRefExpr *LHS,
                                                  const DeclRefExpr *RHS) {
  if (PrivateRef->getType()->isArrayType()) {
    // Emit reduction for array section.
5531 const auto *LHSVar = cast<VarDecl>(LHS->getDecl()); 5532 const auto *RHSVar = cast<VarDecl>(RHS->getDecl()); 5533 EmitOMPAggregateReduction( 5534 CGF, PrivateRef->getType(), LHSVar, RHSVar, 5535 [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) { 5536 emitReductionCombiner(CGF, ReductionOp); 5537 }); 5538 } else { 5539 // Emit reduction for array subscript or single variable. 5540 emitReductionCombiner(CGF, ReductionOp); 5541 } 5542 } 5543 5544 void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc, 5545 ArrayRef<const Expr *> Privates, 5546 ArrayRef<const Expr *> LHSExprs, 5547 ArrayRef<const Expr *> RHSExprs, 5548 ArrayRef<const Expr *> ReductionOps, 5549 ReductionOptionsTy Options) { 5550 if (!CGF.HaveInsertPoint()) 5551 return; 5552 5553 bool WithNowait = Options.WithNowait; 5554 bool SimpleReduction = Options.SimpleReduction; 5555 5556 // Next code should be emitted for reduction: 5557 // 5558 // static kmp_critical_name lock = { 0 }; 5559 // 5560 // void reduce_func(void *lhs[<n>], void *rhs[<n>]) { 5561 // *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]); 5562 // ... 5563 // *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1], 5564 // *(Type<n>-1*)rhs[<n>-1]); 5565 // } 5566 // 5567 // ... 5568 // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]}; 5569 // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList), 5570 // RedList, reduce_func, &<lock>)) { 5571 // case 1: 5572 // ... 5573 // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]); 5574 // ... 5575 // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>); 5576 // break; 5577 // case 2: 5578 // ... 5579 // Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i])); 5580 // ... 5581 // [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);] 5582 // break; 5583 // default:; 5584 // } 5585 // 5586 // if SimpleReduction is true, only the next code is generated: 5587 // ... 
  // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  // ...

  ASTContext &C = CGM.getContext();

  if (SimpleReduction) {
    // Serial context: just run the combiners directly, no runtime calls.
    CodeGenFunction::RunCleanupsScope Scope(CGF);
    const auto *IPriv = Privates.begin();
    const auto *ILHS = LHSExprs.begin();
    const auto *IRHS = RHSExprs.begin();
    for (const Expr *E : ReductionOps) {
      emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
                                  cast<DeclRefExpr>(*IRHS));
      ++IPriv;
      ++ILHS;
      ++IRHS;
    }
    return;
  }

  // 1. Build a list of reduction variables.
  // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]};
  auto Size = RHSExprs.size();
  for (const Expr *E : Privates) {
    if (E->getType()->isVariablyModifiedType())
      // Reserve place for array size.
      ++Size;
  }
  llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size);
  QualType ReductionArrayTy =
      C.getConstantArrayType(C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal,
                             /*IndexTypeQuals=*/0);
  Address ReductionList =
      CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list");
  const auto *IPriv = Privates.begin();
  unsigned Idx = 0;
  for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) {
    Address Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
    CGF.Builder.CreateStore(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            CGF.EmitLValue(RHSExprs[I]).getPointer(CGF), CGF.VoidPtrTy),
        Elem);
    if ((*IPriv)->getType()->isVariablyModifiedType()) {
      // Store array size in the extra slot reserved above, smuggled through
      // the void* element as an inttoptr value.
      ++Idx;
      Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
      llvm::Value *Size = CGF.Builder.CreateIntCast(
          CGF.getVLASize(
                 CGF.getContext().getAsVariableArrayType((*IPriv)->getType()))
              .NumElts,
          CGF.SizeTy, /*isSigned=*/false);
      CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy),
                              Elem);
    }
  }

  // 2. Emit reduce_func().
  llvm::Function *ReductionFn = emitReductionFunction(
      Loc, CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo(), Privates,
      LHSExprs, RHSExprs, ReductionOps);

  // 3. Create static kmp_critical_name lock = { 0 };
  std::string Name = getName({"reduction"});
  llvm::Value *Lock = getCriticalRegionLock(Name);

  // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
  // RedList, reduce_func, &<lock>);
  llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE);
  llvm::Value *ThreadId = getThreadID(CGF, Loc);
  llvm::Value *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy);
  llvm::Value *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      ReductionList.getPointer(), CGF.VoidPtrTy);
  llvm::Value *Args[] = {
      IdentTLoc,                             // ident_t *<loc>
      ThreadId,                              // i32 <gtid>
      CGF.Builder.getInt32(RHSExprs.size()), // i32 <n>
      ReductionArrayTySize,                  // size_type sizeof(RedList)
      RL,                                    // void *RedList
      ReductionFn, // void (*) (void *, void *) <reduce_func>
      Lock         // kmp_critical_name *&<lock>
  };
  llvm::Value *Res = CGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(),
          WithNowait ? OMPRTL___kmpc_reduce_nowait : OMPRTL___kmpc_reduce),
      Args);

  // 5. Build switch(res)
  llvm::BasicBlock *DefaultBB = CGF.createBasicBlock(".omp.reduction.default");
  llvm::SwitchInst *SwInst =
      CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2);

  // 6. Build case 1:
  // ...
  // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  // ...
  // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  // break;
  llvm::BasicBlock *Case1BB = CGF.createBasicBlock(".omp.reduction.case1");
  SwInst->addCase(CGF.Builder.getInt32(1), Case1BB);
  CGF.EmitBlock(Case1BB);

  // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  llvm::Value *EndArgs[] = {
      IdentTLoc, // ident_t *<loc>
      ThreadId,  // i32 <gtid>
      Lock       // kmp_critical_name *&<lock>
  };
  auto &&CodeGen = [Privates, LHSExprs, RHSExprs, ReductionOps](
                       CodeGenFunction &CGF, PrePostActionTy &Action) {
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    const auto *IPriv = Privates.begin();
    const auto *ILHS = LHSExprs.begin();
    const auto *IRHS = RHSExprs.begin();
    for (const Expr *E : ReductionOps) {
      RT.emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
                                     cast<DeclRefExpr>(*IRHS));
      ++IPriv;
      ++ILHS;
      ++IRHS;
    }
  };
  RegionCodeGenTy RCG(CodeGen);
  // CommonActionTy emits the matching __kmpc_end_reduce{_nowait} call after
  // the combiners run.
  CommonActionTy Action(
      nullptr, llvm::None,
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(), WithNowait ? OMPRTL___kmpc_end_reduce_nowait
                                      : OMPRTL___kmpc_end_reduce),
      EndArgs);
  RCG.setAction(Action);
  RCG(CGF);

  CGF.EmitBranch(DefaultBB);

  // 7. Build case 2:
  // ...
  // Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
  // ...
  // break;
  llvm::BasicBlock *Case2BB = CGF.createBasicBlock(".omp.reduction.case2");
  SwInst->addCase(CGF.Builder.getInt32(2), Case2BB);
  CGF.EmitBlock(Case2BB);

  auto &&AtomicCodeGen = [Loc, Privates, LHSExprs, RHSExprs, ReductionOps](
                             CodeGenFunction &CGF, PrePostActionTy &Action) {
    const auto *ILHS = LHSExprs.begin();
    const auto *IRHS = RHSExprs.begin();
    const auto *IPriv = Privates.begin();
    for (const Expr *E : ReductionOps) {
      const Expr *XExpr = nullptr;
      const Expr *EExpr = nullptr;
      const Expr *UpExpr = nullptr;
      BinaryOperatorKind BO = BO_Comma;
      // Decompose 'x = <update expr>' so the update can be attempted as a
      // simple atomic read-modify-write.
      if (const auto *BO = dyn_cast<BinaryOperator>(E)) {
        if (BO->getOpcode() == BO_Assign) {
          XExpr = BO->getLHS();
          UpExpr = BO->getRHS();
        }
      }
      // Try to emit update expression as a simple atomic.
      const Expr *RHSExpr = UpExpr;
      if (RHSExpr) {
        // Analyze RHS part of the whole expression.
        if (const auto *ACO = dyn_cast<AbstractConditionalOperator>(
                RHSExpr->IgnoreParenImpCasts())) {
          // If this is a conditional operator, analyze its condition for
          // min/max reduction operator.
          RHSExpr = ACO->getCond();
        }
        if (const auto *BORHS =
                dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) {
          EExpr = BORHS->getRHS();
          BO = BORHS->getOpcode();
        }
      }
      if (XExpr) {
        const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
        auto &&AtomicRedGen = [BO, VD,
                               Loc](CodeGenFunction &CGF, const Expr *XExpr,
                                    const Expr *EExpr, const Expr *UpExpr) {
          LValue X = CGF.EmitLValue(XExpr);
          RValue E;
          if (EExpr)
            E = CGF.EmitAnyExpr(EExpr);
          CGF.EmitOMPAtomicSimpleUpdateExpr(
              X, E, BO, /*IsXLHSInRHSPart=*/true,
              llvm::AtomicOrdering::Monotonic, Loc,
              [&CGF, UpExpr, VD, Loc](RValue XRValue) {
                // Fallback for the non-lock-free case: stash the loaded value
                // in a temporary privatized as VD and re-evaluate the update
                // expression against it.
                CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
                Address LHSTemp = CGF.CreateMemTemp(VD->getType());
                CGF.emitOMPSimpleStore(
                    CGF.MakeAddrLValue(LHSTemp, VD->getType()), XRValue,
                    VD->getType().getNonReferenceType(), Loc);
                PrivateScope.addPrivate(VD, LHSTemp);
                (void)PrivateScope.Privatize();
                return CGF.EmitAnyExpr(UpExpr);
              });
        };
        if ((*IPriv)->getType()->isArrayType()) {
          // Emit atomic reduction for array section.
          const auto *RHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
          EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar,
                                    AtomicRedGen, XExpr, EExpr, UpExpr);
        } else {
          // Emit atomic reduction for array subscript or single variable.
          AtomicRedGen(CGF, XExpr, EExpr, UpExpr);
        }
      } else {
        // Emit as a critical region: the combiner is not a plain assignment,
        // so it cannot be lowered to an atomic update.
        auto &&CritRedGen = [E, Loc](CodeGenFunction &CGF, const Expr *,
                                     const Expr *, const Expr *) {
          CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
          std::string Name = RT.getName({"atomic_reduction"});
          RT.emitCriticalRegion(
              CGF, Name,
              [=](CodeGenFunction &CGF, PrePostActionTy &Action) {
                Action.Enter(CGF);
                emitReductionCombiner(CGF, E);
              },
              Loc);
        };
        if ((*IPriv)->getType()->isArrayType()) {
          const auto *LHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
          const auto *RHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
          EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar,
                                    CritRedGen);
        } else {
          CritRedGen(CGF, nullptr, nullptr, nullptr);
        }
      }
      ++ILHS;
      ++IRHS;
      ++IPriv;
    }
  };
  RegionCodeGenTy AtomicRCG(AtomicCodeGen);
  if (!WithNowait) {
    // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>);
    llvm::Value *EndArgs[] = {
        IdentTLoc, // ident_t *<loc>
        ThreadId,  // i32 <gtid>
        Lock       // kmp_critical_name *&<lock>
    };
    CommonActionTy Action(nullptr, llvm::None,
                          OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_end_reduce),
                          EndArgs);
    AtomicRCG.setAction(Action);
    AtomicRCG(CGF);
  } else {
    AtomicRCG(CGF);
  }

  CGF.EmitBranch(DefaultBB);
  CGF.EmitBlock(DefaultBB, /*IsFinished=*/true);
}

/// Generates unique name for artificial threadprivate variables.
/// Format is: <Prefix> "."
/// <Decl_mangled_name> "_" "<Decl_start_loc_raw_enc>"
static std::string generateUniqueName(CodeGenModule &CGM, StringRef Prefix,
                                      const Expr *Ref) {
  SmallString<256> Buffer;
  llvm::raw_svector_ostream Out(Buffer);
  const clang::DeclRefExpr *DE;
  const VarDecl *D = ::getBaseDecl(Ref, DE);
  if (!D)
    D = cast<VarDecl>(cast<DeclRefExpr>(Ref)->getDecl());
  D = D->getCanonicalDecl();
  // Locals/parameters use the plain name; globals use the mangled name so
  // distinct entities cannot collide.
  std::string Name = CGM.getOpenMPRuntime().getName(
      {D->isLocalVarDeclOrParm() ? D->getName() : CGM.getMangledName(D)});
  Out << Prefix << Name << "_"
      << D->getCanonicalDecl()->getBeginLoc().getRawEncoding();
  return std::string(Out.str());
}

/// Emits reduction initializer function:
/// \code
/// void @.red_init(void* %arg, void* %orig) {
///   %0 = bitcast void* %arg to <type>*
///   store <type> <init>, <type>* %0
///   ret void
/// }
/// \endcode
static llvm::Value *emitReduceInitFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N) {
  ASTContext &C = CGM.getContext();
  QualType VoidPtrTy = C.VoidPtrTy;
  VoidPtrTy.addRestrict();
  FunctionArgList Args;
  ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
                          ImplicitParamDecl::Other);
  ImplicitParamDecl ParamOrig(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
                              ImplicitParamDecl::Other);
  Args.emplace_back(&Param);
  Args.emplace_back(&ParamOrig);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_init", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  Address PrivateAddr = CGF.EmitLoadOfPointer(
      CGF.GetAddrOfLocalVar(&Param),
      C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  Address OrigAddr = Address::invalid();
  // If initializer uses initializer from declare reduction construct, emit a
  // pointer to the address of the original reduction item (required by
  // reduction initializer).
  if (RCG.usesReductionInitializer(N)) {
    Address SharedAddr = CGF.GetAddrOfLocalVar(&ParamOrig);
    OrigAddr = CGF.EmitLoadOfPointer(
        SharedAddr,
        CGM.getContext().VoidPtrTy.castAs<PointerType>()->getTypePtr());
  }
  // Emit the initializer:
  // %0 = bitcast void* %arg to <type>*
  // store <type> <init>, <type>* %0
  RCG.emitInitialization(CGF, N, PrivateAddr, OrigAddr,
                         [](CodeGenFunction &) { return false; });
  CGF.FinishFunction();
  return Fn;
}

/// Emits reduction combiner function:
/// \code
/// void @.red_comb(void* %arg0, void* %arg1) {
///   %lhs = bitcast void* %arg0 to <type>*
///   %rhs = bitcast void* %arg1 to <type>*
///   %2 = <ReductionOp>(<type>* %lhs, <type>* %rhs)
///   store <type> %2, <type>* %lhs
///   ret void
/// }
/// \endcode
static llvm::Value *emitReduceCombFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N,
                                           const Expr *ReductionOp,
                                           const Expr *LHS, const Expr *RHS,
                                           const Expr *PrivateRef) {
  ASTContext &C = CGM.getContext();
  const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(LHS)->getDecl());
  const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(RHS)->getDecl());
  FunctionArgList Args;
  ImplicitParamDecl ParamInOut(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                               C.VoidPtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl ParamIn(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                            ImplicitParamDecl::Other);
  Args.emplace_back(&ParamInOut);
  Args.emplace_back(&ParamIn);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_comb", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  // Remap lhs and rhs variables to the addresses of the function arguments.
  // %lhs = bitcast void* %arg0 to <type>*
  // %rhs = bitcast void* %arg1 to <type>*
  CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
  PrivateScope.addPrivate(
      LHSVD,
      // Pull out the pointer to the variable.
      CGF.Builder.CreateElementBitCast(
          CGF.EmitLoadOfPointer(
              CGF.GetAddrOfLocalVar(&ParamInOut),
              C.getPointerType(C.VoidPtrTy).castAs<PointerType>()),
          CGF.ConvertTypeForMem(LHSVD->getType())));
  PrivateScope.addPrivate(
      RHSVD,
      // Pull out the pointer to the variable.
      CGF.Builder.CreateElementBitCast(
          CGF.EmitLoadOfPointer(
              CGF.GetAddrOfLocalVar(&ParamIn),
              C.getPointerType(C.VoidPtrTy).castAs<PointerType>()),
          CGF.ConvertTypeForMem(RHSVD->getType())));
  PrivateScope.Privatize();
  // Emit the combiner body:
  // %2 = <ReductionOp>(<type> *%lhs, <type> *%rhs)
  // store <type> %2, <type>* %lhs
  CGM.getOpenMPRuntime().emitSingleReductionCombiner(
      CGF, ReductionOp, PrivateRef, cast<DeclRefExpr>(LHS),
      cast<DeclRefExpr>(RHS));
  CGF.FinishFunction();
  return Fn;
}

/// Emits reduction finalizer function:
/// \code
/// void @.red_fini(void* %arg) {
///   %0 = bitcast void* %arg to <type>*
///   <destroy>(<type>* %0)
///   ret void
/// }
/// \endcode
/// Returns nullptr when the reduction item needs no cleanups.
static llvm::Value *emitReduceFiniFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N) {
  if (!RCG.needCleanups(N))
    return nullptr;
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                          ImplicitParamDecl::Other);
  Args.emplace_back(&Param);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_fini", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  Address PrivateAddr = CGF.EmitLoadOfPointer(
      CGF.GetAddrOfLocalVar(&Param),
      C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  // Emit the finalizer body:
  // <destroy>(<type>* %0)
  RCG.emitCleanups(CGF, N, PrivateAddr);
  CGF.FinishFunction(Loc);
  return Fn;
}

/// Emits the task-reduction descriptor array and the runtime initialization
/// call (__kmpc_taskred_modifier_init or __kmpc_taskred_init). Returns the
/// taskgroup data pointer produced by the runtime, or nullptr when there is
/// nothing to reduce.
llvm::Value *CGOpenMPRuntime::emitTaskReductionInit(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
    ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint() || Data.ReductionVars.empty())
    return nullptr;

  // Build typedef struct:
  // kmp_taskred_input {
  //   void *reduce_shar; // shared reduction item
  //   void *reduce_orig; // original reduction item used for initialization
  //   size_t reduce_size; // size of data item
  //   void *reduce_init; // data initialization routine
  //   void *reduce_fini; // data finalization routine
  //   void *reduce_comb; // data combiner routine
  //   kmp_task_red_flags_t flags; // flags for additional info from compiler
  // } kmp_taskred_input_t;
  ASTContext &C = CGM.getContext();
  RecordDecl *RD = C.buildImplicitRecord("kmp_taskred_input_t");
  RD->startDefinition();
  const FieldDecl *SharedFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *OrigFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *SizeFD = addFieldToRecordDecl(C, RD, C.getSizeType());
  const FieldDecl *InitFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *FiniFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *CombFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *FlagsFD = addFieldToRecordDecl(
      C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/false));
  RD->completeDefinition();
  QualType RDType = C.getRecordType(RD);
  unsigned Size = Data.ReductionVars.size();
  llvm::APInt ArraySize(/*numBits=*/64, Size);
  QualType ArrayRDType = C.getConstantArrayType(
      RDType, ArraySize, nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0);
  // kmp_task_red_input_t .rd_input.[Size];
  Address TaskRedInput = CGF.CreateMemTemp(ArrayRDType, ".rd_input.");
  ReductionCodeGen RCG(Data.ReductionVars, Data.ReductionOrigs,
                       Data.ReductionCopies, Data.ReductionOps);
  for (unsigned Cnt = 0; Cnt < Size; ++Cnt) {
    // kmp_task_red_input_t &ElemLVal = .rd_input.[Cnt];
    llvm::Value *Idxs[] = {llvm::ConstantInt::get(CGM.SizeTy, /*V=*/0),
                           llvm::ConstantInt::get(CGM.SizeTy, Cnt)};
    llvm::Value *GEP = CGF.EmitCheckedInBoundsGEP(
        TaskRedInput.getElementType(), TaskRedInput.getPointer(), Idxs,
        /*SignedIndices=*/false, /*IsSubtraction=*/false, Loc,
        ".rd_input.gep.");
    LValue ElemLVal = CGF.MakeNaturalAlignAddrLValue(GEP, RDType);
    // ElemLVal.reduce_shar = &Shareds[Cnt];
    LValue SharedLVal = CGF.EmitLValueForField(ElemLVal, SharedFD);
    RCG.emitSharedOrigLValue(CGF, Cnt);
    llvm::Value *CastedShared =
        CGF.EmitCastToVoidPtr(RCG.getSharedLValue(Cnt).getPointer(CGF));
    CGF.EmitStoreOfScalar(CastedShared, SharedLVal);
    // ElemLVal.reduce_orig = &Origs[Cnt];
    LValue OrigLVal = CGF.EmitLValueForField(ElemLVal, OrigFD);
    llvm::Value *CastedOrig =
        CGF.EmitCastToVoidPtr(RCG.getOrigLValue(Cnt).getPointer(CGF));
    CGF.EmitStoreOfScalar(CastedOrig, OrigLVal);
    RCG.emitAggregateType(CGF, Cnt);
    llvm::Value *SizeValInChars;
    llvm::Value *SizeVal;
    std::tie(SizeValInChars, SizeVal) = RCG.getSizes(Cnt);
    // We use delayed creation/initialization for VLAs and array sections. It
    // is required because runtime does not provide the way to pass the sizes
    // of VLAs/array sections to initializer/combiner/finalizer functions.
    // Instead threadprivate global variables are used to store these values
    // and use them in the functions.
    bool DelayedCreation = !!SizeVal;
    SizeValInChars = CGF.Builder.CreateIntCast(SizeValInChars, CGM.SizeTy,
                                               /*isSigned=*/false);
    LValue SizeLVal = CGF.EmitLValueForField(ElemLVal, SizeFD);
    CGF.EmitStoreOfScalar(SizeValInChars, SizeLVal);
    // ElemLVal.reduce_init = init;
    LValue InitLVal = CGF.EmitLValueForField(ElemLVal, InitFD);
    llvm::Value *InitAddr =
        CGF.EmitCastToVoidPtr(emitReduceInitFunction(CGM, Loc, RCG, Cnt));
    CGF.EmitStoreOfScalar(InitAddr, InitLVal);
    // ElemLVal.reduce_fini = fini;
    LValue FiniLVal = CGF.EmitLValueForField(ElemLVal, FiniFD);
    llvm::Value *Fini = emitReduceFiniFunction(CGM, Loc, RCG, Cnt);
    // fini may be null when the item needs no cleanups; store a null pointer
    // so the runtime skips finalization.
    llvm::Value *FiniAddr = Fini
                                ? CGF.EmitCastToVoidPtr(Fini)
                                : llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
    CGF.EmitStoreOfScalar(FiniAddr, FiniLVal);
    // ElemLVal.reduce_comb = comb;
    LValue CombLVal = CGF.EmitLValueForField(ElemLVal, CombFD);
    llvm::Value *CombAddr = CGF.EmitCastToVoidPtr(emitReduceCombFunction(
        CGM, Loc, RCG, Cnt, Data.ReductionOps[Cnt], LHSExprs[Cnt],
        RHSExprs[Cnt], Data.ReductionCopies[Cnt]));
    CGF.EmitStoreOfScalar(CombAddr, CombLVal);
    // ElemLVal.flags = 0;
    LValue FlagsLVal = CGF.EmitLValueForField(ElemLVal, FlagsFD);
    if (DelayedCreation) {
      // Flag value 1 tells the runtime to use lazy (delayed) allocation for
      // this item (size is only known at runtime).
      CGF.EmitStoreOfScalar(
          llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/1, /*isSigned=*/true),
          FlagsLVal);
    } else
      CGF.EmitNullInitialization(FlagsLVal.getAddress(CGF),
                                 FlagsLVal.getType());
  }
  if (Data.IsReductionWithTaskMod) {
    // Build call void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid,
    // int is_ws, int num, void *data);
    llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
    llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
                                                  CGM.IntTy, /*isSigned=*/true);
    llvm::Value *Args[] = {
        IdentTLoc, GTid,
        llvm::ConstantInt::get(CGM.IntTy, Data.IsWorksharingReduction ? 1 : 0,
                               /*isSigned=*/true),
        llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            TaskRedInput.getPointer(), CGM.VoidPtrTy)};
    return CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_taskred_modifier_init),
        Args);
  }
  // Build call void *__kmpc_taskred_init(int gtid, int num_data, void *data);
  llvm::Value *Args[] = {
      CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy,
                                /*isSigned=*/true),
      llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TaskRedInput.getPointer(),
                                                      CGM.VoidPtrTy)};
  return CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                                 CGM.getModule(), OMPRTL___kmpc_taskred_init),
                             Args);
}

void CGOpenMPRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
                                            SourceLocation Loc,
                                            bool IsWorksharingReduction) {
  // Build call void __kmpc_task_reduction_modifier_fini(ident_t *loc,
  // int gtid, int is_ws);
  llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
  llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
                                                CGM.IntTy, /*isSigned=*/true);
  llvm::Value *Args[] = {IdentTLoc, GTid,
                         llvm::ConstantInt::get(CGM.IntTy,
                                                IsWorksharingReduction ? 1 : 0,
                                                /*isSigned=*/true)};
  (void)CGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(), OMPRTL___kmpc_task_reduction_modifier_fini),
      Args);
}

void CGOpenMPRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              ReductionCodeGen &RCG,
                                              unsigned N) {
  auto Sizes = RCG.getSizes(N);
  // Emit threadprivate global variable if the type is non-constant
  // (Sizes.second != nullptr).
  if (Sizes.second) {
    llvm::Value *SizeVal = CGF.Builder.CreateIntCast(Sizes.second, CGM.SizeTy,
                                                     /*isSigned=*/false);
    Address SizeAddr = getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    CGF.Builder.CreateStore(SizeVal, SizeAddr, /*IsVolatile=*/false);
  }
}

Address CGOpenMPRuntime::getTaskReductionItem(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              llvm::Value *ReductionsPtr,
                                              LValue SharedLVal) {
  // Build call void *__kmpc_task_reduction_get_th_data(int gtid, void *tg,
  // void *d);
  llvm::Value *Args[] = {CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
                                                   CGM.IntTy,
                                                   /*isSigned=*/true),
                         ReductionsPtr,
                         CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                             SharedLVal.getPointer(CGF), CGM.VoidPtrTy)};
  return Address::deprecated(
      CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___kmpc_task_reduction_get_th_data),
          Args),
      SharedLVal.getAlignment());
}

void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint())
    return;

  if (CGF.CGM.getLangOpts().OpenMPIRBuilder && Data.Dependences.empty()) {
    // TODO: Need to support taskwait with dependences in the OpenMPIRBuilder.
    OMPBuilder.createTaskwait(CGF.Builder);
  } else {
    llvm::Value *ThreadID = getThreadID(CGF, Loc);
    llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
    auto &M = CGM.getModule();
    Address DependenciesArray = Address::invalid();
    llvm::Value *NumOfElements;
    std::tie(NumOfElements, DependenciesArray) =
        emitDependClause(CGF, Data.Dependences, Loc);
    llvm::Value *DepWaitTaskArgs[6];
    if (!Data.Dependences.empty()) {
      DepWaitTaskArgs[0] = UpLoc;
      DepWaitTaskArgs[1] = ThreadID;
      DepWaitTaskArgs[2] = NumOfElements;
      DepWaitTaskArgs[3] = DependenciesArray.getPointer();
      DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
      DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);

      CodeGenFunction::RunCleanupsScope LocalScope(CGF);

      // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
      // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
      // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence
      // info is specified.
      CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_omp_wait_deps),
          DepWaitTaskArgs);

    } else {

      // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
      // global_tid);
      llvm::Value *Args[] = {UpLoc, ThreadID};
      // Ignore return result until untied tasks are supported.
      CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_omp_taskwait),
          Args);
    }
  }

  if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
    Region->emitUntiedSwitch(CGF);
}

void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF,
                                           OpenMPDirectiveKind InnerKind,
                                           const RegionCodeGenTy &CodeGen,
                                           bool HasCancel) {
  if (!CGF.HaveInsertPoint())
    return;
  InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel,
                                 InnerKind != OMPD_critical &&
                                     InnerKind != OMPD_master &&
                                     InnerKind != OMPD_masked);
  CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr);
}

namespace {
/// Cancellation kind values passed as the 'cncl_kind' argument of
/// __kmpc_cancel / __kmpc_cancellationpoint.
enum RTCancelKind {
  CancelNoreq = 0,
  CancelParallel = 1,
  CancelLoop = 2,
  CancelSections = 3,
  CancelTaskgroup = 4
};
} // anonymous namespace

/// Maps an OpenMP cancel-region directive kind to the runtime's cancellation
/// kind constant.
static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) {
  RTCancelKind CancelKind = CancelNoreq;
  if (CancelRegion == OMPD_parallel)
    CancelKind = CancelParallel;
  else if (CancelRegion == OMPD_for)
    CancelKind = CancelLoop;
  else if (CancelRegion == OMPD_sections)
    CancelKind = CancelSections;
  else {
    assert(CancelRegion == OMPD_taskgroup);
    CancelKind = CancelTaskgroup;
  }
  return CancelKind;
}

void CGOpenMPRuntime::emitCancellationPointCall(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDirectiveKind CancelRegion) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
  // global_tid, kmp_int32 cncl_kind);
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    // For 'cancellation point taskgroup', the task region info may not have a
    // cancel. This may instead happen in another adjacent task.
6341 if (CancelRegion == OMPD_taskgroup || OMPRegionInfo->hasCancel()) { 6342 llvm::Value *Args[] = { 6343 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 6344 CGF.Builder.getInt32(getCancellationKind(CancelRegion))}; 6345 // Ignore return result until untied tasks are supported. 6346 llvm::Value *Result = CGF.EmitRuntimeCall( 6347 OMPBuilder.getOrCreateRuntimeFunction( 6348 CGM.getModule(), OMPRTL___kmpc_cancellationpoint), 6349 Args); 6350 // if (__kmpc_cancellationpoint()) { 6351 // call i32 @__kmpc_cancel_barrier( // for parallel cancellation only 6352 // exit from construct; 6353 // } 6354 llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit"); 6355 llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue"); 6356 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result); 6357 CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB); 6358 CGF.EmitBlock(ExitBB); 6359 if (CancelRegion == OMPD_parallel) 6360 emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false); 6361 // exit from construct; 6362 CodeGenFunction::JumpDest CancelDest = 6363 CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind()); 6364 CGF.EmitBranchThroughCleanup(CancelDest); 6365 CGF.EmitBlock(ContBB, /*IsFinished=*/true); 6366 } 6367 } 6368 } 6369 6370 void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc, 6371 const Expr *IfCond, 6372 OpenMPDirectiveKind CancelRegion) { 6373 if (!CGF.HaveInsertPoint()) 6374 return; 6375 // Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid, 6376 // kmp_int32 cncl_kind); 6377 auto &M = CGM.getModule(); 6378 if (auto *OMPRegionInfo = 6379 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) { 6380 auto &&ThenGen = [this, &M, Loc, CancelRegion, 6381 OMPRegionInfo](CodeGenFunction &CGF, PrePostActionTy &) { 6382 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime(); 6383 llvm::Value *Args[] = { 6384 RT.emitUpdateLocation(CGF, Loc), RT.getThreadID(CGF, Loc), 6385 
CGF.Builder.getInt32(getCancellationKind(CancelRegion))}; 6386 // Ignore return result until untied tasks are supported. 6387 llvm::Value *Result = CGF.EmitRuntimeCall( 6388 OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_cancel), Args); 6389 // if (__kmpc_cancel()) { 6390 // call i32 @__kmpc_cancel_barrier( // for parallel cancellation only 6391 // exit from construct; 6392 // } 6393 llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit"); 6394 llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue"); 6395 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result); 6396 CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB); 6397 CGF.EmitBlock(ExitBB); 6398 if (CancelRegion == OMPD_parallel) 6399 RT.emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false); 6400 // exit from construct; 6401 CodeGenFunction::JumpDest CancelDest = 6402 CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind()); 6403 CGF.EmitBranchThroughCleanup(CancelDest); 6404 CGF.EmitBlock(ContBB, /*IsFinished=*/true); 6405 }; 6406 if (IfCond) { 6407 emitIfClause(CGF, IfCond, ThenGen, 6408 [](CodeGenFunction &, PrePostActionTy &) {}); 6409 } else { 6410 RegionCodeGenTy ThenRCG(ThenGen); 6411 ThenRCG(CGF); 6412 } 6413 } 6414 } 6415 6416 namespace { 6417 /// Cleanup action for uses_allocators support. 
6418 class OMPUsesAllocatorsActionTy final : public PrePostActionTy { 6419 ArrayRef<std::pair<const Expr *, const Expr *>> Allocators; 6420 6421 public: 6422 OMPUsesAllocatorsActionTy( 6423 ArrayRef<std::pair<const Expr *, const Expr *>> Allocators) 6424 : Allocators(Allocators) {} 6425 void Enter(CodeGenFunction &CGF) override { 6426 if (!CGF.HaveInsertPoint()) 6427 return; 6428 for (const auto &AllocatorData : Allocators) { 6429 CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsInit( 6430 CGF, AllocatorData.first, AllocatorData.second); 6431 } 6432 } 6433 void Exit(CodeGenFunction &CGF) override { 6434 if (!CGF.HaveInsertPoint()) 6435 return; 6436 for (const auto &AllocatorData : Allocators) { 6437 CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsFini(CGF, 6438 AllocatorData.first); 6439 } 6440 } 6441 }; 6442 } // namespace 6443 6444 void CGOpenMPRuntime::emitTargetOutlinedFunction( 6445 const OMPExecutableDirective &D, StringRef ParentName, 6446 llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, 6447 bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) { 6448 assert(!ParentName.empty() && "Invalid target region parent name!"); 6449 HasEmittedTargetRegion = true; 6450 SmallVector<std::pair<const Expr *, const Expr *>, 4> Allocators; 6451 for (const auto *C : D.getClausesOfKind<OMPUsesAllocatorsClause>()) { 6452 for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) { 6453 const OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I); 6454 if (!D.AllocatorTraits) 6455 continue; 6456 Allocators.emplace_back(D.Allocator, D.AllocatorTraits); 6457 } 6458 } 6459 OMPUsesAllocatorsActionTy UsesAllocatorAction(Allocators); 6460 CodeGen.setAction(UsesAllocatorAction); 6461 emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID, 6462 IsOffloadEntry, CodeGen); 6463 } 6464 6465 void CGOpenMPRuntime::emitUsesAllocatorsInit(CodeGenFunction &CGF, 6466 const Expr *Allocator, 6467 const Expr *AllocatorTraits) { 6468 llvm::Value *ThreadId = 
      getThreadID(CGF, Allocator->getExprLoc());
  ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true);
  // Use default memspace handle.
  llvm::Value *MemSpaceHandle = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  // Number of traits = the constant array bound of the traits variable.
  llvm::Value *NumTraits = llvm::ConstantInt::get(
      CGF.IntTy, cast<ConstantArrayType>(
                     AllocatorTraits->getType()->getAsArrayTypeUnsafe())
                     ->getSize()
                     .getLimitedValue());
  LValue AllocatorTraitsLVal = CGF.EmitLValue(AllocatorTraits);
  // Pass the traits array to the runtime as a void* (decayed, addrspace-cast
  // if needed).
  Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      AllocatorTraitsLVal.getAddress(CGF), CGF.VoidPtrPtrTy, CGF.VoidPtrTy);
  AllocatorTraitsLVal = CGF.MakeAddrLValue(Addr, CGF.getContext().VoidPtrTy,
                                           AllocatorTraitsLVal.getBaseInfo(),
                                           AllocatorTraitsLVal.getTBAAInfo());
  llvm::Value *Traits =
      CGF.EmitLoadOfScalar(AllocatorTraitsLVal, AllocatorTraits->getExprLoc());

  llvm::Value *AllocatorVal =
      CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_init_allocator),
                          {ThreadId, MemSpaceHandle, NumTraits, Traits});
  // Store to allocator.
  // Emit the local allocator variable here so it is in scope for the region.
  CGF.EmitVarDecl(*cast<VarDecl>(
      cast<DeclRefExpr>(Allocator->IgnoreParenImpCasts())->getDecl()));
  LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
  AllocatorVal =
      CGF.EmitScalarConversion(AllocatorVal, CGF.getContext().VoidPtrTy,
                               Allocator->getType(), Allocator->getExprLoc());
  CGF.EmitStoreOfScalar(AllocatorVal, AllocatorLVal);
}

/// Emit a call to __kmpc_destroy_allocator for one 'uses_allocators' entry.
void CGOpenMPRuntime::emitUsesAllocatorsFini(CodeGenFunction &CGF,
                                             const Expr *Allocator) {
  llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc());
  ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true);
  LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
  llvm::Value *AllocatorVal =
      CGF.EmitLoadOfScalar(AllocatorLVal, Allocator->getExprLoc());
  // The runtime takes the handle as omp_allocator_handle_t (void*).
  AllocatorVal = CGF.EmitScalarConversion(AllocatorVal, Allocator->getType(),
                                          CGF.getContext().VoidPtrTy,
                                          Allocator->getExprLoc());
  (void)CGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___kmpc_destroy_allocator),
      {ThreadId, AllocatorVal});
}

/// Outline the target region body into a uniquely named entry function and,
/// if it is an offload entry, create/register its region ID.
void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  // Create a unique name for the entry function using the source location
  // information of the current target region. The name will be something like:
  //
  // __omp_offloading_DD_FFFF_PP_lBB
  //
  // where DD_FFFF is an ID unique to the file (device and file IDs), PP is the
  // mangled name of the function that encloses the target region and BB is the
  // line number of the target region.

  // On the host with mandatory offload, the host fallback body is not built.
  const bool BuildOutlinedFn = CGM.getLangOpts().OpenMPIsDevice ||
                               !CGM.getLangOpts().OpenMPOffloadMandatory;
  unsigned DeviceID;
  unsigned FileID;
  unsigned Line;
  getTargetEntryUniqueInfo(CGM.getContext(), D.getBeginLoc(), DeviceID, FileID,
                           Line);
  SmallString<64> EntryFnName;
  {
    llvm::raw_svector_ostream OS(EntryFnName);
    OS << "__omp_offloading" << llvm::format("_%x", DeviceID)
       << llvm::format("_%x_", FileID) << ParentName << "_l" << Line;
  }

  const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);

  CodeGenFunction CGF(CGM, true);
  CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);

  if (BuildOutlinedFn)
    OutlinedFn = CGF.GenerateOpenMPCapturedStmtFunction(CS, D.getBeginLoc());

  // If this target outline function is not an offload entry, we don't need to
  // register it.
  if (!IsOffloadEntry)
    return;

  // The target region ID is used by the runtime library to identify the current
  // target region, so it only has to be unique and not necessarily point to
  // anything. It could be the pointer to the outlined function that implements
  // the target region, but we aren't using that so that the compiler doesn't
  // need to keep that, and could therefore inline the host function if proven
  // worthwhile during optimization. In the other hand, if emitting code for the
  // device, the ID has to be the function address so that it can retrieved from
  // the offloading entry and launched by the runtime library. We also mark the
  // outlined function to have external linkage in case we are emitting code for
  // the device, because these functions will be entry points to the device.

  if (CGM.getLangOpts().OpenMPIsDevice) {
    OutlinedFnID = llvm::ConstantExpr::getBitCast(OutlinedFn, CGM.Int8PtrTy);
    OutlinedFn->setLinkage(llvm::GlobalValue::WeakAnyLinkage);
    OutlinedFn->setDSOLocal(false);
    if (CGM.getTriple().isAMDGCN())
      OutlinedFn->setCallingConv(llvm::CallingConv::AMDGPU_KERNEL);
  } else {
    std::string Name = getName({EntryFnName, "region_id"});
    OutlinedFnID = new llvm::GlobalVariable(
        CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
        llvm::GlobalValue::WeakAnyLinkage,
        llvm::Constant::getNullValue(CGM.Int8Ty), Name);
  }

  // If we do not allow host fallback we still need a named address to use.
  llvm::Constant *TargetRegionEntryAddr = OutlinedFn;
  if (!BuildOutlinedFn) {
    assert(!CGM.getModule().getGlobalVariable(EntryFnName, true) &&
           "Named kernel already exists?");
    TargetRegionEntryAddr = new llvm::GlobalVariable(
        CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
        llvm::GlobalValue::InternalLinkage,
        llvm::Constant::getNullValue(CGM.Int8Ty), EntryFnName);
  }

  // Register the information for the entry associated with this target region.
  OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
      DeviceID, FileID, ParentName, Line, TargetRegionEntryAddr, OutlinedFnID,
      OffloadEntriesInfoManagerTy::OMPTargetRegionEntryTargetRegion);

  // Add NumTeams and ThreadLimit attributes to the outlined GPU function
  int32_t DefaultValTeams = -1;
  getNumTeamsExprForTargetDirective(CGF, D, DefaultValTeams);
  if (DefaultValTeams > 0 && OutlinedFn) {
    OutlinedFn->addFnAttr("omp_target_num_teams",
                          std::to_string(DefaultValTeams));
  }
  int32_t DefaultValThreads = -1;
  getNumThreadsExprForTargetDirective(CGF, D, DefaultValThreads);
  if (DefaultValThreads > 0 && OutlinedFn) {
    OutlinedFn->addFnAttr("omp_target_thread_limit",
                          std::to_string(DefaultValThreads));
  }

  if (BuildOutlinedFn)
    CGM.getTargetCodeGenInfo().setTargetAttributes(nullptr, OutlinedFn, CGM);
}

/// Checks if the expression is constant or does not have non-trivial function
/// calls.
static bool isTrivial(ASTContext &Ctx, const Expr * E) {
  // We can skip constant expressions.
  // We can skip expressions with trivial calls or simple expressions.
  return (E->isEvaluatable(Ctx, Expr::SE_AllowUndefinedBehavior) ||
          !E->hasNonTrivialCall(Ctx)) &&
         !E->HasSideEffects(Ctx, /*IncludePossibleEffects=*/true);
}

/// Strip trivial statements/containers and return the single "interesting"
/// child of \p Body, or nullptr if there is more than one.
const Stmt *CGOpenMPRuntime::getSingleCompoundChild(ASTContext &Ctx,
                                                    const Stmt *Body) {
  const Stmt *Child = Body->IgnoreContainers();
  while (const auto *C = dyn_cast_or_null<CompoundStmt>(Child)) {
    Child = nullptr;
    for (const Stmt *S : C->body()) {
      // Trivial expressions contribute no codegen-relevant behavior.
      if (const auto *E = dyn_cast<Expr>(S)) {
        if (isTrivial(Ctx, E))
          continue;
      }
      // Some of the statements can be ignored.
      if (isa<AsmStmt>(S) || isa<NullStmt>(S) || isa<OMPFlushDirective>(S) ||
          isa<OMPBarrierDirective>(S) || isa<OMPTaskyieldDirective>(S))
        continue;
      // Analyze declarations.
      if (const auto *DS = dyn_cast<DeclStmt>(S)) {
        if (llvm::all_of(DS->decls(), [](const Decl *D) {
              // Declarations with no runtime effect are ignorable.
              if (isa<EmptyDecl>(D) || isa<DeclContext>(D) ||
                  isa<TypeDecl>(D) || isa<PragmaCommentDecl>(D) ||
                  isa<PragmaDetectMismatchDecl>(D) || isa<UsingDecl>(D) ||
                  isa<UsingDirectiveDecl>(D) ||
                  isa<OMPDeclareReductionDecl>(D) ||
                  isa<OMPThreadPrivateDecl>(D) || isa<OMPAllocateDecl>(D))
                return true;
              const auto *VD = dyn_cast<VarDecl>(D);
              if (!VD)
                return false;
              return VD->hasGlobalStorage() || !VD->isUsed();
            }))
          continue;
      }
      // Found multiple children - cannot get the one child only.
      if (Child)
        return nullptr;
      Child = S;
    }
    if (Child)
      Child = Child->IgnoreContainers();
  }
  return Child;
}

/// Return the num_teams expression for a target directive (or nullptr) and
/// set \p DefaultVal to the statically known team count: >0 known constant,
/// 0 unknown at compile time, -1 no teams region at all.
const Expr *CGOpenMPRuntime::getNumTeamsExprForTargetDirective(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    int32_t &DefaultVal) {

  OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
  assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
         "Expected target-based executable directive.");
  switch (DirectiveKind) {
  case OMPD_target: {
    // Plain 'target': inspect the (single) nested directive, if any.
    const auto *CS = D.getInnermostCapturedStmt();
    const auto *Body =
        CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
    const Stmt *ChildStmt =
        CGOpenMPRuntime::getSingleCompoundChild(CGF.getContext(), Body);
    if (const auto *NestedDir =
            dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
      if (isOpenMPTeamsDirective(NestedDir->getDirectiveKind())) {
        if (NestedDir->hasClausesOfKind<OMPNumTeamsClause>()) {
          const Expr *NumTeams =
              NestedDir->getSingleClause<OMPNumTeamsClause>()->getNumTeams();
          if (NumTeams->isIntegerConstantExpr(CGF.getContext()))
            if (auto Constant =
                    NumTeams->getIntegerConstantExpr(CGF.getContext()))
              DefaultVal = Constant->getExtValue();
          return NumTeams;
        }
        DefaultVal = 0;
        return nullptr;
      }
      // NOTE(review): this branch and the fall-through below both yield
      // DefaultVal = 1 / nullptr — the 'if' is redundant as written.
      if (isOpenMPParallelDirective(NestedDir->getDirectiveKind()) ||
          isOpenMPSimdDirective(NestedDir->getDirectiveKind())) {
        DefaultVal = 1;
        return nullptr;
      }
      DefaultVal = 1;
      return nullptr;
    }
    // A value of -1 is used to check if we need to emit no teams region
    DefaultVal = -1;
    return nullptr;
  }
  case OMPD_target_teams:
  case OMPD_target_teams_distribute:
  case OMPD_target_teams_distribute_simd:
  case OMPD_target_teams_distribute_parallel_for:
  case OMPD_target_teams_distribute_parallel_for_simd: {
    // Combined teams directives: num_teams appears on D itself.
    if (D.hasClausesOfKind<OMPNumTeamsClause>()) {
      const Expr *NumTeams =
          D.getSingleClause<OMPNumTeamsClause>()->getNumTeams();
      if (NumTeams->isIntegerConstantExpr(CGF.getContext()))
        if (auto Constant = NumTeams->getIntegerConstantExpr(CGF.getContext()))
          DefaultVal = Constant->getExtValue();
      return NumTeams;
    }
    DefaultVal = 0;
    return nullptr;
  }
  case OMPD_target_parallel:
  case OMPD_target_parallel_for:
  case OMPD_target_parallel_for_simd:
  case OMPD_target_simd:
    // No teams construct: exactly one implicit team.
    DefaultVal = 1;
    return nullptr;
  case OMPD_parallel:
  case OMPD_for:
  case OMPD_parallel_for:
  case OMPD_parallel_master:
  case OMPD_parallel_sections:
  case OMPD_for_simd:
  case OMPD_parallel_for_simd:
  case OMPD_cancel:
  case OMPD_cancellation_point:
  case OMPD_ordered:
  case OMPD_threadprivate:
  case OMPD_allocate:
  case OMPD_task:
  case OMPD_simd:
  case OMPD_tile:
  case OMPD_unroll:
  case OMPD_sections:
  case OMPD_section:
  case OMPD_single:
  case OMPD_master:
  case OMPD_critical:
  case OMPD_taskyield:
  case OMPD_barrier:
  case OMPD_taskwait:
  case OMPD_taskgroup:
  case OMPD_atomic:
  case OMPD_flush:
  case OMPD_depobj:
  case OMPD_scan:
  case OMPD_teams:
  case OMPD_target_data:
  case OMPD_target_exit_data:
  case OMPD_target_enter_data:
  case OMPD_distribute:
  case OMPD_distribute_simd:
  case OMPD_distribute_parallel_for:
  case OMPD_distribute_parallel_for_simd:
  case OMPD_teams_distribute:
  case OMPD_teams_distribute_simd:
  case OMPD_teams_distribute_parallel_for:
  case OMPD_teams_distribute_parallel_for_simd:
  case OMPD_target_update:
  case OMPD_declare_simd:
  case OMPD_declare_variant:
  case OMPD_begin_declare_variant:
  case OMPD_end_declare_variant:
  case OMPD_declare_target:
  case OMPD_end_declare_target:
  case OMPD_declare_reduction:
  case OMPD_declare_mapper:
  case OMPD_taskloop:
  case OMPD_taskloop_simd:
  case OMPD_master_taskloop:
  case OMPD_master_taskloop_simd:
  case OMPD_parallel_master_taskloop:
  case OMPD_parallel_master_taskloop_simd:
  case OMPD_requires:
  case OMPD_metadirective:
  case OMPD_unknown:
    break;
  default:
    break;
  }
  llvm_unreachable("Unexpected directive kind.");
}

/// Emit the host-side i32 number-of-teams value for a target directive, or
/// nullptr when no teams region is needed.
llvm::Value *CGOpenMPRuntime::emitNumTeamsForTargetDirective(
    CodeGenFunction &CGF, const OMPExecutableDirective &D) {
  assert(!CGF.getLangOpts().OpenMPIsDevice &&
         "Clauses associated with the teams directive expected to be emitted "
         "only for the host!");
  CGBuilderTy &Bld = CGF.Builder;
  int32_t DefaultNT = -1;
  const Expr *NumTeams = getNumTeamsExprForTargetDirective(CGF, D, DefaultNT);
  if (NumTeams != nullptr) {
    OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();

    switch (DirectiveKind) {
    case OMPD_target: {
      // num_teams came from a nested teams directive: evaluate it in the
      // captured-statement context so captured variables resolve.
      const auto *CS = D.getInnermostCapturedStmt();
      CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
      CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
      llvm::Value *NumTeamsVal = CGF.EmitScalarExpr(NumTeams,
                                                    /*IgnoreResultAssign*/ true);
      return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
                               /*isSigned=*/true);
    }
    case OMPD_target_teams:
    case OMPD_target_teams_distribute:
    case OMPD_target_teams_distribute_simd:
    case OMPD_target_teams_distribute_parallel_for:
    case OMPD_target_teams_distribute_parallel_for_simd: {
      // Combined directive: evaluate the clause expression directly.
      CodeGenFunction::RunCleanupsScope NumTeamsScope(CGF);
      llvm::Value *NumTeamsVal = CGF.EmitScalarExpr(NumTeams,
                                                    /*IgnoreResultAssign*/ true);
      return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
                               /*isSigned=*/true);
    }
    default:
      break;
    }
  } else if (DefaultNT == -1) {
    // No teams region at all.
    return nullptr;
  }

  return Bld.getInt32(DefaultNT);
}

/// Compute the number of threads for a nested parallel region inside \p CS,
/// honoring if/num_threads clauses and clamping to
/// \p DefaultThreadLimitVal when it is non-null.
static llvm::Value *getNumThreads(CodeGenFunction &CGF, const CapturedStmt *CS,
                                  llvm::Value *DefaultThreadLimitVal) {
  const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
      CGF.getContext(), CS->getCapturedStmt());
  if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
    if (isOpenMPParallelDirective(Dir->getDirectiveKind())) {
      llvm::Value *NumThreads = nullptr;
      llvm::Value *CondVal = nullptr;
      // Handle if clause. If if clause present, the number of threads is
      // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
      if (Dir->hasClausesOfKind<OMPIfClause>()) {
        CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
        const OMPIfClause *IfClause = nullptr;
        // Pick the if clause that applies to 'parallel' (unmodified or
        // parallel-modified).
        for (const auto *C : Dir->getClausesOfKind<OMPIfClause>()) {
          if (C->getNameModifier() == OMPD_unknown ||
              C->getNameModifier() == OMPD_parallel) {
            IfClause = C;
            break;
          }
        }
        if (IfClause) {
          const Expr *Cond = IfClause->getCondition();
          bool Result;
          if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
            // Statically false 'if' means the region runs with one thread.
            if (!Result)
              return CGF.Builder.getInt32(1);
          } else {
            CodeGenFunction::LexicalScope Scope(CGF, Cond->getSourceRange());
            // Emit any pre-init declarations the clause captured.
            if (const auto *PreInit =
                    cast_or_null<DeclStmt>(IfClause->getPreInitStmt())) {
              for (const auto *I : PreInit->decls()) {
                if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
                  CGF.EmitVarDecl(cast<VarDecl>(*I));
                } else {
                  CodeGenFunction::AutoVarEmission Emission =
                      CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
                  CGF.EmitAutoVarCleanups(Emission);
                }
              }
            }
            CondVal = CGF.EvaluateExprAsBool(Cond);
          }
        }
      }
      // Check the value of num_threads clause iff if clause was not specified
      // or is not evaluated to false.
      if (Dir->hasClausesOfKind<OMPNumThreadsClause>()) {
        CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
        const auto *NumThreadsClause =
            Dir->getSingleClause<OMPNumThreadsClause>();
        CodeGenFunction::LexicalScope Scope(
            CGF, NumThreadsClause->getNumThreads()->getSourceRange());
        if (const auto *PreInit =
                cast_or_null<DeclStmt>(NumThreadsClause->getPreInitStmt())) {
          for (const auto *I : PreInit->decls()) {
            if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
              CGF.EmitVarDecl(cast<VarDecl>(*I));
            } else {
              CodeGenFunction::AutoVarEmission Emission =
                  CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
              CGF.EmitAutoVarCleanups(Emission);
            }
          }
        }
        NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads());
        NumThreads = CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty,
                                               /*isSigned=*/false);
        // Clamp num_threads to the enclosing thread_limit (unsigned min).
        if (DefaultThreadLimitVal)
          NumThreads = CGF.Builder.CreateSelect(
              CGF.Builder.CreateICmpULT(DefaultThreadLimitVal, NumThreads),
              DefaultThreadLimitVal, NumThreads);
      } else {
        // No num_threads: 0 means "runtime default".
        NumThreads = DefaultThreadLimitVal ? DefaultThreadLimitVal
                                           : CGF.Builder.getInt32(0);
      }
      // Process condition of the if clause.
      if (CondVal) {
        NumThreads = CGF.Builder.CreateSelect(CondVal, NumThreads,
                                              CGF.Builder.getInt32(1));
      }
      return NumThreads;
    }
    // simd regions execute with a single thread.
    if (isOpenMPSimdDirective(Dir->getDirectiveKind()))
      return CGF.Builder.getInt32(1);
    return DefaultThreadLimitVal;
  }
  return DefaultThreadLimitVal ?
             DefaultThreadLimitVal
                               : CGF.Builder.getInt32(0);
}

/// Return the expression bounding the number of threads for a target
/// directive (thread_limit or num_threads, whichever is smaller when both are
/// constants) and set \p DefaultVal to a known constant bound when possible.
const Expr *CGOpenMPRuntime::getNumThreadsExprForTargetDirective(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    int32_t &DefaultVal) {
  OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
  assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
         "Expected target-based executable directive.");

  switch (DirectiveKind) {
  case OMPD_target:
    // Teams have no clause thread_limit
    return nullptr;
  case OMPD_target_teams:
  case OMPD_target_teams_distribute:
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      const Expr *ThreadLimit = ThreadLimitClause->getThreadLimit();
      if (ThreadLimit->isIntegerConstantExpr(CGF.getContext()))
        if (auto Constant =
                ThreadLimit->getIntegerConstantExpr(CGF.getContext()))
          DefaultVal = Constant->getExtValue();
      return ThreadLimit;
    }
    return nullptr;
  case OMPD_target_parallel:
  case OMPD_target_parallel_for:
  case OMPD_target_parallel_for_simd:
  case OMPD_target_teams_distribute_parallel_for:
  case OMPD_target_teams_distribute_parallel_for_simd: {
    Expr *ThreadLimit = nullptr;
    Expr *NumThreads = nullptr;
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      ThreadLimit = ThreadLimitClause->getThreadLimit();
      if (ThreadLimit->isIntegerConstantExpr(CGF.getContext()))
        if (auto Constant =
                ThreadLimit->getIntegerConstantExpr(CGF.getContext()))
          DefaultVal = Constant->getExtValue();
    }
    if (D.hasClausesOfKind<OMPNumThreadsClause>()) {
      const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>();
      NumThreads = NumThreadsClause->getNumThreads();
      if (NumThreads->isIntegerConstantExpr(CGF.getContext())) {
        if (auto Constant =
                NumThreads->getIntegerConstantExpr(CGF.getContext())) {
          // Prefer num_threads when it is the tighter constant bound.
          // NOTE(review): when no thread_limit was seen, DefaultVal keeps the
          // caller's initial value here — confirm callers initialize it to -1.
          if (Constant->getExtValue() < DefaultVal) {
            DefaultVal = Constant->getExtValue();
            ThreadLimit = NumThreads;
          }
        }
      }
    }
    return ThreadLimit;
  }
  case OMPD_target_teams_distribute_simd:
  case OMPD_target_simd:
    // simd executes with a single thread.
    DefaultVal = 1;
    return nullptr;
  case OMPD_parallel:
  case OMPD_for:
  case OMPD_parallel_for:
  case OMPD_parallel_master:
  case OMPD_parallel_sections:
  case OMPD_for_simd:
  case OMPD_parallel_for_simd:
  case OMPD_cancel:
  case OMPD_cancellation_point:
  case OMPD_ordered:
  case OMPD_threadprivate:
  case OMPD_allocate:
  case OMPD_task:
  case OMPD_simd:
  case OMPD_tile:
  case OMPD_unroll:
  case OMPD_sections:
  case OMPD_section:
  case OMPD_single:
  case OMPD_master:
  case OMPD_critical:
  case OMPD_taskyield:
  case OMPD_barrier:
  case OMPD_taskwait:
  case OMPD_taskgroup:
  case OMPD_atomic:
  case OMPD_flush:
  case OMPD_depobj:
  case OMPD_scan:
  case OMPD_teams:
  case OMPD_target_data:
  case OMPD_target_exit_data:
  case OMPD_target_enter_data:
  case OMPD_distribute:
  case OMPD_distribute_simd:
  case OMPD_distribute_parallel_for:
  case OMPD_distribute_parallel_for_simd:
  case OMPD_teams_distribute:
  case OMPD_teams_distribute_simd:
  case OMPD_teams_distribute_parallel_for:
  case OMPD_teams_distribute_parallel_for_simd:
  case OMPD_target_update:
  case OMPD_declare_simd:
  case OMPD_declare_variant:
  case OMPD_begin_declare_variant:
  case OMPD_end_declare_variant:
  case OMPD_declare_target:
  case OMPD_end_declare_target:
  case OMPD_declare_reduction:
  case OMPD_declare_mapper:
  case OMPD_taskloop:
  case OMPD_taskloop_simd:
  case OMPD_master_taskloop:
  case OMPD_master_taskloop_simd:
  case OMPD_parallel_master_taskloop:
  case OMPD_parallel_master_taskloop_simd:
  case OMPD_requires:
  case OMPD_unknown:
    break;
  default:
    break;
  }
  llvm_unreachable("Unsupported directive kind.");
}

/// Emit the host-side i32 thread-count value for a target directive,
/// combining thread_limit/num_threads/if clauses (0 = runtime default).
llvm::Value *CGOpenMPRuntime::emitNumThreadsForTargetDirective(
    CodeGenFunction &CGF, const OMPExecutableDirective &D) {
  assert(!CGF.getLangOpts().OpenMPIsDevice &&
         "Clauses associated with the teams directive expected to be emitted "
         "only for the host!");
  OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
  assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
         "Expected target-based executable directive.");
  CGBuilderTy &Bld = CGF.Builder;
  llvm::Value *ThreadLimitVal = nullptr;
  llvm::Value *NumThreadsVal = nullptr;
  switch (DirectiveKind) {
  case OMPD_target: {
    // Plain 'target': derive the count from the nested directive, if any.
    const CapturedStmt *CS = D.getInnermostCapturedStmt();
    if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
      return NumThreads;
    const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
        CGF.getContext(), CS->getCapturedStmt());
    if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
      if (Dir->hasClausesOfKind<OMPThreadLimitClause>()) {
        CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
        const auto *ThreadLimitClause =
            Dir->getSingleClause<OMPThreadLimitClause>();
        CodeGenFunction::LexicalScope Scope(
            CGF, ThreadLimitClause->getThreadLimit()->getSourceRange());
        // Emit any pre-init declarations the clause captured.
        if (const auto *PreInit =
                cast_or_null<DeclStmt>(ThreadLimitClause->getPreInitStmt())) {
          for (const auto *I : PreInit->decls()) {
            if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
              CGF.EmitVarDecl(cast<VarDecl>(*I));
            } else {
              CodeGenFunction::AutoVarEmission Emission =
                  CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
              CGF.EmitAutoVarCleanups(Emission);
            }
          }
        }
        llvm::Value *ThreadLimit =
            CGF.EmitScalarExpr(
                ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
        ThreadLimitVal =
            Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
      }
      // Look through a teams (non-distribute) directive for a nested one.
      if (isOpenMPTeamsDirective(Dir->getDirectiveKind()) &&
          !isOpenMPDistributeDirective(Dir->getDirectiveKind())) {
        CS = Dir->getInnermostCapturedStmt();
        const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
            CGF.getContext(), CS->getCapturedStmt());
        Dir = dyn_cast_or_null<OMPExecutableDirective>(Child);
      }
      if (Dir && isOpenMPDistributeDirective(Dir->getDirectiveKind()) &&
          !isOpenMPSimdDirective(Dir->getDirectiveKind())) {
        CS = Dir->getInnermostCapturedStmt();
        if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
          return NumThreads;
      }
      // simd executes with a single thread.
      if (Dir && isOpenMPSimdDirective(Dir->getDirectiveKind()))
        return Bld.getInt32(1);
    }
    return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0);
  }
  case OMPD_target_teams: {
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
    }
    const CapturedStmt *CS = D.getInnermostCapturedStmt();
    if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
      return NumThreads;
    const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
        CGF.getContext(), CS->getCapturedStmt());
    if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
      // A nested 'distribute' may carry the parallel region.
      if (Dir->getDirectiveKind() == OMPD_distribute) {
        CS = Dir->getInnermostCapturedStmt();
        if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
          return NumThreads;
      }
    }
    return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0);
  }
  case OMPD_target_teams_distribute:
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
    }
    return getNumThreads(CGF, D.getInnermostCapturedStmt(), ThreadLimitVal);
  case OMPD_target_parallel:
  case OMPD_target_parallel_for:
  case OMPD_target_parallel_for_simd:
  case OMPD_target_teams_distribute_parallel_for:
  case OMPD_target_teams_distribute_parallel_for_simd: {
    llvm::Value *CondVal = nullptr;
    // Handle if clause. If if clause present, the number of threads is
    // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
    if (D.hasClausesOfKind<OMPIfClause>()) {
      const OMPIfClause *IfClause = nullptr;
      // Use the if clause that applies to 'parallel'.
      for (const auto *C : D.getClausesOfKind<OMPIfClause>()) {
        if (C->getNameModifier() == OMPD_unknown ||
            C->getNameModifier() == OMPD_parallel) {
          IfClause = C;
          break;
        }
      }
      if (IfClause) {
        const Expr *Cond = IfClause->getCondition();
        bool Result;
        if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
          // Statically false: one thread.
          if (!Result)
            return Bld.getInt32(1);
        } else {
          CodeGenFunction::RunCleanupsScope Scope(CGF);
          CondVal = CGF.EvaluateExprAsBool(Cond);
        }
      }
    }
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
    }
    if (D.hasClausesOfKind<OMPNumThreadsClause>()) {
      CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
      const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>();
      llvm::Value *NumThreads = CGF.EmitScalarExpr(
          NumThreadsClause->getNumThreads(), /*IgnoreResultAssign=*/true);
      NumThreadsVal =
          Bld.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned=*/false);
      // Effective limit = unsigned min(num_threads, thread_limit).
      ThreadLimitVal = ThreadLimitVal
                           ? Bld.CreateSelect(Bld.CreateICmpULT(NumThreadsVal,
                                                                ThreadLimitVal),
                                              NumThreadsVal, ThreadLimitVal)
                           : NumThreadsVal;
    }
    // 0 means "runtime default".
    if (!ThreadLimitVal)
      ThreadLimitVal = Bld.getInt32(0);
    if (CondVal)
      return Bld.CreateSelect(CondVal, ThreadLimitVal, Bld.getInt32(1));
    return ThreadLimitVal;
  }
  case OMPD_target_teams_distribute_simd:
  case OMPD_target_simd:
    // simd executes with a single thread.
    return Bld.getInt32(1);
  case OMPD_parallel:
  case OMPD_for:
  case OMPD_parallel_for:
  case OMPD_parallel_master:
  case OMPD_parallel_sections:
  case OMPD_for_simd:
  case OMPD_parallel_for_simd:
  case OMPD_cancel:
  case OMPD_cancellation_point:
  case OMPD_ordered:
  case OMPD_threadprivate:
  case OMPD_allocate:
  case OMPD_task:
  case OMPD_simd:
  case OMPD_tile:
  case OMPD_unroll:
  case OMPD_sections:
  case OMPD_section:
  case OMPD_single:
  case OMPD_master:
  case OMPD_critical:
  case OMPD_taskyield:
  case OMPD_barrier:
  case OMPD_taskwait:
  case OMPD_taskgroup:
  case OMPD_atomic:
  case OMPD_flush:
  case OMPD_depobj:
  case OMPD_scan:
  case OMPD_teams:
  case OMPD_target_data:
  case OMPD_target_exit_data:
  case OMPD_target_enter_data:
  case OMPD_distribute:
  case OMPD_distribute_simd:
  case OMPD_distribute_parallel_for:
  case OMPD_distribute_parallel_for_simd:
  case OMPD_teams_distribute:
  case OMPD_teams_distribute_simd:
  case OMPD_teams_distribute_parallel_for:
  case OMPD_teams_distribute_parallel_for_simd:
  case OMPD_target_update:
  case OMPD_declare_simd:
  case OMPD_declare_variant:
  case OMPD_begin_declare_variant:
  case OMPD_end_declare_variant:
  case OMPD_declare_target:
  case OMPD_end_declare_target:
  case OMPD_declare_reduction:
  case OMPD_declare_mapper:
  case OMPD_taskloop:
  case OMPD_taskloop_simd:
  case OMPD_master_taskloop:
  case
OMPD_master_taskloop_simd: 7260 case OMPD_parallel_master_taskloop: 7261 case OMPD_parallel_master_taskloop_simd: 7262 case OMPD_requires: 7263 case OMPD_metadirective: 7264 case OMPD_unknown: 7265 break; 7266 default: 7267 break; 7268 } 7269 llvm_unreachable("Unsupported directive kind."); 7270 } 7271 7272 namespace { 7273 LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE(); 7274 7275 // Utility to handle information from clauses associated with a given 7276 // construct that use mappable expressions (e.g. 'map' clause, 'to' clause). 7277 // It provides a convenient interface to obtain the information and generate 7278 // code for that information. 7279 class MappableExprsHandler { 7280 public: 7281 /// Values for bit flags used to specify the mapping type for 7282 /// offloading. 7283 enum OpenMPOffloadMappingFlags : uint64_t { 7284 /// No flags 7285 OMP_MAP_NONE = 0x0, 7286 /// Allocate memory on the device and move data from host to device. 7287 OMP_MAP_TO = 0x01, 7288 /// Allocate memory on the device and move data from device to host. 7289 OMP_MAP_FROM = 0x02, 7290 /// Always perform the requested mapping action on the element, even 7291 /// if it was already mapped before. 7292 OMP_MAP_ALWAYS = 0x04, 7293 /// Delete the element from the device environment, ignoring the 7294 /// current reference count associated with the element. 7295 OMP_MAP_DELETE = 0x08, 7296 /// The element being mapped is a pointer-pointee pair; both the 7297 /// pointer and the pointee should be mapped. 7298 OMP_MAP_PTR_AND_OBJ = 0x10, 7299 /// This flags signals that the base address of an entry should be 7300 /// passed to the target kernel as an argument. 7301 OMP_MAP_TARGET_PARAM = 0x20, 7302 /// Signal that the runtime library has to return the device pointer 7303 /// in the current position for the data being mapped. Used when we have the 7304 /// use_device_ptr or use_device_addr clause. 
7305 OMP_MAP_RETURN_PARAM = 0x40, 7306 /// This flag signals that the reference being passed is a pointer to 7307 /// private data. 7308 OMP_MAP_PRIVATE = 0x80, 7309 /// Pass the element to the device by value. 7310 OMP_MAP_LITERAL = 0x100, 7311 /// Implicit map 7312 OMP_MAP_IMPLICIT = 0x200, 7313 /// Close is a hint to the runtime to allocate memory close to 7314 /// the target device. 7315 OMP_MAP_CLOSE = 0x400, 7316 /// 0x800 is reserved for compatibility with XLC. 7317 /// Produce a runtime error if the data is not already allocated. 7318 OMP_MAP_PRESENT = 0x1000, 7319 // Increment and decrement a separate reference counter so that the data 7320 // cannot be unmapped within the associated region. Thus, this flag is 7321 // intended to be used on 'target' and 'target data' directives because they 7322 // are inherently structured. It is not intended to be used on 'target 7323 // enter data' and 'target exit data' directives because they are inherently 7324 // dynamic. 7325 // This is an OpenMP extension for the sake of OpenACC support. 7326 OMP_MAP_OMPX_HOLD = 0x2000, 7327 /// Signal that the runtime library should use args as an array of 7328 /// descriptor_dim pointers and use args_size as dims. Used when we have 7329 /// non-contiguous list items in target update directive 7330 OMP_MAP_NON_CONTIG = 0x100000000000, 7331 /// The 16 MSBs of the flags indicate whether the entry is member of some 7332 /// struct/class. 7333 OMP_MAP_MEMBER_OF = 0xffff000000000000, 7334 LLVM_MARK_AS_BITMASK_ENUM(/* LargestFlag = */ OMP_MAP_MEMBER_OF), 7335 }; 7336 7337 /// Get the offset of the OMP_MAP_MEMBER_OF field. 7338 static unsigned getFlagMemberOffset() { 7339 unsigned Offset = 0; 7340 for (uint64_t Remain = OMP_MAP_MEMBER_OF; !(Remain & 1); 7341 Remain = Remain >> 1) 7342 Offset++; 7343 return Offset; 7344 } 7345 7346 /// Class that holds debugging information for a data mapping to be passed to 7347 /// the runtime library. 
7348 class MappingExprInfo { 7349 /// The variable declaration used for the data mapping. 7350 const ValueDecl *MapDecl = nullptr; 7351 /// The original expression used in the map clause, or null if there is 7352 /// none. 7353 const Expr *MapExpr = nullptr; 7354 7355 public: 7356 MappingExprInfo(const ValueDecl *MapDecl, const Expr *MapExpr = nullptr) 7357 : MapDecl(MapDecl), MapExpr(MapExpr) {} 7358 7359 const ValueDecl *getMapDecl() const { return MapDecl; } 7360 const Expr *getMapExpr() const { return MapExpr; } 7361 }; 7362 7363 /// Class that associates information with a base pointer to be passed to the 7364 /// runtime library. 7365 class BasePointerInfo { 7366 /// The base pointer. 7367 llvm::Value *Ptr = nullptr; 7368 /// The base declaration that refers to this device pointer, or null if 7369 /// there is none. 7370 const ValueDecl *DevPtrDecl = nullptr; 7371 7372 public: 7373 BasePointerInfo(llvm::Value *Ptr, const ValueDecl *DevPtrDecl = nullptr) 7374 : Ptr(Ptr), DevPtrDecl(DevPtrDecl) {} 7375 llvm::Value *operator*() const { return Ptr; } 7376 const ValueDecl *getDevicePtrDecl() const { return DevPtrDecl; } 7377 void setDevicePtrDecl(const ValueDecl *D) { DevPtrDecl = D; } 7378 }; 7379 7380 using MapExprsArrayTy = SmallVector<MappingExprInfo, 4>; 7381 using MapBaseValuesArrayTy = SmallVector<BasePointerInfo, 4>; 7382 using MapValuesArrayTy = SmallVector<llvm::Value *, 4>; 7383 using MapFlagsArrayTy = SmallVector<OpenMPOffloadMappingFlags, 4>; 7384 using MapMappersArrayTy = SmallVector<const ValueDecl *, 4>; 7385 using MapDimArrayTy = SmallVector<uint64_t, 4>; 7386 using MapNonContiguousArrayTy = SmallVector<MapValuesArrayTy, 4>; 7387 7388 /// This structure contains combined information generated for mappable 7389 /// clauses, including base pointers, pointers, sizes, map types, user-defined 7390 /// mappers, and non-contiguous information. 
7391 struct MapCombinedInfoTy { 7392 struct StructNonContiguousInfo { 7393 bool IsNonContiguous = false; 7394 MapDimArrayTy Dims; 7395 MapNonContiguousArrayTy Offsets; 7396 MapNonContiguousArrayTy Counts; 7397 MapNonContiguousArrayTy Strides; 7398 }; 7399 MapExprsArrayTy Exprs; 7400 MapBaseValuesArrayTy BasePointers; 7401 MapValuesArrayTy Pointers; 7402 MapValuesArrayTy Sizes; 7403 MapFlagsArrayTy Types; 7404 MapMappersArrayTy Mappers; 7405 StructNonContiguousInfo NonContigInfo; 7406 7407 /// Append arrays in \a CurInfo. 7408 void append(MapCombinedInfoTy &CurInfo) { 7409 Exprs.append(CurInfo.Exprs.begin(), CurInfo.Exprs.end()); 7410 BasePointers.append(CurInfo.BasePointers.begin(), 7411 CurInfo.BasePointers.end()); 7412 Pointers.append(CurInfo.Pointers.begin(), CurInfo.Pointers.end()); 7413 Sizes.append(CurInfo.Sizes.begin(), CurInfo.Sizes.end()); 7414 Types.append(CurInfo.Types.begin(), CurInfo.Types.end()); 7415 Mappers.append(CurInfo.Mappers.begin(), CurInfo.Mappers.end()); 7416 NonContigInfo.Dims.append(CurInfo.NonContigInfo.Dims.begin(), 7417 CurInfo.NonContigInfo.Dims.end()); 7418 NonContigInfo.Offsets.append(CurInfo.NonContigInfo.Offsets.begin(), 7419 CurInfo.NonContigInfo.Offsets.end()); 7420 NonContigInfo.Counts.append(CurInfo.NonContigInfo.Counts.begin(), 7421 CurInfo.NonContigInfo.Counts.end()); 7422 NonContigInfo.Strides.append(CurInfo.NonContigInfo.Strides.begin(), 7423 CurInfo.NonContigInfo.Strides.end()); 7424 } 7425 }; 7426 7427 /// Map between a struct and the its lowest & highest elements which have been 7428 /// mapped. 
7429 /// [ValueDecl *] --> {LE(FieldIndex, Pointer), 7430 /// HE(FieldIndex, Pointer)} 7431 struct StructRangeInfoTy { 7432 MapCombinedInfoTy PreliminaryMapData; 7433 std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> LowestElem = { 7434 0, Address::invalid()}; 7435 std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> HighestElem = { 7436 0, Address::invalid()}; 7437 Address Base = Address::invalid(); 7438 Address LB = Address::invalid(); 7439 bool IsArraySection = false; 7440 bool HasCompleteRecord = false; 7441 }; 7442 7443 private: 7444 /// Kind that defines how a device pointer has to be returned. 7445 struct MapInfo { 7446 OMPClauseMappableExprCommon::MappableExprComponentListRef Components; 7447 OpenMPMapClauseKind MapType = OMPC_MAP_unknown; 7448 ArrayRef<OpenMPMapModifierKind> MapModifiers; 7449 ArrayRef<OpenMPMotionModifierKind> MotionModifiers; 7450 bool ReturnDevicePointer = false; 7451 bool IsImplicit = false; 7452 const ValueDecl *Mapper = nullptr; 7453 const Expr *VarRef = nullptr; 7454 bool ForDeviceAddr = false; 7455 7456 MapInfo() = default; 7457 MapInfo( 7458 OMPClauseMappableExprCommon::MappableExprComponentListRef Components, 7459 OpenMPMapClauseKind MapType, 7460 ArrayRef<OpenMPMapModifierKind> MapModifiers, 7461 ArrayRef<OpenMPMotionModifierKind> MotionModifiers, 7462 bool ReturnDevicePointer, bool IsImplicit, 7463 const ValueDecl *Mapper = nullptr, const Expr *VarRef = nullptr, 7464 bool ForDeviceAddr = false) 7465 : Components(Components), MapType(MapType), MapModifiers(MapModifiers), 7466 MotionModifiers(MotionModifiers), 7467 ReturnDevicePointer(ReturnDevicePointer), IsImplicit(IsImplicit), 7468 Mapper(Mapper), VarRef(VarRef), ForDeviceAddr(ForDeviceAddr) {} 7469 }; 7470 7471 /// If use_device_ptr or use_device_addr is used on a decl which is a struct 7472 /// member and there is no map information about it, then emission of that 7473 /// entry is deferred until the whole struct has been processed. 
  struct DeferredDevicePtrEntryTy {
    const Expr *IE = nullptr;
    const ValueDecl *VD = nullptr;
    // NOTE(review): presumably distinguishes use_device_addr entries from
    // use_device_ptr ones — confirm against the code that drains these.
    bool ForDeviceAddr = false;

    DeferredDevicePtrEntryTy(const Expr *IE, const ValueDecl *VD,
                             bool ForDeviceAddr)
        : IE(IE), VD(VD), ForDeviceAddr(ForDeviceAddr) {}
  };

  /// The target directive from where the mappable clauses were extracted. It
  /// is either an executable directive or a user-defined mapper directive.
  llvm::PointerUnion<const OMPExecutableDirective *,
                     const OMPDeclareMapperDecl *>
      CurDir;

  /// Function the directive is being generated for.
  CodeGenFunction &CGF;

  /// Set of all first private variables in the current directive.
  /// bool data is set to true if the variable is implicitly marked as
  /// firstprivate, false otherwise.
  llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, bool> FirstPrivateDecls;

  /// Map between device pointer declarations and their expression components.
  /// The key value for declarations in 'this' is null.
  llvm::DenseMap<
      const ValueDecl *,
      SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>>
      DevPointersMap;

  /// Map between lambda declarations and their map type.
  llvm::DenseMap<const ValueDecl *, const OMPMapClause *> LambdasMap;

  /// Compute the number of bytes to be mapped for expression \a E, as a
  /// runtime llvm::Value of the target's size type. Array shaping expressions
  /// and array sections are measured by their section length rather than by
  /// the static type of the expression; everything else falls through to
  /// CGF.getTypeSize of the canonical (non-reference) type.
  llvm::Value *getExprTypeSize(const Expr *E) const {
    QualType ExprTy = E->getType().getCanonicalType();

    // Calculate the size for array shaping expression: the pointee size
    // multiplied by every listed dimension.
    if (const auto *OAE = dyn_cast<OMPArrayShapingExpr>(E)) {
      llvm::Value *Size =
          CGF.getTypeSize(OAE->getBase()->getType()->getPointeeType());
      for (const Expr *SE : OAE->getDimensions()) {
        llvm::Value *Sz = CGF.EmitScalarExpr(SE);
        Sz = CGF.EmitScalarConversion(Sz, SE->getType(),
                                      CGF.getContext().getSizeType(),
                                      SE->getExprLoc());
        Size = CGF.Builder.CreateNUWMul(Size, Sz);
      }
      return Size;
    }

    // Reference types are ignored for mapping purposes.
    if (const auto *RefTy = ExprTy->getAs<ReferenceType>())
      ExprTy = RefTy->getPointeeType().getCanonicalType();

    // Given that an array section is considered a built-in type, we need to
    // do the calculation based on the length of the section instead of relying
    // on CGF.getTypeSize(E->getType()).
    if (const auto *OAE = dyn_cast<OMPArraySectionExpr>(E)) {
      QualType BaseTy = OMPArraySectionExpr::getBaseOriginalType(
                            OAE->getBase()->IgnoreParenImpCasts())
                            .getCanonicalType();

      // If there is no length associated with the expression and lower bound
      // is not specified too, that means we are using the whole length of the
      // base.
      if (!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
          !OAE->getLowerBound())
        return CGF.getTypeSize(BaseTy);

      // Element size comes from the pointee (pointer base) or the element
      // type (array base).
      llvm::Value *ElemSize;
      if (const auto *PTy = BaseTy->getAs<PointerType>()) {
        ElemSize = CGF.getTypeSize(PTy->getPointeeType().getCanonicalType());
      } else {
        const auto *ATy = cast<ArrayType>(BaseTy.getTypePtr());
        assert(ATy && "Expecting array type if not a pointer type.");
        ElemSize = CGF.getTypeSize(ATy->getElementType().getCanonicalType());
      }

      // If we don't have a length at this point, that is because we have an
      // array section with a single element.
      if (!OAE->getLength() && OAE->getColonLocFirst().isInvalid())
        return ElemSize;

      // Explicit length: size = length * element size.
      if (const Expr *LenExpr = OAE->getLength()) {
        llvm::Value *LengthVal = CGF.EmitScalarExpr(LenExpr);
        LengthVal = CGF.EmitScalarConversion(LengthVal, LenExpr->getType(),
                                             CGF.getContext().getSizeType(),
                                             LenExpr->getExprLoc());
        return CGF.Builder.CreateNUWMul(LengthVal, ElemSize);
      }
      assert(!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
             OAE->getLowerBound() && "expected array_section[lb:].");
      // Size = sizetype - lb * elemtype;
      // i.e. sizeof(base) minus the bytes skipped by the lower bound, clamped
      // to zero (via the select below) when lb points past the end.
      llvm::Value *LengthVal = CGF.getTypeSize(BaseTy);
      llvm::Value *LBVal = CGF.EmitScalarExpr(OAE->getLowerBound());
      LBVal = CGF.EmitScalarConversion(LBVal, OAE->getLowerBound()->getType(),
                                       CGF.getContext().getSizeType(),
                                       OAE->getLowerBound()->getExprLoc());
      LBVal = CGF.Builder.CreateNUWMul(LBVal, ElemSize);
      llvm::Value *Cmp = CGF.Builder.CreateICmpUGT(LengthVal, LBVal);
      llvm::Value *TrueVal = CGF.Builder.CreateNUWSub(LengthVal, LBVal);
      LengthVal = CGF.Builder.CreateSelect(
          Cmp, TrueVal, llvm::ConstantInt::get(CGF.SizeTy, 0));
      return LengthVal;
    }
    return CGF.getTypeSize(ExprTy);
  }

  /// Return the corresponding bits for a given map clause modifier. Add
  /// a flag marking the map as a pointer if requested. Add a flag marking the
  /// map as the first one of a series of maps that relate to the same map
  /// expression.
  OpenMPOffloadMappingFlags getMapTypeBits(
      OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
      ArrayRef<OpenMPMotionModifierKind> MotionModifiers, bool IsImplicit,
      bool AddPtrFlag, bool AddIsTargetParamFlag, bool IsNonContiguous) const {
    OpenMPOffloadMappingFlags Bits =
        IsImplicit ? OMP_MAP_IMPLICIT : OMP_MAP_NONE;
    switch (MapType) {
    case OMPC_MAP_alloc:
    case OMPC_MAP_release:
      // alloc and release is the default behavior in the runtime library, i.e.
      // if we don't pass any bits alloc/release that is what the runtime is
      // going to do. Therefore, we don't need to signal anything for these two
      // type modifiers.
      break;
    case OMPC_MAP_to:
      Bits |= OMP_MAP_TO;
      break;
    case OMPC_MAP_from:
      Bits |= OMP_MAP_FROM;
      break;
    case OMPC_MAP_tofrom:
      Bits |= OMP_MAP_TO | OMP_MAP_FROM;
      break;
    case OMPC_MAP_delete:
      Bits |= OMP_MAP_DELETE;
      break;
    case OMPC_MAP_unknown:
      llvm_unreachable("Unexpected map type!");
    }
    if (AddPtrFlag)
      Bits |= OMP_MAP_PTR_AND_OBJ;
    if (AddIsTargetParamFlag)
      Bits |= OMP_MAP_TARGET_PARAM;
    if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_always))
      Bits |= OMP_MAP_ALWAYS;
    if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_close))
      Bits |= OMP_MAP_CLOSE;
    // 'present' may come from either a map modifier or a motion (to/from)
    // modifier.
    if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_present) ||
        llvm::is_contained(MotionModifiers, OMPC_MOTION_MODIFIER_present))
      Bits |= OMP_MAP_PRESENT;
    if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_ompx_hold))
      Bits |= OMP_MAP_OMPX_HOLD;
    if (IsNonContiguous)
      Bits |= OMP_MAP_NON_CONTIG;
    return Bits;
  }

  /// Return true if the provided expression is a final array section. A
  /// final array section is one whose length can't be proved to be one.
  bool isFinalArraySectionExpression(const Expr *E) const {
    const auto *OASE = dyn_cast<OMPArraySectionExpr>(E);

    // It is not an array section and therefore not a unity-size one.
    if (!OASE)
      return false;

    // An array section with no colon always refers to a single element.
    if (OASE->getColonLocFirst().isInvalid())
      return false;

    const Expr *Length = OASE->getLength();

    // If we don't have a length we have to check if the array has size 1
    // for this dimension. Also, we should always expect a length if the
    // base type is pointer.
    if (!Length) {
      QualType BaseQTy = OMPArraySectionExpr::getBaseOriginalType(
                             OASE->getBase()->IgnoreParenImpCasts())
                             .getCanonicalType();
      if (const auto *ATy = dyn_cast<ConstantArrayType>(BaseQTy.getTypePtr()))
        return ATy->getSize().getSExtValue() != 1;
      // If we don't have a constant dimension length, we have to consider
      // the current section as having any size, so it is not necessarily
      // unitary. If it happens to be unity size, that's user fault.
      return true;
    }

    // Check if the length evaluates to 1.
    Expr::EvalResult Result;
    if (!Length->EvaluateAsInt(Result, CGF.getContext()))
      return true; // Can have more than size 1.

    llvm::APSInt ConstLength = Result.Val.getInt();
    return ConstLength.getSExtValue() != 1;
  }

  /// Generate the base pointers, section pointers, sizes, map type bits, and
  /// user-defined mappers (all included in \a CombinedInfo) for the provided
  /// map type, map or motion modifiers, and expression components.
  /// \a IsFirstComponent should be set to true if the provided set of
  /// components is the first associated with a capture.
7678 void generateInfoForComponentList( 7679 OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers, 7680 ArrayRef<OpenMPMotionModifierKind> MotionModifiers, 7681 OMPClauseMappableExprCommon::MappableExprComponentListRef Components, 7682 MapCombinedInfoTy &CombinedInfo, StructRangeInfoTy &PartialStruct, 7683 bool IsFirstComponentList, bool IsImplicit, 7684 const ValueDecl *Mapper = nullptr, bool ForDeviceAddr = false, 7685 const ValueDecl *BaseDecl = nullptr, const Expr *MapExpr = nullptr, 7686 ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef> 7687 OverlappedElements = llvm::None) const { 7688 // The following summarizes what has to be generated for each map and the 7689 // types below. The generated information is expressed in this order: 7690 // base pointer, section pointer, size, flags 7691 // (to add to the ones that come from the map type and modifier). 7692 // 7693 // double d; 7694 // int i[100]; 7695 // float *p; 7696 // 7697 // struct S1 { 7698 // int i; 7699 // float f[50]; 7700 // } 7701 // struct S2 { 7702 // int i; 7703 // float f[50]; 7704 // S1 s; 7705 // double *p; 7706 // struct S2 *ps; 7707 // int &ref; 7708 // } 7709 // S2 s; 7710 // S2 *ps; 7711 // 7712 // map(d) 7713 // &d, &d, sizeof(double), TARGET_PARAM | TO | FROM 7714 // 7715 // map(i) 7716 // &i, &i, 100*sizeof(int), TARGET_PARAM | TO | FROM 7717 // 7718 // map(i[1:23]) 7719 // &i(=&i[0]), &i[1], 23*sizeof(int), TARGET_PARAM | TO | FROM 7720 // 7721 // map(p) 7722 // &p, &p, sizeof(float*), TARGET_PARAM | TO | FROM 7723 // 7724 // map(p[1:24]) 7725 // &p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM | PTR_AND_OBJ 7726 // in unified shared memory mode or for local pointers 7727 // p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM 7728 // 7729 // map(s) 7730 // &s, &s, sizeof(S2), TARGET_PARAM | TO | FROM 7731 // 7732 // map(s.i) 7733 // &s, &(s.i), sizeof(int), TARGET_PARAM | TO | FROM 7734 // 7735 // map(s.s.f) 7736 // &s, &(s.s.f[0]), 
50*sizeof(float), TARGET_PARAM | TO | FROM 7737 // 7738 // map(s.p) 7739 // &s, &(s.p), sizeof(double*), TARGET_PARAM | TO | FROM 7740 // 7741 // map(to: s.p[:22]) 7742 // &s, &(s.p), sizeof(double*), TARGET_PARAM (*) 7743 // &s, &(s.p), sizeof(double*), MEMBER_OF(1) (**) 7744 // &(s.p), &(s.p[0]), 22*sizeof(double), 7745 // MEMBER_OF(1) | PTR_AND_OBJ | TO (***) 7746 // (*) alloc space for struct members, only this is a target parameter 7747 // (**) map the pointer (nothing to be mapped in this example) (the compiler 7748 // optimizes this entry out, same in the examples below) 7749 // (***) map the pointee (map: to) 7750 // 7751 // map(to: s.ref) 7752 // &s, &(s.ref), sizeof(int*), TARGET_PARAM (*) 7753 // &s, &(s.ref), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | TO (***) 7754 // (*) alloc space for struct members, only this is a target parameter 7755 // (**) map the pointer (nothing to be mapped in this example) (the compiler 7756 // optimizes this entry out, same in the examples below) 7757 // (***) map the pointee (map: to) 7758 // 7759 // map(s.ps) 7760 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM | TO | FROM 7761 // 7762 // map(from: s.ps->s.i) 7763 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM 7764 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1) 7765 // &(s.ps), &(s.ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM 7766 // 7767 // map(to: s.ps->ps) 7768 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM 7769 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1) 7770 // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | TO 7771 // 7772 // map(s.ps->ps->ps) 7773 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM 7774 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1) 7775 // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ 7776 // &(s.ps->ps), &(s.ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM 7777 // 7778 // map(to: s.ps->ps->s.f[:22]) 7779 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM 7780 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1) 7781 // &(s.ps), &(s.ps->ps), sizeof(S2*), 
MEMBER_OF(1) | PTR_AND_OBJ 7782 // &(s.ps->ps), &(s.ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO 7783 // 7784 // map(ps) 7785 // &ps, &ps, sizeof(S2*), TARGET_PARAM | TO | FROM 7786 // 7787 // map(ps->i) 7788 // ps, &(ps->i), sizeof(int), TARGET_PARAM | TO | FROM 7789 // 7790 // map(ps->s.f) 7791 // ps, &(ps->s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM 7792 // 7793 // map(from: ps->p) 7794 // ps, &(ps->p), sizeof(double*), TARGET_PARAM | FROM 7795 // 7796 // map(to: ps->p[:22]) 7797 // ps, &(ps->p), sizeof(double*), TARGET_PARAM 7798 // ps, &(ps->p), sizeof(double*), MEMBER_OF(1) 7799 // &(ps->p), &(ps->p[0]), 22*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | TO 7800 // 7801 // map(ps->ps) 7802 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM | TO | FROM 7803 // 7804 // map(from: ps->ps->s.i) 7805 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM 7806 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1) 7807 // &(ps->ps), &(ps->ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM 7808 // 7809 // map(from: ps->ps->ps) 7810 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM 7811 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1) 7812 // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | FROM 7813 // 7814 // map(ps->ps->ps->ps) 7815 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM 7816 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1) 7817 // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ 7818 // &(ps->ps->ps), &(ps->ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM 7819 // 7820 // map(to: ps->ps->ps->s.f[:22]) 7821 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM 7822 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1) 7823 // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ 7824 // &(ps->ps->ps), &(ps->ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO 7825 // 7826 // map(to: s.f[:22]) map(from: s.p[:33]) 7827 // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1) + 7828 // sizeof(double*) (**), TARGET_PARAM 7829 // &s, &(s.f[0]), 22*sizeof(float), 
MEMBER_OF(1) | TO 7830 // &s, &(s.p), sizeof(double*), MEMBER_OF(1) 7831 // &(s.p), &(s.p[0]), 33*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | FROM 7832 // (*) allocate contiguous space needed to fit all mapped members even if 7833 // we allocate space for members not mapped (in this example, 7834 // s.f[22..49] and s.s are not mapped, yet we must allocate space for 7835 // them as well because they fall between &s.f[0] and &s.p) 7836 // 7837 // map(from: s.f[:22]) map(to: ps->p[:33]) 7838 // &s, &(s.f[0]), 22*sizeof(float), TARGET_PARAM | FROM 7839 // ps, &(ps->p), sizeof(S2*), TARGET_PARAM 7840 // ps, &(ps->p), sizeof(double*), MEMBER_OF(2) (*) 7841 // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(2) | PTR_AND_OBJ | TO 7842 // (*) the struct this entry pertains to is the 2nd element in the list of 7843 // arguments, hence MEMBER_OF(2) 7844 // 7845 // map(from: s.f[:22], s.s) map(to: ps->p[:33]) 7846 // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1), TARGET_PARAM 7847 // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | FROM 7848 // &s, &(s.s), sizeof(struct S1), MEMBER_OF(1) | FROM 7849 // ps, &(ps->p), sizeof(S2*), TARGET_PARAM 7850 // ps, &(ps->p), sizeof(double*), MEMBER_OF(4) (*) 7851 // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(4) | PTR_AND_OBJ | TO 7852 // (*) the struct this entry pertains to is the 4th element in the list 7853 // of arguments, hence MEMBER_OF(4) 7854 7855 // Track if the map information being generated is the first for a capture. 7856 bool IsCaptureFirstInfo = IsFirstComponentList; 7857 // When the variable is on a declare target link or in a to clause with 7858 // unified memory, a reference is needed to hold the host/device address 7859 // of the variable. 7860 bool RequiresReference = false; 7861 7862 // Scan the components from the base to the complete expression. 
7863 auto CI = Components.rbegin(); 7864 auto CE = Components.rend(); 7865 auto I = CI; 7866 7867 // Track if the map information being generated is the first for a list of 7868 // components. 7869 bool IsExpressionFirstInfo = true; 7870 bool FirstPointerInComplexData = false; 7871 Address BP = Address::invalid(); 7872 const Expr *AssocExpr = I->getAssociatedExpression(); 7873 const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr); 7874 const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr); 7875 const auto *OAShE = dyn_cast<OMPArrayShapingExpr>(AssocExpr); 7876 7877 if (isa<MemberExpr>(AssocExpr)) { 7878 // The base is the 'this' pointer. The content of the pointer is going 7879 // to be the base of the field being mapped. 7880 BP = CGF.LoadCXXThisAddress(); 7881 } else if ((AE && isa<CXXThisExpr>(AE->getBase()->IgnoreParenImpCasts())) || 7882 (OASE && 7883 isa<CXXThisExpr>(OASE->getBase()->IgnoreParenImpCasts()))) { 7884 BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF); 7885 } else if (OAShE && 7886 isa<CXXThisExpr>(OAShE->getBase()->IgnoreParenCasts())) { 7887 BP = Address::deprecated( 7888 CGF.EmitScalarExpr(OAShE->getBase()), 7889 CGF.getContext().getTypeAlignInChars(OAShE->getBase()->getType())); 7890 } else { 7891 // The base is the reference to the variable. 7892 // BP = &Var. 7893 BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF); 7894 if (const auto *VD = 7895 dyn_cast_or_null<VarDecl>(I->getAssociatedDeclaration())) { 7896 if (llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 7897 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD)) { 7898 if ((*Res == OMPDeclareTargetDeclAttr::MT_Link) || 7899 (*Res == OMPDeclareTargetDeclAttr::MT_To && 7900 CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())) { 7901 RequiresReference = true; 7902 BP = CGF.CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD); 7903 } 7904 } 7905 } 7906 7907 // If the variable is a pointer and is being dereferenced (i.e. 
is not 7908 // the last component), the base has to be the pointer itself, not its 7909 // reference. References are ignored for mapping purposes. 7910 QualType Ty = 7911 I->getAssociatedDeclaration()->getType().getNonReferenceType(); 7912 if (Ty->isAnyPointerType() && std::next(I) != CE) { 7913 // No need to generate individual map information for the pointer, it 7914 // can be associated with the combined storage if shared memory mode is 7915 // active or the base declaration is not global variable. 7916 const auto *VD = dyn_cast<VarDecl>(I->getAssociatedDeclaration()); 7917 if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() || 7918 !VD || VD->hasLocalStorage()) 7919 BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>()); 7920 else 7921 FirstPointerInComplexData = true; 7922 ++I; 7923 } 7924 } 7925 7926 // Track whether a component of the list should be marked as MEMBER_OF some 7927 // combined entry (for partial structs). Only the first PTR_AND_OBJ entry 7928 // in a component list should be marked as MEMBER_OF, all subsequent entries 7929 // do not belong to the base struct. E.g. 7930 // struct S2 s; 7931 // s.ps->ps->ps->f[:] 7932 // (1) (2) (3) (4) 7933 // ps(1) is a member pointer, ps(2) is a pointee of ps(1), so it is a 7934 // PTR_AND_OBJ entry; the PTR is ps(1), so MEMBER_OF the base struct. ps(3) 7935 // is the pointee of ps(2) which is not member of struct s, so it should not 7936 // be marked as such (it is still PTR_AND_OBJ). 7937 // The variable is initialized to false so that PTR_AND_OBJ entries which 7938 // are not struct members are not considered (e.g. array of pointers to 7939 // data). 7940 bool ShouldBeMemberOf = false; 7941 7942 // Variable keeping track of whether or not we have encountered a component 7943 // in the component list which is a member expression. 
Useful when we have a 7944 // pointer or a final array section, in which case it is the previous 7945 // component in the list which tells us whether we have a member expression. 7946 // E.g. X.f[:] 7947 // While processing the final array section "[:]" it is "f" which tells us 7948 // whether we are dealing with a member of a declared struct. 7949 const MemberExpr *EncounteredME = nullptr; 7950 7951 // Track for the total number of dimension. Start from one for the dummy 7952 // dimension. 7953 uint64_t DimSize = 1; 7954 7955 bool IsNonContiguous = CombinedInfo.NonContigInfo.IsNonContiguous; 7956 bool IsPrevMemberReference = false; 7957 7958 for (; I != CE; ++I) { 7959 // If the current component is member of a struct (parent struct) mark it. 7960 if (!EncounteredME) { 7961 EncounteredME = dyn_cast<MemberExpr>(I->getAssociatedExpression()); 7962 // If we encounter a PTR_AND_OBJ entry from now on it should be marked 7963 // as MEMBER_OF the parent struct. 7964 if (EncounteredME) { 7965 ShouldBeMemberOf = true; 7966 // Do not emit as complex pointer if this is actually not array-like 7967 // expression. 7968 if (FirstPointerInComplexData) { 7969 QualType Ty = std::prev(I) 7970 ->getAssociatedDeclaration() 7971 ->getType() 7972 .getNonReferenceType(); 7973 BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>()); 7974 FirstPointerInComplexData = false; 7975 } 7976 } 7977 } 7978 7979 auto Next = std::next(I); 7980 7981 // We need to generate the addresses and sizes if this is the last 7982 // component, if the component is a pointer or if it is an array section 7983 // whose length can't be proved to be one. If this is a pointer, it 7984 // becomes the base address for the following components. 7985 7986 // A final array section, is one whose length can't be proved to be one. 7987 // If the map item is non-contiguous then we don't treat any array section 7988 // as final array section. 
7989 bool IsFinalArraySection = 7990 !IsNonContiguous && 7991 isFinalArraySectionExpression(I->getAssociatedExpression()); 7992 7993 // If we have a declaration for the mapping use that, otherwise use 7994 // the base declaration of the map clause. 7995 const ValueDecl *MapDecl = (I->getAssociatedDeclaration()) 7996 ? I->getAssociatedDeclaration() 7997 : BaseDecl; 7998 MapExpr = (I->getAssociatedExpression()) ? I->getAssociatedExpression() 7999 : MapExpr; 8000 8001 // Get information on whether the element is a pointer. Have to do a 8002 // special treatment for array sections given that they are built-in 8003 // types. 8004 const auto *OASE = 8005 dyn_cast<OMPArraySectionExpr>(I->getAssociatedExpression()); 8006 const auto *OAShE = 8007 dyn_cast<OMPArrayShapingExpr>(I->getAssociatedExpression()); 8008 const auto *UO = dyn_cast<UnaryOperator>(I->getAssociatedExpression()); 8009 const auto *BO = dyn_cast<BinaryOperator>(I->getAssociatedExpression()); 8010 bool IsPointer = 8011 OAShE || 8012 (OASE && OMPArraySectionExpr::getBaseOriginalType(OASE) 8013 .getCanonicalType() 8014 ->isAnyPointerType()) || 8015 I->getAssociatedExpression()->getType()->isAnyPointerType(); 8016 bool IsMemberReference = isa<MemberExpr>(I->getAssociatedExpression()) && 8017 MapDecl && 8018 MapDecl->getType()->isLValueReferenceType(); 8019 bool IsNonDerefPointer = IsPointer && !UO && !BO && !IsNonContiguous; 8020 8021 if (OASE) 8022 ++DimSize; 8023 8024 if (Next == CE || IsMemberReference || IsNonDerefPointer || 8025 IsFinalArraySection) { 8026 // If this is not the last component, we expect the pointer to be 8027 // associated with an array expression or member expression. 
8028 assert((Next == CE || 8029 isa<MemberExpr>(Next->getAssociatedExpression()) || 8030 isa<ArraySubscriptExpr>(Next->getAssociatedExpression()) || 8031 isa<OMPArraySectionExpr>(Next->getAssociatedExpression()) || 8032 isa<OMPArrayShapingExpr>(Next->getAssociatedExpression()) || 8033 isa<UnaryOperator>(Next->getAssociatedExpression()) || 8034 isa<BinaryOperator>(Next->getAssociatedExpression())) && 8035 "Unexpected expression"); 8036 8037 Address LB = Address::invalid(); 8038 Address LowestElem = Address::invalid(); 8039 auto &&EmitMemberExprBase = [](CodeGenFunction &CGF, 8040 const MemberExpr *E) { 8041 const Expr *BaseExpr = E->getBase(); 8042 // If this is s.x, emit s as an lvalue. If it is s->x, emit s as a 8043 // scalar. 8044 LValue BaseLV; 8045 if (E->isArrow()) { 8046 LValueBaseInfo BaseInfo; 8047 TBAAAccessInfo TBAAInfo; 8048 Address Addr = 8049 CGF.EmitPointerWithAlignment(BaseExpr, &BaseInfo, &TBAAInfo); 8050 QualType PtrTy = BaseExpr->getType()->getPointeeType(); 8051 BaseLV = CGF.MakeAddrLValue(Addr, PtrTy, BaseInfo, TBAAInfo); 8052 } else { 8053 BaseLV = CGF.EmitOMPSharedLValue(BaseExpr); 8054 } 8055 return BaseLV; 8056 }; 8057 if (OAShE) { 8058 LowestElem = LB = 8059 Address::deprecated(CGF.EmitScalarExpr(OAShE->getBase()), 8060 CGF.getContext().getTypeAlignInChars( 8061 OAShE->getBase()->getType())); 8062 } else if (IsMemberReference) { 8063 const auto *ME = cast<MemberExpr>(I->getAssociatedExpression()); 8064 LValue BaseLVal = EmitMemberExprBase(CGF, ME); 8065 LowestElem = CGF.EmitLValueForFieldInitialization( 8066 BaseLVal, cast<FieldDecl>(MapDecl)) 8067 .getAddress(CGF); 8068 LB = CGF.EmitLoadOfReferenceLValue(LowestElem, MapDecl->getType()) 8069 .getAddress(CGF); 8070 } else { 8071 LowestElem = LB = 8072 CGF.EmitOMPSharedLValue(I->getAssociatedExpression()) 8073 .getAddress(CGF); 8074 } 8075 8076 // If this component is a pointer inside the base struct then we don't 8077 // need to create any entry for it - it will be combined with the object 
8078 // it is pointing to into a single PTR_AND_OBJ entry. 8079 bool IsMemberPointerOrAddr = 8080 EncounteredME && 8081 (((IsPointer || ForDeviceAddr) && 8082 I->getAssociatedExpression() == EncounteredME) || 8083 (IsPrevMemberReference && !IsPointer) || 8084 (IsMemberReference && Next != CE && 8085 !Next->getAssociatedExpression()->getType()->isPointerType())); 8086 if (!OverlappedElements.empty() && Next == CE) { 8087 // Handle base element with the info for overlapped elements. 8088 assert(!PartialStruct.Base.isValid() && "The base element is set."); 8089 assert(!IsPointer && 8090 "Unexpected base element with the pointer type."); 8091 // Mark the whole struct as the struct that requires allocation on the 8092 // device. 8093 PartialStruct.LowestElem = {0, LowestElem}; 8094 CharUnits TypeSize = CGF.getContext().getTypeSizeInChars( 8095 I->getAssociatedExpression()->getType()); 8096 Address HB = CGF.Builder.CreateConstGEP( 8097 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 8098 LowestElem, CGF.VoidPtrTy, CGF.Int8Ty), 8099 TypeSize.getQuantity() - 1); 8100 PartialStruct.HighestElem = { 8101 std::numeric_limits<decltype( 8102 PartialStruct.HighestElem.first)>::max(), 8103 HB}; 8104 PartialStruct.Base = BP; 8105 PartialStruct.LB = LB; 8106 assert( 8107 PartialStruct.PreliminaryMapData.BasePointers.empty() && 8108 "Overlapped elements must be used only once for the variable."); 8109 std::swap(PartialStruct.PreliminaryMapData, CombinedInfo); 8110 // Emit data for non-overlapped data. 8111 OpenMPOffloadMappingFlags Flags = 8112 OMP_MAP_MEMBER_OF | 8113 getMapTypeBits(MapType, MapModifiers, MotionModifiers, IsImplicit, 8114 /*AddPtrFlag=*/false, 8115 /*AddIsTargetParamFlag=*/false, IsNonContiguous); 8116 llvm::Value *Size = nullptr; 8117 // Do bitcopy of all non-overlapped structure elements. 
8118 for (OMPClauseMappableExprCommon::MappableExprComponentListRef 8119 Component : OverlappedElements) { 8120 Address ComponentLB = Address::invalid(); 8121 for (const OMPClauseMappableExprCommon::MappableComponent &MC : 8122 Component) { 8123 if (const ValueDecl *VD = MC.getAssociatedDeclaration()) { 8124 const auto *FD = dyn_cast<FieldDecl>(VD); 8125 if (FD && FD->getType()->isLValueReferenceType()) { 8126 const auto *ME = 8127 cast<MemberExpr>(MC.getAssociatedExpression()); 8128 LValue BaseLVal = EmitMemberExprBase(CGF, ME); 8129 ComponentLB = 8130 CGF.EmitLValueForFieldInitialization(BaseLVal, FD) 8131 .getAddress(CGF); 8132 } else { 8133 ComponentLB = 8134 CGF.EmitOMPSharedLValue(MC.getAssociatedExpression()) 8135 .getAddress(CGF); 8136 } 8137 Size = CGF.Builder.CreatePtrDiff( 8138 CGF.Int8Ty, CGF.EmitCastToVoidPtr(ComponentLB.getPointer()), 8139 CGF.EmitCastToVoidPtr(LB.getPointer())); 8140 break; 8141 } 8142 } 8143 assert(Size && "Failed to determine structure size"); 8144 CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr); 8145 CombinedInfo.BasePointers.push_back(BP.getPointer()); 8146 CombinedInfo.Pointers.push_back(LB.getPointer()); 8147 CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast( 8148 Size, CGF.Int64Ty, /*isSigned=*/true)); 8149 CombinedInfo.Types.push_back(Flags); 8150 CombinedInfo.Mappers.push_back(nullptr); 8151 CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? 
DimSize 8152 : 1); 8153 LB = CGF.Builder.CreateConstGEP(ComponentLB, 1); 8154 } 8155 CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr); 8156 CombinedInfo.BasePointers.push_back(BP.getPointer()); 8157 CombinedInfo.Pointers.push_back(LB.getPointer()); 8158 Size = CGF.Builder.CreatePtrDiff( 8159 CGF.Int8Ty, CGF.Builder.CreateConstGEP(HB, 1).getPointer(), 8160 CGF.EmitCastToVoidPtr(LB.getPointer())); 8161 CombinedInfo.Sizes.push_back( 8162 CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true)); 8163 CombinedInfo.Types.push_back(Flags); 8164 CombinedInfo.Mappers.push_back(nullptr); 8165 CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize 8166 : 1); 8167 break; 8168 } 8169 llvm::Value *Size = getExprTypeSize(I->getAssociatedExpression()); 8170 if (!IsMemberPointerOrAddr || 8171 (Next == CE && MapType != OMPC_MAP_unknown)) { 8172 CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr); 8173 CombinedInfo.BasePointers.push_back(BP.getPointer()); 8174 CombinedInfo.Pointers.push_back(LB.getPointer()); 8175 CombinedInfo.Sizes.push_back( 8176 CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true)); 8177 CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize 8178 : 1); 8179 8180 // If Mapper is valid, the last component inherits the mapper. 8181 bool HasMapper = Mapper && Next == CE; 8182 CombinedInfo.Mappers.push_back(HasMapper ? Mapper : nullptr); 8183 8184 // We need to add a pointer flag for each map that comes from the 8185 // same expression except for the first one. We also need to signal 8186 // this map is the first one that relates with the current capture 8187 // (there is a set of entries for each capture). 
8188 OpenMPOffloadMappingFlags Flags = getMapTypeBits( 8189 MapType, MapModifiers, MotionModifiers, IsImplicit, 8190 !IsExpressionFirstInfo || RequiresReference || 8191 FirstPointerInComplexData || IsMemberReference, 8192 IsCaptureFirstInfo && !RequiresReference, IsNonContiguous); 8193 8194 if (!IsExpressionFirstInfo || IsMemberReference) { 8195 // If we have a PTR_AND_OBJ pair where the OBJ is a pointer as well, 8196 // then we reset the TO/FROM/ALWAYS/DELETE/CLOSE flags. 8197 if (IsPointer || (IsMemberReference && Next != CE)) 8198 Flags &= ~(OMP_MAP_TO | OMP_MAP_FROM | OMP_MAP_ALWAYS | 8199 OMP_MAP_DELETE | OMP_MAP_CLOSE); 8200 8201 if (ShouldBeMemberOf) { 8202 // Set placeholder value MEMBER_OF=FFFF to indicate that the flag 8203 // should be later updated with the correct value of MEMBER_OF. 8204 Flags |= OMP_MAP_MEMBER_OF; 8205 // From now on, all subsequent PTR_AND_OBJ entries should not be 8206 // marked as MEMBER_OF. 8207 ShouldBeMemberOf = false; 8208 } 8209 } 8210 8211 CombinedInfo.Types.push_back(Flags); 8212 } 8213 8214 // If we have encountered a member expression so far, keep track of the 8215 // mapped member. If the parent is "*this", then the value declaration 8216 // is nullptr. 
8217 if (EncounteredME) { 8218 const auto *FD = cast<FieldDecl>(EncounteredME->getMemberDecl()); 8219 unsigned FieldIndex = FD->getFieldIndex(); 8220 8221 // Update info about the lowest and highest elements for this struct 8222 if (!PartialStruct.Base.isValid()) { 8223 PartialStruct.LowestElem = {FieldIndex, LowestElem}; 8224 if (IsFinalArraySection) { 8225 Address HB = 8226 CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false) 8227 .getAddress(CGF); 8228 PartialStruct.HighestElem = {FieldIndex, HB}; 8229 } else { 8230 PartialStruct.HighestElem = {FieldIndex, LowestElem}; 8231 } 8232 PartialStruct.Base = BP; 8233 PartialStruct.LB = BP; 8234 } else if (FieldIndex < PartialStruct.LowestElem.first) { 8235 PartialStruct.LowestElem = {FieldIndex, LowestElem}; 8236 } else if (FieldIndex > PartialStruct.HighestElem.first) { 8237 PartialStruct.HighestElem = {FieldIndex, LowestElem}; 8238 } 8239 } 8240 8241 // Need to emit combined struct for array sections. 8242 if (IsFinalArraySection || IsNonContiguous) 8243 PartialStruct.IsArraySection = true; 8244 8245 // If we have a final array section, we are done with this expression. 8246 if (IsFinalArraySection) 8247 break; 8248 8249 // The pointer becomes the base for the next element. 8250 if (Next != CE) 8251 BP = IsMemberReference ? LowestElem : LB; 8252 8253 IsExpressionFirstInfo = false; 8254 IsCaptureFirstInfo = false; 8255 FirstPointerInComplexData = false; 8256 IsPrevMemberReference = IsMemberReference; 8257 } else if (FirstPointerInComplexData) { 8258 QualType Ty = Components.rbegin() 8259 ->getAssociatedDeclaration() 8260 ->getType() 8261 .getNonReferenceType(); 8262 BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>()); 8263 FirstPointerInComplexData = false; 8264 } 8265 } 8266 // If ran into the whole component - allocate the space for the whole 8267 // record. 
8268 if (!EncounteredME) 8269 PartialStruct.HasCompleteRecord = true; 8270 8271 if (!IsNonContiguous) 8272 return; 8273 8274 const ASTContext &Context = CGF.getContext(); 8275 8276 // For supporting stride in array section, we need to initialize the first 8277 // dimension size as 1, first offset as 0, and first count as 1 8278 MapValuesArrayTy CurOffsets = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 0)}; 8279 MapValuesArrayTy CurCounts = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)}; 8280 MapValuesArrayTy CurStrides; 8281 MapValuesArrayTy DimSizes{llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)}; 8282 uint64_t ElementTypeSize; 8283 8284 // Collect Size information for each dimension and get the element size as 8285 // the first Stride. For example, for `int arr[10][10]`, the DimSizes 8286 // should be [10, 10] and the first stride is 4 btyes. 8287 for (const OMPClauseMappableExprCommon::MappableComponent &Component : 8288 Components) { 8289 const Expr *AssocExpr = Component.getAssociatedExpression(); 8290 const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr); 8291 8292 if (!OASE) 8293 continue; 8294 8295 QualType Ty = OMPArraySectionExpr::getBaseOriginalType(OASE->getBase()); 8296 auto *CAT = Context.getAsConstantArrayType(Ty); 8297 auto *VAT = Context.getAsVariableArrayType(Ty); 8298 8299 // We need all the dimension size except for the last dimension. 8300 assert((VAT || CAT || &Component == &*Components.begin()) && 8301 "Should be either ConstantArray or VariableArray if not the " 8302 "first Component"); 8303 8304 // Get element size if CurStrides is empty. 
8305 if (CurStrides.empty()) { 8306 const Type *ElementType = nullptr; 8307 if (CAT) 8308 ElementType = CAT->getElementType().getTypePtr(); 8309 else if (VAT) 8310 ElementType = VAT->getElementType().getTypePtr(); 8311 else 8312 assert(&Component == &*Components.begin() && 8313 "Only expect pointer (non CAT or VAT) when this is the " 8314 "first Component"); 8315 // If ElementType is null, then it means the base is a pointer 8316 // (neither CAT nor VAT) and we'll attempt to get ElementType again 8317 // for next iteration. 8318 if (ElementType) { 8319 // For the case that having pointer as base, we need to remove one 8320 // level of indirection. 8321 if (&Component != &*Components.begin()) 8322 ElementType = ElementType->getPointeeOrArrayElementType(); 8323 ElementTypeSize = 8324 Context.getTypeSizeInChars(ElementType).getQuantity(); 8325 CurStrides.push_back( 8326 llvm::ConstantInt::get(CGF.Int64Ty, ElementTypeSize)); 8327 } 8328 } 8329 // Get dimension value except for the last dimension since we don't need 8330 // it. 8331 if (DimSizes.size() < Components.size() - 1) { 8332 if (CAT) 8333 DimSizes.push_back(llvm::ConstantInt::get( 8334 CGF.Int64Ty, CAT->getSize().getZExtValue())); 8335 else if (VAT) 8336 DimSizes.push_back(CGF.Builder.CreateIntCast( 8337 CGF.EmitScalarExpr(VAT->getSizeExpr()), CGF.Int64Ty, 8338 /*IsSigned=*/false)); 8339 } 8340 } 8341 8342 // Skip the dummy dimension since we have already have its information. 8343 auto *DI = DimSizes.begin() + 1; 8344 // Product of dimension. 8345 llvm::Value *DimProd = 8346 llvm::ConstantInt::get(CGF.CGM.Int64Ty, ElementTypeSize); 8347 8348 // Collect info for non-contiguous. Notice that offset, count, and stride 8349 // are only meaningful for array-section, so we insert a null for anything 8350 // other than array-section. 8351 // Also, the size of offset, count, and stride are not the same as 8352 // pointers, base_pointers, sizes, or dims. 
Instead, the size of offset, 8353 // count, and stride are the same as the number of non-contiguous 8354 // declaration in target update to/from clause. 8355 for (const OMPClauseMappableExprCommon::MappableComponent &Component : 8356 Components) { 8357 const Expr *AssocExpr = Component.getAssociatedExpression(); 8358 8359 if (const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr)) { 8360 llvm::Value *Offset = CGF.Builder.CreateIntCast( 8361 CGF.EmitScalarExpr(AE->getIdx()), CGF.Int64Ty, 8362 /*isSigned=*/false); 8363 CurOffsets.push_back(Offset); 8364 CurCounts.push_back(llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/1)); 8365 CurStrides.push_back(CurStrides.back()); 8366 continue; 8367 } 8368 8369 const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr); 8370 8371 if (!OASE) 8372 continue; 8373 8374 // Offset 8375 const Expr *OffsetExpr = OASE->getLowerBound(); 8376 llvm::Value *Offset = nullptr; 8377 if (!OffsetExpr) { 8378 // If offset is absent, then we just set it to zero. 8379 Offset = llvm::ConstantInt::get(CGF.Int64Ty, 0); 8380 } else { 8381 Offset = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(OffsetExpr), 8382 CGF.Int64Ty, 8383 /*isSigned=*/false); 8384 } 8385 CurOffsets.push_back(Offset); 8386 8387 // Count 8388 const Expr *CountExpr = OASE->getLength(); 8389 llvm::Value *Count = nullptr; 8390 if (!CountExpr) { 8391 // In Clang, once a high dimension is an array section, we construct all 8392 // the lower dimension as array section, however, for case like 8393 // arr[0:2][2], Clang construct the inner dimension as an array section 8394 // but it actually is not in an array section form according to spec. 8395 if (!OASE->getColonLocFirst().isValid() && 8396 !OASE->getColonLocSecond().isValid()) { 8397 Count = llvm::ConstantInt::get(CGF.Int64Ty, 1); 8398 } else { 8399 // OpenMP 5.0, 2.1.5 Array Sections, Description. 
8400 // When the length is absent it defaults to ⌈(size − 8401 // lower-bound)/stride⌉, where size is the size of the array 8402 // dimension. 8403 const Expr *StrideExpr = OASE->getStride(); 8404 llvm::Value *Stride = 8405 StrideExpr 8406 ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr), 8407 CGF.Int64Ty, /*isSigned=*/false) 8408 : nullptr; 8409 if (Stride) 8410 Count = CGF.Builder.CreateUDiv( 8411 CGF.Builder.CreateNUWSub(*DI, Offset), Stride); 8412 else 8413 Count = CGF.Builder.CreateNUWSub(*DI, Offset); 8414 } 8415 } else { 8416 Count = CGF.EmitScalarExpr(CountExpr); 8417 } 8418 Count = CGF.Builder.CreateIntCast(Count, CGF.Int64Ty, /*isSigned=*/false); 8419 CurCounts.push_back(Count); 8420 8421 // Stride_n' = Stride_n * (D_0 * D_1 ... * D_n-1) * Unit size 8422 // Take `int arr[5][5][5]` and `arr[0:2:2][1:2:1][0:2:2]` as an example: 8423 // Offset Count Stride 8424 // D0 0 1 4 (int) <- dummy dimension 8425 // D1 0 2 8 (2 * (1) * 4) 8426 // D2 1 2 20 (1 * (1 * 5) * 4) 8427 // D3 0 2 200 (2 * (1 * 5 * 4) * 4) 8428 const Expr *StrideExpr = OASE->getStride(); 8429 llvm::Value *Stride = 8430 StrideExpr 8431 ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr), 8432 CGF.Int64Ty, /*isSigned=*/false) 8433 : nullptr; 8434 DimProd = CGF.Builder.CreateNUWMul(DimProd, *(DI - 1)); 8435 if (Stride) 8436 CurStrides.push_back(CGF.Builder.CreateNUWMul(DimProd, Stride)); 8437 else 8438 CurStrides.push_back(DimProd); 8439 if (DI != DimSizes.end()) 8440 ++DI; 8441 } 8442 8443 CombinedInfo.NonContigInfo.Offsets.push_back(CurOffsets); 8444 CombinedInfo.NonContigInfo.Counts.push_back(CurCounts); 8445 CombinedInfo.NonContigInfo.Strides.push_back(CurStrides); 8446 } 8447 8448 /// Return the adjusted map modifiers if the declaration a capture refers to 8449 /// appears in a first-private clause. This is expected to be used only with 8450 /// directives that start with 'target'. 
  MappableExprsHandler::OpenMPOffloadMappingFlags
  getMapModifiersForPrivateClauses(const CapturedStmt::Capture &Cap) const {
    assert(Cap.capturesVariable() && "Expected capture by reference only!");

    // A first private variable captured by reference will use only the
    // 'private ptr' and 'map to' flag. Return the right flags if the captured
    // declaration is known as first-private in this handler.
    if (FirstPrivateDecls.count(Cap.getCapturedVar())) {
      if (Cap.getCapturedVar()->getType()->isAnyPointerType())
        return MappableExprsHandler::OMP_MAP_TO |
               MappableExprsHandler::OMP_MAP_PTR_AND_OBJ;
      return MappableExprsHandler::OMP_MAP_PRIVATE |
             MappableExprsHandler::OMP_MAP_TO;
    }
    auto I = LambdasMap.find(Cap.getCapturedVar()->getCanonicalDecl());
    if (I != LambdasMap.end())
      // for map(to: lambda): using user specified map type.
      return getMapTypeBits(
          I->getSecond()->getMapType(), I->getSecond()->getMapTypeModifiers(),
          /*MotionModifiers=*/llvm::None, I->getSecond()->isImplicit(),
          /*AddPtrFlag=*/false,
          /*AddIsTargetParamFlag=*/false,
          /*isNonContiguous=*/false);
    // Default for captures with no first-private/lambda information: map both
    // to and from the device.
    return MappableExprsHandler::OMP_MAP_TO |
           MappableExprsHandler::OMP_MAP_FROM;
  }

  /// Build the MEMBER_OF flag for a parent entry at \p Position: the value
  /// stored in the MEMBER_OF bit-field is Position + 1 (so 0 can mean
  /// "no parent"), shifted into place by getFlagMemberOffset().
  static OpenMPOffloadMappingFlags getMemberOfFlag(unsigned Position) {
    // Rotate by getFlagMemberOffset() bits.
    return static_cast<OpenMPOffloadMappingFlags>(((uint64_t)Position + 1)
                                                  << getFlagMemberOffset());
  }

  /// Replace the MEMBER_OF placeholder in \p Flags with the concrete
  /// \p MemberOfFlag value. A PTR_AND_OBJ entry that does not carry the
  /// placeholder is deliberately left untouched.
  static void setCorrectMemberOfFlag(OpenMPOffloadMappingFlags &Flags,
                                     OpenMPOffloadMappingFlags MemberOfFlag) {
    // If the entry is PTR_AND_OBJ but has not been marked with the special
    // placeholder value 0xFFFF in the MEMBER_OF field, then it should not be
    // marked as MEMBER_OF.
    if ((Flags & OMP_MAP_PTR_AND_OBJ) &&
        ((Flags & OMP_MAP_MEMBER_OF) != OMP_MAP_MEMBER_OF))
      return;

    // Reset the placeholder value to prepare the flag for the assignment of the
    // proper MEMBER_OF value.
    Flags &= ~OMP_MAP_MEMBER_OF;
    Flags |= MemberOfFlag;
  }

  /// Append to \p Layout, in LLVM-layout order, the non-bitfield,
  /// non-zero-size fields of \p RD, recursing into its non-empty (virtual and
  /// non-virtual) bases. \p AsBase selects the base-subobject LLVM type rather
  /// than the complete-object type when sizing the per-element slot table.
  void getPlainLayout(const CXXRecordDecl *RD,
                      llvm::SmallVectorImpl<const FieldDecl *> &Layout,
                      bool AsBase) const {
    const CGRecordLayout &RL = CGF.getTypes().getCGRecordLayout(RD);

    llvm::StructType *St =
        AsBase ? RL.getBaseSubobjectLLVMType() : RL.getLLVMType();

    unsigned NumElements = St->getNumElements();
    // One slot per LLVM struct element; each slot holds either a base class
    // or a field, keyed by its LLVM field index.
    llvm::SmallVector<
        llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>, 4>
        RecordLayout(NumElements);

    // Fill bases.
    for (const auto &I : RD->bases()) {
      if (I.isVirtual())
        continue;
      const auto *Base = I.getType()->getAsCXXRecordDecl();
      // Ignore empty bases.
      if (Base->isEmpty() || CGF.getContext()
                                 .getASTRecordLayout(Base)
                                 .getNonVirtualSize()
                                 .isZero())
        continue;

      unsigned FieldIndex = RL.getNonVirtualBaseLLVMFieldNo(Base);
      RecordLayout[FieldIndex] = Base;
    }
    // Fill in virtual bases.
    for (const auto &I : RD->vbases()) {
      const auto *Base = I.getType()->getAsCXXRecordDecl();
      // Ignore empty bases.
      if (Base->isEmpty())
        continue;
      unsigned FieldIndex = RL.getVirtualBaseIndex(Base);
      // Keep the first occupant if this slot was already claimed above.
      if (RecordLayout[FieldIndex])
        continue;
      RecordLayout[FieldIndex] = Base;
    }
    // Fill in all the fields.
    assert(!RD->isUnion() && "Unexpected union.");
    for (const auto *Field : RD->fields()) {
      // Fill in non-bitfields. (Bitfields always use a zero pattern, which we
      // will fill in later.)
      if (!Field->isBitField() && !Field->isZeroSize(CGF.getContext())) {
        unsigned FieldIndex = RL.getLLVMFieldNo(Field);
        RecordLayout[FieldIndex] = Field;
      }
    }
    // Walk the slots in layout order, expanding base classes recursively and
    // emitting fields directly.
    for (const llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>
             &Data : RecordLayout) {
      if (Data.isNull())
        continue;
      if (const auto *Base = Data.dyn_cast<const CXXRecordDecl *>())
        getPlainLayout(Base, Layout, /*AsBase=*/true);
      else
        Layout.push_back(Data.get<const FieldDecl *>());
    }
  }

  /// Generate all the base pointers, section pointers, sizes, map types, and
  /// mappers for the extracted mappable expressions (all included in \a
  /// CombinedInfo). Also, for each item that relates with a device pointer, a
  /// pair of the relevant declaration and index where it occurs is appended to
  /// the device pointers info array.
  void generateAllInfoForClauses(
      ArrayRef<const OMPClause *> Clauses, MapCombinedInfoTy &CombinedInfo,
      const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet =
          llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const {
    // We have to process the component lists that relate with the same
    // declaration in a single chunk so that we can generate the map flags
    // correctly. Therefore, we organize all lists in a map.
    enum MapKind { Present, Allocs, Other, Total };
    llvm::MapVector<CanonicalDeclPtr<const Decl>,
                    SmallVector<SmallVector<MapInfo, 8>, 4>>
        Info;

    // Helper function to fill the information map for the different supported
    // clauses.
8578 auto &&InfoGen = 8579 [&Info, &SkipVarSet]( 8580 const ValueDecl *D, MapKind Kind, 8581 OMPClauseMappableExprCommon::MappableExprComponentListRef L, 8582 OpenMPMapClauseKind MapType, 8583 ArrayRef<OpenMPMapModifierKind> MapModifiers, 8584 ArrayRef<OpenMPMotionModifierKind> MotionModifiers, 8585 bool ReturnDevicePointer, bool IsImplicit, const ValueDecl *Mapper, 8586 const Expr *VarRef = nullptr, bool ForDeviceAddr = false) { 8587 if (SkipVarSet.contains(D)) 8588 return; 8589 auto It = Info.find(D); 8590 if (It == Info.end()) 8591 It = Info 8592 .insert(std::make_pair( 8593 D, SmallVector<SmallVector<MapInfo, 8>, 4>(Total))) 8594 .first; 8595 It->second[Kind].emplace_back( 8596 L, MapType, MapModifiers, MotionModifiers, ReturnDevicePointer, 8597 IsImplicit, Mapper, VarRef, ForDeviceAddr); 8598 }; 8599 8600 for (const auto *Cl : Clauses) { 8601 const auto *C = dyn_cast<OMPMapClause>(Cl); 8602 if (!C) 8603 continue; 8604 MapKind Kind = Other; 8605 if (llvm::is_contained(C->getMapTypeModifiers(), 8606 OMPC_MAP_MODIFIER_present)) 8607 Kind = Present; 8608 else if (C->getMapType() == OMPC_MAP_alloc) 8609 Kind = Allocs; 8610 const auto *EI = C->getVarRefs().begin(); 8611 for (const auto L : C->component_lists()) { 8612 const Expr *E = (C->getMapLoc().isValid()) ? 
*EI : nullptr; 8613 InfoGen(std::get<0>(L), Kind, std::get<1>(L), C->getMapType(), 8614 C->getMapTypeModifiers(), llvm::None, 8615 /*ReturnDevicePointer=*/false, C->isImplicit(), std::get<2>(L), 8616 E); 8617 ++EI; 8618 } 8619 } 8620 for (const auto *Cl : Clauses) { 8621 const auto *C = dyn_cast<OMPToClause>(Cl); 8622 if (!C) 8623 continue; 8624 MapKind Kind = Other; 8625 if (llvm::is_contained(C->getMotionModifiers(), 8626 OMPC_MOTION_MODIFIER_present)) 8627 Kind = Present; 8628 const auto *EI = C->getVarRefs().begin(); 8629 for (const auto L : C->component_lists()) { 8630 InfoGen(std::get<0>(L), Kind, std::get<1>(L), OMPC_MAP_to, llvm::None, 8631 C->getMotionModifiers(), /*ReturnDevicePointer=*/false, 8632 C->isImplicit(), std::get<2>(L), *EI); 8633 ++EI; 8634 } 8635 } 8636 for (const auto *Cl : Clauses) { 8637 const auto *C = dyn_cast<OMPFromClause>(Cl); 8638 if (!C) 8639 continue; 8640 MapKind Kind = Other; 8641 if (llvm::is_contained(C->getMotionModifiers(), 8642 OMPC_MOTION_MODIFIER_present)) 8643 Kind = Present; 8644 const auto *EI = C->getVarRefs().begin(); 8645 for (const auto L : C->component_lists()) { 8646 InfoGen(std::get<0>(L), Kind, std::get<1>(L), OMPC_MAP_from, llvm::None, 8647 C->getMotionModifiers(), /*ReturnDevicePointer=*/false, 8648 C->isImplicit(), std::get<2>(L), *EI); 8649 ++EI; 8650 } 8651 } 8652 8653 // Look at the use_device_ptr clause information and mark the existing map 8654 // entries as such. If there is no map information for an entry in the 8655 // use_device_ptr list, we create one with map type 'alloc' and zero size 8656 // section. It is the user fault if that was not mapped before. If there is 8657 // no map information and the pointer is a struct member, then we defer the 8658 // emission of that entry until the whole struct has been processed. 
    // [Continuation of generateAllInfoForClauses.]
    // Deferred use_device_ptr/use_device_addr entries for struct members,
    // keyed by base declaration; the nullptr key collects members of 'this'.
    llvm::MapVector<CanonicalDeclPtr<const Decl>,
                    SmallVector<DeferredDevicePtrEntryTy, 4>>
        DeferredInfo;
    MapCombinedInfoTy UseDevicePtrCombinedInfo;

    // Process use_device_ptr clauses: either mark an existing map entry so the
    // runtime returns the device pointer, or synthesize a zero-size
    // RETURN_PARAM entry (deferred when the pointer is a struct member).
    for (const auto *Cl : Clauses) {
      const auto *C = dyn_cast<OMPUseDevicePtrClause>(Cl);
      if (!C)
        continue;
      for (const auto L : C->component_lists()) {
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components =
            std::get<1>(L);
        assert(!Components.empty() &&
               "Not expecting empty list of components!");
        const ValueDecl *VD = Components.back().getAssociatedDeclaration();
        VD = cast<ValueDecl>(VD->getCanonicalDecl());
        const Expr *IE = Components.back().getAssociatedExpression();
        // If the first component is a member expression, we have to look into
        // 'this', which maps to null in the map of map information. Otherwise
        // look directly for the information.
        auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);

        // We potentially have map information for this declaration already.
        // Look for the first set of components that refer to it.
        if (It != Info.end()) {
          bool Found = false;
          for (auto &Data : It->second) {
            auto *CI = llvm::find_if(Data, [VD](const MapInfo &MI) {
              return MI.Components.back().getAssociatedDeclaration() == VD;
            });
            // If we found a map entry, signal that the pointer has to be
            // returned and move on to the next declaration. Exclude cases where
            // the base pointer is mapped as array subscript, array section or
            // array shaping. The base address is passed as a pointer to base in
            // this case and cannot be used as a base for use_device_ptr list
            // item.
            if (CI != Data.end()) {
              auto PrevCI = std::next(CI->Components.rbegin());
              const auto *VarD = dyn_cast<VarDecl>(VD);
              if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
                  isa<MemberExpr>(IE) ||
                  !VD->getType().getNonReferenceType()->isPointerType() ||
                  PrevCI == CI->Components.rend() ||
                  isa<MemberExpr>(PrevCI->getAssociatedExpression()) || !VarD ||
                  VarD->hasLocalStorage()) {
                CI->ReturnDevicePointer = true;
                Found = true;
                break;
              }
            }
          }
          if (Found)
            continue;
        }

        // We didn't find any match in our map information - generate a zero
        // size array section - if the pointer is a struct member we defer this
        // action until the whole struct has been processed.
        if (isa<MemberExpr>(IE)) {
          // Insert the pointer into Info to be processed by
          // generateInfoForComponentList. Because it is a member pointer
          // without a pointee, no entry will be generated for it, therefore
          // we need to generate one after the whole struct has been processed.
          // Nonetheless, generateInfoForComponentList must be called to take
          // the pointer into account for the calculation of the range of the
          // partial struct.
          InfoGen(nullptr, Other, Components, OMPC_MAP_unknown, llvm::None,
                  llvm::None, /*ReturnDevicePointer=*/false, C->isImplicit(),
                  nullptr);
          DeferredInfo[nullptr].emplace_back(IE, VD, /*ForDeviceAddr=*/false);
        } else {
          llvm::Value *Ptr =
              CGF.EmitLoadOfScalar(CGF.EmitLValue(IE), IE->getExprLoc());
          UseDevicePtrCombinedInfo.Exprs.push_back(VD);
          UseDevicePtrCombinedInfo.BasePointers.emplace_back(Ptr, VD);
          UseDevicePtrCombinedInfo.Pointers.push_back(Ptr);
          UseDevicePtrCombinedInfo.Sizes.push_back(
              llvm::Constant::getNullValue(CGF.Int64Ty));
          UseDevicePtrCombinedInfo.Types.push_back(OMP_MAP_RETURN_PARAM);
          UseDevicePtrCombinedInfo.Mappers.push_back(nullptr);
        }
      }
    }

    // Look at the use_device_addr clause information and mark the existing map
    // entries as such. If there is no map information for an entry in the
    // use_device_addr list, we create one with map type 'alloc' and zero size
    // section. It is the user fault if that was not mapped before. If there is
    // no map information and the pointer is a struct member, then we defer the
    // emission of that entry until the whole struct has been processed.
    llvm::SmallDenseSet<CanonicalDeclPtr<const Decl>, 4> Processed;
    for (const auto *Cl : Clauses) {
      const auto *C = dyn_cast<OMPUseDeviceAddrClause>(Cl);
      if (!C)
        continue;
      for (const auto L : C->component_lists()) {
        assert(!std::get<1>(L).empty() &&
               "Not expecting empty list of components!");
        const ValueDecl *VD = std::get<1>(L).back().getAssociatedDeclaration();
        // Each declaration is handled only once, even if it appears in
        // multiple use_device_addr clauses.
        if (!Processed.insert(VD).second)
          continue;
        VD = cast<ValueDecl>(VD->getCanonicalDecl());
        const Expr *IE = std::get<1>(L).back().getAssociatedExpression();
        // If the first component is a member expression, we have to look into
        // 'this', which maps to null in the map of map information. Otherwise
        // look directly for the information.
        auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);

        // We potentially have map information for this declaration already.
        // Look for the first set of components that refer to it.
        if (It != Info.end()) {
          bool Found = false;
          for (auto &Data : It->second) {
            auto *CI = llvm::find_if(Data, [VD](const MapInfo &MI) {
              return MI.Components.back().getAssociatedDeclaration() == VD;
            });
            // If we found a map entry, signal that the pointer has to be
            // returned and move on to the next declaration.
            if (CI != Data.end()) {
              CI->ReturnDevicePointer = true;
              Found = true;
              break;
            }
          }
          if (Found)
            continue;
        }

        // We didn't find any match in our map information - generate a zero
        // size array section - if the pointer is a struct member we defer this
        // action until the whole struct has been processed.
        if (isa<MemberExpr>(IE)) {
          // Insert the pointer into Info to be processed by
          // generateInfoForComponentList. Because it is a member pointer
          // without a pointee, no entry will be generated for it, therefore
          // we need to generate one after the whole struct has been processed.
          // Nonetheless, generateInfoForComponentList must be called to take
          // the pointer into account for the calculation of the range of the
          // partial struct.
          InfoGen(nullptr, Other, std::get<1>(L), OMPC_MAP_unknown, llvm::None,
                  llvm::None, /*ReturnDevicePointer=*/false, C->isImplicit(),
                  nullptr, nullptr, /*ForDeviceAddr=*/true);
          DeferredInfo[nullptr].emplace_back(IE, VD, /*ForDeviceAddr=*/true);
        } else {
          // use_device_addr takes the address itself, not a loaded pointer
          // value as use_device_ptr does above.
          llvm::Value *Ptr;
          if (IE->isGLValue())
            Ptr = CGF.EmitLValue(IE).getPointer(CGF);
          else
            Ptr = CGF.EmitScalarExpr(IE);
          CombinedInfo.Exprs.push_back(VD);
          CombinedInfo.BasePointers.emplace_back(Ptr, VD);
          CombinedInfo.Pointers.push_back(Ptr);
          CombinedInfo.Sizes.push_back(
              llvm::Constant::getNullValue(CGF.Int64Ty));
          CombinedInfo.Types.push_back(OMP_MAP_RETURN_PARAM);
          CombinedInfo.Mappers.push_back(nullptr);
        }
      }
    }

    // Emit the accumulated map information for each base declaration.
    for (const auto &Data : Info) {
      StructRangeInfoTy PartialStruct;
      // Temporary generated information.
      MapCombinedInfoTy CurInfo;
      const Decl *D = Data.first;
      const ValueDecl *VD = cast_or_null<ValueDecl>(D);
      for (const auto &M : Data.second) {
        for (const MapInfo &L : M) {
          assert(!L.Components.empty() &&
                 "Not expecting declaration with no component lists.");

          // Remember the current base pointer index.
          unsigned CurrentBasePointersIdx = CurInfo.BasePointers.size();
          CurInfo.NonContigInfo.IsNonContiguous =
              L.Components.back().isNonContiguous();
          generateInfoForComponentList(
              L.MapType, L.MapModifiers, L.MotionModifiers, L.Components,
              CurInfo, PartialStruct, /*IsFirstComponentList=*/false,
              L.IsImplicit, L.Mapper, L.ForDeviceAddr, VD, L.VarRef);

          // If this entry relates with a device pointer, set the relevant
          // declaration and add the 'return pointer' flag.
          if (L.ReturnDevicePointer) {
            assert(CurInfo.BasePointers.size() > CurrentBasePointersIdx &&
                   "Unexpected number of mapped base pointers.");

            const ValueDecl *RelevantVD =
                L.Components.back().getAssociatedDeclaration();
            assert(RelevantVD &&
                   "No relevant declaration related with device pointer??");

            CurInfo.BasePointers[CurrentBasePointersIdx].setDevicePtrDecl(
                RelevantVD);
            CurInfo.Types[CurrentBasePointersIdx] |= OMP_MAP_RETURN_PARAM;
          }
        }
      }

      // Append any pending zero-length pointers which are struct members and
      // used with use_device_ptr or use_device_addr.
      auto CI = DeferredInfo.find(Data.first);
      if (CI != DeferredInfo.end()) {
        for (const DeferredDevicePtrEntryTy &L : CI->second) {
          llvm::Value *BasePtr;
          llvm::Value *Ptr;
          if (L.ForDeviceAddr) {
            if (L.IE->isGLValue())
              Ptr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
            else
              Ptr = this->CGF.EmitScalarExpr(L.IE);
            BasePtr = Ptr;
            // Entry is RETURN_PARAM. Also, set the placeholder value
            // MEMBER_OF=FFFF so that the entry is later updated with the
            // correct value of MEMBER_OF.
            CurInfo.Types.push_back(OMP_MAP_RETURN_PARAM | OMP_MAP_MEMBER_OF);
          } else {
            BasePtr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
            Ptr = this->CGF.EmitLoadOfScalar(this->CGF.EmitLValue(L.IE),
                                             L.IE->getExprLoc());
            // Entry is PTR_AND_OBJ and RETURN_PARAM. Also, set the
            // placeholder value MEMBER_OF=FFFF so that the entry is later
            // updated with the correct value of MEMBER_OF.
            CurInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_RETURN_PARAM |
                                    OMP_MAP_MEMBER_OF);
          }
          CurInfo.Exprs.push_back(L.VD);
          CurInfo.BasePointers.emplace_back(BasePtr, L.VD);
          CurInfo.Pointers.push_back(Ptr);
          CurInfo.Sizes.push_back(
              llvm::Constant::getNullValue(this->CGF.Int64Ty));
          CurInfo.Mappers.push_back(nullptr);
        }
      }
      // If there is an entry in PartialStruct it means we have a struct with
      // individual members mapped. Emit an extra combined entry.
      if (PartialStruct.Base.isValid()) {
        CurInfo.NonContigInfo.Dims.push_back(0);
        emitCombinedEntry(CombinedInfo, CurInfo.Types, PartialStruct, VD);
      }

      // We need to append the results of this capture to what we already
      // have.
      CombinedInfo.append(CurInfo);
    }
    // Append data for use_device_ptr clauses.
    CombinedInfo.append(UseDevicePtrCombinedInfo);
  }

public:
  /// Constructor for executable directives: pre-extracts firstprivate,
  /// uses_allocators, is_device_ptr and lambda map(to:) information so the
  /// generate* methods can consult it.
  MappableExprsHandler(const OMPExecutableDirective &Dir, CodeGenFunction &CGF)
      : CurDir(&Dir), CGF(CGF) {
    // Extract firstprivate clause information.
    for (const auto *C : Dir.getClausesOfKind<OMPFirstprivateClause>())
      for (const auto *D : C->varlists())
        FirstPrivateDecls.try_emplace(
            cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl()), C->isImplicit());
    // Extract implicit firstprivates from uses_allocators clauses.
    for (const auto *C : Dir.getClausesOfKind<OMPUsesAllocatorsClause>()) {
      for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
        OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
        if (const auto *DRE = dyn_cast_or_null<DeclRefExpr>(D.AllocatorTraits))
          FirstPrivateDecls.try_emplace(cast<VarDecl>(DRE->getDecl()),
                                        /*Implicit=*/true);
        else if (const auto *VD = dyn_cast<VarDecl>(
                     cast<DeclRefExpr>(D.Allocator->IgnoreParenImpCasts())
                         ->getDecl()))
          FirstPrivateDecls.try_emplace(VD, /*Implicit=*/true);
      }
    }
    // Extract device pointer clause information.
    for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>())
      for (auto L : C->component_lists())
        DevPointersMap[std::get<0>(L)].push_back(std::get<1>(L));
    // Extract map information: record declarations of lambda type that are
    // mapped with map(to:) so they get special handling elsewhere.
    for (const auto *C : Dir.getClausesOfKind<OMPMapClause>()) {
      if (C->getMapType() != OMPC_MAP_to)
        continue;
      for (auto L : C->component_lists()) {
        const ValueDecl *VD = std::get<0>(L);
        const auto *RD = VD ? VD->getType()
                                  .getCanonicalType()
                                  .getNonReferenceType()
                                  ->getAsCXXRecordDecl()
                            : nullptr;
        if (RD && RD->isLambda())
          LambdasMap.try_emplace(std::get<0>(L), C);
      }
    }
  }

  /// Constructor for the declare mapper directive.
  MappableExprsHandler(const OMPDeclareMapperDecl &Dir, CodeGenFunction &CGF)
      : CurDir(&Dir), CGF(CGF) {}

  /// Generate code for the combined entry if we have a partially mapped struct
  /// and take care of the mapping flags of the arguments corresponding to
  /// individual struct members.
  void emitCombinedEntry(MapCombinedInfoTy &CombinedInfo,
                         MapFlagsArrayTy &CurTypes,
                         const StructRangeInfoTy &PartialStruct,
                         const ValueDecl *VD = nullptr,
                         bool NotTargetParams = true) const {
    // A single entry that is not a struct member and not an array section
    // does not need a combined parent entry.
    if (CurTypes.size() == 1 &&
        ((CurTypes.back() & OMP_MAP_MEMBER_OF) != OMP_MAP_MEMBER_OF) &&
        !PartialStruct.IsArraySection)
      return;
    Address LBAddr = PartialStruct.LowestElem.second;
    Address HBAddr = PartialStruct.HighestElem.second;
    // When the whole record is mapped, LB and HB both point at the record
    // itself, so the "one past HB minus LB" computation below yields the full
    // record size.
    if (PartialStruct.HasCompleteRecord) {
      LBAddr = PartialStruct.LB;
      HBAddr = PartialStruct.LB;
    }
    CombinedInfo.Exprs.push_back(VD);
    // Base is the base of the struct
    CombinedInfo.BasePointers.push_back(PartialStruct.Base.getPointer());
    // Pointer is the address of the lowest element
    llvm::Value *LB = LBAddr.getPointer();
    CombinedInfo.Pointers.push_back(LB);
    // There should not be a mapper for a combined entry.
    CombinedInfo.Mappers.push_back(nullptr);
    // Size is (addr of {highest+1} element) - (addr of lowest element)
    llvm::Value *HB = HBAddr.getPointer();
    llvm::Value *HAddr =
        CGF.Builder.CreateConstGEP1_32(HBAddr.getElementType(), HB, /*Idx0=*/1);
    llvm::Value *CLAddr = CGF.Builder.CreatePointerCast(LB, CGF.VoidPtrTy);
    llvm::Value *CHAddr = CGF.Builder.CreatePointerCast(HAddr, CGF.VoidPtrTy);
    llvm::Value *Diff = CGF.Builder.CreatePtrDiff(CGF.Int8Ty, CHAddr, CLAddr);
    llvm::Value *Size = CGF.Builder.CreateIntCast(Diff, CGF.Int64Ty,
                                                  /*isSigned=*/false);
    CombinedInfo.Sizes.push_back(Size);
    // Map type is always TARGET_PARAM, if generate info for captures.
    CombinedInfo.Types.push_back(NotTargetParams ? OMP_MAP_NONE
                                                 : OMP_MAP_TARGET_PARAM);
    // If any element has the present modifier, then make sure the runtime
    // doesn't attempt to allocate the struct.
    if (CurTypes.end() !=
        llvm::find_if(CurTypes, [](OpenMPOffloadMappingFlags Type) {
          return Type & OMP_MAP_PRESENT;
        }))
      CombinedInfo.Types.back() |= OMP_MAP_PRESENT;
    // Remove TARGET_PARAM flag from the first element
    (*CurTypes.begin()) &= ~OMP_MAP_TARGET_PARAM;
    // If any element has the ompx_hold modifier, then make sure the runtime
    // uses the hold reference count for the struct as a whole so that it won't
    // be unmapped by an extra dynamic reference count decrement. Add it to all
    // elements as well so the runtime knows which reference count to check
    // when determining whether it's time for device-to-host transfers of
    // individual elements.
    if (CurTypes.end() !=
        llvm::find_if(CurTypes, [](OpenMPOffloadMappingFlags Type) {
          return Type & OMP_MAP_OMPX_HOLD;
        })) {
      CombinedInfo.Types.back() |= OMP_MAP_OMPX_HOLD;
      for (auto &M : CurTypes)
        M |= OMP_MAP_OMPX_HOLD;
    }

    // All other current entries will be MEMBER_OF the combined entry
    // (except for PTR_AND_OBJ entries which do not have a placeholder value
    // 0xFFFF in the MEMBER_OF field).
    OpenMPOffloadMappingFlags MemberOfFlag =
        getMemberOfFlag(CombinedInfo.BasePointers.size() - 1);
    for (auto &M : CurTypes)
      setCorrectMemberOfFlag(M, MemberOfFlag);
  }

  /// Generate all the base pointers, section pointers, sizes, map types, and
  /// mappers for the extracted mappable expressions (all included in \a
  /// CombinedInfo). Also, for each item that relates with a device pointer, a
  /// pair of the relevant declaration and index where it occurs is appended to
  /// the device pointers info array.
9030 void generateAllInfo( 9031 MapCombinedInfoTy &CombinedInfo, 9032 const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet = 9033 llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const { 9034 assert(CurDir.is<const OMPExecutableDirective *>() && 9035 "Expect a executable directive"); 9036 const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>(); 9037 generateAllInfoForClauses(CurExecDir->clauses(), CombinedInfo, SkipVarSet); 9038 } 9039 9040 /// Generate all the base pointers, section pointers, sizes, map types, and 9041 /// mappers for the extracted map clauses of user-defined mapper (all included 9042 /// in \a CombinedInfo). 9043 void generateAllInfoForMapper(MapCombinedInfoTy &CombinedInfo) const { 9044 assert(CurDir.is<const OMPDeclareMapperDecl *>() && 9045 "Expect a declare mapper directive"); 9046 const auto *CurMapperDir = CurDir.get<const OMPDeclareMapperDecl *>(); 9047 generateAllInfoForClauses(CurMapperDir->clauses(), CombinedInfo); 9048 } 9049 9050 /// Emit capture info for lambdas for variables captured by reference. 
  void generateInfoForLambdaCaptures(
      const ValueDecl *VD, llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo,
      llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers) const {
    // Only lambda-typed captures are handled here; anything else is a no-op.
    const auto *RD = VD->getType()
                         .getCanonicalType()
                         .getNonReferenceType()
                         ->getAsCXXRecordDecl();
    if (!RD || !RD->isLambda())
      return;
    Address VDAddr =
        Address::deprecated(Arg, CGF.getContext().getDeclAlign(VD));
    LValue VDLVal = CGF.MakeAddrLValue(
        VDAddr, VD->getType().getCanonicalType().getNonReferenceType());
    llvm::DenseMap<const VarDecl *, FieldDecl *> Captures;
    FieldDecl *ThisCapture = nullptr;
    RD->getCaptureFields(Captures, ThisCapture);
    // Map the captured 'this' field of the lambda object, if any.
    if (ThisCapture) {
      LValue ThisLVal =
          CGF.EmitLValueForFieldInitialization(VDLVal, ThisCapture);
      LValue ThisLValVal = CGF.EmitLValueForField(VDLVal, ThisCapture);
      LambdaPointers.try_emplace(ThisLVal.getPointer(CGF),
                                 VDLVal.getPointer(CGF));
      CombinedInfo.Exprs.push_back(VD);
      CombinedInfo.BasePointers.push_back(ThisLVal.getPointer(CGF));
      CombinedInfo.Pointers.push_back(ThisLValVal.getPointer(CGF));
      CombinedInfo.Sizes.push_back(
          CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy),
                                    CGF.Int64Ty, /*isSigned=*/true));
      CombinedInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
                                   OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
      CombinedInfo.Mappers.push_back(nullptr);
    }
    // Map by-reference (and pointer-typed) variable captures of the lambda.
    for (const LambdaCapture &LC : RD->captures()) {
      if (!LC.capturesVariable())
        continue;
      // Note: this VD intentionally shadows the parameter for the remainder
      // of the loop body.
      const VarDecl *VD = LC.getCapturedVar();
      if (LC.getCaptureKind() != LCK_ByRef && !VD->getType()->isPointerType())
        continue;
      auto It = Captures.find(VD);
      assert(It != Captures.end() && "Found lambda capture without field.");
      LValue VarLVal = CGF.EmitLValueForFieldInitialization(VDLVal, It->second);
      if (LC.getCaptureKind() == LCK_ByRef) {
        LValue VarLValVal = CGF.EmitLValueForField(VDLVal, It->second);
        LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
                                   VDLVal.getPointer(CGF));
        CombinedInfo.Exprs.push_back(VD);
        CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF));
        CombinedInfo.Pointers.push_back(VarLValVal.getPointer(CGF));
        CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
            CGF.getTypeSize(
                VD->getType().getCanonicalType().getNonReferenceType()),
            CGF.Int64Ty, /*isSigned=*/true));
      } else {
        // Pointer captured by value: record the loaded pointer with zero size.
        RValue VarRVal = CGF.EmitLoadOfLValue(VarLVal, RD->getLocation());
        LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
                                   VDLVal.getPointer(CGF));
        CombinedInfo.Exprs.push_back(VD);
        CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF));
        CombinedInfo.Pointers.push_back(VarRVal.getScalarVal());
        CombinedInfo.Sizes.push_back(llvm::ConstantInt::get(CGF.Int64Ty, 0));
      }
      CombinedInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
                                   OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
      CombinedInfo.Mappers.push_back(nullptr);
    }
  }

  /// Set correct indices for lambdas captures.
  void adjustMemberOfForLambdaCaptures(
      const llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers,
      MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
      MapFlagsArrayTy &Types) const {
    for (unsigned I = 0, E = Types.size(); I < E; ++I) {
      // Set correct member_of idx for all implicit lambda captures.
      // Entries emitted by generateInfoForLambdaCaptures carry exactly this
      // flag combination; skip everything else.
      if (Types[I] != (OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
                       OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT))
        continue;
      llvm::Value *BasePtr = LambdaPointers.lookup(*BasePointers[I]);
      assert(BasePtr && "Unable to find base lambda address.");
      int TgtIdx = -1;
      // Scan backwards for the closest preceding entry whose pointer is the
      // lambda object itself.
      for (unsigned J = I; J > 0; --J) {
        unsigned Idx = J - 1;
        if (Pointers[Idx] != BasePtr)
          continue;
        TgtIdx = Idx;
        break;
      }
      assert(TgtIdx != -1 && "Unable to find parent lambda.");
      // All other current entries will be MEMBER_OF the combined entry
      // (except for PTR_AND_OBJ entries which do not have a placeholder value
      // 0xFFFF in the MEMBER_OF field).
      OpenMPOffloadMappingFlags MemberOfFlag = getMemberOfFlag(TgtIdx);
      setCorrectMemberOfFlag(Types[I], MemberOfFlag);
    }
  }

  /// Generate the base pointers, section pointers, sizes, map types, and
  /// mappers associated to a given capture (all included in \a CombinedInfo).
  void generateInfoForCapture(const CapturedStmt::Capture *Cap,
                              llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo,
                              StructRangeInfoTy &PartialStruct) const {
    assert(!Cap->capturesVariableArrayType() &&
           "Not expecting to generate map info for a variable array type!");

    // We need to know when we generating information for the first component
    const ValueDecl *VD = Cap->capturesThis()
                              ? nullptr
                              : Cap->getCapturedVar()->getCanonicalDecl();

    // for map(to: lambda): skip here, processing it in
    // generateDefaultMapInfo
    if (LambdasMap.count(VD))
      return;

    // If this declaration appears in a is_device_ptr clause we just have to
    // pass the pointer by value. If it is a reference to a declaration, we just
    // pass its value.
    if (DevPointersMap.count(VD)) {
      CombinedInfo.Exprs.push_back(VD);
      CombinedInfo.BasePointers.emplace_back(Arg, VD);
      CombinedInfo.Pointers.push_back(Arg);
      CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
          CGF.getTypeSize(CGF.getContext().VoidPtrTy), CGF.Int64Ty,
          /*isSigned=*/true));
      CombinedInfo.Types.push_back(
          (Cap->capturesVariable() ? OMP_MAP_TO : OMP_MAP_LITERAL) |
          OMP_MAP_TARGET_PARAM);
      CombinedInfo.Mappers.push_back(nullptr);
      return;
    }

    // Collect every map-clause component list that refers to this declaration
    // together with its map type, modifiers, implicitness, mapper and the
    // expression it came from.
    using MapData =
        std::tuple<OMPClauseMappableExprCommon::MappableExprComponentListRef,
                   OpenMPMapClauseKind, ArrayRef<OpenMPMapModifierKind>, bool,
                   const ValueDecl *, const Expr *>;
    SmallVector<MapData, 4> DeclComponentLists;
    assert(CurDir.is<const OMPExecutableDirective *>() &&
           "Expect a executable directive");
    const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
    for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) {
      const auto *EI = C->getVarRefs().begin();
      for (const auto L : C->decl_component_lists(VD)) {
        const ValueDecl *VDecl, *Mapper;
        // The Expression is not correct if the mapping is implicit
        const Expr *E = (C->getMapLoc().isValid()) ? *EI : nullptr;
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
        std::tie(VDecl, Components, Mapper) = L;
        assert(VDecl == VD && "We got information for the wrong declaration??");
        assert(!Components.empty() &&
               "Not expecting declaration with no component lists.");
        DeclComponentLists.emplace_back(Components, C->getMapType(),
                                        C->getMapTypeModifiers(),
                                        C->isImplicit(), Mapper, E);
        ++EI;
      }
    }
    // Order lists so that entries with the 'present' modifier (and then
    // 'alloc' maps) come first.
    // NOTE(review): the first MapType is read from RHS while MapModifiers is
    // read from LHS before both are swapped below — this pairing looks
    // crossed; confirm it is intentional before relying on the ordering.
    llvm::stable_sort(DeclComponentLists, [](const MapData &LHS,
                                             const MapData &RHS) {
      ArrayRef<OpenMPMapModifierKind> MapModifiers = std::get<2>(LHS);
      OpenMPMapClauseKind MapType = std::get<1>(RHS);
      bool HasPresent =
          llvm::is_contained(MapModifiers, clang::OMPC_MAP_MODIFIER_present);
      bool HasAllocs = MapType == OMPC_MAP_alloc;
      MapModifiers = std::get<2>(RHS);
      MapType = std::get<1>(LHS);
      bool HasPresentR =
          llvm::is_contained(MapModifiers, clang::OMPC_MAP_MODIFIER_present);
      bool HasAllocsR = MapType == OMPC_MAP_alloc;
      return (HasPresent && !HasPresentR) || (HasAllocs && !HasAllocsR);
    });

    // Find overlapping elements (including the offset from the base element).
    llvm::SmallDenseMap<
        const MapData *,
        llvm::SmallVector<
            OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>,
        4>
        OverlappedData;
    size_t Count = 0;
    // Pairwise compare component lists back-to-front (base component first);
    // if one list is a prefix of the other, the two mappings overlap.
    for (const MapData &L : DeclComponentLists) {
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      const ValueDecl *Mapper;
      const Expr *VarRef;
      std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
          L;
      ++Count;
      for (const MapData &L1 : makeArrayRef(DeclComponentLists).slice(Count)) {
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components1;
        std::tie(Components1, MapType, MapModifiers, IsImplicit, Mapper,
                 VarRef) = L1;
        auto CI = Components.rbegin();
        auto CE = Components.rend();
        auto SI = Components1.rbegin();
        auto SE = Components1.rend();
        for (; CI != CE && SI != SE; ++CI, ++SI) {
          if (CI->getAssociatedExpression()->getStmtClass() !=
              SI->getAssociatedExpression()->getStmtClass())
            break;
          // Are we dealing with different variables/fields?
          if (CI->getAssociatedDeclaration() != SI->getAssociatedDeclaration())
            break;
        }
        // Found overlapping if, at least for one component, reached the head
        // of the components list.
        if (CI == CE || SI == SE) {
          // Ignore it if it is the same component.
          if (CI == CE && SI == SE)
            continue;
          const auto It = (SI == SE) ? CI : SI;
          // If one component is a pointer and another one is a kind of
          // dereference of this pointer (array subscript, section, dereference,
          // etc.), it is not an overlapping.
          // Same, if one component is a base and another component is a
          // dereferenced pointer memberexpr with the same base.
          if (!isa<MemberExpr>(It->getAssociatedExpression()) ||
              (std::prev(It)->getAssociatedDeclaration() &&
               std::prev(It)
                   ->getAssociatedDeclaration()
                   ->getType()
                   ->isPointerType()) ||
              (It->getAssociatedDeclaration() &&
               It->getAssociatedDeclaration()->getType()->isPointerType() &&
               std::next(It) != CE && std::next(It) != SE))
            continue;
          const MapData &BaseData = CI == CE ? L : L1;
          OMPClauseMappableExprCommon::MappableExprComponentListRef SubData =
              SI == SE ? Components : Components1;
          auto &OverlappedElements = OverlappedData.FindAndConstruct(&BaseData);
          OverlappedElements.getSecond().push_back(SubData);
        }
      }
    }
    // Sort the overlapped elements for each item.
    llvm::SmallVector<const FieldDecl *, 4> Layout;
    if (!OverlappedData.empty()) {
      // Strip pointers/arrays down to the underlying record type whose field
      // layout drives the ordering below.
      const Type *BaseType = VD->getType().getCanonicalType().getTypePtr();
      const Type *OrigType = BaseType->getPointeeOrArrayElementType();
      while (BaseType != OrigType) {
        BaseType = OrigType->getCanonicalTypeInternal().getTypePtr();
        OrigType = BaseType->getPointeeOrArrayElementType();
      }

      if (const auto *CRD = BaseType->getAsCXXRecordDecl())
        getPlainLayout(CRD, Layout, /*AsBase=*/false);
      else {
        const auto *RD = BaseType->getAsRecordDecl();
        Layout.append(RD->field_begin(), RD->field_end());
      }
    }
    for (auto &Pair : OverlappedData) {
      llvm::stable_sort(
          Pair.getSecond(),
          [&Layout](
              OMPClauseMappableExprCommon::MappableExprComponentListRef First,
              OMPClauseMappableExprCommon::MappableExprComponentListRef
                  Second) {
            auto CI = First.rbegin();
            auto CE = First.rend();
            auto SI = Second.rbegin();
            auto SE = Second.rend();
            for (; CI != CE && SI != SE; ++CI, ++SI) {
              if (CI->getAssociatedExpression()->getStmtClass() !=
                  SI->getAssociatedExpression()->getStmtClass())
                break;
              // Are we dealing with different variables/fields?
              if (CI->getAssociatedDeclaration() !=
                  SI->getAssociatedDeclaration())
                break;
            }

            // Lists contain the same elements.
            if (CI == CE && SI == SE)
              return false;

            // List with less elements is less than list with more elements.
            if (CI == CE || SI == SE)
              return CI == CE;

            // Diverging fields: order by field index within the same record,
            // otherwise by position in the flattened layout.
            const auto *FD1 = cast<FieldDecl>(CI->getAssociatedDeclaration());
            const auto *FD2 = cast<FieldDecl>(SI->getAssociatedDeclaration());
            if (FD1->getParent() == FD2->getParent())
              return FD1->getFieldIndex() < FD2->getFieldIndex();
            const auto *It =
                llvm::find_if(Layout, [FD1, FD2](const FieldDecl *FD) {
                  return FD == FD1 || FD == FD2;
                });
            return *It == FD1;
          });
    }

    // Associated with a capture, because the mapping flags depend on it.
    // Go through all of the elements with the overlapped elements.
    bool IsFirstComponentList = true;
    for (const auto &Pair : OverlappedData) {
      const MapData &L = *Pair.getFirst();
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      const ValueDecl *Mapper;
      const Expr *VarRef;
      std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
          L;
      ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
          OverlappedComponents = Pair.getSecond();
      generateInfoForComponentList(
          MapType, MapModifiers, llvm::None, Components, CombinedInfo,
          PartialStruct, IsFirstComponentList, IsImplicit, Mapper,
          /*ForDeviceAddr=*/false, VD, VarRef, OverlappedComponents);
      IsFirstComponentList = false;
    }
    // Go through other elements without overlapped elements.
    for (const MapData &L : DeclComponentLists) {
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      const ValueDecl *Mapper;
      const Expr *VarRef;
      std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
          L;
      // Lists with overlapped data were already emitted in the loop above.
      auto It = OverlappedData.find(&L);
      if (It == OverlappedData.end())
        generateInfoForComponentList(MapType, MapModifiers, llvm::None,
                                     Components, CombinedInfo, PartialStruct,
                                     IsFirstComponentList, IsImplicit, Mapper,
                                     /*ForDeviceAddr=*/false, VD, VarRef);
      IsFirstComponentList = false;
    }
  }

  /// Generate the default map information for a given capture \a CI,
  /// record field declaration \a RI and captured value \a CV.
  void generateDefaultMapInfo(const CapturedStmt::Capture &CI,
                              const FieldDecl &RI, llvm::Value *CV,
                              MapCombinedInfoTy &CombinedInfo) const {
    bool IsImplicit = true;
    // Do the default mapping.
    if (CI.capturesThis()) {
      // 'this' is mapped tofrom with the size of the pointee record.
      CombinedInfo.Exprs.push_back(nullptr);
      CombinedInfo.BasePointers.push_back(CV);
      CombinedInfo.Pointers.push_back(CV);
      const auto *PtrTy = cast<PointerType>(RI.getType().getTypePtr());
      CombinedInfo.Sizes.push_back(
          CGF.Builder.CreateIntCast(CGF.getTypeSize(PtrTy->getPointeeType()),
                                    CGF.Int64Ty, /*isSigned=*/true));
      // Default map type.
      CombinedInfo.Types.push_back(OMP_MAP_TO | OMP_MAP_FROM);
    } else if (CI.capturesVariableByCopy()) {
      const VarDecl *VD = CI.getCapturedVar();
      CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
      CombinedInfo.BasePointers.push_back(CV);
      CombinedInfo.Pointers.push_back(CV);
      if (!RI.getType()->isAnyPointerType()) {
        // We have to signal to the runtime captures passed by value that are
        // not pointers.
        CombinedInfo.Types.push_back(OMP_MAP_LITERAL);
        CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
            CGF.getTypeSize(RI.getType()), CGF.Int64Ty, /*isSigned=*/true));
      } else {
        // Pointers are implicitly mapped with a zero size and no flags
        // (other than first map that is added for all implicit maps).
        CombinedInfo.Types.push_back(OMP_MAP_NONE);
        CombinedInfo.Sizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty));
      }
      auto I = FirstPrivateDecls.find(VD);
      if (I != FirstPrivateDecls.end())
        IsImplicit = I->getSecond();
    } else {
      assert(CI.capturesVariable() && "Expected captured reference.");
      const auto *PtrTy = cast<ReferenceType>(RI.getType().getTypePtr());
      QualType ElementType = PtrTy->getPointeeType();
      CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
          CGF.getTypeSize(ElementType), CGF.Int64Ty, /*isSigned=*/true));
      // The default map type for a scalar/complex type is 'to' because by
      // default the value doesn't have to be retrieved. For an aggregate
      // type, the default is 'tofrom'.
      CombinedInfo.Types.push_back(getMapModifiersForPrivateClauses(CI));
      const VarDecl *VD = CI.getCapturedVar();
      auto I = FirstPrivateDecls.find(VD);
      CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
      CombinedInfo.BasePointers.push_back(CV);
      // Firstprivate pointers are dereferenced so the pointee is mapped, not
      // the pointer cell itself.
      if (I != FirstPrivateDecls.end() && ElementType->isAnyPointerType()) {
        Address PtrAddr = CGF.EmitLoadOfReference(CGF.MakeAddrLValue(
            CV, ElementType, CGF.getContext().getDeclAlign(VD),
            AlignmentSource::Decl));
        CombinedInfo.Pointers.push_back(PtrAddr.getPointer());
      } else {
        CombinedInfo.Pointers.push_back(CV);
      }
      if (I != FirstPrivateDecls.end())
        IsImplicit = I->getSecond();
    }
    // Every default map produces a single argument which is a target parameter.
    CombinedInfo.Types.back() |= OMP_MAP_TARGET_PARAM;

    // Add flag stating this is an implicit map.
    if (IsImplicit)
      CombinedInfo.Types.back() |= OMP_MAP_IMPLICIT;

    // No user-defined mapper for default mapping.
    CombinedInfo.Mappers.push_back(nullptr);
  }
};
} // anonymous namespace

static void emitNonContiguousDescriptor(
    CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo,
    CGOpenMPRuntime::TargetDataInfo &Info) {
  CodeGenModule &CGM = CGF.CGM;
  MappableExprsHandler::MapCombinedInfoTy::StructNonContiguousInfo
      &NonContigInfo = CombinedInfo.NonContigInfo;

  // Build an array of struct descriptor_dim and then assign it to
  // offload_args.
  //
  // struct descriptor_dim {
  //  uint64_t offset;
  //  uint64_t count;
  //  uint64_t stride
  // };
  ASTContext &C = CGF.getContext();
  QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
  RecordDecl *RD;
  RD = C.buildImplicitRecord("descriptor_dim");
  RD->startDefinition();
  addFieldToRecordDecl(C, RD, Int64Ty);
  addFieldToRecordDecl(C, RD, Int64Ty);
  addFieldToRecordDecl(C, RD, Int64Ty);
  RD->completeDefinition();
  QualType DimTy = C.getRecordType(RD);

  enum { OffsetFD = 0, CountFD, StrideFD };
  // We need two index variable here since the size of "Dims" is the same as the
  // size of Components, however, the size of offset, count, and stride is equal
  // to the size of base declaration that is non-contiguous.
  for (unsigned I = 0, L = 0, E = NonContigInfo.Dims.size(); I < E; ++I) {
    // Skip emitting ir if dimension size is 1 since it cannot be
    // non-contiguous.
9493 if (NonContigInfo.Dims[I] == 1) 9494 continue; 9495 llvm::APInt Size(/*numBits=*/32, NonContigInfo.Dims[I]); 9496 QualType ArrayTy = 9497 C.getConstantArrayType(DimTy, Size, nullptr, ArrayType::Normal, 0); 9498 Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims"); 9499 for (unsigned II = 0, EE = NonContigInfo.Dims[I]; II < EE; ++II) { 9500 unsigned RevIdx = EE - II - 1; 9501 LValue DimsLVal = CGF.MakeAddrLValue( 9502 CGF.Builder.CreateConstArrayGEP(DimsAddr, II), DimTy); 9503 // Offset 9504 LValue OffsetLVal = CGF.EmitLValueForField( 9505 DimsLVal, *std::next(RD->field_begin(), OffsetFD)); 9506 CGF.EmitStoreOfScalar(NonContigInfo.Offsets[L][RevIdx], OffsetLVal); 9507 // Count 9508 LValue CountLVal = CGF.EmitLValueForField( 9509 DimsLVal, *std::next(RD->field_begin(), CountFD)); 9510 CGF.EmitStoreOfScalar(NonContigInfo.Counts[L][RevIdx], CountLVal); 9511 // Stride 9512 LValue StrideLVal = CGF.EmitLValueForField( 9513 DimsLVal, *std::next(RD->field_begin(), StrideFD)); 9514 CGF.EmitStoreOfScalar(NonContigInfo.Strides[L][RevIdx], StrideLVal); 9515 } 9516 // args[I] = &dims 9517 Address DAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 9518 DimsAddr, CGM.Int8PtrTy, CGM.Int8Ty); 9519 llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32( 9520 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs), 9521 Info.PointersArray, 0, I); 9522 Address PAddr = Address::deprecated(P, CGF.getPointerAlign()); 9523 CGF.Builder.CreateStore(DAddr.getPointer(), PAddr); 9524 ++L; 9525 } 9526 } 9527 9528 // Try to extract the base declaration from a `this->x` expression if possible. 
9529 static ValueDecl *getDeclFromThisExpr(const Expr *E) { 9530 if (!E) 9531 return nullptr; 9532 9533 if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E->IgnoreParenCasts())) 9534 if (const MemberExpr *ME = 9535 dyn_cast<MemberExpr>(OASE->getBase()->IgnoreParenImpCasts())) 9536 return ME->getMemberDecl(); 9537 return nullptr; 9538 } 9539 9540 /// Emit a string constant containing the names of the values mapped to the 9541 /// offloading runtime library. 9542 llvm::Constant * 9543 emitMappingInformation(CodeGenFunction &CGF, llvm::OpenMPIRBuilder &OMPBuilder, 9544 MappableExprsHandler::MappingExprInfo &MapExprs) { 9545 9546 uint32_t SrcLocStrSize; 9547 if (!MapExprs.getMapDecl() && !MapExprs.getMapExpr()) 9548 return OMPBuilder.getOrCreateDefaultSrcLocStr(SrcLocStrSize); 9549 9550 SourceLocation Loc; 9551 if (!MapExprs.getMapDecl() && MapExprs.getMapExpr()) { 9552 if (const ValueDecl *VD = getDeclFromThisExpr(MapExprs.getMapExpr())) 9553 Loc = VD->getLocation(); 9554 else 9555 Loc = MapExprs.getMapExpr()->getExprLoc(); 9556 } else { 9557 Loc = MapExprs.getMapDecl()->getLocation(); 9558 } 9559 9560 std::string ExprName; 9561 if (MapExprs.getMapExpr()) { 9562 PrintingPolicy P(CGF.getContext().getLangOpts()); 9563 llvm::raw_string_ostream OS(ExprName); 9564 MapExprs.getMapExpr()->printPretty(OS, nullptr, P); 9565 OS.flush(); 9566 } else { 9567 ExprName = MapExprs.getMapDecl()->getNameAsString(); 9568 } 9569 9570 PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc); 9571 return OMPBuilder.getOrCreateSrcLocStr(PLoc.getFilename(), ExprName, 9572 PLoc.getLine(), PLoc.getColumn(), 9573 SrcLocStrSize); 9574 } 9575 9576 /// Emit the arrays used to pass the captures and map information to the 9577 /// offloading runtime library. If there is no map or capture information, 9578 /// return nullptr by reference. 
static void emitOffloadingArrays(
    CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo,
    CGOpenMPRuntime::TargetDataInfo &Info, llvm::OpenMPIRBuilder &OMPBuilder,
    bool IsNonContiguous = false) {
  CodeGenModule &CGM = CGF.CGM;
  ASTContext &Ctx = CGF.getContext();

  // Reset the array information.
  Info.clearArrayInfo();
  Info.NumberOfPtrs = CombinedInfo.BasePointers.size();

  if (Info.NumberOfPtrs) {
    // Detect if we have any capture size requiring runtime evaluation of the
    // size so that a constant array could be eventually used.

    llvm::APInt PointerNumAP(32, Info.NumberOfPtrs, /*isSigned=*/true);
    QualType PointerArrayType = Ctx.getConstantArrayType(
        Ctx.VoidPtrTy, PointerNumAP, nullptr, ArrayType::Normal,
        /*IndexTypeQuals=*/0);

    // Stack temporaries for base pointers, pointers and mapper functions;
    // one void* slot per mapped entity.
    Info.BasePointersArray =
        CGF.CreateMemTemp(PointerArrayType, ".offload_baseptrs").getPointer();
    Info.PointersArray =
        CGF.CreateMemTemp(PointerArrayType, ".offload_ptrs").getPointer();
    Address MappersArray =
        CGF.CreateMemTemp(PointerArrayType, ".offload_mappers");
    Info.MappersArray = MappersArray.getPointer();

    // If we don't have any VLA types or other types that require runtime
    // evaluation, we can use a constant array for the map sizes, otherwise we
    // need to fill up the arrays as we do for the pointers.
    QualType Int64Ty =
        Ctx.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
    SmallVector<llvm::Constant *> ConstSizes(
        CombinedInfo.Sizes.size(), llvm::ConstantInt::get(CGF.Int64Ty, 0));
    llvm::SmallBitVector RuntimeSizes(CombinedInfo.Sizes.size());
    for (unsigned I = 0, E = CombinedInfo.Sizes.size(); I < E; ++I) {
      if (auto *CI = dyn_cast<llvm::Constant>(CombinedInfo.Sizes[I])) {
        // ConstantExpr/GlobalValue sizes still require a runtime store even
        // though they are llvm::Constants.
        if (!isa<llvm::ConstantExpr>(CI) && !isa<llvm::GlobalValue>(CI)) {
          // For non-contiguous entries the "size" slot carries the dimension
          // count instead of a byte size.
          if (IsNonContiguous && (CombinedInfo.Types[I] &
                                  MappableExprsHandler::OMP_MAP_NON_CONTIG))
            ConstSizes[I] = llvm::ConstantInt::get(
                CGF.Int64Ty, CombinedInfo.NonContigInfo.Dims[I]);
          else
            ConstSizes[I] = CI;
          continue;
        }
      }
      RuntimeSizes.set(I);
    }

    if (RuntimeSizes.all()) {
      // Every size is runtime-evaluated: a plain stack array, filled below.
      QualType SizeArrayType = Ctx.getConstantArrayType(
          Int64Ty, PointerNumAP, nullptr, ArrayType::Normal,
          /*IndexTypeQuals=*/0);
      Info.SizesArray =
          CGF.CreateMemTemp(SizeArrayType, ".offload_sizes").getPointer();
    } else {
      // At least some sizes are compile-time constants: emit them as a
      // private constant global.
      auto *SizesArrayInit = llvm::ConstantArray::get(
          llvm::ArrayType::get(CGM.Int64Ty, ConstSizes.size()), ConstSizes);
      std::string Name = CGM.getOpenMPRuntime().getName({"offload_sizes"});
      auto *SizesArrayGbl = new llvm::GlobalVariable(
          CGM.getModule(), SizesArrayInit->getType(), /*isConstant=*/true,
          llvm::GlobalValue::PrivateLinkage, SizesArrayInit, Name);
      SizesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
      if (RuntimeSizes.any()) {
        // Mixed constant/runtime sizes: copy the constant global into a stack
        // buffer, then overwrite the runtime slots below.
        QualType SizeArrayType = Ctx.getConstantArrayType(
            Int64Ty, PointerNumAP, nullptr, ArrayType::Normal,
            /*IndexTypeQuals=*/0);
        Address Buffer = CGF.CreateMemTemp(SizeArrayType, ".offload_sizes");
        llvm::Value *GblConstPtr =
            CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                SizesArrayGbl, CGM.Int64Ty->getPointerTo());
        CGF.Builder.CreateMemCpy(
            Buffer,
            Address(GblConstPtr, CGM.Int64Ty,
                    CGM.getNaturalTypeAlignment(Ctx.getIntTypeForBitwidth(
                        /*DestWidth=*/64, /*Signed=*/false))),
            CGF.getTypeSize(SizeArrayType));
        Info.SizesArray = Buffer.getPointer();
      } else {
        // All sizes constant: point directly at the global.
        Info.SizesArray = SizesArrayGbl;
      }
    }

    // The map types are always constant so we don't need to generate code to
    // fill arrays. Instead, we create an array constant.
    SmallVector<uint64_t, 4> Mapping(CombinedInfo.Types.size(), 0);
    llvm::copy(CombinedInfo.Types, Mapping.begin());
    std::string MaptypesName =
        CGM.getOpenMPRuntime().getName({"offload_maptypes"});
    auto *MapTypesArrayGbl =
        OMPBuilder.createOffloadMaptypes(Mapping, MaptypesName);
    Info.MapTypesArray = MapTypesArrayGbl;

    // The information types are only built if there is debug information
    // requested.
    if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo) {
      Info.MapNamesArray = llvm::Constant::getNullValue(
          llvm::Type::getInt8Ty(CGF.Builder.getContext())->getPointerTo());
    } else {
      auto fillInfoMap = [&](MappableExprsHandler::MappingExprInfo &MapExpr) {
        return emitMappingInformation(CGF, OMPBuilder, MapExpr);
      };
      SmallVector<llvm::Constant *, 4> InfoMap(CombinedInfo.Exprs.size());
      llvm::transform(CombinedInfo.Exprs, InfoMap.begin(), fillInfoMap);
      std::string MapnamesName =
          CGM.getOpenMPRuntime().getName({"offload_mapnames"});
      auto *MapNamesArrayGbl =
          OMPBuilder.createOffloadMapnames(InfoMap, MapnamesName);
      Info.MapNamesArray = MapNamesArrayGbl;
    }

    // If there's a present map type modifier, it must not be applied to the
    // end of a region, so generate a separate map type array in that case.
    if (Info.separateBeginEndCalls()) {
      bool EndMapTypesDiffer = false;
      for (uint64_t &Type : Mapping) {
        if (Type & MappableExprsHandler::OMP_MAP_PRESENT) {
          Type &= ~MappableExprsHandler::OMP_MAP_PRESENT;
          EndMapTypesDiffer = true;
        }
      }
      if (EndMapTypesDiffer) {
        MapTypesArrayGbl =
            OMPBuilder.createOffloadMaptypes(Mapping, MaptypesName);
        Info.MapTypesArrayEnd = MapTypesArrayGbl;
      }
    }

    // Fill the base-pointer, pointer, (runtime) size and mapper slots for
    // each mapped entity.
    for (unsigned I = 0; I < Info.NumberOfPtrs; ++I) {
      llvm::Value *BPVal = *CombinedInfo.BasePointers[I];
      llvm::Value *BP = CGF.Builder.CreateConstInBoundsGEP2_32(
          llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
          Info.BasePointersArray, 0, I);
      BP = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          BP, BPVal->getType()->getPointerTo(/*AddrSpace=*/0));
      Address BPAddr =
          Address::deprecated(BP, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
      CGF.Builder.CreateStore(BPVal, BPAddr);

      // Record where device-pointer captures live so 'use_device_ptr' style
      // clauses can find them later.
      if (Info.requiresDevicePointerInfo())
        if (const ValueDecl *DevVD =
                CombinedInfo.BasePointers[I].getDevicePtrDecl())
          Info.CaptureDeviceAddrMap.try_emplace(DevVD, BPAddr);

      llvm::Value *PVal = CombinedInfo.Pointers[I];
      llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32(
          llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
          Info.PointersArray, 0, I);
      P = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          P, PVal->getType()->getPointerTo(/*AddrSpace=*/0));
      Address PAddr =
          Address::deprecated(P, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
      CGF.Builder.CreateStore(PVal, PAddr);

      // Only runtime-evaluated sizes need a store; constant ones are already
      // in the sizes array/global built above.
      if (RuntimeSizes.test(I)) {
        llvm::Value *S = CGF.Builder.CreateConstInBoundsGEP2_32(
            llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs),
            Info.SizesArray,
            /*Idx0=*/0,
            /*Idx1=*/I);
        Address SAddr =
            Address::deprecated(S, Ctx.getTypeAlignInChars(Int64Ty));
        CGF.Builder.CreateStore(CGF.Builder.CreateIntCast(CombinedInfo.Sizes[I],
                                                          CGM.Int64Ty,
                                                          /*isSigned=*/true),
                                SAddr);
      }

      // Fill up the mapper array.
      llvm::Value *MFunc = llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
      if (CombinedInfo.Mappers[I]) {
        MFunc = CGM.getOpenMPRuntime().getOrCreateUserDefinedMapperFunc(
            cast<OMPDeclareMapperDecl>(CombinedInfo.Mappers[I]));
        MFunc = CGF.Builder.CreatePointerCast(MFunc, CGM.VoidPtrTy);
        Info.HasMapper = true;
      }
      Address MAddr = CGF.Builder.CreateConstArrayGEP(MappersArray, I);
      CGF.Builder.CreateStore(MFunc, MAddr);
    }
  }

  if (!IsNonContiguous || CombinedInfo.NonContigInfo.Offsets.empty() ||
      Info.NumberOfPtrs == 0)
    return;

  emitNonContiguousDescriptor(CGF, CombinedInfo, Info);
}

namespace {
/// Additional arguments for emitOffloadingArraysArgument function.
struct ArgumentsOptions {
  bool ForEndCall = false;
  ArgumentsOptions() = default;
  ArgumentsOptions(bool ForEndCall) : ForEndCall(ForEndCall) {}
};
} // namespace

/// Emit the arguments to be passed to the runtime library based on the
/// arrays of base pointers, pointers, sizes, map types, and mappers. If
/// ForEndCall, emit map types to be passed for the end of the region instead
/// of the beginning.
9782 static void emitOffloadingArraysArgument( 9783 CodeGenFunction &CGF, llvm::Value *&BasePointersArrayArg, 9784 llvm::Value *&PointersArrayArg, llvm::Value *&SizesArrayArg, 9785 llvm::Value *&MapTypesArrayArg, llvm::Value *&MapNamesArrayArg, 9786 llvm::Value *&MappersArrayArg, CGOpenMPRuntime::TargetDataInfo &Info, 9787 const ArgumentsOptions &Options = ArgumentsOptions()) { 9788 assert((!Options.ForEndCall || Info.separateBeginEndCalls()) && 9789 "expected region end call to runtime only when end call is separate"); 9790 CodeGenModule &CGM = CGF.CGM; 9791 if (Info.NumberOfPtrs) { 9792 BasePointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( 9793 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs), 9794 Info.BasePointersArray, 9795 /*Idx0=*/0, /*Idx1=*/0); 9796 PointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( 9797 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs), 9798 Info.PointersArray, 9799 /*Idx0=*/0, 9800 /*Idx1=*/0); 9801 SizesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( 9802 llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs), Info.SizesArray, 9803 /*Idx0=*/0, /*Idx1=*/0); 9804 MapTypesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( 9805 llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs), 9806 Options.ForEndCall && Info.MapTypesArrayEnd ? Info.MapTypesArrayEnd 9807 : Info.MapTypesArray, 9808 /*Idx0=*/0, 9809 /*Idx1=*/0); 9810 9811 // Only emit the mapper information arrays if debug information is 9812 // requested. 
9813 if (CGF.CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo) 9814 MapNamesArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy); 9815 else 9816 MapNamesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( 9817 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs), 9818 Info.MapNamesArray, 9819 /*Idx0=*/0, 9820 /*Idx1=*/0); 9821 // If there is no user-defined mapper, set the mapper array to nullptr to 9822 // avoid an unnecessary data privatization 9823 if (!Info.HasMapper) 9824 MappersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy); 9825 else 9826 MappersArrayArg = 9827 CGF.Builder.CreatePointerCast(Info.MappersArray, CGM.VoidPtrPtrTy); 9828 } else { 9829 BasePointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy); 9830 PointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy); 9831 SizesArrayArg = llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo()); 9832 MapTypesArrayArg = 9833 llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo()); 9834 MapNamesArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy); 9835 MappersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy); 9836 } 9837 } 9838 9839 /// Check for inner distribute directive. 
9840 static const OMPExecutableDirective * 9841 getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) { 9842 const auto *CS = D.getInnermostCapturedStmt(); 9843 const auto *Body = 9844 CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true); 9845 const Stmt *ChildStmt = 9846 CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body); 9847 9848 if (const auto *NestedDir = 9849 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) { 9850 OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind(); 9851 switch (D.getDirectiveKind()) { 9852 case OMPD_target: 9853 if (isOpenMPDistributeDirective(DKind)) 9854 return NestedDir; 9855 if (DKind == OMPD_teams) { 9856 Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers( 9857 /*IgnoreCaptured=*/true); 9858 if (!Body) 9859 return nullptr; 9860 ChildStmt = CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body); 9861 if (const auto *NND = 9862 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) { 9863 DKind = NND->getDirectiveKind(); 9864 if (isOpenMPDistributeDirective(DKind)) 9865 return NND; 9866 } 9867 } 9868 return nullptr; 9869 case OMPD_target_teams: 9870 if (isOpenMPDistributeDirective(DKind)) 9871 return NestedDir; 9872 return nullptr; 9873 case OMPD_target_parallel: 9874 case OMPD_target_simd: 9875 case OMPD_target_parallel_for: 9876 case OMPD_target_parallel_for_simd: 9877 return nullptr; 9878 case OMPD_target_teams_distribute: 9879 case OMPD_target_teams_distribute_simd: 9880 case OMPD_target_teams_distribute_parallel_for: 9881 case OMPD_target_teams_distribute_parallel_for_simd: 9882 case OMPD_parallel: 9883 case OMPD_for: 9884 case OMPD_parallel_for: 9885 case OMPD_parallel_master: 9886 case OMPD_parallel_sections: 9887 case OMPD_for_simd: 9888 case OMPD_parallel_for_simd: 9889 case OMPD_cancel: 9890 case OMPD_cancellation_point: 9891 case OMPD_ordered: 9892 case OMPD_threadprivate: 9893 case OMPD_allocate: 9894 case OMPD_task: 9895 case OMPD_simd: 9896 case OMPD_tile: 9897 
case OMPD_unroll: 9898 case OMPD_sections: 9899 case OMPD_section: 9900 case OMPD_single: 9901 case OMPD_master: 9902 case OMPD_critical: 9903 case OMPD_taskyield: 9904 case OMPD_barrier: 9905 case OMPD_taskwait: 9906 case OMPD_taskgroup: 9907 case OMPD_atomic: 9908 case OMPD_flush: 9909 case OMPD_depobj: 9910 case OMPD_scan: 9911 case OMPD_teams: 9912 case OMPD_target_data: 9913 case OMPD_target_exit_data: 9914 case OMPD_target_enter_data: 9915 case OMPD_distribute: 9916 case OMPD_distribute_simd: 9917 case OMPD_distribute_parallel_for: 9918 case OMPD_distribute_parallel_for_simd: 9919 case OMPD_teams_distribute: 9920 case OMPD_teams_distribute_simd: 9921 case OMPD_teams_distribute_parallel_for: 9922 case OMPD_teams_distribute_parallel_for_simd: 9923 case OMPD_target_update: 9924 case OMPD_declare_simd: 9925 case OMPD_declare_variant: 9926 case OMPD_begin_declare_variant: 9927 case OMPD_end_declare_variant: 9928 case OMPD_declare_target: 9929 case OMPD_end_declare_target: 9930 case OMPD_declare_reduction: 9931 case OMPD_declare_mapper: 9932 case OMPD_taskloop: 9933 case OMPD_taskloop_simd: 9934 case OMPD_master_taskloop: 9935 case OMPD_master_taskloop_simd: 9936 case OMPD_parallel_master_taskloop: 9937 case OMPD_parallel_master_taskloop_simd: 9938 case OMPD_requires: 9939 case OMPD_metadirective: 9940 case OMPD_unknown: 9941 default: 9942 llvm_unreachable("Unexpected directive."); 9943 } 9944 } 9945 9946 return nullptr; 9947 } 9948 9949 /// Emit the user-defined mapper function. The code generation follows the 9950 /// pattern in the example below. 9951 /// \code 9952 /// void .omp_mapper.<type_name>.<mapper_id>.(void *rt_mapper_handle, 9953 /// void *base, void *begin, 9954 /// int64_t size, int64_t type, 9955 /// void *name = nullptr) { 9956 /// // Allocate space for an array section first or add a base/begin for 9957 /// // pointer dereference. 
///   if ((size > 1 || (base != begin && maptype.IsPtrAndObj)) &&
///       !maptype.IsDelete)
///     __tgt_push_mapper_component(rt_mapper_handle, base, begin,
///                                 size*sizeof(Ty), clearToFromMember(type));
///   // Map members.
///   for (unsigned i = 0; i < size; i++) {
///     // For each component specified by this mapper:
///     for (auto c : begin[i]->all_components) {
///       if (c.hasMapper())
///         (*c.Mapper())(rt_mapper_handle, c.arg_base, c.arg_begin, c.arg_size,
///                       c.arg_type, c.arg_name);
///       else
///         __tgt_push_mapper_component(rt_mapper_handle, c.arg_base,
///                                     c.arg_begin, c.arg_size, c.arg_type,
///                                     c.arg_name);
///     }
///   }
///   // Delete the array section.
///   if (size > 1 && maptype.IsDelete)
///     __tgt_push_mapper_component(rt_mapper_handle, base, begin,
///                                 size*sizeof(Ty), clearToFromMember(type));
/// }
/// \endcode
void CGOpenMPRuntime::emitUserDefinedMapper(const OMPDeclareMapperDecl *D,
                                            CodeGenFunction *CGF) {
  // Each mapper declaration is emitted at most once.
  if (UDMMap.count(D) > 0)
    return;
  ASTContext &C = CGM.getContext();
  QualType Ty = D->getType();
  QualType PtrTy = C.getPointerType(Ty).withRestrict();
  QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
  auto *MapperVarDecl =
      cast<VarDecl>(cast<DeclRefExpr>(D->getMapperVarRef())->getDecl());
  SourceLocation Loc = D->getLocation();
  CharUnits ElementSize = C.getTypeSizeInChars(Ty);

  // Prepare mapper function arguments and attributes. The signature matches
  // the \code example above: (handle, base, begin, size, type, name).
  ImplicitParamDecl HandleArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                              C.VoidPtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl BaseArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl BeginArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                             C.VoidPtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl SizeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl TypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl NameArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                            ImplicitParamDecl::Other);
  FunctionArgList Args;
  Args.push_back(&HandleArg);
  Args.push_back(&BaseArg);
  Args.push_back(&BeginArg);
  Args.push_back(&SizeArg);
  Args.push_back(&TypeArg);
  Args.push_back(&NameArg);
  const CGFunctionInfo &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  // Name the function after the mangled mapped type and the mapper's name.
  SmallString<64> TyStr;
  llvm::raw_svector_ostream Out(TyStr);
  CGM.getCXXABI().getMangleContext().mangleTypeName(Ty, Out);
  std::string Name = getName({"omp_mapper", TyStr, D->getName()});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
  // Start the mapper function code generation.
  CodeGenFunction MapperCGF(CGM);
  MapperCGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  // Compute the starting and end addresses of array elements.
  llvm::Value *Size = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&SizeArg), /*Volatile=*/false,
      C.getPointerType(Int64Ty), Loc);
  // Prepare common arguments for array initiation and deletion.
  llvm::Value *Handle = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&HandleArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
  llvm::Value *BaseIn = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&BaseArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
  llvm::Value *BeginIn = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&BeginArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
  // Convert the size in bytes into the number of array elements.
  Size = MapperCGF.Builder.CreateExactUDiv(
      Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity()));
  llvm::Value *PtrBegin = MapperCGF.Builder.CreateBitCast(
      BeginIn, CGM.getTypes().ConvertTypeForMem(PtrTy));
  llvm::Value *PtrEnd = MapperCGF.Builder.CreateGEP(
      PtrBegin->getType()->getPointerElementType(), PtrBegin, Size);
  llvm::Value *MapType = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&TypeArg), /*Volatile=*/false,
      C.getPointerType(Int64Ty), Loc);
  llvm::Value *MapName = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&NameArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);

  // Emit array initiation if this is an array section and \p MapType indicates
  // that memory allocation is required.
  llvm::BasicBlock *HeadBB = MapperCGF.createBasicBlock("omp.arraymap.head");
  emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
                             MapName, ElementSize, HeadBB, /*IsInit=*/true);

  // Emit a for loop to iterate through SizeArg of elements and map all of
  // them.

  // Emit the loop header block.
  MapperCGF.EmitBlock(HeadBB);
  llvm::BasicBlock *BodyBB = MapperCGF.createBasicBlock("omp.arraymap.body");
  llvm::BasicBlock *DoneBB = MapperCGF.createBasicBlock("omp.done");
  // Evaluate whether the initial condition is satisfied.
  llvm::Value *IsEmpty =
      MapperCGF.Builder.CreateICmpEQ(PtrBegin, PtrEnd, "omp.arraymap.isempty");
  MapperCGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
  llvm::BasicBlock *EntryBB = MapperCGF.Builder.GetInsertBlock();

  // Emit the loop body block.
  MapperCGF.EmitBlock(BodyBB);
  llvm::BasicBlock *LastBB = BodyBB;
  // PHI over the current element pointer; the back-edge incoming value is
  // added after the loop body is emitted.
  llvm::PHINode *PtrPHI = MapperCGF.Builder.CreatePHI(
      PtrBegin->getType(), 2, "omp.arraymap.ptrcurrent");
  PtrPHI->addIncoming(PtrBegin, EntryBB);
  Address PtrCurrent =
      Address::deprecated(PtrPHI, MapperCGF.GetAddrOfLocalVar(&BeginArg)
                                      .getAlignment()
                                      .alignmentOfArrayElement(ElementSize));
  // Privatize the declared variable of mapper to be the current array element.
  CodeGenFunction::OMPPrivateScope Scope(MapperCGF);
  Scope.addPrivate(MapperVarDecl, PtrCurrent);
  (void)Scope.Privatize();

  // Get map clause information. Fill up the arrays with all mapped variables.
  MappableExprsHandler::MapCombinedInfoTy Info;
  MappableExprsHandler MEHandler(*D, MapperCGF);
  MEHandler.generateAllInfoForMapper(Info);

  // Call the runtime API __tgt_mapper_num_components to get the number of
  // pre-existing components.
  llvm::Value *OffloadingArgs[] = {Handle};
  llvm::Value *PreviousSize = MapperCGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___tgt_mapper_num_components),
      OffloadingArgs);
  // Shift into the MEMBER_OF bit-field position of the map type.
  llvm::Value *ShiftedPreviousSize = MapperCGF.Builder.CreateShl(
      PreviousSize,
      MapperCGF.Builder.getInt64(MappableExprsHandler::getFlagMemberOffset()));

  // Fill up the runtime mapper handle for all components.
  for (unsigned I = 0; I < Info.BasePointers.size(); ++I) {
    llvm::Value *CurBaseArg = MapperCGF.Builder.CreateBitCast(
        *Info.BasePointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
    llvm::Value *CurBeginArg = MapperCGF.Builder.CreateBitCast(
        Info.Pointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
    llvm::Value *CurSizeArg = Info.Sizes[I];
    llvm::Value *CurNameArg =
        (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo)
            ? llvm::ConstantPointerNull::get(CGM.VoidPtrTy)
            : emitMappingInformation(MapperCGF, OMPBuilder, Info.Exprs[I]);

    // Extract the MEMBER_OF field from the map type.
    llvm::Value *OriMapType = MapperCGF.Builder.getInt64(Info.Types[I]);
    llvm::Value *MemberMapType =
        MapperCGF.Builder.CreateNUWAdd(OriMapType, ShiftedPreviousSize);

    // Combine the map type inherited from user-defined mapper with that
    // specified in the program. According to the OMP_MAP_TO and OMP_MAP_FROM
    // bits of the \a MapType, which is the input argument of the mapper
    // function, the following code will set the OMP_MAP_TO and OMP_MAP_FROM
    // bits of MemberMapType.
    // [OpenMP 5.0], 1.2.6. map-type decay.
    //        | alloc |  to   | from  | tofrom | release | delete
    // ----------------------------------------------------------
    // alloc  | alloc | alloc | alloc | alloc  | release | delete
    // to     | alloc |  to   | alloc |   to   | release | delete
    // from   | alloc | alloc | from  |  from  | release | delete
    // tofrom | alloc |  to   | from  | tofrom | release | delete
    llvm::Value *LeftToFrom = MapperCGF.Builder.CreateAnd(
        MapType,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO |
                                   MappableExprsHandler::OMP_MAP_FROM));
    llvm::BasicBlock *AllocBB = MapperCGF.createBasicBlock("omp.type.alloc");
    llvm::BasicBlock *AllocElseBB =
        MapperCGF.createBasicBlock("omp.type.alloc.else");
    llvm::BasicBlock *ToBB = MapperCGF.createBasicBlock("omp.type.to");
    llvm::BasicBlock *ToElseBB = MapperCGF.createBasicBlock("omp.type.to.else");
    llvm::BasicBlock *FromBB = MapperCGF.createBasicBlock("omp.type.from");
    llvm::BasicBlock *EndBB = MapperCGF.createBasicBlock("omp.type.end");
    llvm::Value *IsAlloc = MapperCGF.Builder.CreateIsNull(LeftToFrom);
    MapperCGF.Builder.CreateCondBr(IsAlloc, AllocBB, AllocElseBB);
    // In case of alloc, clear OMP_MAP_TO and OMP_MAP_FROM.
    MapperCGF.EmitBlock(AllocBB);
    llvm::Value *AllocMapType = MapperCGF.Builder.CreateAnd(
        MemberMapType,
        MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO |
                                     MappableExprsHandler::OMP_MAP_FROM)));
    MapperCGF.Builder.CreateBr(EndBB);
    MapperCGF.EmitBlock(AllocElseBB);
    llvm::Value *IsTo = MapperCGF.Builder.CreateICmpEQ(
        LeftToFrom,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO));
    MapperCGF.Builder.CreateCondBr(IsTo, ToBB, ToElseBB);
    // In case of to, clear OMP_MAP_FROM.
    MapperCGF.EmitBlock(ToBB);
    llvm::Value *ToMapType = MapperCGF.Builder.CreateAnd(
        MemberMapType,
        MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_FROM));
    MapperCGF.Builder.CreateBr(EndBB);
    MapperCGF.EmitBlock(ToElseBB);
    llvm::Value *IsFrom = MapperCGF.Builder.CreateICmpEQ(
        LeftToFrom,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_FROM));
    MapperCGF.Builder.CreateCondBr(IsFrom, FromBB, EndBB);
    // In case of from, clear OMP_MAP_TO.
    MapperCGF.EmitBlock(FromBB);
    llvm::Value *FromMapType = MapperCGF.Builder.CreateAnd(
        MemberMapType,
        MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_TO));
    // In case of tofrom, do nothing.
    MapperCGF.EmitBlock(EndBB);
    LastBB = EndBB;
    // Merge the four decay outcomes; the tofrom case (fall-through from
    // ToElseBB) keeps MemberMapType unchanged.
    llvm::PHINode *CurMapType =
        MapperCGF.Builder.CreatePHI(CGM.Int64Ty, 4, "omp.maptype");
    CurMapType->addIncoming(AllocMapType, AllocBB);
    CurMapType->addIncoming(ToMapType, ToBB);
    CurMapType->addIncoming(FromMapType, FromBB);
    CurMapType->addIncoming(MemberMapType, ToElseBB);

    llvm::Value *OffloadingArgs[] = {Handle,     CurBaseArg, CurBeginArg,
                                     CurSizeArg, CurMapType, CurNameArg};
    if (Info.Mappers[I]) {
      // Call the corresponding mapper function.
      llvm::Function *MapperFunc = getOrCreateUserDefinedMapperFunc(
          cast<OMPDeclareMapperDecl>(Info.Mappers[I]));
      assert(MapperFunc && "Expect a valid mapper function is available.");
      MapperCGF.EmitNounwindRuntimeCall(MapperFunc, OffloadingArgs);
    } else {
      // Call the runtime API __tgt_push_mapper_component to fill up the
      // runtime data structure.
      MapperCGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___tgt_push_mapper_component),
          OffloadingArgs);
    }
  }

  // Update the pointer to point to the next element that needs to be mapped,
  // and check whether we have mapped all elements.
  llvm::Type *ElemTy = PtrPHI->getType()->getPointerElementType();
  llvm::Value *PtrNext = MapperCGF.Builder.CreateConstGEP1_32(
      ElemTy, PtrPHI, /*Idx0=*/1, "omp.arraymap.next");
  PtrPHI->addIncoming(PtrNext, LastBB);
  llvm::Value *IsDone =
      MapperCGF.Builder.CreateICmpEQ(PtrNext, PtrEnd, "omp.arraymap.isdone");
  llvm::BasicBlock *ExitBB = MapperCGF.createBasicBlock("omp.arraymap.exit");
  MapperCGF.Builder.CreateCondBr(IsDone, ExitBB, BodyBB);

  MapperCGF.EmitBlock(ExitBB);
  // Emit array deletion if this is an array section and \p MapType indicates
  // that deletion is required.
  emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
                             MapName, ElementSize, DoneBB, /*IsInit=*/false);

  // Emit the function exit block.
  MapperCGF.EmitBlock(DoneBB, /*IsFinished=*/true);
  MapperCGF.FinishFunction();
  UDMMap.try_emplace(D, Fn);
  // Remember which mappers a user function references so they can be emitted
  // together with it.
  if (CGF) {
    auto &Decls = FunctionUDMMap.FindAndConstruct(CGF->CurFn);
    Decls.second.push_back(D);
  }
}

/// Emit the array initialization or deletion portion for user-defined mapper
/// code generation. First, it evaluates whether an array section is mapped
/// and whether the \a MapType instructs to delete this section. If \a IsInit
/// is true, and \a MapType indicates to not delete this array, array
/// initialization code is generated. If \a IsInit is false, and \a MapType
/// indicates to delete this array, array deletion code is generated.
10236 void CGOpenMPRuntime::emitUDMapperArrayInitOrDel( 10237 CodeGenFunction &MapperCGF, llvm::Value *Handle, llvm::Value *Base, 10238 llvm::Value *Begin, llvm::Value *Size, llvm::Value *MapType, 10239 llvm::Value *MapName, CharUnits ElementSize, llvm::BasicBlock *ExitBB, 10240 bool IsInit) { 10241 StringRef Prefix = IsInit ? ".init" : ".del"; 10242 10243 // Evaluate if this is an array section. 10244 llvm::BasicBlock *BodyBB = 10245 MapperCGF.createBasicBlock(getName({"omp.array", Prefix})); 10246 llvm::Value *IsArray = MapperCGF.Builder.CreateICmpSGT( 10247 Size, MapperCGF.Builder.getInt64(1), "omp.arrayinit.isarray"); 10248 llvm::Value *DeleteBit = MapperCGF.Builder.CreateAnd( 10249 MapType, 10250 MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_DELETE)); 10251 llvm::Value *DeleteCond; 10252 llvm::Value *Cond; 10253 if (IsInit) { 10254 // base != begin? 10255 llvm::Value *BaseIsBegin = MapperCGF.Builder.CreateICmpNE(Base, Begin); 10256 // IsPtrAndObj? 10257 llvm::Value *PtrAndObjBit = MapperCGF.Builder.CreateAnd( 10258 MapType, 10259 MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_PTR_AND_OBJ)); 10260 PtrAndObjBit = MapperCGF.Builder.CreateIsNotNull(PtrAndObjBit); 10261 BaseIsBegin = MapperCGF.Builder.CreateAnd(BaseIsBegin, PtrAndObjBit); 10262 Cond = MapperCGF.Builder.CreateOr(IsArray, BaseIsBegin); 10263 DeleteCond = MapperCGF.Builder.CreateIsNull( 10264 DeleteBit, getName({"omp.array", Prefix, ".delete"})); 10265 } else { 10266 Cond = IsArray; 10267 DeleteCond = MapperCGF.Builder.CreateIsNotNull( 10268 DeleteBit, getName({"omp.array", Prefix, ".delete"})); 10269 } 10270 Cond = MapperCGF.Builder.CreateAnd(Cond, DeleteCond); 10271 MapperCGF.Builder.CreateCondBr(Cond, BodyBB, ExitBB); 10272 10273 MapperCGF.EmitBlock(BodyBB); 10274 // Get the array size by multiplying element size and element number (i.e., \p 10275 // Size). 
10276 llvm::Value *ArraySize = MapperCGF.Builder.CreateNUWMul( 10277 Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity())); 10278 // Remove OMP_MAP_TO and OMP_MAP_FROM from the map type, so that it achieves 10279 // memory allocation/deletion purpose only. 10280 llvm::Value *MapTypeArg = MapperCGF.Builder.CreateAnd( 10281 MapType, 10282 MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO | 10283 MappableExprsHandler::OMP_MAP_FROM))); 10284 MapTypeArg = MapperCGF.Builder.CreateOr( 10285 MapTypeArg, 10286 MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_IMPLICIT)); 10287 10288 // Call the runtime API __tgt_push_mapper_component to fill up the runtime 10289 // data structure. 10290 llvm::Value *OffloadingArgs[] = {Handle, Base, Begin, 10291 ArraySize, MapTypeArg, MapName}; 10292 MapperCGF.EmitRuntimeCall( 10293 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), 10294 OMPRTL___tgt_push_mapper_component), 10295 OffloadingArgs); 10296 } 10297 10298 llvm::Function *CGOpenMPRuntime::getOrCreateUserDefinedMapperFunc( 10299 const OMPDeclareMapperDecl *D) { 10300 auto I = UDMMap.find(D); 10301 if (I != UDMMap.end()) 10302 return I->second; 10303 emitUserDefinedMapper(D); 10304 return UDMMap.lookup(D); 10305 } 10306 10307 void CGOpenMPRuntime::emitTargetNumIterationsCall( 10308 CodeGenFunction &CGF, const OMPExecutableDirective &D, 10309 llvm::Value *DeviceID, 10310 llvm::function_ref<llvm::Value *(CodeGenFunction &CGF, 10311 const OMPLoopDirective &D)> 10312 SizeEmitter) { 10313 OpenMPDirectiveKind Kind = D.getDirectiveKind(); 10314 const OMPExecutableDirective *TD = &D; 10315 // Get nested teams distribute kind directive, if any. 
10316 if (!isOpenMPDistributeDirective(Kind) || !isOpenMPTeamsDirective(Kind)) 10317 TD = getNestedDistributeDirective(CGM.getContext(), D); 10318 if (!TD) 10319 return; 10320 const auto *LD = cast<OMPLoopDirective>(TD); 10321 auto &&CodeGen = [LD, DeviceID, SizeEmitter, &D, this](CodeGenFunction &CGF, 10322 PrePostActionTy &) { 10323 if (llvm::Value *NumIterations = SizeEmitter(CGF, *LD)) { 10324 llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc()); 10325 llvm::Value *Args[] = {RTLoc, DeviceID, NumIterations}; 10326 CGF.EmitRuntimeCall( 10327 OMPBuilder.getOrCreateRuntimeFunction( 10328 CGM.getModule(), OMPRTL___kmpc_push_target_tripcount_mapper), 10329 Args); 10330 } 10331 }; 10332 emitInlinedDirective(CGF, OMPD_unknown, CodeGen); 10333 } 10334 10335 void CGOpenMPRuntime::emitTargetCall( 10336 CodeGenFunction &CGF, const OMPExecutableDirective &D, 10337 llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond, 10338 llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device, 10339 llvm::function_ref<llvm::Value *(CodeGenFunction &CGF, 10340 const OMPLoopDirective &D)> 10341 SizeEmitter) { 10342 if (!CGF.HaveInsertPoint()) 10343 return; 10344 10345 const bool OffloadingMandatory = !CGM.getLangOpts().OpenMPIsDevice && 10346 CGM.getLangOpts().OpenMPOffloadMandatory; 10347 10348 assert((OffloadingMandatory || OutlinedFn) && "Invalid outlined function!"); 10349 10350 const bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>() || 10351 D.hasClausesOfKind<OMPNowaitClause>(); 10352 llvm::SmallVector<llvm::Value *, 16> CapturedVars; 10353 const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target); 10354 auto &&ArgsCodegen = [&CS, &CapturedVars](CodeGenFunction &CGF, 10355 PrePostActionTy &) { 10356 CGF.GenerateOpenMPCapturedVars(CS, CapturedVars); 10357 }; 10358 emitInlinedDirective(CGF, OMPD_unknown, ArgsCodegen); 10359 10360 CodeGenFunction::OMPTargetDataInfo InputInfo; 10361 llvm::Value *MapTypesArray = nullptr; 10362 
llvm::Value *MapNamesArray = nullptr; 10363 // Generate code for the host fallback function. 10364 auto &&FallbackGen = [this, OutlinedFn, &D, &CapturedVars, RequiresOuterTask, 10365 &CS, OffloadingMandatory](CodeGenFunction &CGF) { 10366 if (OffloadingMandatory) { 10367 CGF.Builder.CreateUnreachable(); 10368 } else { 10369 if (RequiresOuterTask) { 10370 CapturedVars.clear(); 10371 CGF.GenerateOpenMPCapturedVars(CS, CapturedVars); 10372 } 10373 emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars); 10374 } 10375 }; 10376 // Fill up the pointer arrays and transfer execution to the device. 10377 auto &&ThenGen = [this, Device, OutlinedFnID, &D, &InputInfo, &MapTypesArray, 10378 &MapNamesArray, SizeEmitter, 10379 FallbackGen](CodeGenFunction &CGF, PrePostActionTy &) { 10380 if (Device.getInt() == OMPC_DEVICE_ancestor) { 10381 // Reverse offloading is not supported, so just execute on the host. 10382 FallbackGen(CGF); 10383 return; 10384 } 10385 10386 // On top of the arrays that were filled up, the target offloading call 10387 // takes as arguments the device id as well as the host pointer. The host 10388 // pointer is used by the runtime library to identify the current target 10389 // region, so it only has to be unique and not necessarily point to 10390 // anything. It could be the pointer to the outlined function that 10391 // implements the target region, but we aren't using that so that the 10392 // compiler doesn't need to keep that, and could therefore inline the host 10393 // function if proven worthwhile during optimization. 10394 10395 // From this point on, we need to have an ID of the target region defined. 10396 assert(OutlinedFnID && "Invalid outlined function ID!"); 10397 (void)OutlinedFnID; 10398 10399 // Emit device ID if any. 
10400 llvm::Value *DeviceID; 10401 if (Device.getPointer()) { 10402 assert((Device.getInt() == OMPC_DEVICE_unknown || 10403 Device.getInt() == OMPC_DEVICE_device_num) && 10404 "Expected device_num modifier."); 10405 llvm::Value *DevVal = CGF.EmitScalarExpr(Device.getPointer()); 10406 DeviceID = 10407 CGF.Builder.CreateIntCast(DevVal, CGF.Int64Ty, /*isSigned=*/true); 10408 } else { 10409 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF); 10410 } 10411 10412 // Emit the number of elements in the offloading arrays. 10413 llvm::Value *PointerNum = 10414 CGF.Builder.getInt32(InputInfo.NumberOfTargetItems); 10415 10416 // Return value of the runtime offloading call. 10417 llvm::Value *Return; 10418 10419 llvm::Value *NumTeams = emitNumTeamsForTargetDirective(CGF, D); 10420 llvm::Value *NumThreads = emitNumThreadsForTargetDirective(CGF, D); 10421 10422 // Source location for the ident struct 10423 llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc()); 10424 10425 // Emit tripcount for the target loop-based directive. 10426 emitTargetNumIterationsCall(CGF, D, DeviceID, SizeEmitter); 10427 10428 bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>(); 10429 // The target region is an outlined function launched by the runtime 10430 // via calls __tgt_target() or __tgt_target_teams(). 10431 // 10432 // __tgt_target() launches a target region with one team and one thread, 10433 // executing a serial region. This master thread may in turn launch 10434 // more threads within its team upon encountering a parallel region, 10435 // however, no additional teams can be launched on the device. 10436 // 10437 // __tgt_target_teams() launches a target region with one or more teams, 10438 // each with one or more threads. This call is required for target 10439 // constructs such as: 10440 // 'target teams' 10441 // 'target' / 'teams' 10442 // 'target teams distribute parallel for' 10443 // 'target parallel' 10444 // and so on. 
10445 // 10446 // Note that on the host and CPU targets, the runtime implementation of 10447 // these calls simply call the outlined function without forking threads. 10448 // The outlined functions themselves have runtime calls to 10449 // __kmpc_fork_teams() and __kmpc_fork() for this purpose, codegen'd by 10450 // the compiler in emitTeamsCall() and emitParallelCall(). 10451 // 10452 // In contrast, on the NVPTX target, the implementation of 10453 // __tgt_target_teams() launches a GPU kernel with the requested number 10454 // of teams and threads so no additional calls to the runtime are required. 10455 if (NumTeams) { 10456 // If we have NumTeams defined this means that we have an enclosed teams 10457 // region. Therefore we also expect to have NumThreads defined. These two 10458 // values should be defined in the presence of a teams directive, 10459 // regardless of having any clauses associated. If the user is using teams 10460 // but no clauses, these two values will be the default that should be 10461 // passed to the runtime library - a 32-bit integer with the value zero. 10462 assert(NumThreads && "Thread limit expression should be available along " 10463 "with number of teams."); 10464 SmallVector<llvm::Value *> OffloadingArgs = { 10465 RTLoc, 10466 DeviceID, 10467 OutlinedFnID, 10468 PointerNum, 10469 InputInfo.BasePointersArray.getPointer(), 10470 InputInfo.PointersArray.getPointer(), 10471 InputInfo.SizesArray.getPointer(), 10472 MapTypesArray, 10473 MapNamesArray, 10474 InputInfo.MappersArray.getPointer(), 10475 NumTeams, 10476 NumThreads}; 10477 if (HasNowait) { 10478 // Add int32_t depNum = 0, void *depList = nullptr, int32_t 10479 // noAliasDepNum = 0, void *noAliasDepList = nullptr. 
10480 OffloadingArgs.push_back(CGF.Builder.getInt32(0)); 10481 OffloadingArgs.push_back(llvm::ConstantPointerNull::get(CGM.VoidPtrTy)); 10482 OffloadingArgs.push_back(CGF.Builder.getInt32(0)); 10483 OffloadingArgs.push_back(llvm::ConstantPointerNull::get(CGM.VoidPtrTy)); 10484 } 10485 Return = CGF.EmitRuntimeCall( 10486 OMPBuilder.getOrCreateRuntimeFunction( 10487 CGM.getModule(), HasNowait 10488 ? OMPRTL___tgt_target_teams_nowait_mapper 10489 : OMPRTL___tgt_target_teams_mapper), 10490 OffloadingArgs); 10491 } else { 10492 SmallVector<llvm::Value *> OffloadingArgs = { 10493 RTLoc, 10494 DeviceID, 10495 OutlinedFnID, 10496 PointerNum, 10497 InputInfo.BasePointersArray.getPointer(), 10498 InputInfo.PointersArray.getPointer(), 10499 InputInfo.SizesArray.getPointer(), 10500 MapTypesArray, 10501 MapNamesArray, 10502 InputInfo.MappersArray.getPointer()}; 10503 if (HasNowait) { 10504 // Add int32_t depNum = 0, void *depList = nullptr, int32_t 10505 // noAliasDepNum = 0, void *noAliasDepList = nullptr. 10506 OffloadingArgs.push_back(CGF.Builder.getInt32(0)); 10507 OffloadingArgs.push_back(llvm::ConstantPointerNull::get(CGM.VoidPtrTy)); 10508 OffloadingArgs.push_back(CGF.Builder.getInt32(0)); 10509 OffloadingArgs.push_back(llvm::ConstantPointerNull::get(CGM.VoidPtrTy)); 10510 } 10511 Return = CGF.EmitRuntimeCall( 10512 OMPBuilder.getOrCreateRuntimeFunction( 10513 CGM.getModule(), HasNowait ? OMPRTL___tgt_target_nowait_mapper 10514 : OMPRTL___tgt_target_mapper), 10515 OffloadingArgs); 10516 } 10517 10518 // Check the error code and execute the host version if required. 
10519 llvm::BasicBlock *OffloadFailedBlock = 10520 CGF.createBasicBlock("omp_offload.failed"); 10521 llvm::BasicBlock *OffloadContBlock = 10522 CGF.createBasicBlock("omp_offload.cont"); 10523 llvm::Value *Failed = CGF.Builder.CreateIsNotNull(Return); 10524 CGF.Builder.CreateCondBr(Failed, OffloadFailedBlock, OffloadContBlock); 10525 10526 CGF.EmitBlock(OffloadFailedBlock); 10527 FallbackGen(CGF); 10528 10529 CGF.EmitBranch(OffloadContBlock); 10530 10531 CGF.EmitBlock(OffloadContBlock, /*IsFinished=*/true); 10532 }; 10533 10534 // Notify that the host version must be executed. 10535 auto &&ElseGen = [FallbackGen](CodeGenFunction &CGF, PrePostActionTy &) { 10536 FallbackGen(CGF); 10537 }; 10538 10539 auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray, 10540 &MapNamesArray, &CapturedVars, RequiresOuterTask, 10541 &CS](CodeGenFunction &CGF, PrePostActionTy &) { 10542 // Fill up the arrays with all the captured variables. 10543 MappableExprsHandler::MapCombinedInfoTy CombinedInfo; 10544 10545 // Get mappable expression information. 10546 MappableExprsHandler MEHandler(D, CGF); 10547 llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers; 10548 llvm::DenseSet<CanonicalDeclPtr<const Decl>> MappedVarSet; 10549 10550 auto RI = CS.getCapturedRecordDecl()->field_begin(); 10551 auto *CV = CapturedVars.begin(); 10552 for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(), 10553 CE = CS.capture_end(); 10554 CI != CE; ++CI, ++RI, ++CV) { 10555 MappableExprsHandler::MapCombinedInfoTy CurInfo; 10556 MappableExprsHandler::StructRangeInfoTy PartialStruct; 10557 10558 // VLA sizes are passed to the outlined region by copy and do not have map 10559 // information associated. 
10560 if (CI->capturesVariableArrayType()) { 10561 CurInfo.Exprs.push_back(nullptr); 10562 CurInfo.BasePointers.push_back(*CV); 10563 CurInfo.Pointers.push_back(*CV); 10564 CurInfo.Sizes.push_back(CGF.Builder.CreateIntCast( 10565 CGF.getTypeSize(RI->getType()), CGF.Int64Ty, /*isSigned=*/true)); 10566 // Copy to the device as an argument. No need to retrieve it. 10567 CurInfo.Types.push_back(MappableExprsHandler::OMP_MAP_LITERAL | 10568 MappableExprsHandler::OMP_MAP_TARGET_PARAM | 10569 MappableExprsHandler::OMP_MAP_IMPLICIT); 10570 CurInfo.Mappers.push_back(nullptr); 10571 } else { 10572 // If we have any information in the map clause, we use it, otherwise we 10573 // just do a default mapping. 10574 MEHandler.generateInfoForCapture(CI, *CV, CurInfo, PartialStruct); 10575 if (!CI->capturesThis()) 10576 MappedVarSet.insert(CI->getCapturedVar()); 10577 else 10578 MappedVarSet.insert(nullptr); 10579 if (CurInfo.BasePointers.empty() && !PartialStruct.Base.isValid()) 10580 MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurInfo); 10581 // Generate correct mapping for variables captured by reference in 10582 // lambdas. 10583 if (CI->capturesVariable()) 10584 MEHandler.generateInfoForLambdaCaptures(CI->getCapturedVar(), *CV, 10585 CurInfo, LambdaPointers); 10586 } 10587 // We expect to have at least an element of information for this capture. 10588 assert((!CurInfo.BasePointers.empty() || PartialStruct.Base.isValid()) && 10589 "Non-existing map pointer for capture!"); 10590 assert(CurInfo.BasePointers.size() == CurInfo.Pointers.size() && 10591 CurInfo.BasePointers.size() == CurInfo.Sizes.size() && 10592 CurInfo.BasePointers.size() == CurInfo.Types.size() && 10593 CurInfo.BasePointers.size() == CurInfo.Mappers.size() && 10594 "Inconsistent map information sizes!"); 10595 10596 // If there is an entry in PartialStruct it means we have a struct with 10597 // individual members mapped. Emit an extra combined entry. 
10598 if (PartialStruct.Base.isValid()) { 10599 CombinedInfo.append(PartialStruct.PreliminaryMapData); 10600 MEHandler.emitCombinedEntry( 10601 CombinedInfo, CurInfo.Types, PartialStruct, nullptr, 10602 !PartialStruct.PreliminaryMapData.BasePointers.empty()); 10603 } 10604 10605 // We need to append the results of this capture to what we already have. 10606 CombinedInfo.append(CurInfo); 10607 } 10608 // Adjust MEMBER_OF flags for the lambdas captures. 10609 MEHandler.adjustMemberOfForLambdaCaptures( 10610 LambdaPointers, CombinedInfo.BasePointers, CombinedInfo.Pointers, 10611 CombinedInfo.Types); 10612 // Map any list items in a map clause that were not captures because they 10613 // weren't referenced within the construct. 10614 MEHandler.generateAllInfo(CombinedInfo, MappedVarSet); 10615 10616 TargetDataInfo Info; 10617 // Fill up the arrays and create the arguments. 10618 emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder); 10619 emitOffloadingArraysArgument( 10620 CGF, Info.BasePointersArray, Info.PointersArray, Info.SizesArray, 10621 Info.MapTypesArray, Info.MapNamesArray, Info.MappersArray, Info, 10622 {/*ForEndCall=*/false}); 10623 10624 InputInfo.NumberOfTargetItems = Info.NumberOfPtrs; 10625 InputInfo.BasePointersArray = 10626 Address::deprecated(Info.BasePointersArray, CGM.getPointerAlign()); 10627 InputInfo.PointersArray = 10628 Address::deprecated(Info.PointersArray, CGM.getPointerAlign()); 10629 InputInfo.SizesArray = 10630 Address::deprecated(Info.SizesArray, CGM.getPointerAlign()); 10631 InputInfo.MappersArray = 10632 Address::deprecated(Info.MappersArray, CGM.getPointerAlign()); 10633 MapTypesArray = Info.MapTypesArray; 10634 MapNamesArray = Info.MapNamesArray; 10635 if (RequiresOuterTask) 10636 CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo); 10637 else 10638 emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen); 10639 }; 10640 10641 auto &&TargetElseGen = [this, &ElseGen, &D, RequiresOuterTask]( 10642 CodeGenFunction &CGF, 
PrePostActionTy &) { 10643 if (RequiresOuterTask) { 10644 CodeGenFunction::OMPTargetDataInfo InputInfo; 10645 CGF.EmitOMPTargetTaskBasedDirective(D, ElseGen, InputInfo); 10646 } else { 10647 emitInlinedDirective(CGF, D.getDirectiveKind(), ElseGen); 10648 } 10649 }; 10650 10651 // If we have a target function ID it means that we need to support 10652 // offloading, otherwise, just execute on the host. We need to execute on host 10653 // regardless of the conditional in the if clause if, e.g., the user do not 10654 // specify target triples. 10655 if (OutlinedFnID) { 10656 if (IfCond) { 10657 emitIfClause(CGF, IfCond, TargetThenGen, TargetElseGen); 10658 } else { 10659 RegionCodeGenTy ThenRCG(TargetThenGen); 10660 ThenRCG(CGF); 10661 } 10662 } else { 10663 RegionCodeGenTy ElseRCG(TargetElseGen); 10664 ElseRCG(CGF); 10665 } 10666 } 10667 10668 void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S, 10669 StringRef ParentName) { 10670 if (!S) 10671 return; 10672 10673 // Codegen OMP target directives that offload compute to the device. 10674 bool RequiresDeviceCodegen = 10675 isa<OMPExecutableDirective>(S) && 10676 isOpenMPTargetExecutionDirective( 10677 cast<OMPExecutableDirective>(S)->getDirectiveKind()); 10678 10679 if (RequiresDeviceCodegen) { 10680 const auto &E = *cast<OMPExecutableDirective>(S); 10681 unsigned DeviceID; 10682 unsigned FileID; 10683 unsigned Line; 10684 getTargetEntryUniqueInfo(CGM.getContext(), E.getBeginLoc(), DeviceID, 10685 FileID, Line); 10686 10687 // Is this a target region that should not be emitted as an entry point? If 10688 // so just signal we are done with this target region. 
10689 if (!OffloadEntriesInfoManager.hasTargetRegionEntryInfo(DeviceID, FileID, 10690 ParentName, Line)) 10691 return; 10692 10693 switch (E.getDirectiveKind()) { 10694 case OMPD_target: 10695 CodeGenFunction::EmitOMPTargetDeviceFunction(CGM, ParentName, 10696 cast<OMPTargetDirective>(E)); 10697 break; 10698 case OMPD_target_parallel: 10699 CodeGenFunction::EmitOMPTargetParallelDeviceFunction( 10700 CGM, ParentName, cast<OMPTargetParallelDirective>(E)); 10701 break; 10702 case OMPD_target_teams: 10703 CodeGenFunction::EmitOMPTargetTeamsDeviceFunction( 10704 CGM, ParentName, cast<OMPTargetTeamsDirective>(E)); 10705 break; 10706 case OMPD_target_teams_distribute: 10707 CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction( 10708 CGM, ParentName, cast<OMPTargetTeamsDistributeDirective>(E)); 10709 break; 10710 case OMPD_target_teams_distribute_simd: 10711 CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction( 10712 CGM, ParentName, cast<OMPTargetTeamsDistributeSimdDirective>(E)); 10713 break; 10714 case OMPD_target_parallel_for: 10715 CodeGenFunction::EmitOMPTargetParallelForDeviceFunction( 10716 CGM, ParentName, cast<OMPTargetParallelForDirective>(E)); 10717 break; 10718 case OMPD_target_parallel_for_simd: 10719 CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction( 10720 CGM, ParentName, cast<OMPTargetParallelForSimdDirective>(E)); 10721 break; 10722 case OMPD_target_simd: 10723 CodeGenFunction::EmitOMPTargetSimdDeviceFunction( 10724 CGM, ParentName, cast<OMPTargetSimdDirective>(E)); 10725 break; 10726 case OMPD_target_teams_distribute_parallel_for: 10727 CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction( 10728 CGM, ParentName, 10729 cast<OMPTargetTeamsDistributeParallelForDirective>(E)); 10730 break; 10731 case OMPD_target_teams_distribute_parallel_for_simd: 10732 CodeGenFunction:: 10733 EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction( 10734 CGM, ParentName, 10735 
cast<OMPTargetTeamsDistributeParallelForSimdDirective>(E)); 10736 break; 10737 case OMPD_parallel: 10738 case OMPD_for: 10739 case OMPD_parallel_for: 10740 case OMPD_parallel_master: 10741 case OMPD_parallel_sections: 10742 case OMPD_for_simd: 10743 case OMPD_parallel_for_simd: 10744 case OMPD_cancel: 10745 case OMPD_cancellation_point: 10746 case OMPD_ordered: 10747 case OMPD_threadprivate: 10748 case OMPD_allocate: 10749 case OMPD_task: 10750 case OMPD_simd: 10751 case OMPD_tile: 10752 case OMPD_unroll: 10753 case OMPD_sections: 10754 case OMPD_section: 10755 case OMPD_single: 10756 case OMPD_master: 10757 case OMPD_critical: 10758 case OMPD_taskyield: 10759 case OMPD_barrier: 10760 case OMPD_taskwait: 10761 case OMPD_taskgroup: 10762 case OMPD_atomic: 10763 case OMPD_flush: 10764 case OMPD_depobj: 10765 case OMPD_scan: 10766 case OMPD_teams: 10767 case OMPD_target_data: 10768 case OMPD_target_exit_data: 10769 case OMPD_target_enter_data: 10770 case OMPD_distribute: 10771 case OMPD_distribute_simd: 10772 case OMPD_distribute_parallel_for: 10773 case OMPD_distribute_parallel_for_simd: 10774 case OMPD_teams_distribute: 10775 case OMPD_teams_distribute_simd: 10776 case OMPD_teams_distribute_parallel_for: 10777 case OMPD_teams_distribute_parallel_for_simd: 10778 case OMPD_target_update: 10779 case OMPD_declare_simd: 10780 case OMPD_declare_variant: 10781 case OMPD_begin_declare_variant: 10782 case OMPD_end_declare_variant: 10783 case OMPD_declare_target: 10784 case OMPD_end_declare_target: 10785 case OMPD_declare_reduction: 10786 case OMPD_declare_mapper: 10787 case OMPD_taskloop: 10788 case OMPD_taskloop_simd: 10789 case OMPD_master_taskloop: 10790 case OMPD_master_taskloop_simd: 10791 case OMPD_parallel_master_taskloop: 10792 case OMPD_parallel_master_taskloop_simd: 10793 case OMPD_requires: 10794 case OMPD_metadirective: 10795 case OMPD_unknown: 10796 default: 10797 llvm_unreachable("Unknown target directive for OpenMP device codegen."); 10798 } 10799 return; 
10800 } 10801 10802 if (const auto *E = dyn_cast<OMPExecutableDirective>(S)) { 10803 if (!E->hasAssociatedStmt() || !E->getAssociatedStmt()) 10804 return; 10805 10806 scanForTargetRegionsFunctions(E->getRawStmt(), ParentName); 10807 return; 10808 } 10809 10810 // If this is a lambda function, look into its body. 10811 if (const auto *L = dyn_cast<LambdaExpr>(S)) 10812 S = L->getBody(); 10813 10814 // Keep looking for target regions recursively. 10815 for (const Stmt *II : S->children()) 10816 scanForTargetRegionsFunctions(II, ParentName); 10817 } 10818 10819 static bool isAssumedToBeNotEmitted(const ValueDecl *VD, bool IsDevice) { 10820 Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy = 10821 OMPDeclareTargetDeclAttr::getDeviceType(VD); 10822 if (!DevTy) 10823 return false; 10824 // Do not emit device_type(nohost) functions for the host. 10825 if (!IsDevice && DevTy == OMPDeclareTargetDeclAttr::DT_NoHost) 10826 return true; 10827 // Do not emit device_type(host) functions for the device. 10828 if (IsDevice && DevTy == OMPDeclareTargetDeclAttr::DT_Host) 10829 return true; 10830 return false; 10831 } 10832 10833 bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) { 10834 // If emitting code for the host, we do not process FD here. Instead we do 10835 // the normal code generation. 10836 if (!CGM.getLangOpts().OpenMPIsDevice) { 10837 if (const auto *FD = dyn_cast<FunctionDecl>(GD.getDecl())) 10838 if (isAssumedToBeNotEmitted(cast<ValueDecl>(FD), 10839 CGM.getLangOpts().OpenMPIsDevice)) 10840 return true; 10841 return false; 10842 } 10843 10844 const ValueDecl *VD = cast<ValueDecl>(GD.getDecl()); 10845 // Try to detect target regions in the function. 
10846 if (const auto *FD = dyn_cast<FunctionDecl>(VD)) { 10847 StringRef Name = CGM.getMangledName(GD); 10848 scanForTargetRegionsFunctions(FD->getBody(), Name); 10849 if (isAssumedToBeNotEmitted(cast<ValueDecl>(FD), 10850 CGM.getLangOpts().OpenMPIsDevice)) 10851 return true; 10852 } 10853 10854 // Do not to emit function if it is not marked as declare target. 10855 return !OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD) && 10856 AlreadyEmittedTargetDecls.count(VD) == 0; 10857 } 10858 10859 bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) { 10860 if (isAssumedToBeNotEmitted(cast<ValueDecl>(GD.getDecl()), 10861 CGM.getLangOpts().OpenMPIsDevice)) 10862 return true; 10863 10864 if (!CGM.getLangOpts().OpenMPIsDevice) 10865 return false; 10866 10867 // Check if there are Ctors/Dtors in this declaration and look for target 10868 // regions in it. We use the complete variant to produce the kernel name 10869 // mangling. 10870 QualType RDTy = cast<VarDecl>(GD.getDecl())->getType(); 10871 if (const auto *RD = RDTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl()) { 10872 for (const CXXConstructorDecl *Ctor : RD->ctors()) { 10873 StringRef ParentName = 10874 CGM.getMangledName(GlobalDecl(Ctor, Ctor_Complete)); 10875 scanForTargetRegionsFunctions(Ctor->getBody(), ParentName); 10876 } 10877 if (const CXXDestructorDecl *Dtor = RD->getDestructor()) { 10878 StringRef ParentName = 10879 CGM.getMangledName(GlobalDecl(Dtor, Dtor_Complete)); 10880 scanForTargetRegionsFunctions(Dtor->getBody(), ParentName); 10881 } 10882 } 10883 10884 // Do not to emit variable if it is not marked as declare target. 
10885 llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 10886 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration( 10887 cast<VarDecl>(GD.getDecl())); 10888 if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link || 10889 (*Res == OMPDeclareTargetDeclAttr::MT_To && 10890 HasRequiresUnifiedSharedMemory)) { 10891 DeferredGlobalVariables.insert(cast<VarDecl>(GD.getDecl())); 10892 return true; 10893 } 10894 return false; 10895 } 10896 10897 void CGOpenMPRuntime::registerTargetGlobalVariable(const VarDecl *VD, 10898 llvm::Constant *Addr) { 10899 if (CGM.getLangOpts().OMPTargetTriples.empty() && 10900 !CGM.getLangOpts().OpenMPIsDevice) 10901 return; 10902 10903 // If we have host/nohost variables, they do not need to be registered. 10904 Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy = 10905 OMPDeclareTargetDeclAttr::getDeviceType(VD); 10906 if (DevTy && DevTy.getValue() != OMPDeclareTargetDeclAttr::DT_Any) 10907 return; 10908 10909 llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 10910 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD); 10911 if (!Res) { 10912 if (CGM.getLangOpts().OpenMPIsDevice) { 10913 // Register non-target variables being emitted in device code (debug info 10914 // may cause this). 10915 StringRef VarName = CGM.getMangledName(VD); 10916 EmittedNonTargetVariables.try_emplace(VarName, Addr); 10917 } 10918 return; 10919 } 10920 // Register declare target variables. 
10921 OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags; 10922 StringRef VarName; 10923 CharUnits VarSize; 10924 llvm::GlobalValue::LinkageTypes Linkage; 10925 10926 if (*Res == OMPDeclareTargetDeclAttr::MT_To && 10927 !HasRequiresUnifiedSharedMemory) { 10928 Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo; 10929 VarName = CGM.getMangledName(VD); 10930 if (VD->hasDefinition(CGM.getContext()) != VarDecl::DeclarationOnly) { 10931 VarSize = CGM.getContext().getTypeSizeInChars(VD->getType()); 10932 assert(!VarSize.isZero() && "Expected non-zero size of the variable"); 10933 } else { 10934 VarSize = CharUnits::Zero(); 10935 } 10936 Linkage = CGM.getLLVMLinkageVarDefinition(VD, /*IsConstant=*/false); 10937 // Temp solution to prevent optimizations of the internal variables. 10938 if (CGM.getLangOpts().OpenMPIsDevice && !VD->isExternallyVisible()) { 10939 // Do not create a "ref-variable" if the original is not also available 10940 // on the host. 10941 if (!OffloadEntriesInfoManager.hasDeviceGlobalVarEntryInfo(VarName)) 10942 return; 10943 std::string RefName = getName({VarName, "ref"}); 10944 if (!CGM.GetGlobalValue(RefName)) { 10945 llvm::Constant *AddrRef = 10946 getOrCreateInternalVariable(Addr->getType(), RefName); 10947 auto *GVAddrRef = cast<llvm::GlobalVariable>(AddrRef); 10948 GVAddrRef->setConstant(/*Val=*/true); 10949 GVAddrRef->setLinkage(llvm::GlobalValue::InternalLinkage); 10950 GVAddrRef->setInitializer(Addr); 10951 CGM.addCompilerUsedGlobal(GVAddrRef); 10952 } 10953 } 10954 } else { 10955 assert(((*Res == OMPDeclareTargetDeclAttr::MT_Link) || 10956 (*Res == OMPDeclareTargetDeclAttr::MT_To && 10957 HasRequiresUnifiedSharedMemory)) && 10958 "Declare target attribute must link or to with unified memory."); 10959 if (*Res == OMPDeclareTargetDeclAttr::MT_Link) 10960 Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink; 10961 else 10962 Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo; 10963 10964 if 
(CGM.getLangOpts().OpenMPIsDevice) { 10965 VarName = Addr->getName(); 10966 Addr = nullptr; 10967 } else { 10968 VarName = getAddrOfDeclareTargetVar(VD).getName(); 10969 Addr = cast<llvm::Constant>(getAddrOfDeclareTargetVar(VD).getPointer()); 10970 } 10971 VarSize = CGM.getPointerSize(); 10972 Linkage = llvm::GlobalValue::WeakAnyLinkage; 10973 } 10974 10975 OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo( 10976 VarName, Addr, VarSize, Flags, Linkage); 10977 } 10978 10979 bool CGOpenMPRuntime::emitTargetGlobal(GlobalDecl GD) { 10980 if (isa<FunctionDecl>(GD.getDecl()) || 10981 isa<OMPDeclareReductionDecl>(GD.getDecl())) 10982 return emitTargetFunctions(GD); 10983 10984 return emitTargetGlobalVariable(GD); 10985 } 10986 10987 void CGOpenMPRuntime::emitDeferredTargetDecls() const { 10988 for (const VarDecl *VD : DeferredGlobalVariables) { 10989 llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 10990 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD); 10991 if (!Res) 10992 continue; 10993 if (*Res == OMPDeclareTargetDeclAttr::MT_To && 10994 !HasRequiresUnifiedSharedMemory) { 10995 CGM.EmitGlobal(VD); 10996 } else { 10997 assert((*Res == OMPDeclareTargetDeclAttr::MT_Link || 10998 (*Res == OMPDeclareTargetDeclAttr::MT_To && 10999 HasRequiresUnifiedSharedMemory)) && 11000 "Expected link clause or to clause with unified memory."); 11001 (void)CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD); 11002 } 11003 } 11004 } 11005 11006 void CGOpenMPRuntime::adjustTargetSpecificDataForLambdas( 11007 CodeGenFunction &CGF, const OMPExecutableDirective &D) const { 11008 assert(isOpenMPTargetExecutionDirective(D.getDirectiveKind()) && 11009 " Expected target-based directive."); 11010 } 11011 11012 void CGOpenMPRuntime::processRequiresDirective(const OMPRequiresDecl *D) { 11013 for (const OMPClause *Clause : D->clauselists()) { 11014 if (Clause->getClauseKind() == OMPC_unified_shared_memory) { 11015 HasRequiresUnifiedSharedMemory = true; 11016 } else if 
(const auto *AC = 11017 dyn_cast<OMPAtomicDefaultMemOrderClause>(Clause)) { 11018 switch (AC->getAtomicDefaultMemOrderKind()) { 11019 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_acq_rel: 11020 RequiresAtomicOrdering = llvm::AtomicOrdering::AcquireRelease; 11021 break; 11022 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_seq_cst: 11023 RequiresAtomicOrdering = llvm::AtomicOrdering::SequentiallyConsistent; 11024 break; 11025 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_relaxed: 11026 RequiresAtomicOrdering = llvm::AtomicOrdering::Monotonic; 11027 break; 11028 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_unknown: 11029 break; 11030 } 11031 } 11032 } 11033 } 11034 11035 llvm::AtomicOrdering CGOpenMPRuntime::getDefaultMemoryOrdering() const { 11036 return RequiresAtomicOrdering; 11037 } 11038 11039 bool CGOpenMPRuntime::hasAllocateAttributeForGlobalVar(const VarDecl *VD, 11040 LangAS &AS) { 11041 if (!VD || !VD->hasAttr<OMPAllocateDeclAttr>()) 11042 return false; 11043 const auto *A = VD->getAttr<OMPAllocateDeclAttr>(); 11044 switch(A->getAllocatorType()) { 11045 case OMPAllocateDeclAttr::OMPNullMemAlloc: 11046 case OMPAllocateDeclAttr::OMPDefaultMemAlloc: 11047 // Not supported, fallback to the default mem space. 
11048 case OMPAllocateDeclAttr::OMPLargeCapMemAlloc: 11049 case OMPAllocateDeclAttr::OMPCGroupMemAlloc: 11050 case OMPAllocateDeclAttr::OMPHighBWMemAlloc: 11051 case OMPAllocateDeclAttr::OMPLowLatMemAlloc: 11052 case OMPAllocateDeclAttr::OMPThreadMemAlloc: 11053 case OMPAllocateDeclAttr::OMPConstMemAlloc: 11054 case OMPAllocateDeclAttr::OMPPTeamMemAlloc: 11055 AS = LangAS::Default; 11056 return true; 11057 case OMPAllocateDeclAttr::OMPUserDefinedMemAlloc: 11058 llvm_unreachable("Expected predefined allocator for the variables with the " 11059 "static storage."); 11060 } 11061 return false; 11062 } 11063 11064 bool CGOpenMPRuntime::hasRequiresUnifiedSharedMemory() const { 11065 return HasRequiresUnifiedSharedMemory; 11066 } 11067 11068 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::DisableAutoDeclareTargetRAII( 11069 CodeGenModule &CGM) 11070 : CGM(CGM) { 11071 if (CGM.getLangOpts().OpenMPIsDevice) { 11072 SavedShouldMarkAsGlobal = CGM.getOpenMPRuntime().ShouldMarkAsGlobal; 11073 CGM.getOpenMPRuntime().ShouldMarkAsGlobal = false; 11074 } 11075 } 11076 11077 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::~DisableAutoDeclareTargetRAII() { 11078 if (CGM.getLangOpts().OpenMPIsDevice) 11079 CGM.getOpenMPRuntime().ShouldMarkAsGlobal = SavedShouldMarkAsGlobal; 11080 } 11081 11082 bool CGOpenMPRuntime::markAsGlobalTarget(GlobalDecl GD) { 11083 if (!CGM.getLangOpts().OpenMPIsDevice || !ShouldMarkAsGlobal) 11084 return true; 11085 11086 const auto *D = cast<FunctionDecl>(GD.getDecl()); 11087 // Do not to emit function if it is marked as declare target as it was already 11088 // emitted. 
11089 if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(D)) { 11090 if (D->hasBody() && AlreadyEmittedTargetDecls.count(D) == 0) { 11091 if (auto *F = dyn_cast_or_null<llvm::Function>( 11092 CGM.GetGlobalValue(CGM.getMangledName(GD)))) 11093 return !F->isDeclaration(); 11094 return false; 11095 } 11096 return true; 11097 } 11098 11099 return !AlreadyEmittedTargetDecls.insert(D).second; 11100 } 11101 11102 llvm::Function *CGOpenMPRuntime::emitRequiresDirectiveRegFun() { 11103 // If we don't have entries or if we are emitting code for the device, we 11104 // don't need to do anything. 11105 if (CGM.getLangOpts().OMPTargetTriples.empty() || 11106 CGM.getLangOpts().OpenMPSimd || CGM.getLangOpts().OpenMPIsDevice || 11107 (OffloadEntriesInfoManager.empty() && 11108 !HasEmittedDeclareTargetRegion && 11109 !HasEmittedTargetRegion)) 11110 return nullptr; 11111 11112 // Create and register the function that handles the requires directives. 11113 ASTContext &C = CGM.getContext(); 11114 11115 llvm::Function *RequiresRegFn; 11116 { 11117 CodeGenFunction CGF(CGM); 11118 const auto &FI = CGM.getTypes().arrangeNullaryFunction(); 11119 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); 11120 std::string ReqName = getName({"omp_offloading", "requires_reg"}); 11121 RequiresRegFn = CGM.CreateGlobalInitOrCleanUpFunction(FTy, ReqName, FI); 11122 CGF.StartFunction(GlobalDecl(), C.VoidTy, RequiresRegFn, FI, {}); 11123 OpenMPOffloadingRequiresDirFlags Flags = OMP_REQ_NONE; 11124 // TODO: check for other requires clauses. 11125 // The requires directive takes effect only when a target region is 11126 // present in the compilation unit. Otherwise it is ignored and not 11127 // passed to the runtime. This avoids the runtime from throwing an error 11128 // for mismatching requires clauses across compilation units that don't 11129 // contain at least 1 target region. 
11130 assert((HasEmittedTargetRegion || 11131 HasEmittedDeclareTargetRegion || 11132 !OffloadEntriesInfoManager.empty()) && 11133 "Target or declare target region expected."); 11134 if (HasRequiresUnifiedSharedMemory) 11135 Flags = OMP_REQ_UNIFIED_SHARED_MEMORY; 11136 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 11137 CGM.getModule(), OMPRTL___tgt_register_requires), 11138 llvm::ConstantInt::get(CGM.Int64Ty, Flags)); 11139 CGF.FinishFunction(); 11140 } 11141 return RequiresRegFn; 11142 } 11143 11144 void CGOpenMPRuntime::emitTeamsCall(CodeGenFunction &CGF, 11145 const OMPExecutableDirective &D, 11146 SourceLocation Loc, 11147 llvm::Function *OutlinedFn, 11148 ArrayRef<llvm::Value *> CapturedVars) { 11149 if (!CGF.HaveInsertPoint()) 11150 return; 11151 11152 llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc); 11153 CodeGenFunction::RunCleanupsScope Scope(CGF); 11154 11155 // Build call __kmpc_fork_teams(loc, n, microtask, var1, .., varn); 11156 llvm::Value *Args[] = { 11157 RTLoc, 11158 CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars 11159 CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy())}; 11160 llvm::SmallVector<llvm::Value *, 16> RealArgs; 11161 RealArgs.append(std::begin(Args), std::end(Args)); 11162 RealArgs.append(CapturedVars.begin(), CapturedVars.end()); 11163 11164 llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction( 11165 CGM.getModule(), OMPRTL___kmpc_fork_teams); 11166 CGF.EmitRuntimeCall(RTLFn, RealArgs); 11167 } 11168 11169 void CGOpenMPRuntime::emitNumTeamsClause(CodeGenFunction &CGF, 11170 const Expr *NumTeams, 11171 const Expr *ThreadLimit, 11172 SourceLocation Loc) { 11173 if (!CGF.HaveInsertPoint()) 11174 return; 11175 11176 llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc); 11177 11178 llvm::Value *NumTeamsVal = 11179 NumTeams 11180 ? 
CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(NumTeams), 11181 CGF.CGM.Int32Ty, /* isSigned = */ true) 11182 : CGF.Builder.getInt32(0); 11183 11184 llvm::Value *ThreadLimitVal = 11185 ThreadLimit 11186 ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit), 11187 CGF.CGM.Int32Ty, /* isSigned = */ true) 11188 : CGF.Builder.getInt32(0); 11189 11190 // Build call __kmpc_push_num_teamss(&loc, global_tid, num_teams, thread_limit) 11191 llvm::Value *PushNumTeamsArgs[] = {RTLoc, getThreadID(CGF, Loc), NumTeamsVal, 11192 ThreadLimitVal}; 11193 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 11194 CGM.getModule(), OMPRTL___kmpc_push_num_teams), 11195 PushNumTeamsArgs); 11196 } 11197 11198 void CGOpenMPRuntime::emitTargetDataCalls( 11199 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, 11200 const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) { 11201 if (!CGF.HaveInsertPoint()) 11202 return; 11203 11204 // Action used to replace the default codegen action and turn privatization 11205 // off. 11206 PrePostActionTy NoPrivAction; 11207 11208 // Generate the code for the opening of the data environment. Capture all the 11209 // arguments of the runtime call by reference because they are used in the 11210 // closing of the region. 11211 auto &&BeginThenGen = [this, &D, Device, &Info, 11212 &CodeGen](CodeGenFunction &CGF, PrePostActionTy &) { 11213 // Fill up the arrays with all the mapped variables. 11214 MappableExprsHandler::MapCombinedInfoTy CombinedInfo; 11215 11216 // Get map clause information. 11217 MappableExprsHandler MEHandler(D, CGF); 11218 MEHandler.generateAllInfo(CombinedInfo); 11219 11220 // Fill up the arrays and create the arguments. 
11221 emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder, 11222 /*IsNonContiguous=*/true); 11223 11224 llvm::Value *BasePointersArrayArg = nullptr; 11225 llvm::Value *PointersArrayArg = nullptr; 11226 llvm::Value *SizesArrayArg = nullptr; 11227 llvm::Value *MapTypesArrayArg = nullptr; 11228 llvm::Value *MapNamesArrayArg = nullptr; 11229 llvm::Value *MappersArrayArg = nullptr; 11230 emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg, 11231 SizesArrayArg, MapTypesArrayArg, 11232 MapNamesArrayArg, MappersArrayArg, Info); 11233 11234 // Emit device ID if any. 11235 llvm::Value *DeviceID = nullptr; 11236 if (Device) { 11237 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device), 11238 CGF.Int64Ty, /*isSigned=*/true); 11239 } else { 11240 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF); 11241 } 11242 11243 // Emit the number of elements in the offloading arrays. 11244 llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs); 11245 // 11246 // Source location for the ident struct 11247 llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc()); 11248 11249 llvm::Value *OffloadingArgs[] = {RTLoc, 11250 DeviceID, 11251 PointerNum, 11252 BasePointersArrayArg, 11253 PointersArrayArg, 11254 SizesArrayArg, 11255 MapTypesArrayArg, 11256 MapNamesArrayArg, 11257 MappersArrayArg}; 11258 CGF.EmitRuntimeCall( 11259 OMPBuilder.getOrCreateRuntimeFunction( 11260 CGM.getModule(), OMPRTL___tgt_target_data_begin_mapper), 11261 OffloadingArgs); 11262 11263 // If device pointer privatization is required, emit the body of the region 11264 // here. It will have to be duplicated: with and without privatization. 11265 if (!Info.CaptureDeviceAddrMap.empty()) 11266 CodeGen(CGF); 11267 }; 11268 11269 // Generate code for the closing of the data region. 
11270 auto &&EndThenGen = [this, Device, &Info, &D](CodeGenFunction &CGF, 11271 PrePostActionTy &) { 11272 assert(Info.isValid() && "Invalid data environment closing arguments."); 11273 11274 llvm::Value *BasePointersArrayArg = nullptr; 11275 llvm::Value *PointersArrayArg = nullptr; 11276 llvm::Value *SizesArrayArg = nullptr; 11277 llvm::Value *MapTypesArrayArg = nullptr; 11278 llvm::Value *MapNamesArrayArg = nullptr; 11279 llvm::Value *MappersArrayArg = nullptr; 11280 emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg, 11281 SizesArrayArg, MapTypesArrayArg, 11282 MapNamesArrayArg, MappersArrayArg, Info, 11283 {/*ForEndCall=*/true}); 11284 11285 // Emit device ID if any. 11286 llvm::Value *DeviceID = nullptr; 11287 if (Device) { 11288 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device), 11289 CGF.Int64Ty, /*isSigned=*/true); 11290 } else { 11291 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF); 11292 } 11293 11294 // Emit the number of elements in the offloading arrays. 11295 llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs); 11296 11297 // Source location for the ident struct 11298 llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc()); 11299 11300 llvm::Value *OffloadingArgs[] = {RTLoc, 11301 DeviceID, 11302 PointerNum, 11303 BasePointersArrayArg, 11304 PointersArrayArg, 11305 SizesArrayArg, 11306 MapTypesArrayArg, 11307 MapNamesArrayArg, 11308 MappersArrayArg}; 11309 CGF.EmitRuntimeCall( 11310 OMPBuilder.getOrCreateRuntimeFunction( 11311 CGM.getModule(), OMPRTL___tgt_target_data_end_mapper), 11312 OffloadingArgs); 11313 }; 11314 11315 // If we need device pointer privatization, we need to emit the body of the 11316 // region with no privatization in the 'else' branch of the conditional. 11317 // Otherwise, we don't have to do anything. 
11318 auto &&BeginElseGen = [&Info, &CodeGen, &NoPrivAction](CodeGenFunction &CGF, 11319 PrePostActionTy &) { 11320 if (!Info.CaptureDeviceAddrMap.empty()) { 11321 CodeGen.setAction(NoPrivAction); 11322 CodeGen(CGF); 11323 } 11324 }; 11325 11326 // We don't have to do anything to close the region if the if clause evaluates 11327 // to false. 11328 auto &&EndElseGen = [](CodeGenFunction &CGF, PrePostActionTy &) {}; 11329 11330 if (IfCond) { 11331 emitIfClause(CGF, IfCond, BeginThenGen, BeginElseGen); 11332 } else { 11333 RegionCodeGenTy RCG(BeginThenGen); 11334 RCG(CGF); 11335 } 11336 11337 // If we don't require privatization of device pointers, we emit the body in 11338 // between the runtime calls. This avoids duplicating the body code. 11339 if (Info.CaptureDeviceAddrMap.empty()) { 11340 CodeGen.setAction(NoPrivAction); 11341 CodeGen(CGF); 11342 } 11343 11344 if (IfCond) { 11345 emitIfClause(CGF, IfCond, EndThenGen, EndElseGen); 11346 } else { 11347 RegionCodeGenTy RCG(EndThenGen); 11348 RCG(CGF); 11349 } 11350 } 11351 11352 void CGOpenMPRuntime::emitTargetDataStandAloneCall( 11353 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, 11354 const Expr *Device) { 11355 if (!CGF.HaveInsertPoint()) 11356 return; 11357 11358 assert((isa<OMPTargetEnterDataDirective>(D) || 11359 isa<OMPTargetExitDataDirective>(D) || 11360 isa<OMPTargetUpdateDirective>(D)) && 11361 "Expecting either target enter, exit data, or update directives."); 11362 11363 CodeGenFunction::OMPTargetDataInfo InputInfo; 11364 llvm::Value *MapTypesArray = nullptr; 11365 llvm::Value *MapNamesArray = nullptr; 11366 // Generate the code for the opening of the data environment. 11367 auto &&ThenGen = [this, &D, Device, &InputInfo, &MapTypesArray, 11368 &MapNamesArray](CodeGenFunction &CGF, PrePostActionTy &) { 11369 // Emit device ID if any. 
11370 llvm::Value *DeviceID = nullptr; 11371 if (Device) { 11372 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device), 11373 CGF.Int64Ty, /*isSigned=*/true); 11374 } else { 11375 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF); 11376 } 11377 11378 // Emit the number of elements in the offloading arrays. 11379 llvm::Constant *PointerNum = 11380 CGF.Builder.getInt32(InputInfo.NumberOfTargetItems); 11381 11382 // Source location for the ident struct 11383 llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc()); 11384 11385 llvm::Value *OffloadingArgs[] = {RTLoc, 11386 DeviceID, 11387 PointerNum, 11388 InputInfo.BasePointersArray.getPointer(), 11389 InputInfo.PointersArray.getPointer(), 11390 InputInfo.SizesArray.getPointer(), 11391 MapTypesArray, 11392 MapNamesArray, 11393 InputInfo.MappersArray.getPointer()}; 11394 11395 // Select the right runtime function call for each standalone 11396 // directive. 11397 const bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>(); 11398 RuntimeFunction RTLFn; 11399 switch (D.getDirectiveKind()) { 11400 case OMPD_target_enter_data: 11401 RTLFn = HasNowait ? OMPRTL___tgt_target_data_begin_nowait_mapper 11402 : OMPRTL___tgt_target_data_begin_mapper; 11403 break; 11404 case OMPD_target_exit_data: 11405 RTLFn = HasNowait ? OMPRTL___tgt_target_data_end_nowait_mapper 11406 : OMPRTL___tgt_target_data_end_mapper; 11407 break; 11408 case OMPD_target_update: 11409 RTLFn = HasNowait ? 
OMPRTL___tgt_target_data_update_nowait_mapper 11410 : OMPRTL___tgt_target_data_update_mapper; 11411 break; 11412 case OMPD_parallel: 11413 case OMPD_for: 11414 case OMPD_parallel_for: 11415 case OMPD_parallel_master: 11416 case OMPD_parallel_sections: 11417 case OMPD_for_simd: 11418 case OMPD_parallel_for_simd: 11419 case OMPD_cancel: 11420 case OMPD_cancellation_point: 11421 case OMPD_ordered: 11422 case OMPD_threadprivate: 11423 case OMPD_allocate: 11424 case OMPD_task: 11425 case OMPD_simd: 11426 case OMPD_tile: 11427 case OMPD_unroll: 11428 case OMPD_sections: 11429 case OMPD_section: 11430 case OMPD_single: 11431 case OMPD_master: 11432 case OMPD_critical: 11433 case OMPD_taskyield: 11434 case OMPD_barrier: 11435 case OMPD_taskwait: 11436 case OMPD_taskgroup: 11437 case OMPD_atomic: 11438 case OMPD_flush: 11439 case OMPD_depobj: 11440 case OMPD_scan: 11441 case OMPD_teams: 11442 case OMPD_target_data: 11443 case OMPD_distribute: 11444 case OMPD_distribute_simd: 11445 case OMPD_distribute_parallel_for: 11446 case OMPD_distribute_parallel_for_simd: 11447 case OMPD_teams_distribute: 11448 case OMPD_teams_distribute_simd: 11449 case OMPD_teams_distribute_parallel_for: 11450 case OMPD_teams_distribute_parallel_for_simd: 11451 case OMPD_declare_simd: 11452 case OMPD_declare_variant: 11453 case OMPD_begin_declare_variant: 11454 case OMPD_end_declare_variant: 11455 case OMPD_declare_target: 11456 case OMPD_end_declare_target: 11457 case OMPD_declare_reduction: 11458 case OMPD_declare_mapper: 11459 case OMPD_taskloop: 11460 case OMPD_taskloop_simd: 11461 case OMPD_master_taskloop: 11462 case OMPD_master_taskloop_simd: 11463 case OMPD_parallel_master_taskloop: 11464 case OMPD_parallel_master_taskloop_simd: 11465 case OMPD_target: 11466 case OMPD_target_simd: 11467 case OMPD_target_teams_distribute: 11468 case OMPD_target_teams_distribute_simd: 11469 case OMPD_target_teams_distribute_parallel_for: 11470 case OMPD_target_teams_distribute_parallel_for_simd: 11471 case 
OMPD_target_teams: 11472 case OMPD_target_parallel: 11473 case OMPD_target_parallel_for: 11474 case OMPD_target_parallel_for_simd: 11475 case OMPD_requires: 11476 case OMPD_metadirective: 11477 case OMPD_unknown: 11478 default: 11479 llvm_unreachable("Unexpected standalone target data directive."); 11480 break; 11481 } 11482 CGF.EmitRuntimeCall( 11483 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), RTLFn), 11484 OffloadingArgs); 11485 }; 11486 11487 auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray, 11488 &MapNamesArray](CodeGenFunction &CGF, 11489 PrePostActionTy &) { 11490 // Fill up the arrays with all the mapped variables. 11491 MappableExprsHandler::MapCombinedInfoTy CombinedInfo; 11492 11493 // Get map clause information. 11494 MappableExprsHandler MEHandler(D, CGF); 11495 MEHandler.generateAllInfo(CombinedInfo); 11496 11497 TargetDataInfo Info; 11498 // Fill up the arrays and create the arguments. 11499 emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder, 11500 /*IsNonContiguous=*/true); 11501 bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>() || 11502 D.hasClausesOfKind<OMPNowaitClause>(); 11503 emitOffloadingArraysArgument( 11504 CGF, Info.BasePointersArray, Info.PointersArray, Info.SizesArray, 11505 Info.MapTypesArray, Info.MapNamesArray, Info.MappersArray, Info, 11506 {/*ForEndCall=*/false}); 11507 InputInfo.NumberOfTargetItems = Info.NumberOfPtrs; 11508 InputInfo.BasePointersArray = 11509 Address::deprecated(Info.BasePointersArray, CGM.getPointerAlign()); 11510 InputInfo.PointersArray = 11511 Address::deprecated(Info.PointersArray, CGM.getPointerAlign()); 11512 InputInfo.SizesArray = 11513 Address::deprecated(Info.SizesArray, CGM.getPointerAlign()); 11514 InputInfo.MappersArray = 11515 Address::deprecated(Info.MappersArray, CGM.getPointerAlign()); 11516 MapTypesArray = Info.MapTypesArray; 11517 MapNamesArray = Info.MapNamesArray; 11518 if (RequiresOuterTask) 11519 CGF.EmitOMPTargetTaskBasedDirective(D, 
ThenGen, InputInfo); 11520 else 11521 emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen); 11522 }; 11523 11524 if (IfCond) { 11525 emitIfClause(CGF, IfCond, TargetThenGen, 11526 [](CodeGenFunction &CGF, PrePostActionTy &) {}); 11527 } else { 11528 RegionCodeGenTy ThenRCG(TargetThenGen); 11529 ThenRCG(CGF); 11530 } 11531 } 11532 11533 namespace { 11534 /// Kind of parameter in a function with 'declare simd' directive. 11535 enum ParamKindTy { LinearWithVarStride, Linear, Uniform, Vector }; 11536 /// Attribute set of the parameter. 11537 struct ParamAttrTy { 11538 ParamKindTy Kind = Vector; 11539 llvm::APSInt StrideOrArg; 11540 llvm::APSInt Alignment; 11541 }; 11542 } // namespace 11543 11544 static unsigned evaluateCDTSize(const FunctionDecl *FD, 11545 ArrayRef<ParamAttrTy> ParamAttrs) { 11546 // Every vector variant of a SIMD-enabled function has a vector length (VLEN). 11547 // If OpenMP clause "simdlen" is used, the VLEN is the value of the argument 11548 // of that clause. The VLEN value must be power of 2. 11549 // In other case the notion of the function`s "characteristic data type" (CDT) 11550 // is used to compute the vector length. 11551 // CDT is defined in the following order: 11552 // a) For non-void function, the CDT is the return type. 11553 // b) If the function has any non-uniform, non-linear parameters, then the 11554 // CDT is the type of the first such parameter. 11555 // c) If the CDT determined by a) or b) above is struct, union, or class 11556 // type which is pass-by-value (except for the type that maps to the 11557 // built-in complex data type), the characteristic data type is int. 11558 // d) If none of the above three cases is applicable, the CDT is int. 11559 // The VLEN is then determined based on the CDT and the size of vector 11560 // register of that ISA for which current vector version is generated. 
The 11561 // VLEN is computed using the formula below: 11562 // VLEN = sizeof(vector_register) / sizeof(CDT), 11563 // where vector register size specified in section 3.2.1 Registers and the 11564 // Stack Frame of original AMD64 ABI document. 11565 QualType RetType = FD->getReturnType(); 11566 if (RetType.isNull()) 11567 return 0; 11568 ASTContext &C = FD->getASTContext(); 11569 QualType CDT; 11570 if (!RetType.isNull() && !RetType->isVoidType()) { 11571 CDT = RetType; 11572 } else { 11573 unsigned Offset = 0; 11574 if (const auto *MD = dyn_cast<CXXMethodDecl>(FD)) { 11575 if (ParamAttrs[Offset].Kind == Vector) 11576 CDT = C.getPointerType(C.getRecordType(MD->getParent())); 11577 ++Offset; 11578 } 11579 if (CDT.isNull()) { 11580 for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) { 11581 if (ParamAttrs[I + Offset].Kind == Vector) { 11582 CDT = FD->getParamDecl(I)->getType(); 11583 break; 11584 } 11585 } 11586 } 11587 } 11588 if (CDT.isNull()) 11589 CDT = C.IntTy; 11590 CDT = CDT->getCanonicalTypeUnqualified(); 11591 if (CDT->isRecordType() || CDT->isUnionType()) 11592 CDT = C.IntTy; 11593 return C.getTypeSize(CDT); 11594 } 11595 11596 static void 11597 emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn, 11598 const llvm::APSInt &VLENVal, 11599 ArrayRef<ParamAttrTy> ParamAttrs, 11600 OMPDeclareSimdDeclAttr::BranchStateTy State) { 11601 struct ISADataTy { 11602 char ISA; 11603 unsigned VecRegSize; 11604 }; 11605 ISADataTy ISAData[] = { 11606 { 11607 'b', 128 11608 }, // SSE 11609 { 11610 'c', 256 11611 }, // AVX 11612 { 11613 'd', 256 11614 }, // AVX2 11615 { 11616 'e', 512 11617 }, // AVX512 11618 }; 11619 llvm::SmallVector<char, 2> Masked; 11620 switch (State) { 11621 case OMPDeclareSimdDeclAttr::BS_Undefined: 11622 Masked.push_back('N'); 11623 Masked.push_back('M'); 11624 break; 11625 case OMPDeclareSimdDeclAttr::BS_Notinbranch: 11626 Masked.push_back('N'); 11627 break; 11628 case OMPDeclareSimdDeclAttr::BS_Inbranch: 11629 
Masked.push_back('M'); 11630 break; 11631 } 11632 for (char Mask : Masked) { 11633 for (const ISADataTy &Data : ISAData) { 11634 SmallString<256> Buffer; 11635 llvm::raw_svector_ostream Out(Buffer); 11636 Out << "_ZGV" << Data.ISA << Mask; 11637 if (!VLENVal) { 11638 unsigned NumElts = evaluateCDTSize(FD, ParamAttrs); 11639 assert(NumElts && "Non-zero simdlen/cdtsize expected"); 11640 Out << llvm::APSInt::getUnsigned(Data.VecRegSize / NumElts); 11641 } else { 11642 Out << VLENVal; 11643 } 11644 for (const ParamAttrTy &ParamAttr : ParamAttrs) { 11645 switch (ParamAttr.Kind){ 11646 case LinearWithVarStride: 11647 Out << 's' << ParamAttr.StrideOrArg; 11648 break; 11649 case Linear: 11650 Out << 'l'; 11651 if (ParamAttr.StrideOrArg != 1) 11652 Out << ParamAttr.StrideOrArg; 11653 break; 11654 case Uniform: 11655 Out << 'u'; 11656 break; 11657 case Vector: 11658 Out << 'v'; 11659 break; 11660 } 11661 if (!!ParamAttr.Alignment) 11662 Out << 'a' << ParamAttr.Alignment; 11663 } 11664 Out << '_' << Fn->getName(); 11665 Fn->addFnAttr(Out.str()); 11666 } 11667 } 11668 } 11669 11670 // This are the Functions that are needed to mangle the name of the 11671 // vector functions generated by the compiler, according to the rules 11672 // defined in the "Vector Function ABI specifications for AArch64", 11673 // available at 11674 // https://developer.arm.com/products/software-development-tools/hpc/arm-compiler-for-hpc/vector-function-abi. 11675 11676 /// Maps To Vector (MTV), as defined in 3.1.1 of the AAVFABI. 11677 /// 11678 /// TODO: Need to implement the behavior for reference marked with a 11679 /// var or no linear modifiers (1.b in the section). For this, we 11680 /// need to extend ParamKindTy to support the linear modifiers. 
11681 static bool getAArch64MTV(QualType QT, ParamKindTy Kind) { 11682 QT = QT.getCanonicalType(); 11683 11684 if (QT->isVoidType()) 11685 return false; 11686 11687 if (Kind == ParamKindTy::Uniform) 11688 return false; 11689 11690 if (Kind == ParamKindTy::Linear) 11691 return false; 11692 11693 // TODO: Handle linear references with modifiers 11694 11695 if (Kind == ParamKindTy::LinearWithVarStride) 11696 return false; 11697 11698 return true; 11699 } 11700 11701 /// Pass By Value (PBV), as defined in 3.1.2 of the AAVFABI. 11702 static bool getAArch64PBV(QualType QT, ASTContext &C) { 11703 QT = QT.getCanonicalType(); 11704 unsigned Size = C.getTypeSize(QT); 11705 11706 // Only scalars and complex within 16 bytes wide set PVB to true. 11707 if (Size != 8 && Size != 16 && Size != 32 && Size != 64 && Size != 128) 11708 return false; 11709 11710 if (QT->isFloatingType()) 11711 return true; 11712 11713 if (QT->isIntegerType()) 11714 return true; 11715 11716 if (QT->isPointerType()) 11717 return true; 11718 11719 // TODO: Add support for complex types (section 3.1.2, item 2). 11720 11721 return false; 11722 } 11723 11724 /// Computes the lane size (LS) of a return type or of an input parameter, 11725 /// as defined by `LS(P)` in 3.2.1 of the AAVFABI. 11726 /// TODO: Add support for references, section 3.2.1, item 1. 11727 static unsigned getAArch64LS(QualType QT, ParamKindTy Kind, ASTContext &C) { 11728 if (!getAArch64MTV(QT, Kind) && QT.getCanonicalType()->isPointerType()) { 11729 QualType PTy = QT.getCanonicalType()->getPointeeType(); 11730 if (getAArch64PBV(PTy, C)) 11731 return C.getTypeSize(PTy); 11732 } 11733 if (getAArch64PBV(QT, C)) 11734 return C.getTypeSize(QT); 11735 11736 return C.getTypeSize(C.getUIntPtrType()); 11737 } 11738 11739 // Get Narrowest Data Size (NDS) and Widest Data Size (WDS) from the 11740 // signature of the scalar function, as defined in 3.2.2 of the 11741 // AAVFABI. 
11742 static std::tuple<unsigned, unsigned, bool> 11743 getNDSWDS(const FunctionDecl *FD, ArrayRef<ParamAttrTy> ParamAttrs) { 11744 QualType RetType = FD->getReturnType().getCanonicalType(); 11745 11746 ASTContext &C = FD->getASTContext(); 11747 11748 bool OutputBecomesInput = false; 11749 11750 llvm::SmallVector<unsigned, 8> Sizes; 11751 if (!RetType->isVoidType()) { 11752 Sizes.push_back(getAArch64LS(RetType, ParamKindTy::Vector, C)); 11753 if (!getAArch64PBV(RetType, C) && getAArch64MTV(RetType, {})) 11754 OutputBecomesInput = true; 11755 } 11756 for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) { 11757 QualType QT = FD->getParamDecl(I)->getType().getCanonicalType(); 11758 Sizes.push_back(getAArch64LS(QT, ParamAttrs[I].Kind, C)); 11759 } 11760 11761 assert(!Sizes.empty() && "Unable to determine NDS and WDS."); 11762 // The LS of a function parameter / return value can only be a power 11763 // of 2, starting from 8 bits, up to 128. 11764 assert(llvm::all_of(Sizes, 11765 [](unsigned Size) { 11766 return Size == 8 || Size == 16 || Size == 32 || 11767 Size == 64 || Size == 128; 11768 }) && 11769 "Invalid size"); 11770 11771 return std::make_tuple(*std::min_element(std::begin(Sizes), std::end(Sizes)), 11772 *std::max_element(std::begin(Sizes), std::end(Sizes)), 11773 OutputBecomesInput); 11774 } 11775 11776 /// Mangle the parameter part of the vector function name according to 11777 /// their OpenMP classification. The mangling function is defined in 11778 /// section 3.5 of the AAVFABI. 11779 static std::string mangleVectorParameters(ArrayRef<ParamAttrTy> ParamAttrs) { 11780 SmallString<256> Buffer; 11781 llvm::raw_svector_ostream Out(Buffer); 11782 for (const auto &ParamAttr : ParamAttrs) { 11783 switch (ParamAttr.Kind) { 11784 case LinearWithVarStride: 11785 Out << "ls" << ParamAttr.StrideOrArg; 11786 break; 11787 case Linear: 11788 Out << 'l'; 11789 // Don't print the step value if it is not present or if it is 11790 // equal to 1. 
11791 if (ParamAttr.StrideOrArg != 1) 11792 Out << ParamAttr.StrideOrArg; 11793 break; 11794 case Uniform: 11795 Out << 'u'; 11796 break; 11797 case Vector: 11798 Out << 'v'; 11799 break; 11800 } 11801 11802 if (!!ParamAttr.Alignment) 11803 Out << 'a' << ParamAttr.Alignment; 11804 } 11805 11806 return std::string(Out.str()); 11807 } 11808 11809 // Function used to add the attribute. The parameter `VLEN` is 11810 // templated to allow the use of "x" when targeting scalable functions 11811 // for SVE. 11812 template <typename T> 11813 static void addAArch64VectorName(T VLEN, StringRef LMask, StringRef Prefix, 11814 char ISA, StringRef ParSeq, 11815 StringRef MangledName, bool OutputBecomesInput, 11816 llvm::Function *Fn) { 11817 SmallString<256> Buffer; 11818 llvm::raw_svector_ostream Out(Buffer); 11819 Out << Prefix << ISA << LMask << VLEN; 11820 if (OutputBecomesInput) 11821 Out << "v"; 11822 Out << ParSeq << "_" << MangledName; 11823 Fn->addFnAttr(Out.str()); 11824 } 11825 11826 // Helper function to generate the Advanced SIMD names depending on 11827 // the value of the NDS when simdlen is not present. 
static void addAArch64AdvSIMDNDSNames(unsigned NDS, StringRef Mask,
                                      StringRef Prefix, char ISA,
                                      StringRef ParSeq, StringRef MangledName,
                                      bool OutputBecomesInput,
                                      llvm::Function *Fn) {
  // For each NDS, emit the vector lengths the Advanced SIMD registers can
  // accommodate (section 3.3.1 of the AAVFABI): 64-bit and 128-bit registers
  // divided by the narrowest element size.
  switch (NDS) {
  case 8:
    addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
                         OutputBecomesInput, Fn);
    addAArch64VectorName(16, Mask, Prefix, ISA, ParSeq, MangledName,
                         OutputBecomesInput, Fn);
    break;
  case 16:
    addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
                         OutputBecomesInput, Fn);
    addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
                         OutputBecomesInput, Fn);
    break;
  case 32:
    addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
                         OutputBecomesInput, Fn);
    addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
                         OutputBecomesInput, Fn);
    break;
  case 64:
  case 128:
    addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
                         OutputBecomesInput, Fn);
    break;
  default:
    llvm_unreachable("Scalar type is too wide.");
  }
}

/// Emit vector function attributes for AArch64, as defined in the AAVFABI.
static void emitAArch64DeclareSimdFunction(
    CodeGenModule &CGM, const FunctionDecl *FD, unsigned UserVLEN,
    ArrayRef<ParamAttrTy> ParamAttrs,
    OMPDeclareSimdDeclAttr::BranchStateTy State, StringRef MangledName,
    char ISA, unsigned VecRegSize, llvm::Function *Fn, SourceLocation SLoc) {

  // Get basic data for building the vector signature.
  const auto Data = getNDSWDS(FD, ParamAttrs);
  const unsigned NDS = std::get<0>(Data);
  const unsigned WDS = std::get<1>(Data);
  const bool OutputBecomesInput = std::get<2>(Data);

  // Check the values provided via `simdlen` by the user.
  // 1. A `simdlen(1)` doesn't produce vector signatures,
  if (UserVLEN == 1) {
    unsigned DiagID = CGM.getDiags().getCustomDiagID(
        DiagnosticsEngine::Warning,
        "The clause simdlen(1) has no effect when targeting aarch64.");
    CGM.getDiags().Report(SLoc, DiagID);
    return;
  }

  // 2. Section 3.3.1, item 1: user input must be a power of 2 for
  // Advanced SIMD output.
  if (ISA == 'n' && UserVLEN && !llvm::isPowerOf2_32(UserVLEN)) {
    unsigned DiagID = CGM.getDiags().getCustomDiagID(
        DiagnosticsEngine::Warning, "The value specified in simdlen must be a "
                                    "power of 2 when targeting Advanced SIMD.");
    CGM.getDiags().Report(SLoc, DiagID);
    return;
  }

  // 3. Section 3.4.1. SVE fixed length must obey the architectural
  // limits.
  if (ISA == 's' && UserVLEN != 0) {
    if ((UserVLEN * WDS > 2048) || (UserVLEN * WDS % 128 != 0)) {
      unsigned DiagID = CGM.getDiags().getCustomDiagID(
          DiagnosticsEngine::Warning, "The clause simdlen must fit the %0-bit "
                                      "lanes in the architectural constraints "
                                      "for SVE (min is 128-bit, max is "
                                      "2048-bit, by steps of 128-bit)");
      CGM.getDiags().Report(SLoc, DiagID) << WDS;
      return;
    }
  }

  // Sort out parameter sequence.
  const std::string ParSeq = mangleVectorParameters(ParamAttrs);
  StringRef Prefix = "_ZGV";
  // Generate simdlen from user input (if any).
  if (UserVLEN) {
    if (ISA == 's') {
      // SVE generates only a masked function.
      addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
                           OutputBecomesInput, Fn);
    } else {
      assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
      // Advanced SIMD generates one or two functions, depending on
      // the `[not]inbranch` clause.
      switch (State) {
      case OMPDeclareSimdDeclAttr::BS_Undefined:
        addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Notinbranch:
        addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Inbranch:
        addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        break;
      }
    }
  } else {
    // If no user simdlen is provided, follow the AAVFABI rules for
    // generating the vector length.
    if (ISA == 's') {
      // SVE, section 3.4.1, item 1.
      addAArch64VectorName("x", "M", Prefix, ISA, ParSeq, MangledName,
                           OutputBecomesInput, Fn);
    } else {
      assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
      // Advanced SIMD, Section 3.3.1 of the AAVFABI, generates one or
      // two vector names depending on the use of the clause
      // `[not]inbranch`.
      switch (State) {
      case OMPDeclareSimdDeclAttr::BS_Undefined:
        addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Notinbranch:
        addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Inbranch:
        addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        break;
      }
    }
  }
}

// NOTE: the definition of emitDeclareSimdFunction continues beyond this
// chunk of the file; only its opening portion is visible here.
void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD,
                                              llvm::Function *Fn) {
  ASTContext &C = CGM.getContext();
  FD = FD->getMostRecentDecl();
  // Map params to their positions in function decl.
  llvm::DenseMap<const Decl *, unsigned> ParamPositions;
  // For methods, slot 0 is reserved for the implicit 'this' parameter.
  if (isa<CXXMethodDecl>(FD))
    ParamPositions.try_emplace(FD, 0);
  unsigned ParamPos = ParamPositions.size();
  for (const ParmVarDecl *P : FD->parameters()) {
    ParamPositions.try_emplace(P->getCanonicalDecl(), ParamPos);
    ++ParamPos;
  }
  while (FD) {
    for (const auto *Attr : FD->specific_attrs<OMPDeclareSimdDeclAttr>()) {
      llvm::SmallVector<ParamAttrTy, 8> ParamAttrs(ParamPositions.size());
      // Mark uniform parameters.
      for (const Expr *E : Attr->uniforms()) {
        E = E->IgnoreParenImpCasts();
        unsigned Pos;
        if (isa<CXXThisExpr>(E)) {
          Pos = ParamPositions[FD];
        } else {
          const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
                                ->getCanonicalDecl();
          Pos = ParamPositions[PVD];
        }
        ParamAttrs[Pos].Kind = Uniform;
      }
      // Get alignment info.
      // `aligneds()` and `alignments()` are parallel sequences; NI walks the
      // optional alignment expression for each aligned parameter.
      auto *NI = Attr->alignments_begin();
      for (const Expr *E : Attr->aligneds()) {
        E = E->IgnoreParenImpCasts();
        unsigned Pos;
        QualType ParmTy;
        if (isa<CXXThisExpr>(E)) {
          Pos = ParamPositions[FD];
          ParmTy = E->getType();
        } else {
          const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
                                ->getCanonicalDecl();
          Pos = ParamPositions[PVD];
          ParmTy = PVD->getType();
        }
        // Use the user-provided alignment if present, otherwise the default
        // OpenMP SIMD alignment for the parameter's type.
        ParamAttrs[Pos].Alignment =
            (*NI)
                ? (*NI)->EvaluateKnownConstInt(C)
                : llvm::APSInt::getUnsigned(
                      C.toCharUnitsFromBits(C.getOpenMPDefaultSimdAlign(ParmTy))
                          .getQuantity());
        ++NI;
      }
      // Mark linear parameters.
      auto *SI = Attr->steps_begin();
      auto *MI = Attr->modifiers_begin();
      for (const Expr *E : Attr->linears()) {
        E = E->IgnoreParenImpCasts();
        unsigned Pos;
        // Rescaling factor needed to compute the linear parameter
        // value in the mangled name.
        unsigned PtrRescalingFactor = 1;
        if (isa<CXXThisExpr>(E)) {
          Pos = ParamPositions[FD];
        } else {
          const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
                                ->getCanonicalDecl();
          Pos = ParamPositions[PVD];
          // NOTE(review): dyn_cast on a QualType does not look through
          // typedefs/sugar, so a sugared pointer type may be missed here and
          // leave PtrRescalingFactor at 1 — confirm this is intended
          // (getAs<PointerType>() would desugar).
          if (auto *P = dyn_cast<PointerType>(PVD->getType()))
            PtrRescalingFactor = CGM.getContext()
                                     .getTypeSizeInChars(P->getPointeeType())
                                     .getQuantity();
        }
        ParamAttrTy &ParamAttr = ParamAttrs[Pos];
        ParamAttr.Kind = Linear;
        // Assuming a stride of 1, for `linear` without modifiers.
        ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(1);
        if (*SI) {
          Expr::EvalResult Result;
          if (!(*SI)->EvaluateAsInt(Result, C, Expr::SE_AllowSideEffects)) {
            // Non-constant step: if it names another parameter, record that
            // parameter's position as a variable stride.
            // NOTE(review): cast<> asserts (never yields null) when the step
            // is not a DeclRefExpr, so this `if` can only ever take the true
            // branch — dyn_cast<> may be the intended operation; confirm.
            if (const auto *DRE =
                    cast<DeclRefExpr>((*SI)->IgnoreParenImpCasts())) {
              if (const auto *StridePVD =
                      dyn_cast<ParmVarDecl>(DRE->getDecl())) {
                ParamAttr.Kind = LinearWithVarStride;
                ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(
                    ParamPositions[StridePVD->getCanonicalDecl()]);
              }
            }
          } else {
            ParamAttr.StrideOrArg = Result.Val.getInt();
          }
        }
        // If we are using a linear clause on a pointer, we need to
        // rescale the value of linear_step with the byte size of the
        // pointee type.
        if (Linear == ParamAttr.Kind)
          ParamAttr.StrideOrArg = ParamAttr.StrideOrArg * PtrRescalingFactor;
        ++SI;
        ++MI;
      }
      llvm::APSInt VLENVal;
      SourceLocation ExprLoc;
      const Expr *VLENExpr = Attr->getSimdlen();
      if (VLENExpr) {
        VLENVal = VLENExpr->EvaluateKnownConstInt(C);
        ExprLoc = VLENExpr->getExprLoc();
      }
      OMPDeclareSimdDeclAttr::BranchStateTy State = Attr->getBranchState();
      // Dispatch to the target-specific emitter; only x86 and AArch64 are
      // handled here.
      if (CGM.getTriple().isX86()) {
        emitX86DeclareSimdFunction(FD, Fn, VLENVal, ParamAttrs, State);
      } else if (CGM.getTriple().getArch() == llvm::Triple::aarch64) {
        unsigned VLEN = VLENVal.getExtValue();
        StringRef MangledName = Fn->getName();
        if (CGM.getTarget().hasFeature("sve"))
          emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
                                         MangledName, 's', 128, Fn, ExprLoc);
        if (CGM.getTarget().hasFeature("neon"))
          emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
                                         MangledName, 'n', 128, Fn, ExprLoc);
      }
    }
    // Walk the whole redeclaration chain so attributes attached to any
    // declaration of FD are honored.
    FD = FD->getPreviousDecl();
  }
}

namespace {
/// Cleanup action for doacross support.
/// EH-stack cleanup that invokes the doacross finalization runtime entry
/// (with the two pre-captured arguments) when the region is exited, on both
/// normal and exceptional paths.
class DoacrossCleanupTy final : public EHScopeStack::Cleanup {
public:
  static const int DoacrossFinArgs = 2;

private:
  llvm::FunctionCallee RTLFn;
  llvm::Value *Args[DoacrossFinArgs];

public:
  DoacrossCleanupTy(llvm::FunctionCallee RTLFn,
                    ArrayRef<llvm::Value *> CallArgs)
      : RTLFn(RTLFn) {
    assert(CallArgs.size() == DoacrossFinArgs);
    std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args));
  }
  void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
    if (!CGF.HaveInsertPoint())
      return;
    CGF.EmitRuntimeCall(RTLFn, Args);
  }
};
} // namespace

void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF,
                                       const OMPLoopDirective &D,
                                       ArrayRef<Expr *> NumIterations) {
  if (!CGF.HaveInsertPoint())
    return;

  ASTContext &C = CGM.getContext();
  QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
  RecordDecl *RD;
  if (KmpDimTy.isNull()) {
    // Build struct kmp_dim { // loop bounds info casted to kmp_int64
    //  kmp_int64 lo; // lower
    //  kmp_int64 up; // upper
    //  kmp_int64 st; // stride
    // };
    RD = C.buildImplicitRecord("kmp_dim");
    RD->startDefinition();
    addFieldToRecordDecl(C, RD, Int64Ty);
    addFieldToRecordDecl(C, RD, Int64Ty);
    addFieldToRecordDecl(C, RD, Int64Ty);
    RD->completeDefinition();
    KmpDimTy = C.getRecordType(RD);
  } else {
    RD = cast<RecordDecl>(KmpDimTy->getAsTagDecl());
  }
  // One kmp_dim entry per loop dimension.
  llvm::APInt Size(/*numBits=*/32, NumIterations.size());
  QualType ArrayTy =
      C.getConstantArrayType(KmpDimTy, Size, nullptr, ArrayType::Normal, 0);

  Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims");
  CGF.EmitNullInitialization(DimsAddr, ArrayTy);
  enum { LowerFD = 0, UpperFD, StrideFD };
  // Fill dims with data; 'lo' stays zero from the null-initialization.
  for (unsigned I = 0, E = NumIterations.size(); I < E; ++I) {
    LValue DimsLVal = CGF.MakeAddrLValue(
        CGF.Builder.CreateConstArrayGEP(DimsAddr, I), KmpDimTy);
    // dims.upper = num_iterations;
    LValue UpperLVal = CGF.EmitLValueForField(
        DimsLVal, *std::next(RD->field_begin(), UpperFD));
    llvm::Value *NumIterVal = CGF.EmitScalarConversion(
        CGF.EmitScalarExpr(NumIterations[I]), NumIterations[I]->getType(),
        Int64Ty, NumIterations[I]->getExprLoc());
    CGF.EmitStoreOfScalar(NumIterVal, UpperLVal);
    // dims.stride = 1;
    LValue StrideLVal = CGF.EmitLValueForField(
        DimsLVal, *std::next(RD->field_begin(), StrideFD));
    CGF.EmitStoreOfScalar(llvm::ConstantInt::getSigned(CGM.Int64Ty, /*V=*/1),
                          StrideLVal);
  }

  // Build call void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid,
  // kmp_int32 num_dims, struct kmp_dim * dims);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, D.getBeginLoc()),
      getThreadID(CGF, D.getBeginLoc()),
      llvm::ConstantInt::getSigned(CGM.Int32Ty, NumIterations.size()),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          CGF.Builder.CreateConstArrayGEP(DimsAddr, 0).getPointer(),
          CGM.VoidPtrTy)};

  llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(
      CGM.getModule(), OMPRTL___kmpc_doacross_init);
  CGF.EmitRuntimeCall(RTLFn, Args);
  // Register a cleanup so __kmpc_doacross_fini runs on region exit
  // (normal and EH paths alike).
  llvm::Value *FiniArgs[DoacrossCleanupTy::DoacrossFinArgs] = {
      emitUpdateLocation(CGF, D.getEndLoc()), getThreadID(CGF, D.getEndLoc())};
  llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
      CGM.getModule(), OMPRTL___kmpc_doacross_fini);
  CGF.EHStack.pushCleanup<DoacrossCleanupTy>(NormalAndEHCleanup, FiniRTLFn,
                                             llvm::makeArrayRef(FiniArgs));
}

void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
                                          const OMPDependClause *C) {
  QualType Int64Ty =
      CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
  // Materialize the loop-counter vector (one kmp_int64 per loop) to pass to
  // the runtime.
  llvm::APInt Size(/*numBits=*/32, C->getNumLoops());
  QualType ArrayTy = CGM.getContext().getConstantArrayType(
      Int64Ty, Size, nullptr, ArrayType::Normal, 0);
  Address CntAddr = CGF.CreateMemTemp(ArrayTy, ".cnt.addr");
  for (unsigned I = 0, E = C->getNumLoops(); I < E; ++I) {
    const Expr *CounterVal = C->getLoopData(I);
    assert(CounterVal);
    llvm::Value *CntVal = CGF.EmitScalarConversion(
        CGF.EmitScalarExpr(CounterVal), CounterVal->getType(), Int64Ty,
        CounterVal->getExprLoc());
    CGF.EmitStoreOfScalar(CntVal, CGF.Builder.CreateConstArrayGEP(CntAddr, I),
                          /*Volatile=*/false, Int64Ty);
  }
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, C->getBeginLoc()),
      getThreadID(CGF, C->getBeginLoc()),
      CGF.Builder.CreateConstArrayGEP(CntAddr, 0).getPointer()};
  llvm::FunctionCallee RTLFn;
  // depend(source) posts the current iteration; depend(sink) waits on it.
  if (C->getDependencyKind() == OMPC_DEPEND_source) {
    RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                                  OMPRTL___kmpc_doacross_post);
  } else {
    assert(C->getDependencyKind() == OMPC_DEPEND_sink);
    RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                                  OMPRTL___kmpc_doacross_wait);
  }
  CGF.EmitRuntimeCall(RTLFn, Args);
}

void CGOpenMPRuntime::emitCall(CodeGenFunction &CGF, SourceLocation Loc,
                               llvm::FunctionCallee Callee,
                               ArrayRef<llvm::Value *> Args) const {
  assert(Loc.isValid() && "Outlined function call location must be valid.");
  auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);

  // Prefer the nounwind call form when the callee is known not to throw.
  if (auto *Fn = dyn_cast<llvm::Function>(Callee.getCallee())) {
    if (Fn->doesNotThrow()) {
      CGF.EmitNounwindRuntimeCall(Fn, Args);
      return;
    }
  }
  CGF.EmitRuntimeCall(Callee, Args);
}

void CGOpenMPRuntime::emitOutlinedFunctionCall(
    CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn,
    ArrayRef<llvm::Value *> Args) const {
  emitCall(CGF, Loc, OutlinedFn, Args);
}

void CGOpenMPRuntime::emitFunctionProlog(CodeGenFunction &CGF, const Decl *D) {
  // Record that at least one declare-target function body was emitted.
  if (const auto *FD = dyn_cast<FunctionDecl>(D))
    if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(FD))
      HasEmittedDeclareTargetRegion = true;
}

Address CGOpenMPRuntime::getParameterAddress(CodeGenFunction &CGF,
                                             const VarDecl *NativeParam,
                                             const VarDecl *TargetParam) const {
  // Default (host) implementation: no translation; TargetParam is unused and
  // overriding runtimes may use it.
  return CGF.GetAddrOfLocalVar(NativeParam);
}

/// Return allocator value from expression, or return a null allocator (default
/// when no allocator specified).
static llvm::Value *getAllocatorVal(CodeGenFunction &CGF,
                                    const Expr *Allocator) {
  llvm::Value *AllocVal;
  if (Allocator) {
    AllocVal = CGF.EmitScalarExpr(Allocator);
    // According to the standard, the original allocator type is a enum
    // (integer). Convert to pointer type, if required.
    AllocVal = CGF.EmitScalarConversion(AllocVal, Allocator->getType(),
                                        CGF.getContext().VoidPtrTy,
                                        Allocator->getExprLoc());
  } else {
    // If no allocator specified, it defaults to the null allocator.
    AllocVal = llvm::Constant::getNullValue(
        CGF.CGM.getTypes().ConvertType(CGF.getContext().VoidPtrTy));
  }
  return AllocVal;
}

Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF,
                                                   const VarDecl *VD) {
  if (!VD)
    return Address::invalid();
  // If the variable is a local of an untied task, its (proxy, real) addresses
  // were recorded on the untied-locals stack for the current function.
  Address UntiedAddr = Address::invalid();
  Address UntiedRealAddr = Address::invalid();
  auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn);
  if (It != FunctionToUntiedTaskStackMap.end()) {
    const UntiedLocalVarsAddressesMap &UntiedData =
        UntiedLocalVarsStack[It->second];
    auto I = UntiedData.find(VD);
    if (I != UntiedData.end()) {
      UntiedAddr = I->second.first;
      UntiedRealAddr = I->second.second;
    }
  }
  const VarDecl *CVD = VD->getCanonicalDecl();
  if (CVD->hasAttr<OMPAllocateDeclAttr>()) {
    // Use the default allocation.
    if (!isAllocatableDecl(VD))
      return UntiedAddr;
    llvm::Value *Size;
    CharUnits Align = CGM.getContext().getDeclAlign(CVD);
    if (CVD->getType()->isVariablyModifiedType()) {
      // VLA: size is a runtime value.
      Size = CGF.getTypeSize(CVD->getType());
      // Align the size: ((size + align - 1) / align) * align
      Size = CGF.Builder.CreateNUWAdd(
          Size, CGM.getSize(Align - CharUnits::fromQuantity(1)));
      Size = CGF.Builder.CreateUDiv(Size, CGM.getSize(Align));
      Size = CGF.Builder.CreateNUWMul(Size, CGM.getSize(Align));
    } else {
      CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType());
      Size = CGM.getSize(Sz.alignTo(Align));
    }
    llvm::Value *ThreadID = getThreadID(CGF, CVD->getBeginLoc());
    const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
    const Expr *Allocator = AA->getAllocator();
    llvm::Value *AllocVal = getAllocatorVal(CGF, Allocator);
    llvm::Value *Alignment =
        AA->getAlignment()
            ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(AA->getAlignment()),
                                        CGM.SizeTy, /*isSigned=*/false)
            : nullptr;
    // Call __kmpc_aligned_alloc when an explicit alignment was requested,
    // __kmpc_alloc otherwise (the argument lists differ by the alignment).
    SmallVector<llvm::Value *, 4> Args;
    Args.push_back(ThreadID);
    if (Alignment)
      Args.push_back(Alignment);
    Args.push_back(Size);
    Args.push_back(AllocVal);
    llvm::omp::RuntimeFunction FnID =
        Alignment ? OMPRTL___kmpc_aligned_alloc : OMPRTL___kmpc_alloc;
    llvm::Value *Addr = CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), FnID), Args,
        getName({CVD->getName(), ".void.addr"}));
    llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
        CGM.getModule(), OMPRTL___kmpc_free);
    QualType Ty = CGM.getContext().getPointerType(CVD->getType());
    Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        Addr, CGF.ConvertTypeForMem(Ty), getName({CVD->getName(), ".addr"}));
    // For untied tasks, persist the allocated pointer through the proxy slot.
    if (UntiedAddr.isValid())
      CGF.EmitStoreOfScalar(Addr, UntiedAddr, /*Volatile=*/false, Ty);

    // Cleanup action for allocate support.
    class OMPAllocateCleanupTy final : public EHScopeStack::Cleanup {
      llvm::FunctionCallee RTLFn;
      // Source location stored as a raw encoding so the cleanup does not
      // hold a SourceLocation across emission.
      SourceLocation::UIntTy LocEncoding;
      Address Addr;
      const Expr *AllocExpr;

    public:
      OMPAllocateCleanupTy(llvm::FunctionCallee RTLFn,
                           SourceLocation::UIntTy LocEncoding, Address Addr,
                           const Expr *AllocExpr)
          : RTLFn(RTLFn), LocEncoding(LocEncoding), Addr(Addr),
            AllocExpr(AllocExpr) {}
      void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
        if (!CGF.HaveInsertPoint())
          return;
        // __kmpc_free(gtid, ptr, allocator)
        llvm::Value *Args[3];
        Args[0] = CGF.CGM.getOpenMPRuntime().getThreadID(
            CGF, SourceLocation::getFromRawEncoding(LocEncoding));
        Args[1] = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            Addr.getPointer(), CGF.VoidPtrTy);
        llvm::Value *AllocVal = getAllocatorVal(CGF, AllocExpr);
        Args[2] = AllocVal;
        CGF.EmitRuntimeCall(RTLFn, Args);
      }
    };
    Address VDAddr = UntiedRealAddr.isValid()
                         ? UntiedRealAddr
                         : Address::deprecated(Addr, Align);
    CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(
        NormalAndEHCleanup, FiniRTLFn, CVD->getLocation().getRawEncoding(),
        VDAddr, Allocator);
    if (UntiedRealAddr.isValid())
      if (auto *Region =
              dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
        Region->emitUntiedSwitch(CGF);
    return VDAddr;
  }
  return UntiedAddr;
}

bool CGOpenMPRuntime::isLocalVarInUntiedTask(CodeGenFunction &CGF,
                                             const VarDecl *VD) const {
  auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn);
  if (It == FunctionToUntiedTaskStackMap.end())
    return false;
  return UntiedLocalVarsStack[It->second].count(VD) > 0;
}

CGOpenMPRuntime::NontemporalDeclsRAII::NontemporalDeclsRAII(
    CodeGenModule &CGM, const OMPLoopDirective &S)
    : CGM(CGM), NeedToPush(S.hasClausesOfKind<OMPNontemporalClause>()) {
  assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
  if (!NeedToPush)
    return;
  // Collect every decl named in a nontemporal clause into a fresh set on the
  // nontemporal stack; popped in the destructor.
  NontemporalDeclsSet &DS =
      CGM.getOpenMPRuntime().NontemporalDeclsStack.emplace_back();
  for (const auto *C : S.getClausesOfKind<OMPNontemporalClause>()) {
    for (const Stmt *Ref : C->private_refs()) {
      const auto *SimpleRefExpr = cast<Expr>(Ref)->IgnoreParenImpCasts();
      const ValueDecl *VD;
      if (const auto *DRE = dyn_cast<DeclRefExpr>(SimpleRefExpr)) {
        VD = DRE->getDecl();
      } else {
        const auto *ME = cast<MemberExpr>(SimpleRefExpr);
        assert((ME->isImplicitCXXThis() ||
                isa<CXXThisExpr>(ME->getBase()->IgnoreParenImpCasts())) &&
               "Expected member of current class.");
        VD = ME->getMemberDecl();
      }
      DS.insert(VD);
    }
  }
}

CGOpenMPRuntime::NontemporalDeclsRAII::~NontemporalDeclsRAII() {
  if (!NeedToPush)
    return;
  CGM.getOpenMPRuntime().NontemporalDeclsStack.pop_back();
}
CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::UntiedTaskLocalDeclsRAII(
    CodeGenFunction &CGF,
    const llvm::MapVector<CanonicalDeclPtr<const VarDecl>,
                          std::pair<Address, Address>> &LocalVars)
    : CGM(CGF.CGM), NeedToPush(!LocalVars.empty()) {
  if (!NeedToPush)
    return;
  // Remember which stack slot belongs to the current function, then push the
  // (proxy, real) address map for its untied-task locals.
  CGM.getOpenMPRuntime().FunctionToUntiedTaskStackMap.try_emplace(
      CGF.CurFn, CGM.getOpenMPRuntime().UntiedLocalVarsStack.size());
  CGM.getOpenMPRuntime().UntiedLocalVarsStack.push_back(LocalVars);
}

CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::~UntiedTaskLocalDeclsRAII() {
  if (!NeedToPush)
    return;
  CGM.getOpenMPRuntime().UntiedLocalVarsStack.pop_back();
}

bool CGOpenMPRuntime::isNontemporalDecl(const ValueDecl *VD) const {
  assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");

  // A decl is nontemporal if any active nontemporal region mentions it.
  return llvm::any_of(
      CGM.getOpenMPRuntime().NontemporalDeclsStack,
      [VD](const NontemporalDeclsSet &Set) { return Set.contains(VD); });
}

void CGOpenMPRuntime::LastprivateConditionalRAII::tryToDisableInnerAnalysis(
    const OMPExecutableDirective &S,
    llvm::DenseSet<CanonicalDeclPtr<const Decl>> &NeedToAddForLPCsAsDisabled)
    const {
  // Collect candidate decls that must not be tracked inside S, then mark
  // those that some enclosing (non-disabled) lastprivate-conditional region
  // is currently tracking.
  llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToCheckForLPCs;
  // Vars in target/task regions must be excluded completely.
  if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()) ||
      isOpenMPTaskingDirective(S.getDirectiveKind())) {
    SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
    getOpenMPCaptureRegions(CaptureRegions, S.getDirectiveKind());
    const CapturedStmt *CS = S.getCapturedStmt(CaptureRegions.front());
    for (const CapturedStmt::Capture &Cap : CS->captures()) {
      if (Cap.capturesVariable() || Cap.capturesVariableByCopy())
        NeedToCheckForLPCs.insert(Cap.getCapturedVar());
    }
  }
  // Exclude vars in private clauses.
  for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) {
    for (const Expr *Ref : C->varlists()) {
      if (!Ref->getType()->isScalarType())
        continue;
      const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
      if (!DRE)
        continue;
      NeedToCheckForLPCs.insert(DRE->getDecl());
    }
  }
  for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
    for (const Expr *Ref : C->varlists()) {
      if (!Ref->getType()->isScalarType())
        continue;
      const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
      if (!DRE)
        continue;
      NeedToCheckForLPCs.insert(DRE->getDecl());
    }
  }
  for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
    for (const Expr *Ref : C->varlists()) {
      if (!Ref->getType()->isScalarType())
        continue;
      const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
      if (!DRE)
        continue;
      NeedToCheckForLPCs.insert(DRE->getDecl());
    }
  }
  for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
    for (const Expr *Ref : C->varlists()) {
      if (!Ref->getType()->isScalarType())
        continue;
      const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
      if (!DRE)
        continue;
      NeedToCheckForLPCs.insert(DRE->getDecl());
    }
  }
  for (const auto *C : S.getClausesOfKind<OMPLinearClause>()) {
    for (const Expr *Ref : C->varlists()) {
      if (!Ref->getType()->isScalarType())
        continue;
      const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
      if (!DRE)
        continue;
      NeedToCheckForLPCs.insert(DRE->getDecl());
    }
  }
  // Only decls tracked by the innermost enclosing non-disabled region need
  // to be disabled inside S.
  for (const Decl *VD : NeedToCheckForLPCs) {
    for (const LastprivateConditionalData &Data :
         llvm::reverse(CGM.getOpenMPRuntime().LastprivateConditionalStack)) {
      if (Data.DeclToUniqueName.count(VD) > 0) {
        if (!Data.Disabled)
          NeedToAddForLPCsAsDisabled.insert(VD);
        break;
      }
    }
  }
}

CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
    CodeGenFunction &CGF, const OMPExecutableDirective &S, LValue IVLVal)
    : CGM(CGF.CGM),
      // Push only when OpenMP >= 5.0 and S has at least one
      // lastprivate(conditional:) clause.
      Action((CGM.getLangOpts().OpenMP >= 50 &&
              llvm::any_of(S.getClausesOfKind<OMPLastprivateClause>(),
                           [](const OMPLastprivateClause *C) {
                             return C->getKind() ==
                                    OMPC_LASTPRIVATE_conditional;
                           }))
                 ? ActionToDo::PushAsLastprivateConditional
                 : ActionToDo::DoNotPush) {
  assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
  if (CGM.getLangOpts().OpenMP < 50 || Action == ActionToDo::DoNotPush)
    return;
  assert(Action == ActionToDo::PushAsLastprivateConditional &&
         "Expected a push action.");
  // Record a unique global name per conditional lastprivate decl.
  LastprivateConditionalData &Data =
      CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
  for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
    if (C->getKind() != OMPC_LASTPRIVATE_conditional)
      continue;

    for (const Expr *Ref : C->varlists()) {
      Data.DeclToUniqueName.insert(std::make_pair(
          cast<DeclRefExpr>(Ref->IgnoreParenImpCasts())->getDecl(),
          SmallString<16>(generateUniqueName(CGM, "pl_cond", Ref))));
    }
  }
  Data.IVLVal = IVLVal;
  Data.Fn = CGF.CurFn;
}

CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
    CodeGenFunction &CGF, const OMPExecutableDirective &S)
    : CGM(CGF.CGM), Action(ActionToDo::DoNotPush) {
  assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
  if (CGM.getLangOpts().OpenMP < 50)
    return;
  llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToAddForLPCsAsDisabled;
  tryToDisableInnerAnalysis(S, NeedToAddForLPCsAsDisabled);
  if (!NeedToAddForLPCsAsDisabled.empty()) {
    Action = ActionToDo::DisableLastprivateConditional;
    // Push a "disabled" frame naming the decls whose tracking must be
    // suppressed inside this region (names left empty on purpose).
    LastprivateConditionalData &Data =
        CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
    for (const Decl *VD : NeedToAddForLPCsAsDisabled)
      Data.DeclToUniqueName.insert(std::make_pair(VD, SmallString<16>()));
    Data.Fn = CGF.CurFn;
    Data.Disabled = true;
  }
}

CGOpenMPRuntime::LastprivateConditionalRAII
CGOpenMPRuntime::LastprivateConditionalRAII::disable(
    CodeGenFunction &CGF, const OMPExecutableDirective &S) {
  return LastprivateConditionalRAII(CGF, S);
}

CGOpenMPRuntime::LastprivateConditionalRAII::~LastprivateConditionalRAII() {
  if (CGM.getLangOpts().OpenMP < 50)
    return;
  // Pop whichever kind of frame the constructor pushed (if any).
  if (Action == ActionToDo::DisableLastprivateConditional) {
    assert(CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
           "Expected list of disabled private vars.");
    CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
  }
  if (Action == ActionToDo::PushAsLastprivateConditional) {
    assert(
        !CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
        "Expected list of lastprivate conditional vars.");
    CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
  }
}

Address CGOpenMPRuntime::emitLastprivateConditionalInit(CodeGenFunction &CGF,
                                                        const VarDecl *VD) {
  ASTContext &C = CGM.getContext();
  auto I = LastprivateConditionalToTypes.find(CGF.CurFn);
  if (I == LastprivateConditionalToTypes.end())
    I = LastprivateConditionalToTypes.try_emplace(CGF.CurFn).first;
  QualType NewType;
  const FieldDecl *VDField;
  const FieldDecl *FiredField;
  LValue BaseLVal;
  auto VI = I->getSecond().find(VD);
  if (VI == I->getSecond().end()) {
    // First use in this function: build the { value, fired } wrapper record.
    // NOTE(review): "lasprivate" is a long-standing typo in this internal
    // record name; renaming it would change emitted type names, so it is
    // preserved here.
    RecordDecl *RD = C.buildImplicitRecord("lasprivate.conditional");
    RD->startDefinition();
    VDField = addFieldToRecordDecl(C, RD, VD->getType().getNonReferenceType());
    FiredField = addFieldToRecordDecl(C, RD, C.CharTy);
    RD->completeDefinition();
    NewType = C.getRecordType(RD);
    Address Addr = CGF.CreateMemTemp(NewType, C.getDeclAlign(VD), VD->getName());
    BaseLVal = CGF.MakeAddrLValue(Addr, NewType, AlignmentSource::Decl);
    I->getSecond().try_emplace(VD, NewType, VDField, FiredField, BaseLVal);
  } else {
    NewType = std::get<0>(VI->getSecond());
    VDField = std::get<1>(VI->getSecond());
    FiredField = std::get<2>(VI->getSecond());
    BaseLVal = std::get<3>(VI->getSecond());
  }
  // Reset the 'fired' flag and hand back the address of the value field.
  LValue FiredLVal =
      CGF.EmitLValueForField(BaseLVal, FiredField);
  CGF.EmitStoreOfScalar(
      llvm::ConstantInt::getNullValue(CGF.ConvertTypeForMem(C.CharTy)),
      FiredLVal);
  return CGF.EmitLValueForField(BaseLVal, VDField).getAddress(CGF);
}

namespace {
/// Checks if the lastprivate conditional variable is referenced in LHS.
class LastprivateConditionalRefChecker final
    : public ConstStmtVisitor<LastprivateConditionalRefChecker, bool> {
  ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM;
  const Expr *FoundE = nullptr;
  const Decl *FoundD = nullptr;
  StringRef UniqueDeclName;
  LValue IVLVal;
  llvm::Function *FoundFn = nullptr;
  // NOTE(review): Loc appears to be unused in this class — confirm before
  // removing.
  SourceLocation Loc;

public:
  bool VisitDeclRefExpr(const DeclRefExpr *E) {
    // Search innermost-to-outermost; a disabled frame that names the decl
    // suppresses the match entirely.
    for (const CGOpenMPRuntime::LastprivateConditionalData &D :
         llvm::reverse(LPM)) {
      auto It = D.DeclToUniqueName.find(E->getDecl());
      if (It == D.DeclToUniqueName.end())
        continue;
      if (D.Disabled)
        return false;
      FoundE = E;
      FoundD = E->getDecl()->getCanonicalDecl();
      UniqueDeclName = It->second;
      IVLVal = D.IVLVal;
      FoundFn = D.Fn;
      break;
    }
    return FoundE == E;
  }
  bool VisitMemberExpr(const MemberExpr *E) {
    if (!CodeGenFunction::IsWrappedCXXThis(E->getBase()))
      return false;
    for (const CGOpenMPRuntime::LastprivateConditionalData &D :
         llvm::reverse(LPM)) {
      auto It = D.DeclToUniqueName.find(E->getMemberDecl());
      if (It == D.DeclToUniqueName.end())
        continue;
      if (D.Disabled)
        return false;
      FoundE = E;
      FoundD = E->getMemberDecl()->getCanonicalDecl();
      UniqueDeclName = It->second;
      IVLVal = D.IVLVal;
      FoundFn = D.Fn;
      break;
    }
    return FoundE == E;
  }
  bool VisitStmt(const Stmt *S) {
    // Recurse only into glvalue sub-expressions (and non-expression children).
    for (const Stmt *Child : S->children()) {
      if (!Child)
        continue;
      if (const auto *E = dyn_cast<Expr>(Child))
        if (!E->isGLValue())
          continue;
      if (Visit(Child))
        return true;
    }
    return false;
  }
  explicit LastprivateConditionalRefChecker(
      ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM)
      : LPM(LPM) {}
  std::tuple<const Expr *, const Decl *, StringRef, LValue, llvm::Function *>
  getFoundData() const {
    return std::make_tuple(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn);
  }
};
} // namespace

void CGOpenMPRuntime::emitLastprivateConditionalUpdate(CodeGenFunction &CGF,
                                                       LValue IVLVal,
                                                       StringRef UniqueDeclName,
                                                       LValue LVal,
                                                       SourceLocation Loc) {
  // Last updated loop counter for the lastprivate conditional var.
  // int<xx> last_iv = 0;
  llvm::Type *LLIVTy = CGF.ConvertTypeForMem(IVLVal.getType());
  llvm::Constant *LastIV =
      getOrCreateInternalVariable(LLIVTy, getName({UniqueDeclName, "iv"}));
  cast<llvm::GlobalVariable>(LastIV)->setAlignment(
      IVLVal.getAlignment().getAsAlign());
  LValue LastIVLVal = CGF.MakeNaturalAlignAddrLValue(LastIV, IVLVal.getType());

  // Last value of the lastprivate conditional.
  // decltype(priv_a) last_a;
  llvm::GlobalVariable *Last = getOrCreateInternalVariable(
      CGF.ConvertTypeForMem(LVal.getType()), UniqueDeclName);
  Last->setAlignment(LVal.getAlignment().getAsAlign());
  LValue LastLVal = CGF.MakeAddrLValue(
      Address(Last, Last->getValueType(), LVal.getAlignment()), LVal.getType());

  // Global loop counter. Required to handle inner parallel-for regions.
  // iv
  llvm::Value *IVVal = CGF.EmitLoadOfScalar(IVLVal, Loc);

  // #pragma omp critical(a)
  // if (last_iv <= iv) {
  //   last_iv = iv;
  //   last_a = priv_a;
  // }
  auto &&CodeGen = [&LastIVLVal, &IVLVal, IVVal, &LVal, &LastLVal,
                    Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    llvm::Value *LastIVVal = CGF.EmitLoadOfScalar(LastIVLVal, Loc);
    // (last_iv <= iv) ? Check if the variable is updated and store new
    // value in global var.
    llvm::Value *CmpRes;
    // Pick a signed or unsigned compare to match the IV's type.
    if (IVLVal.getType()->isSignedIntegerType()) {
      CmpRes = CGF.Builder.CreateICmpSLE(LastIVVal, IVVal);
    } else {
      assert(IVLVal.getType()->isUnsignedIntegerType() &&
             "Loop iteration variable must be integer.");
      CmpRes = CGF.Builder.CreateICmpULE(LastIVVal, IVVal);
    }
    llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lp_cond_then");
    llvm::BasicBlock *ExitBB = CGF.createBasicBlock("lp_cond_exit");
    CGF.Builder.CreateCondBr(CmpRes, ThenBB, ExitBB);
    // {
    CGF.EmitBlock(ThenBB);

    // last_iv = iv;
    CGF.EmitStoreOfScalar(IVVal, LastIVLVal);

    // last_a = priv_a;
    switch (CGF.getEvaluationKind(LVal.getType())) {
    case TEK_Scalar: {
      llvm::Value *PrivVal = CGF.EmitLoadOfScalar(LVal, Loc);
      CGF.EmitStoreOfScalar(PrivVal, LastLVal);
      break;
    }
    case TEK_Complex: {
      CodeGenFunction::ComplexPairTy PrivVal = CGF.EmitLoadOfComplex(LVal, Loc);
      CGF.EmitStoreOfComplex(PrivVal, LastLVal, /*isInit=*/false);
      break;
    }
    case TEK_Aggregate:
      llvm_unreachable(
          "Aggregates are not supported in lastprivate conditional.");
    }
    // }
    CGF.EmitBranch(ExitBB);
    // There is no need to emit line number for unconditional branch.
    (void)ApplyDebugLocation::CreateEmpty(CGF);
    CGF.EmitBlock(ExitBB, /*IsFinished=*/true);
  };

  if (CGM.getLangOpts().OpenMPSimd) {
    // Do not emit as a critical region as no parallel region could be emitted.
    RegionCodeGenTy ThenRCG(CodeGen);
    ThenRCG(CGF);
  } else {
    // The compare-and-update must be atomic across threads.
    emitCriticalRegion(CGF, UniqueDeclName, CodeGen, Loc);
  }
}

void CGOpenMPRuntime::checkAndEmitLastprivateConditional(CodeGenFunction &CGF,
                                                         const Expr *LHS) {
  // Nothing to do before OpenMP 5.0 or outside lastprivate-conditional
  // regions.
  if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
    return;
  LastprivateConditionalRefChecker Checker(LastprivateConditionalStack);
  if (!Checker.Visit(LHS))
    return;
  const Expr *FoundE;
  const Decl *FoundD;
  StringRef UniqueDeclName;
  LValue IVLVal;
  llvm::Function *FoundFn;
  std::tie(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn) =
      Checker.getFoundData();
  if (FoundFn != CGF.CurFn) {
    // Special codegen for inner parallel regions.
12807 // ((struct.lastprivate.conditional*)&priv_a)->Fired = 1; 12808 auto It = LastprivateConditionalToTypes[FoundFn].find(FoundD); 12809 assert(It != LastprivateConditionalToTypes[FoundFn].end() && 12810 "Lastprivate conditional is not found in outer region."); 12811 QualType StructTy = std::get<0>(It->getSecond()); 12812 const FieldDecl* FiredDecl = std::get<2>(It->getSecond()); 12813 LValue PrivLVal = CGF.EmitLValue(FoundE); 12814 Address StructAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 12815 PrivLVal.getAddress(CGF), 12816 CGF.ConvertTypeForMem(CGF.getContext().getPointerType(StructTy)), 12817 CGF.ConvertTypeForMem(StructTy)); 12818 LValue BaseLVal = 12819 CGF.MakeAddrLValue(StructAddr, StructTy, AlignmentSource::Decl); 12820 LValue FiredLVal = CGF.EmitLValueForField(BaseLVal, FiredDecl); 12821 CGF.EmitAtomicStore(RValue::get(llvm::ConstantInt::get( 12822 CGF.ConvertTypeForMem(FiredDecl->getType()), 1)), 12823 FiredLVal, llvm::AtomicOrdering::Unordered, 12824 /*IsVolatile=*/true, /*isInit=*/false); 12825 return; 12826 } 12827 12828 // Private address of the lastprivate conditional in the current context. 
12829 // priv_a 12830 LValue LVal = CGF.EmitLValue(FoundE); 12831 emitLastprivateConditionalUpdate(CGF, IVLVal, UniqueDeclName, LVal, 12832 FoundE->getExprLoc()); 12833 } 12834 12835 void CGOpenMPRuntime::checkAndEmitSharedLastprivateConditional( 12836 CodeGenFunction &CGF, const OMPExecutableDirective &D, 12837 const llvm::DenseSet<CanonicalDeclPtr<const VarDecl>> &IgnoredDecls) { 12838 if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty()) 12839 return; 12840 auto Range = llvm::reverse(LastprivateConditionalStack); 12841 auto It = llvm::find_if( 12842 Range, [](const LastprivateConditionalData &D) { return !D.Disabled; }); 12843 if (It == Range.end() || It->Fn != CGF.CurFn) 12844 return; 12845 auto LPCI = LastprivateConditionalToTypes.find(It->Fn); 12846 assert(LPCI != LastprivateConditionalToTypes.end() && 12847 "Lastprivates must be registered already."); 12848 SmallVector<OpenMPDirectiveKind, 4> CaptureRegions; 12849 getOpenMPCaptureRegions(CaptureRegions, D.getDirectiveKind()); 12850 const CapturedStmt *CS = D.getCapturedStmt(CaptureRegions.back()); 12851 for (const auto &Pair : It->DeclToUniqueName) { 12852 const auto *VD = cast<VarDecl>(Pair.first->getCanonicalDecl()); 12853 if (!CS->capturesVariable(VD) || IgnoredDecls.contains(VD)) 12854 continue; 12855 auto I = LPCI->getSecond().find(Pair.first); 12856 assert(I != LPCI->getSecond().end() && 12857 "Lastprivate must be rehistered already."); 12858 // bool Cmp = priv_a.Fired != 0; 12859 LValue BaseLVal = std::get<3>(I->getSecond()); 12860 LValue FiredLVal = 12861 CGF.EmitLValueForField(BaseLVal, std::get<2>(I->getSecond())); 12862 llvm::Value *Res = CGF.EmitLoadOfScalar(FiredLVal, D.getBeginLoc()); 12863 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Res); 12864 llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lpc.then"); 12865 llvm::BasicBlock *DoneBB = CGF.createBasicBlock("lpc.done"); 12866 // if (Cmp) { 12867 CGF.Builder.CreateCondBr(Cmp, ThenBB, DoneBB); 12868 CGF.EmitBlock(ThenBB); 
12869 Address Addr = CGF.GetAddrOfLocalVar(VD); 12870 LValue LVal; 12871 if (VD->getType()->isReferenceType()) 12872 LVal = CGF.EmitLoadOfReferenceLValue(Addr, VD->getType(), 12873 AlignmentSource::Decl); 12874 else 12875 LVal = CGF.MakeAddrLValue(Addr, VD->getType().getNonReferenceType(), 12876 AlignmentSource::Decl); 12877 emitLastprivateConditionalUpdate(CGF, It->IVLVal, Pair.second, LVal, 12878 D.getBeginLoc()); 12879 auto AL = ApplyDebugLocation::CreateArtificial(CGF); 12880 CGF.EmitBlock(DoneBB, /*IsFinal=*/true); 12881 // } 12882 } 12883 } 12884 12885 void CGOpenMPRuntime::emitLastprivateConditionalFinalUpdate( 12886 CodeGenFunction &CGF, LValue PrivLVal, const VarDecl *VD, 12887 SourceLocation Loc) { 12888 if (CGF.getLangOpts().OpenMP < 50) 12889 return; 12890 auto It = LastprivateConditionalStack.back().DeclToUniqueName.find(VD); 12891 assert(It != LastprivateConditionalStack.back().DeclToUniqueName.end() && 12892 "Unknown lastprivate conditional variable."); 12893 StringRef UniqueName = It->second; 12894 llvm::GlobalVariable *GV = CGM.getModule().getNamedGlobal(UniqueName); 12895 // The variable was not updated in the region - exit. 
12896 if (!GV) 12897 return; 12898 LValue LPLVal = CGF.MakeAddrLValue( 12899 Address(GV, GV->getValueType(), PrivLVal.getAlignment()), 12900 PrivLVal.getType().getNonReferenceType()); 12901 llvm::Value *Res = CGF.EmitLoadOfScalar(LPLVal, Loc); 12902 CGF.EmitStoreOfScalar(Res, PrivLVal); 12903 } 12904 12905 llvm::Function *CGOpenMPSIMDRuntime::emitParallelOutlinedFunction( 12906 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, 12907 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { 12908 llvm_unreachable("Not supported in SIMD-only mode"); 12909 } 12910 12911 llvm::Function *CGOpenMPSIMDRuntime::emitTeamsOutlinedFunction( 12912 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, 12913 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { 12914 llvm_unreachable("Not supported in SIMD-only mode"); 12915 } 12916 12917 llvm::Function *CGOpenMPSIMDRuntime::emitTaskOutlinedFunction( 12918 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, 12919 const VarDecl *PartIDVar, const VarDecl *TaskTVar, 12920 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen, 12921 bool Tied, unsigned &NumberOfParts) { 12922 llvm_unreachable("Not supported in SIMD-only mode"); 12923 } 12924 12925 void CGOpenMPSIMDRuntime::emitParallelCall(CodeGenFunction &CGF, 12926 SourceLocation Loc, 12927 llvm::Function *OutlinedFn, 12928 ArrayRef<llvm::Value *> CapturedVars, 12929 const Expr *IfCond, 12930 llvm::Value *NumThreads) { 12931 llvm_unreachable("Not supported in SIMD-only mode"); 12932 } 12933 12934 void CGOpenMPSIMDRuntime::emitCriticalRegion( 12935 CodeGenFunction &CGF, StringRef CriticalName, 12936 const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc, 12937 const Expr *Hint) { 12938 llvm_unreachable("Not supported in SIMD-only mode"); 12939 } 12940 12941 void CGOpenMPSIMDRuntime::emitMasterRegion(CodeGenFunction &CGF, 12942 const RegionCodeGenTy &MasterOpGen, 12943 SourceLocation Loc) { 12944 
llvm_unreachable("Not supported in SIMD-only mode"); 12945 } 12946 12947 void CGOpenMPSIMDRuntime::emitMaskedRegion(CodeGenFunction &CGF, 12948 const RegionCodeGenTy &MasterOpGen, 12949 SourceLocation Loc, 12950 const Expr *Filter) { 12951 llvm_unreachable("Not supported in SIMD-only mode"); 12952 } 12953 12954 void CGOpenMPSIMDRuntime::emitTaskyieldCall(CodeGenFunction &CGF, 12955 SourceLocation Loc) { 12956 llvm_unreachable("Not supported in SIMD-only mode"); 12957 } 12958 12959 void CGOpenMPSIMDRuntime::emitTaskgroupRegion( 12960 CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen, 12961 SourceLocation Loc) { 12962 llvm_unreachable("Not supported in SIMD-only mode"); 12963 } 12964 12965 void CGOpenMPSIMDRuntime::emitSingleRegion( 12966 CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen, 12967 SourceLocation Loc, ArrayRef<const Expr *> CopyprivateVars, 12968 ArrayRef<const Expr *> DestExprs, ArrayRef<const Expr *> SrcExprs, 12969 ArrayRef<const Expr *> AssignmentOps) { 12970 llvm_unreachable("Not supported in SIMD-only mode"); 12971 } 12972 12973 void CGOpenMPSIMDRuntime::emitOrderedRegion(CodeGenFunction &CGF, 12974 const RegionCodeGenTy &OrderedOpGen, 12975 SourceLocation Loc, 12976 bool IsThreads) { 12977 llvm_unreachable("Not supported in SIMD-only mode"); 12978 } 12979 12980 void CGOpenMPSIMDRuntime::emitBarrierCall(CodeGenFunction &CGF, 12981 SourceLocation Loc, 12982 OpenMPDirectiveKind Kind, 12983 bool EmitChecks, 12984 bool ForceSimpleCall) { 12985 llvm_unreachable("Not supported in SIMD-only mode"); 12986 } 12987 12988 void CGOpenMPSIMDRuntime::emitForDispatchInit( 12989 CodeGenFunction &CGF, SourceLocation Loc, 12990 const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned, 12991 bool Ordered, const DispatchRTInput &DispatchValues) { 12992 llvm_unreachable("Not supported in SIMD-only mode"); 12993 } 12994 12995 void CGOpenMPSIMDRuntime::emitForStaticInit( 12996 CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind 
DKind, 12997 const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values) { 12998 llvm_unreachable("Not supported in SIMD-only mode"); 12999 } 13000 13001 void CGOpenMPSIMDRuntime::emitDistributeStaticInit( 13002 CodeGenFunction &CGF, SourceLocation Loc, 13003 OpenMPDistScheduleClauseKind SchedKind, const StaticRTInput &Values) { 13004 llvm_unreachable("Not supported in SIMD-only mode"); 13005 } 13006 13007 void CGOpenMPSIMDRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF, 13008 SourceLocation Loc, 13009 unsigned IVSize, 13010 bool IVSigned) { 13011 llvm_unreachable("Not supported in SIMD-only mode"); 13012 } 13013 13014 void CGOpenMPSIMDRuntime::emitForStaticFinish(CodeGenFunction &CGF, 13015 SourceLocation Loc, 13016 OpenMPDirectiveKind DKind) { 13017 llvm_unreachable("Not supported in SIMD-only mode"); 13018 } 13019 13020 llvm::Value *CGOpenMPSIMDRuntime::emitForNext(CodeGenFunction &CGF, 13021 SourceLocation Loc, 13022 unsigned IVSize, bool IVSigned, 13023 Address IL, Address LB, 13024 Address UB, Address ST) { 13025 llvm_unreachable("Not supported in SIMD-only mode"); 13026 } 13027 13028 void CGOpenMPSIMDRuntime::emitNumThreadsClause(CodeGenFunction &CGF, 13029 llvm::Value *NumThreads, 13030 SourceLocation Loc) { 13031 llvm_unreachable("Not supported in SIMD-only mode"); 13032 } 13033 13034 void CGOpenMPSIMDRuntime::emitProcBindClause(CodeGenFunction &CGF, 13035 ProcBindKind ProcBind, 13036 SourceLocation Loc) { 13037 llvm_unreachable("Not supported in SIMD-only mode"); 13038 } 13039 13040 Address CGOpenMPSIMDRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF, 13041 const VarDecl *VD, 13042 Address VDAddr, 13043 SourceLocation Loc) { 13044 llvm_unreachable("Not supported in SIMD-only mode"); 13045 } 13046 13047 llvm::Function *CGOpenMPSIMDRuntime::emitThreadPrivateVarDefinition( 13048 const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit, 13049 CodeGenFunction *CGF) { 13050 llvm_unreachable("Not supported in SIMD-only mode"); 
13051 } 13052 13053 Address CGOpenMPSIMDRuntime::getAddrOfArtificialThreadPrivate( 13054 CodeGenFunction &CGF, QualType VarType, StringRef Name) { 13055 llvm_unreachable("Not supported in SIMD-only mode"); 13056 } 13057 13058 void CGOpenMPSIMDRuntime::emitFlush(CodeGenFunction &CGF, 13059 ArrayRef<const Expr *> Vars, 13060 SourceLocation Loc, 13061 llvm::AtomicOrdering AO) { 13062 llvm_unreachable("Not supported in SIMD-only mode"); 13063 } 13064 13065 void CGOpenMPSIMDRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc, 13066 const OMPExecutableDirective &D, 13067 llvm::Function *TaskFunction, 13068 QualType SharedsTy, Address Shareds, 13069 const Expr *IfCond, 13070 const OMPTaskDataTy &Data) { 13071 llvm_unreachable("Not supported in SIMD-only mode"); 13072 } 13073 13074 void CGOpenMPSIMDRuntime::emitTaskLoopCall( 13075 CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D, 13076 llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds, 13077 const Expr *IfCond, const OMPTaskDataTy &Data) { 13078 llvm_unreachable("Not supported in SIMD-only mode"); 13079 } 13080 13081 void CGOpenMPSIMDRuntime::emitReduction( 13082 CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> Privates, 13083 ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs, 13084 ArrayRef<const Expr *> ReductionOps, ReductionOptionsTy Options) { 13085 assert(Options.SimpleReduction && "Only simple reduction is expected."); 13086 CGOpenMPRuntime::emitReduction(CGF, Loc, Privates, LHSExprs, RHSExprs, 13087 ReductionOps, Options); 13088 } 13089 13090 llvm::Value *CGOpenMPSIMDRuntime::emitTaskReductionInit( 13091 CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs, 13092 ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) { 13093 llvm_unreachable("Not supported in SIMD-only mode"); 13094 } 13095 13096 void CGOpenMPSIMDRuntime::emitTaskReductionFini(CodeGenFunction &CGF, 13097 SourceLocation Loc, 13098 bool 
IsWorksharingReduction) { 13099 llvm_unreachable("Not supported in SIMD-only mode"); 13100 } 13101 13102 void CGOpenMPSIMDRuntime::emitTaskReductionFixups(CodeGenFunction &CGF, 13103 SourceLocation Loc, 13104 ReductionCodeGen &RCG, 13105 unsigned N) { 13106 llvm_unreachable("Not supported in SIMD-only mode"); 13107 } 13108 13109 Address CGOpenMPSIMDRuntime::getTaskReductionItem(CodeGenFunction &CGF, 13110 SourceLocation Loc, 13111 llvm::Value *ReductionsPtr, 13112 LValue SharedLVal) { 13113 llvm_unreachable("Not supported in SIMD-only mode"); 13114 } 13115 13116 void CGOpenMPSIMDRuntime::emitTaskwaitCall(CodeGenFunction &CGF, 13117 SourceLocation Loc, 13118 const OMPTaskDataTy &Data) { 13119 llvm_unreachable("Not supported in SIMD-only mode"); 13120 } 13121 13122 void CGOpenMPSIMDRuntime::emitCancellationPointCall( 13123 CodeGenFunction &CGF, SourceLocation Loc, 13124 OpenMPDirectiveKind CancelRegion) { 13125 llvm_unreachable("Not supported in SIMD-only mode"); 13126 } 13127 13128 void CGOpenMPSIMDRuntime::emitCancelCall(CodeGenFunction &CGF, 13129 SourceLocation Loc, const Expr *IfCond, 13130 OpenMPDirectiveKind CancelRegion) { 13131 llvm_unreachable("Not supported in SIMD-only mode"); 13132 } 13133 13134 void CGOpenMPSIMDRuntime::emitTargetOutlinedFunction( 13135 const OMPExecutableDirective &D, StringRef ParentName, 13136 llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, 13137 bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) { 13138 llvm_unreachable("Not supported in SIMD-only mode"); 13139 } 13140 13141 void CGOpenMPSIMDRuntime::emitTargetCall( 13142 CodeGenFunction &CGF, const OMPExecutableDirective &D, 13143 llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond, 13144 llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device, 13145 llvm::function_ref<llvm::Value *(CodeGenFunction &CGF, 13146 const OMPLoopDirective &D)> 13147 SizeEmitter) { 13148 llvm_unreachable("Not supported in SIMD-only mode"); 13149 } 13150 
13151 bool CGOpenMPSIMDRuntime::emitTargetFunctions(GlobalDecl GD) { 13152 llvm_unreachable("Not supported in SIMD-only mode"); 13153 } 13154 13155 bool CGOpenMPSIMDRuntime::emitTargetGlobalVariable(GlobalDecl GD) { 13156 llvm_unreachable("Not supported in SIMD-only mode"); 13157 } 13158 13159 bool CGOpenMPSIMDRuntime::emitTargetGlobal(GlobalDecl GD) { 13160 return false; 13161 } 13162 13163 void CGOpenMPSIMDRuntime::emitTeamsCall(CodeGenFunction &CGF, 13164 const OMPExecutableDirective &D, 13165 SourceLocation Loc, 13166 llvm::Function *OutlinedFn, 13167 ArrayRef<llvm::Value *> CapturedVars) { 13168 llvm_unreachable("Not supported in SIMD-only mode"); 13169 } 13170 13171 void CGOpenMPSIMDRuntime::emitNumTeamsClause(CodeGenFunction &CGF, 13172 const Expr *NumTeams, 13173 const Expr *ThreadLimit, 13174 SourceLocation Loc) { 13175 llvm_unreachable("Not supported in SIMD-only mode"); 13176 } 13177 13178 void CGOpenMPSIMDRuntime::emitTargetDataCalls( 13179 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, 13180 const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) { 13181 llvm_unreachable("Not supported in SIMD-only mode"); 13182 } 13183 13184 void CGOpenMPSIMDRuntime::emitTargetDataStandAloneCall( 13185 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, 13186 const Expr *Device) { 13187 llvm_unreachable("Not supported in SIMD-only mode"); 13188 } 13189 13190 void CGOpenMPSIMDRuntime::emitDoacrossInit(CodeGenFunction &CGF, 13191 const OMPLoopDirective &D, 13192 ArrayRef<Expr *> NumIterations) { 13193 llvm_unreachable("Not supported in SIMD-only mode"); 13194 } 13195 13196 void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF, 13197 const OMPDependClause *C) { 13198 llvm_unreachable("Not supported in SIMD-only mode"); 13199 } 13200 13201 const VarDecl * 13202 CGOpenMPSIMDRuntime::translateParameter(const FieldDecl *FD, 13203 const VarDecl *NativeParam) const { 13204 llvm_unreachable("Not 
supported in SIMD-only mode"); 13205 } 13206 13207 Address 13208 CGOpenMPSIMDRuntime::getParameterAddress(CodeGenFunction &CGF, 13209 const VarDecl *NativeParam, 13210 const VarDecl *TargetParam) const { 13211 llvm_unreachable("Not supported in SIMD-only mode"); 13212 } 13213