//===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This provides a class for OpenMP runtime code generation.
//
//===----------------------------------------------------------------------===//

#include "CGOpenMPRuntime.h"
#include "CGCXXABI.h"
#include "CGCleanup.h"
#include "CGRecordLayout.h"
#include "CodeGenFunction.h"
#include "TargetInfo.h"
#include "clang/AST/APValue.h"
#include "clang/AST/Attr.h"
#include "clang/AST/Decl.h"
#include "clang/AST/OpenMPClause.h"
#include "clang/AST/StmtOpenMP.h"
#include "clang/AST/StmtVisitor.h"
#include "clang/Basic/BitmaskEnum.h"
#include "clang/Basic/FileManager.h"
#include "clang/Basic/OpenMPKinds.h"
#include "clang/Basic/SourceManager.h"
#include "clang/CodeGen/ConstantInitBuilder.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SetOperations.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Bitcode/BitcodeReader.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>
#include <numeric>

using namespace clang;
using namespace CodeGen;
using namespace llvm::omp;

namespace {
/// Base class for handling code generation inside OpenMP regions.
class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
public:
  /// Kinds of OpenMP regions used in codegen.
  enum CGOpenMPRegionKind {
    /// Region with outlined function for standalone 'parallel'
    /// directive.
    ParallelOutlinedRegion,
    /// Region with outlined function for standalone 'task' directive.
    TaskOutlinedRegion,
    /// Region for constructs that do not require function outlining,
    /// like 'for', 'sections', 'atomic' etc. directives.
    InlinedRegion,
    /// Region with outlined function for standalone 'target' directive.
    TargetRegion,
  };

  CGOpenMPRegionInfo(const CapturedStmt &CS,
                     const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind),
        CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {}

  CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen),
        Kind(Kind), HasCancel(HasCancel) {}

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  virtual const VarDecl *getThreadIDVariable() const = 0;

  /// Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override;

  /// Get an LValue for the current ThreadID variable.
  /// \return LValue for thread id variable. This LValue always has type int32*.
  virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF);

  virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {}

  CGOpenMPRegionKind getRegionKind() const { return RegionKind; }

  OpenMPDirectiveKind getDirectiveKind() const { return Kind; }

  bool hasCancel() const { return HasCancel; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return Info->getKind() == CR_OpenMP;
  }

  ~CGOpenMPRegionInfo() override = default;

protected:
  CGOpenMPRegionKind RegionKind;
  RegionCodeGenTy CodeGen;
  OpenMPDirectiveKind Kind;
  bool HasCancel;
};

/// API for captured statement code generation in OpenMP constructs.
class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar,
                             const RegionCodeGenTy &CodeGen,
                             OpenMPDirectiveKind Kind, bool HasCancel,
                             StringRef HelperName)
      : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind,
                           HasCancel),
        ThreadIDVar(ThreadIDVar), HelperName(HelperName) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return HelperName; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               ParallelOutlinedRegion;
  }

private:
  /// A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  StringRef HelperName;
};

/// API for captured statement code generation in OpenMP constructs.
class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  class UntiedTaskActionTy final : public PrePostActionTy {
    bool Untied;
    const VarDecl *PartIDVar;
    const RegionCodeGenTy UntiedCodeGen;
    llvm::SwitchInst *UntiedSwitch = nullptr;

  public:
    UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar,
                       const RegionCodeGenTy &UntiedCodeGen)
        : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {}
    void Enter(CodeGenFunction &CGF) override {
      if (Untied) {
        // Emit task switching point.
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        llvm::Value *Res =
            CGF.EmitLoadOfScalar(PartIdLVal, PartIDVar->getLocation());
        llvm::BasicBlock *DoneBB = CGF.createBasicBlock(".untied.done.");
        UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB);
        CGF.EmitBlock(DoneBB);
        CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        UntiedSwitch->addCase(CGF.Builder.getInt32(0),
                              CGF.Builder.GetInsertBlock());
        emitUntiedSwitch(CGF);
      }
    }
    void emitUntiedSwitch(CodeGenFunction &CGF) const {
      if (Untied) {
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              PartIdLVal);
        UntiedCodeGen(CGF);
        CodeGenFunction::JumpDest CurPoint =
            CGF.getJumpDestInCurrentScope(".untied.next.");
        CGF.EmitBranch(CGF.ReturnBlock.getBlock());
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              CGF.Builder.GetInsertBlock());
        CGF.EmitBranchThroughCleanup(CurPoint);
        CGF.EmitBlock(CurPoint.getBlock());
      }
    }
    unsigned getNumberOfParts() const { return UntiedSwitch->getNumCases(); }
  };
  CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS,
                                 const VarDecl *ThreadIDVar,
                                 const RegionCodeGenTy &CodeGen,
                                 OpenMPDirectiveKind Kind, bool HasCancel,
                                 const UntiedTaskActionTy &Action)
      : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel),
        ThreadIDVar(ThreadIDVar), Action(Action) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// Get an LValue for the current ThreadID variable.
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override;

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return ".omp_outlined."; }

  void emitUntiedSwitch(CodeGenFunction &CGF) override {
    Action.emitUntiedSwitch(CGF);
  }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               TaskOutlinedRegion;
  }

private:
  /// A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  /// Action for emitting code for untied tasks.
  const UntiedTaskActionTy &Action;
};

/// API for inlined captured statement code generation in OpenMP
/// constructs.
class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo {
public:
  CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI,
                            const RegionCodeGenTy &CodeGen,
                            OpenMPDirectiveKind Kind, bool HasCancel)
      : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel),
        OldCSI(OldCSI),
        OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {}

  /// Retrieve the value of the context parameter.
  llvm::Value *getContextValue() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getContextValue();
    llvm_unreachable("No context value for inlined OpenMP region");
  }

  void setContextValue(llvm::Value *V) override {
    if (OuterRegionInfo) {
      OuterRegionInfo->setContextValue(V);
      return;
    }
    llvm_unreachable("No context value for inlined OpenMP region");
  }

  /// Lookup the captured field decl for a variable.
  const FieldDecl *lookup(const VarDecl *VD) const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->lookup(VD);
    // If there is no outer outlined region, there is no need to look the
    // variable up in a list of captured variables; we can use the original one.
    return nullptr;
  }

  FieldDecl *getThisFieldDecl() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThisFieldDecl();
    return nullptr;
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThreadIDVariable();
    return nullptr;
  }

  /// Get an LValue for the current ThreadID variable.
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThreadIDVariableLValue(CGF);
    llvm_unreachable("No LValue for inlined OpenMP construct");
  }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override {
    if (auto *OuterRegionInfo = getOldCSI())
      return OuterRegionInfo->getHelperName();
    llvm_unreachable("No helper name for inlined OpenMP construct");
  }

  void emitUntiedSwitch(CodeGenFunction &CGF) override {
    if (OuterRegionInfo)
      OuterRegionInfo->emitUntiedSwitch(CGF);
  }

  CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion;
  }

  ~CGOpenMPInlinedRegionInfo() override = default;

private:
  /// CodeGen info about outer OpenMP region.
  CodeGenFunction::CGCapturedStmtInfo *OldCSI;
  CGOpenMPRegionInfo *OuterRegionInfo;
};

/// API for captured statement code generation in OpenMP target
/// constructs. For these captures, implicit parameters are used instead of the
/// captured fields. The name of the target region has to be unique in a given
/// application, so it is provided by the client, because only the client has
/// the information to generate it.
class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo {
public:
  CGOpenMPTargetRegionInfo(const CapturedStmt &CS,
                           const RegionCodeGenTy &CodeGen, StringRef HelperName)
      : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target,
                           /*HasCancel=*/false),
        HelperName(HelperName) {}

  /// This is unused for target regions because each starts executing
  /// with a single thread.
  const VarDecl *getThreadIDVariable() const override { return nullptr; }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return HelperName; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion;
  }

private:
  StringRef HelperName;
};

static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) {
  llvm_unreachable("No codegen for expressions");
}
/// API for generation of expressions captured in an innermost OpenMP
/// region.
class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo {
public:
  CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS)
      : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen,
                                  OMPD_unknown,
                                  /*HasCancel=*/false),
        PrivScope(CGF) {
    // Make sure the globals captured in the provided statement are local by
    // using the privatization logic. We assume the same variable is not
    // captured more than once.
    for (const auto &C : CS.captures()) {
      if (!C.capturesVariable() && !C.capturesVariableByCopy())
        continue;

      const VarDecl *VD = C.getCapturedVar();
      if (VD->isLocalVarDeclOrParm())
        continue;

      DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD),
                      /*RefersToEnclosingVariableOrCapture=*/false,
                      VD->getType().getNonReferenceType(), VK_LValue,
                      C.getLocation());
      PrivScope.addPrivate(
          VD, [&CGF, &DRE]() { return CGF.EmitLValue(&DRE).getAddress(CGF); });
    }
    (void)PrivScope.Privatize();
  }

  /// Lookup the captured field decl for a variable.
  const FieldDecl *lookup(const VarDecl *VD) const override {
    if (const FieldDecl *FD = CGOpenMPInlinedRegionInfo::lookup(VD))
      return FD;
    return nullptr;
  }

  /// Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override {
    llvm_unreachable("No body for expressions");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override {
    llvm_unreachable("No thread id for expressions");
  }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override {
    llvm_unreachable("No helper name for expressions");
  }

  static bool classof(const CGCapturedStmtInfo *Info) { return false; }

private:
  /// Private scope to capture global variables.
  CodeGenFunction::OMPPrivateScope PrivScope;
};

/// RAII for emitting code of OpenMP constructs.
class InlinedOpenMPRegionRAII {
  CodeGenFunction &CGF;
  llvm::DenseMap<const VarDecl *, FieldDecl *> LambdaCaptureFields;
  FieldDecl *LambdaThisCaptureField = nullptr;
  const CodeGen::CGBlockInfo *BlockInfo = nullptr;
  bool NoInheritance = false;

public:
  /// Constructs region for combined constructs.
  /// \param CodeGen Code generation sequence for combined directives. Includes
  /// a list of functions used for code generation of implicitly inlined
  /// regions.
  InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen,
                          OpenMPDirectiveKind Kind, bool HasCancel,
                          bool NoInheritance = true)
      : CGF(CGF), NoInheritance(NoInheritance) {
    // Start emission for the construct.
    CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo(
        CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel);
    if (NoInheritance) {
      std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
      LambdaThisCaptureField = CGF.LambdaThisCaptureField;
      CGF.LambdaThisCaptureField = nullptr;
      BlockInfo = CGF.BlockInfo;
      CGF.BlockInfo = nullptr;
    }
  }

  ~InlinedOpenMPRegionRAII() {
    // Restore original CapturedStmtInfo only if we're done with code emission.
    auto *OldCSI =
        cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI();
    delete CGF.CapturedStmtInfo;
    CGF.CapturedStmtInfo = OldCSI;
    if (NoInheritance) {
      std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
      CGF.LambdaThisCaptureField = LambdaThisCaptureField;
      CGF.BlockInfo = BlockInfo;
    }
  }
};

/// Values for bit flags used in the ident_t to describe the fields.
/// All enumerated elements are named and described in accordance with the code
/// from https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h
enum OpenMPLocationFlags : unsigned {
  /// Use trampoline for internal microtask.
  OMP_IDENT_IMD = 0x01,
  /// Use c-style ident structure.
  OMP_IDENT_KMPC = 0x02,
  /// Atomic reduction option for kmpc_reduce.
  OMP_ATOMIC_REDUCE = 0x10,
  /// Explicit 'barrier' directive.
  OMP_IDENT_BARRIER_EXPL = 0x20,
  /// Implicit barrier in code.
  OMP_IDENT_BARRIER_IMPL = 0x40,
  /// Implicit barrier in 'for' directive.
  OMP_IDENT_BARRIER_IMPL_FOR = 0x40,
  /// Implicit barrier in 'sections' directive.
  OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0,
  /// Implicit barrier in 'single' directive.
  OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140,
  /// Call of __kmp_for_static_init for static loop.
  OMP_IDENT_WORK_LOOP = 0x200,
  /// Call of __kmp_for_static_init for sections.
  OMP_IDENT_WORK_SECTIONS = 0x400,
  /// Call of __kmp_for_static_init for distribute.
  OMP_IDENT_WORK_DISTRIBUTE = 0x800,
  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE)
};

namespace {
LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
/// Values for bit flags for marking which requires clauses have been used.
enum OpenMPOffloadingRequiresDirFlags : int64_t {
  /// flag undefined.
  OMP_REQ_UNDEFINED = 0x000,
  /// no requires clause present.
  OMP_REQ_NONE = 0x001,
  /// reverse_offload clause.
  OMP_REQ_REVERSE_OFFLOAD = 0x002,
  /// unified_address clause.
  OMP_REQ_UNIFIED_ADDRESS = 0x004,
  /// unified_shared_memory clause.
  OMP_REQ_UNIFIED_SHARED_MEMORY = 0x008,
  /// dynamic_allocators clause.
  OMP_REQ_DYNAMIC_ALLOCATORS = 0x010,
  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_REQ_DYNAMIC_ALLOCATORS)
};

enum OpenMPOffloadingReservedDeviceIDs {
  /// Device ID if the device was not defined, runtime should get it
  /// from environment variables in the spec.
  OMP_DEVICEID_UNDEF = -1,
};
} // anonymous namespace
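// Illustration (a sketch, not emitted verbatim by any one routine here):
// mirroring the kmp.h values above, the implicit barrier at the end of a
// worksharing 'for' construct is encoded in the ident_t flags as
//   OMP_IDENT_KMPC | OMP_IDENT_BARRIER_IMPL_FOR  // 0x02 | 0x40 == 0x42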
/// Describes ident structure that describes a source location.
/// All descriptions are taken from
/// https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h
/// Original structure:
/// typedef struct ident {
///    kmp_int32 reserved_1;   /**<  might be used in Fortran;
///                                  see above  */
///    kmp_int32 flags;        /**<  also f.flags; KMP_IDENT_xxx flags;
///                                  KMP_IDENT_KMPC identifies this union
///                                  member  */
///    kmp_int32 reserved_2;   /**<  not really used in Fortran any more;
///                                  see above */
///#if USE_ITT_BUILD
///                            /*  but currently used for storing
///                                region-specific ITT */
///                            /*  contextual information. */
///#endif /* USE_ITT_BUILD */
///    kmp_int32 reserved_3;   /**< source[4] in Fortran, do not use for
///                                 C++  */
///    char const *psource;    /**< String describing the source location.
///                            The string is composed of semi-colon separated
///                            fields which describe the source file,
///                            the function and a pair of line numbers that
///                            delimit the construct.
///                            */
/// } ident_t;
enum IdentFieldIndex {
  /// might be used in Fortran
  IdentField_Reserved_1,
  /// OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member.
  IdentField_Flags,
  /// Not really used in Fortran any more
  IdentField_Reserved_2,
  /// Source[4] in Fortran, do not use for C++
  IdentField_Reserved_3,
  /// String describing the source location. The string is composed of
  /// semi-colon separated fields which describe the source file, the function
  /// and a pair of line numbers that delimit the construct.
  IdentField_PSource
};

/// Schedule types for 'omp for' loops (these enumerators are taken from
/// the enum sched_type in kmp.h).
enum OpenMPSchedType {
  /// Lower bound for default (unordered) versions.
  OMP_sch_lower = 32,
  OMP_sch_static_chunked = 33,
  OMP_sch_static = 34,
  OMP_sch_dynamic_chunked = 35,
  OMP_sch_guided_chunked = 36,
  OMP_sch_runtime = 37,
  OMP_sch_auto = 38,
  /// static with chunk adjustment (e.g., simd)
  OMP_sch_static_balanced_chunked = 45,
  /// Lower bound for 'ordered' versions.
  OMP_ord_lower = 64,
  OMP_ord_static_chunked = 65,
  OMP_ord_static = 66,
  OMP_ord_dynamic_chunked = 67,
  OMP_ord_guided_chunked = 68,
  OMP_ord_runtime = 69,
  OMP_ord_auto = 70,
  OMP_sch_default = OMP_sch_static,
  /// dist_schedule types
  OMP_dist_sch_static_chunked = 91,
  OMP_dist_sch_static = 92,
  /// Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers.
  /// Set if the monotonic schedule modifier was present.
  OMP_sch_modifier_monotonic = (1 << 29),
  /// Set if the nonmonotonic schedule modifier was present.
  OMP_sch_modifier_nonmonotonic = (1 << 30),
};
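// Illustration (a sketch; the values mirror sched_type in kmp.h): a loop with
// 'schedule(nonmonotonic: dynamic, 4)' would be lowered with the schedule
// word
//   OMP_sch_dynamic_chunked | OMP_sch_modifier_nonmonotonic  // 35 | (1 << 30)
// while the chunk size 4 is passed as a separate argument to the runtime's
// dispatch-init call.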
/// A basic class for pre|post-action for advanced codegen sequence for OpenMP
/// region.
class CleanupTy final : public EHScopeStack::Cleanup {
  PrePostActionTy *Action;

public:
  explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {}
  void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
    if (!CGF.HaveInsertPoint())
      return;
    Action->Exit(CGF);
  }
};

} // anonymous namespace

void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const {
  CodeGenFunction::RunCleanupsScope Scope(CGF);
  if (PrePostAction) {
    CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction);
    Callback(CodeGen, CGF, *PrePostAction);
  } else {
    PrePostActionTy Action;
    Callback(CodeGen, CGF, Action);
  }
}

/// Check if the combiner is a call to a UDR combiner and, if so, return the
/// UDR decl used for reduction.
static const OMPDeclareReductionDecl *
getReductionInit(const Expr *ReductionOp) {
  if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
    if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
      if (const auto *DRE =
              dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
        if (const auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl()))
          return DRD;
  return nullptr;
}

static void emitInitWithReductionInitializer(CodeGenFunction &CGF,
                                             const OMPDeclareReductionDecl *DRD,
                                             const Expr *InitOp,
                                             Address Private, Address Original,
                                             QualType Ty) {
  if (DRD->getInitializer()) {
    std::pair<llvm::Function *, llvm::Function *> Reduction =
        CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
    const auto *CE = cast<CallExpr>(InitOp);
    const auto *OVE = cast<OpaqueValueExpr>(CE->getCallee());
    const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts();
    const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts();
    const auto *LHSDRE =
        cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr());
    const auto *RHSDRE =
        cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr());
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()),
                            [=]() { return Private; });
    PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()),
                            [=]() { return Original; });
    (void)PrivateScope.Privatize();
    RValue Func = RValue::get(Reduction.second);
    CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
    CGF.EmitIgnoredExpr(InitOp);
  } else {
    llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty);
    std::string Name = CGF.CGM.getOpenMPRuntime().getName({"init"});
    auto *GV = new llvm::GlobalVariable(
        CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true,
        llvm::GlobalValue::PrivateLinkage, Init, Name);
    LValue LV = CGF.MakeNaturalAlignAddrLValue(GV, Ty);
    RValue InitRVal;
    switch (CGF.getEvaluationKind(Ty)) {
    case TEK_Scalar:
      InitRVal = CGF.EmitLoadOfLValue(LV, DRD->getLocation());
      break;
    case TEK_Complex:
      InitRVal =
          RValue::getComplex(CGF.EmitLoadOfComplex(LV, DRD->getLocation()));
      break;
    case TEK_Aggregate: {
      OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_LValue);
      CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, LV);
      CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
                           /*IsInitializer=*/false);
      return;
    }
    }
    OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_PRValue);
    CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal);
    CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
                         /*IsInitializer=*/false);
  }
}

/// Emit initialization of arrays of complex types.
/// \param DestAddr Address of the array.
/// \param Type Type of array.
/// \param Init Initial expression of array.
/// \param SrcAddr Address of the original array.
static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr,
                                 QualType Type, bool EmitDeclareReductionInit,
                                 const Expr *Init,
                                 const OMPDeclareReductionDecl *DRD,
                                 Address SrcAddr = Address::invalid()) {
  // Perform element-by-element initialization.
  QualType ElementTy;

  // Drill down to the base element type on both arrays.
  const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr);
  DestAddr =
      CGF.Builder.CreateElementBitCast(DestAddr, DestAddr.getElementType());
  if (DRD)
    SrcAddr =
        CGF.Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType());

  llvm::Value *SrcBegin = nullptr;
  if (DRD)
    SrcBegin = SrcAddr.getPointer();
  llvm::Value *DestBegin = DestAddr.getPointer();
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *DestEnd =
      CGF.Builder.CreateGEP(DestAddr.getElementType(), DestBegin, NumElements);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arrayinit.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arrayinit.done");
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);

  llvm::PHINode *SrcElementPHI = nullptr;
  Address SrcElementCurrent = Address::invalid();
  if (DRD) {
    SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2,
                                          "omp.arraycpy.srcElementPast");
    SrcElementPHI->addIncoming(SrcBegin, EntryBB);
    SrcElementCurrent =
        Address(SrcElementPHI,
                SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));
  }
  llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI(
      DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
  DestElementPHI->addIncoming(DestBegin, EntryBB);
  Address DestElementCurrent =
      Address(DestElementPHI,
              DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  {
    CodeGenFunction::RunCleanupsScope InitScope(CGF);
    if (EmitDeclareReductionInit) {
      emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent,
                                       SrcElementCurrent, ElementTy);
    } else
      CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(),
                           /*IsInitializer=*/false);
  }

  if (DRD) {
    // Shift the address forward by one element.
    llvm::Value *SrcElementNext = CGF.Builder.CreateConstGEP1_32(
        SrcAddr.getElementType(), SrcElementPHI, /*Idx0=*/1,
        "omp.arraycpy.dest.element");
    SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock());
  }

  // Shift the address forward by one element.
  llvm::Value *DestElementNext = CGF.Builder.CreateConstGEP1_32(
      DestAddr.getElementType(), DestElementPHI, /*Idx0=*/1,
      "omp.arraycpy.dest.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}

LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) {
  return CGF.EmitOMPSharedLValue(E);
}

LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF,
                                            const Expr *E) {
  if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E))
    return CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false);
  return LValue();
}

void ReductionCodeGen::emitAggregateInitialization(
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal,
    const OMPDeclareReductionDecl *DRD) {
  // Emit VarDecl with copy init for arrays.
  // Get the address of the original variable captured in current
  // captured region.
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  bool EmitDeclareReductionInit =
      DRD && (DRD->getInitializer() || !PrivateVD->hasInit());
  EmitOMPAggregateInit(CGF, PrivateAddr, PrivateVD->getType(),
                       EmitDeclareReductionInit,
                       EmitDeclareReductionInit ? ClausesData[N].ReductionOp
                                                : PrivateVD->getInit(),
                       DRD, SharedLVal.getAddress(CGF));
}

ReductionCodeGen::ReductionCodeGen(ArrayRef<const Expr *> Shareds,
                                   ArrayRef<const Expr *> Origs,
                                   ArrayRef<const Expr *> Privates,
                                   ArrayRef<const Expr *> ReductionOps) {
  ClausesData.reserve(Shareds.size());
  SharedAddresses.reserve(Shareds.size());
  Sizes.reserve(Shareds.size());
  BaseDecls.reserve(Shareds.size());
  const auto *IOrig = Origs.begin();
  const auto *IPriv = Privates.begin();
  const auto *IRed = ReductionOps.begin();
  for (const Expr *Ref : Shareds) {
    ClausesData.emplace_back(Ref, *IOrig, *IPriv, *IRed);
    std::advance(IOrig, 1);
    std::advance(IPriv, 1);
    std::advance(IRed, 1);
  }
}

void ReductionCodeGen::emitSharedOrigLValue(CodeGenFunction &CGF, unsigned N) {
  assert(SharedAddresses.size() == N && OrigAddresses.size() == N &&
         "Number of generated lvalues must be exactly N.");
  LValue First = emitSharedLValue(CGF, ClausesData[N].Shared);
  LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Shared);
  SharedAddresses.emplace_back(First, Second);
  if (ClausesData[N].Shared == ClausesData[N].Ref) {
    OrigAddresses.emplace_back(First, Second);
  } else {
    LValue First = emitSharedLValue(CGF, ClausesData[N].Ref);
    LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Ref);
    OrigAddresses.emplace_back(First, Second);
  }
}

void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) {
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  QualType PrivateType = PrivateVD->getType();
  bool AsArraySection = isa<OMPArraySectionExpr>(ClausesData[N].Ref);
  if (!PrivateType->isVariablyModifiedType()) {
    Sizes.emplace_back(
        CGF.getTypeSize(
            OrigAddresses[N].first.getType().getNonReferenceType()),
        nullptr);
    return;
  }
  llvm::Value *Size;
  llvm::Value *SizeInChars;
  auto *ElemType =
      cast<llvm::PointerType>(
          OrigAddresses[N].first.getPointer(CGF)->getType())
          ->getElementType();
  auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType);
  if (AsArraySection) {
    Size = CGF.Builder.CreatePtrDiff(OrigAddresses[N].second.getPointer(CGF),
                                     OrigAddresses[N].first.getPointer(CGF));
    Size = CGF.Builder.CreateNUWAdd(
        Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1));
    SizeInChars = CGF.Builder.CreateNUWMul(Size, ElemSizeOf);
  } else {
    SizeInChars = CGF.getTypeSize(
        OrigAddresses[N].first.getType().getNonReferenceType());
    Size = CGF.Builder.CreateExactUDiv(SizeInChars, ElemSizeOf);
  }
  Sizes.emplace_back(SizeInChars, Size);
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}

void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N,
                                         llvm::Value *Size) {
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  QualType PrivateType = PrivateVD->getType();
  if (!PrivateType->isVariablyModifiedType()) {
    assert(!Size && !Sizes[N].second &&
           "Size should be nullptr for non-variably modified reduction "
           "items.");
    return;
  }
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}

void ReductionCodeGen::emitInitialization(
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal,
    llvm::function_ref<bool(CodeGenFunction &)> DefaultInit) {
  assert(SharedAddresses.size() > N && "No variable was generated");
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  const OMPDeclareReductionDecl *DRD =
      getReductionInit(ClausesData[N].ReductionOp);
  QualType PrivateType = PrivateVD->getType();
  PrivateAddr = CGF.Builder.CreateElementBitCast(
      PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
  QualType SharedType = SharedAddresses[N].first.getType();
  SharedLVal = CGF.MakeAddrLValue(
      CGF.Builder.CreateElementBitCast(SharedLVal.getAddress(CGF),
                                       CGF.ConvertTypeForMem(SharedType)),
      SharedType, SharedAddresses[N].first.getBaseInfo(),
      CGF.CGM.getTBAAInfoForSubobject(SharedAddresses[N].first, SharedType));
  if (CGF.getContext().getAsArrayType(PrivateVD->getType())) {
    if (DRD && DRD->getInitializer())
      (void)DefaultInit(CGF);
    emitAggregateInitialization(CGF, N, PrivateAddr, SharedLVal, DRD);
  } else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) {
    (void)DefaultInit(CGF);
    emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp,
                                     PrivateAddr, SharedLVal.getAddress(CGF),
                                     SharedLVal.getType());
  } else if (!DefaultInit(CGF) && PrivateVD->hasInit() &&
             !CGF.isTrivialInitializer(PrivateVD->getInit())) {
    CGF.EmitAnyExprToMem(PrivateVD->getInit(), PrivateAddr,
                         PrivateVD->getType().getQualifiers(),
                         /*IsInitializer=*/false);
  }
}

bool ReductionCodeGen::needCleanups(unsigned N) {
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  QualType PrivateType = PrivateVD->getType();
  QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
  return DTorKind != QualType::DK_none;
}

void ReductionCodeGen::emitCleanups(CodeGenFunction &CGF, unsigned N,
                                    Address PrivateAddr) {
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  QualType PrivateType = PrivateVD->getType();
  QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
  if (needCleanups(N)) {
    PrivateAddr = CGF.Builder.CreateElementBitCast(
        PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
    CGF.pushDestroy(DTorKind, PrivateAddr, PrivateType);
  }
}

static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          LValue BaseLV) {
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    if (const auto *PtrTy = BaseTy->getAs<PointerType>()) {
      BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(CGF), PtrTy);
    } else {
      LValue RefLVal = CGF.MakeAddrLValue(BaseLV.getAddress(CGF), BaseTy);
      BaseLV = CGF.EmitLoadOfReferenceLValue(RefLVal);
    }
    BaseTy = BaseTy->getPointeeType();
  }
  return CGF.MakeAddrLValue(
      CGF.Builder.CreateElementBitCast(BaseLV.getAddress(CGF),
                                       CGF.ConvertTypeForMem(ElTy)),
      BaseLV.getType(), BaseLV.getBaseInfo(),
      CGF.CGM.getTBAAInfoForSubobject(BaseLV, BaseLV.getType()));
}

static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          llvm::Type *BaseLVType, CharUnits BaseLVAlignment,
                          llvm::Value *Addr) {
  Address Tmp = Address::invalid();
  Address TopTmp = Address::invalid();
  Address MostTopTmp = Address::invalid();
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    Tmp = CGF.CreateMemTemp(BaseTy);
    if (TopTmp.isValid())
      CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp);
    else
      MostTopTmp = Tmp;
    TopTmp = Tmp;
    BaseTy = BaseTy->getPointeeType();
  }
  llvm::Type *Ty = BaseLVType;
  if (Tmp.isValid())
    Ty = Tmp.getElementType();
  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Addr, Ty);
  if (Tmp.isValid()) {
    CGF.Builder.CreateStore(Addr, Tmp);
    return MostTopTmp;
  }
  return Address(Addr, BaseLVAlignment);
}

static const VarDecl *getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE) {
  const VarDecl *OrigVD = nullptr;
  if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(Ref)) {
    const Expr *Base = OASE->getBase()->IgnoreParenImpCasts();
    while (const auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base))
      Base = TempOASE->getBase()->IgnoreParenImpCasts();
    while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
      Base = TempASE->getBase()->IgnoreParenImpCasts();
    DE = cast<DeclRefExpr>(Base);
    OrigVD = cast<VarDecl>(DE->getDecl());
  } else if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Ref)) {
    const Expr *Base = ASE->getBase()->IgnoreParenImpCasts();
    while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
      Base = TempASE->getBase()->IgnoreParenImpCasts();
    DE = cast<DeclRefExpr>(Base);
    OrigVD = cast<VarDecl>(DE->getDecl());
  }
  return OrigVD;
}
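// Illustration (hypothetical clause, not from this file): for a reduction
// item such as 'reduction(+: a[i][0:n])', the loops above peel away the
// array-section and subscript expressions, so getBaseDecl returns the
// declaration of 'a' and DE points at its DeclRefExpr.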
Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N,
                                               Address PrivateAddr) {
  const DeclRefExpr *DE;
  if (const VarDecl *OrigVD = ::getBaseDecl(ClausesData[N].Ref, DE)) {
    BaseDecls.emplace_back(OrigVD);
    LValue OriginalBaseLValue = CGF.EmitLValue(DE);
    LValue BaseLValue =
        loadToBegin(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(),
                    OriginalBaseLValue);
    Address SharedAddr = SharedAddresses[N].first.getAddress(CGF);
    llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff(
        BaseLValue.getPointer(CGF), SharedAddr.getPointer());
    llvm::Value *PrivatePointer =
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            PrivateAddr.getPointer(), SharedAddr.getType());
    llvm::Value *Ptr = CGF.Builder.CreateGEP(
        SharedAddr.getElementType(), PrivatePointer, Adjustment);
    return castToBase(CGF, OrigVD->getType(),
                      SharedAddresses[N].first.getType(),
                      OriginalBaseLValue.getAddress(CGF).getType(),
                      OriginalBaseLValue.getAlignment(), Ptr);
  }
  BaseDecls.emplace_back(
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Ref)->getDecl()));
  return PrivateAddr;
}

bool ReductionCodeGen::usesReductionInitializer(unsigned N) const {
  const OMPDeclareReductionDecl *DRD =
      getReductionInit(ClausesData[N].ReductionOp);
  return DRD && DRD->getInitializer();
}

LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) {
  return CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(getThreadIDVariable()),
      getThreadIDVariable()->getType()->castAs<PointerType>());
}

void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt *S) {
  if (!CGF.HaveInsertPoint())
    return;
  // 1.2.2 OpenMP Language Terminology
  // Structured block - An executable statement with a single entry at the
  // top and a single exit at the bottom.
  // The point of exit cannot be a branch out of the structured block.
  // longjmp() and throw() must not violate the entry/exit criteria.
  CGF.EHStack.pushTerminate();
  if (S)
    CGF.incrementProfileCounter(S);
  CodeGen(CGF);
  CGF.EHStack.popTerminate();
}

LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue(
    CodeGenFunction &CGF) {
  return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()),
                            getThreadIDVariable()->getType(),
                            AlignmentSource::Decl);
}

static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC,
                                       QualType FieldTy) {
  auto *Field = FieldDecl::Create(
      C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy,
      C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()),
      /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit);
  Field->setAccess(AS_public);
  DC->addDecl(Field);
  return Field;
}

CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM, StringRef FirstSeparator,
                                 StringRef Separator)
    : CGM(CGM), FirstSeparator(FirstSeparator), Separator(Separator),
      OMPBuilder(CGM.getModule()), OffloadEntriesInfoManager(CGM) {
  KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8);

  // Initialize Types used in OpenMPIRBuilder from OMPKinds.def
  OMPBuilder.initialize();
  loadOffloadInfoMetadata();
}
void CGOpenMPRuntime::clear() {
  InternalVars.clear();
  // Clean non-target variable declarations possibly used only in debug info.
  for (const auto &Data : EmittedNonTargetVariables) {
    if (!Data.getValue().pointsToAliveValue())
      continue;
    auto *GV = dyn_cast<llvm::GlobalVariable>(Data.getValue());
    if (!GV)
      continue;
    if (!GV->isDeclaration() || GV->getNumUses() > 0)
      continue;
    GV->eraseFromParent();
  }
}

std::string CGOpenMPRuntime::getName(ArrayRef<StringRef> Parts) const {
  SmallString<128> Buffer;
  llvm::raw_svector_ostream OS(Buffer);
  StringRef Sep = FirstSeparator;
  for (StringRef Part : Parts) {
    OS << Sep << Part;
    Sep = Separator;
  }
  return std::string(OS.str());
}
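// Illustration (a sketch; assumes the default "." used for both separators
// when the host runtime is constructed): getName({"omp_combiner", ""})
// yields ".omp_combiner.", the naming scheme used for the helper functions
// emitted below.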
static llvm::Function *
emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty,
                          const Expr *CombinerInitializer, const VarDecl *In,
                          const VarDecl *Out, bool IsCombiner) {
  // void .omp_combiner.(Ty *in, Ty *out);
  ASTContext &C = CGM.getContext();
  QualType PtrTy = C.getPointerType(Ty).withRestrict();
  FunctionArgList Args;
  ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(),
                               /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(),
                              /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
  Args.push_back(&OmpOutParm);
  Args.push_back(&OmpInParm);
  const CGFunctionInfo &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName(
      {IsCombiner ? "omp_combiner" : "omp_initializer", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  if (CGM.getLangOpts().Optimize) {
    Fn->removeFnAttr(llvm::Attribute::NoInline);
    Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
    Fn->addFnAttr(llvm::Attribute::AlwaysInline);
  }
  CodeGenFunction CGF(CGM);
  // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions.
  // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions.
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, In->getLocation(),
                    Out->getLocation());
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm);
  Scope.addPrivate(In, [&CGF, AddrIn, PtrTy]() {
    return CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>())
        .getAddress(CGF);
  });
  Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm);
  Scope.addPrivate(Out, [&CGF, AddrOut, PtrTy]() {
    return CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>())
        .getAddress(CGF);
  });
  (void)Scope.Privatize();
  if (!IsCombiner && Out->hasInit() &&
      !CGF.isTrivialInitializer(Out->getInit())) {
    CGF.EmitAnyExprToMem(Out->getInit(), CGF.GetAddrOfLocalVar(Out),
                         Out->getType().getQualifiers(),
                         /*IsInitializer=*/true);
  }
  if (CombinerInitializer)
    CGF.EmitIgnoredExpr(CombinerInitializer);
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}

void CGOpenMPRuntime::emitUserDefinedReduction(
    CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) {
  if (UDRMap.count(D) > 0)
    return;
  llvm::Function *Combiner = emitCombinerOrInitializer(
      CGM, D->getType(), D->getCombiner(),
      cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerIn())->getDecl()),
      cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerOut())->getDecl()),
      /*IsCombiner=*/true);
  llvm::Function *Initializer = nullptr;
  if (const Expr *Init = D->getInitializer()) {
    Initializer = emitCombinerOrInitializer(
        CGM, D->getType(),
        D->getInitializerKind() == OMPDeclareReductionDecl::CallInit ? Init
                                                                     : nullptr,
        cast<VarDecl>(cast<DeclRefExpr>(D->getInitOrig())->getDecl()),
        cast<VarDecl>(cast<DeclRefExpr>(D->getInitPriv())->getDecl()),
        /*IsCombiner=*/false);
  }
  UDRMap.try_emplace(D, Combiner, Initializer);
  if (CGF) {
    auto &Decls = FunctionUDRMap.FindAndConstruct(CGF->CurFn);
    Decls.second.push_back(D);
  }
}

std::pair<llvm::Function *, llvm::Function *>
CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) {
  auto I = UDRMap.find(D);
  if (I != UDRMap.end())
    return I->second;
  emitUserDefinedReduction(/*CGF=*/nullptr, D);
  return UDRMap.lookup(D);
}

namespace {
// Temporary RAII solution to perform a push/pop stack event on the OpenMP IR
// Builder if one is present.
struct PushAndPopStackRAII {
  PushAndPopStackRAII(llvm::OpenMPIRBuilder *OMPBuilder, CodeGenFunction &CGF,
                      bool HasCancel, llvm::omp::Directive Kind)
      : OMPBuilder(OMPBuilder) {
    if (!OMPBuilder)
      return;

    // The following callback is the crucial part of clang's cleanup process.
    //
    // NOTE:
    // Once the OpenMPIRBuilder is used to create parallel regions (and
    // similar), the cancellation destination (Dest below) is determined via
    // IP. That means if we have variables to finalize we split the block at
    // IP, use the new block (=BB) as the destination to build a JumpDest (via
    // getJumpDestInCurrentScope(BB)), which then is fed to
    // EmitBranchThroughCleanup. Furthermore, there will not be the need
    // to push & pop a FinalizationInfo object.
    // The FiniCB will still be needed but at the point where the
    // OpenMPIRBuilder is asked to construct a parallel (or similar) construct.
    auto FiniCB = [&CGF](llvm::OpenMPIRBuilder::InsertPointTy IP) {
      assert(IP.getBlock()->end() == IP.getPoint() &&
             "Clang CG should cause non-terminated block!");
      CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
      CGF.Builder.restoreIP(IP);
      CodeGenFunction::JumpDest Dest =
          CGF.getOMPCancelDestination(OMPD_parallel);
      CGF.EmitBranchThroughCleanup(Dest);
    };

    // TODO: Remove this once we emit parallel regions through the
    //       OpenMPIRBuilder as it can do this setup internally.
    llvm::OpenMPIRBuilder::FinalizationInfo FI({FiniCB, Kind, HasCancel});
    OMPBuilder->pushFinalizationCB(std::move(FI));
  }
  ~PushAndPopStackRAII() {
    if (OMPBuilder)
      OMPBuilder->popFinalizationCB();
  }
  llvm::OpenMPIRBuilder *OMPBuilder;
};
} // namespace

static llvm::Function *emitParallelOrTeamsOutlinedFunction(
    CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS,
    const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
    const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen) {
  assert(ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 *");
  CodeGenFunction CGF(CGM, true);
  bool HasCancel = false;
  if (const auto *OPD = dyn_cast<OMPParallelDirective>(&D))
    HasCancel = OPD->hasCancel();
  else if (const auto *OPD = dyn_cast<OMPTargetParallelDirective>(&D))
    HasCancel = OPD->hasCancel();
  else if (const auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D))
    HasCancel = OPSD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPTargetParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD =
               dyn_cast<OMPTeamsDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD =
               dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();

  // TODO: Temporarily inform the OpenMPIRBuilder, if any, about the new
  //       parallel region to make cancellation barriers work properly.
  llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
  PushAndPopStackRAII PSR(&OMPBuilder, CGF, HasCancel, InnermostKind);
  CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind,
                                    HasCancel, OutlinedHelperName);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  return CGF.GenerateOpenMPCapturedStmtFunction(*CS, D.getBeginLoc());
}

llvm::Function *CGOpenMPRuntime::emitParallelOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  const CapturedStmt *CS = D.getCapturedStmt(OMPD_parallel);
  return emitParallelOrTeamsOutlinedFunction(
      CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
}

llvm::Function *CGOpenMPRuntime::emitTeamsOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  const CapturedStmt *CS = D.getCapturedStmt(OMPD_teams);
  return emitParallelOrTeamsOutlinedFunction(
      CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
}

llvm::Function *CGOpenMPRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF,
                                              PrePostActionTy &) {
    llvm::Value *ThreadID = getThreadID(CGF, D.getBeginLoc());
    llvm::Value *UpLoc = emitUpdateLocation(CGF, D.getBeginLoc());
    llvm::Value *TaskArgs[] = {
        UpLoc, ThreadID,
        CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar),
                                    TaskTVar->getType()->castAs<PointerType>())
            .getPointer(CGF)};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_omp_task),
                        TaskArgs);
  };
  CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar,
                                                            UntiedCodeGen);
  CodeGen.setAction(Action);
  assert(!ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 for tasks");
  const OpenMPDirectiveKind Region =
      isOpenMPTaskLoopDirective(D.getDirectiveKind()) ? OMPD_taskloop
                                                      : OMPD_task;
  const CapturedStmt *CS = D.getCapturedStmt(Region);
  bool HasCancel = false;
  if (const auto *TD = dyn_cast<OMPTaskDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPMasterTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPParallelMasterTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();

  CodeGenFunction CGF(CGM, true);
  CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen,
                                        InnermostKind, HasCancel, Action);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  llvm::Function *Res = CGF.GenerateCapturedStmtFunction(*CS);
  if (!Tied)
    NumberOfParts = Action.getNumberOfParts();
  return Res;
}

static void buildStructValue(ConstantStructBuilder &Fields, CodeGenModule &CGM,
                             const RecordDecl *RD, const CGRecordLayout &RL,
                             ArrayRef<llvm::Constant *> Data) {
  llvm::StructType *StructTy = RL.getLLVMType();
  unsigned PrevIdx = 0;
  ConstantInitBuilder CIBuilder(CGM);
  auto DI = Data.begin();
  for (const FieldDecl *FD : RD->fields()) {
    unsigned Idx = RL.getLLVMFieldNo(FD);
    // Fill the alignment.
    for (unsigned I = PrevIdx; I < Idx; ++I)
      Fields.add(llvm::Constant::getNullValue(StructTy->getElementType(I)));
    PrevIdx = Idx + 1;
    Fields.add(*DI);
    ++DI;
  }
}

template <class... As>
static llvm::GlobalVariable *
createGlobalStruct(CodeGenModule &CGM, QualType Ty, bool IsConstant,
                   ArrayRef<llvm::Constant *> Data, const Twine &Name,
                   As &&... Args) {
  const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl());
  const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD);
  ConstantInitBuilder CIBuilder(CGM);
  ConstantStructBuilder Fields = CIBuilder.beginStruct(RL.getLLVMType());
  buildStructValue(Fields, CGM, RD, RL, Data);
  return Fields.finishAndCreateGlobal(
      Name, CGM.getContext().getAlignOfGlobalVarInChars(Ty), IsConstant,
      std::forward<As>(Args)...);
}

template <typename T>
static void
createConstantGlobalStructAndAddToParent(CodeGenModule &CGM, QualType Ty,
                                         ArrayRef<llvm::Constant *> Data,
                                         T &Parent) {
  const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl());
  const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD);
  ConstantStructBuilder Fields = Parent.beginStruct(RL.getLLVMType());
  buildStructValue(Fields, CGM, RD, RL, Data);
  Fields.finishAndAddTo(Parent);
}

void CGOpenMPRuntime::setLocThreadIdInsertPt(CodeGenFunction &CGF,
                                             bool AtCurrentPoint) {
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
  assert(!Elem.second.ServiceInsertPt && "Insert point is set already.");

  llvm::Value *Undef = llvm::UndefValue::get(CGF.Int32Ty);
  if (AtCurrentPoint) {
    Elem.second.ServiceInsertPt = new llvm::BitCastInst(
        Undef, CGF.Int32Ty, "svcpt", CGF.Builder.GetInsertBlock());
  } else {
    Elem.second.ServiceInsertPt =
        new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt");
    Elem.second.ServiceInsertPt->insertAfter(CGF.AllocaInsertPt);
  }
}

void CGOpenMPRuntime::clearLocThreadIdInsertPt(CodeGenFunction &CGF) {
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
  if (Elem.second.ServiceInsertPt) {
    llvm::Instruction *Ptr = Elem.second.ServiceInsertPt;
    Elem.second.ServiceInsertPt = nullptr;
    Ptr->eraseFromParent();
  }
}

static StringRef getIdentStringFromSourceLocation(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  SmallString<128> &Buffer) {
  llvm::raw_svector_ostream OS(Buffer);
  // Build debug location
  PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
  OS << ";" << PLoc.getFilename() << ";";
  if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
    OS << FD->getQualifiedNameAsString();
  OS << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;";
  return OS.str();
}
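// Illustration (assumed file and function names): for a directive at line 10,
// column 3 of "t.c" inside a function 'foo', the routine above produces the
// ident string
//   ";t.c;foo;10;3;;"
// matching the semi-colon separated psource format described earlier.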
1488 LValue LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF);
1489 llvm::BasicBlock *TopBlock = CGF.AllocaInsertPt->getParent();
1490 if (!CGF.EHStack.requiresLandingPad() || !CGF.getLangOpts().Exceptions ||
1491 !CGF.getLangOpts().CXXExceptions ||
1492 CGF.Builder.GetInsertBlock() == TopBlock ||
1493 !isa<llvm::Instruction>(LVal.getPointer(CGF)) ||
1494 cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
1495 TopBlock ||
1496 cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
1497 CGF.Builder.GetInsertBlock()) {
1498 ThreadID = CGF.EmitLoadOfScalar(LVal, Loc);
1499 // If the value was loaded in the entry block, cache it and use it
1500 // everywhere in the function.
1501 if (CGF.Builder.GetInsertBlock() == TopBlock) {
1502 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1503 Elem.second.ThreadID = ThreadID;
1504 }
1505 return ThreadID;
1506 }
1507 }
1508 }
1509
1510 // This is not an outlined function region - need to call kmp_int32
1511 // __kmpc_global_thread_num(ident_t *loc).
1512 // Generate the thread id value and cache it for use across the
1513 // function.
1514 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1515 if (!Elem.second.ServiceInsertPt)
1516 setLocThreadIdInsertPt(CGF);
1517 CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
1518 CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt);
1519 llvm::CallInst *Call = CGF.Builder.CreateCall(
1520 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
1521 OMPRTL___kmpc_global_thread_num),
1522 emitUpdateLocation(CGF, Loc));
1523 Call->setCallingConv(CGF.getRuntimeCC());
1524 Elem.second.ThreadID = Call;
1525 return Call;
1526 }
1527
1528 void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) {
1529 assert(CGF.CurFn && "No function in current CodeGenFunction.");
1530 if (OpenMPLocThreadIDMap.count(CGF.CurFn)) {
1531 clearLocThreadIdInsertPt(CGF);
1532 OpenMPLocThreadIDMap.erase(CGF.CurFn);
1533 }
1534 if (FunctionUDRMap.count(CGF.CurFn) > 0) {
1535 for (const auto *D : FunctionUDRMap[CGF.CurFn])
1536 UDRMap.erase(D);
1537 FunctionUDRMap.erase(CGF.CurFn);
1538 }
1539 auto I = FunctionUDMMap.find(CGF.CurFn);
1540 if (I != FunctionUDMMap.end()) {
1541 for (const auto *D : I->second)
1542 UDMMap.erase(D);
1543 FunctionUDMMap.erase(I);
1544 }
1545 LastprivateConditionalToTypes.erase(CGF.CurFn);
1546 FunctionToUntiedTaskStackMap.erase(CGF.CurFn);
1547 }
1548
1549 llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() {
1550 return OMPBuilder.IdentPtr;
1551 }
1552
1553 llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() {
1554 if (!Kmpc_MicroTy) {
1555 // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...)
1556 llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty),
1557 llvm::PointerType::getUnqual(CGM.Int32Ty)};
1558 Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true);
1559 }
1560 return llvm::PointerType::getUnqual(Kmpc_MicroTy);
1561 }
1562
1563 llvm::FunctionCallee
1564 CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize, bool IVSigned,
1565 bool IsGPUDistribute) {
1566 assert((IVSize == 32 || IVSize == 64) &&
1567 "IV size is not compatible with the omp runtime");
1568 StringRef Name;
1569 if (IsGPUDistribute)
1570 Name = IVSize == 32 ? (IVSigned ? "__kmpc_distribute_static_init_4"
1571 : "__kmpc_distribute_static_init_4u")
1572 : (IVSigned ? "__kmpc_distribute_static_init_8"
1573 : "__kmpc_distribute_static_init_8u");
1574 else
1575 Name = IVSize == 32 ? (IVSigned ?
"__kmpc_for_static_init_4" 1576 : "__kmpc_for_static_init_4u") 1577 : (IVSigned ? "__kmpc_for_static_init_8" 1578 : "__kmpc_for_static_init_8u"); 1579 1580 llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty; 1581 auto *PtrTy = llvm::PointerType::getUnqual(ITy); 1582 llvm::Type *TypeParams[] = { 1583 getIdentTyPointerTy(), // loc 1584 CGM.Int32Ty, // tid 1585 CGM.Int32Ty, // schedtype 1586 llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter 1587 PtrTy, // p_lower 1588 PtrTy, // p_upper 1589 PtrTy, // p_stride 1590 ITy, // incr 1591 ITy // chunk 1592 }; 1593 auto *FnTy = 1594 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1595 return CGM.CreateRuntimeFunction(FnTy, Name); 1596 } 1597 1598 llvm::FunctionCallee 1599 CGOpenMPRuntime::createDispatchInitFunction(unsigned IVSize, bool IVSigned) { 1600 assert((IVSize == 32 || IVSize == 64) && 1601 "IV size is not compatible with the omp runtime"); 1602 StringRef Name = 1603 IVSize == 32 1604 ? (IVSigned ? "__kmpc_dispatch_init_4" : "__kmpc_dispatch_init_4u") 1605 : (IVSigned ? "__kmpc_dispatch_init_8" : "__kmpc_dispatch_init_8u"); 1606 llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty; 1607 llvm::Type *TypeParams[] = { getIdentTyPointerTy(), // loc 1608 CGM.Int32Ty, // tid 1609 CGM.Int32Ty, // schedtype 1610 ITy, // lower 1611 ITy, // upper 1612 ITy, // stride 1613 ITy // chunk 1614 }; 1615 auto *FnTy = 1616 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1617 return CGM.CreateRuntimeFunction(FnTy, Name); 1618 } 1619 1620 llvm::FunctionCallee 1621 CGOpenMPRuntime::createDispatchFiniFunction(unsigned IVSize, bool IVSigned) { 1622 assert((IVSize == 32 || IVSize == 64) && 1623 "IV size is not compatible with the omp runtime"); 1624 StringRef Name = 1625 IVSize == 32 1626 ? (IVSigned ? "__kmpc_dispatch_fini_4" : "__kmpc_dispatch_fini_4u") 1627 : (IVSigned ? "__kmpc_dispatch_fini_8" : "__kmpc_dispatch_fini_8u"); 1628 llvm::Type *TypeParams[] = { 1629 getIdentTyPointerTy(), // loc 1630 CGM.Int32Ty, // tid 1631 }; 1632 auto *FnTy = 1633 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 1634 return CGM.CreateRuntimeFunction(FnTy, Name); 1635 } 1636 1637 llvm::FunctionCallee 1638 CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize, bool IVSigned) { 1639 assert((IVSize == 32 || IVSize == 64) && 1640 "IV size is not compatible with the omp runtime"); 1641 StringRef Name = 1642 IVSize == 32 1643 ? (IVSigned ? "__kmpc_dispatch_next_4" : "__kmpc_dispatch_next_4u") 1644 : (IVSigned ? "__kmpc_dispatch_next_8" : "__kmpc_dispatch_next_8u"); 1645 llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty; 1646 auto *PtrTy = llvm::PointerType::getUnqual(ITy); 1647 llvm::Type *TypeParams[] = { 1648 getIdentTyPointerTy(), // loc 1649 CGM.Int32Ty, // tid 1650 llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter 1651 PtrTy, // p_lower 1652 PtrTy, // p_upper 1653 PtrTy // p_stride 1654 }; 1655 auto *FnTy = 1656 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 1657 return CGM.CreateRuntimeFunction(FnTy, Name); 1658 } 1659 1660 /// Obtain information that uniquely identifies a target entry. This 1661 /// consists of the file and device IDs as well as line number associated with 1662 /// the relevant entry source location. 
1663 static void getTargetEntryUniqueInfo(ASTContext &C, SourceLocation Loc,
1664 unsigned &DeviceID, unsigned &FileID,
1665 unsigned &LineNum) {
1666 SourceManager &SM = C.getSourceManager();
1667
1668 // The loc should always be valid and have a file ID (the user cannot use
1669 // #pragma directives in macros).
1670
1671 assert(Loc.isValid() && "Source location is expected to be always valid.");
1672
1673 PresumedLoc PLoc = SM.getPresumedLoc(Loc);
1674 assert(PLoc.isValid() && "Source location is expected to be always valid.");
1675
1676 llvm::sys::fs::UniqueID ID;
1677 if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID)) {
1678 PLoc = SM.getPresumedLoc(Loc, /*UseLineDirectives=*/false);
1679 assert(PLoc.isValid() && "Source location is expected to be always valid.");
1680 if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID))
1681 SM.getDiagnostics().Report(diag::err_cannot_open_file)
1682 << PLoc.getFilename() << EC.message();
1683 }
1684
1685 DeviceID = ID.getDevice();
1686 FileID = ID.getFile();
1687 LineNum = PLoc.getLine();
1688 }
1689
1690 Address CGOpenMPRuntime::getAddrOfDeclareTargetVar(const VarDecl *VD) {
1691 if (CGM.getLangOpts().OpenMPSimd)
1692 return Address::invalid();
1693 llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
1694 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
1695 if (Res && (*Res == OMPDeclareTargetDeclAttr::MT_Link ||
1696 (*Res == OMPDeclareTargetDeclAttr::MT_To &&
1697 HasRequiresUnifiedSharedMemory))) {
1698 SmallString<64> PtrName;
1699 {
1700 llvm::raw_svector_ostream OS(PtrName);
1701 OS << CGM.getMangledName(GlobalDecl(VD));
1702 if (!VD->isExternallyVisible()) {
1703 unsigned DeviceID, FileID, Line;
1704 getTargetEntryUniqueInfo(CGM.getContext(),
1705 VD->getCanonicalDecl()->getBeginLoc(),
1706 DeviceID, FileID, Line);
1707 OS << llvm::format("_%x", FileID);
1708 }
1709 OS << "_decl_tgt_ref_ptr";
1710 }
1711 llvm::Value *Ptr = CGM.getModule().getNamedValue(PtrName);
1712 if (!Ptr) {
1713 QualType PtrTy = CGM.getContext().getPointerType(VD->getType());
1714 Ptr = getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(PtrTy),
1715 PtrName);
1716
1717 auto *GV = cast<llvm::GlobalVariable>(Ptr);
1718 GV->setLinkage(llvm::GlobalValue::WeakAnyLinkage);
1719
1720 if (!CGM.getLangOpts().OpenMPIsDevice)
1721 GV->setInitializer(CGM.GetAddrOfGlobal(VD));
1722 registerTargetGlobalVariable(VD, cast<llvm::Constant>(Ptr));
1723 }
1724 return Address(Ptr, CGM.getContext().getDeclAlign(VD));
1725 }
1726 return Address::invalid();
1727 }
1728
1729 llvm::Constant *
1730 CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) {
1731 assert(!CGM.getLangOpts().OpenMPUseTLS ||
1732 !CGM.getContext().getTargetInfo().isTLSSupported());
1733 // Look up the entry, lazily creating it if necessary.
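// For a variable 'x' this produces a global named roughly
// "<mangled-name-of-x>.cache." (the exact separators are target-dependent);
// the runtime populates the cache on first use via
// __kmpc_threadprivate_cached.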
1734 std::string Suffix = getName({"cache", ""}); 1735 return getOrCreateInternalVariable( 1736 CGM.Int8PtrPtrTy, Twine(CGM.getMangledName(VD)).concat(Suffix)); 1737 } 1738 1739 Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF, 1740 const VarDecl *VD, 1741 Address VDAddr, 1742 SourceLocation Loc) { 1743 if (CGM.getLangOpts().OpenMPUseTLS && 1744 CGM.getContext().getTargetInfo().isTLSSupported()) 1745 return VDAddr; 1746 1747 llvm::Type *VarTy = VDAddr.getElementType(); 1748 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 1749 CGF.Builder.CreatePointerCast(VDAddr.getPointer(), 1750 CGM.Int8PtrTy), 1751 CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)), 1752 getOrCreateThreadPrivateCache(VD)}; 1753 return Address(CGF.EmitRuntimeCall( 1754 OMPBuilder.getOrCreateRuntimeFunction( 1755 CGM.getModule(), OMPRTL___kmpc_threadprivate_cached), 1756 Args), 1757 VDAddr.getAlignment()); 1758 } 1759 1760 void CGOpenMPRuntime::emitThreadPrivateVarInit( 1761 CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor, 1762 llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) { 1763 // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime 1764 // library. 1765 llvm::Value *OMPLoc = emitUpdateLocation(CGF, Loc); 1766 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 1767 CGM.getModule(), OMPRTL___kmpc_global_thread_num), 1768 OMPLoc); 1769 // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor) 1770 // to register constructor/destructor for variable. 1771 llvm::Value *Args[] = { 1772 OMPLoc, CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.VoidPtrTy), 1773 Ctor, CopyCtor, Dtor}; 1774 CGF.EmitRuntimeCall( 1775 OMPBuilder.getOrCreateRuntimeFunction( 1776 CGM.getModule(), OMPRTL___kmpc_threadprivate_register), 1777 Args); 1778 } 1779 1780 llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition( 1781 const VarDecl *VD, Address VDAddr, SourceLocation Loc, 1782 bool PerformInit, CodeGenFunction *CGF) { 1783 if (CGM.getLangOpts().OpenMPUseTLS && 1784 CGM.getContext().getTargetInfo().isTLSSupported()) 1785 return nullptr; 1786 1787 VD = VD->getDefinition(CGM.getContext()); 1788 if (VD && ThreadPrivateWithDefinition.insert(CGM.getMangledName(VD)).second) { 1789 QualType ASTTy = VD->getType(); 1790 1791 llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr; 1792 const Expr *Init = VD->getAnyInitializer(); 1793 if (CGM.getLangOpts().CPlusPlus && PerformInit) { 1794 // Generate function that re-emits the declaration's initializer into the 1795 // threadprivate copy of the variable VD 1796 CodeGenFunction CtorCGF(CGM); 1797 FunctionArgList Args; 1798 ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc, 1799 /*Id=*/nullptr, CGM.getContext().VoidPtrTy, 1800 ImplicitParamDecl::Other); 1801 Args.push_back(&Dst); 1802 1803 const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration( 1804 CGM.getContext().VoidPtrTy, Args); 1805 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); 1806 std::string Name = getName({"__kmpc_global_ctor_", ""}); 1807 llvm::Function *Fn = 1808 CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc); 1809 CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI, 1810 Args, Loc, Loc); 1811 llvm::Value *ArgVal = CtorCGF.EmitLoadOfScalar( 1812 CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false, 1813 CGM.getContext().VoidPtrTy, Dst.getLocation()); 1814 Address Arg = Address(ArgVal, VDAddr.getAlignment()); 1815 Arg = 
CtorCGF.Builder.CreateElementBitCast(
1816 Arg, CtorCGF.ConvertTypeForMem(ASTTy));
1817 CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(),
1818 /*IsInitializer=*/true);
1819 ArgVal = CtorCGF.EmitLoadOfScalar(
1820 CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
1821 CGM.getContext().VoidPtrTy, Dst.getLocation());
1822 CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue);
1823 CtorCGF.FinishFunction();
1824 Ctor = Fn;
1825 }
1826 if (VD->getType().isDestructedType() != QualType::DK_none) {
1827 // Generate a function that emits a destructor call for the threadprivate
1828 // copy of the variable VD.
1829 CodeGenFunction DtorCGF(CGM);
1830 FunctionArgList Args;
1831 ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
1832 /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
1833 ImplicitParamDecl::Other);
1834 Args.push_back(&Dst);
1835
1836 const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
1837 CGM.getContext().VoidTy, Args);
1838 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
1839 std::string Name = getName({"__kmpc_global_dtor_", ""});
1840 llvm::Function *Fn =
1841 CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
1842 auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
1843 DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args,
1844 Loc, Loc);
1845 // Create a scope with an artificial location for the body of this function.
1846 auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
1847 llvm::Value *ArgVal = DtorCGF.EmitLoadOfScalar(
1848 DtorCGF.GetAddrOfLocalVar(&Dst),
1849 /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation());
1850 DtorCGF.emitDestroy(Address(ArgVal, VDAddr.getAlignment()), ASTTy,
1851 DtorCGF.getDestroyer(ASTTy.isDestructedType()),
1852 DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
1853 DtorCGF.FinishFunction();
1854 Dtor = Fn;
1855 }
1856 // Do not emit the init function if it is not required.
1857 if (!Ctor && !Dtor)
1858 return nullptr;
1859
1860 llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
1861 auto *CopyCtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs,
1862 /*isVarArg=*/false)
1863 ->getPointerTo();
1864 // Copying constructor for the threadprivate variable.
1865 // Must be NULL - reserved by the runtime, which currently requires that this
1866 // parameter always be NULL. Otherwise it fires an assertion.
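// Any slot without a user-provided function is filled with a typed null
// function pointer below, which keeps the later
// __kmpc_threadprivate_register call well-typed when no constructor or
// destructor is required.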
1867 CopyCtor = llvm::Constant::getNullValue(CopyCtorTy); 1868 if (Ctor == nullptr) { 1869 auto *CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy, 1870 /*isVarArg=*/false) 1871 ->getPointerTo(); 1872 Ctor = llvm::Constant::getNullValue(CtorTy); 1873 } 1874 if (Dtor == nullptr) { 1875 auto *DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy, 1876 /*isVarArg=*/false) 1877 ->getPointerTo(); 1878 Dtor = llvm::Constant::getNullValue(DtorTy); 1879 } 1880 if (!CGF) { 1881 auto *InitFunctionTy = 1882 llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false); 1883 std::string Name = getName({"__omp_threadprivate_init_", ""}); 1884 llvm::Function *InitFunction = CGM.CreateGlobalInitOrCleanUpFunction( 1885 InitFunctionTy, Name, CGM.getTypes().arrangeNullaryFunction()); 1886 CodeGenFunction InitCGF(CGM); 1887 FunctionArgList ArgList; 1888 InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction, 1889 CGM.getTypes().arrangeNullaryFunction(), ArgList, 1890 Loc, Loc); 1891 emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc); 1892 InitCGF.FinishFunction(); 1893 return InitFunction; 1894 } 1895 emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc); 1896 } 1897 return nullptr; 1898 } 1899 1900 bool CGOpenMPRuntime::emitDeclareTargetVarDefinition(const VarDecl *VD, 1901 llvm::GlobalVariable *Addr, 1902 bool PerformInit) { 1903 if (CGM.getLangOpts().OMPTargetTriples.empty() && 1904 !CGM.getLangOpts().OpenMPIsDevice) 1905 return false; 1906 Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 1907 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD); 1908 if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link || 1909 (*Res == OMPDeclareTargetDeclAttr::MT_To && 1910 HasRequiresUnifiedSharedMemory)) 1911 return CGM.getLangOpts().OpenMPIsDevice; 1912 VD = VD->getDefinition(CGM.getContext()); 1913 assert(VD && "Unknown VarDecl"); 1914 1915 if (!DeclareTargetWithDefinition.insert(CGM.getMangledName(VD)).second) 1916 return CGM.getLangOpts().OpenMPIsDevice; 1917 1918 QualType ASTTy = VD->getType(); 1919 SourceLocation Loc = VD->getCanonicalDecl()->getBeginLoc(); 1920 1921 // Produce the unique prefix to identify the new target regions. We use 1922 // the source location of the variable declaration which we know to not 1923 // conflict with any target region. 
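// The prefix built below has the shape (sketch; the IDs are printed in hex):
//   __omp_offloading_<device-id>_<file-id>_<variable-name>_l<line>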
1924 unsigned DeviceID; 1925 unsigned FileID; 1926 unsigned Line; 1927 getTargetEntryUniqueInfo(CGM.getContext(), Loc, DeviceID, FileID, Line); 1928 SmallString<128> Buffer, Out; 1929 { 1930 llvm::raw_svector_ostream OS(Buffer); 1931 OS << "__omp_offloading_" << llvm::format("_%x", DeviceID) 1932 << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line; 1933 } 1934 1935 const Expr *Init = VD->getAnyInitializer(); 1936 if (CGM.getLangOpts().CPlusPlus && PerformInit) { 1937 llvm::Constant *Ctor; 1938 llvm::Constant *ID; 1939 if (CGM.getLangOpts().OpenMPIsDevice) { 1940 // Generate function that re-emits the declaration's initializer into 1941 // the threadprivate copy of the variable VD 1942 CodeGenFunction CtorCGF(CGM); 1943 1944 const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction(); 1945 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); 1946 llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction( 1947 FTy, Twine(Buffer, "_ctor"), FI, Loc); 1948 auto NL = ApplyDebugLocation::CreateEmpty(CtorCGF); 1949 CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, 1950 FunctionArgList(), Loc, Loc); 1951 auto AL = ApplyDebugLocation::CreateArtificial(CtorCGF); 1952 CtorCGF.EmitAnyExprToMem(Init, 1953 Address(Addr, CGM.getContext().getDeclAlign(VD)), 1954 Init->getType().getQualifiers(), 1955 /*IsInitializer=*/true); 1956 CtorCGF.FinishFunction(); 1957 Ctor = Fn; 1958 ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy); 1959 CGM.addUsedGlobal(cast<llvm::GlobalValue>(Ctor)); 1960 } else { 1961 Ctor = new llvm::GlobalVariable( 1962 CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true, 1963 llvm::GlobalValue::PrivateLinkage, 1964 llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_ctor")); 1965 ID = Ctor; 1966 } 1967 1968 // Register the information for the entry associated with the constructor. 1969 Out.clear(); 1970 OffloadEntriesInfoManager.registerTargetRegionEntryInfo( 1971 DeviceID, FileID, Twine(Buffer, "_ctor").toStringRef(Out), Line, Ctor, 1972 ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryCtor); 1973 } 1974 if (VD->getType().isDestructedType() != QualType::DK_none) { 1975 llvm::Constant *Dtor; 1976 llvm::Constant *ID; 1977 if (CGM.getLangOpts().OpenMPIsDevice) { 1978 // Generate function that emits destructor call for the threadprivate 1979 // copy of the variable VD 1980 CodeGenFunction DtorCGF(CGM); 1981 1982 const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction(); 1983 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); 1984 llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction( 1985 FTy, Twine(Buffer, "_dtor"), FI, Loc); 1986 auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF); 1987 DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, 1988 FunctionArgList(), Loc, Loc); 1989 // Create a scope with an artificial location for the body of this 1990 // function. 
1991 auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF); 1992 DtorCGF.emitDestroy(Address(Addr, CGM.getContext().getDeclAlign(VD)), 1993 ASTTy, DtorCGF.getDestroyer(ASTTy.isDestructedType()), 1994 DtorCGF.needsEHCleanup(ASTTy.isDestructedType())); 1995 DtorCGF.FinishFunction(); 1996 Dtor = Fn; 1997 ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy); 1998 CGM.addUsedGlobal(cast<llvm::GlobalValue>(Dtor)); 1999 } else { 2000 Dtor = new llvm::GlobalVariable( 2001 CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true, 2002 llvm::GlobalValue::PrivateLinkage, 2003 llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_dtor")); 2004 ID = Dtor; 2005 } 2006 // Register the information for the entry associated with the destructor. 2007 Out.clear(); 2008 OffloadEntriesInfoManager.registerTargetRegionEntryInfo( 2009 DeviceID, FileID, Twine(Buffer, "_dtor").toStringRef(Out), Line, Dtor, 2010 ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryDtor); 2011 } 2012 return CGM.getLangOpts().OpenMPIsDevice; 2013 } 2014 2015 Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF, 2016 QualType VarType, 2017 StringRef Name) { 2018 std::string Suffix = getName({"artificial", ""}); 2019 llvm::Type *VarLVType = CGF.ConvertTypeForMem(VarType); 2020 llvm::Value *GAddr = 2021 getOrCreateInternalVariable(VarLVType, Twine(Name).concat(Suffix)); 2022 if (CGM.getLangOpts().OpenMP && CGM.getLangOpts().OpenMPUseTLS && 2023 CGM.getTarget().isTLSSupported()) { 2024 cast<llvm::GlobalVariable>(GAddr)->setThreadLocal(/*Val=*/true); 2025 return Address(GAddr, CGM.getContext().getTypeAlignInChars(VarType)); 2026 } 2027 std::string CacheSuffix = getName({"cache", ""}); 2028 llvm::Value *Args[] = { 2029 emitUpdateLocation(CGF, SourceLocation()), 2030 getThreadID(CGF, SourceLocation()), 2031 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(GAddr, CGM.VoidPtrTy), 2032 CGF.Builder.CreateIntCast(CGF.getTypeSize(VarType), CGM.SizeTy, 2033 /*isSigned=*/false), 2034 getOrCreateInternalVariable( 2035 CGM.VoidPtrPtrTy, Twine(Name).concat(Suffix).concat(CacheSuffix))}; 2036 return Address( 2037 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 2038 CGF.EmitRuntimeCall( 2039 OMPBuilder.getOrCreateRuntimeFunction( 2040 CGM.getModule(), OMPRTL___kmpc_threadprivate_cached), 2041 Args), 2042 VarLVType->getPointerTo(/*AddrSpace=*/0)), 2043 CGM.getContext().getTypeAlignInChars(VarType)); 2044 } 2045 2046 void CGOpenMPRuntime::emitIfClause(CodeGenFunction &CGF, const Expr *Cond, 2047 const RegionCodeGenTy &ThenGen, 2048 const RegionCodeGenTy &ElseGen) { 2049 CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange()); 2050 2051 // If the condition constant folds and can be elided, try to avoid emitting 2052 // the condition and the dead arm of the if/else. 2053 bool CondConstant; 2054 if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) { 2055 if (CondConstant) 2056 ThenGen(CGF); 2057 else 2058 ElseGen(CGF); 2059 return; 2060 } 2061 2062 // Otherwise, the condition did not fold, or we couldn't elide it. Just 2063 // emit the conditional branch. 2064 llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("omp_if.then"); 2065 llvm::BasicBlock *ElseBlock = CGF.createBasicBlock("omp_if.else"); 2066 llvm::BasicBlock *ContBlock = CGF.createBasicBlock("omp_if.end"); 2067 CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0); 2068 2069 // Emit the 'then' code. 2070 CGF.EmitBlock(ThenBlock); 2071 ThenGen(CGF); 2072 CGF.EmitBranch(ContBlock); 2073 // Emit the 'else' code if present. 
2074 // There is no need to emit a line number for the unconditional branch.
2075 (void)ApplyDebugLocation::CreateEmpty(CGF);
2076 CGF.EmitBlock(ElseBlock);
2077 ElseGen(CGF);
2078 // There is no need to emit a line number for the unconditional branch.
2079 (void)ApplyDebugLocation::CreateEmpty(CGF);
2080 CGF.EmitBranch(ContBlock);
2081 // Emit the continuation block for code after the if.
2082 CGF.EmitBlock(ContBlock, /*IsFinished=*/true);
2083 }
2084
2085 void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
2086 llvm::Function *OutlinedFn,
2087 ArrayRef<llvm::Value *> CapturedVars,
2088 const Expr *IfCond) {
2089 if (!CGF.HaveInsertPoint())
2090 return;
2091 llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
2092 auto &M = CGM.getModule();
2093 auto &&ThenGen = [&M, OutlinedFn, CapturedVars, RTLoc,
2094 this](CodeGenFunction &CGF, PrePostActionTy &) {
2095 // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn);
2096 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
2097 llvm::Value *Args[] = {
2098 RTLoc,
2099 CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
2100 CGF.Builder.CreateBitCast(OutlinedFn, RT.getKmpc_MicroPointerTy())};
2101 llvm::SmallVector<llvm::Value *, 16> RealArgs;
2102 RealArgs.append(std::begin(Args), std::end(Args));
2103 RealArgs.append(CapturedVars.begin(), CapturedVars.end());
2104
2105 llvm::FunctionCallee RTLFn =
2106 OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_fork_call);
2107 CGF.EmitRuntimeCall(RTLFn, RealArgs);
2108 };
2109 auto &&ElseGen = [&M, OutlinedFn, CapturedVars, RTLoc, Loc,
2110 this](CodeGenFunction &CGF, PrePostActionTy &) {
2111 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
2112 llvm::Value *ThreadID = RT.getThreadID(CGF, Loc);
2113 // Build calls:
2114 // __kmpc_serialized_parallel(&Loc, GTid);
2115 llvm::Value *Args[] = {RTLoc, ThreadID};
2116 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2117 M, OMPRTL___kmpc_serialized_parallel),
2118 Args);
2119
2120 // OutlinedFn(&gtid, &zero_bound, CapturedStruct);
2121 Address ThreadIDAddr = RT.emitThreadIDAddress(CGF, Loc);
2122 Address ZeroAddrBound =
2123 CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty,
2124 /*Name=*/".bound.zero.addr");
2125 CGF.Builder.CreateStore(CGF.Builder.getInt32(/*C*/ 0), ZeroAddrBound);
2126 llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
2127 // The ThreadId for serialized parallels is 0.
2128 OutlinedFnArgs.push_back(ThreadIDAddr.getPointer());
2129 OutlinedFnArgs.push_back(ZeroAddrBound.getPointer());
2130 OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());
2131
2132 // Ensure we do not inline the function. This is trivially true for the ones
2133 // passed to __kmpc_fork_call, but the ones called in serialized regions
2134 // could be inlined. This is not perfect, but it is closer to the invariant
2135 // we want, namely, every data environment starts with a new function.
2136 // TODO: We should pass the if condition to the runtime function and do the
2137 // handling there. Much cleaner code.
2138 OutlinedFn->removeFnAttr(llvm::Attribute::AlwaysInline);
2139 OutlinedFn->addFnAttr(llvm::Attribute::NoInline);
2140 RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs);
2141
2142 // __kmpc_end_serialized_parallel(&Loc, GTid);
2143 llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID};
2144 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2145 M, OMPRTL___kmpc_end_serialized_parallel),
2146 EndArgs);
2147 };
2148 if (IfCond) {
2149 emitIfClause(CGF, IfCond, ThenGen, ElseGen);
2150 } else {
2151 RegionCodeGenTy ThenRCG(ThenGen);
2152 ThenRCG(CGF);
2153 }
2154 }
2155
2156 // If we're inside an (outlined) parallel region, use the region info's
2157 // thread-ID variable (it is passed as the first argument of the outlined
2158 // function as "kmp_int32 *gtid"). Otherwise, if we're not inside a parallel
2159 // region but in a regular serial code region, get the thread ID by calling
2160 // kmp_int32 __kmpc_global_thread_num(ident_t *loc), stash this thread ID in a
2161 // temporary and return the address of that temporary.
2162 Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF,
2163 SourceLocation Loc) {
2164 if (auto *OMPRegionInfo =
2165 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
2166 if (OMPRegionInfo->getThreadIDVariable())
2167 return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress(CGF);
2168
2169 llvm::Value *ThreadID = getThreadID(CGF, Loc);
2170 QualType Int32Ty =
2171 CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true);
2172 Address ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp.");
2173 CGF.EmitStoreOfScalar(ThreadID,
2174 CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty));
2175
2176 return ThreadIDTemp;
2177 }
2178
2179 llvm::Constant *CGOpenMPRuntime::getOrCreateInternalVariable(
2180 llvm::Type *Ty, const llvm::Twine &Name, unsigned AddressSpace) {
2181 SmallString<256> Buffer;
2182 llvm::raw_svector_ostream Out(Buffer);
2183 Out << Name;
2184 StringRef RuntimeName = Out.str();
2185 auto &Elem = *InternalVars.try_emplace(RuntimeName, nullptr).first;
2186 if (Elem.second) {
2187 assert(Elem.second->getType()->getPointerElementType() == Ty &&
2188 "OMP internal variable has different type than requested");
2189 return &*Elem.second;
2190 }
2191
2192 return Elem.second = new llvm::GlobalVariable(
2193 CGM.getModule(), Ty, /*IsConstant*/ false,
2194 llvm::GlobalValue::CommonLinkage, llvm::Constant::getNullValue(Ty),
2195 Elem.first(), /*InsertBefore=*/nullptr,
2196 llvm::GlobalValue::NotThreadLocal, AddressSpace);
2197 }
2198
2199 llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) {
2200 std::string Prefix = Twine("gomp_critical_user_", CriticalName).str();
2201 std::string Name = getName({Prefix, "var"});
2202 return getOrCreateInternalVariable(KmpCriticalNameTy, Name);
2203 }
2204
2205 namespace {
2206 /// Common pre(post)-action for different OpenMP constructs.
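/// Enter() emits the "begin" runtime call; when Conditional is set, the
/// region body is guarded by that call's result and Done() must be called
/// to close the guard. Exit() emits the matching "end" runtime call.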
2207 class CommonActionTy final : public PrePostActionTy { 2208 llvm::FunctionCallee EnterCallee; 2209 ArrayRef<llvm::Value *> EnterArgs; 2210 llvm::FunctionCallee ExitCallee; 2211 ArrayRef<llvm::Value *> ExitArgs; 2212 bool Conditional; 2213 llvm::BasicBlock *ContBlock = nullptr; 2214 2215 public: 2216 CommonActionTy(llvm::FunctionCallee EnterCallee, 2217 ArrayRef<llvm::Value *> EnterArgs, 2218 llvm::FunctionCallee ExitCallee, 2219 ArrayRef<llvm::Value *> ExitArgs, bool Conditional = false) 2220 : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee), 2221 ExitArgs(ExitArgs), Conditional(Conditional) {} 2222 void Enter(CodeGenFunction &CGF) override { 2223 llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs); 2224 if (Conditional) { 2225 llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes); 2226 auto *ThenBlock = CGF.createBasicBlock("omp_if.then"); 2227 ContBlock = CGF.createBasicBlock("omp_if.end"); 2228 // Generate the branch (If-stmt) 2229 CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock); 2230 CGF.EmitBlock(ThenBlock); 2231 } 2232 } 2233 void Done(CodeGenFunction &CGF) { 2234 // Emit the rest of blocks/branches 2235 CGF.EmitBranch(ContBlock); 2236 CGF.EmitBlock(ContBlock, true); 2237 } 2238 void Exit(CodeGenFunction &CGF) override { 2239 CGF.EmitRuntimeCall(ExitCallee, ExitArgs); 2240 } 2241 }; 2242 } // anonymous namespace 2243 2244 void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF, 2245 StringRef CriticalName, 2246 const RegionCodeGenTy &CriticalOpGen, 2247 SourceLocation Loc, const Expr *Hint) { 2248 // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]); 2249 // CriticalOpGen(); 2250 // __kmpc_end_critical(ident_t *, gtid, Lock); 2251 // Prepare arguments and build a call to __kmpc_critical 2252 if (!CGF.HaveInsertPoint()) 2253 return; 2254 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 2255 getCriticalRegionLock(CriticalName)}; 2256 llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args), 2257 std::end(Args)); 2258 if (Hint) { 2259 EnterArgs.push_back(CGF.Builder.CreateIntCast( 2260 CGF.EmitScalarExpr(Hint), CGM.Int32Ty, /*isSigned=*/false)); 2261 } 2262 CommonActionTy Action( 2263 OMPBuilder.getOrCreateRuntimeFunction( 2264 CGM.getModule(), 2265 Hint ? 
OMPRTL___kmpc_critical_with_hint : OMPRTL___kmpc_critical),
2266 EnterArgs,
2267 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
2268 OMPRTL___kmpc_end_critical),
2269 Args);
2270 CriticalOpGen.setAction(Action);
2271 emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen);
2272 }
2273
2274 void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF,
2275 const RegionCodeGenTy &MasterOpGen,
2276 SourceLocation Loc) {
2277 if (!CGF.HaveInsertPoint())
2278 return;
2279 // if(__kmpc_master(ident_t *, gtid)) {
2280 // MasterOpGen();
2281 // __kmpc_end_master(ident_t *, gtid);
2282 // }
2283 // Prepare arguments and build a call to __kmpc_master
2284 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2285 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2286 CGM.getModule(), OMPRTL___kmpc_master),
2287 Args,
2288 OMPBuilder.getOrCreateRuntimeFunction(
2289 CGM.getModule(), OMPRTL___kmpc_end_master),
2290 Args,
2291 /*Conditional=*/true);
2292 MasterOpGen.setAction(Action);
2293 emitInlinedDirective(CGF, OMPD_master, MasterOpGen);
2294 Action.Done(CGF);
2295 }
2296
2297 void CGOpenMPRuntime::emitMaskedRegion(CodeGenFunction &CGF,
2298 const RegionCodeGenTy &MaskedOpGen,
2299 SourceLocation Loc, const Expr *Filter) {
2300 if (!CGF.HaveInsertPoint())
2301 return;
2302 // if(__kmpc_masked(ident_t *, gtid, filter)) {
2303 // MaskedOpGen();
2304 // __kmpc_end_masked(ident_t *, gtid);
2305 // }
2306 // Prepare arguments and build a call to __kmpc_masked
2307 llvm::Value *FilterVal = Filter
2308 ? CGF.EmitScalarExpr(Filter, CGF.Int32Ty)
2309 : llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/0);
2310 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2311 FilterVal};
2312 llvm::Value *ArgsEnd[] = {emitUpdateLocation(CGF, Loc),
2313 getThreadID(CGF, Loc)};
2314 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2315 CGM.getModule(), OMPRTL___kmpc_masked),
2316 Args,
2317 OMPBuilder.getOrCreateRuntimeFunction(
2318 CGM.getModule(), OMPRTL___kmpc_end_masked),
2319 ArgsEnd,
2320 /*Conditional=*/true);
2321 MaskedOpGen.setAction(Action);
2322 emitInlinedDirective(CGF, OMPD_masked, MaskedOpGen);
2323 Action.Done(CGF);
2324 }
2325
2326 void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
2327 SourceLocation Loc) {
2328 if (!CGF.HaveInsertPoint())
2329 return;
2330 if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
2331 OMPBuilder.createTaskyield(CGF.Builder);
2332 } else {
2333 // Build call __kmpc_omp_taskyield(loc, thread_id, 0);
2334 llvm::Value *Args[] = {
2335 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2336 llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)};
2337 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2338 CGM.getModule(), OMPRTL___kmpc_omp_taskyield),
2339 Args);
2340 }
2341
2342 if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
2343 Region->emitUntiedSwitch(CGF);
2344 }
2345
2346 void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF,
2347 const RegionCodeGenTy &TaskgroupOpGen,
2348 SourceLocation Loc) {
2349 if (!CGF.HaveInsertPoint())
2350 return;
2351 // __kmpc_taskgroup(ident_t *, gtid);
2352 // TaskgroupOpGen();
2353 // __kmpc_end_taskgroup(ident_t *, gtid);
2354 // Prepare arguments and build a call to __kmpc_taskgroup
2355 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2356 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2357 CGM.getModule(), OMPRTL___kmpc_taskgroup),
2358 Args,
2359
OMPBuilder.getOrCreateRuntimeFunction( 2360 CGM.getModule(), OMPRTL___kmpc_end_taskgroup), 2361 Args); 2362 TaskgroupOpGen.setAction(Action); 2363 emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen); 2364 } 2365 2366 /// Given an array of pointers to variables, project the address of a 2367 /// given variable. 2368 static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array, 2369 unsigned Index, const VarDecl *Var) { 2370 // Pull out the pointer to the variable. 2371 Address PtrAddr = CGF.Builder.CreateConstArrayGEP(Array, Index); 2372 llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr); 2373 2374 Address Addr = Address(Ptr, CGF.getContext().getDeclAlign(Var)); 2375 Addr = CGF.Builder.CreateElementBitCast( 2376 Addr, CGF.ConvertTypeForMem(Var->getType())); 2377 return Addr; 2378 } 2379 2380 static llvm::Value *emitCopyprivateCopyFunction( 2381 CodeGenModule &CGM, llvm::Type *ArgsType, 2382 ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs, 2383 ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps, 2384 SourceLocation Loc) { 2385 ASTContext &C = CGM.getContext(); 2386 // void copy_func(void *LHSArg, void *RHSArg); 2387 FunctionArgList Args; 2388 ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 2389 ImplicitParamDecl::Other); 2390 ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 2391 ImplicitParamDecl::Other); 2392 Args.push_back(&LHSArg); 2393 Args.push_back(&RHSArg); 2394 const auto &CGFI = 2395 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 2396 std::string Name = 2397 CGM.getOpenMPRuntime().getName({"omp", "copyprivate", "copy_func"}); 2398 auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI), 2399 llvm::GlobalValue::InternalLinkage, Name, 2400 &CGM.getModule()); 2401 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI); 2402 Fn->setDoesNotRecurse(); 2403 CodeGenFunction CGF(CGM); 2404 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc); 2405 // Dest = (void*[n])(LHSArg); 2406 // Src = (void*[n])(RHSArg); 2407 Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 2408 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)), 2409 ArgsType), CGF.getPointerAlign()); 2410 Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 2411 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)), 2412 ArgsType), CGF.getPointerAlign()); 2413 // *(Type0*)Dst[0] = *(Type0*)Src[0]; 2414 // *(Type1*)Dst[1] = *(Type1*)Src[1]; 2415 // ... 
2416 // *(Typen*)Dst[n] = *(Typen*)Src[n]; 2417 for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) { 2418 const auto *DestVar = 2419 cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl()); 2420 Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar); 2421 2422 const auto *SrcVar = 2423 cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl()); 2424 Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar); 2425 2426 const auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl(); 2427 QualType Type = VD->getType(); 2428 CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]); 2429 } 2430 CGF.FinishFunction(); 2431 return Fn; 2432 } 2433 2434 void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF, 2435 const RegionCodeGenTy &SingleOpGen, 2436 SourceLocation Loc, 2437 ArrayRef<const Expr *> CopyprivateVars, 2438 ArrayRef<const Expr *> SrcExprs, 2439 ArrayRef<const Expr *> DstExprs, 2440 ArrayRef<const Expr *> AssignmentOps) { 2441 if (!CGF.HaveInsertPoint()) 2442 return; 2443 assert(CopyprivateVars.size() == SrcExprs.size() && 2444 CopyprivateVars.size() == DstExprs.size() && 2445 CopyprivateVars.size() == AssignmentOps.size()); 2446 ASTContext &C = CGM.getContext(); 2447 // int32 did_it = 0; 2448 // if(__kmpc_single(ident_t *, gtid)) { 2449 // SingleOpGen(); 2450 // __kmpc_end_single(ident_t *, gtid); 2451 // did_it = 1; 2452 // } 2453 // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>, 2454 // <copy_func>, did_it); 2455 2456 Address DidIt = Address::invalid(); 2457 if (!CopyprivateVars.empty()) { 2458 // int32 did_it = 0; 2459 QualType KmpInt32Ty = 2460 C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1); 2461 DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it"); 2462 CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt); 2463 } 2464 // Prepare arguments and build a call to __kmpc_single 2465 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 2466 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction( 2467 CGM.getModule(), OMPRTL___kmpc_single), 2468 Args, 2469 OMPBuilder.getOrCreateRuntimeFunction( 2470 CGM.getModule(), OMPRTL___kmpc_end_single), 2471 Args, 2472 /*Conditional=*/true); 2473 SingleOpGen.setAction(Action); 2474 emitInlinedDirective(CGF, OMPD_single, SingleOpGen); 2475 if (DidIt.isValid()) { 2476 // did_it = 1; 2477 CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt); 2478 } 2479 Action.Done(CGF); 2480 // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>, 2481 // <copy_func>, did_it); 2482 if (DidIt.isValid()) { 2483 llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size()); 2484 QualType CopyprivateArrayTy = C.getConstantArrayType( 2485 C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal, 2486 /*IndexTypeQuals=*/0); 2487 // Create a list of all private variables for copyprivate. 2488 Address CopyprivateList = 2489 CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list"); 2490 for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) { 2491 Address Elem = CGF.Builder.CreateConstArrayGEP(CopyprivateList, I); 2492 CGF.Builder.CreateStore( 2493 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 2494 CGF.EmitLValue(CopyprivateVars[I]).getPointer(CGF), 2495 CGF.VoidPtrTy), 2496 Elem); 2497 } 2498 // Build function that copies private values from single region to all other 2499 // threads in the corresponding parallel region. 
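// __kmpc_copyprivate broadcasts the single thread's list: every other
// thread in the team runs the generated copy helper with its own list as
// the destination and the single thread's list as the source.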
2500 llvm::Value *CpyFn = emitCopyprivateCopyFunction( 2501 CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy)->getPointerTo(), 2502 CopyprivateVars, SrcExprs, DstExprs, AssignmentOps, Loc); 2503 llvm::Value *BufSize = CGF.getTypeSize(CopyprivateArrayTy); 2504 Address CL = 2505 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(CopyprivateList, 2506 CGF.VoidPtrTy); 2507 llvm::Value *DidItVal = CGF.Builder.CreateLoad(DidIt); 2508 llvm::Value *Args[] = { 2509 emitUpdateLocation(CGF, Loc), // ident_t *<loc> 2510 getThreadID(CGF, Loc), // i32 <gtid> 2511 BufSize, // size_t <buf_size> 2512 CL.getPointer(), // void *<copyprivate list> 2513 CpyFn, // void (*) (void *, void *) <copy_func> 2514 DidItVal // i32 did_it 2515 }; 2516 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 2517 CGM.getModule(), OMPRTL___kmpc_copyprivate), 2518 Args); 2519 } 2520 } 2521 2522 void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF, 2523 const RegionCodeGenTy &OrderedOpGen, 2524 SourceLocation Loc, bool IsThreads) { 2525 if (!CGF.HaveInsertPoint()) 2526 return; 2527 // __kmpc_ordered(ident_t *, gtid); 2528 // OrderedOpGen(); 2529 // __kmpc_end_ordered(ident_t *, gtid); 2530 // Prepare arguments and build a call to __kmpc_ordered 2531 if (IsThreads) { 2532 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 2533 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction( 2534 CGM.getModule(), OMPRTL___kmpc_ordered), 2535 Args, 2536 OMPBuilder.getOrCreateRuntimeFunction( 2537 CGM.getModule(), OMPRTL___kmpc_end_ordered), 2538 Args); 2539 OrderedOpGen.setAction(Action); 2540 emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen); 2541 return; 2542 } 2543 emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen); 2544 } 2545 2546 unsigned CGOpenMPRuntime::getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind) { 2547 unsigned Flags; 2548 if (Kind == OMPD_for) 2549 Flags = OMP_IDENT_BARRIER_IMPL_FOR; 2550 else if (Kind == OMPD_sections) 2551 Flags = OMP_IDENT_BARRIER_IMPL_SECTIONS; 2552 else if (Kind == OMPD_single) 2553 Flags = OMP_IDENT_BARRIER_IMPL_SINGLE; 2554 else if (Kind == OMPD_barrier) 2555 Flags = OMP_IDENT_BARRIER_EXPL; 2556 else 2557 Flags = OMP_IDENT_BARRIER_IMPL; 2558 return Flags; 2559 } 2560 2561 void CGOpenMPRuntime::getDefaultScheduleAndChunk( 2562 CodeGenFunction &CGF, const OMPLoopDirective &S, 2563 OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const { 2564 // Check if the loop directive is actually a doacross loop directive. In this 2565 // case choose static, 1 schedule. 2566 if (llvm::any_of( 2567 S.getClausesOfKind<OMPOrderedClause>(), 2568 [](const OMPOrderedClause *C) { return C->getNumForLoops(); })) { 2569 ScheduleKind = OMPC_SCHEDULE_static; 2570 // Chunk size is 1 in this case. 
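// For example, a loop with 'ordered(2)' is lowered as if it had
// schedule(static, 1), so iterations map to threads in the way the
// doacross dependence handling expects.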
2571 llvm::APInt ChunkSize(32, 1); 2572 ChunkExpr = IntegerLiteral::Create( 2573 CGF.getContext(), ChunkSize, 2574 CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/0), 2575 SourceLocation()); 2576 } 2577 } 2578 2579 void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc, 2580 OpenMPDirectiveKind Kind, bool EmitChecks, 2581 bool ForceSimpleCall) { 2582 // Check if we should use the OMPBuilder 2583 auto *OMPRegionInfo = 2584 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo); 2585 if (CGF.CGM.getLangOpts().OpenMPIRBuilder) { 2586 CGF.Builder.restoreIP(OMPBuilder.createBarrier( 2587 CGF.Builder, Kind, ForceSimpleCall, EmitChecks)); 2588 return; 2589 } 2590 2591 if (!CGF.HaveInsertPoint()) 2592 return; 2593 // Build call __kmpc_cancel_barrier(loc, thread_id); 2594 // Build call __kmpc_barrier(loc, thread_id); 2595 unsigned Flags = getDefaultFlagsForBarriers(Kind); 2596 // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc, 2597 // thread_id); 2598 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags), 2599 getThreadID(CGF, Loc)}; 2600 if (OMPRegionInfo) { 2601 if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) { 2602 llvm::Value *Result = CGF.EmitRuntimeCall( 2603 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), 2604 OMPRTL___kmpc_cancel_barrier), 2605 Args); 2606 if (EmitChecks) { 2607 // if (__kmpc_cancel_barrier()) { 2608 // exit from construct; 2609 // } 2610 llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit"); 2611 llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue"); 2612 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result); 2613 CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB); 2614 CGF.EmitBlock(ExitBB); 2615 // exit from construct; 2616 CodeGenFunction::JumpDest CancelDestination = 2617 CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind()); 2618 CGF.EmitBranchThroughCleanup(CancelDestination); 2619 CGF.EmitBlock(ContBB, /*IsFinished=*/true); 2620 } 2621 return; 2622 } 2623 } 2624 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 2625 CGM.getModule(), OMPRTL___kmpc_barrier), 2626 Args); 2627 } 2628 2629 /// Map the OpenMP loop schedule to the runtime enumeration. 2630 static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind, 2631 bool Chunked, bool Ordered) { 2632 switch (ScheduleKind) { 2633 case OMPC_SCHEDULE_static: 2634 return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked) 2635 : (Ordered ? OMP_ord_static : OMP_sch_static); 2636 case OMPC_SCHEDULE_dynamic: 2637 return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked; 2638 case OMPC_SCHEDULE_guided: 2639 return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked; 2640 case OMPC_SCHEDULE_runtime: 2641 return Ordered ? OMP_ord_runtime : OMP_sch_runtime; 2642 case OMPC_SCHEDULE_auto: 2643 return Ordered ? OMP_ord_auto : OMP_sch_auto; 2644 case OMPC_SCHEDULE_unknown: 2645 assert(!Chunked && "chunk was specified but schedule kind not known"); 2646 return Ordered ? OMP_ord_static : OMP_sch_static; 2647 } 2648 llvm_unreachable("Unexpected runtime schedule"); 2649 } 2650 2651 /// Map the OpenMP distribute schedule to the runtime enumeration. 2652 static OpenMPSchedType 2653 getRuntimeSchedule(OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) { 2654 // only static is allowed for dist_schedule 2655 return Chunked ? 
OMP_dist_sch_static_chunked : OMP_dist_sch_static;
2656 }
2657
2658 bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind,
2659 bool Chunked) const {
2660 OpenMPSchedType Schedule =
2661 getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
2662 return Schedule == OMP_sch_static;
2663 }
2664
2665 bool CGOpenMPRuntime::isStaticNonchunked(
2666 OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
2667 OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
2668 return Schedule == OMP_dist_sch_static;
2669 }
2670
2671 bool CGOpenMPRuntime::isStaticChunked(OpenMPScheduleClauseKind ScheduleKind,
2672 bool Chunked) const {
2673 OpenMPSchedType Schedule =
2674 getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
2675 return Schedule == OMP_sch_static_chunked;
2676 }
2677
2678 bool CGOpenMPRuntime::isStaticChunked(
2679 OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
2680 OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
2681 return Schedule == OMP_dist_sch_static_chunked;
2682 }
2683
2684 bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const {
2685 OpenMPSchedType Schedule =
2686 getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false);
2687 assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here");
2688 return Schedule != OMP_sch_static;
2689 }
2690
2691 static int addMonoNonMonoModifier(CodeGenModule &CGM, OpenMPSchedType Schedule,
2692 OpenMPScheduleClauseModifier M1,
2693 OpenMPScheduleClauseModifier M2) {
2694 int Modifier = 0;
2695 switch (M1) {
2696 case OMPC_SCHEDULE_MODIFIER_monotonic:
2697 Modifier = OMP_sch_modifier_monotonic;
2698 break;
2699 case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
2700 Modifier = OMP_sch_modifier_nonmonotonic;
2701 break;
2702 case OMPC_SCHEDULE_MODIFIER_simd:
2703 if (Schedule == OMP_sch_static_chunked)
2704 Schedule = OMP_sch_static_balanced_chunked;
2705 break;
2706 case OMPC_SCHEDULE_MODIFIER_last:
2707 case OMPC_SCHEDULE_MODIFIER_unknown:
2708 break;
2709 }
2710 switch (M2) {
2711 case OMPC_SCHEDULE_MODIFIER_monotonic:
2712 Modifier = OMP_sch_modifier_monotonic;
2713 break;
2714 case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
2715 Modifier = OMP_sch_modifier_nonmonotonic;
2716 break;
2717 case OMPC_SCHEDULE_MODIFIER_simd:
2718 if (Schedule == OMP_sch_static_chunked)
2719 Schedule = OMP_sch_static_balanced_chunked;
2720 break;
2721 case OMPC_SCHEDULE_MODIFIER_last:
2722 case OMPC_SCHEDULE_MODIFIER_unknown:
2723 break;
2724 }
2725 // OpenMP 5.0, 2.9.2 Worksharing-Loop Construct, Description.
2726 // If the static schedule kind is specified or if the ordered clause is
2727 // specified, and if the nonmonotonic modifier is not specified, the effect is
2728 // as if the monotonic modifier is specified. Otherwise, unless the monotonic
2729 // modifier is specified, the effect is as if the nonmonotonic modifier is
2730 // specified.
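// In practice, for OpenMP 5.0 and later this ORs OMP_sch_modifier_nonmonotonic
// into the schedule word for every schedule kind outside the static family
// whenever no explicit modifier was written.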
2731 if (CGM.getLangOpts().OpenMP >= 50 && Modifier == 0) { 2732 if (!(Schedule == OMP_sch_static_chunked || Schedule == OMP_sch_static || 2733 Schedule == OMP_sch_static_balanced_chunked || 2734 Schedule == OMP_ord_static_chunked || Schedule == OMP_ord_static || 2735 Schedule == OMP_dist_sch_static_chunked || 2736 Schedule == OMP_dist_sch_static)) 2737 Modifier = OMP_sch_modifier_nonmonotonic; 2738 } 2739 return Schedule | Modifier; 2740 } 2741 2742 void CGOpenMPRuntime::emitForDispatchInit( 2743 CodeGenFunction &CGF, SourceLocation Loc, 2744 const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned, 2745 bool Ordered, const DispatchRTInput &DispatchValues) { 2746 if (!CGF.HaveInsertPoint()) 2747 return; 2748 OpenMPSchedType Schedule = getRuntimeSchedule( 2749 ScheduleKind.Schedule, DispatchValues.Chunk != nullptr, Ordered); 2750 assert(Ordered || 2751 (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked && 2752 Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked && 2753 Schedule != OMP_sch_static_balanced_chunked)); 2754 // Call __kmpc_dispatch_init( 2755 // ident_t *loc, kmp_int32 tid, kmp_int32 schedule, 2756 // kmp_int[32|64] lower, kmp_int[32|64] upper, 2757 // kmp_int[32|64] stride, kmp_int[32|64] chunk); 2758 2759 // If the Chunk was not specified in the clause - use default value 1. 2760 llvm::Value *Chunk = DispatchValues.Chunk ? DispatchValues.Chunk 2761 : CGF.Builder.getIntN(IVSize, 1); 2762 llvm::Value *Args[] = { 2763 emitUpdateLocation(CGF, Loc), 2764 getThreadID(CGF, Loc), 2765 CGF.Builder.getInt32(addMonoNonMonoModifier( 2766 CGM, Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type 2767 DispatchValues.LB, // Lower 2768 DispatchValues.UB, // Upper 2769 CGF.Builder.getIntN(IVSize, 1), // Stride 2770 Chunk // Chunk 2771 }; 2772 CGF.EmitRuntimeCall(createDispatchInitFunction(IVSize, IVSigned), Args); 2773 } 2774 2775 static void emitForStaticInitCall( 2776 CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId, 2777 llvm::FunctionCallee ForStaticInitFunction, OpenMPSchedType Schedule, 2778 OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2, 2779 const CGOpenMPRuntime::StaticRTInput &Values) { 2780 if (!CGF.HaveInsertPoint()) 2781 return; 2782 2783 assert(!Values.Ordered); 2784 assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked || 2785 Schedule == OMP_sch_static_balanced_chunked || 2786 Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked || 2787 Schedule == OMP_dist_sch_static || 2788 Schedule == OMP_dist_sch_static_chunked); 2789 2790 // Call __kmpc_for_static_init( 2791 // ident_t *loc, kmp_int32 tid, kmp_int32 schedtype, 2792 // kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower, 2793 // kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride, 2794 // kmp_int[32|64] incr, kmp_int[32|64] chunk); 2795 llvm::Value *Chunk = Values.Chunk; 2796 if (Chunk == nullptr) { 2797 assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static || 2798 Schedule == OMP_dist_sch_static) && 2799 "expected static non-chunked schedule"); 2800 // If the Chunk was not specified in the clause - use default value 1. 
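// For the non-chunked static schedules the runtime does not use the chunk
// argument, so the constant 1 below effectively serves as a placeholder
// that keeps the call signature uniform.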
2801 Chunk = CGF.Builder.getIntN(Values.IVSize, 1); 2802 } else { 2803 assert((Schedule == OMP_sch_static_chunked || 2804 Schedule == OMP_sch_static_balanced_chunked || 2805 Schedule == OMP_ord_static_chunked || 2806 Schedule == OMP_dist_sch_static_chunked) && 2807 "expected static chunked schedule"); 2808 } 2809 llvm::Value *Args[] = { 2810 UpdateLocation, 2811 ThreadId, 2812 CGF.Builder.getInt32(addMonoNonMonoModifier(CGF.CGM, Schedule, M1, 2813 M2)), // Schedule type 2814 Values.IL.getPointer(), // &isLastIter 2815 Values.LB.getPointer(), // &LB 2816 Values.UB.getPointer(), // &UB 2817 Values.ST.getPointer(), // &Stride 2818 CGF.Builder.getIntN(Values.IVSize, 1), // Incr 2819 Chunk // Chunk 2820 }; 2821 CGF.EmitRuntimeCall(ForStaticInitFunction, Args); 2822 } 2823 2824 void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF, 2825 SourceLocation Loc, 2826 OpenMPDirectiveKind DKind, 2827 const OpenMPScheduleTy &ScheduleKind, 2828 const StaticRTInput &Values) { 2829 OpenMPSchedType ScheduleNum = getRuntimeSchedule( 2830 ScheduleKind.Schedule, Values.Chunk != nullptr, Values.Ordered); 2831 assert(isOpenMPWorksharingDirective(DKind) && 2832 "Expected loop-based or sections-based directive."); 2833 llvm::Value *UpdatedLocation = emitUpdateLocation(CGF, Loc, 2834 isOpenMPLoopDirective(DKind) 2835 ? OMP_IDENT_WORK_LOOP 2836 : OMP_IDENT_WORK_SECTIONS); 2837 llvm::Value *ThreadId = getThreadID(CGF, Loc); 2838 llvm::FunctionCallee StaticInitFunction = 2839 createForStaticInitFunction(Values.IVSize, Values.IVSigned, false); 2840 auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc); 2841 emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction, 2842 ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, Values); 2843 } 2844 2845 void CGOpenMPRuntime::emitDistributeStaticInit( 2846 CodeGenFunction &CGF, SourceLocation Loc, 2847 OpenMPDistScheduleClauseKind SchedKind, 2848 const CGOpenMPRuntime::StaticRTInput &Values) { 2849 OpenMPSchedType ScheduleNum = 2850 getRuntimeSchedule(SchedKind, Values.Chunk != nullptr); 2851 llvm::Value *UpdatedLocation = 2852 emitUpdateLocation(CGF, Loc, OMP_IDENT_WORK_DISTRIBUTE); 2853 llvm::Value *ThreadId = getThreadID(CGF, Loc); 2854 llvm::FunctionCallee StaticInitFunction; 2855 bool isGPUDistribute = 2856 CGM.getLangOpts().OpenMPIsDevice && 2857 (CGM.getTriple().isAMDGCN() || CGM.getTriple().isNVPTX()); 2858 StaticInitFunction = createForStaticInitFunction( 2859 Values.IVSize, Values.IVSigned, isGPUDistribute); 2860 2861 emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction, 2862 ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown, 2863 OMPC_SCHEDULE_MODIFIER_unknown, Values); 2864 } 2865 2866 void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF, 2867 SourceLocation Loc, 2868 OpenMPDirectiveKind DKind) { 2869 if (!CGF.HaveInsertPoint()) 2870 return; 2871 // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid); 2872 llvm::Value *Args[] = { 2873 emitUpdateLocation(CGF, Loc, 2874 isOpenMPDistributeDirective(DKind) 2875 ? OMP_IDENT_WORK_DISTRIBUTE 2876 : isOpenMPLoopDirective(DKind) 2877 ? 
OMP_IDENT_WORK_LOOP
                             : OMP_IDENT_WORK_SECTIONS),
      getThreadID(CGF, Loc)};
  auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
  if (isOpenMPDistributeDirective(DKind) && CGM.getLangOpts().OpenMPIsDevice &&
      (CGM.getTriple().isAMDGCN() || CGM.getTriple().isNVPTX()))
    CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_distribute_static_fini),
        Args);
  else
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_for_static_fini),
                        Args);
}

void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
                                                 SourceLocation Loc,
                                                 unsigned IVSize,
                                                 bool IVSigned) {
  if (!CGF.HaveInsertPoint())
    return;
  // Call __kmpc_dispatch_fini_(4|8)[u](ident_t *loc, kmp_int32 tid);
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CGF.EmitRuntimeCall(createDispatchFiniFunction(IVSize, IVSigned), Args);
}

llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF,
                                          SourceLocation Loc, unsigned IVSize,
                                          bool IVSigned, Address IL,
                                          Address LB, Address UB,
                                          Address ST) {
  // Call __kmpc_dispatch_next(
  //          ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter,
  //          kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper,
  //          kmp_int[32|64] *p_stride);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc),
      getThreadID(CGF, Loc),
      IL.getPointer(), // &isLastIter
      LB.getPointer(), // &Lower
      UB.getPointer(), // &Upper
      ST.getPointer()  // &Stride
  };
  llvm::Value *Call =
      CGF.EmitRuntimeCall(createDispatchNextFunction(IVSize, IVSigned), Args);
  return CGF.EmitScalarConversion(
      Call, CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/1),
      CGF.getContext().BoolTy, Loc);
}

void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
                                           llvm::Value *NumThreads,
                                           SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads)
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
      CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)};
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_push_num_threads),
                      Args);
}

void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF,
                                         ProcBindKind ProcBind,
                                         SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  assert(ProcBind != OMP_PROC_BIND_unknown && "Unsupported proc_bind value.");
  // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind)
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
      llvm::ConstantInt::get(CGM.IntTy, unsigned(ProcBind), /*isSigned=*/true)};
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_push_proc_bind),
                      Args);
}

void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>,
                                SourceLocation Loc, llvm::AtomicOrdering AO) {
  if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
    OMPBuilder.createFlush(CGF.Builder);
  } else {
    if (!CGF.HaveInsertPoint())
      return;
    // Build call void __kmpc_flush(ident_t *loc)
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_flush),
emitUpdateLocation(CGF, Loc)); 2968 } 2969 } 2970 2971 namespace { 2972 /// Indexes of fields for type kmp_task_t. 2973 enum KmpTaskTFields { 2974 /// List of shared variables. 2975 KmpTaskTShareds, 2976 /// Task routine. 2977 KmpTaskTRoutine, 2978 /// Partition id for the untied tasks. 2979 KmpTaskTPartId, 2980 /// Function with call of destructors for private variables. 2981 Data1, 2982 /// Task priority. 2983 Data2, 2984 /// (Taskloops only) Lower bound. 2985 KmpTaskTLowerBound, 2986 /// (Taskloops only) Upper bound. 2987 KmpTaskTUpperBound, 2988 /// (Taskloops only) Stride. 2989 KmpTaskTStride, 2990 /// (Taskloops only) Is last iteration flag. 2991 KmpTaskTLastIter, 2992 /// (Taskloops only) Reduction data. 2993 KmpTaskTReductions, 2994 }; 2995 } // anonymous namespace 2996 2997 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::empty() const { 2998 return OffloadEntriesTargetRegion.empty() && 2999 OffloadEntriesDeviceGlobalVar.empty(); 3000 } 3001 3002 /// Initialize target region entry. 3003 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: 3004 initializeTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID, 3005 StringRef ParentName, unsigned LineNum, 3006 unsigned Order) { 3007 assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is " 3008 "only required for the device " 3009 "code generation."); 3010 OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = 3011 OffloadEntryInfoTargetRegion(Order, /*Addr=*/nullptr, /*ID=*/nullptr, 3012 OMPTargetRegionEntryTargetRegion); 3013 ++OffloadingEntriesNum; 3014 } 3015 3016 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: 3017 registerTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID, 3018 StringRef ParentName, unsigned LineNum, 3019 llvm::Constant *Addr, llvm::Constant *ID, 3020 OMPTargetRegionEntryKind Flags) { 3021 // If we are emitting code for a target, the entry is already initialized, 3022 // only has to be registered. 3023 if (CGM.getLangOpts().OpenMPIsDevice) { 3024 // This could happen if the device compilation is invoked standalone. 
3025 if (!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum)) 3026 return; 3027 auto &Entry = 3028 OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum]; 3029 Entry.setAddress(Addr); 3030 Entry.setID(ID); 3031 Entry.setFlags(Flags); 3032 } else { 3033 if (Flags == 3034 OffloadEntriesInfoManagerTy::OMPTargetRegionEntryTargetRegion && 3035 hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum, 3036 /*IgnoreAddressId*/ true)) 3037 return; 3038 assert(!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum) && 3039 "Target region entry already registered!"); 3040 OffloadEntryInfoTargetRegion Entry(OffloadingEntriesNum, Addr, ID, Flags); 3041 OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = Entry; 3042 ++OffloadingEntriesNum; 3043 } 3044 } 3045 3046 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::hasTargetRegionEntryInfo( 3047 unsigned DeviceID, unsigned FileID, StringRef ParentName, unsigned LineNum, 3048 bool IgnoreAddressId) const { 3049 auto PerDevice = OffloadEntriesTargetRegion.find(DeviceID); 3050 if (PerDevice == OffloadEntriesTargetRegion.end()) 3051 return false; 3052 auto PerFile = PerDevice->second.find(FileID); 3053 if (PerFile == PerDevice->second.end()) 3054 return false; 3055 auto PerParentName = PerFile->second.find(ParentName); 3056 if (PerParentName == PerFile->second.end()) 3057 return false; 3058 auto PerLine = PerParentName->second.find(LineNum); 3059 if (PerLine == PerParentName->second.end()) 3060 return false; 3061 // Fail if this entry is already registered. 3062 if (!IgnoreAddressId && 3063 (PerLine->second.getAddress() || PerLine->second.getID())) 3064 return false; 3065 return true; 3066 } 3067 3068 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::actOnTargetRegionEntriesInfo( 3069 const OffloadTargetRegionEntryInfoActTy &Action) { 3070 // Scan all target region entries and perform the provided action. 3071 for (const auto &D : OffloadEntriesTargetRegion) 3072 for (const auto &F : D.second) 3073 for (const auto &P : F.second) 3074 for (const auto &L : P.second) 3075 Action(D.first, F.first, P.first(), L.first, L.second); 3076 } 3077 3078 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: 3079 initializeDeviceGlobalVarEntryInfo(StringRef Name, 3080 OMPTargetGlobalVarEntryKind Flags, 3081 unsigned Order) { 3082 assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is " 3083 "only required for the device " 3084 "code generation."); 3085 OffloadEntriesDeviceGlobalVar.try_emplace(Name, Order, Flags); 3086 ++OffloadingEntriesNum; 3087 } 3088 3089 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: 3090 registerDeviceGlobalVarEntryInfo(StringRef VarName, llvm::Constant *Addr, 3091 CharUnits VarSize, 3092 OMPTargetGlobalVarEntryKind Flags, 3093 llvm::GlobalValue::LinkageTypes Linkage) { 3094 if (CGM.getLangOpts().OpenMPIsDevice) { 3095 // This could happen if the device compilation is invoked standalone. 
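    // In the normal two-pass flow the entries were preloaded from the host
    // IR metadata (see loadOffloadInfoMetadata()), so a name that was never
    // initialized has no host-side counterpart and is silently skipped.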
    if (!hasDeviceGlobalVarEntryInfo(VarName))
      return;
    auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
    if (Entry.getAddress() && hasDeviceGlobalVarEntryInfo(VarName)) {
      if (Entry.getVarSize().isZero()) {
        Entry.setVarSize(VarSize);
        Entry.setLinkage(Linkage);
      }
      return;
    }
    Entry.setVarSize(VarSize);
    Entry.setLinkage(Linkage);
    Entry.setAddress(Addr);
  } else {
    if (hasDeviceGlobalVarEntryInfo(VarName)) {
      auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
      assert(Entry.isValid() && Entry.getFlags() == Flags &&
             "Entry not initialized!");
      if (Entry.getVarSize().isZero()) {
        Entry.setVarSize(VarSize);
        Entry.setLinkage(Linkage);
      }
      return;
    }
    OffloadEntriesDeviceGlobalVar.try_emplace(
        VarName, OffloadingEntriesNum, Addr, VarSize, Flags, Linkage);
    ++OffloadingEntriesNum;
  }
}

void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
    actOnDeviceGlobalVarEntriesInfo(
        const OffloadDeviceGlobalVarEntryInfoActTy &Action) {
  // Scan all device global variable entries and perform the provided action.
  for (const auto &E : OffloadEntriesDeviceGlobalVar)
    Action(E.getKey(), E.getValue());
}

void CGOpenMPRuntime::createOffloadEntry(
    llvm::Constant *ID, llvm::Constant *Addr, uint64_t Size, int32_t Flags,
    llvm::GlobalValue::LinkageTypes Linkage) {
  StringRef Name = Addr->getName();
  llvm::Module &M = CGM.getModule();
  llvm::LLVMContext &C = M.getContext();

  // Create constant string with the name.
  llvm::Constant *StrPtrInit = llvm::ConstantDataArray::getString(C, Name);

  std::string StringName = getName({"omp_offloading", "entry_name"});
  auto *Str = new llvm::GlobalVariable(
      M, StrPtrInit->getType(), /*isConstant=*/true,
      llvm::GlobalValue::InternalLinkage, StrPtrInit, StringName);
  Str->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);

  llvm::Constant *Data[] = {
      llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast(ID, CGM.VoidPtrTy),
      llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast(Str, CGM.Int8PtrTy),
      llvm::ConstantInt::get(CGM.SizeTy, Size),
      llvm::ConstantInt::get(CGM.Int32Ty, Flags),
      llvm::ConstantInt::get(CGM.Int32Ty, 0)};
  std::string EntryName = getName({"omp_offloading", "entry", ""});
  llvm::GlobalVariable *Entry = createGlobalStruct(
      CGM, getTgtOffloadEntryQTy(), /*IsConstant=*/true, Data,
      Twine(EntryName).concat(Name), llvm::GlobalValue::WeakAnyLinkage);

  // The entry has to be created in the section the linker expects it to be.
  Entry->setSection("omp_offloading_entries");
}

void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() {
  // Emit the offloading entries and metadata so that the device codegen side
  // can easily figure out what to emit. The produced metadata looks like
  // this:
  //
  // !omp_offload.info = !{!1, ...}
  //
  // Right now we only generate metadata for functions that contain target
  // regions.

  // If we are in simd mode or there are no entries, we don't need to do
  // anything.
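  // (Under -fopenmp-simd only the simd constructs are honored; no target
  // regions or device globals are ever registered, so there is nothing to
  // describe.)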
3177 if (CGM.getLangOpts().OpenMPSimd || OffloadEntriesInfoManager.empty()) 3178 return; 3179 3180 llvm::Module &M = CGM.getModule(); 3181 llvm::LLVMContext &C = M.getContext(); 3182 SmallVector<std::tuple<const OffloadEntriesInfoManagerTy::OffloadEntryInfo *, 3183 SourceLocation, StringRef>, 3184 16> 3185 OrderedEntries(OffloadEntriesInfoManager.size()); 3186 llvm::SmallVector<StringRef, 16> ParentFunctions( 3187 OffloadEntriesInfoManager.size()); 3188 3189 // Auxiliary methods to create metadata values and strings. 3190 auto &&GetMDInt = [this](unsigned V) { 3191 return llvm::ConstantAsMetadata::get( 3192 llvm::ConstantInt::get(CGM.Int32Ty, V)); 3193 }; 3194 3195 auto &&GetMDString = [&C](StringRef V) { return llvm::MDString::get(C, V); }; 3196 3197 // Create the offloading info metadata node. 3198 llvm::NamedMDNode *MD = M.getOrInsertNamedMetadata("omp_offload.info"); 3199 3200 // Create function that emits metadata for each target region entry; 3201 auto &&TargetRegionMetadataEmitter = 3202 [this, &C, MD, &OrderedEntries, &ParentFunctions, &GetMDInt, 3203 &GetMDString]( 3204 unsigned DeviceID, unsigned FileID, StringRef ParentName, 3205 unsigned Line, 3206 const OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion &E) { 3207 // Generate metadata for target regions. Each entry of this metadata 3208 // contains: 3209 // - Entry 0 -> Kind of this type of metadata (0). 3210 // - Entry 1 -> Device ID of the file where the entry was identified. 3211 // - Entry 2 -> File ID of the file where the entry was identified. 3212 // - Entry 3 -> Mangled name of the function where the entry was 3213 // identified. 3214 // - Entry 4 -> Line in the file where the entry was identified. 3215 // - Entry 5 -> Order the entry was created. 3216 // The first element of the metadata node is the kind. 3217 llvm::Metadata *Ops[] = {GetMDInt(E.getKind()), GetMDInt(DeviceID), 3218 GetMDInt(FileID), GetMDString(ParentName), 3219 GetMDInt(Line), GetMDInt(E.getOrder())}; 3220 3221 SourceLocation Loc; 3222 for (auto I = CGM.getContext().getSourceManager().fileinfo_begin(), 3223 E = CGM.getContext().getSourceManager().fileinfo_end(); 3224 I != E; ++I) { 3225 if (I->getFirst()->getUniqueID().getDevice() == DeviceID && 3226 I->getFirst()->getUniqueID().getFile() == FileID) { 3227 Loc = CGM.getContext().getSourceManager().translateFileLineCol( 3228 I->getFirst(), Line, 1); 3229 break; 3230 } 3231 } 3232 // Save this entry in the right position of the ordered entries array. 3233 OrderedEntries[E.getOrder()] = std::make_tuple(&E, Loc, ParentName); 3234 ParentFunctions[E.getOrder()] = ParentName; 3235 3236 // Add metadata to the named metadata node. 3237 MD->addOperand(llvm::MDNode::get(C, Ops)); 3238 }; 3239 3240 OffloadEntriesInfoManager.actOnTargetRegionEntriesInfo( 3241 TargetRegionMetadataEmitter); 3242 3243 // Create function that emits metadata for each device global variable entry; 3244 auto &&DeviceGlobalVarMetadataEmitter = 3245 [&C, &OrderedEntries, &GetMDInt, &GetMDString, 3246 MD](StringRef MangledName, 3247 const OffloadEntriesInfoManagerTy::OffloadEntryInfoDeviceGlobalVar 3248 &E) { 3249 // Generate metadata for global variables. Each entry of this metadata 3250 // contains: 3251 // - Entry 0 -> Kind of this type of metadata (1). 3252 // - Entry 1 -> Mangled name of the variable. 3253 // - Entry 2 -> Declare target kind. 3254 // - Entry 3 -> Order the entry was created. 3255 // The first element of the metadata node is the kind. 
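        // A sketch of what ends up in the host IR, with hypothetical values:
        //
        //   !omp_offload.info = !{!1, !2}
        //   !1 = !{i32 0, i32 42, i32 7, !"_Z3foov", i32 12, i32 0}
        //   !2 = !{i32 1, !"declare_target_var", i32 0, i32 1}
        //
        // !1 describes a target region in _Z3foov, !2 a declare target
        // variable; loadOffloadInfoMetadata() parses exactly this shape.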
          llvm::Metadata *Ops[] = {
              GetMDInt(E.getKind()), GetMDString(MangledName),
              GetMDInt(E.getFlags()), GetMDInt(E.getOrder())};

          // Save this entry in the right position of the ordered entries
          // array.
          OrderedEntries[E.getOrder()] =
              std::make_tuple(&E, SourceLocation(), MangledName);

          // Add metadata to the named metadata node.
          MD->addOperand(llvm::MDNode::get(C, Ops));
        };

  OffloadEntriesInfoManager.actOnDeviceGlobalVarEntriesInfo(
      DeviceGlobalVarMetadataEmitter);

  for (const auto &E : OrderedEntries) {
    assert(std::get<0>(E) && "All ordered entries must exist!");
    if (const auto *CE =
            dyn_cast<OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion>(
                std::get<0>(E))) {
      if (!CE->getID() || !CE->getAddress()) {
        // Do not blame the entry if the parent function is not emitted.
        StringRef FnName = ParentFunctions[CE->getOrder()];
        if (!CGM.GetGlobalValue(FnName))
          continue;
        unsigned DiagID = CGM.getDiags().getCustomDiagID(
            DiagnosticsEngine::Error,
            "Offloading entry for target region in %0 is incorrect: either the "
            "address or the ID is invalid.");
        CGM.getDiags().Report(std::get<1>(E), DiagID) << FnName;
        continue;
      }
      createOffloadEntry(CE->getID(), CE->getAddress(), /*Size=*/0,
                         CE->getFlags(), llvm::GlobalValue::WeakAnyLinkage);
    } else if (const auto *CE = dyn_cast<OffloadEntriesInfoManagerTy::
                                             OffloadEntryInfoDeviceGlobalVar>(
                   std::get<0>(E))) {
      OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags =
          static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>(
              CE->getFlags());
      switch (Flags) {
      case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo: {
        if (CGM.getLangOpts().OpenMPIsDevice &&
            CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())
          continue;
        if (!CE->getAddress()) {
          unsigned DiagID = CGM.getDiags().getCustomDiagID(
              DiagnosticsEngine::Error, "Offloading entry for declare target "
                                        "variable %0 is incorrect: the "
                                        "address is invalid.");
          CGM.getDiags().Report(std::get<1>(E), DiagID) << std::get<2>(E);
          continue;
        }
        // The variable has no definition - no need to add the entry.
        if (CE->getVarSize().isZero())
          continue;
        break;
      }
      case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink:
        assert(((CGM.getLangOpts().OpenMPIsDevice && !CE->getAddress()) ||
                (!CGM.getLangOpts().OpenMPIsDevice && CE->getAddress())) &&
               "Declare target link address is set.");
        if (CGM.getLangOpts().OpenMPIsDevice)
          continue;
        if (!CE->getAddress()) {
          unsigned DiagID = CGM.getDiags().getCustomDiagID(
              DiagnosticsEngine::Error,
              "Offloading entry for declare target variable is incorrect: the "
              "address is invalid.");
          CGM.getDiags().Report(DiagID);
          continue;
        }
        break;
      }
      createOffloadEntry(CE->getAddress(), CE->getAddress(),
                         CE->getVarSize().getQuantity(), Flags,
                         CE->getLinkage());
    } else {
      llvm_unreachable("Unsupported entry kind.");
    }
  }
}

/// Loads all the offload entries information from the host IR
/// metadata.
void CGOpenMPRuntime::loadOffloadInfoMetadata() {
  // If we are in target mode, load the metadata from the host IR. This code
  // has to match the metadata creation in
  // createOffloadEntriesAndInfoMetadata().
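  // A typical device-side invocation that reaches this point looks roughly
  // like (illustrative, not a complete command line):
  //
  //   clang -cc1 -fopenmp -fopenmp-is-device \
  //       -fopenmp-host-ir-file-path host.bc -triple nvptx64 t.cpp
  //
  // where host.bc carries the !omp_offload.info node written by the host
  // pass.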

  if (!CGM.getLangOpts().OpenMPIsDevice)
    return;

  if (CGM.getLangOpts().OMPHostIRFile.empty())
    return;

  auto Buf = llvm::MemoryBuffer::getFile(CGM.getLangOpts().OMPHostIRFile);
  if (auto EC = Buf.getError()) {
    CGM.getDiags().Report(diag::err_cannot_open_file)
        << CGM.getLangOpts().OMPHostIRFile << EC.message();
    return;
  }

  llvm::LLVMContext C;
  auto ME = expectedToErrorOrAndEmitErrors(
      C, llvm::parseBitcodeFile(Buf.get()->getMemBufferRef(), C));

  if (auto EC = ME.getError()) {
    unsigned DiagID = CGM.getDiags().getCustomDiagID(
        DiagnosticsEngine::Error, "Unable to parse host IR file '%0':'%1'");
    CGM.getDiags().Report(DiagID)
        << CGM.getLangOpts().OMPHostIRFile << EC.message();
    return;
  }

  llvm::NamedMDNode *MD = ME.get()->getNamedMetadata("omp_offload.info");
  if (!MD)
    return;

  for (llvm::MDNode *MN : MD->operands()) {
    auto &&GetMDInt = [MN](unsigned Idx) {
      auto *V = cast<llvm::ConstantAsMetadata>(MN->getOperand(Idx));
      return cast<llvm::ConstantInt>(V->getValue())->getZExtValue();
    };

    auto &&GetMDString = [MN](unsigned Idx) {
      auto *V = cast<llvm::MDString>(MN->getOperand(Idx));
      return V->getString();
    };

    switch (GetMDInt(0)) {
    default:
      llvm_unreachable("Unexpected metadata!");
      break;
    case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
        OffloadingEntryInfoTargetRegion:
      OffloadEntriesInfoManager.initializeTargetRegionEntryInfo(
          /*DeviceID=*/GetMDInt(1), /*FileID=*/GetMDInt(2),
          /*ParentName=*/GetMDString(3), /*Line=*/GetMDInt(4),
          /*Order=*/GetMDInt(5));
      break;
    case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
        OffloadingEntryInfoDeviceGlobalVar:
      OffloadEntriesInfoManager.initializeDeviceGlobalVarEntryInfo(
          /*MangledName=*/GetMDString(1),
          static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>(
              /*Flags=*/GetMDInt(2)),
          /*Order=*/GetMDInt(3));
      break;
    }
  }
}

void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) {
  if (!KmpRoutineEntryPtrTy) {
    // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type.
    ASTContext &C = CGM.getContext();
    QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy};
    FunctionProtoType::ExtProtoInfo EPI;
    KmpRoutineEntryPtrQTy = C.getPointerType(
        C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI));
    KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy);
  }
}

QualType CGOpenMPRuntime::getTgtOffloadEntryQTy() {
  // Make sure the type of the entry is already created. This is the type we
  // have to create:
  // struct __tgt_offload_entry{
  //   void     *addr;      // Pointer to the offload entry info.
  //                        // (function or global)
  //   char     *name;      // Name of the function or global.
  //   size_t    size;      // Size of the entry info (0 if it is a function).
  //   int32_t   flags;     // Flags associated with the entry, e.g. 'link'.
  //   int32_t   reserved;  // Reserved, for use by the runtime library.
3430 // }; 3431 if (TgtOffloadEntryQTy.isNull()) { 3432 ASTContext &C = CGM.getContext(); 3433 RecordDecl *RD = C.buildImplicitRecord("__tgt_offload_entry"); 3434 RD->startDefinition(); 3435 addFieldToRecordDecl(C, RD, C.VoidPtrTy); 3436 addFieldToRecordDecl(C, RD, C.getPointerType(C.CharTy)); 3437 addFieldToRecordDecl(C, RD, C.getSizeType()); 3438 addFieldToRecordDecl( 3439 C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true)); 3440 addFieldToRecordDecl( 3441 C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true)); 3442 RD->completeDefinition(); 3443 RD->addAttr(PackedAttr::CreateImplicit(C)); 3444 TgtOffloadEntryQTy = C.getRecordType(RD); 3445 } 3446 return TgtOffloadEntryQTy; 3447 } 3448 3449 namespace { 3450 struct PrivateHelpersTy { 3451 PrivateHelpersTy(const Expr *OriginalRef, const VarDecl *Original, 3452 const VarDecl *PrivateCopy, const VarDecl *PrivateElemInit) 3453 : OriginalRef(OriginalRef), Original(Original), PrivateCopy(PrivateCopy), 3454 PrivateElemInit(PrivateElemInit) {} 3455 PrivateHelpersTy(const VarDecl *Original) : Original(Original) {} 3456 const Expr *OriginalRef = nullptr; 3457 const VarDecl *Original = nullptr; 3458 const VarDecl *PrivateCopy = nullptr; 3459 const VarDecl *PrivateElemInit = nullptr; 3460 bool isLocalPrivate() const { 3461 return !OriginalRef && !PrivateCopy && !PrivateElemInit; 3462 } 3463 }; 3464 typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy; 3465 } // anonymous namespace 3466 3467 static bool isAllocatableDecl(const VarDecl *VD) { 3468 const VarDecl *CVD = VD->getCanonicalDecl(); 3469 if (!CVD->hasAttr<OMPAllocateDeclAttr>()) 3470 return false; 3471 const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>(); 3472 // Use the default allocation. 3473 return !((AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc || 3474 AA->getAllocatorType() == OMPAllocateDeclAttr::OMPNullMemAlloc) && 3475 !AA->getAllocator()); 3476 } 3477 3478 static RecordDecl * 3479 createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) { 3480 if (!Privates.empty()) { 3481 ASTContext &C = CGM.getContext(); 3482 // Build struct .kmp_privates_t. { 3483 // /* private vars */ 3484 // }; 3485 RecordDecl *RD = C.buildImplicitRecord(".kmp_privates.t"); 3486 RD->startDefinition(); 3487 for (const auto &Pair : Privates) { 3488 const VarDecl *VD = Pair.second.Original; 3489 QualType Type = VD->getType().getNonReferenceType(); 3490 // If the private variable is a local variable with lvalue ref type, 3491 // allocate the pointer instead of the pointee type. 
3492 if (Pair.second.isLocalPrivate()) { 3493 if (VD->getType()->isLValueReferenceType()) 3494 Type = C.getPointerType(Type); 3495 if (isAllocatableDecl(VD)) 3496 Type = C.getPointerType(Type); 3497 } 3498 FieldDecl *FD = addFieldToRecordDecl(C, RD, Type); 3499 if (VD->hasAttrs()) { 3500 for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()), 3501 E(VD->getAttrs().end()); 3502 I != E; ++I) 3503 FD->addAttr(*I); 3504 } 3505 } 3506 RD->completeDefinition(); 3507 return RD; 3508 } 3509 return nullptr; 3510 } 3511 3512 static RecordDecl * 3513 createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind, 3514 QualType KmpInt32Ty, 3515 QualType KmpRoutineEntryPointerQTy) { 3516 ASTContext &C = CGM.getContext(); 3517 // Build struct kmp_task_t { 3518 // void * shareds; 3519 // kmp_routine_entry_t routine; 3520 // kmp_int32 part_id; 3521 // kmp_cmplrdata_t data1; 3522 // kmp_cmplrdata_t data2; 3523 // For taskloops additional fields: 3524 // kmp_uint64 lb; 3525 // kmp_uint64 ub; 3526 // kmp_int64 st; 3527 // kmp_int32 liter; 3528 // void * reductions; 3529 // }; 3530 RecordDecl *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TTK_Union); 3531 UD->startDefinition(); 3532 addFieldToRecordDecl(C, UD, KmpInt32Ty); 3533 addFieldToRecordDecl(C, UD, KmpRoutineEntryPointerQTy); 3534 UD->completeDefinition(); 3535 QualType KmpCmplrdataTy = C.getRecordType(UD); 3536 RecordDecl *RD = C.buildImplicitRecord("kmp_task_t"); 3537 RD->startDefinition(); 3538 addFieldToRecordDecl(C, RD, C.VoidPtrTy); 3539 addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy); 3540 addFieldToRecordDecl(C, RD, KmpInt32Ty); 3541 addFieldToRecordDecl(C, RD, KmpCmplrdataTy); 3542 addFieldToRecordDecl(C, RD, KmpCmplrdataTy); 3543 if (isOpenMPTaskLoopDirective(Kind)) { 3544 QualType KmpUInt64Ty = 3545 CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0); 3546 QualType KmpInt64Ty = 3547 CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1); 3548 addFieldToRecordDecl(C, RD, KmpUInt64Ty); 3549 addFieldToRecordDecl(C, RD, KmpUInt64Ty); 3550 addFieldToRecordDecl(C, RD, KmpInt64Ty); 3551 addFieldToRecordDecl(C, RD, KmpInt32Ty); 3552 addFieldToRecordDecl(C, RD, C.VoidPtrTy); 3553 } 3554 RD->completeDefinition(); 3555 return RD; 3556 } 3557 3558 static RecordDecl * 3559 createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy, 3560 ArrayRef<PrivateDataTy> Privates) { 3561 ASTContext &C = CGM.getContext(); 3562 // Build struct kmp_task_t_with_privates { 3563 // kmp_task_t task_data; 3564 // .kmp_privates_t. privates; 3565 // }; 3566 RecordDecl *RD = C.buildImplicitRecord("kmp_task_t_with_privates"); 3567 RD->startDefinition(); 3568 addFieldToRecordDecl(C, RD, KmpTaskTQTy); 3569 if (const RecordDecl *PrivateRD = createPrivatesRecordDecl(CGM, Privates)) 3570 addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD)); 3571 RD->completeDefinition(); 3572 return RD; 3573 } 3574 3575 /// Emit a proxy function which accepts kmp_task_t as the second 3576 /// argument. 
3577 /// \code 3578 /// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) { 3579 /// TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, tt, 3580 /// For taskloops: 3581 /// tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter, 3582 /// tt->reductions, tt->shareds); 3583 /// return 0; 3584 /// } 3585 /// \endcode 3586 static llvm::Function * 3587 emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc, 3588 OpenMPDirectiveKind Kind, QualType KmpInt32Ty, 3589 QualType KmpTaskTWithPrivatesPtrQTy, 3590 QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy, 3591 QualType SharedsPtrTy, llvm::Function *TaskFunction, 3592 llvm::Value *TaskPrivatesMap) { 3593 ASTContext &C = CGM.getContext(); 3594 FunctionArgList Args; 3595 ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty, 3596 ImplicitParamDecl::Other); 3597 ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3598 KmpTaskTWithPrivatesPtrQTy.withRestrict(), 3599 ImplicitParamDecl::Other); 3600 Args.push_back(&GtidArg); 3601 Args.push_back(&TaskTypeArg); 3602 const auto &TaskEntryFnInfo = 3603 CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args); 3604 llvm::FunctionType *TaskEntryTy = 3605 CGM.getTypes().GetFunctionType(TaskEntryFnInfo); 3606 std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_entry", ""}); 3607 auto *TaskEntry = llvm::Function::Create( 3608 TaskEntryTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule()); 3609 CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskEntry, TaskEntryFnInfo); 3610 TaskEntry->setDoesNotRecurse(); 3611 CodeGenFunction CGF(CGM); 3612 CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args, 3613 Loc, Loc); 3614 3615 // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map, 3616 // tt, 3617 // For taskloops: 3618 // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter, 3619 // tt->task_data.shareds); 3620 llvm::Value *GtidParam = CGF.EmitLoadOfScalar( 3621 CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc); 3622 LValue TDBase = CGF.EmitLoadOfPointerLValue( 3623 CGF.GetAddrOfLocalVar(&TaskTypeArg), 3624 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>()); 3625 const auto *KmpTaskTWithPrivatesQTyRD = 3626 cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl()); 3627 LValue Base = 3628 CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin()); 3629 const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl()); 3630 auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId); 3631 LValue PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI); 3632 llvm::Value *PartidParam = PartIdLVal.getPointer(CGF); 3633 3634 auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds); 3635 LValue SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI); 3636 llvm::Value *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 3637 CGF.EmitLoadOfScalar(SharedsLVal, Loc), 3638 CGF.ConvertTypeForMem(SharedsPtrTy)); 3639 3640 auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1); 3641 llvm::Value *PrivatesParam; 3642 if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) { 3643 LValue PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI); 3644 PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 3645 PrivatesLVal.getPointer(CGF), CGF.VoidPtrTy); 3646 } else { 3647 PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); 
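    // No privates record exists for this task, so the outlined body receives
    // a typed null in the privates slot instead of a real mapping.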
3648 } 3649 3650 llvm::Value *CommonArgs[] = {GtidParam, PartidParam, PrivatesParam, 3651 TaskPrivatesMap, 3652 CGF.Builder 3653 .CreatePointerBitCastOrAddrSpaceCast( 3654 TDBase.getAddress(CGF), CGF.VoidPtrTy) 3655 .getPointer()}; 3656 SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs), 3657 std::end(CommonArgs)); 3658 if (isOpenMPTaskLoopDirective(Kind)) { 3659 auto LBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound); 3660 LValue LBLVal = CGF.EmitLValueForField(Base, *LBFI); 3661 llvm::Value *LBParam = CGF.EmitLoadOfScalar(LBLVal, Loc); 3662 auto UBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound); 3663 LValue UBLVal = CGF.EmitLValueForField(Base, *UBFI); 3664 llvm::Value *UBParam = CGF.EmitLoadOfScalar(UBLVal, Loc); 3665 auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride); 3666 LValue StLVal = CGF.EmitLValueForField(Base, *StFI); 3667 llvm::Value *StParam = CGF.EmitLoadOfScalar(StLVal, Loc); 3668 auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter); 3669 LValue LILVal = CGF.EmitLValueForField(Base, *LIFI); 3670 llvm::Value *LIParam = CGF.EmitLoadOfScalar(LILVal, Loc); 3671 auto RFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTReductions); 3672 LValue RLVal = CGF.EmitLValueForField(Base, *RFI); 3673 llvm::Value *RParam = CGF.EmitLoadOfScalar(RLVal, Loc); 3674 CallArgs.push_back(LBParam); 3675 CallArgs.push_back(UBParam); 3676 CallArgs.push_back(StParam); 3677 CallArgs.push_back(LIParam); 3678 CallArgs.push_back(RParam); 3679 } 3680 CallArgs.push_back(SharedsParam); 3681 3682 CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskFunction, 3683 CallArgs); 3684 CGF.EmitStoreThroughLValue(RValue::get(CGF.Builder.getInt32(/*C=*/0)), 3685 CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty)); 3686 CGF.FinishFunction(); 3687 return TaskEntry; 3688 } 3689 3690 static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM, 3691 SourceLocation Loc, 3692 QualType KmpInt32Ty, 3693 QualType KmpTaskTWithPrivatesPtrQTy, 3694 QualType KmpTaskTWithPrivatesQTy) { 3695 ASTContext &C = CGM.getContext(); 3696 FunctionArgList Args; 3697 ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty, 3698 ImplicitParamDecl::Other); 3699 ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3700 KmpTaskTWithPrivatesPtrQTy.withRestrict(), 3701 ImplicitParamDecl::Other); 3702 Args.push_back(&GtidArg); 3703 Args.push_back(&TaskTypeArg); 3704 const auto &DestructorFnInfo = 3705 CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args); 3706 llvm::FunctionType *DestructorFnTy = 3707 CGM.getTypes().GetFunctionType(DestructorFnInfo); 3708 std::string Name = 3709 CGM.getOpenMPRuntime().getName({"omp_task_destructor", ""}); 3710 auto *DestructorFn = 3711 llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage, 3712 Name, &CGM.getModule()); 3713 CGM.SetInternalFunctionAttributes(GlobalDecl(), DestructorFn, 3714 DestructorFnInfo); 3715 DestructorFn->setDoesNotRecurse(); 3716 CodeGenFunction CGF(CGM); 3717 CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo, 3718 Args, Loc, Loc); 3719 3720 LValue Base = CGF.EmitLoadOfPointerLValue( 3721 CGF.GetAddrOfLocalVar(&TaskTypeArg), 3722 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>()); 3723 const auto *KmpTaskTWithPrivatesQTyRD = 3724 cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl()); 3725 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin()); 3726 Base = CGF.EmitLValueForField(Base, *FI); 3727 for 
(const auto *Field : 3728 cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) { 3729 if (QualType::DestructionKind DtorKind = 3730 Field->getType().isDestructedType()) { 3731 LValue FieldLValue = CGF.EmitLValueForField(Base, Field); 3732 CGF.pushDestroy(DtorKind, FieldLValue.getAddress(CGF), Field->getType()); 3733 } 3734 } 3735 CGF.FinishFunction(); 3736 return DestructorFn; 3737 } 3738 3739 /// Emit a privates mapping function for correct handling of private and 3740 /// firstprivate variables. 3741 /// \code 3742 /// void .omp_task_privates_map.(const .privates. *noalias privs, <ty1> 3743 /// **noalias priv1,..., <tyn> **noalias privn) { 3744 /// *priv1 = &.privates.priv1; 3745 /// ...; 3746 /// *privn = &.privates.privn; 3747 /// } 3748 /// \endcode 3749 static llvm::Value * 3750 emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc, 3751 const OMPTaskDataTy &Data, QualType PrivatesQTy, 3752 ArrayRef<PrivateDataTy> Privates) { 3753 ASTContext &C = CGM.getContext(); 3754 FunctionArgList Args; 3755 ImplicitParamDecl TaskPrivatesArg( 3756 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3757 C.getPointerType(PrivatesQTy).withConst().withRestrict(), 3758 ImplicitParamDecl::Other); 3759 Args.push_back(&TaskPrivatesArg); 3760 llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, unsigned> PrivateVarsPos; 3761 unsigned Counter = 1; 3762 for (const Expr *E : Data.PrivateVars) { 3763 Args.push_back(ImplicitParamDecl::Create( 3764 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3765 C.getPointerType(C.getPointerType(E->getType())) 3766 .withConst() 3767 .withRestrict(), 3768 ImplicitParamDecl::Other)); 3769 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 3770 PrivateVarsPos[VD] = Counter; 3771 ++Counter; 3772 } 3773 for (const Expr *E : Data.FirstprivateVars) { 3774 Args.push_back(ImplicitParamDecl::Create( 3775 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3776 C.getPointerType(C.getPointerType(E->getType())) 3777 .withConst() 3778 .withRestrict(), 3779 ImplicitParamDecl::Other)); 3780 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 3781 PrivateVarsPos[VD] = Counter; 3782 ++Counter; 3783 } 3784 for (const Expr *E : Data.LastprivateVars) { 3785 Args.push_back(ImplicitParamDecl::Create( 3786 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3787 C.getPointerType(C.getPointerType(E->getType())) 3788 .withConst() 3789 .withRestrict(), 3790 ImplicitParamDecl::Other)); 3791 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 3792 PrivateVarsPos[VD] = Counter; 3793 ++Counter; 3794 } 3795 for (const VarDecl *VD : Data.PrivateLocals) { 3796 QualType Ty = VD->getType().getNonReferenceType(); 3797 if (VD->getType()->isLValueReferenceType()) 3798 Ty = C.getPointerType(Ty); 3799 if (isAllocatableDecl(VD)) 3800 Ty = C.getPointerType(Ty); 3801 Args.push_back(ImplicitParamDecl::Create( 3802 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3803 C.getPointerType(C.getPointerType(Ty)).withConst().withRestrict(), 3804 ImplicitParamDecl::Other)); 3805 PrivateVarsPos[VD] = Counter; 3806 ++Counter; 3807 } 3808 const auto &TaskPrivatesMapFnInfo = 3809 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 3810 llvm::FunctionType *TaskPrivatesMapTy = 3811 CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo); 3812 std::string Name = 3813 CGM.getOpenMPRuntime().getName({"omp_task_privates_map", ""}); 3814 auto *TaskPrivatesMap = llvm::Function::Create( 3815 TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage, Name, 3816 &CGM.getModule()); 3817 
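  // For a task with privates {int a; double b;} the helper generated here
  // would look roughly like this (hypothetical names, mirroring the \code
  // sketch above):
  //
  //   void .omp_task_privates_map.(const .privates.* __restrict privs,
  //                                int **__restrict pa,
  //                                double **__restrict pb) {
  //     *pa = &privs->a;
  //     *pb = &privs->b;
  //   }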
CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskPrivatesMap, 3818 TaskPrivatesMapFnInfo); 3819 if (CGM.getLangOpts().Optimize) { 3820 TaskPrivatesMap->removeFnAttr(llvm::Attribute::NoInline); 3821 TaskPrivatesMap->removeFnAttr(llvm::Attribute::OptimizeNone); 3822 TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline); 3823 } 3824 CodeGenFunction CGF(CGM); 3825 CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap, 3826 TaskPrivatesMapFnInfo, Args, Loc, Loc); 3827 3828 // *privi = &.privates.privi; 3829 LValue Base = CGF.EmitLoadOfPointerLValue( 3830 CGF.GetAddrOfLocalVar(&TaskPrivatesArg), 3831 TaskPrivatesArg.getType()->castAs<PointerType>()); 3832 const auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl()); 3833 Counter = 0; 3834 for (const FieldDecl *Field : PrivatesQTyRD->fields()) { 3835 LValue FieldLVal = CGF.EmitLValueForField(Base, Field); 3836 const VarDecl *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]]; 3837 LValue RefLVal = 3838 CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType()); 3839 LValue RefLoadLVal = CGF.EmitLoadOfPointerLValue( 3840 RefLVal.getAddress(CGF), RefLVal.getType()->castAs<PointerType>()); 3841 CGF.EmitStoreOfScalar(FieldLVal.getPointer(CGF), RefLoadLVal); 3842 ++Counter; 3843 } 3844 CGF.FinishFunction(); 3845 return TaskPrivatesMap; 3846 } 3847 3848 /// Emit initialization for private variables in task-based directives. 3849 static void emitPrivatesInit(CodeGenFunction &CGF, 3850 const OMPExecutableDirective &D, 3851 Address KmpTaskSharedsPtr, LValue TDBase, 3852 const RecordDecl *KmpTaskTWithPrivatesQTyRD, 3853 QualType SharedsTy, QualType SharedsPtrTy, 3854 const OMPTaskDataTy &Data, 3855 ArrayRef<PrivateDataTy> Privates, bool ForDup) { 3856 ASTContext &C = CGF.getContext(); 3857 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin()); 3858 LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI); 3859 OpenMPDirectiveKind Kind = isOpenMPTaskLoopDirective(D.getDirectiveKind()) 3860 ? OMPD_taskloop 3861 : OMPD_task; 3862 const CapturedStmt &CS = *D.getCapturedStmt(Kind); 3863 CodeGenFunction::CGCapturedStmtInfo CapturesInfo(CS); 3864 LValue SrcBase; 3865 bool IsTargetTask = 3866 isOpenMPTargetDataManagementDirective(D.getDirectiveKind()) || 3867 isOpenMPTargetExecutionDirective(D.getDirectiveKind()); 3868 // For target-based directives skip 4 firstprivate arrays BasePointersArray, 3869 // PointersArray, SizesArray, and MappersArray. The original variables for 3870 // these arrays are not captured and we get their addresses explicitly. 3871 if ((!IsTargetTask && !Data.FirstprivateVars.empty() && ForDup) || 3872 (IsTargetTask && KmpTaskSharedsPtr.isValid())) { 3873 SrcBase = CGF.MakeAddrLValue( 3874 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 3875 KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy)), 3876 SharedsTy); 3877 } 3878 FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin(); 3879 for (const PrivateDataTy &Pair : Privates) { 3880 // Do not initialize private locals. 
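    // Locals only need storage in the privates record; there is no captured
    // original to copy from, so no initializer runs here.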
3881 if (Pair.second.isLocalPrivate()) { 3882 ++FI; 3883 continue; 3884 } 3885 const VarDecl *VD = Pair.second.PrivateCopy; 3886 const Expr *Init = VD->getAnyInitializer(); 3887 if (Init && (!ForDup || (isa<CXXConstructExpr>(Init) && 3888 !CGF.isTrivialInitializer(Init)))) { 3889 LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI); 3890 if (const VarDecl *Elem = Pair.second.PrivateElemInit) { 3891 const VarDecl *OriginalVD = Pair.second.Original; 3892 // Check if the variable is the target-based BasePointersArray, 3893 // PointersArray, SizesArray, or MappersArray. 3894 LValue SharedRefLValue; 3895 QualType Type = PrivateLValue.getType(); 3896 const FieldDecl *SharedField = CapturesInfo.lookup(OriginalVD); 3897 if (IsTargetTask && !SharedField) { 3898 assert(isa<ImplicitParamDecl>(OriginalVD) && 3899 isa<CapturedDecl>(OriginalVD->getDeclContext()) && 3900 cast<CapturedDecl>(OriginalVD->getDeclContext()) 3901 ->getNumParams() == 0 && 3902 isa<TranslationUnitDecl>( 3903 cast<CapturedDecl>(OriginalVD->getDeclContext()) 3904 ->getDeclContext()) && 3905 "Expected artificial target data variable."); 3906 SharedRefLValue = 3907 CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(OriginalVD), Type); 3908 } else if (ForDup) { 3909 SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField); 3910 SharedRefLValue = CGF.MakeAddrLValue( 3911 Address(SharedRefLValue.getPointer(CGF), 3912 C.getDeclAlign(OriginalVD)), 3913 SharedRefLValue.getType(), LValueBaseInfo(AlignmentSource::Decl), 3914 SharedRefLValue.getTBAAInfo()); 3915 } else if (CGF.LambdaCaptureFields.count( 3916 Pair.second.Original->getCanonicalDecl()) > 0 || 3917 isa_and_nonnull<BlockDecl>(CGF.CurCodeDecl)) { 3918 SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef); 3919 } else { 3920 // Processing for implicitly captured variables. 3921 InlinedOpenMPRegionRAII Region( 3922 CGF, [](CodeGenFunction &, PrePostActionTy &) {}, OMPD_unknown, 3923 /*HasCancel=*/false, /*NoInheritance=*/true); 3924 SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef); 3925 } 3926 if (Type->isArrayType()) { 3927 // Initialize firstprivate array. 3928 if (!isa<CXXConstructExpr>(Init) || CGF.isTrivialInitializer(Init)) { 3929 // Perform simple memcpy. 3930 CGF.EmitAggregateAssign(PrivateLValue, SharedRefLValue, Type); 3931 } else { 3932 // Initialize firstprivate array using element-by-element 3933 // initialization. 3934 CGF.EmitOMPAggregateAssign( 3935 PrivateLValue.getAddress(CGF), SharedRefLValue.getAddress(CGF), 3936 Type, 3937 [&CGF, Elem, Init, &CapturesInfo](Address DestElement, 3938 Address SrcElement) { 3939 // Clean up any temporaries needed by the initialization. 3940 CodeGenFunction::OMPPrivateScope InitScope(CGF); 3941 InitScope.addPrivate( 3942 Elem, [SrcElement]() -> Address { return SrcElement; }); 3943 (void)InitScope.Privatize(); 3944 // Emit initialization for single element. 
3945 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII( 3946 CGF, &CapturesInfo); 3947 CGF.EmitAnyExprToMem(Init, DestElement, 3948 Init->getType().getQualifiers(), 3949 /*IsInitializer=*/false); 3950 }); 3951 } 3952 } else { 3953 CodeGenFunction::OMPPrivateScope InitScope(CGF); 3954 InitScope.addPrivate(Elem, [SharedRefLValue, &CGF]() -> Address { 3955 return SharedRefLValue.getAddress(CGF); 3956 }); 3957 (void)InitScope.Privatize(); 3958 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo); 3959 CGF.EmitExprAsInit(Init, VD, PrivateLValue, 3960 /*capturedByInit=*/false); 3961 } 3962 } else { 3963 CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false); 3964 } 3965 } 3966 ++FI; 3967 } 3968 } 3969 3970 /// Check if duplication function is required for taskloops. 3971 static bool checkInitIsRequired(CodeGenFunction &CGF, 3972 ArrayRef<PrivateDataTy> Privates) { 3973 bool InitRequired = false; 3974 for (const PrivateDataTy &Pair : Privates) { 3975 if (Pair.second.isLocalPrivate()) 3976 continue; 3977 const VarDecl *VD = Pair.second.PrivateCopy; 3978 const Expr *Init = VD->getAnyInitializer(); 3979 InitRequired = InitRequired || (Init && isa<CXXConstructExpr>(Init) && 3980 !CGF.isTrivialInitializer(Init)); 3981 if (InitRequired) 3982 break; 3983 } 3984 return InitRequired; 3985 } 3986 3987 3988 /// Emit task_dup function (for initialization of 3989 /// private/firstprivate/lastprivate vars and last_iter flag) 3990 /// \code 3991 /// void __task_dup_entry(kmp_task_t *task_dst, const kmp_task_t *task_src, int 3992 /// lastpriv) { 3993 /// // setup lastprivate flag 3994 /// task_dst->last = lastpriv; 3995 /// // could be constructor calls here... 3996 /// } 3997 /// \endcode 3998 static llvm::Value * 3999 emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc, 4000 const OMPExecutableDirective &D, 4001 QualType KmpTaskTWithPrivatesPtrQTy, 4002 const RecordDecl *KmpTaskTWithPrivatesQTyRD, 4003 const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy, 4004 QualType SharedsPtrTy, const OMPTaskDataTy &Data, 4005 ArrayRef<PrivateDataTy> Privates, bool WithLastIter) { 4006 ASTContext &C = CGM.getContext(); 4007 FunctionArgList Args; 4008 ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 4009 KmpTaskTWithPrivatesPtrQTy, 4010 ImplicitParamDecl::Other); 4011 ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 4012 KmpTaskTWithPrivatesPtrQTy, 4013 ImplicitParamDecl::Other); 4014 ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy, 4015 ImplicitParamDecl::Other); 4016 Args.push_back(&DstArg); 4017 Args.push_back(&SrcArg); 4018 Args.push_back(&LastprivArg); 4019 const auto &TaskDupFnInfo = 4020 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 4021 llvm::FunctionType *TaskDupTy = CGM.getTypes().GetFunctionType(TaskDupFnInfo); 4022 std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_dup", ""}); 4023 auto *TaskDup = llvm::Function::Create( 4024 TaskDupTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule()); 4025 CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskDup, TaskDupFnInfo); 4026 TaskDup->setDoesNotRecurse(); 4027 CodeGenFunction CGF(CGM); 4028 CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskDup, TaskDupFnInfo, Args, Loc, 4029 Loc); 4030 4031 LValue TDBase = CGF.EmitLoadOfPointerLValue( 4032 CGF.GetAddrOfLocalVar(&DstArg), 4033 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>()); 4034 // task_dst->liter = lastpriv; 4035 if (WithLastIter) { 4036 auto LIFI = 
std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter); 4037 LValue Base = CGF.EmitLValueForField( 4038 TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin()); 4039 LValue LILVal = CGF.EmitLValueForField(Base, *LIFI); 4040 llvm::Value *Lastpriv = CGF.EmitLoadOfScalar( 4041 CGF.GetAddrOfLocalVar(&LastprivArg), /*Volatile=*/false, C.IntTy, Loc); 4042 CGF.EmitStoreOfScalar(Lastpriv, LILVal); 4043 } 4044 4045 // Emit initial values for private copies (if any). 4046 assert(!Privates.empty()); 4047 Address KmpTaskSharedsPtr = Address::invalid(); 4048 if (!Data.FirstprivateVars.empty()) { 4049 LValue TDBase = CGF.EmitLoadOfPointerLValue( 4050 CGF.GetAddrOfLocalVar(&SrcArg), 4051 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>()); 4052 LValue Base = CGF.EmitLValueForField( 4053 TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin()); 4054 KmpTaskSharedsPtr = Address( 4055 CGF.EmitLoadOfScalar(CGF.EmitLValueForField( 4056 Base, *std::next(KmpTaskTQTyRD->field_begin(), 4057 KmpTaskTShareds)), 4058 Loc), 4059 CGM.getNaturalTypeAlignment(SharedsTy)); 4060 } 4061 emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase, KmpTaskTWithPrivatesQTyRD, 4062 SharedsTy, SharedsPtrTy, Data, Privates, /*ForDup=*/true); 4063 CGF.FinishFunction(); 4064 return TaskDup; 4065 } 4066 4067 /// Checks if destructor function is required to be generated. 4068 /// \return true if cleanups are required, false otherwise. 4069 static bool 4070 checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD, 4071 ArrayRef<PrivateDataTy> Privates) { 4072 for (const PrivateDataTy &P : Privates) { 4073 if (P.second.isLocalPrivate()) 4074 continue; 4075 QualType Ty = P.second.Original->getType().getNonReferenceType(); 4076 if (Ty.isDestructedType()) 4077 return true; 4078 } 4079 return false; 4080 } 4081 4082 namespace { 4083 /// Loop generator for OpenMP iterator expression. 
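/// For a clause modifier such as 'iterator(it = begin:end:step)' the scope
/// below emits, roughly (a sketch of the control flow, not the exact IR):
/// \code
/// counter = 0;
/// cont:
/// if (counter < number-of-iterations) goto body; else goto exit;
/// body:
/// it = begin + counter * step;
/// ... uses of the iterator ...
/// counter = counter + 1;
/// goto cont;
/// exit:
/// \endcode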
4084 class OMPIteratorGeneratorScope final 4085 : public CodeGenFunction::OMPPrivateScope { 4086 CodeGenFunction &CGF; 4087 const OMPIteratorExpr *E = nullptr; 4088 SmallVector<CodeGenFunction::JumpDest, 4> ContDests; 4089 SmallVector<CodeGenFunction::JumpDest, 4> ExitDests; 4090 OMPIteratorGeneratorScope() = delete; 4091 OMPIteratorGeneratorScope(OMPIteratorGeneratorScope &) = delete; 4092 4093 public: 4094 OMPIteratorGeneratorScope(CodeGenFunction &CGF, const OMPIteratorExpr *E) 4095 : CodeGenFunction::OMPPrivateScope(CGF), CGF(CGF), E(E) { 4096 if (!E) 4097 return; 4098 SmallVector<llvm::Value *, 4> Uppers; 4099 for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) { 4100 Uppers.push_back(CGF.EmitScalarExpr(E->getHelper(I).Upper)); 4101 const auto *VD = cast<VarDecl>(E->getIteratorDecl(I)); 4102 addPrivate(VD, [&CGF, VD]() { 4103 return CGF.CreateMemTemp(VD->getType(), VD->getName()); 4104 }); 4105 const OMPIteratorHelperData &HelperData = E->getHelper(I); 4106 addPrivate(HelperData.CounterVD, [&CGF, &HelperData]() { 4107 return CGF.CreateMemTemp(HelperData.CounterVD->getType(), 4108 "counter.addr"); 4109 }); 4110 } 4111 Privatize(); 4112 4113 for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) { 4114 const OMPIteratorHelperData &HelperData = E->getHelper(I); 4115 LValue CLVal = 4116 CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(HelperData.CounterVD), 4117 HelperData.CounterVD->getType()); 4118 // Counter = 0; 4119 CGF.EmitStoreOfScalar( 4120 llvm::ConstantInt::get(CLVal.getAddress(CGF).getElementType(), 0), 4121 CLVal); 4122 CodeGenFunction::JumpDest &ContDest = 4123 ContDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.cont")); 4124 CodeGenFunction::JumpDest &ExitDest = 4125 ExitDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.exit")); 4126 // N = <number-of_iterations>; 4127 llvm::Value *N = Uppers[I]; 4128 // cont: 4129 // if (Counter < N) goto body; else goto exit; 4130 CGF.EmitBlock(ContDest.getBlock()); 4131 auto *CVal = 4132 CGF.EmitLoadOfScalar(CLVal, HelperData.CounterVD->getLocation()); 4133 llvm::Value *Cmp = 4134 HelperData.CounterVD->getType()->isSignedIntegerOrEnumerationType() 4135 ? 
CGF.Builder.CreateICmpSLT(CVal, N) 4136 : CGF.Builder.CreateICmpULT(CVal, N); 4137 llvm::BasicBlock *BodyBB = CGF.createBasicBlock("iter.body"); 4138 CGF.Builder.CreateCondBr(Cmp, BodyBB, ExitDest.getBlock()); 4139 // body: 4140 CGF.EmitBlock(BodyBB); 4141 // Iteri = Begini + Counter * Stepi; 4142 CGF.EmitIgnoredExpr(HelperData.Update); 4143 } 4144 } 4145 ~OMPIteratorGeneratorScope() { 4146 if (!E) 4147 return; 4148 for (unsigned I = E->numOfIterators(); I > 0; --I) { 4149 // Counter = Counter + 1; 4150 const OMPIteratorHelperData &HelperData = E->getHelper(I - 1); 4151 CGF.EmitIgnoredExpr(HelperData.CounterUpdate); 4152 // goto cont; 4153 CGF.EmitBranchThroughCleanup(ContDests[I - 1]); 4154 // exit: 4155 CGF.EmitBlock(ExitDests[I - 1].getBlock(), /*IsFinished=*/I == 1); 4156 } 4157 } 4158 }; 4159 } // namespace 4160 4161 static std::pair<llvm::Value *, llvm::Value *> 4162 getPointerAndSize(CodeGenFunction &CGF, const Expr *E) { 4163 const auto *OASE = dyn_cast<OMPArrayShapingExpr>(E); 4164 llvm::Value *Addr; 4165 if (OASE) { 4166 const Expr *Base = OASE->getBase(); 4167 Addr = CGF.EmitScalarExpr(Base); 4168 } else { 4169 Addr = CGF.EmitLValue(E).getPointer(CGF); 4170 } 4171 llvm::Value *SizeVal; 4172 QualType Ty = E->getType(); 4173 if (OASE) { 4174 SizeVal = CGF.getTypeSize(OASE->getBase()->getType()->getPointeeType()); 4175 for (const Expr *SE : OASE->getDimensions()) { 4176 llvm::Value *Sz = CGF.EmitScalarExpr(SE); 4177 Sz = CGF.EmitScalarConversion( 4178 Sz, SE->getType(), CGF.getContext().getSizeType(), SE->getExprLoc()); 4179 SizeVal = CGF.Builder.CreateNUWMul(SizeVal, Sz); 4180 } 4181 } else if (const auto *ASE = 4182 dyn_cast<OMPArraySectionExpr>(E->IgnoreParenImpCasts())) { 4183 LValue UpAddrLVal = 4184 CGF.EmitOMPArraySectionExpr(ASE, /*IsLowerBound=*/false); 4185 Address UpAddrAddress = UpAddrLVal.getAddress(CGF); 4186 llvm::Value *UpAddr = CGF.Builder.CreateConstGEP1_32( 4187 UpAddrAddress.getElementType(), UpAddrAddress.getPointer(), /*Idx0=*/1); 4188 llvm::Value *LowIntPtr = CGF.Builder.CreatePtrToInt(Addr, CGF.SizeTy); 4189 llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGF.SizeTy); 4190 SizeVal = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr); 4191 } else { 4192 SizeVal = CGF.getTypeSize(Ty); 4193 } 4194 return std::make_pair(Addr, SizeVal); 4195 } 4196 4197 /// Builds kmp_depend_info, if it is not built yet, and builds flags type. 4198 static void getKmpAffinityType(ASTContext &C, QualType &KmpTaskAffinityInfoTy) { 4199 QualType FlagsTy = C.getIntTypeForBitwidth(32, /*Signed=*/false); 4200 if (KmpTaskAffinityInfoTy.isNull()) { 4201 RecordDecl *KmpAffinityInfoRD = 4202 C.buildImplicitRecord("kmp_task_affinity_info_t"); 4203 KmpAffinityInfoRD->startDefinition(); 4204 addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getIntPtrType()); 4205 addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getSizeType()); 4206 addFieldToRecordDecl(C, KmpAffinityInfoRD, FlagsTy); 4207 KmpAffinityInfoRD->completeDefinition(); 4208 KmpTaskAffinityInfoTy = C.getRecordType(KmpAffinityInfoRD); 4209 } 4210 } 4211 4212 CGOpenMPRuntime::TaskResultTy 4213 CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc, 4214 const OMPExecutableDirective &D, 4215 llvm::Function *TaskFunction, QualType SharedsTy, 4216 Address Shareds, const OMPTaskDataTy &Data) { 4217 ASTContext &C = CGM.getContext(); 4218 llvm::SmallVector<PrivateDataTy, 4> Privates; 4219 // Aggregate privates and sort them by the alignment. 
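  // Sorting by decreasing alignment keeps the generated .kmp_privates.t
  // record free of internal padding; e.g. a hypothetical pair
  // {char c; double d;} is laid out as {double d; char c;}.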
4220 const auto *I = Data.PrivateCopies.begin(); 4221 for (const Expr *E : Data.PrivateVars) { 4222 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 4223 Privates.emplace_back( 4224 C.getDeclAlign(VD), 4225 PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()), 4226 /*PrivateElemInit=*/nullptr)); 4227 ++I; 4228 } 4229 I = Data.FirstprivateCopies.begin(); 4230 const auto *IElemInitRef = Data.FirstprivateInits.begin(); 4231 for (const Expr *E : Data.FirstprivateVars) { 4232 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 4233 Privates.emplace_back( 4234 C.getDeclAlign(VD), 4235 PrivateHelpersTy( 4236 E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()), 4237 cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl()))); 4238 ++I; 4239 ++IElemInitRef; 4240 } 4241 I = Data.LastprivateCopies.begin(); 4242 for (const Expr *E : Data.LastprivateVars) { 4243 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 4244 Privates.emplace_back( 4245 C.getDeclAlign(VD), 4246 PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()), 4247 /*PrivateElemInit=*/nullptr)); 4248 ++I; 4249 } 4250 for (const VarDecl *VD : Data.PrivateLocals) { 4251 if (isAllocatableDecl(VD)) 4252 Privates.emplace_back(CGM.getPointerAlign(), PrivateHelpersTy(VD)); 4253 else 4254 Privates.emplace_back(C.getDeclAlign(VD), PrivateHelpersTy(VD)); 4255 } 4256 llvm::stable_sort(Privates, 4257 [](const PrivateDataTy &L, const PrivateDataTy &R) { 4258 return L.first > R.first; 4259 }); 4260 QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1); 4261 // Build type kmp_routine_entry_t (if not built yet). 4262 emitKmpRoutineEntryT(KmpInt32Ty); 4263 // Build type kmp_task_t (if not built yet). 4264 if (isOpenMPTaskLoopDirective(D.getDirectiveKind())) { 4265 if (SavedKmpTaskloopTQTy.isNull()) { 4266 SavedKmpTaskloopTQTy = C.getRecordType(createKmpTaskTRecordDecl( 4267 CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy)); 4268 } 4269 KmpTaskTQTy = SavedKmpTaskloopTQTy; 4270 } else { 4271 assert((D.getDirectiveKind() == OMPD_task || 4272 isOpenMPTargetExecutionDirective(D.getDirectiveKind()) || 4273 isOpenMPTargetDataManagementDirective(D.getDirectiveKind())) && 4274 "Expected taskloop, task or target directive"); 4275 if (SavedKmpTaskTQTy.isNull()) { 4276 SavedKmpTaskTQTy = C.getRecordType(createKmpTaskTRecordDecl( 4277 CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy)); 4278 } 4279 KmpTaskTQTy = SavedKmpTaskTQTy; 4280 } 4281 const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl()); 4282 // Build particular struct kmp_task_t for the given task. 4283 const RecordDecl *KmpTaskTWithPrivatesQTyRD = 4284 createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates); 4285 QualType KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD); 4286 QualType KmpTaskTWithPrivatesPtrQTy = 4287 C.getPointerType(KmpTaskTWithPrivatesQTy); 4288 llvm::Type *KmpTaskTWithPrivatesTy = CGF.ConvertType(KmpTaskTWithPrivatesQTy); 4289 llvm::Type *KmpTaskTWithPrivatesPtrTy = 4290 KmpTaskTWithPrivatesTy->getPointerTo(); 4291 llvm::Value *KmpTaskTWithPrivatesTySize = 4292 CGF.getTypeSize(KmpTaskTWithPrivatesQTy); 4293 QualType SharedsPtrTy = C.getPointerType(SharedsTy); 4294 4295 // Emit initial values for private copies (if any). 
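  // The privates map built below is how the proxy task entry recovers the
  // addresses of the privatized copies for the outlined body; if the task
  // has no privates, a null map pointer is passed instead.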
  llvm::Value *TaskPrivatesMap = nullptr;
  llvm::Type *TaskPrivatesMapTy =
      std::next(TaskFunction->arg_begin(), 3)->getType();
  if (!Privates.empty()) {
    auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
    TaskPrivatesMap =
        emitTaskPrivateMappingFunction(CGM, Loc, Data, FI->getType(), Privates);
    TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        TaskPrivatesMap, TaskPrivatesMapTy);
  } else {
    TaskPrivatesMap = llvm::ConstantPointerNull::get(
        cast<llvm::PointerType>(TaskPrivatesMapTy));
  }
  // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid,
  // kmp_task_t *tt);
  llvm::Function *TaskEntry = emitProxyTaskFunction(
      CGM, Loc, D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
      KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction,
      TaskPrivatesMap);

  // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
  // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
  // kmp_routine_entry_t *task_entry);
  // Task flags. Format is taken from
  // https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h,
  // description of kmp_tasking_flags struct.
  enum {
    TiedFlag = 0x1,
    FinalFlag = 0x2,
    DestructorsFlag = 0x8,
    PriorityFlag = 0x20,
    DetachableFlag = 0x40,
  };
  unsigned Flags = Data.Tied ? TiedFlag : 0;
  bool NeedsCleanup = false;
  if (!Privates.empty()) {
    NeedsCleanup =
        checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD, Privates);
    if (NeedsCleanup)
      Flags = Flags | DestructorsFlag;
  }
  if (Data.Priority.getInt())
    Flags = Flags | PriorityFlag;
  if (D.hasClausesOfKind<OMPDetachClause>())
    Flags = Flags | DetachableFlag;
  llvm::Value *TaskFlags =
      Data.Final.getPointer()
          ? CGF.Builder.CreateSelect(Data.Final.getPointer(),
                                     CGF.Builder.getInt32(FinalFlag),
                                     CGF.Builder.getInt32(/*C=*/0))
          : CGF.Builder.getInt32(Data.Final.getInt() ? FinalFlag : 0);
  TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags));
  llvm::Value *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy));
  SmallVector<llvm::Value *, 8> AllocArgs = {
      emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), TaskFlags,
      KmpTaskTWithPrivatesTySize, SharedsSize,
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TaskEntry,
                                                      KmpRoutineEntryPtrTy)};
  llvm::Value *NewTask;
  if (D.hasClausesOfKind<OMPNowaitClause>()) {
    // Check if we have any device clause associated with the directive.
    const Expr *Device = nullptr;
    if (auto *C = D.getSingleClause<OMPDeviceClause>())
      Device = C->getDevice();
    // Emit device ID if any, otherwise use the default value.
    llvm::Value *DeviceID;
    if (Device)
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    else
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    AllocArgs.push_back(DeviceID);
    NewTask = CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_omp_target_task_alloc),
        AllocArgs);
  } else {
    NewTask =
        CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                                CGM.getModule(), OMPRTL___kmpc_omp_task_alloc),
                            AllocArgs);
  }
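  // For illustration (a sketch, not emitted verbatim): a plain tied
  //   #pragma omp task
  // with no final/priority/detach clauses ends up calling
  //   __kmpc_omp_task_alloc(&loc, gtid, /*flags=*/1, sizeof_kmp_task_t,
  //                         sizeof_shareds, .omp_task_entry.);
  // where flags == TiedFlag, and the runtime fills in the routine and
  // part_id fields of the returned task descriptor.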
  // Emit detach clause initialization.
  // evt = (typeof(evt))__kmpc_task_allow_completion_event(loc, tid,
  // task_descriptor);
  if (const auto *DC = D.getSingleClause<OMPDetachClause>()) {
    const Expr *Evt = DC->getEventHandler()->IgnoreParenImpCasts();
    LValue EvtLVal = CGF.EmitLValue(Evt);

    // Build kmp_event_t *__kmpc_task_allow_completion_event(ident_t *loc_ref,
    // int gtid, kmp_task_t *task);
    llvm::Value *Loc = emitUpdateLocation(CGF, DC->getBeginLoc());
    llvm::Value *Tid = getThreadID(CGF, DC->getBeginLoc());
    Tid = CGF.Builder.CreateIntCast(Tid, CGF.IntTy, /*isSigned=*/false);
    llvm::Value *EvtVal = CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_task_allow_completion_event),
        {Loc, Tid, NewTask});
    EvtVal = CGF.EmitScalarConversion(EvtVal, C.VoidPtrTy, Evt->getType(),
                                      Evt->getExprLoc());
    CGF.EmitStoreOfScalar(EvtVal, EvtLVal);
  }
  // Process affinity clauses.
  if (D.hasClausesOfKind<OMPAffinityClause>()) {
    // Process list of affinity data.
    ASTContext &C = CGM.getContext();
    Address AffinitiesArray = Address::invalid();
    // Calculate the number of elements to form the array of affinity data.
    llvm::Value *NumOfElements = nullptr;
    unsigned NumAffinities = 0;
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      if (const Expr *Modifier = C->getModifier()) {
        const auto *IE = cast<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts());
        for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
          llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
          Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
          NumOfElements =
              NumOfElements ? CGF.Builder.CreateNUWMul(NumOfElements, Sz) : Sz;
        }
      } else {
        NumAffinities += C->varlist_size();
      }
    }
    getKmpAffinityType(CGM.getContext(), KmpTaskAffinityInfoTy);
    // Field ids in the kmp_task_affinity_info record.
    enum RTLAffinityInfoFieldsTy { BaseAddr, Len, Flags };

    QualType KmpTaskAffinityInfoArrayTy;
    if (NumOfElements) {
      NumOfElements = CGF.Builder.CreateNUWAdd(
          llvm::ConstantInt::get(CGF.SizeTy, NumAffinities), NumOfElements);
      auto *OVE = new (C) OpaqueValueExpr(
          Loc,
          C.getIntTypeForBitwidth(C.getTypeSize(C.getSizeType()), /*Signed=*/0),
          VK_PRValue);
      CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, OVE,
                                                    RValue::get(NumOfElements));
      KmpTaskAffinityInfoArrayTy =
          C.getVariableArrayType(KmpTaskAffinityInfoTy, OVE, ArrayType::Normal,
                                 /*IndexTypeQuals=*/0, SourceRange(Loc, Loc));
      // Properly emit variable-sized array.
      auto *PD = ImplicitParamDecl::Create(C, KmpTaskAffinityInfoArrayTy,
                                           ImplicitParamDecl::Other);
      CGF.EmitVarDecl(*PD);
      AffinitiesArray = CGF.GetAddrOfLocalVar(PD);
      NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
                                                /*isSigned=*/false);
    } else {
      KmpTaskAffinityInfoArrayTy = C.getConstantArrayType(
          KmpTaskAffinityInfoTy,
          llvm::APInt(C.getTypeSize(C.getSizeType()), NumAffinities), nullptr,
          ArrayType::Normal, /*IndexTypeQuals=*/0);
      AffinitiesArray =
          CGF.CreateMemTemp(KmpTaskAffinityInfoArrayTy, ".affs.arr.addr");
      AffinitiesArray = CGF.Builder.CreateConstArrayGEP(AffinitiesArray, 0);
      NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumAffinities,
                                             /*isSigned=*/false);
    }

    const auto *KmpAffinityInfoRD = KmpTaskAffinityInfoTy->getAsRecordDecl();
    // Fill the array with the elements that have no iterator modifier.
    unsigned Pos = 0;
    bool HasIterator = false;
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      if (C->getModifier()) {
        HasIterator = true;
        continue;
      }
      for (const Expr *E : C->varlists()) {
        llvm::Value *Addr;
        llvm::Value *Size;
        std::tie(Addr, Size) = getPointerAndSize(CGF, E);
        LValue Base =
            CGF.MakeAddrLValue(CGF.Builder.CreateConstGEP(AffinitiesArray, Pos),
                               KmpTaskAffinityInfoTy);
        // affs[i].base_addr = &<Affinities[i].second>;
        LValue BaseAddrLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
        CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
                              BaseAddrLVal);
        // affs[i].len = sizeof(<Affinities[i].second>);
        LValue LenLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
        CGF.EmitStoreOfScalar(Size, LenLVal);
        ++Pos;
      }
    }
    LValue PosLVal;
    if (HasIterator) {
      PosLVal = CGF.MakeAddrLValue(
          CGF.CreateMemTemp(C.getSizeType(), "affs.counter.addr"),
          C.getSizeType());
      CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
    }
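    // A compile-time index sufficed for the items above; once an iterator
    // modifier is involved, the trip count is only known at run time, so the
    // write position has to live in memory ("affs.counter.addr") and is
    // advanced as each element is emitted below.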
    // Process elements with iterators.
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      const Expr *Modifier = C->getModifier();
      if (!Modifier)
        continue;
      OMPIteratorGeneratorScope IteratorScope(
          CGF, cast_or_null<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts()));
      for (const Expr *E : C->varlists()) {
        llvm::Value *Addr;
        llvm::Value *Size;
        std::tie(Addr, Size) = getPointerAndSize(CGF, E);
        llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
        LValue Base = CGF.MakeAddrLValue(
            Address(CGF.Builder.CreateGEP(AffinitiesArray.getElementType(),
                                          AffinitiesArray.getPointer(), Idx),
                    AffinitiesArray.getAlignment()),
            KmpTaskAffinityInfoTy);
        // affs[i].base_addr = &<Affinities[i].second>;
        LValue BaseAddrLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
        CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
                              BaseAddrLVal);
        // affs[i].len = sizeof(<Affinities[i].second>);
        LValue LenLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
        CGF.EmitStoreOfScalar(Size, LenLVal);
        Idx = CGF.Builder.CreateNUWAdd(
            Idx, llvm::ConstantInt::get(Idx->getType(), 1));
        CGF.EmitStoreOfScalar(Idx, PosLVal);
      }
    }
    // Call to kmp_int32 __kmpc_omp_reg_task_with_affinity(ident_t *loc_ref,
    // kmp_int32 gtid, kmp_task_t *new_task, kmp_int32
    // naffins, kmp_task_affinity_info_t *affin_list);
    llvm::Value *LocRef = emitUpdateLocation(CGF, Loc);
    llvm::Value *GTid = getThreadID(CGF, Loc);
    llvm::Value *AffinListPtr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        AffinitiesArray.getPointer(), CGM.VoidPtrTy);
    // FIXME: Emit the call and ignore its result for now, until the runtime
    // function is properly implemented.
    (void)CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_omp_reg_task_with_affinity),
        {LocRef, GTid, NewTask, NumOfElements, AffinListPtr});
  }
  llvm::Value *NewTaskNewTaskTTy =
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          NewTask, KmpTaskTWithPrivatesPtrTy);
  LValue Base = CGF.MakeNaturalAlignAddrLValue(NewTaskNewTaskTTy,
                                               KmpTaskTWithPrivatesQTy);
  LValue TDBase =
      CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin());
  // Fill the data in the resulting kmp_task_t record.
  // Copy shareds if there are any.
  Address KmpTaskSharedsPtr = Address::invalid();
  if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) {
    KmpTaskSharedsPtr =
        Address(CGF.EmitLoadOfScalar(
                    CGF.EmitLValueForField(
                        TDBase, *std::next(KmpTaskTQTyRD->field_begin(),
                                           KmpTaskTShareds)),
                    Loc),
                CGM.getNaturalTypeAlignment(SharedsTy));
    LValue Dest = CGF.MakeAddrLValue(KmpTaskSharedsPtr, SharedsTy);
    LValue Src = CGF.MakeAddrLValue(Shareds, SharedsTy);
    CGF.EmitAggregateCopy(Dest, Src, SharedsTy, AggValueSlot::DoesNotOverlap);
  }
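  // For reference, the task descriptor manipulated here corresponds roughly
  // to the runtime's kmp_task_t (a sketch based on the field indices used in
  // this file; the authoritative layout lives in openmp/runtime/src/kmp.h):
  //   struct kmp_task_t {
  //     void *shareds;               // KmpTaskTShareds, written just above
  //     kmp_routine_entry_t routine; // set by __kmpc_omp_task_alloc
  //     kmp_int32 part_id;           // set by __kmpc_omp_task_alloc
  //     kmp_cmplrdata_t data1;       // destructors (filled below)
  //     kmp_cmplrdata_t data2;       // priority (filled below)
  //     ...                          // taskloop bounds etc. for taskloops
  //   };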
  // Emit initial values for private copies (if any).
  TaskResultTy Result;
  if (!Privates.empty()) {
    emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, Base, KmpTaskTWithPrivatesQTyRD,
                     SharedsTy, SharedsPtrTy, Data, Privates,
                     /*ForDup=*/false);
    if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) &&
        (!Data.LastprivateVars.empty() || checkInitIsRequired(CGF, Privates))) {
      Result.TaskDupFn = emitTaskDupFunction(
          CGM, Loc, D, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTyRD,
          KmpTaskTQTyRD, SharedsTy, SharedsPtrTy, Data, Privates,
          /*WithLastIter=*/!Data.LastprivateVars.empty());
    }
  }
  // Fields of union "kmp_cmplrdata_t" for destructors and priority.
  enum { Priority = 0, Destructors = 1 };
  // Provide pointer to function with destructors for privates.
  auto FI = std::next(KmpTaskTQTyRD->field_begin(), Data1);
  const RecordDecl *KmpCmplrdataUD =
      (*FI)->getType()->getAsUnionType()->getDecl();
  if (NeedsCleanup) {
    llvm::Value *DestructorFn = emitDestructorsFunction(
        CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
        KmpTaskTWithPrivatesQTy);
    LValue Data1LV = CGF.EmitLValueForField(TDBase, *FI);
    LValue DestructorsLV = CGF.EmitLValueForField(
        Data1LV, *std::next(KmpCmplrdataUD->field_begin(), Destructors));
    CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                              DestructorFn, KmpRoutineEntryPtrTy),
                          DestructorsLV);
  }
  // Set priority.
  if (Data.Priority.getInt()) {
    LValue Data2LV = CGF.EmitLValueForField(
        TDBase, *std::next(KmpTaskTQTyRD->field_begin(), Data2));
    LValue PriorityLV = CGF.EmitLValueForField(
        Data2LV, *std::next(KmpCmplrdataUD->field_begin(), Priority));
    CGF.EmitStoreOfScalar(Data.Priority.getPointer(), PriorityLV);
  }
  Result.NewTask = NewTask;
  Result.TaskEntry = TaskEntry;
  Result.NewTaskNewTaskTTy = NewTaskNewTaskTTy;
  Result.TDBase = TDBase;
  Result.KmpTaskTQTyRD = KmpTaskTQTyRD;
  return Result;
}

namespace {
/// Dependence kind for RTL.
enum RTLDependenceKindTy {
  DepIn = 0x01,
  DepInOut = 0x3,
  DepMutexInOutSet = 0x4
};
/// Field ids in the kmp_depend_info record.
enum RTLDependInfoFieldsTy { BaseAddr, Len, Flags };
} // namespace

/// Translates internal dependency kind into the runtime kind.
static RTLDependenceKindTy translateDependencyKind(OpenMPDependClauseKind K) {
  RTLDependenceKindTy DepKind;
  switch (K) {
  case OMPC_DEPEND_in:
    DepKind = DepIn;
    break;
  // Out and InOut dependencies must use the same code.
  case OMPC_DEPEND_out:
  case OMPC_DEPEND_inout:
    DepKind = DepInOut;
    break;
  case OMPC_DEPEND_mutexinoutset:
    DepKind = DepMutexInOutSet;
    break;
  case OMPC_DEPEND_source:
  case OMPC_DEPEND_sink:
  case OMPC_DEPEND_depobj:
  case OMPC_DEPEND_unknown:
    llvm_unreachable("Unknown task dependence type");
  }
  return DepKind;
}
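// Summarizing the mapping above (the flag values appear to mirror the
// runtime's kmp_depend_info flags in kmp.h):
//   depend(in: x)            -> 0x1
//   depend(out: x)           -> 0x3
//   depend(inout: x)         -> 0x3
//   depend(mutexinoutset: x) -> 0x4
// source/sink (doacross) and depobj kinds never reach this translation.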
/// Builds kmp_depend_info, if it is not built yet, and builds the flags type.
static void getDependTypes(ASTContext &C, QualType &KmpDependInfoTy,
                           QualType &FlagsTy) {
  FlagsTy = C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false);
  if (KmpDependInfoTy.isNull()) {
    RecordDecl *KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info");
    KmpDependInfoRD->startDefinition();
    addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType());
    addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType());
    addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy);
    KmpDependInfoRD->completeDefinition();
    KmpDependInfoTy = C.getRecordType(KmpDependInfoRD);
  }
}

std::pair<llvm::Value *, LValue>
CGOpenMPRuntime::getDepobjElements(CodeGenFunction &CGF, LValue DepobjLVal,
                                   SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  LValue Base = CGF.EmitLoadOfPointerLValue(
      DepobjLVal.getAddress(CGF),
      C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
  QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
  Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      Base.getAddress(CGF), CGF.ConvertTypeForMem(KmpDependInfoPtrTy));
  Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(),
                            Base.getTBAAInfo());
  llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
      Addr.getElementType(), Addr.getPointer(),
      llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
  LValue NumDepsBase = CGF.MakeAddrLValue(
      Address(DepObjAddr, Addr.getAlignment()), KmpDependInfoTy,
      Base.getBaseInfo(), Base.getTBAAInfo());
  // NumDeps = deps[i].base_addr;
  LValue BaseAddrLVal = CGF.EmitLValueForField(
      NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
  llvm::Value *NumDeps = CGF.EmitLoadOfScalar(BaseAddrLVal, Loc);
  return std::make_pair(NumDeps, Base);
}

static void emitDependData(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
                           llvm::PointerUnion<unsigned *, LValue *> Pos,
                           const OMPTaskDataTy::DependData &Data,
                           Address DependenciesArray) {
  CodeGenModule &CGM = CGF.CGM;
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);

  OMPIteratorGeneratorScope IteratorScope(
      CGF, cast_or_null<OMPIteratorExpr>(
               Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
                                 : nullptr));
  for (const Expr *E : Data.DepExprs) {
    llvm::Value *Addr;
    llvm::Value *Size;
    std::tie(Addr, Size) = getPointerAndSize(CGF, E);
    LValue Base;
    if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
      Base = CGF.MakeAddrLValue(
          CGF.Builder.CreateConstGEP(DependenciesArray, *P), KmpDependInfoTy);
    } else {
      LValue &PosLVal = *Pos.get<LValue *>();
      llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
      Base = CGF.MakeAddrLValue(
          Address(CGF.Builder.CreateGEP(DependenciesArray.getElementType(),
                                        DependenciesArray.getPointer(), Idx),
                  DependenciesArray.getAlignment()),
          KmpDependInfoTy);
    }
    // deps[i].base_addr = &<Dependencies[i].second>;
    LValue BaseAddrLVal = CGF.EmitLValueForField(
        Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
    CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
                          BaseAddrLVal);
    // deps[i].len = sizeof(<Dependencies[i].second>);
    LValue LenLVal = CGF.EmitLValueForField(
        Base, *std::next(KmpDependInfoRD->field_begin(), Len));
    CGF.EmitStoreOfScalar(Size, LenLVal);
    // deps[i].flags = <Dependencies[i].first>;
    RTLDependenceKindTy DepKind = translateDependencyKind(Data.DepKind);
    LValue FlagsLVal = CGF.EmitLValueForField(
        Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
    CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
                          FlagsLVal);
    if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
      ++(*P);
    } else {
      LValue &PosLVal = *Pos.get<LValue *>();
      llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
      Idx = CGF.Builder.CreateNUWAdd(Idx,
                                     llvm::ConstantInt::get(Idx->getType(), 1));
      CGF.EmitStoreOfScalar(Idx, PosLVal);
    }
  }
}

static SmallVector<llvm::Value *, 4>
emitDepobjElementsSizes(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
                        const OMPTaskDataTy::DependData &Data) {
  assert(Data.DepKind == OMPC_DEPEND_depobj &&
         "Expected depobj dependency kind.");
  SmallVector<llvm::Value *, 4> Sizes;
  SmallVector<LValue, 4> SizeLVals;
  ASTContext &C = CGF.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
  llvm::Type *KmpDependInfoPtrT = CGF.ConvertTypeForMem(KmpDependInfoPtrTy);
  {
    OMPIteratorGeneratorScope IteratorScope(
        CGF, cast_or_null<OMPIteratorExpr>(
                 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
                                   : nullptr));
    for (const Expr *E : Data.DepExprs) {
      LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
      LValue Base = CGF.EmitLoadOfPointerLValue(
          DepobjLVal.getAddress(CGF),
          C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
      Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          Base.getAddress(CGF), KmpDependInfoPtrT);
      Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(),
                                Base.getTBAAInfo());
      llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
          Addr.getElementType(), Addr.getPointer(),
          llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
      LValue NumDepsBase = CGF.MakeAddrLValue(
          Address(DepObjAddr, Addr.getAlignment()), KmpDependInfoTy,
          Base.getBaseInfo(), Base.getTBAAInfo());
      // NumDeps = deps[i].base_addr;
      LValue BaseAddrLVal = CGF.EmitLValueForField(
          NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
      llvm::Value *NumDeps =
          CGF.EmitLoadOfScalar(BaseAddrLVal, E->getExprLoc());
      LValue NumLVal = CGF.MakeAddrLValue(
          CGF.CreateMemTemp(C.getUIntPtrType(), "depobj.size.addr"),
          C.getUIntPtrType());
      CGF.Builder.CreateStore(llvm::ConstantInt::get(CGF.IntPtrTy, 0),
                              NumLVal.getAddress(CGF));
      llvm::Value *PrevVal = CGF.EmitLoadOfScalar(NumLVal, E->getExprLoc());
      llvm::Value *Add = CGF.Builder.CreateNUWAdd(PrevVal, NumDeps);
      CGF.EmitStoreOfScalar(Add, NumLVal);
      SizeLVals.push_back(NumLVal);
    }
  }
  for (unsigned I = 0, E = SizeLVals.size(); I < E; ++I) {
    llvm::Value *Size =
        CGF.EmitLoadOfScalar(SizeLVals[I], Data.DepExprs[I]->getExprLoc());
    Sizes.push_back(Size);
  }
  return Sizes;
}

static void emitDepobjElements(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
                               LValue PosLVal,
                               const OMPTaskDataTy::DependData &Data,
                               Address DependenciesArray) {
  assert(Data.DepKind == OMPC_DEPEND_depobj &&
         "Expected depobj dependency kind.");
  ASTContext &C = CGF.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
  llvm::Type *KmpDependInfoPtrT = CGF.ConvertTypeForMem(KmpDependInfoPtrTy);
  llvm::Value *ElSize = CGF.getTypeSize(KmpDependInfoTy);
  {
    OMPIteratorGeneratorScope IteratorScope(
        CGF, cast_or_null<OMPIteratorExpr>(
                 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
                                   : nullptr));
    for (unsigned I = 0, End = Data.DepExprs.size(); I < End; ++I) {
      const Expr *E = Data.DepExprs[I];
      LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
      LValue Base = CGF.EmitLoadOfPointerLValue(
          DepobjLVal.getAddress(CGF),
          C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
      Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          Base.getAddress(CGF), KmpDependInfoPtrT);
      Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(),
                                Base.getTBAAInfo());

      // Get the number of elements in a single depobj.
      llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
          Addr.getElementType(), Addr.getPointer(),
          llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
      LValue NumDepsBase = CGF.MakeAddrLValue(
          Address(DepObjAddr, Addr.getAlignment()), KmpDependInfoTy,
          Base.getBaseInfo(), Base.getTBAAInfo());
      // NumDeps = deps[i].base_addr;
      LValue BaseAddrLVal = CGF.EmitLValueForField(
          NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
      llvm::Value *NumDeps =
          CGF.EmitLoadOfScalar(BaseAddrLVal, E->getExprLoc());

      // memcpy the dependency data.
      llvm::Value *Size = CGF.Builder.CreateNUWMul(
          ElSize,
          CGF.Builder.CreateIntCast(NumDeps, CGF.SizeTy, /*isSigned=*/false));
      llvm::Value *Pos = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
      Address DepAddr =
          Address(CGF.Builder.CreateGEP(DependenciesArray.getElementType(),
                                        DependenciesArray.getPointer(), Pos),
                  DependenciesArray.getAlignment());
      CGF.Builder.CreateMemCpy(DepAddr, Base.getAddress(CGF), Size);

      // Increase pos.
      // pos += size;
      llvm::Value *Add = CGF.Builder.CreateNUWAdd(Pos, NumDeps);
      CGF.EmitStoreOfScalar(Add, PosLVal);
    }
  }
}

std::pair<llvm::Value *, Address> CGOpenMPRuntime::emitDependClause(
    CodeGenFunction &CGF, ArrayRef<OMPTaskDataTy::DependData> Dependencies,
    SourceLocation Loc) {
  if (llvm::all_of(Dependencies, [](const OMPTaskDataTy::DependData &D) {
        return D.DepExprs.empty();
      }))
    return std::make_pair(nullptr, Address::invalid());
  // Process list of dependencies.
  ASTContext &C = CGM.getContext();
  Address DependenciesArray = Address::invalid();
  llvm::Value *NumOfElements = nullptr;
  unsigned NumDependencies = std::accumulate(
      Dependencies.begin(), Dependencies.end(), 0,
      [](unsigned V, const OMPTaskDataTy::DependData &D) {
        return D.DepKind == OMPC_DEPEND_depobj
                   ? V
                   : (V + (D.IteratorExpr ? 0 : D.DepExprs.size()));
      });
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  bool HasDepobjDeps = false;
  bool HasRegularWithIterators = false;
  llvm::Value *NumOfDepobjElements = llvm::ConstantInt::get(CGF.IntPtrTy, 0);
  llvm::Value *NumOfRegularWithIterators =
      llvm::ConstantInt::get(CGF.IntPtrTy, 0);
  // Calculate the number of depobj dependencies and regular deps with the
  // iterators.
  for (const OMPTaskDataTy::DependData &D : Dependencies) {
    if (D.DepKind == OMPC_DEPEND_depobj) {
      SmallVector<llvm::Value *, 4> Sizes =
          emitDepobjElementsSizes(CGF, KmpDependInfoTy, D);
      for (llvm::Value *Size : Sizes) {
        NumOfDepobjElements =
            CGF.Builder.CreateNUWAdd(NumOfDepobjElements, Size);
      }
      HasDepobjDeps = true;
      continue;
    }
    // Include the number of iterations, if any.
    if (const auto *IE = cast_or_null<OMPIteratorExpr>(D.IteratorExpr)) {
      for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
        llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
        Sz = CGF.Builder.CreateIntCast(Sz, CGF.IntPtrTy, /*isSigned=*/false);
        llvm::Value *NumClauseDeps = CGF.Builder.CreateNUWMul(
            Sz, llvm::ConstantInt::get(CGF.IntPtrTy, D.DepExprs.size()));
        NumOfRegularWithIterators =
            CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumClauseDeps);
      }
      HasRegularWithIterators = true;
      continue;
    }
  }

  QualType KmpDependInfoArrayTy;
  if (HasDepobjDeps || HasRegularWithIterators) {
    NumOfElements = llvm::ConstantInt::get(CGM.IntPtrTy, NumDependencies,
                                           /*isSigned=*/false);
    if (HasDepobjDeps) {
      NumOfElements =
          CGF.Builder.CreateNUWAdd(NumOfDepobjElements, NumOfElements);
    }
    if (HasRegularWithIterators) {
      NumOfElements =
          CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumOfElements);
    }
    auto *OVE = new (C) OpaqueValueExpr(
        Loc, C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0),
        VK_PRValue);
    CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, OVE,
                                                  RValue::get(NumOfElements));
    KmpDependInfoArrayTy =
        C.getVariableArrayType(KmpDependInfoTy, OVE, ArrayType::Normal,
                               /*IndexTypeQuals=*/0, SourceRange(Loc, Loc));
    // CGF.EmitVariablyModifiedType(KmpDependInfoArrayTy);
    // Properly emit variable-sized array.
    auto *PD = ImplicitParamDecl::Create(C, KmpDependInfoArrayTy,
                                         ImplicitParamDecl::Other);
    CGF.EmitVarDecl(*PD);
    DependenciesArray = CGF.GetAddrOfLocalVar(PD);
    NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
                                              /*isSigned=*/false);
  } else {
    KmpDependInfoArrayTy = C.getConstantArrayType(
        KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies), nullptr,
        ArrayType::Normal, /*IndexTypeQuals=*/0);
    DependenciesArray =
        CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr");
    DependenciesArray = CGF.Builder.CreateConstArrayGEP(DependenciesArray, 0);
    NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumDependencies,
                                           /*isSigned=*/false);
  }
  unsigned Pos = 0;
  for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
    if (Dependencies[I].DepKind == OMPC_DEPEND_depobj ||
        Dependencies[I].IteratorExpr)
      continue;
    emitDependData(CGF, KmpDependInfoTy, &Pos, Dependencies[I],
                   DependenciesArray);
  }
  // Copy regular dependencies with iterators.
  LValue PosLVal = CGF.MakeAddrLValue(
      CGF.CreateMemTemp(C.getSizeType(), "dep.counter.addr"), C.getSizeType());
  CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
  for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
    if (Dependencies[I].DepKind == OMPC_DEPEND_depobj ||
        !Dependencies[I].IteratorExpr)
      continue;
    emitDependData(CGF, KmpDependInfoTy, &PosLVal, Dependencies[I],
                   DependenciesArray);
  }
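  // At this point the flat kmp_depend_info array has been filled in three
  // passes: compile-time-indexed regular deps first, then iterator-expanded
  // regular deps tracked via the in-memory counter, and finally (below) the
  // payloads of any depobj dependencies, which are memcpy'd wholesale.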
  // Copy final depobj arrays without iterators.
  if (HasDepobjDeps) {
    for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
      if (Dependencies[I].DepKind != OMPC_DEPEND_depobj)
        continue;
      emitDepobjElements(CGF, KmpDependInfoTy, PosLVal, Dependencies[I],
                         DependenciesArray);
    }
  }
  DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      DependenciesArray, CGF.VoidPtrTy);
  return std::make_pair(NumOfElements, DependenciesArray);
}

Address CGOpenMPRuntime::emitDepobjDependClause(
    CodeGenFunction &CGF, const OMPTaskDataTy::DependData &Dependencies,
    SourceLocation Loc) {
  if (Dependencies.DepExprs.empty())
    return Address::invalid();
  // Process list of dependencies.
  ASTContext &C = CGM.getContext();
  Address DependenciesArray = Address::invalid();
  unsigned NumDependencies = Dependencies.DepExprs.size();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());

  llvm::Value *Size;
  // Define type kmp_depend_info[<Dependencies.size()>];
  // For depobj reserve one extra element to store the number of elements.
  // This is required to handle the depobj(x) update(in) construct.
  // kmp_depend_info[<Dependencies.size()>] deps;
  llvm::Value *NumDepsVal;
  CharUnits Align = C.getTypeAlignInChars(KmpDependInfoTy);
  if (const auto *IE =
          cast_or_null<OMPIteratorExpr>(Dependencies.IteratorExpr)) {
    NumDepsVal = llvm::ConstantInt::get(CGF.SizeTy, 1);
    for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
      llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
      Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
      NumDepsVal = CGF.Builder.CreateNUWMul(NumDepsVal, Sz);
    }
    Size = CGF.Builder.CreateNUWAdd(llvm::ConstantInt::get(CGF.SizeTy, 1),
                                    NumDepsVal);
    CharUnits SizeInBytes =
        C.getTypeSizeInChars(KmpDependInfoTy).alignTo(Align);
    llvm::Value *RecSize = CGM.getSize(SizeInBytes);
    Size = CGF.Builder.CreateNUWMul(Size, RecSize);
    NumDepsVal =
        CGF.Builder.CreateIntCast(NumDepsVal, CGF.IntPtrTy, /*isSigned=*/false);
  } else {
    QualType KmpDependInfoArrayTy = C.getConstantArrayType(
        KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies + 1),
        nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0);
    CharUnits Sz = C.getTypeSizeInChars(KmpDependInfoArrayTy);
    Size = CGM.getSize(Sz.alignTo(Align));
    NumDepsVal = llvm::ConstantInt::get(CGF.IntPtrTy, NumDependencies);
  }
  // Need to allocate on the dynamic memory.
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  // Use default allocator.
  llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  llvm::Value *Args[] = {ThreadID, Size, Allocator};

  llvm::Value *Addr =
      CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_alloc),
                          Args, ".dep.arr.addr");
  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      Addr, CGF.ConvertTypeForMem(KmpDependInfoTy)->getPointerTo());
  DependenciesArray = Address(Addr, Align);
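  // Layout sketch of the heap block allocated above (one extra leading
  // element, as the comments above describe):
  //   deps[0].base_addr = <number of payload elements>  // written below
  //   deps[1..N]        = the actual kmp_depend_info entries
  // The address returned to callers points at deps[1]; getDepobjElements()
  // later recovers the count by indexing back to deps[-1].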
  // Write the number of elements in the first element of the array for depobj.
  LValue Base = CGF.MakeAddrLValue(DependenciesArray, KmpDependInfoTy);
  // deps[i].base_addr = NumDependencies;
  LValue BaseAddrLVal = CGF.EmitLValueForField(
      Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
  CGF.EmitStoreOfScalar(NumDepsVal, BaseAddrLVal);
  llvm::PointerUnion<unsigned *, LValue *> Pos;
  unsigned Idx = 1;
  LValue PosLVal;
  if (Dependencies.IteratorExpr) {
    PosLVal = CGF.MakeAddrLValue(
        CGF.CreateMemTemp(C.getSizeType(), "iterator.counter.addr"),
        C.getSizeType());
    CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Idx), PosLVal,
                          /*IsInit=*/true);
    Pos = &PosLVal;
  } else {
    Pos = &Idx;
  }
  emitDependData(CGF, KmpDependInfoTy, Pos, Dependencies, DependenciesArray);
  DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateConstGEP(DependenciesArray, 1), CGF.VoidPtrTy);
  return DependenciesArray;
}

void CGOpenMPRuntime::emitDestroyClause(CodeGenFunction &CGF, LValue DepobjLVal,
                                        SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  LValue Base = CGF.EmitLoadOfPointerLValue(
      DepobjLVal.getAddress(CGF),
      C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
  QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
  Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      Base.getAddress(CGF), CGF.ConvertTypeForMem(KmpDependInfoPtrTy));
  llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
      Addr.getElementType(), Addr.getPointer(),
      llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
  DepObjAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(DepObjAddr,
                                                               CGF.VoidPtrTy);
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  // Use default allocator.
  llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  llvm::Value *Args[] = {ThreadID, DepObjAddr, Allocator};

  // __kmpc_free(gtid, addr, nullptr);
  (void)CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                                CGM.getModule(), OMPRTL___kmpc_free),
                            Args);
}

void CGOpenMPRuntime::emitUpdateClause(CodeGenFunction &CGF, LValue DepobjLVal,
                                       OpenMPDependClauseKind NewDepKind,
                                       SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
  llvm::Value *NumDeps;
  LValue Base;
  std::tie(NumDeps, Base) = getDepobjElements(CGF, DepobjLVal, Loc);

  Address Begin = Base.getAddress(CGF);
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *End = CGF.Builder.CreateGEP(
      Begin.getElementType(), Begin.getPointer(), NumDeps);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.done");
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);
  llvm::PHINode *ElementPHI =
      CGF.Builder.CreatePHI(Begin.getType(), 2, "omp.elementPast");
  ElementPHI->addIncoming(Begin.getPointer(), EntryBB);
  Begin = Address(ElementPHI, Begin.getAlignment());
  Base = CGF.MakeAddrLValue(Begin, KmpDependInfoTy, Base.getBaseInfo(),
                            Base.getTBAAInfo());
  // deps[i].flags = NewDepKind;
  RTLDependenceKindTy DepKind = translateDependencyKind(NewDepKind);
  LValue FlagsLVal = CGF.EmitLValueForField(
      Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
  CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
                        FlagsLVal);

  // Shift the address forward by one element.
  Address ElementNext =
      CGF.Builder.CreateConstGEP(Begin, /*Index=*/1, "omp.elementNext");
  ElementPHI->addIncoming(ElementNext.getPointer(),
                          CGF.Builder.GetInsertBlock());
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(ElementNext.getPointer(), End, "omp.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}

void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
                                   const OMPExecutableDirective &D,
                                   llvm::Function *TaskFunction,
                                   QualType SharedsTy, Address Shareds,
                                   const Expr *IfCond,
                                   const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint())
    return;

  TaskResultTy Result =
      emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
  llvm::Value *NewTask = Result.NewTask;
  llvm::Function *TaskEntry = Result.TaskEntry;
  llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy;
  LValue TDBase = Result.TDBase;
  const RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD;
  // Process list of dependences.
  Address DependenciesArray = Address::invalid();
  llvm::Value *NumOfElements;
  std::tie(NumOfElements, DependenciesArray) =
      emitDependClause(CGF, Data.Dependences, Loc);

  // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
  // libcall.
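  // Putting the pieces together, a sketch of the emitted control flow for
  //   #pragma omp task if(cond) depend(inout: x)
  // (illustrative only):
  //   t = __kmpc_omp_task_alloc(...);                 // done in emitTaskInit()
  //   if (cond) {
  //     __kmpc_omp_task_with_deps(loc, gtid, t, n, deps, 0, nullptr);
  //   } else {
  //     __kmpc_omp_wait_deps(loc, gtid, n, deps, 0, nullptr);
  //     __kmpc_omp_task_begin_if0(loc, gtid, t);
  //     .omp_task_entry.(gtid, t);                    // executed immediately
  //     __kmpc_omp_task_complete_if0(loc, gtid, t);
  //   }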
  // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
  // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
  // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if the
  // dependence list is not empty.
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
  llvm::Value *TaskArgs[] = {UpLoc, ThreadID, NewTask};
  llvm::Value *DepTaskArgs[7];
  if (!Data.Dependences.empty()) {
    DepTaskArgs[0] = UpLoc;
    DepTaskArgs[1] = ThreadID;
    DepTaskArgs[2] = NewTask;
    DepTaskArgs[3] = NumOfElements;
    DepTaskArgs[4] = DependenciesArray.getPointer();
    DepTaskArgs[5] = CGF.Builder.getInt32(0);
    DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }
  auto &&ThenCodeGen = [this, &Data, TDBase, KmpTaskTQTyRD, &TaskArgs,
                        &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) {
    if (!Data.Tied) {
      auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
      LValue PartIdLVal = CGF.EmitLValueForField(TDBase, *PartIdFI);
      CGF.EmitStoreOfScalar(CGF.Builder.getInt32(0), PartIdLVal);
    }
    if (!Data.Dependences.empty()) {
      CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___kmpc_omp_task_with_deps),
          DepTaskArgs);
    } else {
      CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_omp_task),
                          TaskArgs);
    }
    // Check if the parent region is untied and build a return for the untied
    // task.
    if (auto *Region =
            dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
      Region->emitUntiedSwitch(CGF);
  };

  llvm::Value *DepWaitTaskArgs[6];
  if (!Data.Dependences.empty()) {
    DepWaitTaskArgs[0] = UpLoc;
    DepWaitTaskArgs[1] = ThreadID;
    DepWaitTaskArgs[2] = NumOfElements;
    DepWaitTaskArgs[3] = DependenciesArray.getPointer();
    DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
    DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }
  auto &M = CGM.getModule();
  auto &&ElseCodeGen = [this, &M, &TaskArgs, ThreadID, NewTaskNewTaskTTy,
                        TaskEntry, &Data, &DepWaitTaskArgs,
                        Loc](CodeGenFunction &CGF, PrePostActionTy &) {
    CodeGenFunction::RunCleanupsScope LocalScope(CGF);
    // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
    // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
    // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info
    // is specified.
    if (!Data.Dependences.empty())
      CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_omp_wait_deps),
          DepWaitTaskArgs);
    // Call proxy_task_entry(gtid, new_task);
    auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy,
                      Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
      Action.Enter(CGF);
      llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy};
      CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskEntry,
                                                          OutlinedFnArgs);
    };

    // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
    // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
    RegionCodeGenTy RCG(CodeGen);
    CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                              M, OMPRTL___kmpc_omp_task_begin_if0),
                          TaskArgs,
                          OMPBuilder.getOrCreateRuntimeFunction(
                              M, OMPRTL___kmpc_omp_task_complete_if0),
                          TaskArgs);
    RCG.setAction(Action);
    RCG(CGF);
  };

  if (IfCond) {
    emitIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen);
  } else {
    RegionCodeGenTy ThenRCG(ThenCodeGen);
    ThenRCG(CGF);
  }
}

void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPLoopDirective &D,
                                       llvm::Function *TaskFunction,
                                       QualType SharedsTy, Address Shareds,
                                       const Expr *IfCond,
                                       const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint())
    return;
  TaskResultTy Result =
      emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
  // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
  // libcall.
  // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
  // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
  // sched, kmp_uint64 grainsize, void *task_dup);
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
  llvm::Value *IfVal;
  if (IfCond) {
    IfVal = CGF.Builder.CreateIntCast(CGF.EvaluateExprAsBool(IfCond), CGF.IntTy,
                                      /*isSigned=*/true);
  } else {
    IfVal = llvm::ConstantInt::getSigned(CGF.IntTy, /*V=*/1);
  }

  LValue LBLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound));
  const auto *LBVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getLowerBoundVariable())->getDecl());
  CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(CGF),
                       LBLVal.getQuals(),
                       /*IsInitializer=*/true);
  LValue UBLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound));
  const auto *UBVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getUpperBoundVariable())->getDecl());
  CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(CGF),
                       UBLVal.getQuals(),
                       /*IsInitializer=*/true);
  LValue StLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTStride));
  const auto *StVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl());
  CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(CGF),
                       StLVal.getQuals(),
                       /*IsInitializer=*/true);
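  // The sched/grainsize pair passed to __kmpc_taskloop below encodes the
  // schedule clause roughly as follows (see the enum that follows):
  //   no grainsize/num_tasks clause -> sched = 0 (NoSchedule), value = 0
  //   grainsize(g)                  -> sched = 1 (Grainsize),  value = g
  //   num_tasks(n)                  -> sched = 2 (NumTasks),   value = n
  // The nogroup argument is hard-coded to 1 here because the enclosing
  // taskgroup is emitted by the compiler itself.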
  // Store reductions address.
  LValue RedLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTReductions));
  if (Data.Reductions) {
    CGF.EmitStoreOfScalar(Data.Reductions, RedLVal);
  } else {
    CGF.EmitNullInitialization(RedLVal.getAddress(CGF),
                               CGF.getContext().VoidPtrTy);
  }
  enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 };
  llvm::Value *TaskArgs[] = {
      UpLoc,
      ThreadID,
      Result.NewTask,
      IfVal,
      LBLVal.getPointer(CGF),
      UBLVal.getPointer(CGF),
      CGF.EmitLoadOfScalar(StLVal, Loc),
      llvm::ConstantInt::getSigned(
          CGF.IntTy, 1), // Always 1 because taskgroup emitted by the compiler
      llvm::ConstantInt::getSigned(
          CGF.IntTy, Data.Schedule.getPointer()
                         ? Data.Schedule.getInt() ? NumTasks : Grainsize
                         : NoSchedule),
      Data.Schedule.getPointer()
          ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty,
                                      /*isSigned=*/false)
          : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0),
      Result.TaskDupFn ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                             Result.TaskDupFn, CGF.VoidPtrTy)
                       : llvm::ConstantPointerNull::get(CGF.VoidPtrTy)};
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_taskloop),
                      TaskArgs);
}

/// Emit reduction operation for each element of array (required for
/// array sections) LHS op = RHS.
/// \param Type Type of array.
/// \param LHSVar Variable on the left side of the reduction operation
/// (references element of array in original variable).
/// \param RHSVar Variable on the right side of the reduction operation
/// (references element of array in original variable).
/// \param RedOpGen Generator of reduction operation with use of LHSVar and
/// RHSVar.
static void EmitOMPAggregateReduction(
    CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar,
    const VarDecl *RHSVar,
    const llvm::function_ref<void(CodeGenFunction &CGF, const Expr *,
                                  const Expr *, const Expr *)> &RedOpGen,
    const Expr *XExpr = nullptr, const Expr *EExpr = nullptr,
    const Expr *UpExpr = nullptr) {
  // Perform element-by-element initialization.
  QualType ElementTy;
  Address LHSAddr = CGF.GetAddrOfLocalVar(LHSVar);
  Address RHSAddr = CGF.GetAddrOfLocalVar(RHSVar);

  // Drill down to the base element type on both arrays.
  const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, LHSAddr);

  llvm::Value *RHSBegin = RHSAddr.getPointer();
  llvm::Value *LHSBegin = LHSAddr.getPointer();
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *LHSEnd =
      CGF.Builder.CreateGEP(LHSAddr.getElementType(), LHSBegin, NumElements);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arraycpy.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arraycpy.done");
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(LHSBegin, LHSEnd, "omp.arraycpy.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);

  llvm::PHINode *RHSElementPHI = CGF.Builder.CreatePHI(
      RHSBegin->getType(), 2, "omp.arraycpy.srcElementPast");
  RHSElementPHI->addIncoming(RHSBegin, EntryBB);
  Address RHSElementCurrent =
      Address(RHSElementPHI,
              RHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  llvm::PHINode *LHSElementPHI = CGF.Builder.CreatePHI(
      LHSBegin->getType(), 2, "omp.arraycpy.destElementPast");
  LHSElementPHI->addIncoming(LHSBegin, EntryBB);
  Address LHSElementCurrent =
      Address(LHSElementPHI,
              LHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Scope.addPrivate(LHSVar, [=]() { return LHSElementCurrent; });
  Scope.addPrivate(RHSVar, [=]() { return RHSElementCurrent; });
  Scope.Privatize();
  RedOpGen(CGF, XExpr, EExpr, UpExpr);
  Scope.ForceCleanup();

  // Shift the address forward by one element.
  llvm::Value *LHSElementNext = CGF.Builder.CreateConstGEP1_32(
      LHSAddr.getElementType(), LHSElementPHI, /*Idx0=*/1,
      "omp.arraycpy.dest.element");
  llvm::Value *RHSElementNext = CGF.Builder.CreateConstGEP1_32(
      RHSAddr.getElementType(), RHSElementPHI, /*Idx0=*/1,
      "omp.arraycpy.src.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      CGF.Builder.CreateICmpEQ(LHSElementNext, LHSEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  LHSElementPHI->addIncoming(LHSElementNext, CGF.Builder.GetInsertBlock());
  RHSElementPHI->addIncoming(RHSElementNext, CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}
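// In C-like pseudocode, the IR built above behaves roughly like (a sketch,
// with "op=" standing for whatever RedOpGen emits per element):
//   if (lhs != lhs + n) {
//     T *l = lhs, *r = rhs;
//     do { *l op= *r; ++l; ++r; } while (l != lhs + n);
//   }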
/// Emit reduction combiner. If the combiner is a simple expression, emit it as
/// is; otherwise treat it as the combiner of a UDR decl and emit it as a call
/// to the UDR combiner function.
static void emitReductionCombiner(CodeGenFunction &CGF,
                                  const Expr *ReductionOp) {
  if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
    if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
      if (const auto *DRE =
              dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
        if (const auto *DRD =
                dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) {
          std::pair<llvm::Function *, llvm::Function *> Reduction =
              CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
          RValue Func = RValue::get(Reduction.first);
          CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
          CGF.EmitIgnoredExpr(ReductionOp);
          return;
        }
  CGF.EmitIgnoredExpr(ReductionOp);
}

llvm::Function *CGOpenMPRuntime::emitReductionFunction(
    SourceLocation Loc, llvm::Type *ArgsType, ArrayRef<const Expr *> Privates,
    ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
    ArrayRef<const Expr *> ReductionOps) {
  ASTContext &C = CGM.getContext();

  // void reduction_func(void *LHSArg, void *RHSArg);
  FunctionArgList Args;
  ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  Args.push_back(&LHSArg);
  Args.push_back(&RHSArg);
  const auto &CGFI =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  std::string Name = getName({"omp", "reduction", "reduction_func"});
  auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
                                    llvm::GlobalValue::InternalLinkage, Name,
                                    &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);

  // Dst = (void*[n])(LHSArg);
  // Src = (void*[n])(RHSArg);
  Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                  CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
                  ArgsType),
              CGF.getPointerAlign());
  Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                  CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
                  ArgsType),
              CGF.getPointerAlign());

  // ...
  // *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]);
  // ...
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  auto IPriv = Privates.begin();
  unsigned Idx = 0;
  for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) {
    const auto *RHSVar =
        cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl());
    Scope.addPrivate(RHSVar, [&CGF, RHS, Idx, RHSVar]() {
      return emitAddrOfVarFromArray(CGF, RHS, Idx, RHSVar);
    });
    const auto *LHSVar =
        cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl());
    Scope.addPrivate(LHSVar, [&CGF, LHS, Idx, LHSVar]() {
      return emitAddrOfVarFromArray(CGF, LHS, Idx, LHSVar);
    });
    QualType PrivTy = (*IPriv)->getType();
    if (PrivTy->isVariablyModifiedType()) {
      // Get the array size and emit the VLA type.
      ++Idx;
      Address Elem = CGF.Builder.CreateConstArrayGEP(LHS, Idx);
      llvm::Value *Ptr = CGF.Builder.CreateLoad(Elem);
      const VariableArrayType *VLA =
          CGF.getContext().getAsVariableArrayType(PrivTy);
      const auto *OVE = cast<OpaqueValueExpr>(VLA->getSizeExpr());
      CodeGenFunction::OpaqueValueMapping OpaqueMap(
          CGF, OVE, RValue::get(CGF.Builder.CreatePtrToInt(Ptr, CGF.SizeTy)));
      CGF.EmitVariablyModifiedType(PrivTy);
    }
  }
  Scope.Privatize();
  IPriv = Privates.begin();
  auto ILHS = LHSExprs.begin();
  auto IRHS = RHSExprs.begin();
  for (const Expr *E : ReductionOps) {
    if ((*IPriv)->getType()->isArrayType()) {
      // Emit reduction for array section.
      const auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
      const auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
      EmitOMPAggregateReduction(
          CGF, (*IPriv)->getType(), LHSVar, RHSVar,
          [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
            emitReductionCombiner(CGF, E);
          });
    } else {
      // Emit reduction for array subscript or single variable.
      emitReductionCombiner(CGF, E);
    }
    ++IPriv;
    ++ILHS;
    ++IRHS;
  }
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}

void CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF,
                                                  const Expr *ReductionOp,
                                                  const Expr *PrivateRef,
                                                  const DeclRefExpr *LHS,
                                                  const DeclRefExpr *RHS) {
  if (PrivateRef->getType()->isArrayType()) {
    // Emit reduction for array section.
    const auto *LHSVar = cast<VarDecl>(LHS->getDecl());
    const auto *RHSVar = cast<VarDecl>(RHS->getDecl());
    EmitOMPAggregateReduction(
        CGF, PrivateRef->getType(), LHSVar, RHSVar,
        [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
          emitReductionCombiner(CGF, ReductionOp);
        });
  } else {
    // Emit reduction for array subscript or single variable.
    emitReductionCombiner(CGF, ReductionOp);
  }
}

void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
                                    ArrayRef<const Expr *> Privates,
                                    ArrayRef<const Expr *> LHSExprs,
                                    ArrayRef<const Expr *> RHSExprs,
                                    ArrayRef<const Expr *> ReductionOps,
                                    ReductionOptionsTy Options) {
  if (!CGF.HaveInsertPoint())
    return;

  bool WithNowait = Options.WithNowait;
  bool SimpleReduction = Options.SimpleReduction;

  // The following code should be emitted for a reduction:
  //
  // static kmp_critical_name lock = { 0 };
  //
  // void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
  //  *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]);
  //  ...
  //  *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1],
  //                                                    *(Type<n>-1*)rhs[<n>-1]);
  // }
  //
  // ...
  // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]};
  // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
  //                                RedList, reduce_func, &<lock>)) {
  // case 1:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...
  //  __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  //  break;
  // case 2:
  //  ...
  //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
  //  ...
  //  [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);]
  //  break;
  // default:;
  // }
  //
  // If SimpleReduction is true, only the following code is generated:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...

  ASTContext &C = CGM.getContext();

  if (SimpleReduction) {
    CodeGenFunction::RunCleanupsScope Scope(CGF);
    auto IPriv = Privates.begin();
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    for (const Expr *E : ReductionOps) {
      emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
                                  cast<DeclRefExpr>(*IRHS));
      ++IPriv;
      ++ILHS;
      ++IRHS;
    }
    return;
  }

  // 1. Build a list of reduction variables.
  // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]};
  auto Size = RHSExprs.size();
  for (const Expr *E : Privates) {
    if (E->getType()->isVariablyModifiedType())
      // Reserve place for array size.
      ++Size;
  }
  llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size);
  QualType ReductionArrayTy =
      C.getConstantArrayType(C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal,
                             /*IndexTypeQuals=*/0);
  Address ReductionList =
      CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list");
  auto IPriv = Privates.begin();
  unsigned Idx = 0;
  for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) {
    Address Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
    CGF.Builder.CreateStore(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            CGF.EmitLValue(RHSExprs[I]).getPointer(CGF), CGF.VoidPtrTy),
        Elem);
    if ((*IPriv)->getType()->isVariablyModifiedType()) {
      // Store array size.
      ++Idx;
      Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
      llvm::Value *Size = CGF.Builder.CreateIntCast(
          CGF.getVLASize(
                 CGF.getContext().getAsVariableArrayType((*IPriv)->getType()))
              .NumElts,
          CGF.SizeTy, /*isSigned=*/false);
      CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy),
                              Elem);
    }
  }

  // 2. Emit reduce_func().
  llvm::Function *ReductionFn = emitReductionFunction(
      Loc, CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo(), Privates,
      LHSExprs, RHSExprs, ReductionOps);

  // 3. Create static kmp_critical_name lock = { 0 };
  std::string Name = getName({"reduction"});
  llvm::Value *Lock = getCriticalRegionLock(Name);

  // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
  //    RedList, reduce_func, &<lock>);
  llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE);
  llvm::Value *ThreadId = getThreadID(CGF, Loc);
  llvm::Value *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy);
  llvm::Value *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      ReductionList.getPointer(), CGF.VoidPtrTy);
  llvm::Value *Args[] = {
      IdentTLoc,                             // ident_t *<loc>
      ThreadId,                              // i32 <gtid>
      CGF.Builder.getInt32(RHSExprs.size()), // i32 <n>
      ReductionArrayTySize,                  // size_type sizeof(RedList)
      RL,                                    // void *RedList
      ReductionFn, // void (*) (void *, void *) <reduce_func>
      Lock         // kmp_critical_name *&<lock>
  };
  llvm::Value *Res = CGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(),
          WithNowait ? OMPRTL___kmpc_reduce_nowait : OMPRTL___kmpc_reduce),
      Args);
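  // The runtime's return value selects the combination strategy: 1 means this
  // thread should combine directly (case 1 below), 2 requests the atomic
  // fallback (case 2), and anything else means no work for this thread, so
  // control falls through to the default block.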
Build switch(res) 5695 llvm::BasicBlock *DefaultBB = CGF.createBasicBlock(".omp.reduction.default"); 5696 llvm::SwitchInst *SwInst = 5697 CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2); 5698 5699 // 6. Build case 1: 5700 // ... 5701 // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]); 5702 // ... 5703 // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>); 5704 // break; 5705 llvm::BasicBlock *Case1BB = CGF.createBasicBlock(".omp.reduction.case1"); 5706 SwInst->addCase(CGF.Builder.getInt32(1), Case1BB); 5707 CGF.EmitBlock(Case1BB); 5708 5709 // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>); 5710 llvm::Value *EndArgs[] = { 5711 IdentTLoc, // ident_t *<loc> 5712 ThreadId, // i32 <gtid> 5713 Lock // kmp_critical_name *&<lock> 5714 }; 5715 auto &&CodeGen = [Privates, LHSExprs, RHSExprs, ReductionOps]( 5716 CodeGenFunction &CGF, PrePostActionTy &Action) { 5717 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime(); 5718 auto IPriv = Privates.begin(); 5719 auto ILHS = LHSExprs.begin(); 5720 auto IRHS = RHSExprs.begin(); 5721 for (const Expr *E : ReductionOps) { 5722 RT.emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS), 5723 cast<DeclRefExpr>(*IRHS)); 5724 ++IPriv; 5725 ++ILHS; 5726 ++IRHS; 5727 } 5728 }; 5729 RegionCodeGenTy RCG(CodeGen); 5730 CommonActionTy Action( 5731 nullptr, llvm::None, 5732 OMPBuilder.getOrCreateRuntimeFunction( 5733 CGM.getModule(), WithNowait ? OMPRTL___kmpc_end_reduce_nowait 5734 : OMPRTL___kmpc_end_reduce), 5735 EndArgs); 5736 RCG.setAction(Action); 5737 RCG(CGF); 5738 5739 CGF.EmitBranch(DefaultBB); 5740 5741 // 7. Build case 2: 5742 // ... 5743 // Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i])); 5744 // ... 5745 // break; 5746 llvm::BasicBlock *Case2BB = CGF.createBasicBlock(".omp.reduction.case2"); 5747 SwInst->addCase(CGF.Builder.getInt32(2), Case2BB); 5748 CGF.EmitBlock(Case2BB); 5749 5750 auto &&AtomicCodeGen = [Loc, Privates, LHSExprs, RHSExprs, ReductionOps]( 5751 CodeGenFunction &CGF, PrePostActionTy &Action) { 5752 auto ILHS = LHSExprs.begin(); 5753 auto IRHS = RHSExprs.begin(); 5754 auto IPriv = Privates.begin(); 5755 for (const Expr *E : ReductionOps) { 5756 const Expr *XExpr = nullptr; 5757 const Expr *EExpr = nullptr; 5758 const Expr *UpExpr = nullptr; 5759 BinaryOperatorKind BO = BO_Comma; 5760 if (const auto *BO = dyn_cast<BinaryOperator>(E)) { 5761 if (BO->getOpcode() == BO_Assign) { 5762 XExpr = BO->getLHS(); 5763 UpExpr = BO->getRHS(); 5764 } 5765 } 5766 // Try to emit update expression as a simple atomic. 5767 const Expr *RHSExpr = UpExpr; 5768 if (RHSExpr) { 5769 // Analyze RHS part of the whole expression. 5770 if (const auto *ACO = dyn_cast<AbstractConditionalOperator>( 5771 RHSExpr->IgnoreParenImpCasts())) { 5772 // If this is a conditional operator, analyze its condition for 5773 // min/max reduction operator. 
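// Illustrative note (not from the original source): for a clause like
// 'reduction(min : x)', the generated update has the form
// 'x = x < priv ? x : priv', so taking the conditional's condition below
// exposes the comparison opcode (BO) and its RHS (EExpr) that drive the
// simple-atomic lowering.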
5774 RHSExpr = ACO->getCond(); 5775 } 5776 if (const auto *BORHS = 5777 dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) { 5778 EExpr = BORHS->getRHS(); 5779 BO = BORHS->getOpcode(); 5780 } 5781 } 5782 if (XExpr) { 5783 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl()); 5784 auto &&AtomicRedGen = [BO, VD, 5785 Loc](CodeGenFunction &CGF, const Expr *XExpr, 5786 const Expr *EExpr, const Expr *UpExpr) { 5787 LValue X = CGF.EmitLValue(XExpr); 5788 RValue E; 5789 if (EExpr) 5790 E = CGF.EmitAnyExpr(EExpr); 5791 CGF.EmitOMPAtomicSimpleUpdateExpr( 5792 X, E, BO, /*IsXLHSInRHSPart=*/true, 5793 llvm::AtomicOrdering::Monotonic, Loc, 5794 [&CGF, UpExpr, VD, Loc](RValue XRValue) { 5795 CodeGenFunction::OMPPrivateScope PrivateScope(CGF); 5796 PrivateScope.addPrivate( 5797 VD, [&CGF, VD, XRValue, Loc]() { 5798 Address LHSTemp = CGF.CreateMemTemp(VD->getType()); 5799 CGF.emitOMPSimpleStore( 5800 CGF.MakeAddrLValue(LHSTemp, VD->getType()), XRValue, 5801 VD->getType().getNonReferenceType(), Loc); 5802 return LHSTemp; 5803 }); 5804 (void)PrivateScope.Privatize(); 5805 return CGF.EmitAnyExpr(UpExpr); 5806 }); 5807 }; 5808 if ((*IPriv)->getType()->isArrayType()) { 5809 // Emit atomic reduction for array section. 5810 const auto *RHSVar = 5811 cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl()); 5812 EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar, 5813 AtomicRedGen, XExpr, EExpr, UpExpr); 5814 } else { 5815 // Emit atomic reduction for array subscript or single variable. 5816 AtomicRedGen(CGF, XExpr, EExpr, UpExpr); 5817 } 5818 } else { 5819 // Emit as a critical region. 5820 auto &&CritRedGen = [E, Loc](CodeGenFunction &CGF, const Expr *, 5821 const Expr *, const Expr *) { 5822 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime(); 5823 std::string Name = RT.getName({"atomic_reduction"}); 5824 RT.emitCriticalRegion( 5825 CGF, Name, 5826 [=](CodeGenFunction &CGF, PrePostActionTy &Action) { 5827 Action.Enter(CGF); 5828 emitReductionCombiner(CGF, E); 5829 }, 5830 Loc); 5831 }; 5832 if ((*IPriv)->getType()->isArrayType()) { 5833 const auto *LHSVar = 5834 cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl()); 5835 const auto *RHSVar = 5836 cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl()); 5837 EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar, 5838 CritRedGen); 5839 } else { 5840 CritRedGen(CGF, nullptr, nullptr, nullptr); 5841 } 5842 } 5843 ++ILHS; 5844 ++IRHS; 5845 ++IPriv; 5846 } 5847 }; 5848 RegionCodeGenTy AtomicRCG(AtomicCodeGen); 5849 if (!WithNowait) { 5850 // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>); 5851 llvm::Value *EndArgs[] = { 5852 IdentTLoc, // ident_t *<loc> 5853 ThreadId, // i32 <gtid> 5854 Lock // kmp_critical_name *&<lock> 5855 }; 5856 CommonActionTy Action(nullptr, llvm::None, 5857 OMPBuilder.getOrCreateRuntimeFunction( 5858 CGM.getModule(), OMPRTL___kmpc_end_reduce), 5859 EndArgs); 5860 AtomicRCG.setAction(Action); 5861 AtomicRCG(CGF); 5862 } else { 5863 AtomicRCG(CGF); 5864 } 5865 5866 CGF.EmitBranch(DefaultBB); 5867 CGF.EmitBlock(DefaultBB, /*IsFinished=*/true); 5868 } 5869 5870 /// Generates unique name for artificial threadprivate variables. 5871 /// Format is: <Prefix> "." 
<Decl_mangled_name> "_" "<Decl_start_loc_raw_enc>" 5872 static std::string generateUniqueName(CodeGenModule &CGM, StringRef Prefix, 5873 const Expr *Ref) { 5874 SmallString<256> Buffer; 5875 llvm::raw_svector_ostream Out(Buffer); 5876 const clang::DeclRefExpr *DE; 5877 const VarDecl *D = ::getBaseDecl(Ref, DE); 5878 if (!D) 5879 D = cast<VarDecl>(cast<DeclRefExpr>(Ref)->getDecl()); 5880 D = D->getCanonicalDecl(); 5881 std::string Name = CGM.getOpenMPRuntime().getName( 5882 {D->isLocalVarDeclOrParm() ? D->getName() : CGM.getMangledName(D)}); 5883 Out << Prefix << Name << "_" 5884 << D->getCanonicalDecl()->getBeginLoc().getRawEncoding(); 5885 return std::string(Out.str()); 5886 } 5887 5888 /// Emits reduction initializer function: 5889 /// \code 5890 /// void @.red_init(void* %arg, void* %orig) { 5891 /// %0 = bitcast void* %arg to <type>* 5892 /// store <type> <init>, <type>* %0 5893 /// ret void 5894 /// } 5895 /// \endcode 5896 static llvm::Value *emitReduceInitFunction(CodeGenModule &CGM, 5897 SourceLocation Loc, 5898 ReductionCodeGen &RCG, unsigned N) { 5899 ASTContext &C = CGM.getContext(); 5900 QualType VoidPtrTy = C.VoidPtrTy; 5901 VoidPtrTy.addRestrict(); 5902 FunctionArgList Args; 5903 ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy, 5904 ImplicitParamDecl::Other); 5905 ImplicitParamDecl ParamOrig(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy, 5906 ImplicitParamDecl::Other); 5907 Args.emplace_back(&Param); 5908 Args.emplace_back(&ParamOrig); 5909 const auto &FnInfo = 5910 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 5911 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo); 5912 std::string Name = CGM.getOpenMPRuntime().getName({"red_init", ""}); 5913 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage, 5914 Name, &CGM.getModule()); 5915 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo); 5916 Fn->setDoesNotRecurse(); 5917 CodeGenFunction CGF(CGM); 5918 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc); 5919 Address PrivateAddr = CGF.EmitLoadOfPointer( 5920 CGF.GetAddrOfLocalVar(&Param), 5921 C.getPointerType(C.VoidPtrTy).castAs<PointerType>()); 5922 llvm::Value *Size = nullptr; 5923 // If the size of the reduction item is non-constant, load it from global 5924 // threadprivate variable. 
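// Clarifying note: the "global threadprivate variable" mentioned above is the
// artificial "reduction_size" variable named via generateUniqueName() and
// written by emitTaskReductionFixups() further below; it carries VLA/array-
// section sizes that the runtime cannot pass to this callback directly.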
5925 if (RCG.getSizes(N).second) { 5926 Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate( 5927 CGF, CGM.getContext().getSizeType(), 5928 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N))); 5929 Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false, 5930 CGM.getContext().getSizeType(), Loc); 5931 } 5932 RCG.emitAggregateType(CGF, N, Size); 5933 LValue OrigLVal; 5934 // If the initializer uses the initializer from the 'declare reduction' 5935 // construct, emit a pointer to the address of the original reduction item 5936 // (required by the reduction initializer). 5937 if (RCG.usesReductionInitializer(N)) { 5938 Address SharedAddr = CGF.GetAddrOfLocalVar(&ParamOrig); 5939 SharedAddr = CGF.EmitLoadOfPointer( 5940 SharedAddr, 5941 CGM.getContext().VoidPtrTy.castAs<PointerType>()->getTypePtr()); 5942 OrigLVal = CGF.MakeAddrLValue(SharedAddr, CGM.getContext().VoidPtrTy); 5943 } else { 5944 OrigLVal = CGF.MakeNaturalAlignAddrLValue( 5945 llvm::ConstantPointerNull::get(CGM.VoidPtrTy), 5946 CGM.getContext().VoidPtrTy); 5947 } 5948 // Emit the initializer: 5949 // %0 = bitcast void* %arg to <type>* 5950 // store <type> <init>, <type>* %0 5951 RCG.emitInitialization(CGF, N, PrivateAddr, OrigLVal, 5952 [](CodeGenFunction &) { return false; }); 5953 CGF.FinishFunction(); 5954 return Fn; 5955 } 5956 5957 /// Emits reduction combiner function: 5958 /// \code 5959 /// void @.red_comb(void* %arg0, void* %arg1) { 5960 /// %lhs = bitcast void* %arg0 to <type>* 5961 /// %rhs = bitcast void* %arg1 to <type>* 5962 /// %2 = <ReductionOp>(<type>* %lhs, <type>* %rhs) 5963 /// store <type> %2, <type>* %lhs 5964 /// ret void 5965 /// } 5966 /// \endcode 5967 static llvm::Value *emitReduceCombFunction(CodeGenModule &CGM, 5968 SourceLocation Loc, 5969 ReductionCodeGen &RCG, unsigned N, 5970 const Expr *ReductionOp, 5971 const Expr *LHS, const Expr *RHS, 5972 const Expr *PrivateRef) { 5973 ASTContext &C = CGM.getContext(); 5974 const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(LHS)->getDecl()); 5975 const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(RHS)->getDecl()); 5976 FunctionArgList Args; 5977 ImplicitParamDecl ParamInOut(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 5978 C.VoidPtrTy, ImplicitParamDecl::Other); 5979 ImplicitParamDecl ParamIn(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 5980 ImplicitParamDecl::Other); 5981 Args.emplace_back(&ParamInOut); 5982 Args.emplace_back(&ParamIn); 5983 const auto &FnInfo = 5984 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 5985 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo); 5986 std::string Name = CGM.getOpenMPRuntime().getName({"red_comb", ""}); 5987 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage, 5988 Name, &CGM.getModule()); 5989 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo); 5990 Fn->setDoesNotRecurse(); 5991 CodeGenFunction CGF(CGM); 5992 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc); 5993 llvm::Value *Size = nullptr; 5994 // If the size of the reduction item is non-constant, load it from global 5995 // threadprivate variable.
5996 if (RCG.getSizes(N).second) { 5997 Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate( 5998 CGF, CGM.getContext().getSizeType(), 5999 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N))); 6000 Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false, 6001 CGM.getContext().getSizeType(), Loc); 6002 } 6003 RCG.emitAggregateType(CGF, N, Size); 6004 // Remap lhs and rhs variables to the addresses of the function arguments. 6005 // %lhs = bitcast void* %arg0 to <type>* 6006 // %rhs = bitcast void* %arg1 to <type>* 6007 CodeGenFunction::OMPPrivateScope PrivateScope(CGF); 6008 PrivateScope.addPrivate(LHSVD, [&C, &CGF, &ParamInOut, LHSVD]() { 6009 // Pull out the pointer to the variable. 6010 Address PtrAddr = CGF.EmitLoadOfPointer( 6011 CGF.GetAddrOfLocalVar(&ParamInOut), 6012 C.getPointerType(C.VoidPtrTy).castAs<PointerType>()); 6013 return CGF.Builder.CreateElementBitCast( 6014 PtrAddr, CGF.ConvertTypeForMem(LHSVD->getType())); 6015 }); 6016 PrivateScope.addPrivate(RHSVD, [&C, &CGF, &ParamIn, RHSVD]() { 6017 // Pull out the pointer to the variable. 6018 Address PtrAddr = CGF.EmitLoadOfPointer( 6019 CGF.GetAddrOfLocalVar(&ParamIn), 6020 C.getPointerType(C.VoidPtrTy).castAs<PointerType>()); 6021 return CGF.Builder.CreateElementBitCast( 6022 PtrAddr, CGF.ConvertTypeForMem(RHSVD->getType())); 6023 }); 6024 PrivateScope.Privatize(); 6025 // Emit the combiner body: 6026 // %2 = <ReductionOp>(<type> *%lhs, <type> *%rhs) 6027 // store <type> %2, <type>* %lhs 6028 CGM.getOpenMPRuntime().emitSingleReductionCombiner( 6029 CGF, ReductionOp, PrivateRef, cast<DeclRefExpr>(LHS), 6030 cast<DeclRefExpr>(RHS)); 6031 CGF.FinishFunction(); 6032 return Fn; 6033 } 6034 6035 /// Emits reduction finalizer function: 6036 /// \code 6037 /// void @.red_fini(void* %arg) { 6038 /// %0 = bitcast void* %arg to <type>* 6039 /// <destroy>(<type>* %0) 6040 /// ret void 6041 /// } 6042 /// \endcode 6043 static llvm::Value *emitReduceFiniFunction(CodeGenModule &CGM, 6044 SourceLocation Loc, 6045 ReductionCodeGen &RCG, unsigned N) { 6046 if (!RCG.needCleanups(N)) 6047 return nullptr; 6048 ASTContext &C = CGM.getContext(); 6049 FunctionArgList Args; 6050 ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 6051 ImplicitParamDecl::Other); 6052 Args.emplace_back(&Param); 6053 const auto &FnInfo = 6054 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 6055 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo); 6056 std::string Name = CGM.getOpenMPRuntime().getName({"red_fini", ""}); 6057 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage, 6058 Name, &CGM.getModule()); 6059 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo); 6060 Fn->setDoesNotRecurse(); 6061 CodeGenFunction CGF(CGM); 6062 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc); 6063 Address PrivateAddr = CGF.EmitLoadOfPointer( 6064 CGF.GetAddrOfLocalVar(&Param), 6065 C.getPointerType(C.VoidPtrTy).castAs<PointerType>()); 6066 llvm::Value *Size = nullptr; 6067 // If the size of the reduction item is non-constant, load it from global 6068 // threadprivate variable. 
6069 if (RCG.getSizes(N).second) { 6070 Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate( 6071 CGF, CGM.getContext().getSizeType(), 6072 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N))); 6073 Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false, 6074 CGM.getContext().getSizeType(), Loc); 6075 } 6076 RCG.emitAggregateType(CGF, N, Size); 6077 // Emit the finalizer body: 6078 // <destroy>(<type>* %0) 6079 RCG.emitCleanups(CGF, N, PrivateAddr); 6080 CGF.FinishFunction(Loc); 6081 return Fn; 6082 } 6083 6084 llvm::Value *CGOpenMPRuntime::emitTaskReductionInit( 6085 CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs, 6086 ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) { 6087 if (!CGF.HaveInsertPoint() || Data.ReductionVars.empty()) 6088 return nullptr; 6089 6090 // Build typedef struct: 6091 // kmp_taskred_input { 6092 // void *reduce_shar; // shared reduction item 6093 // void *reduce_orig; // original reduction item used for initialization 6094 // size_t reduce_size; // size of data item 6095 // void *reduce_init; // data initialization routine 6096 // void *reduce_fini; // data finalization routine 6097 // void *reduce_comb; // data combiner routine 6098 // kmp_task_red_flags_t flags; // flags for additional info from compiler 6099 // } kmp_taskred_input_t; 6100 ASTContext &C = CGM.getContext(); 6101 RecordDecl *RD = C.buildImplicitRecord("kmp_taskred_input_t"); 6102 RD->startDefinition(); 6103 const FieldDecl *SharedFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy); 6104 const FieldDecl *OrigFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy); 6105 const FieldDecl *SizeFD = addFieldToRecordDecl(C, RD, C.getSizeType()); 6106 const FieldDecl *InitFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy); 6107 const FieldDecl *FiniFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy); 6108 const FieldDecl *CombFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy); 6109 const FieldDecl *FlagsFD = addFieldToRecordDecl( 6110 C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/false)); 6111 RD->completeDefinition(); 6112 QualType RDType = C.getRecordType(RD); 6113 unsigned Size = Data.ReductionVars.size(); 6114 llvm::APInt ArraySize(/*numBits=*/64, Size); 6115 QualType ArrayRDType = C.getConstantArrayType( 6116 RDType, ArraySize, nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0); 6117 // kmp_task_red_input_t .rd_input.[Size]; 6118 Address TaskRedInput = CGF.CreateMemTemp(ArrayRDType, ".rd_input."); 6119 ReductionCodeGen RCG(Data.ReductionVars, Data.ReductionOrigs, 6120 Data.ReductionCopies, Data.ReductionOps); 6121 for (unsigned Cnt = 0; Cnt < Size; ++Cnt) { 6122 // kmp_task_red_input_t &ElemLVal = .rd_input.[Cnt]; 6123 llvm::Value *Idxs[] = {llvm::ConstantInt::get(CGM.SizeTy, /*V=*/0), 6124 llvm::ConstantInt::get(CGM.SizeTy, Cnt)}; 6125 llvm::Value *GEP = CGF.EmitCheckedInBoundsGEP( 6126 TaskRedInput.getPointer(), Idxs, 6127 /*SignedIndices=*/false, /*IsSubtraction=*/false, Loc, 6128 ".rd_input.gep."); 6129 LValue ElemLVal = CGF.MakeNaturalAlignAddrLValue(GEP, RDType); 6130 // ElemLVal.reduce_shar = &Shareds[Cnt]; 6131 LValue SharedLVal = CGF.EmitLValueForField(ElemLVal, SharedFD); 6132 RCG.emitSharedOrigLValue(CGF, Cnt); 6133 llvm::Value *CastedShared = 6134 CGF.EmitCastToVoidPtr(RCG.getSharedLValue(Cnt).getPointer(CGF)); 6135 CGF.EmitStoreOfScalar(CastedShared, SharedLVal); 6136 // ElemLVal.reduce_orig = &Origs[Cnt]; 6137 LValue OrigLVal = CGF.EmitLValueForField(ElemLVal, OrigFD); 6138 llvm::Value *CastedOrig = 6139 
CGF.EmitCastToVoidPtr(RCG.getOrigLValue(Cnt).getPointer(CGF)); 6140 CGF.EmitStoreOfScalar(CastedOrig, OrigLVal); 6141 RCG.emitAggregateType(CGF, Cnt); 6142 llvm::Value *SizeValInChars; 6143 llvm::Value *SizeVal; 6144 std::tie(SizeValInChars, SizeVal) = RCG.getSizes(Cnt); 6145 // We use delayed creation/initialization for VLAs and array sections. It is 6146 // required because the runtime does not provide a way to pass the sizes of 6147 // VLAs/array sections to the initializer/combiner/finalizer functions. 6148 // Instead, threadprivate global variables are used to store these values and 6149 // make them available to those functions. 6150 bool DelayedCreation = !!SizeVal; 6151 SizeValInChars = CGF.Builder.CreateIntCast(SizeValInChars, CGM.SizeTy, 6152 /*isSigned=*/false); 6153 LValue SizeLVal = CGF.EmitLValueForField(ElemLVal, SizeFD); 6154 CGF.EmitStoreOfScalar(SizeValInChars, SizeLVal); 6155 // ElemLVal.reduce_init = init; 6156 LValue InitLVal = CGF.EmitLValueForField(ElemLVal, InitFD); 6157 llvm::Value *InitAddr = 6158 CGF.EmitCastToVoidPtr(emitReduceInitFunction(CGM, Loc, RCG, Cnt)); 6159 CGF.EmitStoreOfScalar(InitAddr, InitLVal); 6160 // ElemLVal.reduce_fini = fini; 6161 LValue FiniLVal = CGF.EmitLValueForField(ElemLVal, FiniFD); 6162 llvm::Value *Fini = emitReduceFiniFunction(CGM, Loc, RCG, Cnt); 6163 llvm::Value *FiniAddr = Fini 6164 ? CGF.EmitCastToVoidPtr(Fini) 6165 : llvm::ConstantPointerNull::get(CGM.VoidPtrTy); 6166 CGF.EmitStoreOfScalar(FiniAddr, FiniLVal); 6167 // ElemLVal.reduce_comb = comb; 6168 LValue CombLVal = CGF.EmitLValueForField(ElemLVal, CombFD); 6169 llvm::Value *CombAddr = CGF.EmitCastToVoidPtr(emitReduceCombFunction( 6170 CGM, Loc, RCG, Cnt, Data.ReductionOps[Cnt], LHSExprs[Cnt], 6171 RHSExprs[Cnt], Data.ReductionCopies[Cnt])); 6172 CGF.EmitStoreOfScalar(CombAddr, CombLVal); 6173 // ElemLVal.flags = 0; 6174 LValue FlagsLVal = CGF.EmitLValueForField(ElemLVal, FlagsFD); 6175 if (DelayedCreation) { 6176 CGF.EmitStoreOfScalar( 6177 llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/1, /*isSigned=*/true), 6178 FlagsLVal); 6179 } else 6180 CGF.EmitNullInitialization(FlagsLVal.getAddress(CGF), 6181 FlagsLVal.getType()); 6182 } 6183 if (Data.IsReductionWithTaskMod) { 6184 // Build call void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int 6185 // is_ws, int num, void *data); 6186 llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc); 6187 llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), 6188 CGM.IntTy, /*isSigned=*/true); 6189 llvm::Value *Args[] = { 6190 IdentTLoc, GTid, 6191 llvm::ConstantInt::get(CGM.IntTy, Data.IsWorksharingReduction ?
1 : 0, 6192 /*isSigned=*/true), 6193 llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true), 6194 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 6195 TaskRedInput.getPointer(), CGM.VoidPtrTy)}; 6196 return CGF.EmitRuntimeCall( 6197 OMPBuilder.getOrCreateRuntimeFunction( 6198 CGM.getModule(), OMPRTL___kmpc_taskred_modifier_init), 6199 Args); 6200 } 6201 // Build call void *__kmpc_taskred_init(int gtid, int num_data, void *data); 6202 llvm::Value *Args[] = { 6203 CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy, 6204 /*isSigned=*/true), 6205 llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true), 6206 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TaskRedInput.getPointer(), 6207 CGM.VoidPtrTy)}; 6208 return CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 6209 CGM.getModule(), OMPRTL___kmpc_taskred_init), 6210 Args); 6211 } 6212 6213 void CGOpenMPRuntime::emitTaskReductionFini(CodeGenFunction &CGF, 6214 SourceLocation Loc, 6215 bool IsWorksharingReduction) { 6216 // Build call void __kmpc_task_reduction_modifier_fini(ident_t *loc, int 6217 // gtid, int is_ws); 6218 llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc); 6219 llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), 6220 CGM.IntTy, /*isSigned=*/true); 6221 llvm::Value *Args[] = {IdentTLoc, GTid, 6222 llvm::ConstantInt::get(CGM.IntTy, 6223 IsWorksharingReduction ? 1 : 0, 6224 /*isSigned=*/true)}; 6225 (void)CGF.EmitRuntimeCall( 6226 OMPBuilder.getOrCreateRuntimeFunction( 6227 CGM.getModule(), OMPRTL___kmpc_task_reduction_modifier_fini), 6228 Args); 6229 } 6230 6231 void CGOpenMPRuntime::emitTaskReductionFixups(CodeGenFunction &CGF, 6232 SourceLocation Loc, 6233 ReductionCodeGen &RCG, 6234 unsigned N) { 6235 auto Sizes = RCG.getSizes(N); 6236 // Emit the threadprivate global variable if the size of the reduction item 6237 // is non-constant (Sizes.second != nullptr). 6238 if (Sizes.second) { 6239 llvm::Value *SizeVal = CGF.Builder.CreateIntCast(Sizes.second, CGM.SizeTy, 6240 /*isSigned=*/false); 6241 Address SizeAddr = getAddrOfArtificialThreadPrivate( 6242 CGF, CGM.getContext().getSizeType(), 6243 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N))); 6244 CGF.Builder.CreateStore(SizeVal, SizeAddr, /*IsVolatile=*/false); 6245 } 6246 } 6247 6248 Address CGOpenMPRuntime::getTaskReductionItem(CodeGenFunction &CGF, 6249 SourceLocation Loc, 6250 llvm::Value *ReductionsPtr, 6251 LValue SharedLVal) { 6252 // Build call void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void 6253 // *d); 6254 llvm::Value *Args[] = {CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), 6255 CGM.IntTy, 6256 /*isSigned=*/true), 6257 ReductionsPtr, 6258 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 6259 SharedLVal.getPointer(CGF), CGM.VoidPtrTy)}; 6260 return Address( 6261 CGF.EmitRuntimeCall( 6262 OMPBuilder.getOrCreateRuntimeFunction( 6263 CGM.getModule(), OMPRTL___kmpc_task_reduction_get_th_data), 6264 Args), 6265 SharedLVal.getAlignment()); 6266 } 6267 6268 void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF, SourceLocation Loc, 6269 const OMPTaskDataTy &Data) { 6270 if (!CGF.HaveInsertPoint()) 6271 return; 6272 6273 if (CGF.CGM.getLangOpts().OpenMPIRBuilder && Data.Dependences.empty()) { 6274 // TODO: Need to support taskwait with dependences in the OpenMPIRBuilder.
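// Note: the OpenMPIRBuilder path below is expected to emit the equivalent
// __kmpc_omp_taskwait(&loc, gtid) call that the manual else-branch builds.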
6275 OMPBuilder.createTaskwait(CGF.Builder); 6276 } else { 6277 llvm::Value *ThreadID = getThreadID(CGF, Loc); 6278 llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc); 6279 auto &M = CGM.getModule(); 6280 Address DependenciesArray = Address::invalid(); 6281 llvm::Value *NumOfElements; 6282 std::tie(NumOfElements, DependenciesArray) = 6283 emitDependClause(CGF, Data.Dependences, Loc); 6284 llvm::Value *DepWaitTaskArgs[6]; 6285 if (!Data.Dependences.empty()) { 6286 DepWaitTaskArgs[0] = UpLoc; 6287 DepWaitTaskArgs[1] = ThreadID; 6288 DepWaitTaskArgs[2] = NumOfElements; 6289 DepWaitTaskArgs[3] = DependenciesArray.getPointer(); 6290 DepWaitTaskArgs[4] = CGF.Builder.getInt32(0); 6291 DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); 6292 6293 CodeGenFunction::RunCleanupsScope LocalScope(CGF); 6294 6295 // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid, 6296 // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 6297 // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info 6298 // is specified. 6299 CGF.EmitRuntimeCall( 6300 OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_omp_wait_deps), 6301 DepWaitTaskArgs); 6302 6303 } else { 6304 6305 // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32 6306 // global_tid); 6307 llvm::Value *Args[] = {UpLoc, ThreadID}; 6308 // Ignore return result until untied tasks are supported. 6309 CGF.EmitRuntimeCall( 6310 OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_omp_taskwait), 6311 Args); 6312 } 6313 } 6314 6315 if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) 6316 Region->emitUntiedSwitch(CGF); 6317 } 6318 6319 void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF, 6320 OpenMPDirectiveKind InnerKind, 6321 const RegionCodeGenTy &CodeGen, 6322 bool HasCancel) { 6323 if (!CGF.HaveInsertPoint()) 6324 return; 6325 InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel, 6326 InnerKind != OMPD_critical && 6327 InnerKind != OMPD_master && 6328 InnerKind != OMPD_masked); 6329 CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr); 6330 } 6331 6332 namespace { 6333 enum RTCancelKind { 6334 CancelNoreq = 0, 6335 CancelParallel = 1, 6336 CancelLoop = 2, 6337 CancelSections = 3, 6338 CancelTaskgroup = 4 6339 }; 6340 } // anonymous namespace 6341 6342 static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) { 6343 RTCancelKind CancelKind = CancelNoreq; 6344 if (CancelRegion == OMPD_parallel) 6345 CancelKind = CancelParallel; 6346 else if (CancelRegion == OMPD_for) 6347 CancelKind = CancelLoop; 6348 else if (CancelRegion == OMPD_sections) 6349 CancelKind = CancelSections; 6350 else { 6351 assert(CancelRegion == OMPD_taskgroup); 6352 CancelKind = CancelTaskgroup; 6353 } 6354 return CancelKind; 6355 } 6356 6357 void CGOpenMPRuntime::emitCancellationPointCall( 6358 CodeGenFunction &CGF, SourceLocation Loc, 6359 OpenMPDirectiveKind CancelRegion) { 6360 if (!CGF.HaveInsertPoint()) 6361 return; 6362 // Build call kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32 6363 // global_tid, kmp_int32 cncl_kind); 6364 if (auto *OMPRegionInfo = 6365 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) { 6366 // For 'cancellation point taskgroup', the task region info may not have a 6367 // cancel. This may instead happen in another adjacent task. 
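// Illustrative sketch of the emitted check (register and block names are
// assumptions, not verbatim output):
//   %res = call i32 @__kmpc_cancellationpoint(%ident_t* %loc, i32 %gtid, i32 %kind)
//   %cmp = icmp ne i32 %res, 0
//   br i1 %cmp, label %.cancel.exit, label %.cancel.continue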
6368 if (CancelRegion == OMPD_taskgroup || OMPRegionInfo->hasCancel()) { 6369 llvm::Value *Args[] = { 6370 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 6371 CGF.Builder.getInt32(getCancellationKind(CancelRegion))}; 6372 // Ignore return result until untied tasks are supported. 6373 llvm::Value *Result = CGF.EmitRuntimeCall( 6374 OMPBuilder.getOrCreateRuntimeFunction( 6375 CGM.getModule(), OMPRTL___kmpc_cancellationpoint), 6376 Args); 6377 // if (__kmpc_cancellationpoint()) { 6378 // call i32 @__kmpc_cancel_barrier( // for parallel cancellation only 6379 // exit from construct; 6380 // } 6381 llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit"); 6382 llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue"); 6383 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result); 6384 CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB); 6385 CGF.EmitBlock(ExitBB); 6386 if (CancelRegion == OMPD_parallel) 6387 emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false); 6388 // exit from construct; 6389 CodeGenFunction::JumpDest CancelDest = 6390 CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind()); 6391 CGF.EmitBranchThroughCleanup(CancelDest); 6392 CGF.EmitBlock(ContBB, /*IsFinished=*/true); 6393 } 6394 } 6395 } 6396 6397 void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc, 6398 const Expr *IfCond, 6399 OpenMPDirectiveKind CancelRegion) { 6400 if (!CGF.HaveInsertPoint()) 6401 return; 6402 // Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid, 6403 // kmp_int32 cncl_kind); 6404 auto &M = CGM.getModule(); 6405 if (auto *OMPRegionInfo = 6406 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) { 6407 auto &&ThenGen = [this, &M, Loc, CancelRegion, 6408 OMPRegionInfo](CodeGenFunction &CGF, PrePostActionTy &) { 6409 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime(); 6410 llvm::Value *Args[] = { 6411 RT.emitUpdateLocation(CGF, Loc), RT.getThreadID(CGF, Loc), 6412 CGF.Builder.getInt32(getCancellationKind(CancelRegion))}; 6413 // Ignore return result until untied tasks are supported. 6414 llvm::Value *Result = CGF.EmitRuntimeCall( 6415 OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_cancel), Args); 6416 // if (__kmpc_cancel()) { 6417 // call i32 @__kmpc_cancel_barrier( // for parallel cancellation only 6418 // exit from construct; 6419 // } 6420 llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit"); 6421 llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue"); 6422 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result); 6423 CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB); 6424 CGF.EmitBlock(ExitBB); 6425 if (CancelRegion == OMPD_parallel) 6426 RT.emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false); 6427 // exit from construct; 6428 CodeGenFunction::JumpDest CancelDest = 6429 CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind()); 6430 CGF.EmitBranchThroughCleanup(CancelDest); 6431 CGF.EmitBlock(ContBB, /*IsFinished=*/true); 6432 }; 6433 if (IfCond) { 6434 emitIfClause(CGF, IfCond, ThenGen, 6435 [](CodeGenFunction &, PrePostActionTy &) {}); 6436 } else { 6437 RegionCodeGenTy ThenRCG(ThenGen); 6438 ThenRCG(CGF); 6439 } 6440 } 6441 } 6442 6443 namespace { 6444 /// Cleanup action for uses_allocators support. 
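/// For example (illustration only; 'my_alloc' and 'my_traits' are
/// hypothetical names), given
/// \code
/// #pragma omp target uses_allocators(my_alloc(my_traits))
/// \endcode
/// Enter() initializes 'my_alloc' via __kmpc_init_allocator and Exit()
/// releases it via __kmpc_destroy_allocator.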
6445 class OMPUsesAllocatorsActionTy final : public PrePostActionTy { 6446 ArrayRef<std::pair<const Expr *, const Expr *>> Allocators; 6447 6448 public: 6449 OMPUsesAllocatorsActionTy( 6450 ArrayRef<std::pair<const Expr *, const Expr *>> Allocators) 6451 : Allocators(Allocators) {} 6452 void Enter(CodeGenFunction &CGF) override { 6453 if (!CGF.HaveInsertPoint()) 6454 return; 6455 for (const auto &AllocatorData : Allocators) { 6456 CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsInit( 6457 CGF, AllocatorData.first, AllocatorData.second); 6458 } 6459 } 6460 void Exit(CodeGenFunction &CGF) override { 6461 if (!CGF.HaveInsertPoint()) 6462 return; 6463 for (const auto &AllocatorData : Allocators) { 6464 CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsFini(CGF, 6465 AllocatorData.first); 6466 } 6467 } 6468 }; 6469 } // namespace 6470 6471 void CGOpenMPRuntime::emitTargetOutlinedFunction( 6472 const OMPExecutableDirective &D, StringRef ParentName, 6473 llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, 6474 bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) { 6475 assert(!ParentName.empty() && "Invalid target region parent name!"); 6476 HasEmittedTargetRegion = true; 6477 SmallVector<std::pair<const Expr *, const Expr *>, 4> Allocators; 6478 for (const auto *C : D.getClausesOfKind<OMPUsesAllocatorsClause>()) { 6479 for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) { 6480 const OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I); 6481 if (!D.AllocatorTraits) 6482 continue; 6483 Allocators.emplace_back(D.Allocator, D.AllocatorTraits); 6484 } 6485 } 6486 OMPUsesAllocatorsActionTy UsesAllocatorAction(Allocators); 6487 CodeGen.setAction(UsesAllocatorAction); 6488 emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID, 6489 IsOffloadEntry, CodeGen); 6490 } 6491 6492 void CGOpenMPRuntime::emitUsesAllocatorsInit(CodeGenFunction &CGF, 6493 const Expr *Allocator, 6494 const Expr *AllocatorTraits) { 6495 llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc()); 6496 ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true); 6497 // Use default memspace handle. 6498 llvm::Value *MemSpaceHandle = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); 6499 llvm::Value *NumTraits = llvm::ConstantInt::get( 6500 CGF.IntTy, cast<ConstantArrayType>( 6501 AllocatorTraits->getType()->getAsArrayTypeUnsafe()) 6502 ->getSize() 6503 .getLimitedValue()); 6504 LValue AllocatorTraitsLVal = CGF.EmitLValue(AllocatorTraits); 6505 Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 6506 AllocatorTraitsLVal.getAddress(CGF), CGF.VoidPtrPtrTy); 6507 AllocatorTraitsLVal = CGF.MakeAddrLValue(Addr, CGF.getContext().VoidPtrTy, 6508 AllocatorTraitsLVal.getBaseInfo(), 6509 AllocatorTraitsLVal.getTBAAInfo()); 6510 llvm::Value *Traits = 6511 CGF.EmitLoadOfScalar(AllocatorTraitsLVal, AllocatorTraits->getExprLoc()); 6512 6513 llvm::Value *AllocatorVal = 6514 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 6515 CGM.getModule(), OMPRTL___kmpc_init_allocator), 6516 {ThreadId, MemSpaceHandle, NumTraits, Traits}); 6517 // Store to allocator. 
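// The allocator expression refers to a variable of the allocator handle type;
// the handle returned by __kmpc_init_allocator is converted to that type and
// stored into the variable below.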
6518 CGF.EmitVarDecl(*cast<VarDecl>( 6519 cast<DeclRefExpr>(Allocator->IgnoreParenImpCasts())->getDecl())); 6520 LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts()); 6521 AllocatorVal = 6522 CGF.EmitScalarConversion(AllocatorVal, CGF.getContext().VoidPtrTy, 6523 Allocator->getType(), Allocator->getExprLoc()); 6524 CGF.EmitStoreOfScalar(AllocatorVal, AllocatorLVal); 6525 } 6526 6527 void CGOpenMPRuntime::emitUsesAllocatorsFini(CodeGenFunction &CGF, 6528 const Expr *Allocator) { 6529 llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc()); 6530 ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true); 6531 LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts()); 6532 llvm::Value *AllocatorVal = 6533 CGF.EmitLoadOfScalar(AllocatorLVal, Allocator->getExprLoc()); 6534 AllocatorVal = CGF.EmitScalarConversion(AllocatorVal, Allocator->getType(), 6535 CGF.getContext().VoidPtrTy, 6536 Allocator->getExprLoc()); 6537 (void)CGF.EmitRuntimeCall( 6538 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), 6539 OMPRTL___kmpc_destroy_allocator), 6540 {ThreadId, AllocatorVal}); 6541 } 6542 6543 void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper( 6544 const OMPExecutableDirective &D, StringRef ParentName, 6545 llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, 6546 bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) { 6547 // Create a unique name for the entry function using the source location 6548 // information of the current target region. The name will be something like: 6549 // 6550 // __omp_offloading_DD_FFFF_PP_lBB 6551 // 6552 // where DD_FFFF is an ID unique to the file (device and file IDs), PP is the 6553 // mangled name of the function that encloses the target region and BB is the 6554 // line number of the target region. 6555 6556 unsigned DeviceID; 6557 unsigned FileID; 6558 unsigned Line; 6559 getTargetEntryUniqueInfo(CGM.getContext(), D.getBeginLoc(), DeviceID, FileID, 6560 Line); 6561 SmallString<64> EntryFnName; 6562 { 6563 llvm::raw_svector_ostream OS(EntryFnName); 6564 OS << "__omp_offloading" << llvm::format("_%x", DeviceID) 6565 << llvm::format("_%x_", FileID) << ParentName << "_l" << Line; 6566 } 6567 6568 const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target); 6569 6570 CodeGenFunction CGF(CGM, true); 6571 CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName); 6572 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 6573 6574 OutlinedFn = CGF.GenerateOpenMPCapturedStmtFunction(CS, D.getBeginLoc()); 6575 6576 // If this target outlined function is not an offload entry, we don't need to 6577 // register it. 6578 if (!IsOffloadEntry) 6579 return; 6580 6581 // The target region ID is used by the runtime library to identify the current 6582 // target region, so it only has to be unique and not necessarily point to 6583 // anything. It could be the pointer to the outlined function that implements 6584 // the target region, but we aren't using that, so the compiler doesn't need 6585 // to keep it around and can therefore inline the host function if proven 6586 // worthwhile during optimization. On the other hand, if emitting code for the 6587 // device, the ID has to be the function address so that it can be retrieved 6588 // from the offloading entry and launched by the runtime library. We also mark 6589 // the outlined function to have external linkage in case we are emitting code 6590 // for the device, because these functions will be entry points to the device.
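// (Illustration, not verbatim output: on the host this yields a zero-initialized
// i8 global such as "__omp_offloading_<id>_<parent>_l<line>.region_id", while on
// the device the outlined function itself, bitcast to i8*, serves as the ID.)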
6591 6592 if (CGM.getLangOpts().OpenMPIsDevice) { 6593 OutlinedFnID = llvm::ConstantExpr::getBitCast(OutlinedFn, CGM.Int8PtrTy); 6594 OutlinedFn->setLinkage(llvm::GlobalValue::WeakAnyLinkage); 6595 OutlinedFn->setDSOLocal(false); 6596 if (CGM.getTriple().isAMDGCN()) 6597 OutlinedFn->setCallingConv(llvm::CallingConv::AMDGPU_KERNEL); 6598 } else { 6599 std::string Name = getName({EntryFnName, "region_id"}); 6600 OutlinedFnID = new llvm::GlobalVariable( 6601 CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true, 6602 llvm::GlobalValue::WeakAnyLinkage, 6603 llvm::Constant::getNullValue(CGM.Int8Ty), Name); 6604 } 6605 6606 // Register the information for the entry associated with this target region. 6607 OffloadEntriesInfoManager.registerTargetRegionEntryInfo( 6608 DeviceID, FileID, ParentName, Line, OutlinedFn, OutlinedFnID, 6609 OffloadEntriesInfoManagerTy::OMPTargetRegionEntryTargetRegion); 6610 6611 // Add NumTeams and ThreadLimit attributes to the outlined GPU function 6612 int32_t DefaultValTeams = -1; 6613 getNumTeamsExprForTargetDirective(CGF, D, DefaultValTeams); 6614 if (DefaultValTeams > 0) { 6615 OutlinedFn->addFnAttr("omp_target_num_teams", 6616 std::to_string(DefaultValTeams)); 6617 } 6618 int32_t DefaultValThreads = -1; 6619 getNumThreadsExprForTargetDirective(CGF, D, DefaultValThreads); 6620 if (DefaultValThreads > 0) { 6621 OutlinedFn->addFnAttr("omp_target_thread_limit", 6622 std::to_string(DefaultValThreads)); 6623 } 6624 6625 CGM.getTargetCodeGenInfo().setTargetAttributes(nullptr, OutlinedFn, CGM); 6626 } 6627 6628 /// Checks if the expression is constant or does not have non-trivial function 6629 /// calls. 6630 static bool isTrivial(ASTContext &Ctx, const Expr * E) { 6631 // We can skip constant expressions. 6632 // We can skip expressions with trivial calls or simple expressions. 6633 return (E->isEvaluatable(Ctx, Expr::SE_AllowUndefinedBehavior) || 6634 !E->hasNonTrivialCall(Ctx)) && 6635 !E->HasSideEffects(Ctx, /*IncludePossibleEffects=*/true); 6636 } 6637 6638 const Stmt *CGOpenMPRuntime::getSingleCompoundChild(ASTContext &Ctx, 6639 const Stmt *Body) { 6640 const Stmt *Child = Body->IgnoreContainers(); 6641 while (const auto *C = dyn_cast_or_null<CompoundStmt>(Child)) { 6642 Child = nullptr; 6643 for (const Stmt *S : C->body()) { 6644 if (const auto *E = dyn_cast<Expr>(S)) { 6645 if (isTrivial(Ctx, E)) 6646 continue; 6647 } 6648 // Some of the statements can be ignored. 6649 if (isa<AsmStmt>(S) || isa<NullStmt>(S) || isa<OMPFlushDirective>(S) || 6650 isa<OMPBarrierDirective>(S) || isa<OMPTaskyieldDirective>(S)) 6651 continue; 6652 // Analyze declarations. 6653 if (const auto *DS = dyn_cast<DeclStmt>(S)) { 6654 if (llvm::all_of(DS->decls(), [](const Decl *D) { 6655 if (isa<EmptyDecl>(D) || isa<DeclContext>(D) || 6656 isa<TypeDecl>(D) || isa<PragmaCommentDecl>(D) || 6657 isa<PragmaDetectMismatchDecl>(D) || isa<UsingDecl>(D) || 6658 isa<UsingDirectiveDecl>(D) || 6659 isa<OMPDeclareReductionDecl>(D) || 6660 isa<OMPThreadPrivateDecl>(D) || isa<OMPAllocateDecl>(D)) 6661 return true; 6662 const auto *VD = dyn_cast<VarDecl>(D); 6663 if (!VD) 6664 return false; 6665 return VD->hasGlobalStorage() || !VD->isUsed(); 6666 })) 6667 continue; 6668 } 6669 // Found multiple children - cannot get the one child only. 
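// (Illustration: a 'target' region whose body is a lone nested 'teams'
// directive yields that directive here; any second non-ignorable statement
// makes this return null and callers fall back to their defaults.)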
6670 if (Child) 6671 return nullptr; 6672 Child = S; 6673 } 6674 if (Child) 6675 Child = Child->IgnoreContainers(); 6676 } 6677 return Child; 6678 } 6679 6680 const Expr *CGOpenMPRuntime::getNumTeamsExprForTargetDirective( 6681 CodeGenFunction &CGF, const OMPExecutableDirective &D, 6682 int32_t &DefaultVal) { 6683 6684 OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind(); 6685 assert(isOpenMPTargetExecutionDirective(DirectiveKind) && 6686 "Expected target-based executable directive."); 6687 switch (DirectiveKind) { 6688 case OMPD_target: { 6689 const auto *CS = D.getInnermostCapturedStmt(); 6690 const auto *Body = 6691 CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true); 6692 const Stmt *ChildStmt = 6693 CGOpenMPRuntime::getSingleCompoundChild(CGF.getContext(), Body); 6694 if (const auto *NestedDir = 6695 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) { 6696 if (isOpenMPTeamsDirective(NestedDir->getDirectiveKind())) { 6697 if (NestedDir->hasClausesOfKind<OMPNumTeamsClause>()) { 6698 const Expr *NumTeams = 6699 NestedDir->getSingleClause<OMPNumTeamsClause>()->getNumTeams(); 6700 if (NumTeams->isIntegerConstantExpr(CGF.getContext())) 6701 if (auto Constant = 6702 NumTeams->getIntegerConstantExpr(CGF.getContext())) 6703 DefaultVal = Constant->getExtValue(); 6704 return NumTeams; 6705 } 6706 DefaultVal = 0; 6707 return nullptr; 6708 } 6709 if (isOpenMPParallelDirective(NestedDir->getDirectiveKind()) || 6710 isOpenMPSimdDirective(NestedDir->getDirectiveKind())) { 6711 DefaultVal = 1; 6712 return nullptr; 6713 } 6714 DefaultVal = 1; 6715 return nullptr; 6716 } 6717 // A value of -1 is used to check if we need to emit no teams region 6718 DefaultVal = -1; 6719 return nullptr; 6720 } 6721 case OMPD_target_teams: 6722 case OMPD_target_teams_distribute: 6723 case OMPD_target_teams_distribute_simd: 6724 case OMPD_target_teams_distribute_parallel_for: 6725 case OMPD_target_teams_distribute_parallel_for_simd: { 6726 if (D.hasClausesOfKind<OMPNumTeamsClause>()) { 6727 const Expr *NumTeams = 6728 D.getSingleClause<OMPNumTeamsClause>()->getNumTeams(); 6729 if (NumTeams->isIntegerConstantExpr(CGF.getContext())) 6730 if (auto Constant = NumTeams->getIntegerConstantExpr(CGF.getContext())) 6731 DefaultVal = Constant->getExtValue(); 6732 return NumTeams; 6733 } 6734 DefaultVal = 0; 6735 return nullptr; 6736 } 6737 case OMPD_target_parallel: 6738 case OMPD_target_parallel_for: 6739 case OMPD_target_parallel_for_simd: 6740 case OMPD_target_simd: 6741 DefaultVal = 1; 6742 return nullptr; 6743 case OMPD_parallel: 6744 case OMPD_for: 6745 case OMPD_parallel_for: 6746 case OMPD_parallel_master: 6747 case OMPD_parallel_sections: 6748 case OMPD_for_simd: 6749 case OMPD_parallel_for_simd: 6750 case OMPD_cancel: 6751 case OMPD_cancellation_point: 6752 case OMPD_ordered: 6753 case OMPD_threadprivate: 6754 case OMPD_allocate: 6755 case OMPD_task: 6756 case OMPD_simd: 6757 case OMPD_tile: 6758 case OMPD_unroll: 6759 case OMPD_sections: 6760 case OMPD_section: 6761 case OMPD_single: 6762 case OMPD_master: 6763 case OMPD_critical: 6764 case OMPD_taskyield: 6765 case OMPD_barrier: 6766 case OMPD_taskwait: 6767 case OMPD_taskgroup: 6768 case OMPD_atomic: 6769 case OMPD_flush: 6770 case OMPD_depobj: 6771 case OMPD_scan: 6772 case OMPD_teams: 6773 case OMPD_target_data: 6774 case OMPD_target_exit_data: 6775 case OMPD_target_enter_data: 6776 case OMPD_distribute: 6777 case OMPD_distribute_simd: 6778 case OMPD_distribute_parallel_for: 6779 case OMPD_distribute_parallel_for_simd: 6780 case 
OMPD_teams_distribute: 6781 case OMPD_teams_distribute_simd: 6782 case OMPD_teams_distribute_parallel_for: 6783 case OMPD_teams_distribute_parallel_for_simd: 6784 case OMPD_target_update: 6785 case OMPD_declare_simd: 6786 case OMPD_declare_variant: 6787 case OMPD_begin_declare_variant: 6788 case OMPD_end_declare_variant: 6789 case OMPD_declare_target: 6790 case OMPD_end_declare_target: 6791 case OMPD_declare_reduction: 6792 case OMPD_declare_mapper: 6793 case OMPD_taskloop: 6794 case OMPD_taskloop_simd: 6795 case OMPD_master_taskloop: 6796 case OMPD_master_taskloop_simd: 6797 case OMPD_parallel_master_taskloop: 6798 case OMPD_parallel_master_taskloop_simd: 6799 case OMPD_requires: 6800 case OMPD_metadirective: 6801 case OMPD_unknown: 6802 break; 6803 default: 6804 break; 6805 } 6806 llvm_unreachable("Unexpected directive kind."); 6807 } 6808 6809 llvm::Value *CGOpenMPRuntime::emitNumTeamsForTargetDirective( 6810 CodeGenFunction &CGF, const OMPExecutableDirective &D) { 6811 assert(!CGF.getLangOpts().OpenMPIsDevice && 6812 "Clauses associated with the teams directive expected to be emitted " 6813 "only for the host!"); 6814 CGBuilderTy &Bld = CGF.Builder; 6815 int32_t DefaultNT = -1; 6816 const Expr *NumTeams = getNumTeamsExprForTargetDirective(CGF, D, DefaultNT); 6817 if (NumTeams != nullptr) { 6818 OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind(); 6819 6820 switch (DirectiveKind) { 6821 case OMPD_target: { 6822 const auto *CS = D.getInnermostCapturedStmt(); 6823 CGOpenMPInnerExprInfo CGInfo(CGF, *CS); 6824 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 6825 llvm::Value *NumTeamsVal = CGF.EmitScalarExpr(NumTeams, 6826 /*IgnoreResultAssign*/ true); 6827 return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty, 6828 /*isSigned=*/true); 6829 } 6830 case OMPD_target_teams: 6831 case OMPD_target_teams_distribute: 6832 case OMPD_target_teams_distribute_simd: 6833 case OMPD_target_teams_distribute_parallel_for: 6834 case OMPD_target_teams_distribute_parallel_for_simd: { 6835 CodeGenFunction::RunCleanupsScope NumTeamsScope(CGF); 6836 llvm::Value *NumTeamsVal = CGF.EmitScalarExpr(NumTeams, 6837 /*IgnoreResultAssign*/ true); 6838 return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty, 6839 /*isSigned=*/true); 6840 } 6841 default: 6842 break; 6843 } 6844 } else if (DefaultNT == -1) { 6845 return nullptr; 6846 } 6847 6848 return Bld.getInt32(DefaultNT); 6849 } 6850 6851 static llvm::Value *getNumThreads(CodeGenFunction &CGF, const CapturedStmt *CS, 6852 llvm::Value *DefaultThreadLimitVal) { 6853 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild( 6854 CGF.getContext(), CS->getCapturedStmt()); 6855 if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) { 6856 if (isOpenMPParallelDirective(Dir->getDirectiveKind())) { 6857 llvm::Value *NumThreads = nullptr; 6858 llvm::Value *CondVal = nullptr; 6859 // Handle if clause. If if clause present, the number of threads is 6860 // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1. 
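// (Example, for illustration: '#pragma omp parallel if(c) num_threads(n)'
// yields 'c ? min(n, thread_limit) : 1' below, and an if-condition that
// folds to false at compile time short-circuits to a constant 1.)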
6861 if (Dir->hasClausesOfKind<OMPIfClause>()) { 6862 CGOpenMPInnerExprInfo CGInfo(CGF, *CS); 6863 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 6864 const OMPIfClause *IfClause = nullptr; 6865 for (const auto *C : Dir->getClausesOfKind<OMPIfClause>()) { 6866 if (C->getNameModifier() == OMPD_unknown || 6867 C->getNameModifier() == OMPD_parallel) { 6868 IfClause = C; 6869 break; 6870 } 6871 } 6872 if (IfClause) { 6873 const Expr *Cond = IfClause->getCondition(); 6874 bool Result; 6875 if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) { 6876 if (!Result) 6877 return CGF.Builder.getInt32(1); 6878 } else { 6879 CodeGenFunction::LexicalScope Scope(CGF, Cond->getSourceRange()); 6880 if (const auto *PreInit = 6881 cast_or_null<DeclStmt>(IfClause->getPreInitStmt())) { 6882 for (const auto *I : PreInit->decls()) { 6883 if (!I->hasAttr<OMPCaptureNoInitAttr>()) { 6884 CGF.EmitVarDecl(cast<VarDecl>(*I)); 6885 } else { 6886 CodeGenFunction::AutoVarEmission Emission = 6887 CGF.EmitAutoVarAlloca(cast<VarDecl>(*I)); 6888 CGF.EmitAutoVarCleanups(Emission); 6889 } 6890 } 6891 } 6892 CondVal = CGF.EvaluateExprAsBool(Cond); 6893 } 6894 } 6895 } 6896 // Check the value of num_threads clause iff if clause was not specified 6897 // or is not evaluated to false. 6898 if (Dir->hasClausesOfKind<OMPNumThreadsClause>()) { 6899 CGOpenMPInnerExprInfo CGInfo(CGF, *CS); 6900 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 6901 const auto *NumThreadsClause = 6902 Dir->getSingleClause<OMPNumThreadsClause>(); 6903 CodeGenFunction::LexicalScope Scope( 6904 CGF, NumThreadsClause->getNumThreads()->getSourceRange()); 6905 if (const auto *PreInit = 6906 cast_or_null<DeclStmt>(NumThreadsClause->getPreInitStmt())) { 6907 for (const auto *I : PreInit->decls()) { 6908 if (!I->hasAttr<OMPCaptureNoInitAttr>()) { 6909 CGF.EmitVarDecl(cast<VarDecl>(*I)); 6910 } else { 6911 CodeGenFunction::AutoVarEmission Emission = 6912 CGF.EmitAutoVarAlloca(cast<VarDecl>(*I)); 6913 CGF.EmitAutoVarCleanups(Emission); 6914 } 6915 } 6916 } 6917 NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads()); 6918 NumThreads = CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, 6919 /*isSigned=*/false); 6920 if (DefaultThreadLimitVal) 6921 NumThreads = CGF.Builder.CreateSelect( 6922 CGF.Builder.CreateICmpULT(DefaultThreadLimitVal, NumThreads), 6923 DefaultThreadLimitVal, NumThreads); 6924 } else { 6925 NumThreads = DefaultThreadLimitVal ? DefaultThreadLimitVal 6926 : CGF.Builder.getInt32(0); 6927 } 6928 // Process condition of the if clause. 6929 if (CondVal) { 6930 NumThreads = CGF.Builder.CreateSelect(CondVal, NumThreads, 6931 CGF.Builder.getInt32(1)); 6932 } 6933 return NumThreads; 6934 } 6935 if (isOpenMPSimdDirective(Dir->getDirectiveKind())) 6936 return CGF.Builder.getInt32(1); 6937 return DefaultThreadLimitVal; 6938 } 6939 return DefaultThreadLimitVal ? 
DefaultThreadLimitVal 6940 : CGF.Builder.getInt32(0); 6941 } 6942 6943 const Expr *CGOpenMPRuntime::getNumThreadsExprForTargetDirective( 6944 CodeGenFunction &CGF, const OMPExecutableDirective &D, 6945 int32_t &DefaultVal) { 6946 OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind(); 6947 assert(isOpenMPTargetExecutionDirective(DirectiveKind) && 6948 "Expected target-based executable directive."); 6949 6950 switch (DirectiveKind) { 6951 case OMPD_target: 6952 // Teams have no clause thread_limit 6953 return nullptr; 6954 case OMPD_target_teams: 6955 case OMPD_target_teams_distribute: 6956 if (D.hasClausesOfKind<OMPThreadLimitClause>()) { 6957 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>(); 6958 const Expr *ThreadLimit = ThreadLimitClause->getThreadLimit(); 6959 if (ThreadLimit->isIntegerConstantExpr(CGF.getContext())) 6960 if (auto Constant = 6961 ThreadLimit->getIntegerConstantExpr(CGF.getContext())) 6962 DefaultVal = Constant->getExtValue(); 6963 return ThreadLimit; 6964 } 6965 return nullptr; 6966 case OMPD_target_parallel: 6967 case OMPD_target_parallel_for: 6968 case OMPD_target_parallel_for_simd: 6969 case OMPD_target_teams_distribute_parallel_for: 6970 case OMPD_target_teams_distribute_parallel_for_simd: { 6971 Expr *ThreadLimit = nullptr; 6972 Expr *NumThreads = nullptr; 6973 if (D.hasClausesOfKind<OMPThreadLimitClause>()) { 6974 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>(); 6975 ThreadLimit = ThreadLimitClause->getThreadLimit(); 6976 if (ThreadLimit->isIntegerConstantExpr(CGF.getContext())) 6977 if (auto Constant = 6978 ThreadLimit->getIntegerConstantExpr(CGF.getContext())) 6979 DefaultVal = Constant->getExtValue(); 6980 } 6981 if (D.hasClausesOfKind<OMPNumThreadsClause>()) { 6982 const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>(); 6983 NumThreads = NumThreadsClause->getNumThreads(); 6984 if (NumThreads->isIntegerConstantExpr(CGF.getContext())) { 6985 if (auto Constant = 6986 NumThreads->getIntegerConstantExpr(CGF.getContext())) { 6987 if (Constant->getExtValue() < DefaultVal) { 6988 DefaultVal = Constant->getExtValue(); 6989 ThreadLimit = NumThreads; 6990 } 6991 } 6992 } 6993 } 6994 return ThreadLimit; 6995 } 6996 case OMPD_target_teams_distribute_simd: 6997 case OMPD_target_simd: 6998 DefaultVal = 1; 6999 return nullptr; 7000 case OMPD_parallel: 7001 case OMPD_for: 7002 case OMPD_parallel_for: 7003 case OMPD_parallel_master: 7004 case OMPD_parallel_sections: 7005 case OMPD_for_simd: 7006 case OMPD_parallel_for_simd: 7007 case OMPD_cancel: 7008 case OMPD_cancellation_point: 7009 case OMPD_ordered: 7010 case OMPD_threadprivate: 7011 case OMPD_allocate: 7012 case OMPD_task: 7013 case OMPD_simd: 7014 case OMPD_tile: 7015 case OMPD_unroll: 7016 case OMPD_sections: 7017 case OMPD_section: 7018 case OMPD_single: 7019 case OMPD_master: 7020 case OMPD_critical: 7021 case OMPD_taskyield: 7022 case OMPD_barrier: 7023 case OMPD_taskwait: 7024 case OMPD_taskgroup: 7025 case OMPD_atomic: 7026 case OMPD_flush: 7027 case OMPD_depobj: 7028 case OMPD_scan: 7029 case OMPD_teams: 7030 case OMPD_target_data: 7031 case OMPD_target_exit_data: 7032 case OMPD_target_enter_data: 7033 case OMPD_distribute: 7034 case OMPD_distribute_simd: 7035 case OMPD_distribute_parallel_for: 7036 case OMPD_distribute_parallel_for_simd: 7037 case OMPD_teams_distribute: 7038 case OMPD_teams_distribute_simd: 7039 case OMPD_teams_distribute_parallel_for: 7040 case OMPD_teams_distribute_parallel_for_simd: 7041 case OMPD_target_update: 7042 case 
OMPD_declare_simd: 7043 case OMPD_declare_variant: 7044 case OMPD_begin_declare_variant: 7045 case OMPD_end_declare_variant: 7046 case OMPD_declare_target: 7047 case OMPD_end_declare_target: 7048 case OMPD_declare_reduction: 7049 case OMPD_declare_mapper: 7050 case OMPD_taskloop: 7051 case OMPD_taskloop_simd: 7052 case OMPD_master_taskloop: 7053 case OMPD_master_taskloop_simd: 7054 case OMPD_parallel_master_taskloop: 7055 case OMPD_parallel_master_taskloop_simd: 7056 case OMPD_requires: 7057 case OMPD_unknown: 7058 break; 7059 default: 7060 break; 7061 } 7062 llvm_unreachable("Unsupported directive kind."); 7063 } 7064 7065 llvm::Value *CGOpenMPRuntime::emitNumThreadsForTargetDirective( 7066 CodeGenFunction &CGF, const OMPExecutableDirective &D) { 7067 assert(!CGF.getLangOpts().OpenMPIsDevice && 7068 "Clauses associated with the teams directive expected to be emitted " 7069 "only for the host!"); 7070 OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind(); 7071 assert(isOpenMPTargetExecutionDirective(DirectiveKind) && 7072 "Expected target-based executable directive."); 7073 CGBuilderTy &Bld = CGF.Builder; 7074 llvm::Value *ThreadLimitVal = nullptr; 7075 llvm::Value *NumThreadsVal = nullptr; 7076 switch (DirectiveKind) { 7077 case OMPD_target: { 7078 const CapturedStmt *CS = D.getInnermostCapturedStmt(); 7079 if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal)) 7080 return NumThreads; 7081 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild( 7082 CGF.getContext(), CS->getCapturedStmt()); 7083 if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) { 7084 if (Dir->hasClausesOfKind<OMPThreadLimitClause>()) { 7085 CGOpenMPInnerExprInfo CGInfo(CGF, *CS); 7086 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 7087 const auto *ThreadLimitClause = 7088 Dir->getSingleClause<OMPThreadLimitClause>(); 7089 CodeGenFunction::LexicalScope Scope( 7090 CGF, ThreadLimitClause->getThreadLimit()->getSourceRange()); 7091 if (const auto *PreInit = 7092 cast_or_null<DeclStmt>(ThreadLimitClause->getPreInitStmt())) { 7093 for (const auto *I : PreInit->decls()) { 7094 if (!I->hasAttr<OMPCaptureNoInitAttr>()) { 7095 CGF.EmitVarDecl(cast<VarDecl>(*I)); 7096 } else { 7097 CodeGenFunction::AutoVarEmission Emission = 7098 CGF.EmitAutoVarAlloca(cast<VarDecl>(*I)); 7099 CGF.EmitAutoVarCleanups(Emission); 7100 } 7101 } 7102 } 7103 llvm::Value *ThreadLimit = CGF.EmitScalarExpr( 7104 ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true); 7105 ThreadLimitVal = 7106 Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false); 7107 } 7108 if (isOpenMPTeamsDirective(Dir->getDirectiveKind()) && 7109 !isOpenMPDistributeDirective(Dir->getDirectiveKind())) { 7110 CS = Dir->getInnermostCapturedStmt(); 7111 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild( 7112 CGF.getContext(), CS->getCapturedStmt()); 7113 Dir = dyn_cast_or_null<OMPExecutableDirective>(Child); 7114 } 7115 if (Dir && isOpenMPDistributeDirective(Dir->getDirectiveKind()) && 7116 !isOpenMPSimdDirective(Dir->getDirectiveKind())) { 7117 CS = Dir->getInnermostCapturedStmt(); 7118 if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal)) 7119 return NumThreads; 7120 } 7121 if (Dir && isOpenMPSimdDirective(Dir->getDirectiveKind())) 7122 return Bld.getInt32(1); 7123 } 7124 return ThreadLimitVal ? 
ThreadLimitVal : Bld.getInt32(0);
  }
  case OMPD_target_teams: {
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
    }
    const CapturedStmt *CS = D.getInnermostCapturedStmt();
    if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
      return NumThreads;
    const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
        CGF.getContext(), CS->getCapturedStmt());
    if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
      if (Dir->getDirectiveKind() == OMPD_distribute) {
        CS = Dir->getInnermostCapturedStmt();
        if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
          return NumThreads;
      }
    }
    return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0);
  }
  case OMPD_target_teams_distribute:
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
    }
    return getNumThreads(CGF, D.getInnermostCapturedStmt(), ThreadLimitVal);
  case OMPD_target_parallel:
  case OMPD_target_parallel_for:
  case OMPD_target_parallel_for_simd:
  case OMPD_target_teams_distribute_parallel_for:
  case OMPD_target_teams_distribute_parallel_for_simd: {
    llvm::Value *CondVal = nullptr;
    // Handle the if clause. If the if clause is present, the number of threads
    // is calculated as <cond> ? (<numthreads> ? <numthreads> : 0) : 1.
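    // For example (an illustrative sketch, not tied to a specific test):
    //   #pragma omp target parallel if(parallel: C) num_threads(N)
    // yields C ? N : 1, and C ? 0 : 1 when no num_threads clause is given.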
7167 if (D.hasClausesOfKind<OMPIfClause>()) { 7168 const OMPIfClause *IfClause = nullptr; 7169 for (const auto *C : D.getClausesOfKind<OMPIfClause>()) { 7170 if (C->getNameModifier() == OMPD_unknown || 7171 C->getNameModifier() == OMPD_parallel) { 7172 IfClause = C; 7173 break; 7174 } 7175 } 7176 if (IfClause) { 7177 const Expr *Cond = IfClause->getCondition(); 7178 bool Result; 7179 if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) { 7180 if (!Result) 7181 return Bld.getInt32(1); 7182 } else { 7183 CodeGenFunction::RunCleanupsScope Scope(CGF); 7184 CondVal = CGF.EvaluateExprAsBool(Cond); 7185 } 7186 } 7187 } 7188 if (D.hasClausesOfKind<OMPThreadLimitClause>()) { 7189 CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF); 7190 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>(); 7191 llvm::Value *ThreadLimit = CGF.EmitScalarExpr( 7192 ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true); 7193 ThreadLimitVal = 7194 Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false); 7195 } 7196 if (D.hasClausesOfKind<OMPNumThreadsClause>()) { 7197 CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF); 7198 const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>(); 7199 llvm::Value *NumThreads = CGF.EmitScalarExpr( 7200 NumThreadsClause->getNumThreads(), /*IgnoreResultAssign=*/true); 7201 NumThreadsVal = 7202 Bld.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned=*/false); 7203 ThreadLimitVal = ThreadLimitVal 7204 ? Bld.CreateSelect(Bld.CreateICmpULT(NumThreadsVal, 7205 ThreadLimitVal), 7206 NumThreadsVal, ThreadLimitVal) 7207 : NumThreadsVal; 7208 } 7209 if (!ThreadLimitVal) 7210 ThreadLimitVal = Bld.getInt32(0); 7211 if (CondVal) 7212 return Bld.CreateSelect(CondVal, ThreadLimitVal, Bld.getInt32(1)); 7213 return ThreadLimitVal; 7214 } 7215 case OMPD_target_teams_distribute_simd: 7216 case OMPD_target_simd: 7217 return Bld.getInt32(1); 7218 case OMPD_parallel: 7219 case OMPD_for: 7220 case OMPD_parallel_for: 7221 case OMPD_parallel_master: 7222 case OMPD_parallel_sections: 7223 case OMPD_for_simd: 7224 case OMPD_parallel_for_simd: 7225 case OMPD_cancel: 7226 case OMPD_cancellation_point: 7227 case OMPD_ordered: 7228 case OMPD_threadprivate: 7229 case OMPD_allocate: 7230 case OMPD_task: 7231 case OMPD_simd: 7232 case OMPD_tile: 7233 case OMPD_unroll: 7234 case OMPD_sections: 7235 case OMPD_section: 7236 case OMPD_single: 7237 case OMPD_master: 7238 case OMPD_critical: 7239 case OMPD_taskyield: 7240 case OMPD_barrier: 7241 case OMPD_taskwait: 7242 case OMPD_taskgroup: 7243 case OMPD_atomic: 7244 case OMPD_flush: 7245 case OMPD_depobj: 7246 case OMPD_scan: 7247 case OMPD_teams: 7248 case OMPD_target_data: 7249 case OMPD_target_exit_data: 7250 case OMPD_target_enter_data: 7251 case OMPD_distribute: 7252 case OMPD_distribute_simd: 7253 case OMPD_distribute_parallel_for: 7254 case OMPD_distribute_parallel_for_simd: 7255 case OMPD_teams_distribute: 7256 case OMPD_teams_distribute_simd: 7257 case OMPD_teams_distribute_parallel_for: 7258 case OMPD_teams_distribute_parallel_for_simd: 7259 case OMPD_target_update: 7260 case OMPD_declare_simd: 7261 case OMPD_declare_variant: 7262 case OMPD_begin_declare_variant: 7263 case OMPD_end_declare_variant: 7264 case OMPD_declare_target: 7265 case OMPD_end_declare_target: 7266 case OMPD_declare_reduction: 7267 case OMPD_declare_mapper: 7268 case OMPD_taskloop: 7269 case OMPD_taskloop_simd: 7270 case OMPD_master_taskloop: 7271 case OMPD_master_taskloop_simd: 7272 case OMPD_parallel_master_taskloop: 7273 
case OMPD_parallel_master_taskloop_simd:
  case OMPD_requires:
  case OMPD_metadirective:
  case OMPD_unknown:
    break;
  default:
    break;
  }
  llvm_unreachable("Unsupported directive kind.");
}

namespace {
LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();

// Utility to handle information from clauses associated with a given
// construct that use mappable expressions (e.g. 'map' clause, 'to' clause).
// It provides a convenient interface to obtain the information and generate
// code for that information.
class MappableExprsHandler {
public:
  /// Values for bit flags used to specify the mapping type for
  /// offloading.
  enum OpenMPOffloadMappingFlags : uint64_t {
    /// No flags
    OMP_MAP_NONE = 0x0,
    /// Allocate memory on the device and move data from host to device.
    OMP_MAP_TO = 0x01,
    /// Allocate memory on the device and move data from device to host.
    OMP_MAP_FROM = 0x02,
    /// Always perform the requested mapping action on the element, even
    /// if it was already mapped before.
    OMP_MAP_ALWAYS = 0x04,
    /// Delete the element from the device environment, ignoring the
    /// current reference count associated with the element.
    OMP_MAP_DELETE = 0x08,
    /// The element being mapped is a pointer-pointee pair; both the
    /// pointer and the pointee should be mapped.
    OMP_MAP_PTR_AND_OBJ = 0x10,
    /// This flag signals that the base address of an entry should be
    /// passed to the target kernel as an argument.
    OMP_MAP_TARGET_PARAM = 0x20,
    /// Signal that the runtime library has to return the device pointer
    /// in the current position for the data being mapped. Used when we have
    /// the use_device_ptr or use_device_addr clause.
    OMP_MAP_RETURN_PARAM = 0x40,
    /// This flag signals that the reference being passed is a pointer to
    /// private data.
    OMP_MAP_PRIVATE = 0x80,
    /// Pass the element to the device by value.
    OMP_MAP_LITERAL = 0x100,
    /// Implicit map
    OMP_MAP_IMPLICIT = 0x200,
    /// Close is a hint to the runtime to allocate memory close to
    /// the target device.
    OMP_MAP_CLOSE = 0x400,
    /// 0x800 is reserved for compatibility with XLC.
    /// Produce a runtime error if the data is not already allocated.
    OMP_MAP_PRESENT = 0x1000,
    // Increment and decrement a separate reference counter so that the data
    // cannot be unmapped within the associated region. Thus, this flag is
    // intended to be used on 'target' and 'target data' directives because
    // they are inherently structured. It is not intended to be used on
    // 'target enter data' and 'target exit data' directives because they are
    // inherently dynamic.
    // This is an OpenMP extension for the sake of OpenACC support.
    OMP_MAP_OMPX_HOLD = 0x2000,
    /// Signal that the runtime library should use args as an array of
    /// descriptor_dim pointers and use args_size as dims. Used when we have
    /// non-contiguous list items in the target update directive.
    OMP_MAP_NON_CONTIG = 0x100000000000,
    /// The 16 MSBs of the flags indicate whether the entry is a member of
    /// some struct/class.
    OMP_MAP_MEMBER_OF = 0xffff000000000000,
    LLVM_MARK_AS_BITMASK_ENUM(/* LargestFlag = */ OMP_MAP_MEMBER_OF),
  };

  /// Get the offset of the OMP_MAP_MEMBER_OF field.
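  /// For instance, with OMP_MAP_MEMBER_OF == 0xffff000000000000 the loop
  /// below counts 48 trailing zero bits, so the returned offset is 48.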
7350 static unsigned getFlagMemberOffset() { 7351 unsigned Offset = 0; 7352 for (uint64_t Remain = OMP_MAP_MEMBER_OF; !(Remain & 1); 7353 Remain = Remain >> 1) 7354 Offset++; 7355 return Offset; 7356 } 7357 7358 /// Class that holds debugging information for a data mapping to be passed to 7359 /// the runtime library. 7360 class MappingExprInfo { 7361 /// The variable declaration used for the data mapping. 7362 const ValueDecl *MapDecl = nullptr; 7363 /// The original expression used in the map clause, or null if there is 7364 /// none. 7365 const Expr *MapExpr = nullptr; 7366 7367 public: 7368 MappingExprInfo(const ValueDecl *MapDecl, const Expr *MapExpr = nullptr) 7369 : MapDecl(MapDecl), MapExpr(MapExpr) {} 7370 7371 const ValueDecl *getMapDecl() const { return MapDecl; } 7372 const Expr *getMapExpr() const { return MapExpr; } 7373 }; 7374 7375 /// Class that associates information with a base pointer to be passed to the 7376 /// runtime library. 7377 class BasePointerInfo { 7378 /// The base pointer. 7379 llvm::Value *Ptr = nullptr; 7380 /// The base declaration that refers to this device pointer, or null if 7381 /// there is none. 7382 const ValueDecl *DevPtrDecl = nullptr; 7383 7384 public: 7385 BasePointerInfo(llvm::Value *Ptr, const ValueDecl *DevPtrDecl = nullptr) 7386 : Ptr(Ptr), DevPtrDecl(DevPtrDecl) {} 7387 llvm::Value *operator*() const { return Ptr; } 7388 const ValueDecl *getDevicePtrDecl() const { return DevPtrDecl; } 7389 void setDevicePtrDecl(const ValueDecl *D) { DevPtrDecl = D; } 7390 }; 7391 7392 using MapExprsArrayTy = SmallVector<MappingExprInfo, 4>; 7393 using MapBaseValuesArrayTy = SmallVector<BasePointerInfo, 4>; 7394 using MapValuesArrayTy = SmallVector<llvm::Value *, 4>; 7395 using MapFlagsArrayTy = SmallVector<OpenMPOffloadMappingFlags, 4>; 7396 using MapMappersArrayTy = SmallVector<const ValueDecl *, 4>; 7397 using MapDimArrayTy = SmallVector<uint64_t, 4>; 7398 using MapNonContiguousArrayTy = SmallVector<MapValuesArrayTy, 4>; 7399 7400 /// This structure contains combined information generated for mappable 7401 /// clauses, including base pointers, pointers, sizes, map types, user-defined 7402 /// mappers, and non-contiguous information. 7403 struct MapCombinedInfoTy { 7404 struct StructNonContiguousInfo { 7405 bool IsNonContiguous = false; 7406 MapDimArrayTy Dims; 7407 MapNonContiguousArrayTy Offsets; 7408 MapNonContiguousArrayTy Counts; 7409 MapNonContiguousArrayTy Strides; 7410 }; 7411 MapExprsArrayTy Exprs; 7412 MapBaseValuesArrayTy BasePointers; 7413 MapValuesArrayTy Pointers; 7414 MapValuesArrayTy Sizes; 7415 MapFlagsArrayTy Types; 7416 MapMappersArrayTy Mappers; 7417 StructNonContiguousInfo NonContigInfo; 7418 7419 /// Append arrays in \a CurInfo. 
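    /// A usage sketch (hypothetical variable names, for illustration only):
    ///   MapCombinedInfoTy Combined, CurInfo;
    ///   // ...fill CurInfo for one capture...
    ///   Combined.append(CurInfo); // appends every parallel array at once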
    void append(MapCombinedInfoTy &CurInfo) {
      Exprs.append(CurInfo.Exprs.begin(), CurInfo.Exprs.end());
      BasePointers.append(CurInfo.BasePointers.begin(),
                          CurInfo.BasePointers.end());
      Pointers.append(CurInfo.Pointers.begin(), CurInfo.Pointers.end());
      Sizes.append(CurInfo.Sizes.begin(), CurInfo.Sizes.end());
      Types.append(CurInfo.Types.begin(), CurInfo.Types.end());
      Mappers.append(CurInfo.Mappers.begin(), CurInfo.Mappers.end());
      NonContigInfo.Dims.append(CurInfo.NonContigInfo.Dims.begin(),
                                CurInfo.NonContigInfo.Dims.end());
      NonContigInfo.Offsets.append(CurInfo.NonContigInfo.Offsets.begin(),
                                   CurInfo.NonContigInfo.Offsets.end());
      NonContigInfo.Counts.append(CurInfo.NonContigInfo.Counts.begin(),
                                  CurInfo.NonContigInfo.Counts.end());
      NonContigInfo.Strides.append(CurInfo.NonContigInfo.Strides.begin(),
                                   CurInfo.NonContigInfo.Strides.end());
    }
  };

  /// Map between a struct and its lowest & highest elements which have been
  /// mapped.
  /// [ValueDecl *] --> {LE(FieldIndex, Pointer),
  ///                    HE(FieldIndex, Pointer)}
  struct StructRangeInfoTy {
    MapCombinedInfoTy PreliminaryMapData;
    std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> LowestElem = {
        0, Address::invalid()};
    std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> HighestElem = {
        0, Address::invalid()};
    Address Base = Address::invalid();
    Address LB = Address::invalid();
    bool IsArraySection = false;
    bool HasCompleteRecord = false;
  };

private:
  /// Information gathered for a single component list of a map-like clause,
  /// including whether a device pointer has to be returned for it.
  struct MapInfo {
    OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
    OpenMPMapClauseKind MapType = OMPC_MAP_unknown;
    ArrayRef<OpenMPMapModifierKind> MapModifiers;
    ArrayRef<OpenMPMotionModifierKind> MotionModifiers;
    bool ReturnDevicePointer = false;
    bool IsImplicit = false;
    const ValueDecl *Mapper = nullptr;
    const Expr *VarRef = nullptr;
    bool ForDeviceAddr = false;

    MapInfo() = default;
    MapInfo(
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
        OpenMPMapClauseKind MapType,
        ArrayRef<OpenMPMapModifierKind> MapModifiers,
        ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
        bool ReturnDevicePointer, bool IsImplicit,
        const ValueDecl *Mapper = nullptr, const Expr *VarRef = nullptr,
        bool ForDeviceAddr = false)
        : Components(Components), MapType(MapType), MapModifiers(MapModifiers),
          MotionModifiers(MotionModifiers),
          ReturnDevicePointer(ReturnDevicePointer), IsImplicit(IsImplicit),
          Mapper(Mapper), VarRef(VarRef), ForDeviceAddr(ForDeviceAddr) {}
  };

  /// If use_device_ptr or use_device_addr is used on a decl which is a struct
  /// member and there is no map information about it, then emission of that
  /// entry is deferred until the whole struct has been processed.
  struct DeferredDevicePtrEntryTy {
    const Expr *IE = nullptr;
    const ValueDecl *VD = nullptr;
    bool ForDeviceAddr = false;

    DeferredDevicePtrEntryTy(const Expr *IE, const ValueDecl *VD,
                             bool ForDeviceAddr)
        : IE(IE), VD(VD), ForDeviceAddr(ForDeviceAddr) {}
  };

  /// The target directive from where the mappable clauses were extracted. It
  /// is either an executable directive or a user-defined mapper directive.
  llvm::PointerUnion<const OMPExecutableDirective *,
                     const OMPDeclareMapperDecl *>
      CurDir;

  /// Function the directive is being generated for.
  CodeGenFunction &CGF;

  /// Set of all firstprivate variables in the current directive.
  /// bool data is set to true if the variable is implicitly marked as
  /// firstprivate, false otherwise.
  llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, bool> FirstPrivateDecls;

  /// Map between device pointer declarations and their expression components.
  /// The key value for declarations in 'this' is null.
  llvm::DenseMap<
      const ValueDecl *,
      SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>>
      DevPointersMap;

  /// Map between lambda declarations and their map type.
  llvm::DenseMap<const ValueDecl *, const OMPMapClause *> LambdasMap;

  llvm::Value *getExprTypeSize(const Expr *E) const {
    QualType ExprTy = E->getType().getCanonicalType();

    // Calculate the size for array shaping expression.
    if (const auto *OAE = dyn_cast<OMPArrayShapingExpr>(E)) {
      llvm::Value *Size =
          CGF.getTypeSize(OAE->getBase()->getType()->getPointeeType());
      for (const Expr *SE : OAE->getDimensions()) {
        llvm::Value *Sz = CGF.EmitScalarExpr(SE);
        Sz = CGF.EmitScalarConversion(Sz, SE->getType(),
                                      CGF.getContext().getSizeType(),
                                      SE->getExprLoc());
        Size = CGF.Builder.CreateNUWMul(Size, Sz);
      }
      return Size;
    }

    // Reference types are ignored for mapping purposes.
    if (const auto *RefTy = ExprTy->getAs<ReferenceType>())
      ExprTy = RefTy->getPointeeType().getCanonicalType();

    // Given that an array section is considered a built-in type, we need to
    // do the calculation based on the length of the section instead of relying
    // on CGF.getTypeSize(E->getType()).
    if (const auto *OAE = dyn_cast<OMPArraySectionExpr>(E)) {
      QualType BaseTy = OMPArraySectionExpr::getBaseOriginalType(
                            OAE->getBase()->IgnoreParenImpCasts())
                            .getCanonicalType();

      // If there is no length associated with the expression and the lower
      // bound is not specified either, that means we are using the whole
      // length of the base.
      if (!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
          !OAE->getLowerBound())
        return CGF.getTypeSize(BaseTy);

      llvm::Value *ElemSize;
      if (const auto *PTy = BaseTy->getAs<PointerType>()) {
        ElemSize = CGF.getTypeSize(PTy->getPointeeType().getCanonicalType());
      } else {
        const auto *ATy = cast<ArrayType>(BaseTy.getTypePtr());
        assert(ATy && "Expecting array type if not a pointer type.");
        ElemSize = CGF.getTypeSize(ATy->getElementType().getCanonicalType());
      }

      // If we don't have a length at this point, that is because we have an
      // array section with a single element.
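      // (E.g. the inner '[2]' of a section like 'arr[0:2][2]', which Clang
      // represents as an array section with no colon.)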
      if (!OAE->getLength() && OAE->getColonLocFirst().isInvalid())
        return ElemSize;

      if (const Expr *LenExpr = OAE->getLength()) {
        llvm::Value *LengthVal = CGF.EmitScalarExpr(LenExpr);
        LengthVal = CGF.EmitScalarConversion(LengthVal, LenExpr->getType(),
                                             CGF.getContext().getSizeType(),
                                             LenExpr->getExprLoc());
        return CGF.Builder.CreateNUWMul(LengthVal, ElemSize);
      }
      assert(!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
             OAE->getLowerBound() && "expected array_section[lb:].");
      // Size = sizeof(base type) - lb * sizeof(element type), clamped to 0.
      llvm::Value *LengthVal = CGF.getTypeSize(BaseTy);
      llvm::Value *LBVal = CGF.EmitScalarExpr(OAE->getLowerBound());
      LBVal = CGF.EmitScalarConversion(LBVal, OAE->getLowerBound()->getType(),
                                       CGF.getContext().getSizeType(),
                                       OAE->getLowerBound()->getExprLoc());
      LBVal = CGF.Builder.CreateNUWMul(LBVal, ElemSize);
      llvm::Value *Cmp = CGF.Builder.CreateICmpUGT(LengthVal, LBVal);
      llvm::Value *TrueVal = CGF.Builder.CreateNUWSub(LengthVal, LBVal);
      LengthVal = CGF.Builder.CreateSelect(
          Cmp, TrueVal, llvm::ConstantInt::get(CGF.SizeTy, 0));
      return LengthVal;
    }
    return CGF.getTypeSize(ExprTy);
  }

  /// Return the corresponding bits for a given map clause modifier. Add
  /// a flag marking the map as a pointer if requested. Add a flag marking the
  /// map as the first one of a series of maps that relate to the same map
  /// expression.
  OpenMPOffloadMappingFlags getMapTypeBits(
      OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
      ArrayRef<OpenMPMotionModifierKind> MotionModifiers, bool IsImplicit,
      bool AddPtrFlag, bool AddIsTargetParamFlag, bool IsNonContiguous) const {
    OpenMPOffloadMappingFlags Bits =
        IsImplicit ? OMP_MAP_IMPLICIT : OMP_MAP_NONE;
    switch (MapType) {
    case OMPC_MAP_alloc:
    case OMPC_MAP_release:
      // alloc and release are the default behavior in the runtime library;
      // i.e. if we don't pass any bits, alloc/release is what the runtime is
      // going to do. Therefore, we don't need to signal anything for these
      // two type modifiers.
      break;
    case OMPC_MAP_to:
      Bits |= OMP_MAP_TO;
      break;
    case OMPC_MAP_from:
      Bits |= OMP_MAP_FROM;
      break;
    case OMPC_MAP_tofrom:
      Bits |= OMP_MAP_TO | OMP_MAP_FROM;
      break;
    case OMPC_MAP_delete:
      Bits |= OMP_MAP_DELETE;
      break;
    case OMPC_MAP_unknown:
      llvm_unreachable("Unexpected map type!");
    }
    if (AddPtrFlag)
      Bits |= OMP_MAP_PTR_AND_OBJ;
    if (AddIsTargetParamFlag)
      Bits |= OMP_MAP_TARGET_PARAM;
    if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_always))
      Bits |= OMP_MAP_ALWAYS;
    if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_close))
      Bits |= OMP_MAP_CLOSE;
    if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_present) ||
        llvm::is_contained(MotionModifiers, OMPC_MOTION_MODIFIER_present))
      Bits |= OMP_MAP_PRESENT;
    if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_ompx_hold))
      Bits |= OMP_MAP_OMPX_HOLD;
    if (IsNonContiguous)
      Bits |= OMP_MAP_NON_CONTIG;
    return Bits;
  }

  /// Return true if the provided expression is a final array section. A
  /// final array section is one whose length can't be proven to be one.
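  /// For example, given 'int a[10]', the sections 'a[0:10]' and 'a[2:]' are
  /// final, while 'a[2:1]' is not, since its length is provably one.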
  bool isFinalArraySectionExpression(const Expr *E) const {
    const auto *OASE = dyn_cast<OMPArraySectionExpr>(E);

    // It is not an array section and therefore not a unity-size one.
    if (!OASE)
      return false;

    // An array section with no colon always refers to a single element.
    if (OASE->getColonLocFirst().isInvalid())
      return false;

    const Expr *Length = OASE->getLength();

    // If we don't have a length we have to check if the array has size 1
    // for this dimension. Also, we should always expect a length if the
    // base type is a pointer.
    if (!Length) {
      QualType BaseQTy = OMPArraySectionExpr::getBaseOriginalType(
                             OASE->getBase()->IgnoreParenImpCasts())
                             .getCanonicalType();
      if (const auto *ATy = dyn_cast<ConstantArrayType>(BaseQTy.getTypePtr()))
        return ATy->getSize().getSExtValue() != 1;
      // If we don't have a constant dimension length, we have to consider
      // the current section as having any size, so it is not necessarily
      // unitary. If it happens to be unity size, that's the user's fault.
      return true;
    }

    // Check if the length evaluates to 1.
    Expr::EvalResult Result;
    if (!Length->EvaluateAsInt(Result, CGF.getContext()))
      return true; // Can have a size greater than 1.

    llvm::APSInt ConstLength = Result.Val.getInt();
    return ConstLength.getSExtValue() != 1;
  }

  /// Generate the base pointers, section pointers, sizes, map type bits, and
  /// user-defined mappers (all included in \a CombinedInfo) for the provided
  /// map type, map or motion modifiers, and expression components.
  /// \a IsFirstComponentList should be set to true if the provided set of
  /// components is the first associated with a capture.
  void generateInfoForComponentList(
      OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
      ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
      MapCombinedInfoTy &CombinedInfo, StructRangeInfoTy &PartialStruct,
      bool IsFirstComponentList, bool IsImplicit,
      const ValueDecl *Mapper = nullptr, bool ForDeviceAddr = false,
      const ValueDecl *BaseDecl = nullptr, const Expr *MapExpr = nullptr,
      ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
          OverlappedElements = llvm::None) const {
    // The following summarizes what has to be generated for each map and the
    // types below. The generated information is expressed in this order:
    // base pointer, section pointer, size, flags
    // (to add to the ones that come from the map type and modifier).
7704 // 7705 // double d; 7706 // int i[100]; 7707 // float *p; 7708 // 7709 // struct S1 { 7710 // int i; 7711 // float f[50]; 7712 // } 7713 // struct S2 { 7714 // int i; 7715 // float f[50]; 7716 // S1 s; 7717 // double *p; 7718 // struct S2 *ps; 7719 // int &ref; 7720 // } 7721 // S2 s; 7722 // S2 *ps; 7723 // 7724 // map(d) 7725 // &d, &d, sizeof(double), TARGET_PARAM | TO | FROM 7726 // 7727 // map(i) 7728 // &i, &i, 100*sizeof(int), TARGET_PARAM | TO | FROM 7729 // 7730 // map(i[1:23]) 7731 // &i(=&i[0]), &i[1], 23*sizeof(int), TARGET_PARAM | TO | FROM 7732 // 7733 // map(p) 7734 // &p, &p, sizeof(float*), TARGET_PARAM | TO | FROM 7735 // 7736 // map(p[1:24]) 7737 // &p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM | PTR_AND_OBJ 7738 // in unified shared memory mode or for local pointers 7739 // p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM 7740 // 7741 // map(s) 7742 // &s, &s, sizeof(S2), TARGET_PARAM | TO | FROM 7743 // 7744 // map(s.i) 7745 // &s, &(s.i), sizeof(int), TARGET_PARAM | TO | FROM 7746 // 7747 // map(s.s.f) 7748 // &s, &(s.s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM 7749 // 7750 // map(s.p) 7751 // &s, &(s.p), sizeof(double*), TARGET_PARAM | TO | FROM 7752 // 7753 // map(to: s.p[:22]) 7754 // &s, &(s.p), sizeof(double*), TARGET_PARAM (*) 7755 // &s, &(s.p), sizeof(double*), MEMBER_OF(1) (**) 7756 // &(s.p), &(s.p[0]), 22*sizeof(double), 7757 // MEMBER_OF(1) | PTR_AND_OBJ | TO (***) 7758 // (*) alloc space for struct members, only this is a target parameter 7759 // (**) map the pointer (nothing to be mapped in this example) (the compiler 7760 // optimizes this entry out, same in the examples below) 7761 // (***) map the pointee (map: to) 7762 // 7763 // map(to: s.ref) 7764 // &s, &(s.ref), sizeof(int*), TARGET_PARAM (*) 7765 // &s, &(s.ref), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | TO (***) 7766 // (*) alloc space for struct members, only this is a target parameter 7767 // (**) map the pointer (nothing to be mapped in this example) (the compiler 7768 // optimizes this entry out, same in the examples below) 7769 // (***) map the pointee (map: to) 7770 // 7771 // map(s.ps) 7772 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM | TO | FROM 7773 // 7774 // map(from: s.ps->s.i) 7775 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM 7776 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1) 7777 // &(s.ps), &(s.ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM 7778 // 7779 // map(to: s.ps->ps) 7780 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM 7781 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1) 7782 // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | TO 7783 // 7784 // map(s.ps->ps->ps) 7785 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM 7786 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1) 7787 // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ 7788 // &(s.ps->ps), &(s.ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM 7789 // 7790 // map(to: s.ps->ps->s.f[:22]) 7791 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM 7792 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1) 7793 // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ 7794 // &(s.ps->ps), &(s.ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO 7795 // 7796 // map(ps) 7797 // &ps, &ps, sizeof(S2*), TARGET_PARAM | TO | FROM 7798 // 7799 // map(ps->i) 7800 // ps, &(ps->i), sizeof(int), TARGET_PARAM | TO | FROM 7801 // 7802 // map(ps->s.f) 7803 // ps, &(ps->s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM 7804 // 7805 // map(from: ps->p) 7806 // ps, &(ps->p), sizeof(double*), TARGET_PARAM | FROM 7807 // 7808 // 
map(to: ps->p[:22]) 7809 // ps, &(ps->p), sizeof(double*), TARGET_PARAM 7810 // ps, &(ps->p), sizeof(double*), MEMBER_OF(1) 7811 // &(ps->p), &(ps->p[0]), 22*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | TO 7812 // 7813 // map(ps->ps) 7814 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM | TO | FROM 7815 // 7816 // map(from: ps->ps->s.i) 7817 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM 7818 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1) 7819 // &(ps->ps), &(ps->ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM 7820 // 7821 // map(from: ps->ps->ps) 7822 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM 7823 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1) 7824 // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | FROM 7825 // 7826 // map(ps->ps->ps->ps) 7827 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM 7828 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1) 7829 // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ 7830 // &(ps->ps->ps), &(ps->ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM 7831 // 7832 // map(to: ps->ps->ps->s.f[:22]) 7833 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM 7834 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1) 7835 // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ 7836 // &(ps->ps->ps), &(ps->ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO 7837 // 7838 // map(to: s.f[:22]) map(from: s.p[:33]) 7839 // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1) + 7840 // sizeof(double*) (**), TARGET_PARAM 7841 // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | TO 7842 // &s, &(s.p), sizeof(double*), MEMBER_OF(1) 7843 // &(s.p), &(s.p[0]), 33*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | FROM 7844 // (*) allocate contiguous space needed to fit all mapped members even if 7845 // we allocate space for members not mapped (in this example, 7846 // s.f[22..49] and s.s are not mapped, yet we must allocate space for 7847 // them as well because they fall between &s.f[0] and &s.p) 7848 // 7849 // map(from: s.f[:22]) map(to: ps->p[:33]) 7850 // &s, &(s.f[0]), 22*sizeof(float), TARGET_PARAM | FROM 7851 // ps, &(ps->p), sizeof(S2*), TARGET_PARAM 7852 // ps, &(ps->p), sizeof(double*), MEMBER_OF(2) (*) 7853 // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(2) | PTR_AND_OBJ | TO 7854 // (*) the struct this entry pertains to is the 2nd element in the list of 7855 // arguments, hence MEMBER_OF(2) 7856 // 7857 // map(from: s.f[:22], s.s) map(to: ps->p[:33]) 7858 // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1), TARGET_PARAM 7859 // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | FROM 7860 // &s, &(s.s), sizeof(struct S1), MEMBER_OF(1) | FROM 7861 // ps, &(ps->p), sizeof(S2*), TARGET_PARAM 7862 // ps, &(ps->p), sizeof(double*), MEMBER_OF(4) (*) 7863 // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(4) | PTR_AND_OBJ | TO 7864 // (*) the struct this entry pertains to is the 4th element in the list 7865 // of arguments, hence MEMBER_OF(4) 7866 7867 // Track if the map information being generated is the first for a capture. 7868 bool IsCaptureFirstInfo = IsFirstComponentList; 7869 // When the variable is on a declare target link or in a to clause with 7870 // unified memory, a reference is needed to hold the host/device address 7871 // of the variable. 7872 bool RequiresReference = false; 7873 7874 // Scan the components from the base to the complete expression. 7875 auto CI = Components.rbegin(); 7876 auto CE = Components.rend(); 7877 auto I = CI; 7878 7879 // Track if the map information being generated is the first for a list of 7880 // components. 
    bool IsExpressionFirstInfo = true;
    bool FirstPointerInComplexData = false;
    Address BP = Address::invalid();
    const Expr *AssocExpr = I->getAssociatedExpression();
    const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr);
    const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);
    const auto *OAShE = dyn_cast<OMPArrayShapingExpr>(AssocExpr);

    if (isa<MemberExpr>(AssocExpr)) {
      // The base is the 'this' pointer. The content of the pointer is going
      // to be the base of the field being mapped.
      BP = CGF.LoadCXXThisAddress();
    } else if ((AE && isa<CXXThisExpr>(AE->getBase()->IgnoreParenImpCasts())) ||
               (OASE &&
                isa<CXXThisExpr>(OASE->getBase()->IgnoreParenImpCasts()))) {
      BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF);
    } else if (OAShE &&
               isa<CXXThisExpr>(OAShE->getBase()->IgnoreParenCasts())) {
      BP = Address(
          CGF.EmitScalarExpr(OAShE->getBase()),
          CGF.getContext().getTypeAlignInChars(OAShE->getBase()->getType()));
    } else {
      // The base is the reference to the variable.
      // BP = &Var.
      BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF);
      if (const auto *VD =
              dyn_cast_or_null<VarDecl>(I->getAssociatedDeclaration())) {
        if (llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
                OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD)) {
          if ((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
              (*Res == OMPDeclareTargetDeclAttr::MT_To &&
               CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())) {
            RequiresReference = true;
            BP = CGF.CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
          }
        }
      }

      // If the variable is a pointer and is being dereferenced (i.e. is not
      // the last component), the base has to be the pointer itself, not its
      // reference. References are ignored for mapping purposes.
      QualType Ty =
          I->getAssociatedDeclaration()->getType().getNonReferenceType();
      if (Ty->isAnyPointerType() && std::next(I) != CE) {
        // No need to generate individual map information for the pointer; it
        // can be associated with the combined storage if shared memory mode is
        // active or the base declaration is not a global variable.
        const auto *VD = dyn_cast<VarDecl>(I->getAssociatedDeclaration());
        if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
            !VD || VD->hasLocalStorage())
          BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
        else
          FirstPointerInComplexData = true;
        ++I;
      }
    }

    // Track whether a component of the list should be marked as MEMBER_OF some
    // combined entry (for partial structs). Only the first PTR_AND_OBJ entry
    // in a component list should be marked as MEMBER_OF; all subsequent
    // entries do not belong to the base struct. E.g.
    // struct S2 s;
    // s.ps->ps->ps->f[:]
    //   (1)  (2)  (3)  (4)
    // ps(1) is a member pointer, ps(2) is a pointee of ps(1), so it is a
    // PTR_AND_OBJ entry; the PTR is ps(1), so MEMBER_OF the base struct. ps(3)
    // is the pointee of ps(2) which is not a member of struct s, so it should
    // not be marked as such (it is still PTR_AND_OBJ).
    // The variable is initialized to false so that PTR_AND_OBJ entries which
    // are not struct members are not considered (e.g. array of pointers to
    // data).
    bool ShouldBeMemberOf = false;

    // Variable keeping track of whether or not we have encountered a component
    // in the component list which is a member expression. Useful when we have
    // a pointer or a final array section, in which case it is the previous
    // component in the list which tells us whether we have a member
    // expression. E.g. X.f[:]
    // While processing the final array section "[:]" it is "f" which tells us
    // whether we are dealing with a member of a declared struct.
    const MemberExpr *EncounteredME = nullptr;

    // Track the total number of dimensions. Start from one for the dummy
    // dimension.
    uint64_t DimSize = 1;

    bool IsNonContiguous = CombinedInfo.NonContigInfo.IsNonContiguous;
    bool IsPrevMemberReference = false;

    for (; I != CE; ++I) {
      // If the current component is member of a struct (parent struct) mark
      // it.
      if (!EncounteredME) {
        EncounteredME = dyn_cast<MemberExpr>(I->getAssociatedExpression());
        // If we encounter a PTR_AND_OBJ entry from now on it should be marked
        // as MEMBER_OF the parent struct.
        if (EncounteredME) {
          ShouldBeMemberOf = true;
          // Do not emit as a complex pointer if this is actually not an
          // array-like expression.
          if (FirstPointerInComplexData) {
            QualType Ty = std::prev(I)
                              ->getAssociatedDeclaration()
                              ->getType()
                              .getNonReferenceType();
            BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
            FirstPointerInComplexData = false;
          }
        }
      }

      auto Next = std::next(I);

      // We need to generate the addresses and sizes if this is the last
      // component, if the component is a pointer, or if it is an array section
      // whose length can't be proven to be one. If this is a pointer, it
      // becomes the base address for the following components.

      // A final array section is one whose length can't be proven to be one.
      // If the map item is non-contiguous then we don't treat any array
      // section as a final array section.
      bool IsFinalArraySection =
          !IsNonContiguous &&
          isFinalArraySectionExpression(I->getAssociatedExpression());

      // If we have a declaration for the mapping, use that; otherwise use
      // the base declaration of the map clause.
      const ValueDecl *MapDecl = (I->getAssociatedDeclaration())
                                     ? I->getAssociatedDeclaration()
                                     : BaseDecl;
      MapExpr = (I->getAssociatedExpression()) ? I->getAssociatedExpression()
                                               : MapExpr;

      // Get information on whether the element is a pointer. We have to treat
      // array sections specially, given that they are built-in types.
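      // (E.g. for the section 'p[1:24]' from the examples above, the base
      // original type is 'float *', so the component counts as a pointer.)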
8016 const auto *OASE = 8017 dyn_cast<OMPArraySectionExpr>(I->getAssociatedExpression()); 8018 const auto *OAShE = 8019 dyn_cast<OMPArrayShapingExpr>(I->getAssociatedExpression()); 8020 const auto *UO = dyn_cast<UnaryOperator>(I->getAssociatedExpression()); 8021 const auto *BO = dyn_cast<BinaryOperator>(I->getAssociatedExpression()); 8022 bool IsPointer = 8023 OAShE || 8024 (OASE && OMPArraySectionExpr::getBaseOriginalType(OASE) 8025 .getCanonicalType() 8026 ->isAnyPointerType()) || 8027 I->getAssociatedExpression()->getType()->isAnyPointerType(); 8028 bool IsMemberReference = isa<MemberExpr>(I->getAssociatedExpression()) && 8029 MapDecl && 8030 MapDecl->getType()->isLValueReferenceType(); 8031 bool IsNonDerefPointer = IsPointer && !UO && !BO && !IsNonContiguous; 8032 8033 if (OASE) 8034 ++DimSize; 8035 8036 if (Next == CE || IsMemberReference || IsNonDerefPointer || 8037 IsFinalArraySection) { 8038 // If this is not the last component, we expect the pointer to be 8039 // associated with an array expression or member expression. 8040 assert((Next == CE || 8041 isa<MemberExpr>(Next->getAssociatedExpression()) || 8042 isa<ArraySubscriptExpr>(Next->getAssociatedExpression()) || 8043 isa<OMPArraySectionExpr>(Next->getAssociatedExpression()) || 8044 isa<OMPArrayShapingExpr>(Next->getAssociatedExpression()) || 8045 isa<UnaryOperator>(Next->getAssociatedExpression()) || 8046 isa<BinaryOperator>(Next->getAssociatedExpression())) && 8047 "Unexpected expression"); 8048 8049 Address LB = Address::invalid(); 8050 Address LowestElem = Address::invalid(); 8051 auto &&EmitMemberExprBase = [](CodeGenFunction &CGF, 8052 const MemberExpr *E) { 8053 const Expr *BaseExpr = E->getBase(); 8054 // If this is s.x, emit s as an lvalue. If it is s->x, emit s as a 8055 // scalar. 8056 LValue BaseLV; 8057 if (E->isArrow()) { 8058 LValueBaseInfo BaseInfo; 8059 TBAAAccessInfo TBAAInfo; 8060 Address Addr = 8061 CGF.EmitPointerWithAlignment(BaseExpr, &BaseInfo, &TBAAInfo); 8062 QualType PtrTy = BaseExpr->getType()->getPointeeType(); 8063 BaseLV = CGF.MakeAddrLValue(Addr, PtrTy, BaseInfo, TBAAInfo); 8064 } else { 8065 BaseLV = CGF.EmitOMPSharedLValue(BaseExpr); 8066 } 8067 return BaseLV; 8068 }; 8069 if (OAShE) { 8070 LowestElem = LB = Address(CGF.EmitScalarExpr(OAShE->getBase()), 8071 CGF.getContext().getTypeAlignInChars( 8072 OAShE->getBase()->getType())); 8073 } else if (IsMemberReference) { 8074 const auto *ME = cast<MemberExpr>(I->getAssociatedExpression()); 8075 LValue BaseLVal = EmitMemberExprBase(CGF, ME); 8076 LowestElem = CGF.EmitLValueForFieldInitialization( 8077 BaseLVal, cast<FieldDecl>(MapDecl)) 8078 .getAddress(CGF); 8079 LB = CGF.EmitLoadOfReferenceLValue(LowestElem, MapDecl->getType()) 8080 .getAddress(CGF); 8081 } else { 8082 LowestElem = LB = 8083 CGF.EmitOMPSharedLValue(I->getAssociatedExpression()) 8084 .getAddress(CGF); 8085 } 8086 8087 // If this component is a pointer inside the base struct then we don't 8088 // need to create any entry for it - it will be combined with the object 8089 // it is pointing to into a single PTR_AND_OBJ entry. 8090 bool IsMemberPointerOrAddr = 8091 EncounteredME && 8092 (((IsPointer || ForDeviceAddr) && 8093 I->getAssociatedExpression() == EncounteredME) || 8094 (IsPrevMemberReference && !IsPointer) || 8095 (IsMemberReference && Next != CE && 8096 !Next->getAssociatedExpression()->getType()->isPointerType())); 8097 if (!OverlappedElements.empty() && Next == CE) { 8098 // Handle base element with the info for overlapped elements. 
8099 assert(!PartialStruct.Base.isValid() && "The base element is set."); 8100 assert(!IsPointer && 8101 "Unexpected base element with the pointer type."); 8102 // Mark the whole struct as the struct that requires allocation on the 8103 // device. 8104 PartialStruct.LowestElem = {0, LowestElem}; 8105 CharUnits TypeSize = CGF.getContext().getTypeSizeInChars( 8106 I->getAssociatedExpression()->getType()); 8107 Address HB = CGF.Builder.CreateConstGEP( 8108 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(LowestElem, 8109 CGF.VoidPtrTy), 8110 TypeSize.getQuantity() - 1); 8111 PartialStruct.HighestElem = { 8112 std::numeric_limits<decltype( 8113 PartialStruct.HighestElem.first)>::max(), 8114 HB}; 8115 PartialStruct.Base = BP; 8116 PartialStruct.LB = LB; 8117 assert( 8118 PartialStruct.PreliminaryMapData.BasePointers.empty() && 8119 "Overlapped elements must be used only once for the variable."); 8120 std::swap(PartialStruct.PreliminaryMapData, CombinedInfo); 8121 // Emit data for non-overlapped data. 8122 OpenMPOffloadMappingFlags Flags = 8123 OMP_MAP_MEMBER_OF | 8124 getMapTypeBits(MapType, MapModifiers, MotionModifiers, IsImplicit, 8125 /*AddPtrFlag=*/false, 8126 /*AddIsTargetParamFlag=*/false, IsNonContiguous); 8127 llvm::Value *Size = nullptr; 8128 // Do bitcopy of all non-overlapped structure elements. 8129 for (OMPClauseMappableExprCommon::MappableExprComponentListRef 8130 Component : OverlappedElements) { 8131 Address ComponentLB = Address::invalid(); 8132 for (const OMPClauseMappableExprCommon::MappableComponent &MC : 8133 Component) { 8134 if (const ValueDecl *VD = MC.getAssociatedDeclaration()) { 8135 const auto *FD = dyn_cast<FieldDecl>(VD); 8136 if (FD && FD->getType()->isLValueReferenceType()) { 8137 const auto *ME = 8138 cast<MemberExpr>(MC.getAssociatedExpression()); 8139 LValue BaseLVal = EmitMemberExprBase(CGF, ME); 8140 ComponentLB = 8141 CGF.EmitLValueForFieldInitialization(BaseLVal, FD) 8142 .getAddress(CGF); 8143 } else { 8144 ComponentLB = 8145 CGF.EmitOMPSharedLValue(MC.getAssociatedExpression()) 8146 .getAddress(CGF); 8147 } 8148 Size = CGF.Builder.CreatePtrDiff( 8149 CGF.EmitCastToVoidPtr(ComponentLB.getPointer()), 8150 CGF.EmitCastToVoidPtr(LB.getPointer())); 8151 break; 8152 } 8153 } 8154 assert(Size && "Failed to determine structure size"); 8155 CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr); 8156 CombinedInfo.BasePointers.push_back(BP.getPointer()); 8157 CombinedInfo.Pointers.push_back(LB.getPointer()); 8158 CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast( 8159 Size, CGF.Int64Ty, /*isSigned=*/true)); 8160 CombinedInfo.Types.push_back(Flags); 8161 CombinedInfo.Mappers.push_back(nullptr); 8162 CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize 8163 : 1); 8164 LB = CGF.Builder.CreateConstGEP(ComponentLB, 1); 8165 } 8166 CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr); 8167 CombinedInfo.BasePointers.push_back(BP.getPointer()); 8168 CombinedInfo.Pointers.push_back(LB.getPointer()); 8169 Size = CGF.Builder.CreatePtrDiff( 8170 CGF.Builder.CreateConstGEP(HB, 1).getPointer(), 8171 CGF.EmitCastToVoidPtr(LB.getPointer())); 8172 CombinedInfo.Sizes.push_back( 8173 CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true)); 8174 CombinedInfo.Types.push_back(Flags); 8175 CombinedInfo.Mappers.push_back(nullptr); 8176 CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? 
DimSize 8177 : 1); 8178 break; 8179 } 8180 llvm::Value *Size = getExprTypeSize(I->getAssociatedExpression()); 8181 if (!IsMemberPointerOrAddr || 8182 (Next == CE && MapType != OMPC_MAP_unknown)) { 8183 CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr); 8184 CombinedInfo.BasePointers.push_back(BP.getPointer()); 8185 CombinedInfo.Pointers.push_back(LB.getPointer()); 8186 CombinedInfo.Sizes.push_back( 8187 CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true)); 8188 CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize 8189 : 1); 8190 8191 // If Mapper is valid, the last component inherits the mapper. 8192 bool HasMapper = Mapper && Next == CE; 8193 CombinedInfo.Mappers.push_back(HasMapper ? Mapper : nullptr); 8194 8195 // We need to add a pointer flag for each map that comes from the 8196 // same expression except for the first one. We also need to signal 8197 // this map is the first one that relates with the current capture 8198 // (there is a set of entries for each capture). 8199 OpenMPOffloadMappingFlags Flags = getMapTypeBits( 8200 MapType, MapModifiers, MotionModifiers, IsImplicit, 8201 !IsExpressionFirstInfo || RequiresReference || 8202 FirstPointerInComplexData || IsMemberReference, 8203 IsCaptureFirstInfo && !RequiresReference, IsNonContiguous); 8204 8205 if (!IsExpressionFirstInfo || IsMemberReference) { 8206 // If we have a PTR_AND_OBJ pair where the OBJ is a pointer as well, 8207 // then we reset the TO/FROM/ALWAYS/DELETE/CLOSE flags. 8208 if (IsPointer || (IsMemberReference && Next != CE)) 8209 Flags &= ~(OMP_MAP_TO | OMP_MAP_FROM | OMP_MAP_ALWAYS | 8210 OMP_MAP_DELETE | OMP_MAP_CLOSE); 8211 8212 if (ShouldBeMemberOf) { 8213 // Set placeholder value MEMBER_OF=FFFF to indicate that the flag 8214 // should be later updated with the correct value of MEMBER_OF. 8215 Flags |= OMP_MAP_MEMBER_OF; 8216 // From now on, all subsequent PTR_AND_OBJ entries should not be 8217 // marked as MEMBER_OF. 8218 ShouldBeMemberOf = false; 8219 } 8220 } 8221 8222 CombinedInfo.Types.push_back(Flags); 8223 } 8224 8225 // If we have encountered a member expression so far, keep track of the 8226 // mapped member. If the parent is "*this", then the value declaration 8227 // is nullptr. 8228 if (EncounteredME) { 8229 const auto *FD = cast<FieldDecl>(EncounteredME->getMemberDecl()); 8230 unsigned FieldIndex = FD->getFieldIndex(); 8231 8232 // Update info about the lowest and highest elements for this struct 8233 if (!PartialStruct.Base.isValid()) { 8234 PartialStruct.LowestElem = {FieldIndex, LowestElem}; 8235 if (IsFinalArraySection) { 8236 Address HB = 8237 CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false) 8238 .getAddress(CGF); 8239 PartialStruct.HighestElem = {FieldIndex, HB}; 8240 } else { 8241 PartialStruct.HighestElem = {FieldIndex, LowestElem}; 8242 } 8243 PartialStruct.Base = BP; 8244 PartialStruct.LB = BP; 8245 } else if (FieldIndex < PartialStruct.LowestElem.first) { 8246 PartialStruct.LowestElem = {FieldIndex, LowestElem}; 8247 } else if (FieldIndex > PartialStruct.HighestElem.first) { 8248 PartialStruct.HighestElem = {FieldIndex, LowestElem}; 8249 } 8250 } 8251 8252 // Need to emit combined struct for array sections. 8253 if (IsFinalArraySection || IsNonContiguous) 8254 PartialStruct.IsArraySection = true; 8255 8256 // If we have a final array section, we are done with this expression. 8257 if (IsFinalArraySection) 8258 break; 8259 8260 // The pointer becomes the base for the next element. 8261 if (Next != CE) 8262 BP = IsMemberReference ? 
LowestElem : LB;

        IsExpressionFirstInfo = false;
        IsCaptureFirstInfo = false;
        FirstPointerInComplexData = false;
        IsPrevMemberReference = IsMemberReference;
      } else if (FirstPointerInComplexData) {
        QualType Ty = Components.rbegin()
                          ->getAssociatedDeclaration()
                          ->getType()
                          .getNonReferenceType();
        BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
        FirstPointerInComplexData = false;
      }
    }
    // If we ran through the whole component list without encountering a
    // member expression, allocate space for the whole record.
    if (!EncounteredME)
      PartialStruct.HasCompleteRecord = true;

    if (!IsNonContiguous)
      return;

    const ASTContext &Context = CGF.getContext();

    // To support strides in array sections, we initialize the first dimension
    // size as 1, the first offset as 0, and the first count as 1.
    MapValuesArrayTy CurOffsets = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 0)};
    MapValuesArrayTy CurCounts = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)};
    MapValuesArrayTy CurStrides;
    MapValuesArrayTy DimSizes{llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)};
    uint64_t ElementTypeSize;

    // Collect size information for each dimension and get the element size as
    // the first stride. For example, for `int arr[10][10]`, the DimSizes
    // should be [10, 10] and the first stride is 4 bytes.
    for (const OMPClauseMappableExprCommon::MappableComponent &Component :
         Components) {
      const Expr *AssocExpr = Component.getAssociatedExpression();
      const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);

      if (!OASE)
        continue;

      QualType Ty = OMPArraySectionExpr::getBaseOriginalType(OASE->getBase());
      auto *CAT = Context.getAsConstantArrayType(Ty);
      auto *VAT = Context.getAsVariableArrayType(Ty);

      // We need all the dimension sizes except for the last one.
      assert((VAT || CAT || &Component == &*Components.begin()) &&
             "Should be either ConstantArray or VariableArray if not the "
             "first Component");

      // Get the element size if CurStrides is empty.
      if (CurStrides.empty()) {
        const Type *ElementType = nullptr;
        if (CAT)
          ElementType = CAT->getElementType().getTypePtr();
        else if (VAT)
          ElementType = VAT->getElementType().getTypePtr();
        else
          assert(&Component == &*Components.begin() &&
                 "Only expect pointer (non CAT or VAT) when this is the "
                 "first Component");
        // If ElementType is null, then it means the base is a pointer
        // (neither CAT nor VAT) and we'll attempt to get ElementType again
        // for the next iteration.
        if (ElementType) {
          // If the base is a pointer, we need to remove one level of
          // indirection.
          if (&Component != &*Components.begin())
            ElementType = ElementType->getPointeeOrArrayElementType();
          ElementTypeSize =
              Context.getTypeSizeInChars(ElementType).getQuantity();
          CurStrides.push_back(
              llvm::ConstantInt::get(CGF.Int64Ty, ElementTypeSize));
        }
      }
      // Get the dimension value except for the last dimension since we don't
      // need it.
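      // (Sketch: for 'arr[0:2][0:5]' over 'int arr[10][10]', DimSizes ends up
      // as {1, 10}: the dummy dimension plus one real dimension; the size of
      // the last dimension is never needed.)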
      if (DimSizes.size() < Components.size() - 1) {
        if (CAT)
          DimSizes.push_back(llvm::ConstantInt::get(
              CGF.Int64Ty, CAT->getSize().getZExtValue()));
        else if (VAT)
          DimSizes.push_back(CGF.Builder.CreateIntCast(
              CGF.EmitScalarExpr(VAT->getSizeExpr()), CGF.Int64Ty,
              /*IsSigned=*/false));
      }
    }

    // Skip the dummy dimension since we already have its information.
    auto DI = DimSizes.begin() + 1;
    // Running product of the dimension sizes.
    llvm::Value *DimProd =
        llvm::ConstantInt::get(CGF.CGM.Int64Ty, ElementTypeSize);

    // Collect info for the non-contiguous case. Notice that offset, count,
    // and stride are only meaningful for array sections, so entries are only
    // recorded for array-section and subscript components.
    // Also, the sizes of the offsets, counts, and strides arrays are not the
    // same as those of pointers, base_pointers, sizes, or dims; instead, they
    // match the number of non-contiguous declarations in the target update
    // to/from clause.
    for (const OMPClauseMappableExprCommon::MappableComponent &Component :
         Components) {
      const Expr *AssocExpr = Component.getAssociatedExpression();

      if (const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr)) {
        llvm::Value *Offset = CGF.Builder.CreateIntCast(
            CGF.EmitScalarExpr(AE->getIdx()), CGF.Int64Ty,
            /*isSigned=*/false);
        CurOffsets.push_back(Offset);
        CurCounts.push_back(llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/1));
        CurStrides.push_back(CurStrides.back());
        continue;
      }

      const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);

      if (!OASE)
        continue;

      // Offset
      const Expr *OffsetExpr = OASE->getLowerBound();
      llvm::Value *Offset = nullptr;
      if (!OffsetExpr) {
        // If the offset is absent, then we just set it to zero.
        Offset = llvm::ConstantInt::get(CGF.Int64Ty, 0);
      } else {
        Offset = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(OffsetExpr),
                                           CGF.Int64Ty,
                                           /*isSigned=*/false);
      }
      CurOffsets.push_back(Offset);

      // Count
      const Expr *CountExpr = OASE->getLength();
      llvm::Value *Count = nullptr;
      if (!CountExpr) {
        // In Clang, once a higher dimension is an array section, all the
        // lower dimensions are constructed as array sections too. However,
        // for a case like arr[0:2][2], Clang constructs the inner dimension
        // as an array section even though it is not in array section form
        // according to the spec.
        if (!OASE->getColonLocFirst().isValid() &&
            !OASE->getColonLocSecond().isValid()) {
          Count = llvm::ConstantInt::get(CGF.Int64Ty, 1);
        } else {
          // OpenMP 5.0, 2.1.5 Array Sections, Description.
          // When the length is absent it defaults to ⌈(size −
          // lower-bound)/stride⌉, where size is the size of the array
          // dimension.
          const Expr *StrideExpr = OASE->getStride();
          llvm::Value *Stride =
              StrideExpr
                  ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr),
                                              CGF.Int64Ty, /*isSigned=*/false)
                  : nullptr;
          if (Stride)
            Count = CGF.Builder.CreateUDiv(
                CGF.Builder.CreateNUWSub(*DI, Offset), Stride);
          else
            Count = CGF.Builder.CreateNUWSub(*DI, Offset);
        }
      } else {
        Count = CGF.EmitScalarExpr(CountExpr);
      }
      Count = CGF.Builder.CreateIntCast(Count, CGF.Int64Ty, /*isSigned=*/false);
      CurCounts.push_back(Count);

      // Stride_n' = Stride_n * (D_0 * D_1 ... * D_n-1) * Unit size
      // Take `int arr[5][5][5]` and `arr[0:2:2][1:2:1][0:2:2]` as an example:
      //              Offset Count     Stride
      //    D0          0      1         4    (int)    <- dummy dimension
      //    D1          0      2         8    (2 * (1) * 4)
      //    D2          1      2        20    (1 * (1 * 5) * 4)
      //    D3          0      2       200    (2 * (1 * 5 * 5) * 4)
      const Expr *StrideExpr = OASE->getStride();
      llvm::Value *Stride =
          StrideExpr
              ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr),
                                          CGF.Int64Ty, /*isSigned=*/false)
              : nullptr;
      DimProd = CGF.Builder.CreateNUWMul(DimProd, *(DI - 1));
      if (Stride)
        CurStrides.push_back(CGF.Builder.CreateNUWMul(DimProd, Stride));
      else
        CurStrides.push_back(DimProd);
      if (DI != DimSizes.end())
        ++DI;
    }

    CombinedInfo.NonContigInfo.Offsets.push_back(CurOffsets);
    CombinedInfo.NonContigInfo.Counts.push_back(CurCounts);
    CombinedInfo.NonContigInfo.Strides.push_back(CurStrides);
  }

  /// Return the adjusted map modifiers if the declaration a capture refers to
  /// appears in a firstprivate clause. This is expected to be used only with
  /// directives that start with 'target'.
  MappableExprsHandler::OpenMPOffloadMappingFlags
  getMapModifiersForPrivateClauses(const CapturedStmt::Capture &Cap) const {
    assert(Cap.capturesVariable() && "Expected capture by reference only!");

    // A firstprivate variable captured by reference will use only the
    // 'private ptr' and 'map to' flags. Return the right flags if the captured
    // declaration is known as firstprivate in this handler.
    if (FirstPrivateDecls.count(Cap.getCapturedVar())) {
      if (Cap.getCapturedVar()->getType()->isAnyPointerType())
        return MappableExprsHandler::OMP_MAP_TO |
               MappableExprsHandler::OMP_MAP_PTR_AND_OBJ;
      return MappableExprsHandler::OMP_MAP_PRIVATE |
             MappableExprsHandler::OMP_MAP_TO;
    }
    auto I = LambdasMap.find(Cap.getCapturedVar()->getCanonicalDecl());
    if (I != LambdasMap.end())
      // For map(to: lambda), use the user-specified map type.
      return getMapTypeBits(
          I->getSecond()->getMapType(), I->getSecond()->getMapTypeModifiers(),
          /*MotionModifiers=*/llvm::None, I->getSecond()->isImplicit(),
          /*AddPtrFlag=*/false,
          /*AddIsTargetParamFlag=*/false,
          /*isNonContiguous=*/false);
    return MappableExprsHandler::OMP_MAP_TO |
           MappableExprsHandler::OMP_MAP_FROM;
  }

  static OpenMPOffloadMappingFlags getMemberOfFlag(unsigned Position) {
    // Shift left by getFlagMemberOffset() bits.
    return static_cast<OpenMPOffloadMappingFlags>(((uint64_t)Position + 1)
                                                  << getFlagMemberOffset());
  }

  static void setCorrectMemberOfFlag(OpenMPOffloadMappingFlags &Flags,
                                     OpenMPOffloadMappingFlags MemberOfFlag) {
    // If the entry is PTR_AND_OBJ but has not been marked with the special
    // placeholder value 0xFFFF in the MEMBER_OF field, then it should not be
    // marked as MEMBER_OF.
    if ((Flags & OMP_MAP_PTR_AND_OBJ) &&
        ((Flags & OMP_MAP_MEMBER_OF) != OMP_MAP_MEMBER_OF))
      return;

    // Reset the placeholder value to prepare the flag for the assignment of
    // the proper MEMBER_OF value.
    Flags &= ~OMP_MAP_MEMBER_OF;
    Flags |= MemberOfFlag;
  }

  void getPlainLayout(const CXXRecordDecl *RD,
                      llvm::SmallVectorImpl<const FieldDecl *> &Layout,
                      bool AsBase) const {
    const CGRecordLayout &RL = CGF.getTypes().getCGRecordLayout(RD);

    llvm::StructType *St = AsBase ?
RL.getBaseSubobjectLLVMType() : RL.getLLVMType(); 8517 8518 unsigned NumElements = St->getNumElements(); 8519 llvm::SmallVector< 8520 llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>, 4> 8521 RecordLayout(NumElements); 8522 8523 // Fill bases. 8524 for (const auto &I : RD->bases()) { 8525 if (I.isVirtual()) 8526 continue; 8527 const auto *Base = I.getType()->getAsCXXRecordDecl(); 8528 // Ignore empty bases. 8529 if (Base->isEmpty() || CGF.getContext() 8530 .getASTRecordLayout(Base) 8531 .getNonVirtualSize() 8532 .isZero()) 8533 continue; 8534 8535 unsigned FieldIndex = RL.getNonVirtualBaseLLVMFieldNo(Base); 8536 RecordLayout[FieldIndex] = Base; 8537 } 8538 // Fill in virtual bases. 8539 for (const auto &I : RD->vbases()) { 8540 const auto *Base = I.getType()->getAsCXXRecordDecl(); 8541 // Ignore empty bases. 8542 if (Base->isEmpty()) 8543 continue; 8544 unsigned FieldIndex = RL.getVirtualBaseIndex(Base); 8545 if (RecordLayout[FieldIndex]) 8546 continue; 8547 RecordLayout[FieldIndex] = Base; 8548 } 8549 // Fill in all the fields. 8550 assert(!RD->isUnion() && "Unexpected union."); 8551 for (const auto *Field : RD->fields()) { 8552 // Fill in non-bitfields. (Bitfields always use a zero pattern, which we 8553 // will fill in later.) 8554 if (!Field->isBitField() && !Field->isZeroSize(CGF.getContext())) { 8555 unsigned FieldIndex = RL.getLLVMFieldNo(Field); 8556 RecordLayout[FieldIndex] = Field; 8557 } 8558 } 8559 for (const llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *> 8560 &Data : RecordLayout) { 8561 if (Data.isNull()) 8562 continue; 8563 if (const auto *Base = Data.dyn_cast<const CXXRecordDecl *>()) 8564 getPlainLayout(Base, Layout, /*AsBase=*/true); 8565 else 8566 Layout.push_back(Data.get<const FieldDecl *>()); 8567 } 8568 } 8569 8570 /// Generate all the base pointers, section pointers, sizes, map types, and 8571 /// mappers for the extracted mappable expressions (all included in \a 8572 /// CombinedInfo). Also, for each item that relates with a device pointer, a 8573 /// pair of the relevant declaration and index where it occurs is appended to 8574 /// the device pointers info array. 8575 void generateAllInfoForClauses( 8576 ArrayRef<const OMPClause *> Clauses, MapCombinedInfoTy &CombinedInfo, 8577 const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet = 8578 llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const { 8579 // We have to process the component lists that relate with the same 8580 // declaration in a single chunk so that we can generate the map flags 8581 // correctly. Therefore, we organize all lists in a map. 8582 enum MapKind { Present, Allocs, Other, Total }; 8583 llvm::MapVector<CanonicalDeclPtr<const Decl>, 8584 SmallVector<SmallVector<MapInfo, 8>, 4>> 8585 Info; 8586 8587 // Helper function to fill the information map for the different supported 8588 // clauses. 
8589     auto &&InfoGen =
8590         [&Info, &SkipVarSet](
8591             const ValueDecl *D, MapKind Kind,
8592             OMPClauseMappableExprCommon::MappableExprComponentListRef L,
8593             OpenMPMapClauseKind MapType,
8594             ArrayRef<OpenMPMapModifierKind> MapModifiers,
8595             ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
8596             bool ReturnDevicePointer, bool IsImplicit, const ValueDecl *Mapper,
8597             const Expr *VarRef = nullptr, bool ForDeviceAddr = false) {
8598           if (SkipVarSet.contains(D))
8599             return;
8600           auto It = Info.find(D);
8601           if (It == Info.end())
8602             It = Info
8603                      .insert(std::make_pair(
8604                          D, SmallVector<SmallVector<MapInfo, 8>, 4>(Total)))
8605                      .first;
8606           It->second[Kind].emplace_back(
8607               L, MapType, MapModifiers, MotionModifiers, ReturnDevicePointer,
8608               IsImplicit, Mapper, VarRef, ForDeviceAddr);
8609         };
8610
8611     for (const auto *Cl : Clauses) {
8612       const auto *C = dyn_cast<OMPMapClause>(Cl);
8613       if (!C)
8614         continue;
8615       MapKind Kind = Other;
8616       if (llvm::is_contained(C->getMapTypeModifiers(),
8617                              OMPC_MAP_MODIFIER_present))
8618         Kind = Present;
8619       else if (C->getMapType() == OMPC_MAP_alloc)
8620         Kind = Allocs;
8621       const auto *EI = C->getVarRefs().begin();
8622       for (const auto L : C->component_lists()) {
8623         const Expr *E = (C->getMapLoc().isValid()) ? *EI : nullptr;
8624         InfoGen(std::get<0>(L), Kind, std::get<1>(L), C->getMapType(),
8625                 C->getMapTypeModifiers(), llvm::None,
8626                 /*ReturnDevicePointer=*/false, C->isImplicit(), std::get<2>(L),
8627                 E);
8628         ++EI;
8629       }
8630     }
8631     for (const auto *Cl : Clauses) {
8632       const auto *C = dyn_cast<OMPToClause>(Cl);
8633       if (!C)
8634         continue;
8635       MapKind Kind = Other;
8636       if (llvm::is_contained(C->getMotionModifiers(),
8637                              OMPC_MOTION_MODIFIER_present))
8638         Kind = Present;
8639       const auto *EI = C->getVarRefs().begin();
8640       for (const auto L : C->component_lists()) {
8641         InfoGen(std::get<0>(L), Kind, std::get<1>(L), OMPC_MAP_to, llvm::None,
8642                 C->getMotionModifiers(), /*ReturnDevicePointer=*/false,
8643                 C->isImplicit(), std::get<2>(L), *EI);
8644         ++EI;
8645       }
8646     }
8647     for (const auto *Cl : Clauses) {
8648       const auto *C = dyn_cast<OMPFromClause>(Cl);
8649       if (!C)
8650         continue;
8651       MapKind Kind = Other;
8652       if (llvm::is_contained(C->getMotionModifiers(),
8653                              OMPC_MOTION_MODIFIER_present))
8654         Kind = Present;
8655       const auto *EI = C->getVarRefs().begin();
8656       for (const auto L : C->component_lists()) {
8657         InfoGen(std::get<0>(L), Kind, std::get<1>(L), OMPC_MAP_from, llvm::None,
8658                 C->getMotionModifiers(), /*ReturnDevicePointer=*/false,
8659                 C->isImplicit(), std::get<2>(L), *EI);
8660         ++EI;
8661       }
8662     }
8663
8664     // Look at the use_device_ptr clause information and mark the existing map
8665     // entries as such. If there is no map information for an entry in the
8666     // use_device_ptr list, we create one with map type 'alloc' and zero size
8667     // section. It is the user's fault if that was not mapped before. If there
8668     // is no map information and the pointer is a struct member, then we defer
8669     // the emission of that entry until the whole struct has been processed.
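    // As an illustrative sketch (hypothetical user code, not from this file):
    //
    //   int *p;
    //   #pragma omp target data map(tofrom: p[0:N]) use_device_ptr(p)
    //   { /* here 'p' holds the corresponding device pointer */ }
    //
    // If 'p[0:N]' was never mapped, the zero-size 'alloc' entry generated
    // below is all the runtime gets, and any lookup failure is on the user.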
8670 llvm::MapVector<CanonicalDeclPtr<const Decl>, 8671 SmallVector<DeferredDevicePtrEntryTy, 4>> 8672 DeferredInfo; 8673 MapCombinedInfoTy UseDevicePtrCombinedInfo; 8674 8675 for (const auto *Cl : Clauses) { 8676 const auto *C = dyn_cast<OMPUseDevicePtrClause>(Cl); 8677 if (!C) 8678 continue; 8679 for (const auto L : C->component_lists()) { 8680 OMPClauseMappableExprCommon::MappableExprComponentListRef Components = 8681 std::get<1>(L); 8682 assert(!Components.empty() && 8683 "Not expecting empty list of components!"); 8684 const ValueDecl *VD = Components.back().getAssociatedDeclaration(); 8685 VD = cast<ValueDecl>(VD->getCanonicalDecl()); 8686 const Expr *IE = Components.back().getAssociatedExpression(); 8687 // If the first component is a member expression, we have to look into 8688 // 'this', which maps to null in the map of map information. Otherwise 8689 // look directly for the information. 8690 auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD); 8691 8692 // We potentially have map information for this declaration already. 8693 // Look for the first set of components that refer to it. 8694 if (It != Info.end()) { 8695 bool Found = false; 8696 for (auto &Data : It->second) { 8697 auto *CI = llvm::find_if(Data, [VD](const MapInfo &MI) { 8698 return MI.Components.back().getAssociatedDeclaration() == VD; 8699 }); 8700 // If we found a map entry, signal that the pointer has to be 8701 // returned and move on to the next declaration. Exclude cases where 8702 // the base pointer is mapped as array subscript, array section or 8703 // array shaping. The base address is passed as a pointer to base in 8704 // this case and cannot be used as a base for use_device_ptr list 8705 // item. 8706 if (CI != Data.end()) { 8707 auto PrevCI = std::next(CI->Components.rbegin()); 8708 const auto *VarD = dyn_cast<VarDecl>(VD); 8709 if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() || 8710 isa<MemberExpr>(IE) || 8711 !VD->getType().getNonReferenceType()->isPointerType() || 8712 PrevCI == CI->Components.rend() || 8713 isa<MemberExpr>(PrevCI->getAssociatedExpression()) || !VarD || 8714 VarD->hasLocalStorage()) { 8715 CI->ReturnDevicePointer = true; 8716 Found = true; 8717 break; 8718 } 8719 } 8720 } 8721 if (Found) 8722 continue; 8723 } 8724 8725 // We didn't find any match in our map information - generate a zero 8726 // size array section - if the pointer is a struct member we defer this 8727 // action until the whole struct has been processed. 8728 if (isa<MemberExpr>(IE)) { 8729 // Insert the pointer into Info to be processed by 8730 // generateInfoForComponentList. Because it is a member pointer 8731 // without a pointee, no entry will be generated for it, therefore 8732 // we need to generate one after the whole struct has been processed. 8733 // Nonetheless, generateInfoForComponentList must be called to take 8734 // the pointer into account for the calculation of the range of the 8735 // partial struct. 
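        // For example (a hypothetical sketch): with 'struct S { int *p; } s;'
        // and a 'use_device_ptr(s.p)' list item, 's.p' is a member pointer, so
        // its RETURN_PARAM entry can only be emitted after the entry for the
        // enclosing struct 's' is complete; that is the deferral done below.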
8736         InfoGen(nullptr, Other, Components, OMPC_MAP_unknown, llvm::None,
8737                 llvm::None, /*ReturnDevicePointer=*/false, C->isImplicit(),
8738                 nullptr);
8739         DeferredInfo[nullptr].emplace_back(IE, VD, /*ForDeviceAddr=*/false);
8740       } else {
8741         llvm::Value *Ptr =
8742             CGF.EmitLoadOfScalar(CGF.EmitLValue(IE), IE->getExprLoc());
8743         UseDevicePtrCombinedInfo.Exprs.push_back(VD);
8744         UseDevicePtrCombinedInfo.BasePointers.emplace_back(Ptr, VD);
8745         UseDevicePtrCombinedInfo.Pointers.push_back(Ptr);
8746         UseDevicePtrCombinedInfo.Sizes.push_back(
8747             llvm::Constant::getNullValue(CGF.Int64Ty));
8748         UseDevicePtrCombinedInfo.Types.push_back(OMP_MAP_RETURN_PARAM);
8749         UseDevicePtrCombinedInfo.Mappers.push_back(nullptr);
8750       }
8751     }
8752   }
8753
8754   // Look at the use_device_addr clause information and mark the existing map
8755   // entries as such. If there is no map information for an entry in the
8756   // use_device_addr list, we create one with map type 'alloc' and zero size
8757   // section. It is the user's fault if that was not mapped before. If there
8758   // is no map information and the pointer is a struct member, then we defer
8759   // the emission of that entry until the whole struct has been processed.
8760   llvm::SmallDenseSet<CanonicalDeclPtr<const Decl>, 4> Processed;
8761   for (const auto *Cl : Clauses) {
8762     const auto *C = dyn_cast<OMPUseDeviceAddrClause>(Cl);
8763     if (!C)
8764       continue;
8765     for (const auto L : C->component_lists()) {
8766       assert(!std::get<1>(L).empty() &&
8767              "Not expecting empty list of components!");
8768       const ValueDecl *VD = std::get<1>(L).back().getAssociatedDeclaration();
8769       if (!Processed.insert(VD).second)
8770         continue;
8771       VD = cast<ValueDecl>(VD->getCanonicalDecl());
8772       const Expr *IE = std::get<1>(L).back().getAssociatedExpression();
8773       // If the first component is a member expression, we have to look into
8774       // 'this', which maps to null in the map of map information. Otherwise
8775       // look directly for the information.
8776       auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);
8777
8778       // We potentially have map information for this declaration already.
8779       // Look for the first set of components that refer to it.
8780       if (It != Info.end()) {
8781         bool Found = false;
8782         for (auto &Data : It->second) {
8783           auto *CI = llvm::find_if(Data, [VD](const MapInfo &MI) {
8784             return MI.Components.back().getAssociatedDeclaration() == VD;
8785           });
8786           // If we found a map entry, signal that the pointer has to be
8787           // returned and move on to the next declaration.
8788           if (CI != Data.end()) {
8789             CI->ReturnDevicePointer = true;
8790             Found = true;
8791             break;
8792           }
8793         }
8794         if (Found)
8795           continue;
8796       }
8797
8798       // We didn't find any match in our map information - generate a zero
8799       // size array section - if the pointer is a struct member we defer this
8800       // action until the whole struct has been processed.
8801       if (isa<MemberExpr>(IE)) {
8802         // Insert the pointer into Info to be processed by
8803         // generateInfoForComponentList. Because it is a member pointer
8804         // without a pointee, no entry will be generated for it, therefore
8805         // we need to generate one after the whole struct has been processed.
8806         // Nonetheless, generateInfoForComponentList must be called to take
8807         // the pointer into account for the calculation of the range of the
8808         // partial struct.
8809 InfoGen(nullptr, Other, std::get<1>(L), OMPC_MAP_unknown, llvm::None, 8810 llvm::None, /*ReturnDevicePointer=*/false, C->isImplicit(), 8811 nullptr, nullptr, /*ForDeviceAddr=*/true); 8812 DeferredInfo[nullptr].emplace_back(IE, VD, /*ForDeviceAddr=*/true); 8813 } else { 8814 llvm::Value *Ptr; 8815 if (IE->isGLValue()) 8816 Ptr = CGF.EmitLValue(IE).getPointer(CGF); 8817 else 8818 Ptr = CGF.EmitScalarExpr(IE); 8819 CombinedInfo.Exprs.push_back(VD); 8820 CombinedInfo.BasePointers.emplace_back(Ptr, VD); 8821 CombinedInfo.Pointers.push_back(Ptr); 8822 CombinedInfo.Sizes.push_back( 8823 llvm::Constant::getNullValue(CGF.Int64Ty)); 8824 CombinedInfo.Types.push_back(OMP_MAP_RETURN_PARAM); 8825 CombinedInfo.Mappers.push_back(nullptr); 8826 } 8827 } 8828 } 8829 8830 for (const auto &Data : Info) { 8831 StructRangeInfoTy PartialStruct; 8832 // Temporary generated information. 8833 MapCombinedInfoTy CurInfo; 8834 const Decl *D = Data.first; 8835 const ValueDecl *VD = cast_or_null<ValueDecl>(D); 8836 for (const auto &M : Data.second) { 8837 for (const MapInfo &L : M) { 8838 assert(!L.Components.empty() && 8839 "Not expecting declaration with no component lists."); 8840 8841 // Remember the current base pointer index. 8842 unsigned CurrentBasePointersIdx = CurInfo.BasePointers.size(); 8843 CurInfo.NonContigInfo.IsNonContiguous = 8844 L.Components.back().isNonContiguous(); 8845 generateInfoForComponentList( 8846 L.MapType, L.MapModifiers, L.MotionModifiers, L.Components, 8847 CurInfo, PartialStruct, /*IsFirstComponentList=*/false, 8848 L.IsImplicit, L.Mapper, L.ForDeviceAddr, VD, L.VarRef); 8849 8850 // If this entry relates with a device pointer, set the relevant 8851 // declaration and add the 'return pointer' flag. 8852 if (L.ReturnDevicePointer) { 8853 assert(CurInfo.BasePointers.size() > CurrentBasePointersIdx && 8854 "Unexpected number of mapped base pointers."); 8855 8856 const ValueDecl *RelevantVD = 8857 L.Components.back().getAssociatedDeclaration(); 8858 assert(RelevantVD && 8859 "No relevant declaration related with device pointer??"); 8860 8861 CurInfo.BasePointers[CurrentBasePointersIdx].setDevicePtrDecl( 8862 RelevantVD); 8863 CurInfo.Types[CurrentBasePointersIdx] |= OMP_MAP_RETURN_PARAM; 8864 } 8865 } 8866 } 8867 8868 // Append any pending zero-length pointers which are struct members and 8869 // used with use_device_ptr or use_device_addr. 8870 auto CI = DeferredInfo.find(Data.first); 8871 if (CI != DeferredInfo.end()) { 8872 for (const DeferredDevicePtrEntryTy &L : CI->second) { 8873 llvm::Value *BasePtr; 8874 llvm::Value *Ptr; 8875 if (L.ForDeviceAddr) { 8876 if (L.IE->isGLValue()) 8877 Ptr = this->CGF.EmitLValue(L.IE).getPointer(CGF); 8878 else 8879 Ptr = this->CGF.EmitScalarExpr(L.IE); 8880 BasePtr = Ptr; 8881 // Entry is RETURN_PARAM. Also, set the placeholder value 8882 // MEMBER_OF=FFFF so that the entry is later updated with the 8883 // correct value of MEMBER_OF. 8884 CurInfo.Types.push_back(OMP_MAP_RETURN_PARAM | OMP_MAP_MEMBER_OF); 8885 } else { 8886 BasePtr = this->CGF.EmitLValue(L.IE).getPointer(CGF); 8887 Ptr = this->CGF.EmitLoadOfScalar(this->CGF.EmitLValue(L.IE), 8888 L.IE->getExprLoc()); 8889 // Entry is PTR_AND_OBJ and RETURN_PARAM. Also, set the 8890 // placeholder value MEMBER_OF=FFFF so that the entry is later 8891 // updated with the correct value of MEMBER_OF. 
8892 CurInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_RETURN_PARAM | 8893 OMP_MAP_MEMBER_OF); 8894 } 8895 CurInfo.Exprs.push_back(L.VD); 8896 CurInfo.BasePointers.emplace_back(BasePtr, L.VD); 8897 CurInfo.Pointers.push_back(Ptr); 8898 CurInfo.Sizes.push_back( 8899 llvm::Constant::getNullValue(this->CGF.Int64Ty)); 8900 CurInfo.Mappers.push_back(nullptr); 8901 } 8902 } 8903 // If there is an entry in PartialStruct it means we have a struct with 8904 // individual members mapped. Emit an extra combined entry. 8905 if (PartialStruct.Base.isValid()) { 8906 CurInfo.NonContigInfo.Dims.push_back(0); 8907 emitCombinedEntry(CombinedInfo, CurInfo.Types, PartialStruct, VD); 8908 } 8909 8910 // We need to append the results of this capture to what we already 8911 // have. 8912 CombinedInfo.append(CurInfo); 8913 } 8914 // Append data for use_device_ptr clauses. 8915 CombinedInfo.append(UseDevicePtrCombinedInfo); 8916 } 8917 8918 public: 8919 MappableExprsHandler(const OMPExecutableDirective &Dir, CodeGenFunction &CGF) 8920 : CurDir(&Dir), CGF(CGF) { 8921 // Extract firstprivate clause information. 8922 for (const auto *C : Dir.getClausesOfKind<OMPFirstprivateClause>()) 8923 for (const auto *D : C->varlists()) 8924 FirstPrivateDecls.try_emplace( 8925 cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl()), C->isImplicit()); 8926 // Extract implicit firstprivates from uses_allocators clauses. 8927 for (const auto *C : Dir.getClausesOfKind<OMPUsesAllocatorsClause>()) { 8928 for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) { 8929 OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I); 8930 if (const auto *DRE = dyn_cast_or_null<DeclRefExpr>(D.AllocatorTraits)) 8931 FirstPrivateDecls.try_emplace(cast<VarDecl>(DRE->getDecl()), 8932 /*Implicit=*/true); 8933 else if (const auto *VD = dyn_cast<VarDecl>( 8934 cast<DeclRefExpr>(D.Allocator->IgnoreParenImpCasts()) 8935 ->getDecl())) 8936 FirstPrivateDecls.try_emplace(VD, /*Implicit=*/true); 8937 } 8938 } 8939 // Extract device pointer clause information. 8940 for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>()) 8941 for (auto L : C->component_lists()) 8942 DevPointersMap[std::get<0>(L)].push_back(std::get<1>(L)); 8943 // Extract map information. 8944 for (const auto *C : Dir.getClausesOfKind<OMPMapClause>()) { 8945 if (C->getMapType() != OMPC_MAP_to) 8946 continue; 8947 for (auto L : C->component_lists()) { 8948 const ValueDecl *VD = std::get<0>(L); 8949 const auto *RD = VD ? VD->getType() 8950 .getCanonicalType() 8951 .getNonReferenceType() 8952 ->getAsCXXRecordDecl() 8953 : nullptr; 8954 if (RD && RD->isLambda()) 8955 LambdasMap.try_emplace(std::get<0>(L), C); 8956 } 8957 } 8958 } 8959 8960 /// Constructor for the declare mapper directive. 8961 MappableExprsHandler(const OMPDeclareMapperDecl &Dir, CodeGenFunction &CGF) 8962 : CurDir(&Dir), CGF(CGF) {} 8963 8964 /// Generate code for the combined entry if we have a partially mapped struct 8965 /// and take care of the mapping flags of the arguments corresponding to 8966 /// individual struct members. 
8967   void emitCombinedEntry(MapCombinedInfoTy &CombinedInfo,
8968                          MapFlagsArrayTy &CurTypes,
8969                          const StructRangeInfoTy &PartialStruct,
8970                          const ValueDecl *VD = nullptr,
8971                          bool NotTargetParams = true) const {
8972     if (CurTypes.size() == 1 &&
8973         ((CurTypes.back() & OMP_MAP_MEMBER_OF) != OMP_MAP_MEMBER_OF) &&
8974         !PartialStruct.IsArraySection)
8975       return;
8976     Address LBAddr = PartialStruct.LowestElem.second;
8977     Address HBAddr = PartialStruct.HighestElem.second;
8978     if (PartialStruct.HasCompleteRecord) {
8979       LBAddr = PartialStruct.LB;
8980       HBAddr = PartialStruct.LB;
8981     }
8982     CombinedInfo.Exprs.push_back(VD);
8983     // Base is the base of the struct.
8984     CombinedInfo.BasePointers.push_back(PartialStruct.Base.getPointer());
8985     // Pointer is the address of the lowest element.
8986     llvm::Value *LB = LBAddr.getPointer();
8987     CombinedInfo.Pointers.push_back(LB);
8988     // There should not be a mapper for a combined entry.
8989     CombinedInfo.Mappers.push_back(nullptr);
8990     // Size is (addr of {highest+1} element) - (addr of lowest element).
8991     llvm::Value *HB = HBAddr.getPointer();
8992     llvm::Value *HAddr =
8993         CGF.Builder.CreateConstGEP1_32(HBAddr.getElementType(), HB, /*Idx0=*/1);
8994     llvm::Value *CLAddr = CGF.Builder.CreatePointerCast(LB, CGF.VoidPtrTy);
8995     llvm::Value *CHAddr = CGF.Builder.CreatePointerCast(HAddr, CGF.VoidPtrTy);
8996     llvm::Value *Diff = CGF.Builder.CreatePtrDiff(CHAddr, CLAddr);
8997     llvm::Value *Size = CGF.Builder.CreateIntCast(Diff, CGF.Int64Ty,
8998                                                   /*isSigned=*/false);
8999     CombinedInfo.Sizes.push_back(Size);
9000     // The map type is always TARGET_PARAM when we generate info for captures.
9001     CombinedInfo.Types.push_back(NotTargetParams ? OMP_MAP_NONE
9002                                                  : OMP_MAP_TARGET_PARAM);
9003     // If any element has the present modifier, then make sure the runtime
9004     // doesn't attempt to allocate the struct.
9005     if (CurTypes.end() !=
9006         llvm::find_if(CurTypes, [](OpenMPOffloadMappingFlags Type) {
9007           return Type & OMP_MAP_PRESENT;
9008         }))
9009       CombinedInfo.Types.back() |= OMP_MAP_PRESENT;
9010     // Remove the TARGET_PARAM flag from the first element.
9011     (*CurTypes.begin()) &= ~OMP_MAP_TARGET_PARAM;
9012     // If any element has the ompx_hold modifier, then make sure the runtime
9013     // uses the hold reference count for the struct as a whole so that it won't
9014     // be unmapped by an extra dynamic reference count decrement. Add it to all
9015     // elements as well so the runtime knows which reference count to check
9016     // when determining whether it's time for device-to-host transfers of
9017     // individual elements.
9018     if (CurTypes.end() !=
9019         llvm::find_if(CurTypes, [](OpenMPOffloadMappingFlags Type) {
9020           return Type & OMP_MAP_OMPX_HOLD;
9021         })) {
9022       CombinedInfo.Types.back() |= OMP_MAP_OMPX_HOLD;
9023       for (auto &M : CurTypes)
9024         M |= OMP_MAP_OMPX_HOLD;
9025     }
9026
9027     // All other current entries will be MEMBER_OF the combined entry
9028     // (except for PTR_AND_OBJ entries which do not have a placeholder value
9029     // 0xFFFF in the MEMBER_OF field).
9030     OpenMPOffloadMappingFlags MemberOfFlag =
9031         getMemberOfFlag(CombinedInfo.BasePointers.size() - 1);
9032     for (auto &M : CurTypes)
9033       setCorrectMemberOfFlag(M, MemberOfFlag);
9034   }
9035
9036   /// Generate all the base pointers, section pointers, sizes, map types, and
9037   /// mappers for the extracted mappable expressions (all included in \a
9038   /// CombinedInfo). Also, for each item that relates to a device pointer, a
9039   /// pair of the relevant declaration and the index where it occurs is
9040   /// appended to the device pointers info array.
9041   void generateAllInfo(
9042       MapCombinedInfoTy &CombinedInfo,
9043       const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet =
9044           llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const {
9045     assert(CurDir.is<const OMPExecutableDirective *>() &&
9046            "Expect an executable directive");
9047     const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
9048     generateAllInfoForClauses(CurExecDir->clauses(), CombinedInfo, SkipVarSet);
9049   }
9050
9051   /// Generate all the base pointers, section pointers, sizes, map types, and
9052   /// mappers for the extracted map clauses of a user-defined mapper (all
9053   /// included in \a CombinedInfo).
9054   void generateAllInfoForMapper(MapCombinedInfoTy &CombinedInfo) const {
9055     assert(CurDir.is<const OMPDeclareMapperDecl *>() &&
9056            "Expect a declare mapper directive");
9057     const auto *CurMapperDir = CurDir.get<const OMPDeclareMapperDecl *>();
9058     generateAllInfoForClauses(CurMapperDir->clauses(), CombinedInfo);
9059   }
9060
9061   /// Emit capture info for lambdas for variables captured by reference.
9062   void generateInfoForLambdaCaptures(
9063       const ValueDecl *VD, llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo,
9064       llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers) const {
9065     const auto *RD = VD->getType()
9066                          .getCanonicalType()
9067                          .getNonReferenceType()
9068                          ->getAsCXXRecordDecl();
9069     if (!RD || !RD->isLambda())
9070       return;
9071     Address VDAddr = Address(Arg, CGF.getContext().getDeclAlign(VD));
9072     LValue VDLVal = CGF.MakeAddrLValue(
9073         VDAddr, VD->getType().getCanonicalType().getNonReferenceType());
9074     llvm::DenseMap<const VarDecl *, FieldDecl *> Captures;
9075     FieldDecl *ThisCapture = nullptr;
9076     RD->getCaptureFields(Captures, ThisCapture);
9077     if (ThisCapture) {
9078       LValue ThisLVal =
9079           CGF.EmitLValueForFieldInitialization(VDLVal, ThisCapture);
9080       LValue ThisLValVal = CGF.EmitLValueForField(VDLVal, ThisCapture);
9081       LambdaPointers.try_emplace(ThisLVal.getPointer(CGF),
9082                                  VDLVal.getPointer(CGF));
9083       CombinedInfo.Exprs.push_back(VD);
9084       CombinedInfo.BasePointers.push_back(ThisLVal.getPointer(CGF));
9085       CombinedInfo.Pointers.push_back(ThisLValVal.getPointer(CGF));
9086       CombinedInfo.Sizes.push_back(
9087           CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy),
9088                                     CGF.Int64Ty, /*isSigned=*/true));
9089       CombinedInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
9090                                    OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
9091       CombinedInfo.Mappers.push_back(nullptr);
9092     }
9093     for (const LambdaCapture &LC : RD->captures()) {
9094       if (!LC.capturesVariable())
9095         continue;
9096       const VarDecl *VD = LC.getCapturedVar();
9097       if (LC.getCaptureKind() != LCK_ByRef && !VD->getType()->isPointerType())
9098         continue;
9099       auto It = Captures.find(VD);
9100       assert(It != Captures.end() && "Found lambda capture without field.");
9101       LValue VarLVal = CGF.EmitLValueForFieldInitialization(VDLVal, It->second);
9102       if (LC.getCaptureKind() == LCK_ByRef) {
9103         LValue VarLValVal = CGF.EmitLValueForField(VDLVal, It->second);
9104         LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
9105                                    VDLVal.getPointer(CGF));
9106         CombinedInfo.Exprs.push_back(VD);
9107         CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF));
9108         CombinedInfo.Pointers.push_back(VarLValVal.getPointer(CGF));
9109         CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
9110             CGF.getTypeSize(
9111                 VD->getType().getCanonicalType().getNonReferenceType()),
9112             CGF.Int64Ty, /*isSigned=*/true));
9113       } else {
9114         RValue VarRVal = CGF.EmitLoadOfLValue(VarLVal, RD->getLocation());
9115         LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
9116                                    VDLVal.getPointer(CGF));
9117         CombinedInfo.Exprs.push_back(VD);
9118         CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF));
9119         CombinedInfo.Pointers.push_back(VarRVal.getScalarVal());
9120         CombinedInfo.Sizes.push_back(llvm::ConstantInt::get(CGF.Int64Ty, 0));
9121       }
9122       CombinedInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
9123                                    OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
9124       CombinedInfo.Mappers.push_back(nullptr);
9125     }
9126   }
9127
9128   /// Set correct indices for lambda captures.
9129   void adjustMemberOfForLambdaCaptures(
9130       const llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers,
9131       MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
9132       MapFlagsArrayTy &Types) const {
9133     for (unsigned I = 0, E = Types.size(); I < E; ++I) {
9134       // Set the correct member_of index for all implicit lambda captures.
9135       if (Types[I] != (OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
9136                        OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT))
9137         continue;
9138       llvm::Value *BasePtr = LambdaPointers.lookup(*BasePointers[I]);
9139       assert(BasePtr && "Unable to find base lambda address.");
9140       int TgtIdx = -1;
9141       for (unsigned J = I; J > 0; --J) {
9142         unsigned Idx = J - 1;
9143         if (Pointers[Idx] != BasePtr)
9144           continue;
9145         TgtIdx = Idx;
9146         break;
9147       }
9148       assert(TgtIdx != -1 && "Unable to find parent lambda.");
9149       // All other current entries will be MEMBER_OF the combined entry
9150       // (except for PTR_AND_OBJ entries which do not have a placeholder value
9151       // 0xFFFF in the MEMBER_OF field).
9152       OpenMPOffloadMappingFlags MemberOfFlag = getMemberOfFlag(TgtIdx);
9153       setCorrectMemberOfFlag(Types[I], MemberOfFlag);
9154     }
9155   }
9156
9157   /// Generate the base pointers, section pointers, sizes, map types, and
9158   /// mappers associated with a given capture (all included in \a CombinedInfo).
9159   void generateInfoForCapture(const CapturedStmt::Capture *Cap,
9160                               llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo,
9161                               StructRangeInfoTy &PartialStruct) const {
9162     assert(!Cap->capturesVariableArrayType() &&
9163            "Not expecting to generate map info for a variable array type!");
9164
9165     // We need to know when we are generating info for the first component.
9166     const ValueDecl *VD = Cap->capturesThis()
9167                               ? nullptr
9168                               : Cap->getCapturedVar()->getCanonicalDecl();
9169
9170     // For map(to: lambda): skip here; it is processed in
9171     // generateDefaultMapInfo.
9172     if (LambdasMap.count(VD))
9173       return;
9174
9175     // If this declaration appears in an is_device_ptr clause, we just have
9176     // to pass the pointer by value. If it is a reference to a declaration,
9177     // we just pass its value.
9178     if (DevPointersMap.count(VD)) {
9179       CombinedInfo.Exprs.push_back(VD);
9180       CombinedInfo.BasePointers.emplace_back(Arg, VD);
9181       CombinedInfo.Pointers.push_back(Arg);
9182       CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
9183           CGF.getTypeSize(CGF.getContext().VoidPtrTy), CGF.Int64Ty,
9184           /*isSigned=*/true));
9185       CombinedInfo.Types.push_back(
9186           (Cap->capturesVariable() ? OMP_MAP_TO : OMP_MAP_LITERAL) |
9187           OMP_MAP_TARGET_PARAM);
9188       CombinedInfo.Mappers.push_back(nullptr);
9189       return;
9190     }
9191
9192     using MapData =
9193         std::tuple<OMPClauseMappableExprCommon::MappableExprComponentListRef,
9194                    OpenMPMapClauseKind, ArrayRef<OpenMPMapModifierKind>, bool,
9195                    const ValueDecl *, const Expr *>;
9196     SmallVector<MapData, 4> DeclComponentLists;
9197     assert(CurDir.is<const OMPExecutableDirective *>() &&
9198            "Expect an executable directive");
9199     const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
9200     for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) {
9201       const auto *EI = C->getVarRefs().begin();
9202       for (const auto L : C->decl_component_lists(VD)) {
9203         const ValueDecl *VDecl, *Mapper;
9204         // The expression is not correct if the mapping is implicit.
9205         const Expr *E = (C->getMapLoc().isValid()) ? *EI : nullptr;
9206         OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
9207         std::tie(VDecl, Components, Mapper) = L;
9208         assert(VDecl == VD && "We got information for the wrong declaration??");
9209         assert(!Components.empty() &&
9210                "Not expecting declaration with no component lists.");
9211         DeclComponentLists.emplace_back(Components, C->getMapType(),
9212                                         C->getMapTypeModifiers(),
9213                                         C->isImplicit(), Mapper, E);
9214         ++EI;
9215       }
9216     }
9217     llvm::stable_sort(DeclComponentLists, [](const MapData &LHS,
9218                                              const MapData &RHS) {
9219       ArrayRef<OpenMPMapModifierKind> MapModifiers = std::get<2>(LHS);
9220       OpenMPMapClauseKind MapType = std::get<1>(RHS);
9221       bool HasPresent =
9222           llvm::is_contained(MapModifiers, clang::OMPC_MAP_MODIFIER_present);
9223       bool HasAllocs = MapType == OMPC_MAP_alloc;
9224       MapModifiers = std::get<2>(RHS);
9225       MapType = std::get<1>(LHS);
9226       bool HasPresentR =
9227           llvm::is_contained(MapModifiers, clang::OMPC_MAP_MODIFIER_present);
9228       bool HasAllocsR = MapType == OMPC_MAP_alloc;
9229       return (HasPresent && !HasPresentR) || (HasAllocs && !HasAllocsR);
9230     });
9231
9232     // Find overlapping elements (including the offset from the base element).
9233     llvm::SmallDenseMap<
9234         const MapData *,
9235         llvm::SmallVector<
9236             OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>,
9237         4>
9238         OverlappedData;
9239     size_t Count = 0;
9240     for (const MapData &L : DeclComponentLists) {
9241       OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
9242       OpenMPMapClauseKind MapType;
9243       ArrayRef<OpenMPMapModifierKind> MapModifiers;
9244       bool IsImplicit;
9245       const ValueDecl *Mapper;
9246       const Expr *VarRef;
9247       std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
9248           L;
9249       ++Count;
9250       for (const MapData &L1 : makeArrayRef(DeclComponentLists).slice(Count)) {
9251         OMPClauseMappableExprCommon::MappableExprComponentListRef Components1;
9252         std::tie(Components1, MapType, MapModifiers, IsImplicit, Mapper,
9253                  VarRef) = L1;
9254         auto CI = Components.rbegin();
9255         auto CE = Components.rend();
9256         auto SI = Components1.rbegin();
9257         auto SE = Components1.rend();
9258         for (; CI != CE && SI != SE; ++CI, ++SI) {
9259           if (CI->getAssociatedExpression()->getStmtClass() !=
9260               SI->getAssociatedExpression()->getStmtClass())
9261             break;
9262           // Are we dealing with different variables/fields?
9263           if (CI->getAssociatedDeclaration() != SI->getAssociatedDeclaration())
9264             break;
9265         }
9266         // We found an overlap if, for at least one of the component lists,
9267         // we reached the head of the list.
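        // For instance (hypothetical clauses, as a sketch): with
        // 'map(tofrom: s)' and 'map(to: s.x)', both reversed lists start at
        // the base 's'; the first list is then exhausted, so 's' is recorded
        // as overlapping with the sublist for 's.x'.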
9268         if (CI == CE || SI == SE) {
9269           // Ignore it if it is the same component.
9270           if (CI == CE && SI == SE)
9271             continue;
9272           const auto It = (SI == SE) ? CI : SI;
9273           // If one component is a pointer and another one is a kind of
9274           // dereference of this pointer (array subscript, section,
9275           // dereference, etc.), it is not an overlap.
9276           // Similarly, if one component is a base and another component is a
9277           // dereferenced pointer memberexpr with the same base.
9278           if (!isa<MemberExpr>(It->getAssociatedExpression()) ||
9279               (std::prev(It)->getAssociatedDeclaration() &&
9280                std::prev(It)
9281                    ->getAssociatedDeclaration()
9282                    ->getType()
9283                    ->isPointerType()) ||
9284               (It->getAssociatedDeclaration() &&
9285                It->getAssociatedDeclaration()->getType()->isPointerType() &&
9286                std::next(It) != CE && std::next(It) != SE))
9287             continue;
9288           const MapData &BaseData = CI == CE ? L : L1;
9289           OMPClauseMappableExprCommon::MappableExprComponentListRef SubData =
9290               SI == SE ? Components : Components1;
9291           auto &OverlappedElements = OverlappedData.FindAndConstruct(&BaseData);
9292           OverlappedElements.getSecond().push_back(SubData);
9293         }
9294       }
9295     }
9296     // Sort the overlapped elements for each item.
9297     llvm::SmallVector<const FieldDecl *, 4> Layout;
9298     if (!OverlappedData.empty()) {
9299       const Type *BaseType = VD->getType().getCanonicalType().getTypePtr();
9300       const Type *OrigType = BaseType->getPointeeOrArrayElementType();
9301       while (BaseType != OrigType) {
9302         BaseType = OrigType->getCanonicalTypeInternal().getTypePtr();
9303         OrigType = BaseType->getPointeeOrArrayElementType();
9304       }
9305
9306       if (const auto *CRD = BaseType->getAsCXXRecordDecl())
9307         getPlainLayout(CRD, Layout, /*AsBase=*/false);
9308       else {
9309         const auto *RD = BaseType->getAsRecordDecl();
9310         Layout.append(RD->field_begin(), RD->field_end());
9311       }
9312     }
9313     for (auto &Pair : OverlappedData) {
9314       llvm::stable_sort(
9315           Pair.getSecond(),
9316           [&Layout](
9317               OMPClauseMappableExprCommon::MappableExprComponentListRef First,
9318               OMPClauseMappableExprCommon::MappableExprComponentListRef
9319                   Second) {
9320             auto CI = First.rbegin();
9321             auto CE = First.rend();
9322             auto SI = Second.rbegin();
9323             auto SE = Second.rend();
9324             for (; CI != CE && SI != SE; ++CI, ++SI) {
9325               if (CI->getAssociatedExpression()->getStmtClass() !=
9326                   SI->getAssociatedExpression()->getStmtClass())
9327                 break;
9328               // Are we dealing with different variables/fields?
9329               if (CI->getAssociatedDeclaration() !=
9330                   SI->getAssociatedDeclaration())
9331                 break;
9332             }
9333
9334             // Lists contain the same elements.
9335             if (CI == CE && SI == SE)
9336               return false;
9337
9338             // A list with fewer elements is less than a list with more elements.
9339             if (CI == CE || SI == SE)
9340               return CI == CE;
9341
9342             const auto *FD1 = cast<FieldDecl>(CI->getAssociatedDeclaration());
9343             const auto *FD2 = cast<FieldDecl>(SI->getAssociatedDeclaration());
9344             if (FD1->getParent() == FD2->getParent())
9345               return FD1->getFieldIndex() < FD2->getFieldIndex();
9346             const auto *It =
9347                 llvm::find_if(Layout, [FD1, FD2](const FieldDecl *FD) {
9348                   return FD == FD1 || FD == FD2;
9349                 });
9350             return *It == FD1;
9351           });
9352     }
9353
9354     // This is associated with a capture, because the mapping flags depend on
9355     // it. First, go through all of the elements that have overlapped elements.
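    // E.g., continuing the sketch above: the component list for
    // 'map(tofrom: s)' would be emitted first, with the overlapped member
    // 's.x' carved out of it, and the remaining lists are then emitted as
    // ordinary component lists below.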
9356 bool IsFirstComponentList = true; 9357 for (const auto &Pair : OverlappedData) { 9358 const MapData &L = *Pair.getFirst(); 9359 OMPClauseMappableExprCommon::MappableExprComponentListRef Components; 9360 OpenMPMapClauseKind MapType; 9361 ArrayRef<OpenMPMapModifierKind> MapModifiers; 9362 bool IsImplicit; 9363 const ValueDecl *Mapper; 9364 const Expr *VarRef; 9365 std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) = 9366 L; 9367 ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef> 9368 OverlappedComponents = Pair.getSecond(); 9369 generateInfoForComponentList( 9370 MapType, MapModifiers, llvm::None, Components, CombinedInfo, 9371 PartialStruct, IsFirstComponentList, IsImplicit, Mapper, 9372 /*ForDeviceAddr=*/false, VD, VarRef, OverlappedComponents); 9373 IsFirstComponentList = false; 9374 } 9375 // Go through other elements without overlapped elements. 9376 for (const MapData &L : DeclComponentLists) { 9377 OMPClauseMappableExprCommon::MappableExprComponentListRef Components; 9378 OpenMPMapClauseKind MapType; 9379 ArrayRef<OpenMPMapModifierKind> MapModifiers; 9380 bool IsImplicit; 9381 const ValueDecl *Mapper; 9382 const Expr *VarRef; 9383 std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) = 9384 L; 9385 auto It = OverlappedData.find(&L); 9386 if (It == OverlappedData.end()) 9387 generateInfoForComponentList(MapType, MapModifiers, llvm::None, 9388 Components, CombinedInfo, PartialStruct, 9389 IsFirstComponentList, IsImplicit, Mapper, 9390 /*ForDeviceAddr=*/false, VD, VarRef); 9391 IsFirstComponentList = false; 9392 } 9393 } 9394 9395 /// Generate the default map information for a given capture \a CI, 9396 /// record field declaration \a RI and captured value \a CV. 9397 void generateDefaultMapInfo(const CapturedStmt::Capture &CI, 9398 const FieldDecl &RI, llvm::Value *CV, 9399 MapCombinedInfoTy &CombinedInfo) const { 9400 bool IsImplicit = true; 9401 // Do the default mapping. 9402 if (CI.capturesThis()) { 9403 CombinedInfo.Exprs.push_back(nullptr); 9404 CombinedInfo.BasePointers.push_back(CV); 9405 CombinedInfo.Pointers.push_back(CV); 9406 const auto *PtrTy = cast<PointerType>(RI.getType().getTypePtr()); 9407 CombinedInfo.Sizes.push_back( 9408 CGF.Builder.CreateIntCast(CGF.getTypeSize(PtrTy->getPointeeType()), 9409 CGF.Int64Ty, /*isSigned=*/true)); 9410 // Default map type. 9411 CombinedInfo.Types.push_back(OMP_MAP_TO | OMP_MAP_FROM); 9412 } else if (CI.capturesVariableByCopy()) { 9413 const VarDecl *VD = CI.getCapturedVar(); 9414 CombinedInfo.Exprs.push_back(VD->getCanonicalDecl()); 9415 CombinedInfo.BasePointers.push_back(CV); 9416 CombinedInfo.Pointers.push_back(CV); 9417 if (!RI.getType()->isAnyPointerType()) { 9418 // We have to signal to the runtime captures passed by value that are 9419 // not pointers. 9420 CombinedInfo.Types.push_back(OMP_MAP_LITERAL); 9421 CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast( 9422 CGF.getTypeSize(RI.getType()), CGF.Int64Ty, /*isSigned=*/true)); 9423 } else { 9424 // Pointers are implicitly mapped with a zero size and no flags 9425 // (other than first map that is added for all implicit maps). 
9426         CombinedInfo.Types.push_back(OMP_MAP_NONE);
9427         CombinedInfo.Sizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty));
9428       }
9429       auto I = FirstPrivateDecls.find(VD);
9430       if (I != FirstPrivateDecls.end())
9431         IsImplicit = I->getSecond();
9432     } else {
9433       assert(CI.capturesVariable() && "Expected captured reference.");
9434       const auto *PtrTy = cast<ReferenceType>(RI.getType().getTypePtr());
9435       QualType ElementType = PtrTy->getPointeeType();
9436       CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
9437           CGF.getTypeSize(ElementType), CGF.Int64Ty, /*isSigned=*/true));
9438       // The default map type for a scalar/complex type is 'to' because by
9439       // default the value doesn't have to be retrieved. For an aggregate
9440       // type, the default is 'tofrom'.
9441       CombinedInfo.Types.push_back(getMapModifiersForPrivateClauses(CI));
9442       const VarDecl *VD = CI.getCapturedVar();
9443       auto I = FirstPrivateDecls.find(VD);
9444       CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
9445       CombinedInfo.BasePointers.push_back(CV);
9446       if (I != FirstPrivateDecls.end() && ElementType->isAnyPointerType()) {
9447         Address PtrAddr = CGF.EmitLoadOfReference(CGF.MakeAddrLValue(
9448             CV, ElementType, CGF.getContext().getDeclAlign(VD),
9449             AlignmentSource::Decl));
9450         CombinedInfo.Pointers.push_back(PtrAddr.getPointer());
9451       } else {
9452         CombinedInfo.Pointers.push_back(CV);
9453       }
9454       if (I != FirstPrivateDecls.end())
9455         IsImplicit = I->getSecond();
9456     }
9457     // Every default map produces a single argument which is a target parameter.
9458     CombinedInfo.Types.back() |= OMP_MAP_TARGET_PARAM;
9459
9460     // Add a flag stating this is an implicit map.
9461     if (IsImplicit)
9462       CombinedInfo.Types.back() |= OMP_MAP_IMPLICIT;
9463
9464     // No user-defined mapper for default mapping.
9465     CombinedInfo.Mappers.push_back(nullptr);
9466   }
9467 };
9468 } // anonymous namespace
9469
9470 static void emitNonContiguousDescriptor(
9471     CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo,
9472     CGOpenMPRuntime::TargetDataInfo &Info) {
9473   CodeGenModule &CGM = CGF.CGM;
9474   MappableExprsHandler::MapCombinedInfoTy::StructNonContiguousInfo
9475       &NonContigInfo = CombinedInfo.NonContigInfo;
9476
9477   // Build an array of struct descriptor_dim and then assign it to
9478   // offload_args.
9479   //
9480   // struct descriptor_dim {
9481   //   uint64_t offset;
9482   //   uint64_t count;
9483   //   uint64_t stride
9484   // };
9485   ASTContext &C = CGF.getContext();
9486   QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
9487   RecordDecl *RD;
9488   RD = C.buildImplicitRecord("descriptor_dim");
9489   RD->startDefinition();
9490   addFieldToRecordDecl(C, RD, Int64Ty);
9491   addFieldToRecordDecl(C, RD, Int64Ty);
9492   addFieldToRecordDecl(C, RD, Int64Ty);
9493   RD->completeDefinition();
9494   QualType DimTy = C.getRecordType(RD);
9495
9496   enum { OffsetFD = 0, CountFD, StrideFD };
9497   // We need two index variables here since the size of "Dims" is the same as
9498   // the size of Components; however, the size of offset, count, and stride is
9499   // equal to the number of base declarations that are non-contiguous.
9500   for (unsigned I = 0, L = 0, E = NonContigInfo.Dims.size(); I < E; ++I) {
9501     // Skip emitting IR if the dimension size is 1, since it cannot be
9502     // non-contiguous.
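    // (A dimension of extent 1 covers at most one element and thus can never
    // split a transfer; e.g. for a hypothetical 'int a[1][8]', only the inner
    // dimension could make an update non-contiguous.)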
9503 if (NonContigInfo.Dims[I] == 1) 9504 continue; 9505 llvm::APInt Size(/*numBits=*/32, NonContigInfo.Dims[I]); 9506 QualType ArrayTy = 9507 C.getConstantArrayType(DimTy, Size, nullptr, ArrayType::Normal, 0); 9508 Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims"); 9509 for (unsigned II = 0, EE = NonContigInfo.Dims[I]; II < EE; ++II) { 9510 unsigned RevIdx = EE - II - 1; 9511 LValue DimsLVal = CGF.MakeAddrLValue( 9512 CGF.Builder.CreateConstArrayGEP(DimsAddr, II), DimTy); 9513 // Offset 9514 LValue OffsetLVal = CGF.EmitLValueForField( 9515 DimsLVal, *std::next(RD->field_begin(), OffsetFD)); 9516 CGF.EmitStoreOfScalar(NonContigInfo.Offsets[L][RevIdx], OffsetLVal); 9517 // Count 9518 LValue CountLVal = CGF.EmitLValueForField( 9519 DimsLVal, *std::next(RD->field_begin(), CountFD)); 9520 CGF.EmitStoreOfScalar(NonContigInfo.Counts[L][RevIdx], CountLVal); 9521 // Stride 9522 LValue StrideLVal = CGF.EmitLValueForField( 9523 DimsLVal, *std::next(RD->field_begin(), StrideFD)); 9524 CGF.EmitStoreOfScalar(NonContigInfo.Strides[L][RevIdx], StrideLVal); 9525 } 9526 // args[I] = &dims 9527 Address DAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 9528 DimsAddr, CGM.Int8PtrTy); 9529 llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32( 9530 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs), 9531 Info.PointersArray, 0, I); 9532 Address PAddr(P, CGF.getPointerAlign()); 9533 CGF.Builder.CreateStore(DAddr.getPointer(), PAddr); 9534 ++L; 9535 } 9536 } 9537 9538 // Try to extract the base declaration from a `this->x` expression if possible. 9539 static ValueDecl *getDeclFromThisExpr(const Expr *E) { 9540 if (!E) 9541 return nullptr; 9542 9543 if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E->IgnoreParenCasts())) 9544 if (const MemberExpr *ME = 9545 dyn_cast<MemberExpr>(OASE->getBase()->IgnoreParenImpCasts())) 9546 return ME->getMemberDecl(); 9547 return nullptr; 9548 } 9549 9550 /// Emit a string constant containing the names of the values mapped to the 9551 /// offloading runtime library. 9552 llvm::Constant * 9553 emitMappingInformation(CodeGenFunction &CGF, llvm::OpenMPIRBuilder &OMPBuilder, 9554 MappableExprsHandler::MappingExprInfo &MapExprs) { 9555 9556 if (!MapExprs.getMapDecl() && !MapExprs.getMapExpr()) 9557 return OMPBuilder.getOrCreateDefaultSrcLocStr(); 9558 9559 SourceLocation Loc; 9560 if (!MapExprs.getMapDecl() && MapExprs.getMapExpr()) { 9561 if (const ValueDecl *VD = getDeclFromThisExpr(MapExprs.getMapExpr())) 9562 Loc = VD->getLocation(); 9563 else 9564 Loc = MapExprs.getMapExpr()->getExprLoc(); 9565 } else { 9566 Loc = MapExprs.getMapDecl()->getLocation(); 9567 } 9568 9569 std::string ExprName = ""; 9570 if (MapExprs.getMapExpr()) { 9571 PrintingPolicy P(CGF.getContext().getLangOpts()); 9572 llvm::raw_string_ostream OS(ExprName); 9573 MapExprs.getMapExpr()->printPretty(OS, nullptr, P); 9574 OS.flush(); 9575 } else { 9576 ExprName = MapExprs.getMapDecl()->getNameAsString(); 9577 } 9578 9579 PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc); 9580 return OMPBuilder.getOrCreateSrcLocStr(PLoc.getFilename(), ExprName.c_str(), 9581 PLoc.getLine(), PLoc.getColumn()); 9582 } 9583 9584 /// Emit the arrays used to pass the captures and map information to the 9585 /// offloading runtime library. If there is no map or capture information, 9586 /// return nullptr by reference. 
9587 static void emitOffloadingArrays( 9588 CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo, 9589 CGOpenMPRuntime::TargetDataInfo &Info, llvm::OpenMPIRBuilder &OMPBuilder, 9590 bool IsNonContiguous = false) { 9591 CodeGenModule &CGM = CGF.CGM; 9592 ASTContext &Ctx = CGF.getContext(); 9593 9594 // Reset the array information. 9595 Info.clearArrayInfo(); 9596 Info.NumberOfPtrs = CombinedInfo.BasePointers.size(); 9597 9598 if (Info.NumberOfPtrs) { 9599 // Detect if we have any capture size requiring runtime evaluation of the 9600 // size so that a constant array could be eventually used. 9601 bool hasRuntimeEvaluationCaptureSize = false; 9602 for (llvm::Value *S : CombinedInfo.Sizes) 9603 if (!isa<llvm::Constant>(S)) { 9604 hasRuntimeEvaluationCaptureSize = true; 9605 break; 9606 } 9607 9608 llvm::APInt PointerNumAP(32, Info.NumberOfPtrs, /*isSigned=*/true); 9609 QualType PointerArrayType = Ctx.getConstantArrayType( 9610 Ctx.VoidPtrTy, PointerNumAP, nullptr, ArrayType::Normal, 9611 /*IndexTypeQuals=*/0); 9612 9613 Info.BasePointersArray = 9614 CGF.CreateMemTemp(PointerArrayType, ".offload_baseptrs").getPointer(); 9615 Info.PointersArray = 9616 CGF.CreateMemTemp(PointerArrayType, ".offload_ptrs").getPointer(); 9617 Address MappersArray = 9618 CGF.CreateMemTemp(PointerArrayType, ".offload_mappers"); 9619 Info.MappersArray = MappersArray.getPointer(); 9620 9621 // If we don't have any VLA types or other types that require runtime 9622 // evaluation, we can use a constant array for the map sizes, otherwise we 9623 // need to fill up the arrays as we do for the pointers. 9624 QualType Int64Ty = 9625 Ctx.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1); 9626 if (hasRuntimeEvaluationCaptureSize) { 9627 QualType SizeArrayType = Ctx.getConstantArrayType( 9628 Int64Ty, PointerNumAP, nullptr, ArrayType::Normal, 9629 /*IndexTypeQuals=*/0); 9630 Info.SizesArray = 9631 CGF.CreateMemTemp(SizeArrayType, ".offload_sizes").getPointer(); 9632 } else { 9633 // We expect all the sizes to be constant, so we collect them to create 9634 // a constant array. 9635 SmallVector<llvm::Constant *, 16> ConstSizes; 9636 for (unsigned I = 0, E = CombinedInfo.Sizes.size(); I < E; ++I) { 9637 if (IsNonContiguous && 9638 (CombinedInfo.Types[I] & MappableExprsHandler::OMP_MAP_NON_CONTIG)) { 9639 ConstSizes.push_back(llvm::ConstantInt::get( 9640 CGF.Int64Ty, CombinedInfo.NonContigInfo.Dims[I])); 9641 } else { 9642 ConstSizes.push_back(cast<llvm::Constant>(CombinedInfo.Sizes[I])); 9643 } 9644 } 9645 9646 auto *SizesArrayInit = llvm::ConstantArray::get( 9647 llvm::ArrayType::get(CGM.Int64Ty, ConstSizes.size()), ConstSizes); 9648 std::string Name = CGM.getOpenMPRuntime().getName({"offload_sizes"}); 9649 auto *SizesArrayGbl = new llvm::GlobalVariable( 9650 CGM.getModule(), SizesArrayInit->getType(), 9651 /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage, 9652 SizesArrayInit, Name); 9653 SizesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); 9654 Info.SizesArray = SizesArrayGbl; 9655 } 9656 9657 // The map types are always constant so we don't need to generate code to 9658 // fill arrays. Instead, we create an array constant. 
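      // As an illustrative sketch (the values below are made up for this
      // comment), the resulting global could look like:
      //   @.offload_maptypes = private unnamed_addr constant [2 x i64]
      //                        [i64 35, i64 19]
      // where 35 = TO | FROM | TARGET_PARAM and 19 = TO | FROM | PTR_AND_OBJ.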
9659 SmallVector<uint64_t, 4> Mapping(CombinedInfo.Types.size(), 0); 9660 llvm::copy(CombinedInfo.Types, Mapping.begin()); 9661 std::string MaptypesName = 9662 CGM.getOpenMPRuntime().getName({"offload_maptypes"}); 9663 auto *MapTypesArrayGbl = 9664 OMPBuilder.createOffloadMaptypes(Mapping, MaptypesName); 9665 Info.MapTypesArray = MapTypesArrayGbl; 9666 9667 // The information types are only built if there is debug information 9668 // requested. 9669 if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo) { 9670 Info.MapNamesArray = llvm::Constant::getNullValue( 9671 llvm::Type::getInt8Ty(CGF.Builder.getContext())->getPointerTo()); 9672 } else { 9673 auto fillInfoMap = [&](MappableExprsHandler::MappingExprInfo &MapExpr) { 9674 return emitMappingInformation(CGF, OMPBuilder, MapExpr); 9675 }; 9676 SmallVector<llvm::Constant *, 4> InfoMap(CombinedInfo.Exprs.size()); 9677 llvm::transform(CombinedInfo.Exprs, InfoMap.begin(), fillInfoMap); 9678 std::string MapnamesName = 9679 CGM.getOpenMPRuntime().getName({"offload_mapnames"}); 9680 auto *MapNamesArrayGbl = 9681 OMPBuilder.createOffloadMapnames(InfoMap, MapnamesName); 9682 Info.MapNamesArray = MapNamesArrayGbl; 9683 } 9684 9685 // If there's a present map type modifier, it must not be applied to the end 9686 // of a region, so generate a separate map type array in that case. 9687 if (Info.separateBeginEndCalls()) { 9688 bool EndMapTypesDiffer = false; 9689 for (uint64_t &Type : Mapping) { 9690 if (Type & MappableExprsHandler::OMP_MAP_PRESENT) { 9691 Type &= ~MappableExprsHandler::OMP_MAP_PRESENT; 9692 EndMapTypesDiffer = true; 9693 } 9694 } 9695 if (EndMapTypesDiffer) { 9696 MapTypesArrayGbl = 9697 OMPBuilder.createOffloadMaptypes(Mapping, MaptypesName); 9698 Info.MapTypesArrayEnd = MapTypesArrayGbl; 9699 } 9700 } 9701 9702 for (unsigned I = 0; I < Info.NumberOfPtrs; ++I) { 9703 llvm::Value *BPVal = *CombinedInfo.BasePointers[I]; 9704 llvm::Value *BP = CGF.Builder.CreateConstInBoundsGEP2_32( 9705 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs), 9706 Info.BasePointersArray, 0, I); 9707 BP = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 9708 BP, BPVal->getType()->getPointerTo(/*AddrSpace=*/0)); 9709 Address BPAddr(BP, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy)); 9710 CGF.Builder.CreateStore(BPVal, BPAddr); 9711 9712 if (Info.requiresDevicePointerInfo()) 9713 if (const ValueDecl *DevVD = 9714 CombinedInfo.BasePointers[I].getDevicePtrDecl()) 9715 Info.CaptureDeviceAddrMap.try_emplace(DevVD, BPAddr); 9716 9717 llvm::Value *PVal = CombinedInfo.Pointers[I]; 9718 llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32( 9719 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs), 9720 Info.PointersArray, 0, I); 9721 P = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 9722 P, PVal->getType()->getPointerTo(/*AddrSpace=*/0)); 9723 Address PAddr(P, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy)); 9724 CGF.Builder.CreateStore(PVal, PAddr); 9725 9726 if (hasRuntimeEvaluationCaptureSize) { 9727 llvm::Value *S = CGF.Builder.CreateConstInBoundsGEP2_32( 9728 llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs), 9729 Info.SizesArray, 9730 /*Idx0=*/0, 9731 /*Idx1=*/I); 9732 Address SAddr(S, Ctx.getTypeAlignInChars(Int64Ty)); 9733 CGF.Builder.CreateStore(CGF.Builder.CreateIntCast(CombinedInfo.Sizes[I], 9734 CGM.Int64Ty, 9735 /*isSigned=*/true), 9736 SAddr); 9737 } 9738 9739 // Fill up the mapper array. 
9740 llvm::Value *MFunc = llvm::ConstantPointerNull::get(CGM.VoidPtrTy); 9741 if (CombinedInfo.Mappers[I]) { 9742 MFunc = CGM.getOpenMPRuntime().getOrCreateUserDefinedMapperFunc( 9743 cast<OMPDeclareMapperDecl>(CombinedInfo.Mappers[I])); 9744 MFunc = CGF.Builder.CreatePointerCast(MFunc, CGM.VoidPtrTy); 9745 Info.HasMapper = true; 9746 } 9747 Address MAddr = CGF.Builder.CreateConstArrayGEP(MappersArray, I); 9748 CGF.Builder.CreateStore(MFunc, MAddr); 9749 } 9750 } 9751 9752 if (!IsNonContiguous || CombinedInfo.NonContigInfo.Offsets.empty() || 9753 Info.NumberOfPtrs == 0) 9754 return; 9755 9756 emitNonContiguousDescriptor(CGF, CombinedInfo, Info); 9757 } 9758 9759 namespace { 9760 /// Additional arguments for emitOffloadingArraysArgument function. 9761 struct ArgumentsOptions { 9762 bool ForEndCall = false; 9763 ArgumentsOptions() = default; 9764 ArgumentsOptions(bool ForEndCall) : ForEndCall(ForEndCall) {} 9765 }; 9766 } // namespace 9767 9768 /// Emit the arguments to be passed to the runtime library based on the 9769 /// arrays of base pointers, pointers, sizes, map types, and mappers. If 9770 /// ForEndCall, emit map types to be passed for the end of the region instead of 9771 /// the beginning. 9772 static void emitOffloadingArraysArgument( 9773 CodeGenFunction &CGF, llvm::Value *&BasePointersArrayArg, 9774 llvm::Value *&PointersArrayArg, llvm::Value *&SizesArrayArg, 9775 llvm::Value *&MapTypesArrayArg, llvm::Value *&MapNamesArrayArg, 9776 llvm::Value *&MappersArrayArg, CGOpenMPRuntime::TargetDataInfo &Info, 9777 const ArgumentsOptions &Options = ArgumentsOptions()) { 9778 assert((!Options.ForEndCall || Info.separateBeginEndCalls()) && 9779 "expected region end call to runtime only when end call is separate"); 9780 CodeGenModule &CGM = CGF.CGM; 9781 if (Info.NumberOfPtrs) { 9782 BasePointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( 9783 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs), 9784 Info.BasePointersArray, 9785 /*Idx0=*/0, /*Idx1=*/0); 9786 PointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( 9787 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs), 9788 Info.PointersArray, 9789 /*Idx0=*/0, 9790 /*Idx1=*/0); 9791 SizesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( 9792 llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs), Info.SizesArray, 9793 /*Idx0=*/0, /*Idx1=*/0); 9794 MapTypesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( 9795 llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs), 9796 Options.ForEndCall && Info.MapTypesArrayEnd ? Info.MapTypesArrayEnd 9797 : Info.MapTypesArray, 9798 /*Idx0=*/0, 9799 /*Idx1=*/0); 9800 9801 // Only emit the mapper information arrays if debug information is 9802 // requested. 
9803 if (CGF.CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo) 9804 MapNamesArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy); 9805 else 9806 MapNamesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( 9807 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs), 9808 Info.MapNamesArray, 9809 /*Idx0=*/0, 9810 /*Idx1=*/0); 9811 // If there is no user-defined mapper, set the mapper array to nullptr to 9812 // avoid an unnecessary data privatization 9813 if (!Info.HasMapper) 9814 MappersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy); 9815 else 9816 MappersArrayArg = 9817 CGF.Builder.CreatePointerCast(Info.MappersArray, CGM.VoidPtrPtrTy); 9818 } else { 9819 BasePointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy); 9820 PointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy); 9821 SizesArrayArg = llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo()); 9822 MapTypesArrayArg = 9823 llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo()); 9824 MapNamesArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy); 9825 MappersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy); 9826 } 9827 } 9828 9829 /// Check for inner distribute directive. 9830 static const OMPExecutableDirective * 9831 getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) { 9832 const auto *CS = D.getInnermostCapturedStmt(); 9833 const auto *Body = 9834 CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true); 9835 const Stmt *ChildStmt = 9836 CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body); 9837 9838 if (const auto *NestedDir = 9839 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) { 9840 OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind(); 9841 switch (D.getDirectiveKind()) { 9842 case OMPD_target: 9843 if (isOpenMPDistributeDirective(DKind)) 9844 return NestedDir; 9845 if (DKind == OMPD_teams) { 9846 Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers( 9847 /*IgnoreCaptured=*/true); 9848 if (!Body) 9849 return nullptr; 9850 ChildStmt = CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body); 9851 if (const auto *NND = 9852 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) { 9853 DKind = NND->getDirectiveKind(); 9854 if (isOpenMPDistributeDirective(DKind)) 9855 return NND; 9856 } 9857 } 9858 return nullptr; 9859 case OMPD_target_teams: 9860 if (isOpenMPDistributeDirective(DKind)) 9861 return NestedDir; 9862 return nullptr; 9863 case OMPD_target_parallel: 9864 case OMPD_target_simd: 9865 case OMPD_target_parallel_for: 9866 case OMPD_target_parallel_for_simd: 9867 return nullptr; 9868 case OMPD_target_teams_distribute: 9869 case OMPD_target_teams_distribute_simd: 9870 case OMPD_target_teams_distribute_parallel_for: 9871 case OMPD_target_teams_distribute_parallel_for_simd: 9872 case OMPD_parallel: 9873 case OMPD_for: 9874 case OMPD_parallel_for: 9875 case OMPD_parallel_master: 9876 case OMPD_parallel_sections: 9877 case OMPD_for_simd: 9878 case OMPD_parallel_for_simd: 9879 case OMPD_cancel: 9880 case OMPD_cancellation_point: 9881 case OMPD_ordered: 9882 case OMPD_threadprivate: 9883 case OMPD_allocate: 9884 case OMPD_task: 9885 case OMPD_simd: 9886 case OMPD_tile: 9887 case OMPD_unroll: 9888 case OMPD_sections: 9889 case OMPD_section: 9890 case OMPD_single: 9891 case OMPD_master: 9892 case OMPD_critical: 9893 case OMPD_taskyield: 9894 case OMPD_barrier: 9895 case OMPD_taskwait: 9896 case OMPD_taskgroup: 9897 case OMPD_atomic: 9898 case OMPD_flush: 9899 case OMPD_depobj: 9900 case 
    case OMPD_scan:
    case OMPD_teams:
    case OMPD_target_data:
    case OMPD_target_exit_data:
    case OMPD_target_enter_data:
    case OMPD_distribute:
    case OMPD_distribute_simd:
    case OMPD_distribute_parallel_for:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_target_update:
    case OMPD_declare_simd:
    case OMPD_declare_variant:
    case OMPD_begin_declare_variant:
    case OMPD_end_declare_variant:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_master_taskloop:
    case OMPD_master_taskloop_simd:
    case OMPD_parallel_master_taskloop:
    case OMPD_parallel_master_taskloop_simd:
    case OMPD_requires:
    case OMPD_metadirective:
    case OMPD_unknown:
    default:
      llvm_unreachable("Unexpected directive.");
    }
  }

  return nullptr;
}

/// Emit the user-defined mapper function. The code generation follows the
/// pattern in the example below.
/// \code
/// void .omp_mapper.<type_name>.<mapper_id>.(void *rt_mapper_handle,
///                                           void *base, void *begin,
///                                           int64_t size, int64_t type,
///                                           void *name = nullptr) {
///   // Allocate space for an array section first or add a base/begin for
///   // pointer dereference.
///   if ((size > 1 || (base != begin && maptype.IsPtrAndObj)) &&
///       !maptype.IsDelete)
///     __tgt_push_mapper_component(rt_mapper_handle, base, begin,
///                                 size*sizeof(Ty), clearToFromMember(type));
///   // Map members.
///   for (unsigned i = 0; i < size; i++) {
///     // For each component specified by this mapper:
///     for (auto c : begin[i]->all_components) {
///       if (c.hasMapper())
///         (*c.Mapper())(rt_mapper_handle, c.arg_base, c.arg_begin, c.arg_size,
///                       c.arg_type, c.arg_name);
///       else
///         __tgt_push_mapper_component(rt_mapper_handle, c.arg_base,
///                                     c.arg_begin, c.arg_size, c.arg_type,
///                                     c.arg_name);
///     }
///   }
///   // Delete the array section.
///   if (size > 1 && maptype.IsDelete)
///     __tgt_push_mapper_component(rt_mapper_handle, base, begin,
///                                 size*sizeof(Ty), clearToFromMember(type));
/// }
/// \endcode
void CGOpenMPRuntime::emitUserDefinedMapper(const OMPDeclareMapperDecl *D,
                                            CodeGenFunction *CGF) {
  if (UDMMap.count(D) > 0)
    return;
  ASTContext &C = CGM.getContext();
  QualType Ty = D->getType();
  QualType PtrTy = C.getPointerType(Ty).withRestrict();
  QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
  auto *MapperVarDecl =
      cast<VarDecl>(cast<DeclRefExpr>(D->getMapperVarRef())->getDecl());
  SourceLocation Loc = D->getLocation();
  CharUnits ElementSize = C.getTypeSizeInChars(Ty);

  // Prepare mapper function arguments and attributes.
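  // (For orientation: a user-level mapper that reaches this codegen path
  //  might look like the following illustrative sketch, not taken verbatim
  //  from any test:
  //
  //    struct S { int len; double *data; };
  //    #pragma omp declare mapper(id : S s) map(s.len, s.data[0 : s.len])
  //
  //  The function being built here is what the runtime invokes for each
  //  object of type S mapped with mapper(id).)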
  ImplicitParamDecl HandleArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                              C.VoidPtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl BaseArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl BeginArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                             C.VoidPtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl SizeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl TypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl NameArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                            ImplicitParamDecl::Other);
  FunctionArgList Args;
  Args.push_back(&HandleArg);
  Args.push_back(&BaseArg);
  Args.push_back(&BeginArg);
  Args.push_back(&SizeArg);
  Args.push_back(&TypeArg);
  Args.push_back(&NameArg);
  const CGFunctionInfo &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  SmallString<64> TyStr;
  llvm::raw_svector_ostream Out(TyStr);
  CGM.getCXXABI().getMangleContext().mangleTypeName(Ty, Out);
  std::string Name = getName({"omp_mapper", TyStr, D->getName()});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
  // Start the mapper function code generation.
  CodeGenFunction MapperCGF(CGM);
  MapperCGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  // Compute the starting and ending addresses of array elements.
  llvm::Value *Size = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&SizeArg), /*Volatile=*/false,
      C.getPointerType(Int64Ty), Loc);
  // Prepare common arguments for array initialization and deletion.
  llvm::Value *Handle = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&HandleArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
  llvm::Value *BaseIn = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&BaseArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
  llvm::Value *BeginIn = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&BeginArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
  // Convert the size in bytes into the number of array elements.
  Size = MapperCGF.Builder.CreateExactUDiv(
      Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity()));
  llvm::Value *PtrBegin = MapperCGF.Builder.CreateBitCast(
      BeginIn, CGM.getTypes().ConvertTypeForMem(PtrTy));
  llvm::Value *PtrEnd = MapperCGF.Builder.CreateGEP(
      PtrBegin->getType()->getPointerElementType(), PtrBegin, Size);
  llvm::Value *MapType = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&TypeArg), /*Volatile=*/false,
      C.getPointerType(Int64Ty), Loc);
  llvm::Value *MapName = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&NameArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);

  // Emit array initialization if this is an array section and \p MapType
  // indicates that memory allocation is required.
  llvm::BasicBlock *HeadBB = MapperCGF.createBasicBlock("omp.arraymap.head");
  emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
                             MapName, ElementSize, HeadBB, /*IsInit=*/true);

  // Emit a for loop that iterates through the array's Size elements and maps
  // each of them.

  // Emit the loop header block.
  MapperCGF.EmitBlock(HeadBB);
  llvm::BasicBlock *BodyBB = MapperCGF.createBasicBlock("omp.arraymap.body");
  llvm::BasicBlock *DoneBB = MapperCGF.createBasicBlock("omp.done");
  // Evaluate whether the initial condition is satisfied.
  llvm::Value *IsEmpty =
      MapperCGF.Builder.CreateICmpEQ(PtrBegin, PtrEnd, "omp.arraymap.isempty");
  MapperCGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
  llvm::BasicBlock *EntryBB = MapperCGF.Builder.GetInsertBlock();

  // Emit the loop body block.
  MapperCGF.EmitBlock(BodyBB);
  llvm::BasicBlock *LastBB = BodyBB;
  llvm::PHINode *PtrPHI = MapperCGF.Builder.CreatePHI(
      PtrBegin->getType(), 2, "omp.arraymap.ptrcurrent");
  PtrPHI->addIncoming(PtrBegin, EntryBB);
  Address PtrCurrent =
      Address(PtrPHI, MapperCGF.GetAddrOfLocalVar(&BeginArg)
                          .getAlignment()
                          .alignmentOfArrayElement(ElementSize));
  // Privatize the mapper's declared variable to be the current array element.
  CodeGenFunction::OMPPrivateScope Scope(MapperCGF);
  Scope.addPrivate(MapperVarDecl, [PtrCurrent]() { return PtrCurrent; });
  (void)Scope.Privatize();

  // Get map clause information. Fill up the arrays with all mapped variables.
  MappableExprsHandler::MapCombinedInfoTy Info;
  MappableExprsHandler MEHandler(*D, MapperCGF);
  MEHandler.generateAllInfoForMapper(Info);

  // Call the runtime API __tgt_mapper_num_components to get the number of
  // pre-existing components.
  llvm::Value *OffloadingArgs[] = {Handle};
  llvm::Value *PreviousSize = MapperCGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___tgt_mapper_num_components),
      OffloadingArgs);
  llvm::Value *ShiftedPreviousSize = MapperCGF.Builder.CreateShl(
      PreviousSize,
      MapperCGF.Builder.getInt64(MappableExprsHandler::getFlagMemberOffset()));

  // Fill up the runtime mapper handle for all components.
  for (unsigned I = 0; I < Info.BasePointers.size(); ++I) {
    llvm::Value *CurBaseArg = MapperCGF.Builder.CreateBitCast(
        *Info.BasePointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
    llvm::Value *CurBeginArg = MapperCGF.Builder.CreateBitCast(
        Info.Pointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
    llvm::Value *CurSizeArg = Info.Sizes[I];
    llvm::Value *CurNameArg =
        (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo)
            ? llvm::ConstantPointerNull::get(CGM.VoidPtrTy)
            : emitMappingInformation(MapperCGF, OMPBuilder, Info.Exprs[I]);

    // Extract the MEMBER_OF field from the map type.
    llvm::Value *OriMapType = MapperCGF.Builder.getInt64(Info.Types[I]);
    llvm::Value *MemberMapType =
        MapperCGF.Builder.CreateNUWAdd(OriMapType, ShiftedPreviousSize);

    // Combine the map type inherited from user-defined mapper with that
    // specified in the program. According to the OMP_MAP_TO and OMP_MAP_FROM
    // bits of the \a MapType, which is the input argument of the mapper
    // function, the following code will set the OMP_MAP_TO and OMP_MAP_FROM
    // bits of MemberMapType.
    // [OpenMP 5.0], 1.2.6. map-type decay.
    //        | alloc |  to   | from  | tofrom | release | delete
    // ----------------------------------------------------------
    // alloc  | alloc | alloc | alloc | alloc  | release | delete
    // to     | alloc |  to   | alloc |   to   | release | delete
    // from   | alloc | alloc | from  |  from  | release | delete
    // tofrom | alloc |  to   | from  | tofrom | release | delete
    llvm::Value *LeftToFrom = MapperCGF.Builder.CreateAnd(
        MapType,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO |
                                   MappableExprsHandler::OMP_MAP_FROM));
    llvm::BasicBlock *AllocBB = MapperCGF.createBasicBlock("omp.type.alloc");
    llvm::BasicBlock *AllocElseBB =
        MapperCGF.createBasicBlock("omp.type.alloc.else");
    llvm::BasicBlock *ToBB = MapperCGF.createBasicBlock("omp.type.to");
    llvm::BasicBlock *ToElseBB = MapperCGF.createBasicBlock("omp.type.to.else");
    llvm::BasicBlock *FromBB = MapperCGF.createBasicBlock("omp.type.from");
    llvm::BasicBlock *EndBB = MapperCGF.createBasicBlock("omp.type.end");
    llvm::Value *IsAlloc = MapperCGF.Builder.CreateIsNull(LeftToFrom);
    MapperCGF.Builder.CreateCondBr(IsAlloc, AllocBB, AllocElseBB);
    // In case of alloc, clear OMP_MAP_TO and OMP_MAP_FROM.
    MapperCGF.EmitBlock(AllocBB);
    llvm::Value *AllocMapType = MapperCGF.Builder.CreateAnd(
        MemberMapType,
        MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO |
                                     MappableExprsHandler::OMP_MAP_FROM)));
    MapperCGF.Builder.CreateBr(EndBB);
    MapperCGF.EmitBlock(AllocElseBB);
    llvm::Value *IsTo = MapperCGF.Builder.CreateICmpEQ(
        LeftToFrom,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO));
    MapperCGF.Builder.CreateCondBr(IsTo, ToBB, ToElseBB);
    // In case of to, clear OMP_MAP_FROM.
    MapperCGF.EmitBlock(ToBB);
    llvm::Value *ToMapType = MapperCGF.Builder.CreateAnd(
        MemberMapType,
        MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_FROM));
    MapperCGF.Builder.CreateBr(EndBB);
    MapperCGF.EmitBlock(ToElseBB);
    llvm::Value *IsFrom = MapperCGF.Builder.CreateICmpEQ(
        LeftToFrom,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_FROM));
    MapperCGF.Builder.CreateCondBr(IsFrom, FromBB, EndBB);
    // In case of from, clear OMP_MAP_TO.
    MapperCGF.EmitBlock(FromBB);
    llvm::Value *FromMapType = MapperCGF.Builder.CreateAnd(
        MemberMapType,
        MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_TO));
    // In case of tofrom, do nothing.
    MapperCGF.EmitBlock(EndBB);
    LastBB = EndBB;
    llvm::PHINode *CurMapType =
        MapperCGF.Builder.CreatePHI(CGM.Int64Ty, 4, "omp.maptype");
    CurMapType->addIncoming(AllocMapType, AllocBB);
    CurMapType->addIncoming(ToMapType, ToBB);
    CurMapType->addIncoming(FromMapType, FromBB);
    CurMapType->addIncoming(MemberMapType, ToElseBB);

    llvm::Value *OffloadingArgs[] = {Handle,     CurBaseArg, CurBeginArg,
                                     CurSizeArg, CurMapType, CurNameArg};
    if (Info.Mappers[I]) {
      // Call the corresponding mapper function.
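      // (A nested mapper recurses here: the child mapper function receives
      //  the same runtime handle and pushes its own components onto the list
      //  the parent is building, as in the \code example in the function
      //  documentation above.)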
      llvm::Function *MapperFunc = getOrCreateUserDefinedMapperFunc(
          cast<OMPDeclareMapperDecl>(Info.Mappers[I]));
      assert(MapperFunc && "Expect a valid mapper function is available.");
      MapperCGF.EmitNounwindRuntimeCall(MapperFunc, OffloadingArgs);
    } else {
      // Call the runtime API __tgt_push_mapper_component to fill up the
      // runtime data structure.
      MapperCGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___tgt_push_mapper_component),
          OffloadingArgs);
    }
  }

  // Update the pointer to point to the next element that needs to be mapped,
  // and check whether we have mapped all elements.
  llvm::Type *ElemTy = PtrPHI->getType()->getPointerElementType();
  llvm::Value *PtrNext = MapperCGF.Builder.CreateConstGEP1_32(
      ElemTy, PtrPHI, /*Idx0=*/1, "omp.arraymap.next");
  PtrPHI->addIncoming(PtrNext, LastBB);
  llvm::Value *IsDone =
      MapperCGF.Builder.CreateICmpEQ(PtrNext, PtrEnd, "omp.arraymap.isdone");
  llvm::BasicBlock *ExitBB = MapperCGF.createBasicBlock("omp.arraymap.exit");
  MapperCGF.Builder.CreateCondBr(IsDone, ExitBB, BodyBB);

  MapperCGF.EmitBlock(ExitBB);
  // Emit array deletion if this is an array section and \p MapType indicates
  // that deletion is required.
  emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
                             MapName, ElementSize, DoneBB, /*IsInit=*/false);

  // Emit the function exit block.
  MapperCGF.EmitBlock(DoneBB, /*IsFinished=*/true);
  MapperCGF.FinishFunction();
  UDMMap.try_emplace(D, Fn);
  if (CGF) {
    auto &Decls = FunctionUDMMap.FindAndConstruct(CGF->CurFn);
    Decls.second.push_back(D);
  }
}

/// Emit the array initialization or deletion portion for user-defined mapper
/// code generation. First, it evaluates whether an array section is mapped
/// and whether the \a MapType instructs to delete this section. If \a IsInit
/// is true, and \a MapType indicates to not delete this array, array
/// initialization code is generated. If \a IsInit is false, and \a MapType
/// indicates to delete this array, array deletion code is generated.
void CGOpenMPRuntime::emitUDMapperArrayInitOrDel(
    CodeGenFunction &MapperCGF, llvm::Value *Handle, llvm::Value *Base,
    llvm::Value *Begin, llvm::Value *Size, llvm::Value *MapType,
    llvm::Value *MapName, CharUnits ElementSize, llvm::BasicBlock *ExitBB,
    bool IsInit) {
  StringRef Prefix = IsInit ? ".init" : ".del";

  // Evaluate if this is an array section.
  llvm::BasicBlock *BodyBB =
      MapperCGF.createBasicBlock(getName({"omp.array", Prefix}));
  llvm::Value *IsArray = MapperCGF.Builder.CreateICmpSGT(
      Size, MapperCGF.Builder.getInt64(1), "omp.arrayinit.isarray");
  llvm::Value *DeleteBit = MapperCGF.Builder.CreateAnd(
      MapType,
      MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_DELETE));
  llvm::Value *DeleteCond;
  llvm::Value *Cond;
  if (IsInit) {
    // base != begin?
    llvm::Value *BaseIsBegin = MapperCGF.Builder.CreateIsNotNull(
        MapperCGF.Builder.CreatePtrDiff(Base, Begin));
    // IsPtrAndObj?
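    // (The condition being assembled below corresponds, roughly, to the guard
    //  shown in the emitUserDefinedMapper documentation:
    //    (size > 1 || (base != begin && IsPtrAndObj)) && !IsDelete
    //  for the initialization case; illustrative restatement.)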
    llvm::Value *PtrAndObjBit = MapperCGF.Builder.CreateAnd(
        MapType,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_PTR_AND_OBJ));
    PtrAndObjBit = MapperCGF.Builder.CreateIsNotNull(PtrAndObjBit);
    BaseIsBegin = MapperCGF.Builder.CreateAnd(BaseIsBegin, PtrAndObjBit);
    Cond = MapperCGF.Builder.CreateOr(IsArray, BaseIsBegin);
    DeleteCond = MapperCGF.Builder.CreateIsNull(
        DeleteBit, getName({"omp.array", Prefix, ".delete"}));
  } else {
    Cond = IsArray;
    DeleteCond = MapperCGF.Builder.CreateIsNotNull(
        DeleteBit, getName({"omp.array", Prefix, ".delete"}));
  }
  Cond = MapperCGF.Builder.CreateAnd(Cond, DeleteCond);
  MapperCGF.Builder.CreateCondBr(Cond, BodyBB, ExitBB);

  MapperCGF.EmitBlock(BodyBB);
  // Get the array size by multiplying element size and element number (i.e., \p
  // Size).
  llvm::Value *ArraySize = MapperCGF.Builder.CreateNUWMul(
      Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity()));
  // Remove OMP_MAP_TO and OMP_MAP_FROM from the map type, so that it is used
  // for memory allocation/deletion purposes only.
  llvm::Value *MapTypeArg = MapperCGF.Builder.CreateAnd(
      MapType,
      MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO |
                                   MappableExprsHandler::OMP_MAP_FROM)));
  MapTypeArg = MapperCGF.Builder.CreateOr(
      MapTypeArg,
      MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_IMPLICIT));

  // Call the runtime API __tgt_push_mapper_component to fill up the runtime
  // data structure.
  llvm::Value *OffloadingArgs[] = {Handle,    Base,       Begin,
                                   ArraySize, MapTypeArg, MapName};
  MapperCGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___tgt_push_mapper_component),
      OffloadingArgs);
}

llvm::Function *CGOpenMPRuntime::getOrCreateUserDefinedMapperFunc(
    const OMPDeclareMapperDecl *D) {
  auto I = UDMMap.find(D);
  if (I != UDMMap.end())
    return I->second;
  emitUserDefinedMapper(D);
  return UDMMap.lookup(D);
}

void CGOpenMPRuntime::emitTargetNumIterationsCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    llvm::Value *DeviceID,
    llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
                                     const OMPLoopDirective &D)>
        SizeEmitter) {
  OpenMPDirectiveKind Kind = D.getDirectiveKind();
  const OMPExecutableDirective *TD = &D;
  // Get nested teams distribute kind directive, if any.
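  // An illustrative case: in
  //
  //   #pragma omp target
  //   #pragma omp teams
  //   #pragma omp distribute parallel for
  //   for (int i = 0; i < N; ++i) ...
  //
  // the 'distribute' directive is nested rather than part of a combined
  // directive, so it has to be recovered by walking the captured statements.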
  if (!isOpenMPDistributeDirective(Kind) || !isOpenMPTeamsDirective(Kind))
    TD = getNestedDistributeDirective(CGM.getContext(), D);
  if (!TD)
    return;
  const auto *LD = cast<OMPLoopDirective>(TD);
  auto &&CodeGen = [LD, DeviceID, SizeEmitter, &D, this](CodeGenFunction &CGF,
                                                         PrePostActionTy &) {
    if (llvm::Value *NumIterations = SizeEmitter(CGF, *LD)) {
      llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());
      llvm::Value *Args[] = {RTLoc, DeviceID, NumIterations};
      CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___kmpc_push_target_tripcount_mapper),
          Args);
    }
  };
  emitInlinedDirective(CGF, OMPD_unknown, CodeGen);
}

void CGOpenMPRuntime::emitTargetCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
    llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
    llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
                                     const OMPLoopDirective &D)>
        SizeEmitter) {
  if (!CGF.HaveInsertPoint())
    return;

  assert(OutlinedFn && "Invalid outlined function!");

  const bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>() ||
                                 D.hasClausesOfKind<OMPNowaitClause>();
  llvm::SmallVector<llvm::Value *, 16> CapturedVars;
  const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
  auto &&ArgsCodegen = [&CS, &CapturedVars](CodeGenFunction &CGF,
                                            PrePostActionTy &) {
    CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
  };
  emitInlinedDirective(CGF, OMPD_unknown, ArgsCodegen);

  CodeGenFunction::OMPTargetDataInfo InputInfo;
  llvm::Value *MapTypesArray = nullptr;
  llvm::Value *MapNamesArray = nullptr;
  // Fill up the pointer arrays and transfer execution to the device.
  auto &&ThenGen = [this, Device, OutlinedFn, OutlinedFnID, &D, &InputInfo,
                    &MapTypesArray, &MapNamesArray, &CS, RequiresOuterTask,
                    &CapturedVars,
                    SizeEmitter](CodeGenFunction &CGF, PrePostActionTy &) {
    if (Device.getInt() == OMPC_DEVICE_ancestor) {
      // Reverse offloading is not supported, so just execute on the host.
      if (RequiresOuterTask) {
        CapturedVars.clear();
        CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
      }
      emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
      return;
    }

    // On top of the arrays that were filled up, the target offloading call
    // takes as arguments the device id as well as the host pointer. The host
    // pointer is used by the runtime library to identify the current target
    // region, so it only has to be unique and not necessarily point to
    // anything. It could be the pointer to the outlined function that
    // implements the target region, but we aren't using that, so the compiler
    // does not need to keep it alive and can therefore inline the host
    // function if it proves worthwhile during optimization.

    // From this point on, we need to have an ID of the target region defined.
    assert(OutlinedFnID && "Invalid outlined function ID!");

    // Emit device ID if any.
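    // (e.g. '#pragma omp target device(2)' yields an i64 DeviceID of 2; with
    //  no device clause the OMP_DEVICEID_UNDEF sentinel is passed and the
    //  runtime picks the default device. Illustrative.)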
    llvm::Value *DeviceID;
    if (Device.getPointer()) {
      assert((Device.getInt() == OMPC_DEVICE_unknown ||
              Device.getInt() == OMPC_DEVICE_device_num) &&
             "Expected device_num modifier.");
      llvm::Value *DevVal = CGF.EmitScalarExpr(Device.getPointer());
      DeviceID =
          CGF.Builder.CreateIntCast(DevVal, CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Value *PointerNum =
        CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);

    // Return value of the runtime offloading call.
    llvm::Value *Return;

    llvm::Value *NumTeams = emitNumTeamsForTargetDirective(CGF, D);
    llvm::Value *NumThreads = emitNumThreadsForTargetDirective(CGF, D);

    // Source location for the ident struct
    llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());

    // Emit tripcount for the target loop-based directive.
    emitTargetNumIterationsCall(CGF, D, DeviceID, SizeEmitter);

    bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
    // The target region is an outlined function launched by the runtime
    // via calls to __tgt_target() or __tgt_target_teams().
    //
    // __tgt_target() launches a target region with one team and one thread,
    // executing a serial region. This master thread may in turn launch
    // more threads within its team upon encountering a parallel region,
    // however, no additional teams can be launched on the device.
    //
    // __tgt_target_teams() launches a target region with one or more teams,
    // each with one or more threads. This call is required for target
    // constructs such as:
    //  'target teams'
    //  'target' / 'teams'
    //  'target teams distribute parallel for'
    //  'target parallel'
    // and so on.
    //
    // Note that on the host and CPU targets, the runtime implementation of
    // these calls simply calls the outlined function without forking threads.
    // The outlined functions themselves have runtime calls to
    // __kmpc_fork_teams() and __kmpc_fork() for this purpose, codegen'd by
    // the compiler in emitTeamsCall() and emitParallelCall().
    //
    // In contrast, on the NVPTX target, the implementation of
    // __tgt_target_teams() launches a GPU kernel with the requested number
    // of teams and threads so no additional calls to the runtime are required.
    if (NumTeams) {
      // If we have NumTeams defined this means that we have an enclosed teams
      // region. Therefore we also expect to have NumThreads defined. These two
      // values should be defined in the presence of a teams directive,
      // regardless of having any clauses associated. If the user is using teams
      // but no clauses, these two values will be the default that should be
      // passed to the runtime library - a 32-bit integer with the value zero.
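      // (e.g. '#pragma omp target teams' with no num_teams/thread_limit
      //  clauses reaches this branch with both values being i32 0, leaving
      //  the choice to the runtime; illustrative.)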
      assert(NumThreads && "Thread limit expression should be available along "
                           "with number of teams.");
      SmallVector<llvm::Value *> OffloadingArgs = {
          RTLoc,
          DeviceID,
          OutlinedFnID,
          PointerNum,
          InputInfo.BasePointersArray.getPointer(),
          InputInfo.PointersArray.getPointer(),
          InputInfo.SizesArray.getPointer(),
          MapTypesArray,
          MapNamesArray,
          InputInfo.MappersArray.getPointer(),
          NumTeams,
          NumThreads};
      if (HasNowait) {
        // Add int32_t depNum = 0, void *depList = nullptr, int32_t
        // noAliasDepNum = 0, void *noAliasDepList = nullptr.
        OffloadingArgs.push_back(CGF.Builder.getInt32(0));
        OffloadingArgs.push_back(llvm::ConstantPointerNull::get(CGM.VoidPtrTy));
        OffloadingArgs.push_back(CGF.Builder.getInt32(0));
        OffloadingArgs.push_back(llvm::ConstantPointerNull::get(CGM.VoidPtrTy));
      }
      Return = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), HasNowait
                                   ? OMPRTL___tgt_target_teams_nowait_mapper
                                   : OMPRTL___tgt_target_teams_mapper),
          OffloadingArgs);
    } else {
      SmallVector<llvm::Value *> OffloadingArgs = {
          RTLoc,
          DeviceID,
          OutlinedFnID,
          PointerNum,
          InputInfo.BasePointersArray.getPointer(),
          InputInfo.PointersArray.getPointer(),
          InputInfo.SizesArray.getPointer(),
          MapTypesArray,
          MapNamesArray,
          InputInfo.MappersArray.getPointer()};
      if (HasNowait) {
        // Add int32_t depNum = 0, void *depList = nullptr, int32_t
        // noAliasDepNum = 0, void *noAliasDepList = nullptr.
        OffloadingArgs.push_back(CGF.Builder.getInt32(0));
        OffloadingArgs.push_back(llvm::ConstantPointerNull::get(CGM.VoidPtrTy));
        OffloadingArgs.push_back(CGF.Builder.getInt32(0));
        OffloadingArgs.push_back(llvm::ConstantPointerNull::get(CGM.VoidPtrTy));
      }
      Return = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), HasNowait ? OMPRTL___tgt_target_nowait_mapper
                                         : OMPRTL___tgt_target_mapper),
          OffloadingArgs);
    }

    // Check the error code and execute the host version if required.
    llvm::BasicBlock *OffloadFailedBlock =
        CGF.createBasicBlock("omp_offload.failed");
    llvm::BasicBlock *OffloadContBlock =
        CGF.createBasicBlock("omp_offload.cont");
    llvm::Value *Failed = CGF.Builder.CreateIsNotNull(Return);
    CGF.Builder.CreateCondBr(Failed, OffloadFailedBlock, OffloadContBlock);

    CGF.EmitBlock(OffloadFailedBlock);
    if (RequiresOuterTask) {
      CapturedVars.clear();
      CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
    }
    emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
    CGF.EmitBranch(OffloadContBlock);

    CGF.EmitBlock(OffloadContBlock, /*IsFinished=*/true);
  };

  // Notify that the host version must be executed.
  auto &&ElseGen = [this, &D, OutlinedFn, &CS, &CapturedVars,
                    RequiresOuterTask](CodeGenFunction &CGF,
                                       PrePostActionTy &) {
    if (RequiresOuterTask) {
      CapturedVars.clear();
      CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
    }
    emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
  };

  auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray,
                          &MapNamesArray, &CapturedVars, RequiresOuterTask,
                          &CS](CodeGenFunction &CGF, PrePostActionTy &) {
    // Fill up the arrays with all the captured variables.
    MappableExprsHandler::MapCombinedInfoTy CombinedInfo;

    // Get mappable expression information.
    MappableExprsHandler MEHandler(D, CGF);
    llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers;
    llvm::DenseSet<CanonicalDeclPtr<const Decl>> MappedVarSet;

    auto RI = CS.getCapturedRecordDecl()->field_begin();
    auto *CV = CapturedVars.begin();
    for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(),
                                              CE = CS.capture_end();
         CI != CE; ++CI, ++RI, ++CV) {
      MappableExprsHandler::MapCombinedInfoTy CurInfo;
      MappableExprsHandler::StructRangeInfoTy PartialStruct;

      // VLA sizes are passed to the outlined region by copy and do not have map
      // information associated.
      if (CI->capturesVariableArrayType()) {
        CurInfo.Exprs.push_back(nullptr);
        CurInfo.BasePointers.push_back(*CV);
        CurInfo.Pointers.push_back(*CV);
        CurInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
            CGF.getTypeSize(RI->getType()), CGF.Int64Ty, /*isSigned=*/true));
        // Copy to the device as an argument. No need to retrieve it.
        CurInfo.Types.push_back(MappableExprsHandler::OMP_MAP_LITERAL |
                                MappableExprsHandler::OMP_MAP_TARGET_PARAM |
                                MappableExprsHandler::OMP_MAP_IMPLICIT);
        CurInfo.Mappers.push_back(nullptr);
      } else {
        // If we have any information in the map clause, we use it, otherwise we
        // just do a default mapping.
        MEHandler.generateInfoForCapture(CI, *CV, CurInfo, PartialStruct);
        if (!CI->capturesThis())
          MappedVarSet.insert(CI->getCapturedVar());
        else
          MappedVarSet.insert(nullptr);
        if (CurInfo.BasePointers.empty() && !PartialStruct.Base.isValid())
          MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurInfo);
        // Generate correct mapping for variables captured by reference in
        // lambdas.
        if (CI->capturesVariable())
          MEHandler.generateInfoForLambdaCaptures(CI->getCapturedVar(), *CV,
                                                  CurInfo, LambdaPointers);
      }
      // We expect to have at least an element of information for this capture.
      assert((!CurInfo.BasePointers.empty() || PartialStruct.Base.isValid()) &&
             "Non-existing map pointer for capture!");
      assert(CurInfo.BasePointers.size() == CurInfo.Pointers.size() &&
             CurInfo.BasePointers.size() == CurInfo.Sizes.size() &&
             CurInfo.BasePointers.size() == CurInfo.Types.size() &&
             CurInfo.BasePointers.size() == CurInfo.Mappers.size() &&
             "Inconsistent map information sizes!");

      // If there is an entry in PartialStruct it means we have a struct with
      // individual members mapped. Emit an extra combined entry.
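      // (e.g. 'map(to: s.x) map(from: s.y)' maps individual fields of 's';
      //  the combined entry emitted below describes the enclosing struct so
      //  the runtime can handle it as one contiguous region. Illustrative.)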
      if (PartialStruct.Base.isValid()) {
        CombinedInfo.append(PartialStruct.PreliminaryMapData);
        MEHandler.emitCombinedEntry(
            CombinedInfo, CurInfo.Types, PartialStruct, nullptr,
            !PartialStruct.PreliminaryMapData.BasePointers.empty());
      }

      // We need to append the results of this capture to what we already have.
      CombinedInfo.append(CurInfo);
    }
    // Adjust MEMBER_OF flags for the lambdas captures.
    MEHandler.adjustMemberOfForLambdaCaptures(
        LambdaPointers, CombinedInfo.BasePointers, CombinedInfo.Pointers,
        CombinedInfo.Types);
    // Map any list items in a map clause that were not captures because they
    // weren't referenced within the construct.
    MEHandler.generateAllInfo(CombinedInfo, MappedVarSet);

    TargetDataInfo Info;
    // Fill up the arrays and create the arguments.
    emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder);
    emitOffloadingArraysArgument(
        CGF, Info.BasePointersArray, Info.PointersArray, Info.SizesArray,
        Info.MapTypesArray, Info.MapNamesArray, Info.MappersArray, Info,
        {/*ForEndCall=*/false});

    InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
    InputInfo.BasePointersArray =
        Address(Info.BasePointersArray, CGM.getPointerAlign());
    InputInfo.PointersArray =
        Address(Info.PointersArray, CGM.getPointerAlign());
    InputInfo.SizesArray = Address(Info.SizesArray, CGM.getPointerAlign());
    InputInfo.MappersArray = Address(Info.MappersArray, CGM.getPointerAlign());
    MapTypesArray = Info.MapTypesArray;
    MapNamesArray = Info.MapNamesArray;
    if (RequiresOuterTask)
      CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
    else
      emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
  };

  auto &&TargetElseGen = [this, &ElseGen, &D, RequiresOuterTask](
                             CodeGenFunction &CGF, PrePostActionTy &) {
    if (RequiresOuterTask) {
      CodeGenFunction::OMPTargetDataInfo InputInfo;
      CGF.EmitOMPTargetTaskBasedDirective(D, ElseGen, InputInfo);
    } else {
      emitInlinedDirective(CGF, D.getDirectiveKind(), ElseGen);
    }
  };

  // If we have a target function ID it means that we need to support
  // offloading, otherwise, just execute on the host. We need to execute on the
  // host regardless of the conditional in the if clause if, e.g., the user
  // does not specify target triples.
  if (OutlinedFnID) {
    if (IfCond) {
      emitIfClause(CGF, IfCond, TargetThenGen, TargetElseGen);
    } else {
      RegionCodeGenTy ThenRCG(TargetThenGen);
      ThenRCG(CGF);
    }
  } else {
    RegionCodeGenTy ElseRCG(TargetElseGen);
    ElseRCG(CGF);
  }
}

void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S,
                                                    StringRef ParentName) {
  if (!S)
    return;

  // Codegen OMP target directives that offload compute to the device.
  bool RequiresDeviceCodegen =
      isa<OMPExecutableDirective>(S) &&
      isOpenMPTargetExecutionDirective(
          cast<OMPExecutableDirective>(S)->getDirectiveKind());

  if (RequiresDeviceCodegen) {
    const auto &E = *cast<OMPExecutableDirective>(S);
    unsigned DeviceID;
    unsigned FileID;
    unsigned Line;
    getTargetEntryUniqueInfo(CGM.getContext(), E.getBeginLoc(), DeviceID,
                             FileID, Line);

    // Is this a target region that should not be emitted as an entry point? If
    // so just signal we are done with this target region.
    if (!OffloadEntriesInfoManager.hasTargetRegionEntryInfo(DeviceID, FileID,
                                                            ParentName, Line))
      return;

    switch (E.getDirectiveKind()) {
    case OMPD_target:
      CodeGenFunction::EmitOMPTargetDeviceFunction(CGM, ParentName,
                                                   cast<OMPTargetDirective>(E));
      break;
    case OMPD_target_parallel:
      CodeGenFunction::EmitOMPTargetParallelDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelDirective>(E));
      break;
    case OMPD_target_teams:
      CodeGenFunction::EmitOMPTargetTeamsDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDirective>(E));
      break;
    case OMPD_target_teams_distribute:
      CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDistributeDirective>(E));
      break;
    case OMPD_target_teams_distribute_simd:
      CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDistributeSimdDirective>(E));
      break;
    case OMPD_target_parallel_for:
      CodeGenFunction::EmitOMPTargetParallelForDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelForDirective>(E));
      break;
    case OMPD_target_parallel_for_simd:
      CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelForSimdDirective>(E));
      break;
    case OMPD_target_simd:
      CodeGenFunction::EmitOMPTargetSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetSimdDirective>(E));
      break;
    case OMPD_target_teams_distribute_parallel_for:
      CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction(
          CGM, ParentName,
          cast<OMPTargetTeamsDistributeParallelForDirective>(E));
      break;
    case OMPD_target_teams_distribute_parallel_for_simd:
      CodeGenFunction::
          EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction(
              CGM, ParentName,
              cast<OMPTargetTeamsDistributeParallelForSimdDirective>(E));
      break;
    case OMPD_parallel:
    case OMPD_for:
    case OMPD_parallel_for:
    case OMPD_parallel_master:
    case OMPD_parallel_sections:
    case OMPD_for_simd:
    case OMPD_parallel_for_simd:
    case OMPD_cancel:
    case OMPD_cancellation_point:
    case OMPD_ordered:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_task:
    case OMPD_simd:
    case OMPD_tile:
    case OMPD_unroll:
    case OMPD_sections:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_taskyield:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_atomic:
    case OMPD_flush:
    case OMPD_depobj:
    case OMPD_scan:
    case OMPD_teams:
    case OMPD_target_data:
    case OMPD_target_exit_data:
    case OMPD_target_enter_data:
    case OMPD_distribute:
    case OMPD_distribute_simd:
    case OMPD_distribute_parallel_for:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_target_update:
    case OMPD_declare_simd:
    case OMPD_declare_variant:
    case OMPD_begin_declare_variant:
    case OMPD_end_declare_variant:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_master_taskloop:
    case OMPD_master_taskloop_simd:
    case OMPD_parallel_master_taskloop:
    case OMPD_parallel_master_taskloop_simd:
    case OMPD_requires:
    case OMPD_metadirective:
    case OMPD_unknown:
    default:
      llvm_unreachable("Unknown target directive for OpenMP device codegen.");
    }
    return;
  }

  if (const auto *E = dyn_cast<OMPExecutableDirective>(S)) {
    if (!E->hasAssociatedStmt() || !E->getAssociatedStmt())
      return;

    scanForTargetRegionsFunctions(E->getRawStmt(), ParentName);
    return;
  }

  // If this is a lambda function, look into its body.
  if (const auto *L = dyn_cast<LambdaExpr>(S))
    S = L->getBody();

  // Keep looking for target regions recursively.
  for (const Stmt *II : S->children())
    scanForTargetRegionsFunctions(II, ParentName);
}

static bool isAssumedToBeNotEmitted(const ValueDecl *VD, bool IsDevice) {
  Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
      OMPDeclareTargetDeclAttr::getDeviceType(VD);
  if (!DevTy)
    return false;
  // Do not emit device_type(nohost) functions for the host.
  if (!IsDevice && DevTy == OMPDeclareTargetDeclAttr::DT_NoHost)
    return true;
  // Do not emit device_type(host) functions for the device.
  if (IsDevice && DevTy == OMPDeclareTargetDeclAttr::DT_Host)
    return true;
  return false;
}

bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) {
  // If emitting code for the host, we do not process FD here. Instead we do
  // the normal code generation.
  if (!CGM.getLangOpts().OpenMPIsDevice) {
    if (const auto *FD = dyn_cast<FunctionDecl>(GD.getDecl()))
      if (isAssumedToBeNotEmitted(cast<ValueDecl>(FD),
                                  CGM.getLangOpts().OpenMPIsDevice))
        return true;
    return false;
  }

  const ValueDecl *VD = cast<ValueDecl>(GD.getDecl());
  // Try to detect target regions in the function.
  if (const auto *FD = dyn_cast<FunctionDecl>(VD)) {
    StringRef Name = CGM.getMangledName(GD);
    scanForTargetRegionsFunctions(FD->getBody(), Name);
    if (isAssumedToBeNotEmitted(cast<ValueDecl>(FD),
                                CGM.getLangOpts().OpenMPIsDevice))
      return true;
  }

  // Do not emit the function if it is not marked as declare target.
  return !OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD) &&
         AlreadyEmittedTargetDecls.count(VD) == 0;
}

bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
  if (isAssumedToBeNotEmitted(cast<ValueDecl>(GD.getDecl()),
                              CGM.getLangOpts().OpenMPIsDevice))
    return true;

  if (!CGM.getLangOpts().OpenMPIsDevice)
    return false;

  // Check if there are Ctors/Dtors in this declaration and look for target
  // regions in it. We use the complete variant to produce the kernel name
  // mangling.
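  // (e.g. a global 'S s;' declared between 'declare target' and
  //  'end declare target': target regions inside S::S() and S::~S() are
  //  discovered under the mangled names of the complete-object
  //  constructor/destructor. Illustrative.)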
  QualType RDTy = cast<VarDecl>(GD.getDecl())->getType();
  if (const auto *RD = RDTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl()) {
    for (const CXXConstructorDecl *Ctor : RD->ctors()) {
      StringRef ParentName =
          CGM.getMangledName(GlobalDecl(Ctor, Ctor_Complete));
      scanForTargetRegionsFunctions(Ctor->getBody(), ParentName);
    }
    if (const CXXDestructorDecl *Dtor = RD->getDestructor()) {
      StringRef ParentName =
          CGM.getMangledName(GlobalDecl(Dtor, Dtor_Complete));
      scanForTargetRegionsFunctions(Dtor->getBody(), ParentName);
    }
  }

  // Do not emit the variable if it is not marked as declare target.
  llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(
          cast<VarDecl>(GD.getDecl()));
  if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
      (*Res == OMPDeclareTargetDeclAttr::MT_To &&
       HasRequiresUnifiedSharedMemory)) {
    DeferredGlobalVariables.insert(cast<VarDecl>(GD.getDecl()));
    return true;
  }
  return false;
}

void CGOpenMPRuntime::registerTargetGlobalVariable(const VarDecl *VD,
                                                   llvm::Constant *Addr) {
  if (CGM.getLangOpts().OMPTargetTriples.empty() &&
      !CGM.getLangOpts().OpenMPIsDevice)
    return;

  // If we have host/nohost variables, they do not need to be registered.
  Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
      OMPDeclareTargetDeclAttr::getDeviceType(VD);
  if (DevTy && DevTy.getValue() != OMPDeclareTargetDeclAttr::DT_Any)
    return;

  llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
  if (!Res) {
    if (CGM.getLangOpts().OpenMPIsDevice) {
      // Register non-target variables being emitted in device code (debug info
      // may cause this).
      StringRef VarName = CGM.getMangledName(VD);
      EmittedNonTargetVariables.try_emplace(VarName, Addr);
    }
    return;
  }
  // Register declare target variables.
  OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags;
  StringRef VarName;
  CharUnits VarSize;
  llvm::GlobalValue::LinkageTypes Linkage;

  if (*Res == OMPDeclareTargetDeclAttr::MT_To &&
      !HasRequiresUnifiedSharedMemory) {
    Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo;
    VarName = CGM.getMangledName(VD);
    if (VD->hasDefinition(CGM.getContext()) != VarDecl::DeclarationOnly) {
      VarSize = CGM.getContext().getTypeSizeInChars(VD->getType());
      assert(!VarSize.isZero() && "Expected non-zero size of the variable");
    } else {
      VarSize = CharUnits::Zero();
    }
    Linkage = CGM.getLLVMLinkageVarDefinition(VD, /*IsConstant=*/false);
    // Temp solution to prevent optimizations of the internal variables.
    if (CGM.getLangOpts().OpenMPIsDevice && !VD->isExternallyVisible()) {
      // Do not create a "ref-variable" if the original is not also available
      // on the host.
      if (!OffloadEntriesInfoManager.hasDeviceGlobalVarEntryInfo(VarName))
        return;
      std::string RefName = getName({VarName, "ref"});
      if (!CGM.GetGlobalValue(RefName)) {
        llvm::Constant *AddrRef =
            getOrCreateInternalVariable(Addr->getType(), RefName);
        auto *GVAddrRef = cast<llvm::GlobalVariable>(AddrRef);
        GVAddrRef->setConstant(/*Val=*/true);
        GVAddrRef->setLinkage(llvm::GlobalValue::InternalLinkage);
        GVAddrRef->setInitializer(Addr);
        CGM.addCompilerUsedGlobal(GVAddrRef);
      }
    }
  } else {
    assert(((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
            (*Res == OMPDeclareTargetDeclAttr::MT_To &&
             HasRequiresUnifiedSharedMemory)) &&
           "Declare target attribute must be 'link' or 'to' with unified "
           "memory.");
    if (*Res == OMPDeclareTargetDeclAttr::MT_Link)
      Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink;
    else
      Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo;

    if (CGM.getLangOpts().OpenMPIsDevice) {
      VarName = Addr->getName();
      Addr = nullptr;
    } else {
      VarName = getAddrOfDeclareTargetVar(VD).getName();
      Addr = cast<llvm::Constant>(getAddrOfDeclareTargetVar(VD).getPointer());
    }
    VarSize = CGM.getPointerSize();
    Linkage = llvm::GlobalValue::WeakAnyLinkage;
  }

  OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo(
      VarName, Addr, VarSize, Flags, Linkage);
}

bool CGOpenMPRuntime::emitTargetGlobal(GlobalDecl GD) {
  if (isa<FunctionDecl>(GD.getDecl()) ||
      isa<OMPDeclareReductionDecl>(GD.getDecl()))
    return emitTargetFunctions(GD);

  return emitTargetGlobalVariable(GD);
}

void CGOpenMPRuntime::emitDeferredTargetDecls() const {
  for (const VarDecl *VD : DeferredGlobalVariables) {
    llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
        OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
    if (!Res)
      continue;
    if (*Res == OMPDeclareTargetDeclAttr::MT_To &&
        !HasRequiresUnifiedSharedMemory) {
      CGM.EmitGlobal(VD);
    } else {
      assert((*Res == OMPDeclareTargetDeclAttr::MT_Link ||
              (*Res == OMPDeclareTargetDeclAttr::MT_To &&
               HasRequiresUnifiedSharedMemory)) &&
             "Expected link clause or to clause with unified memory.");
      (void)CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
    }
  }
}

void CGOpenMPRuntime::adjustTargetSpecificDataForLambdas(
    CodeGenFunction &CGF, const OMPExecutableDirective &D) const {
  assert(isOpenMPTargetExecutionDirective(D.getDirectiveKind()) &&
         "Expected target-based directive.");
}

void CGOpenMPRuntime::processRequiresDirective(const OMPRequiresDecl *D) {
  for (const OMPClause *Clause : D->clauselists()) {
    if (Clause->getClauseKind() == OMPC_unified_shared_memory) {
      HasRequiresUnifiedSharedMemory = true;
    } else if (const auto *AC =
                   dyn_cast<OMPAtomicDefaultMemOrderClause>(Clause)) {
      switch (AC->getAtomicDefaultMemOrderKind()) {
      case OMPC_ATOMIC_DEFAULT_MEM_ORDER_acq_rel:
        RequiresAtomicOrdering = llvm::AtomicOrdering::AcquireRelease;
        break;
      case OMPC_ATOMIC_DEFAULT_MEM_ORDER_seq_cst:
        RequiresAtomicOrdering = llvm::AtomicOrdering::SequentiallyConsistent;
        break;
      case OMPC_ATOMIC_DEFAULT_MEM_ORDER_relaxed:
        RequiresAtomicOrdering = llvm::AtomicOrdering::Monotonic;
        break;
      case OMPC_ATOMIC_DEFAULT_MEM_ORDER_unknown:
        break;
      }
    }
  }
}

llvm::AtomicOrdering CGOpenMPRuntime::getDefaultMemoryOrdering() const {
  return RequiresAtomicOrdering;
}

bool CGOpenMPRuntime::hasAllocateAttributeForGlobalVar(const VarDecl *VD,
                                                       LangAS &AS) {
  if (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())
    return false;
  const auto *A = VD->getAttr<OMPAllocateDeclAttr>();
  switch (A->getAllocatorType()) {
  case OMPAllocateDeclAttr::OMPNullMemAlloc:
  case OMPAllocateDeclAttr::OMPDefaultMemAlloc:
    // Not supported, fallback to the default mem space.
  case OMPAllocateDeclAttr::OMPLargeCapMemAlloc:
  case OMPAllocateDeclAttr::OMPCGroupMemAlloc:
  case OMPAllocateDeclAttr::OMPHighBWMemAlloc:
  case OMPAllocateDeclAttr::OMPLowLatMemAlloc:
  case OMPAllocateDeclAttr::OMPThreadMemAlloc:
  case OMPAllocateDeclAttr::OMPConstMemAlloc:
  case OMPAllocateDeclAttr::OMPPTeamMemAlloc:
    AS = LangAS::Default;
    return true;
  case OMPAllocateDeclAttr::OMPUserDefinedMemAlloc:
    llvm_unreachable("Expected predefined allocator for the variables with the "
                     "static storage.");
  }
  return false;
}

bool CGOpenMPRuntime::hasRequiresUnifiedSharedMemory() const {
  return HasRequiresUnifiedSharedMemory;
}

CGOpenMPRuntime::DisableAutoDeclareTargetRAII::DisableAutoDeclareTargetRAII(
    CodeGenModule &CGM)
    : CGM(CGM) {
  if (CGM.getLangOpts().OpenMPIsDevice) {
    SavedShouldMarkAsGlobal = CGM.getOpenMPRuntime().ShouldMarkAsGlobal;
    CGM.getOpenMPRuntime().ShouldMarkAsGlobal = false;
  }
}

CGOpenMPRuntime::DisableAutoDeclareTargetRAII::~DisableAutoDeclareTargetRAII() {
  if (CGM.getLangOpts().OpenMPIsDevice)
    CGM.getOpenMPRuntime().ShouldMarkAsGlobal = SavedShouldMarkAsGlobal;
}

bool CGOpenMPRuntime::markAsGlobalTarget(GlobalDecl GD) {
  if (!CGM.getLangOpts().OpenMPIsDevice || !ShouldMarkAsGlobal)
    return true;

  const auto *D = cast<FunctionDecl>(GD.getDecl());
  // Do not emit the function if it is marked as declare target, as it was
  // already emitted.
  if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(D)) {
    if (D->hasBody() && AlreadyEmittedTargetDecls.count(D) == 0) {
      if (auto *F = dyn_cast_or_null<llvm::Function>(
              CGM.GetGlobalValue(CGM.getMangledName(GD))))
        return !F->isDeclaration();
      return false;
    }
    return true;
  }

  return !AlreadyEmittedTargetDecls.insert(D).second;
}

llvm::Function *CGOpenMPRuntime::emitRequiresDirectiveRegFun() {
  // If we don't have entries or if we are emitting code for the device, we
  // don't need to do anything.
  if (CGM.getLangOpts().OMPTargetTriples.empty() ||
      CGM.getLangOpts().OpenMPSimd || CGM.getLangOpts().OpenMPIsDevice ||
      (OffloadEntriesInfoManager.empty() &&
       !HasEmittedDeclareTargetRegion && !HasEmittedTargetRegion))
    return nullptr;

  // Create and register the function that handles the requires directives.
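  // (e.g. a translation unit containing '#pragma omp requires
  //  unified_shared_memory' plus at least one target region gets a global
  //  initializer-style function that calls
  //  __tgt_register_requires(OMP_REQ_UNIFIED_SHARED_MEMORY); illustrative.)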
  ASTContext &C = CGM.getContext();

  llvm::Function *RequiresRegFn;
  {
    CodeGenFunction CGF(CGM);
    const auto &FI = CGM.getTypes().arrangeNullaryFunction();
    llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
    std::string ReqName = getName({"omp_offloading", "requires_reg"});
    RequiresRegFn = CGM.CreateGlobalInitOrCleanUpFunction(FTy, ReqName, FI);
    CGF.StartFunction(GlobalDecl(), C.VoidTy, RequiresRegFn, FI, {});
    OpenMPOffloadingRequiresDirFlags Flags = OMP_REQ_NONE;
    // TODO: check for other requires clauses.
    // The requires directive takes effect only when a target region is
    // present in the compilation unit. Otherwise it is ignored and not
    // passed to the runtime. This prevents the runtime from throwing an
    // error for mismatched requires clauses across compilation units that
    // don't contain at least one target region.
    assert((HasEmittedTargetRegion || HasEmittedDeclareTargetRegion ||
            !OffloadEntriesInfoManager.empty()) &&
           "Target or declare target region expected.");
    if (HasRequiresUnifiedSharedMemory)
      Flags = OMP_REQ_UNIFIED_SHARED_MEMORY;
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___tgt_register_requires),
                        llvm::ConstantInt::get(CGM.Int64Ty, Flags));
    CGF.FinishFunction();
  }
  return RequiresRegFn;
}

void CGOpenMPRuntime::emitTeamsCall(CodeGenFunction &CGF,
                                    const OMPExecutableDirective &D,
                                    SourceLocation Loc,
                                    llvm::Function *OutlinedFn,
                                    ArrayRef<llvm::Value *> CapturedVars) {
  if (!CGF.HaveInsertPoint())
    return;

  llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
  CodeGenFunction::RunCleanupsScope Scope(CGF);

  // Build call __kmpc_fork_teams(loc, n, microtask, var1, .., varn);
  llvm::Value *Args[] = {
      RTLoc,
      CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
      CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy())};
  llvm::SmallVector<llvm::Value *, 16> RealArgs;
  RealArgs.append(std::begin(Args), std::end(Args));
  RealArgs.append(CapturedVars.begin(), CapturedVars.end());

  llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(
      CGM.getModule(), OMPRTL___kmpc_fork_teams);
  CGF.EmitRuntimeCall(RTLFn, RealArgs);
}

void CGOpenMPRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
                                         const Expr *NumTeams,
                                         const Expr *ThreadLimit,
                                         SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;

  llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);

  llvm::Value *NumTeamsVal =
      NumTeams
          ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(NumTeams),
                                      CGF.CGM.Int32Ty, /* isSigned = */ true)
          : CGF.Builder.getInt32(0);

  llvm::Value *ThreadLimitVal =
      ThreadLimit
          ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit),
                                      CGF.CGM.Int32Ty, /* isSigned = */ true)
          : CGF.Builder.getInt32(0);

  // Build call __kmpc_push_num_teams(&loc, global_tid, num_teams,
  // thread_limit).
  llvm::Value *PushNumTeamsArgs[] = {RTLoc, getThreadID(CGF, Loc), NumTeamsVal,
                                     ThreadLimitVal};
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_push_num_teams),
                      PushNumTeamsArgs);
}

void CGOpenMPRuntime::emitTargetDataCalls(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) {
  if (!CGF.HaveInsertPoint())
    return;

  // Action used to replace the default codegen action and turn privatization
  // off.
  PrePostActionTy NoPrivAction;

  // Generate the code for the opening of the data environment. Capture all the
  // arguments of the runtime call by reference because they are used in the
  // closing of the region.
  auto &&BeginThenGen = [this, &D, Device, &Info,
                         &CodeGen](CodeGenFunction &CGF, PrePostActionTy &) {
    // Fill up the arrays with all the mapped variables.
    MappableExprsHandler::MapCombinedInfoTy CombinedInfo;

    // Get map clause information.
    MappableExprsHandler MEHandler(D, CGF);
    MEHandler.generateAllInfo(CombinedInfo);

    // Fill up the arrays and create the arguments.
    emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder,
                         /*IsNonContiguous=*/true);

    llvm::Value *BasePointersArrayArg = nullptr;
    llvm::Value *PointersArrayArg = nullptr;
    llvm::Value *SizesArrayArg = nullptr;
    llvm::Value *MapTypesArrayArg = nullptr;
    llvm::Value *MapNamesArrayArg = nullptr;
    llvm::Value *MappersArrayArg = nullptr;
    emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
                                 SizesArrayArg, MapTypesArrayArg,
                                 MapNamesArrayArg, MappersArrayArg, Info);

    // Emit device ID if any.
    llvm::Value *DeviceID = nullptr;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);

    // Source location for the ident struct
    llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());

    llvm::Value *OffloadingArgs[] = {RTLoc,
                                     DeviceID,
                                     PointerNum,
                                     BasePointersArrayArg,
                                     PointersArrayArg,
                                     SizesArrayArg,
                                     MapTypesArrayArg,
                                     MapNamesArrayArg,
                                     MappersArrayArg};
    CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___tgt_target_data_begin_mapper),
        OffloadingArgs);

    // If device pointer privatization is required, emit the body of the region
    // here. It will have to be duplicated: with and without privatization.
    if (!Info.CaptureDeviceAddrMap.empty())
      CodeGen(CGF);
  };

  // Generate code for the closing of the data region.
11256 auto &&EndThenGen = [this, Device, &Info, &D](CodeGenFunction &CGF, 11257 PrePostActionTy &) { 11258 assert(Info.isValid() && "Invalid data environment closing arguments."); 11259 11260 llvm::Value *BasePointersArrayArg = nullptr; 11261 llvm::Value *PointersArrayArg = nullptr; 11262 llvm::Value *SizesArrayArg = nullptr; 11263 llvm::Value *MapTypesArrayArg = nullptr; 11264 llvm::Value *MapNamesArrayArg = nullptr; 11265 llvm::Value *MappersArrayArg = nullptr; 11266 emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg, 11267 SizesArrayArg, MapTypesArrayArg, 11268 MapNamesArrayArg, MappersArrayArg, Info, 11269 {/*ForEndCall=*/true}); 11270 11271 // Emit device ID if any. 11272 llvm::Value *DeviceID = nullptr; 11273 if (Device) { 11274 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device), 11275 CGF.Int64Ty, /*isSigned=*/true); 11276 } else { 11277 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF); 11278 } 11279 11280 // Emit the number of elements in the offloading arrays. 11281 llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs); 11282 11283 // Source location for the ident struct 11284 llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc()); 11285 11286 llvm::Value *OffloadingArgs[] = {RTLoc, 11287 DeviceID, 11288 PointerNum, 11289 BasePointersArrayArg, 11290 PointersArrayArg, 11291 SizesArrayArg, 11292 MapTypesArrayArg, 11293 MapNamesArrayArg, 11294 MappersArrayArg}; 11295 CGF.EmitRuntimeCall( 11296 OMPBuilder.getOrCreateRuntimeFunction( 11297 CGM.getModule(), OMPRTL___tgt_target_data_end_mapper), 11298 OffloadingArgs); 11299 }; 11300 11301 // If we need device pointer privatization, we need to emit the body of the 11302 // region with no privatization in the 'else' branch of the conditional. 11303 // Otherwise, we don't have to do anything. 11304 auto &&BeginElseGen = [&Info, &CodeGen, &NoPrivAction](CodeGenFunction &CGF, 11305 PrePostActionTy &) { 11306 if (!Info.CaptureDeviceAddrMap.empty()) { 11307 CodeGen.setAction(NoPrivAction); 11308 CodeGen(CGF); 11309 } 11310 }; 11311 11312 // We don't have to do anything to close the region if the if clause evaluates 11313 // to false. 11314 auto &&EndElseGen = [](CodeGenFunction &CGF, PrePostActionTy &) {}; 11315 11316 if (IfCond) { 11317 emitIfClause(CGF, IfCond, BeginThenGen, BeginElseGen); 11318 } else { 11319 RegionCodeGenTy RCG(BeginThenGen); 11320 RCG(CGF); 11321 } 11322 11323 // If we don't require privatization of device pointers, we emit the body in 11324 // between the runtime calls. This avoids duplicating the body code. 11325 if (Info.CaptureDeviceAddrMap.empty()) { 11326 CodeGen.setAction(NoPrivAction); 11327 CodeGen(CGF); 11328 } 11329 11330 if (IfCond) { 11331 emitIfClause(CGF, IfCond, EndThenGen, EndElseGen); 11332 } else { 11333 RegionCodeGenTy RCG(EndThenGen); 11334 RCG(CGF); 11335 } 11336 } 11337 11338 void CGOpenMPRuntime::emitTargetDataStandAloneCall( 11339 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, 11340 const Expr *Device) { 11341 if (!CGF.HaveInsertPoint()) 11342 return; 11343 11344 assert((isa<OMPTargetEnterDataDirective>(D) || 11345 isa<OMPTargetExitDataDirective>(D) || 11346 isa<OMPTargetUpdateDirective>(D)) && 11347 "Expecting either target enter, exit data, or update directives."); 11348 11349 CodeGenFunction::OMPTargetDataInfo InputInfo; 11350 llvm::Value *MapTypesArray = nullptr; 11351 llvm::Value *MapNamesArray = nullptr; 11352 // Generate the code for the opening of the data environment. 
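  // For illustration only (hypothetical user constructs): the ThenGen lambda
  // below selects the runtime entry point from the directive kind, e.g.
  //   #pragma omp target enter data map(to: a)   -> __tgt_target_data_begin_mapper
  //   #pragma omp target exit data map(from: a)  -> __tgt_target_data_end_mapper
  //   #pragma omp target update from(a) nowait   -> __tgt_target_data_update_nowait_mapper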
11353 auto &&ThenGen = [this, &D, Device, &InputInfo, &MapTypesArray, 11354 &MapNamesArray](CodeGenFunction &CGF, PrePostActionTy &) { 11355 // Emit device ID if any. 11356 llvm::Value *DeviceID = nullptr; 11357 if (Device) { 11358 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device), 11359 CGF.Int64Ty, /*isSigned=*/true); 11360 } else { 11361 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF); 11362 } 11363 11364 // Emit the number of elements in the offloading arrays. 11365 llvm::Constant *PointerNum = 11366 CGF.Builder.getInt32(InputInfo.NumberOfTargetItems); 11367 11368 // Source location for the ident struct 11369 llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc()); 11370 11371 llvm::Value *OffloadingArgs[] = {RTLoc, 11372 DeviceID, 11373 PointerNum, 11374 InputInfo.BasePointersArray.getPointer(), 11375 InputInfo.PointersArray.getPointer(), 11376 InputInfo.SizesArray.getPointer(), 11377 MapTypesArray, 11378 MapNamesArray, 11379 InputInfo.MappersArray.getPointer()}; 11380 11381 // Select the right runtime function call for each standalone 11382 // directive. 11383 const bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>(); 11384 RuntimeFunction RTLFn; 11385 switch (D.getDirectiveKind()) { 11386 case OMPD_target_enter_data: 11387 RTLFn = HasNowait ? OMPRTL___tgt_target_data_begin_nowait_mapper 11388 : OMPRTL___tgt_target_data_begin_mapper; 11389 break; 11390 case OMPD_target_exit_data: 11391 RTLFn = HasNowait ? OMPRTL___tgt_target_data_end_nowait_mapper 11392 : OMPRTL___tgt_target_data_end_mapper; 11393 break; 11394 case OMPD_target_update: 11395 RTLFn = HasNowait ? OMPRTL___tgt_target_data_update_nowait_mapper 11396 : OMPRTL___tgt_target_data_update_mapper; 11397 break; 11398 case OMPD_parallel: 11399 case OMPD_for: 11400 case OMPD_parallel_for: 11401 case OMPD_parallel_master: 11402 case OMPD_parallel_sections: 11403 case OMPD_for_simd: 11404 case OMPD_parallel_for_simd: 11405 case OMPD_cancel: 11406 case OMPD_cancellation_point: 11407 case OMPD_ordered: 11408 case OMPD_threadprivate: 11409 case OMPD_allocate: 11410 case OMPD_task: 11411 case OMPD_simd: 11412 case OMPD_tile: 11413 case OMPD_unroll: 11414 case OMPD_sections: 11415 case OMPD_section: 11416 case OMPD_single: 11417 case OMPD_master: 11418 case OMPD_critical: 11419 case OMPD_taskyield: 11420 case OMPD_barrier: 11421 case OMPD_taskwait: 11422 case OMPD_taskgroup: 11423 case OMPD_atomic: 11424 case OMPD_flush: 11425 case OMPD_depobj: 11426 case OMPD_scan: 11427 case OMPD_teams: 11428 case OMPD_target_data: 11429 case OMPD_distribute: 11430 case OMPD_distribute_simd: 11431 case OMPD_distribute_parallel_for: 11432 case OMPD_distribute_parallel_for_simd: 11433 case OMPD_teams_distribute: 11434 case OMPD_teams_distribute_simd: 11435 case OMPD_teams_distribute_parallel_for: 11436 case OMPD_teams_distribute_parallel_for_simd: 11437 case OMPD_declare_simd: 11438 case OMPD_declare_variant: 11439 case OMPD_begin_declare_variant: 11440 case OMPD_end_declare_variant: 11441 case OMPD_declare_target: 11442 case OMPD_end_declare_target: 11443 case OMPD_declare_reduction: 11444 case OMPD_declare_mapper: 11445 case OMPD_taskloop: 11446 case OMPD_taskloop_simd: 11447 case OMPD_master_taskloop: 11448 case OMPD_master_taskloop_simd: 11449 case OMPD_parallel_master_taskloop: 11450 case OMPD_parallel_master_taskloop_simd: 11451 case OMPD_target: 11452 case OMPD_target_simd: 11453 case OMPD_target_teams_distribute: 11454 case OMPD_target_teams_distribute_simd: 11455 case OMPD_target_teams_distribute_parallel_for: 11456 
case OMPD_target_teams_distribute_parallel_for_simd:
11457     case OMPD_target_teams:
11458     case OMPD_target_parallel:
11459     case OMPD_target_parallel_for:
11460     case OMPD_target_parallel_for_simd:
11461     case OMPD_requires:
11462     case OMPD_metadirective:
11463     case OMPD_unknown:
11464     default:
11465       llvm_unreachable("Unexpected standalone target data directive.");
11466       break;
11467     }
11468     CGF.EmitRuntimeCall(
11469         OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), RTLFn),
11470         OffloadingArgs);
11471   };
11472
11473   auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray,
11474                           &MapNamesArray](CodeGenFunction &CGF,
11475                                           PrePostActionTy &) {
11476     // Fill up the arrays with all the mapped variables.
11477     MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
11478
11479     // Get map clause information.
11480     MappableExprsHandler MEHandler(D, CGF);
11481     MEHandler.generateAllInfo(CombinedInfo);
11482
11483     TargetDataInfo Info;
11484     // Fill up the arrays and create the arguments.
11485     emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder,
11486                          /*IsNonContiguous=*/true);
11487     bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>() ||
11488                              D.hasClausesOfKind<OMPNowaitClause>();
11489     emitOffloadingArraysArgument(
11490         CGF, Info.BasePointersArray, Info.PointersArray, Info.SizesArray,
11491         Info.MapTypesArray, Info.MapNamesArray, Info.MappersArray, Info,
11492         {/*ForEndTask=*/false});
11493     InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
11494     InputInfo.BasePointersArray =
11495         Address(Info.BasePointersArray, CGM.getPointerAlign());
11496     InputInfo.PointersArray =
11497         Address(Info.PointersArray, CGM.getPointerAlign());
11498     InputInfo.SizesArray =
11499         Address(Info.SizesArray, CGM.getPointerAlign());
11500     InputInfo.MappersArray = Address(Info.MappersArray, CGM.getPointerAlign());
11501     MapTypesArray = Info.MapTypesArray;
11502     MapNamesArray = Info.MapNamesArray;
11503     if (RequiresOuterTask)
11504       CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
11505     else
11506       emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
11507   };
11508
11509   if (IfCond) {
11510     emitIfClause(CGF, IfCond, TargetThenGen,
11511                  [](CodeGenFunction &CGF, PrePostActionTy &) {});
11512   } else {
11513     RegionCodeGenTy ThenRCG(TargetThenGen);
11514     ThenRCG(CGF);
11515   }
11516 }
11517
11518 namespace {
11519 /// Kind of parameter in a function with 'declare simd' directive.
11520 enum ParamKindTy { LinearWithVarStride, Linear, Uniform, Vector };
11521 /// Attribute set of the parameter.
11522 struct ParamAttrTy {
11523   ParamKindTy Kind = Vector;
11524   llvm::APSInt StrideOrArg;
11525   llvm::APSInt Alignment;
11526 };
11527 } // namespace
11528
11529 static unsigned evaluateCDTSize(const FunctionDecl *FD,
11530                                 ArrayRef<ParamAttrTy> ParamAttrs) {
11531   // Every vector variant of a SIMD-enabled function has a vector length
11532   // (VLEN). If the OpenMP clause "simdlen" is used, the VLEN is the value of
11533   // the argument of that clause. The VLEN value must be a power of 2.
11534   // Otherwise, the notion of the function's "characteristic data type" (CDT)
11535   // is used to compute the vector length.
11536   // CDT is defined in the following order:
11537   // a) For a non-void function, the CDT is the return type.
11538   // b) If the function has any non-uniform, non-linear parameters, then the
11539   //    CDT is the type of the first such parameter.
11540   // c) If the CDT determined by a) or b) above is a struct, union, or class
11541   //    type that is passed by value (except for the type that maps to the
11542   //    built-in complex data type), the characteristic data type is int.
11543   // d) If none of the above three cases is applicable, the CDT is int.
11544   // The VLEN is then determined based on the CDT and the size of the vector
11545   // register of the ISA for which the current vector version is generated.
11546   // The VLEN is computed using the formula below:
11547   //   VLEN = sizeof(vector_register) / sizeof(CDT),
11548   // where the vector register size is specified in section 3.2.1 "Registers
11549   // and the Stack Frame" of the original AMD64 ABI document.
11550   QualType RetType = FD->getReturnType();
11551   if (RetType.isNull())
11552     return 0;
11553   ASTContext &C = FD->getASTContext();
11554   QualType CDT;
11555   if (!RetType.isNull() && !RetType->isVoidType()) {
11556     CDT = RetType;
11557   } else {
11558     unsigned Offset = 0;
11559     if (const auto *MD = dyn_cast<CXXMethodDecl>(FD)) {
11560       if (ParamAttrs[Offset].Kind == Vector)
11561         CDT = C.getPointerType(C.getRecordType(MD->getParent()));
11562       ++Offset;
11563     }
11564     if (CDT.isNull()) {
11565       for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
11566         if (ParamAttrs[I + Offset].Kind == Vector) {
11567           CDT = FD->getParamDecl(I)->getType();
11568           break;
11569         }
11570       }
11571     }
11572   }
11573   if (CDT.isNull())
11574     CDT = C.IntTy;
11575   CDT = CDT->getCanonicalTypeUnqualified();
11576   if (CDT->isRecordType() || CDT->isUnionType())
11577     CDT = C.IntTy;
11578   return C.getTypeSize(CDT);
11579 }
11580
11581 static void
11582 emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn,
11583                            const llvm::APSInt &VLENVal,
11584                            ArrayRef<ParamAttrTy> ParamAttrs,
11585                            OMPDeclareSimdDeclAttr::BranchStateTy State) {
11586   struct ISADataTy {
11587     char ISA;
11588     unsigned VecRegSize;
11589   };
11590   ISADataTy ISAData[] = {
11591       {
11592           'b', 128
11593       }, // SSE
11594       {
11595           'c', 256
11596       }, // AVX
11597       {
11598           'd', 256
11599       }, // AVX2
11600       {
11601           'e', 512
11602       }, // AVX512
11603   };
11604   llvm::SmallVector<char, 2> Masked;
11605   switch (State) {
11606   case OMPDeclareSimdDeclAttr::BS_Undefined:
11607     Masked.push_back('N');
11608     Masked.push_back('M');
11609     break;
11610   case OMPDeclareSimdDeclAttr::BS_Notinbranch:
11611     Masked.push_back('N');
11612     break;
11613   case OMPDeclareSimdDeclAttr::BS_Inbranch:
11614     Masked.push_back('M');
11615     break;
11616   }
11617   for (char Mask : Masked) {
11618     for (const ISADataTy &Data : ISAData) {
11619       SmallString<256> Buffer;
11620       llvm::raw_svector_ostream Out(Buffer);
11621       Out << "_ZGV" << Data.ISA << Mask;
11622       if (!VLENVal) {
11623         unsigned NumElts = evaluateCDTSize(FD, ParamAttrs);
11624         assert(NumElts && "Non-zero simdlen/cdtsize expected");
11625         Out << llvm::APSInt::getUnsigned(Data.VecRegSize / NumElts);
11626       } else {
11627         Out << VLENVal;
11628       }
11629       for (const ParamAttrTy &ParamAttr : ParamAttrs) {
11630         switch (ParamAttr.Kind) {
11631         case LinearWithVarStride:
11632           Out << 's' << ParamAttr.StrideOrArg;
11633           break;
11634         case Linear:
11635           Out << 'l';
11636           if (ParamAttr.StrideOrArg != 1)
11637             Out << ParamAttr.StrideOrArg;
11638           break;
11639         case Uniform:
11640           Out << 'u';
11641           break;
11642         case Vector:
11643           Out << 'v';
11644           break;
11645         }
11646         if (!!ParamAttr.Alignment)
11647           Out << 'a' << ParamAttr.Alignment;
11648       }
11649       Out << '_' << Fn->getName();
11650       Fn->addFnAttr(Out.str());
11651     }
11652   }
11653 }
11654
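// For illustration only (the declaration below is hypothetical): given
//   #pragma omp declare simd simdlen(4) notinbranch uniform(n)
//   float foo(float *x, int n);
// the loops above add one attribute per ISA, e.g. "_ZGVbN4vu_foo" for SSE and
// "_ZGVeN4vu_foo" for AVX512: the prefix "_ZGV", the ISA letter, 'N' for the
// unmasked (notinbranch) variant, VLEN 4 from simdlen, 'v' (vector) and 'u'
// (uniform) for the two parameters, and finally the scalar function name.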
11655 // These are the functions that are needed to mangle the names of the
11656 // vector functions generated by the compiler, according to the rules
11657 // defined in the "Vector Function ABI specifications for AArch64",
11658 // available at
11659 // https://developer.arm.com/products/software-development-tools/hpc/arm-compiler-for-hpc/vector-function-abi.
11660
11661 /// Maps To Vector (MTV), as defined in 3.1.1 of the AAVFABI.
11662 ///
11663 /// TODO: Need to implement the behavior for references marked with a
11664 /// var or no linear modifiers (1.b in the section). For this, we
11665 /// need to extend ParamKindTy to support the linear modifiers.
11666 static bool getAArch64MTV(QualType QT, ParamKindTy Kind) {
11667   QT = QT.getCanonicalType();
11668
11669   if (QT->isVoidType())
11670     return false;
11671
11672   if (Kind == ParamKindTy::Uniform)
11673     return false;
11674
11675   if (Kind == ParamKindTy::Linear)
11676     return false;
11677
11678   // TODO: Handle linear references with modifiers.
11679
11680   if (Kind == ParamKindTy::LinearWithVarStride)
11681     return false;
11682
11683   return true;
11684 }
11685
11686 /// Pass By Value (PBV), as defined in 3.1.2 of the AAVFABI.
11687 static bool getAArch64PBV(QualType QT, ASTContext &C) {
11688   QT = QT.getCanonicalType();
11689   unsigned Size = C.getTypeSize(QT);
11690
11691   // Only scalars and complex types at most 16 bytes wide set PBV to true.
11692   if (Size != 8 && Size != 16 && Size != 32 && Size != 64 && Size != 128)
11693     return false;
11694
11695   if (QT->isFloatingType())
11696     return true;
11697
11698   if (QT->isIntegerType())
11699     return true;
11700
11701   if (QT->isPointerType())
11702     return true;
11703
11704   // TODO: Add support for complex types (section 3.1.2, item 2).
11705
11706   return false;
11707 }
11708
11709 /// Computes the lane size (LS) of a return type or of an input parameter,
11710 /// as defined by `LS(P)` in 3.2.1 of the AAVFABI.
11711 /// TODO: Add support for references, section 3.2.1, item 1.
11712 static unsigned getAArch64LS(QualType QT, ParamKindTy Kind, ASTContext &C) {
11713   if (!getAArch64MTV(QT, Kind) && QT.getCanonicalType()->isPointerType()) {
11714     QualType PTy = QT.getCanonicalType()->getPointeeType();
11715     if (getAArch64PBV(PTy, C))
11716       return C.getTypeSize(PTy);
11717   }
11718   if (getAArch64PBV(QT, C))
11719     return C.getTypeSize(QT);
11720
11721   return C.getTypeSize(C.getUIntPtrType());
11722 }
11723
11724 // Get Narrowest Data Size (NDS) and Widest Data Size (WDS) from the
11725 // signature of the scalar function, as defined in 3.2.2 of the
11726 // AAVFABI.
11727 static std::tuple<unsigned, unsigned, bool>
11728 getNDSWDS(const FunctionDecl *FD, ArrayRef<ParamAttrTy> ParamAttrs) {
11729   QualType RetType = FD->getReturnType().getCanonicalType();
11730
11731   ASTContext &C = FD->getASTContext();
11732
11733   bool OutputBecomesInput = false;
11734
11735   llvm::SmallVector<unsigned, 8> Sizes;
11736   if (!RetType->isVoidType()) {
11737     Sizes.push_back(getAArch64LS(RetType, ParamKindTy::Vector, C));
11738     if (!getAArch64PBV(RetType, C) && getAArch64MTV(RetType, {}))
11739       OutputBecomesInput = true;
11740   }
11741   for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
11742     QualType QT = FD->getParamDecl(I)->getType().getCanonicalType();
11743     Sizes.push_back(getAArch64LS(QT, ParamAttrs[I].Kind, C));
11744   }
11745
11746   assert(!Sizes.empty() && "Unable to determine NDS and WDS.");
11747   // The LS of a function parameter / return value can only be a power
11748   // of 2, starting from 8 bits, up to 128.
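  // For illustration only (the signature below is hypothetical): for
  //   #pragma omp declare simd linear(p)
  //   double foo(float x, int *p);
  // LS(return) = 64 and LS(x) = 32, while LS(p) = 32 because a linear pointer
  // is not MTV and its pointee is PBV, so the pointee size is used. That
  // gives NDS = 32 and WDS = 64.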
11749 assert(llvm::all_of(Sizes, 11750 [](unsigned Size) { 11751 return Size == 8 || Size == 16 || Size == 32 || 11752 Size == 64 || Size == 128; 11753 }) && 11754 "Invalid size"); 11755 11756 return std::make_tuple(*std::min_element(std::begin(Sizes), std::end(Sizes)), 11757 *std::max_element(std::begin(Sizes), std::end(Sizes)), 11758 OutputBecomesInput); 11759 } 11760 11761 /// Mangle the parameter part of the vector function name according to 11762 /// their OpenMP classification. The mangling function is defined in 11763 /// section 3.5 of the AAVFABI. 11764 static std::string mangleVectorParameters(ArrayRef<ParamAttrTy> ParamAttrs) { 11765 SmallString<256> Buffer; 11766 llvm::raw_svector_ostream Out(Buffer); 11767 for (const auto &ParamAttr : ParamAttrs) { 11768 switch (ParamAttr.Kind) { 11769 case LinearWithVarStride: 11770 Out << "ls" << ParamAttr.StrideOrArg; 11771 break; 11772 case Linear: 11773 Out << 'l'; 11774 // Don't print the step value if it is not present or if it is 11775 // equal to 1. 11776 if (ParamAttr.StrideOrArg != 1) 11777 Out << ParamAttr.StrideOrArg; 11778 break; 11779 case Uniform: 11780 Out << 'u'; 11781 break; 11782 case Vector: 11783 Out << 'v'; 11784 break; 11785 } 11786 11787 if (!!ParamAttr.Alignment) 11788 Out << 'a' << ParamAttr.Alignment; 11789 } 11790 11791 return std::string(Out.str()); 11792 } 11793 11794 // Function used to add the attribute. The parameter `VLEN` is 11795 // templated to allow the use of "x" when targeting scalable functions 11796 // for SVE. 11797 template <typename T> 11798 static void addAArch64VectorName(T VLEN, StringRef LMask, StringRef Prefix, 11799 char ISA, StringRef ParSeq, 11800 StringRef MangledName, bool OutputBecomesInput, 11801 llvm::Function *Fn) { 11802 SmallString<256> Buffer; 11803 llvm::raw_svector_ostream Out(Buffer); 11804 Out << Prefix << ISA << LMask << VLEN; 11805 if (OutputBecomesInput) 11806 Out << "v"; 11807 Out << ParSeq << "_" << MangledName; 11808 Fn->addFnAttr(Out.str()); 11809 } 11810 11811 // Helper function to generate the Advanced SIMD names depending on 11812 // the value of the NDS when simdlen is not present. 11813 static void addAArch64AdvSIMDNDSNames(unsigned NDS, StringRef Mask, 11814 StringRef Prefix, char ISA, 11815 StringRef ParSeq, StringRef MangledName, 11816 bool OutputBecomesInput, 11817 llvm::Function *Fn) { 11818 switch (NDS) { 11819 case 8: 11820 addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName, 11821 OutputBecomesInput, Fn); 11822 addAArch64VectorName(16, Mask, Prefix, ISA, ParSeq, MangledName, 11823 OutputBecomesInput, Fn); 11824 break; 11825 case 16: 11826 addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName, 11827 OutputBecomesInput, Fn); 11828 addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName, 11829 OutputBecomesInput, Fn); 11830 break; 11831 case 32: 11832 addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName, 11833 OutputBecomesInput, Fn); 11834 addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName, 11835 OutputBecomesInput, Fn); 11836 break; 11837 case 64: 11838 case 128: 11839 addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName, 11840 OutputBecomesInput, Fn); 11841 break; 11842 default: 11843 llvm_unreachable("Scalar type is too wide."); 11844 } 11845 } 11846 11847 /// Emit vector function attributes for AArch64, as defined in the AAVFABI. 
11848 static void emitAArch64DeclareSimdFunction(
11849     CodeGenModule &CGM, const FunctionDecl *FD, unsigned UserVLEN,
11850     ArrayRef<ParamAttrTy> ParamAttrs,
11851     OMPDeclareSimdDeclAttr::BranchStateTy State, StringRef MangledName,
11852     char ISA, unsigned VecRegSize, llvm::Function *Fn, SourceLocation SLoc) {
11853
11854   // Get basic data for building the vector signature.
11855   const auto Data = getNDSWDS(FD, ParamAttrs);
11856   const unsigned NDS = std::get<0>(Data);
11857   const unsigned WDS = std::get<1>(Data);
11858   const bool OutputBecomesInput = std::get<2>(Data);
11859
11860   // Check the values provided via `simdlen` by the user.
11861   // 1. A `simdlen(1)` doesn't produce vector signatures.
11862   if (UserVLEN == 1) {
11863     unsigned DiagID = CGM.getDiags().getCustomDiagID(
11864         DiagnosticsEngine::Warning,
11865         "The clause simdlen(1) has no effect when targeting aarch64.");
11866     CGM.getDiags().Report(SLoc, DiagID);
11867     return;
11868   }
11869
11870   // 2. Section 3.3.1, item 1: user input must be a power of 2 for
11871   // Advanced SIMD output.
11872   if (ISA == 'n' && UserVLEN && !llvm::isPowerOf2_32(UserVLEN)) {
11873     unsigned DiagID = CGM.getDiags().getCustomDiagID(
11874         DiagnosticsEngine::Warning, "The value specified in simdlen must be a "
11875                                     "power of 2 when targeting Advanced SIMD.");
11876     CGM.getDiags().Report(SLoc, DiagID);
11877     return;
11878   }
11879
11880   // 3. Section 3.4.1. SVE fixed length must obey the architectural
11881   // limits.
11882   if (ISA == 's' && UserVLEN != 0) {
11883     if ((UserVLEN * WDS > 2048) || (UserVLEN * WDS % 128 != 0)) {
11884       unsigned DiagID = CGM.getDiags().getCustomDiagID(
11885           DiagnosticsEngine::Warning, "The clause simdlen must fit the %0-bit "
11886                                       "lanes in the architectural constraints "
11887                                       "for SVE (min is 128-bit, max is "
11888                                       "2048-bit, by steps of 128-bit)");
11889       CGM.getDiags().Report(SLoc, DiagID) << WDS;
11890       return;
11891     }
11892   }
11893
11894   // Sort out parameter sequence.
11895   const std::string ParSeq = mangleVectorParameters(ParamAttrs);
11896   StringRef Prefix = "_ZGV";
11897   // Generate simdlen from user input (if any).
11898   if (UserVLEN) {
11899     if (ISA == 's') {
11900       // SVE generates only a masked function.
11901       addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
11902                            OutputBecomesInput, Fn);
11903     } else {
11904       assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
11905       // Advanced SIMD generates one or two functions, depending on
11906       // the `[not]inbranch` clause.
11907       switch (State) {
11908       case OMPDeclareSimdDeclAttr::BS_Undefined:
11909         addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
11910                              OutputBecomesInput, Fn);
11911         addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
11912                              OutputBecomesInput, Fn);
11913         break;
11914       case OMPDeclareSimdDeclAttr::BS_Notinbranch:
11915         addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
11916                              OutputBecomesInput, Fn);
11917         break;
11918       case OMPDeclareSimdDeclAttr::BS_Inbranch:
11919         addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
11920                              OutputBecomesInput, Fn);
11921         break;
11922       }
11923     }
11924   } else {
11925     // If no user simdlen is provided, follow the AAVFABI rules for
11926     // generating the vector length.
11927     if (ISA == 's') {
11928       // SVE, section 3.4.1, item 1.
11929 addAArch64VectorName("x", "M", Prefix, ISA, ParSeq, MangledName, 11930 OutputBecomesInput, Fn); 11931 } else { 11932 assert(ISA == 'n' && "Expected ISA either 's' or 'n'."); 11933 // Advanced SIMD, Section 3.3.1 of the AAVFABI, generates one or 11934 // two vector names depending on the use of the clause 11935 // `[not]inbranch`. 11936 switch (State) { 11937 case OMPDeclareSimdDeclAttr::BS_Undefined: 11938 addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName, 11939 OutputBecomesInput, Fn); 11940 addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName, 11941 OutputBecomesInput, Fn); 11942 break; 11943 case OMPDeclareSimdDeclAttr::BS_Notinbranch: 11944 addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName, 11945 OutputBecomesInput, Fn); 11946 break; 11947 case OMPDeclareSimdDeclAttr::BS_Inbranch: 11948 addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName, 11949 OutputBecomesInput, Fn); 11950 break; 11951 } 11952 } 11953 } 11954 } 11955 11956 void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD, 11957 llvm::Function *Fn) { 11958 ASTContext &C = CGM.getContext(); 11959 FD = FD->getMostRecentDecl(); 11960 // Map params to their positions in function decl. 11961 llvm::DenseMap<const Decl *, unsigned> ParamPositions; 11962 if (isa<CXXMethodDecl>(FD)) 11963 ParamPositions.try_emplace(FD, 0); 11964 unsigned ParamPos = ParamPositions.size(); 11965 for (const ParmVarDecl *P : FD->parameters()) { 11966 ParamPositions.try_emplace(P->getCanonicalDecl(), ParamPos); 11967 ++ParamPos; 11968 } 11969 while (FD) { 11970 for (const auto *Attr : FD->specific_attrs<OMPDeclareSimdDeclAttr>()) { 11971 llvm::SmallVector<ParamAttrTy, 8> ParamAttrs(ParamPositions.size()); 11972 // Mark uniform parameters. 11973 for (const Expr *E : Attr->uniforms()) { 11974 E = E->IgnoreParenImpCasts(); 11975 unsigned Pos; 11976 if (isa<CXXThisExpr>(E)) { 11977 Pos = ParamPositions[FD]; 11978 } else { 11979 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl()) 11980 ->getCanonicalDecl(); 11981 Pos = ParamPositions[PVD]; 11982 } 11983 ParamAttrs[Pos].Kind = Uniform; 11984 } 11985 // Get alignment info. 11986 auto NI = Attr->alignments_begin(); 11987 for (const Expr *E : Attr->aligneds()) { 11988 E = E->IgnoreParenImpCasts(); 11989 unsigned Pos; 11990 QualType ParmTy; 11991 if (isa<CXXThisExpr>(E)) { 11992 Pos = ParamPositions[FD]; 11993 ParmTy = E->getType(); 11994 } else { 11995 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl()) 11996 ->getCanonicalDecl(); 11997 Pos = ParamPositions[PVD]; 11998 ParmTy = PVD->getType(); 11999 } 12000 ParamAttrs[Pos].Alignment = 12001 (*NI) 12002 ? (*NI)->EvaluateKnownConstInt(C) 12003 : llvm::APSInt::getUnsigned( 12004 C.toCharUnitsFromBits(C.getOpenMPDefaultSimdAlign(ParmTy)) 12005 .getQuantity()); 12006 ++NI; 12007 } 12008 // Mark linear parameters. 12009 auto SI = Attr->steps_begin(); 12010 auto MI = Attr->modifiers_begin(); 12011 for (const Expr *E : Attr->linears()) { 12012 E = E->IgnoreParenImpCasts(); 12013 unsigned Pos; 12014 // Rescaling factor needed to compute the linear parameter 12015 // value in the mangled name. 
unsigned PtrRescalingFactor = 1;
12017         if (isa<CXXThisExpr>(E)) {
12018           Pos = ParamPositions[FD];
12019         } else {
12020           const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
12021                                 ->getCanonicalDecl();
12022           Pos = ParamPositions[PVD];
12023           if (auto *P = dyn_cast<PointerType>(PVD->getType()))
12024             PtrRescalingFactor = CGM.getContext()
12025                                      .getTypeSizeInChars(P->getPointeeType())
12026                                      .getQuantity();
12027         }
12028         ParamAttrTy &ParamAttr = ParamAttrs[Pos];
12029         ParamAttr.Kind = Linear;
12030         // Assuming a stride of 1, for `linear` without modifiers.
12031         ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(1);
12032         if (*SI) {
12033           Expr::EvalResult Result;
12034           if (!(*SI)->EvaluateAsInt(Result, C, Expr::SE_AllowSideEffects)) {
12035             // Use dyn_cast here: the non-constant step is not guaranteed to be
12036             // a reference to a parameter.
12037             if (const auto *DRE =
12038                     dyn_cast<DeclRefExpr>((*SI)->IgnoreParenImpCasts())) {
12038               if (const auto *StridePVD =
12039                       dyn_cast<ParmVarDecl>(DRE->getDecl())) {
12040                 ParamAttr.Kind = LinearWithVarStride;
12041                 ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(
12042                     ParamPositions[StridePVD->getCanonicalDecl()]);
12043               }
12044             }
12045           } else {
12046             ParamAttr.StrideOrArg = Result.Val.getInt();
12047           }
12048         }
12049         // If we are using a linear clause on a pointer, we need to
12050         // rescale the value of linear_step with the byte size of the
12051         // pointee type.
12052         if (Linear == ParamAttr.Kind)
12053           ParamAttr.StrideOrArg = ParamAttr.StrideOrArg * PtrRescalingFactor;
12054         ++SI;
12055         ++MI;
12056       }
12057       llvm::APSInt VLENVal;
12058       SourceLocation ExprLoc;
12059       const Expr *VLENExpr = Attr->getSimdlen();
12060       if (VLENExpr) {
12061         VLENVal = VLENExpr->EvaluateKnownConstInt(C);
12062         ExprLoc = VLENExpr->getExprLoc();
12063       }
12064       OMPDeclareSimdDeclAttr::BranchStateTy State = Attr->getBranchState();
12065       if (CGM.getTriple().isX86()) {
12066         emitX86DeclareSimdFunction(FD, Fn, VLENVal, ParamAttrs, State);
12067       } else if (CGM.getTriple().getArch() == llvm::Triple::aarch64) {
12068         unsigned VLEN = VLENVal.getExtValue();
12069         StringRef MangledName = Fn->getName();
12070         if (CGM.getTarget().hasFeature("sve"))
12071           emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
12072                                          MangledName, 's', 128, Fn, ExprLoc);
12073         if (CGM.getTarget().hasFeature("neon"))
12074           emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
12075                                          MangledName, 'n', 128, Fn, ExprLoc);
12076       }
12077     }
12078     FD = FD->getPreviousDecl();
12077   }
12078 }
12079
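// For illustration only (the declaration below is hypothetical): for
//   #pragma omp declare simd
//   double foo(double x);
// on an AArch64 target with NEON and SVE, the calls above add the attributes
// "_ZGVnN2v_foo" and "_ZGVnM2v_foo" (Advanced SIMD; NDS = 64, so VLEN = 2,
// with both masked and unmasked variants since no [not]inbranch is given)
// and "_ZGVsMxv_foo" (SVE, scalable "x" vector length, always masked).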
12080 namespace {
12081 /// Cleanup action for doacross support.
12082 class DoacrossCleanupTy final : public EHScopeStack::Cleanup {
12083 public:
12084   static const int DoacrossFinArgs = 2;
12085
12086 private:
12087   llvm::FunctionCallee RTLFn;
12088   llvm::Value *Args[DoacrossFinArgs];
12089
12090 public:
12091   DoacrossCleanupTy(llvm::FunctionCallee RTLFn,
12092                     ArrayRef<llvm::Value *> CallArgs)
12093       : RTLFn(RTLFn) {
12094     assert(CallArgs.size() == DoacrossFinArgs);
12095     std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args));
12096   }
12097   void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
12098     if (!CGF.HaveInsertPoint())
12099       return;
12100     CGF.EmitRuntimeCall(RTLFn, Args);
12101   }
12102 };
12103 } // namespace
12104
12105 void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF,
12106                                        const OMPLoopDirective &D,
12107                                        ArrayRef<Expr *> NumIterations) {
12108   if (!CGF.HaveInsertPoint())
12109     return;
12110
12111   ASTContext &C = CGM.getContext();
12112   QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
12113   RecordDecl *RD;
12114   if (KmpDimTy.isNull()) {
12115     // Build struct kmp_dim { // loop bounds info cast to kmp_int64
12116     //   kmp_int64 lo; // lower
12117     //   kmp_int64 up; // upper
12118     //   kmp_int64 st; // stride
12119     // };
12120     RD = C.buildImplicitRecord("kmp_dim");
12121     RD->startDefinition();
12122     addFieldToRecordDecl(C, RD, Int64Ty);
12123     addFieldToRecordDecl(C, RD, Int64Ty);
12124     addFieldToRecordDecl(C, RD, Int64Ty);
12125     RD->completeDefinition();
12126     KmpDimTy = C.getRecordType(RD);
12127   } else {
12128     RD = cast<RecordDecl>(KmpDimTy->getAsTagDecl());
12129   }
12130   llvm::APInt Size(/*numBits=*/32, NumIterations.size());
12131   QualType ArrayTy =
12132       C.getConstantArrayType(KmpDimTy, Size, nullptr, ArrayType::Normal, 0);
12133
12134   Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims");
12135   CGF.EmitNullInitialization(DimsAddr, ArrayTy);
12136   enum { LowerFD = 0, UpperFD, StrideFD };
12137   // Fill dims with data.
12138 for (unsigned I = 0, E = NumIterations.size(); I < E; ++I) { 12139 LValue DimsLVal = CGF.MakeAddrLValue( 12140 CGF.Builder.CreateConstArrayGEP(DimsAddr, I), KmpDimTy); 12141 // dims.upper = num_iterations; 12142 LValue UpperLVal = CGF.EmitLValueForField( 12143 DimsLVal, *std::next(RD->field_begin(), UpperFD)); 12144 llvm::Value *NumIterVal = CGF.EmitScalarConversion( 12145 CGF.EmitScalarExpr(NumIterations[I]), NumIterations[I]->getType(), 12146 Int64Ty, NumIterations[I]->getExprLoc()); 12147 CGF.EmitStoreOfScalar(NumIterVal, UpperLVal); 12148 // dims.stride = 1; 12149 LValue StrideLVal = CGF.EmitLValueForField( 12150 DimsLVal, *std::next(RD->field_begin(), StrideFD)); 12151 CGF.EmitStoreOfScalar(llvm::ConstantInt::getSigned(CGM.Int64Ty, /*V=*/1), 12152 StrideLVal); 12153 } 12154 12155 // Build call void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid, 12156 // kmp_int32 num_dims, struct kmp_dim * dims); 12157 llvm::Value *Args[] = { 12158 emitUpdateLocation(CGF, D.getBeginLoc()), 12159 getThreadID(CGF, D.getBeginLoc()), 12160 llvm::ConstantInt::getSigned(CGM.Int32Ty, NumIterations.size()), 12161 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 12162 CGF.Builder.CreateConstArrayGEP(DimsAddr, 0).getPointer(), 12163 CGM.VoidPtrTy)}; 12164 12165 llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction( 12166 CGM.getModule(), OMPRTL___kmpc_doacross_init); 12167 CGF.EmitRuntimeCall(RTLFn, Args); 12168 llvm::Value *FiniArgs[DoacrossCleanupTy::DoacrossFinArgs] = { 12169 emitUpdateLocation(CGF, D.getEndLoc()), getThreadID(CGF, D.getEndLoc())}; 12170 llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction( 12171 CGM.getModule(), OMPRTL___kmpc_doacross_fini); 12172 CGF.EHStack.pushCleanup<DoacrossCleanupTy>(NormalAndEHCleanup, FiniRTLFn, 12173 llvm::makeArrayRef(FiniArgs)); 12174 } 12175 12176 void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF, 12177 const OMPDependClause *C) { 12178 QualType Int64Ty = 12179 CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1); 12180 llvm::APInt Size(/*numBits=*/32, C->getNumLoops()); 12181 QualType ArrayTy = CGM.getContext().getConstantArrayType( 12182 Int64Ty, Size, nullptr, ArrayType::Normal, 0); 12183 Address CntAddr = CGF.CreateMemTemp(ArrayTy, ".cnt.addr"); 12184 for (unsigned I = 0, E = C->getNumLoops(); I < E; ++I) { 12185 const Expr *CounterVal = C->getLoopData(I); 12186 assert(CounterVal); 12187 llvm::Value *CntVal = CGF.EmitScalarConversion( 12188 CGF.EmitScalarExpr(CounterVal), CounterVal->getType(), Int64Ty, 12189 CounterVal->getExprLoc()); 12190 CGF.EmitStoreOfScalar(CntVal, CGF.Builder.CreateConstArrayGEP(CntAddr, I), 12191 /*Volatile=*/false, Int64Ty); 12192 } 12193 llvm::Value *Args[] = { 12194 emitUpdateLocation(CGF, C->getBeginLoc()), 12195 getThreadID(CGF, C->getBeginLoc()), 12196 CGF.Builder.CreateConstArrayGEP(CntAddr, 0).getPointer()}; 12197 llvm::FunctionCallee RTLFn; 12198 if (C->getDependencyKind() == OMPC_DEPEND_source) { 12199 RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), 12200 OMPRTL___kmpc_doacross_post); 12201 } else { 12202 assert(C->getDependencyKind() == OMPC_DEPEND_sink); 12203 RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), 12204 OMPRTL___kmpc_doacross_wait); 12205 } 12206 CGF.EmitRuntimeCall(RTLFn, Args); 12207 } 12208 12209 void CGOpenMPRuntime::emitCall(CodeGenFunction &CGF, SourceLocation Loc, 12210 llvm::FunctionCallee Callee, 12211 ArrayRef<llvm::Value *> Args) const { 12212 assert(Loc.isValid() && "Outlined function call location 
must be valid.");
12213   auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
12214
12215   if (auto *Fn = dyn_cast<llvm::Function>(Callee.getCallee())) {
12216     if (Fn->doesNotThrow()) {
12217       CGF.EmitNounwindRuntimeCall(Fn, Args);
12218       return;
12219     }
12220   }
12221   CGF.EmitRuntimeCall(Callee, Args);
12222 }
12223
12224 void CGOpenMPRuntime::emitOutlinedFunctionCall(
12225     CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn,
12226     ArrayRef<llvm::Value *> Args) const {
12227   emitCall(CGF, Loc, OutlinedFn, Args);
12228 }
12229
12230 void CGOpenMPRuntime::emitFunctionProlog(CodeGenFunction &CGF, const Decl *D) {
12231   if (const auto *FD = dyn_cast<FunctionDecl>(D))
12232     if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(FD))
12233       HasEmittedDeclareTargetRegion = true;
12234 }
12235
12236 Address CGOpenMPRuntime::getParameterAddress(CodeGenFunction &CGF,
12237                                              const VarDecl *NativeParam,
12238                                              const VarDecl *TargetParam) const {
12239   return CGF.GetAddrOfLocalVar(NativeParam);
12240 }
12241
12242 Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF,
12243                                                    const VarDecl *VD) {
12244   if (!VD)
12245     return Address::invalid();
12246   Address UntiedAddr = Address::invalid();
12247   Address UntiedRealAddr = Address::invalid();
12248   auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn);
12249   if (It != FunctionToUntiedTaskStackMap.end()) {
12250     const UntiedLocalVarsAddressesMap &UntiedData =
12251         UntiedLocalVarsStack[It->second];
12252     auto I = UntiedData.find(VD);
12253     if (I != UntiedData.end()) {
12254       UntiedAddr = I->second.first;
12255       UntiedRealAddr = I->second.second;
12256     }
12257   }
12258   const VarDecl *CVD = VD->getCanonicalDecl();
12259   if (CVD->hasAttr<OMPAllocateDeclAttr>()) {
12260     // Use the default allocation.
12261     if (!isAllocatableDecl(VD))
12262       return UntiedAddr;
12263     llvm::Value *Size;
12264     CharUnits Align = CGM.getContext().getDeclAlign(CVD);
12265     if (CVD->getType()->isVariablyModifiedType()) {
12266       Size = CGF.getTypeSize(CVD->getType());
12267       // Align the size: ((size + align - 1) / align) * align
12268       Size = CGF.Builder.CreateNUWAdd(
12269           Size, CGM.getSize(Align - CharUnits::fromQuantity(1)));
12270       Size = CGF.Builder.CreateUDiv(Size, CGM.getSize(Align));
12271       Size = CGF.Builder.CreateNUWMul(Size, CGM.getSize(Align));
12272     } else {
12273       CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType());
12274       Size = CGM.getSize(Sz.alignTo(Align));
12275     }
12276     llvm::Value *ThreadID = getThreadID(CGF, CVD->getBeginLoc());
12277     const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
12278     assert(AA->getAllocator() &&
12279            "Expected allocator expression for non-default allocator.");
12280     llvm::Value *Allocator = CGF.EmitScalarExpr(AA->getAllocator());
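    // For illustration only (hypothetical user code, not emitted from this
    // file): for
    //   int a;
    //   #pragma omp allocate(a) allocator(omp_high_bw_mem_alloc)
    // the code below emits, schematically,
    //   %a.void.addr = call i8* @__kmpc_alloc(i32 %gtid, i64 4, i8* %allocator)
    // and registers a cleanup that passes the same allocator to @__kmpc_free
    // when the variable goes out of scope.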
    // According to the standard, the original allocator type is an enum
    // (integer). Convert to pointer type, if required.
    Allocator = CGF.EmitScalarConversion(
        Allocator, AA->getAllocator()->getType(), CGF.getContext().VoidPtrTy,
        AA->getAllocator()->getExprLoc());
12286     llvm::Value *Args[] = {ThreadID, Size, Allocator};
12287
12288     llvm::Value *Addr =
12289         CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
12290                                 CGM.getModule(), OMPRTL___kmpc_alloc),
12291                             Args, getName({CVD->getName(), ".void.addr"}));
12292     llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
12293         CGM.getModule(), OMPRTL___kmpc_free);
12294     QualType Ty = CGM.getContext().getPointerType(CVD->getType());
12295     Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
12296         Addr, CGF.ConvertTypeForMem(Ty), getName({CVD->getName(), ".addr"}));
12297     if (UntiedAddr.isValid())
12298       CGF.EmitStoreOfScalar(Addr, UntiedAddr, /*Volatile=*/false, Ty);
12299
12300     // Cleanup action for allocate support.
12301     class OMPAllocateCleanupTy final : public EHScopeStack::Cleanup {
12302       llvm::FunctionCallee RTLFn;
12303       SourceLocation::UIntTy LocEncoding;
12304       Address Addr;
12305       const Expr *Allocator;
12306
12307     public:
12308       OMPAllocateCleanupTy(llvm::FunctionCallee RTLFn,
12309                            SourceLocation::UIntTy LocEncoding, Address Addr,
12310                            const Expr *Allocator)
12311           : RTLFn(RTLFn), LocEncoding(LocEncoding), Addr(Addr),
12312             Allocator(Allocator) {}
12313       void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
12314         if (!CGF.HaveInsertPoint())
12315           return;
12316         llvm::Value *Args[3];
12317         Args[0] = CGF.CGM.getOpenMPRuntime().getThreadID(
12318             CGF, SourceLocation::getFromRawEncoding(LocEncoding));
12319         Args[1] = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
12320             Addr.getPointer(), CGF.VoidPtrTy);
12321         llvm::Value *AllocVal = CGF.EmitScalarExpr(Allocator);
12322         // According to the standard, the original allocator type is an enum
12323         // (integer). Convert to pointer type, if required.
12324         AllocVal = CGF.EmitScalarConversion(AllocVal, Allocator->getType(),
12325                                             CGF.getContext().VoidPtrTy,
12326                                             Allocator->getExprLoc());
12327         Args[2] = AllocVal;
12328
12329         CGF.EmitRuntimeCall(RTLFn, Args);
12330       }
12331     };
12332     Address VDAddr =
12333         UntiedRealAddr.isValid() ?
UntiedRealAddr : Address(Addr, Align); 12334 CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>( 12335 NormalAndEHCleanup, FiniRTLFn, CVD->getLocation().getRawEncoding(), 12336 VDAddr, AA->getAllocator()); 12337 if (UntiedRealAddr.isValid()) 12338 if (auto *Region = 12339 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) 12340 Region->emitUntiedSwitch(CGF); 12341 return VDAddr; 12342 } 12343 return UntiedAddr; 12344 } 12345 12346 bool CGOpenMPRuntime::isLocalVarInUntiedTask(CodeGenFunction &CGF, 12347 const VarDecl *VD) const { 12348 auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn); 12349 if (It == FunctionToUntiedTaskStackMap.end()) 12350 return false; 12351 return UntiedLocalVarsStack[It->second].count(VD) > 0; 12352 } 12353 12354 CGOpenMPRuntime::NontemporalDeclsRAII::NontemporalDeclsRAII( 12355 CodeGenModule &CGM, const OMPLoopDirective &S) 12356 : CGM(CGM), NeedToPush(S.hasClausesOfKind<OMPNontemporalClause>()) { 12357 assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode."); 12358 if (!NeedToPush) 12359 return; 12360 NontemporalDeclsSet &DS = 12361 CGM.getOpenMPRuntime().NontemporalDeclsStack.emplace_back(); 12362 for (const auto *C : S.getClausesOfKind<OMPNontemporalClause>()) { 12363 for (const Stmt *Ref : C->private_refs()) { 12364 const auto *SimpleRefExpr = cast<Expr>(Ref)->IgnoreParenImpCasts(); 12365 const ValueDecl *VD; 12366 if (const auto *DRE = dyn_cast<DeclRefExpr>(SimpleRefExpr)) { 12367 VD = DRE->getDecl(); 12368 } else { 12369 const auto *ME = cast<MemberExpr>(SimpleRefExpr); 12370 assert((ME->isImplicitCXXThis() || 12371 isa<CXXThisExpr>(ME->getBase()->IgnoreParenImpCasts())) && 12372 "Expected member of current class."); 12373 VD = ME->getMemberDecl(); 12374 } 12375 DS.insert(VD); 12376 } 12377 } 12378 } 12379 12380 CGOpenMPRuntime::NontemporalDeclsRAII::~NontemporalDeclsRAII() { 12381 if (!NeedToPush) 12382 return; 12383 CGM.getOpenMPRuntime().NontemporalDeclsStack.pop_back(); 12384 } 12385 12386 CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::UntiedTaskLocalDeclsRAII( 12387 CodeGenFunction &CGF, 12388 const llvm::MapVector<CanonicalDeclPtr<const VarDecl>, 12389 std::pair<Address, Address>> &LocalVars) 12390 : CGM(CGF.CGM), NeedToPush(!LocalVars.empty()) { 12391 if (!NeedToPush) 12392 return; 12393 CGM.getOpenMPRuntime().FunctionToUntiedTaskStackMap.try_emplace( 12394 CGF.CurFn, CGM.getOpenMPRuntime().UntiedLocalVarsStack.size()); 12395 CGM.getOpenMPRuntime().UntiedLocalVarsStack.push_back(LocalVars); 12396 } 12397 12398 CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::~UntiedTaskLocalDeclsRAII() { 12399 if (!NeedToPush) 12400 return; 12401 CGM.getOpenMPRuntime().UntiedLocalVarsStack.pop_back(); 12402 } 12403 12404 bool CGOpenMPRuntime::isNontemporalDecl(const ValueDecl *VD) const { 12405 assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode."); 12406 12407 return llvm::any_of( 12408 CGM.getOpenMPRuntime().NontemporalDeclsStack, 12409 [VD](const NontemporalDeclsSet &Set) { return Set.contains(VD); }); 12410 } 12411 12412 void CGOpenMPRuntime::LastprivateConditionalRAII::tryToDisableInnerAnalysis( 12413 const OMPExecutableDirective &S, 12414 llvm::DenseSet<CanonicalDeclPtr<const Decl>> &NeedToAddForLPCsAsDisabled) 12415 const { 12416 llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToCheckForLPCs; 12417 // Vars in target/task regions must be excluded completely. 
12418 if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()) || 12419 isOpenMPTaskingDirective(S.getDirectiveKind())) { 12420 SmallVector<OpenMPDirectiveKind, 4> CaptureRegions; 12421 getOpenMPCaptureRegions(CaptureRegions, S.getDirectiveKind()); 12422 const CapturedStmt *CS = S.getCapturedStmt(CaptureRegions.front()); 12423 for (const CapturedStmt::Capture &Cap : CS->captures()) { 12424 if (Cap.capturesVariable() || Cap.capturesVariableByCopy()) 12425 NeedToCheckForLPCs.insert(Cap.getCapturedVar()); 12426 } 12427 } 12428 // Exclude vars in private clauses. 12429 for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) { 12430 for (const Expr *Ref : C->varlists()) { 12431 if (!Ref->getType()->isScalarType()) 12432 continue; 12433 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts()); 12434 if (!DRE) 12435 continue; 12436 NeedToCheckForLPCs.insert(DRE->getDecl()); 12437 } 12438 } 12439 for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) { 12440 for (const Expr *Ref : C->varlists()) { 12441 if (!Ref->getType()->isScalarType()) 12442 continue; 12443 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts()); 12444 if (!DRE) 12445 continue; 12446 NeedToCheckForLPCs.insert(DRE->getDecl()); 12447 } 12448 } 12449 for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) { 12450 for (const Expr *Ref : C->varlists()) { 12451 if (!Ref->getType()->isScalarType()) 12452 continue; 12453 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts()); 12454 if (!DRE) 12455 continue; 12456 NeedToCheckForLPCs.insert(DRE->getDecl()); 12457 } 12458 } 12459 for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) { 12460 for (const Expr *Ref : C->varlists()) { 12461 if (!Ref->getType()->isScalarType()) 12462 continue; 12463 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts()); 12464 if (!DRE) 12465 continue; 12466 NeedToCheckForLPCs.insert(DRE->getDecl()); 12467 } 12468 } 12469 for (const auto *C : S.getClausesOfKind<OMPLinearClause>()) { 12470 for (const Expr *Ref : C->varlists()) { 12471 if (!Ref->getType()->isScalarType()) 12472 continue; 12473 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts()); 12474 if (!DRE) 12475 continue; 12476 NeedToCheckForLPCs.insert(DRE->getDecl()); 12477 } 12478 } 12479 for (const Decl *VD : NeedToCheckForLPCs) { 12480 for (const LastprivateConditionalData &Data : 12481 llvm::reverse(CGM.getOpenMPRuntime().LastprivateConditionalStack)) { 12482 if (Data.DeclToUniqueName.count(VD) > 0) { 12483 if (!Data.Disabled) 12484 NeedToAddForLPCsAsDisabled.insert(VD); 12485 break; 12486 } 12487 } 12488 } 12489 } 12490 12491 CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII( 12492 CodeGenFunction &CGF, const OMPExecutableDirective &S, LValue IVLVal) 12493 : CGM(CGF.CGM), 12494 Action((CGM.getLangOpts().OpenMP >= 50 && 12495 llvm::any_of(S.getClausesOfKind<OMPLastprivateClause>(), 12496 [](const OMPLastprivateClause *C) { 12497 return C->getKind() == 12498 OMPC_LASTPRIVATE_conditional; 12499 })) 12500 ? 
ActionToDo::PushAsLastprivateConditional
12501                  : ActionToDo::DoNotPush) {
12502   assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
12503   if (CGM.getLangOpts().OpenMP < 50 || Action == ActionToDo::DoNotPush)
12504     return;
12505   assert(Action == ActionToDo::PushAsLastprivateConditional &&
12506          "Expected a push action.");
12507   LastprivateConditionalData &Data =
12508       CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
12509   for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
12510     if (C->getKind() != OMPC_LASTPRIVATE_conditional)
12511       continue;
12512
12513     for (const Expr *Ref : C->varlists()) {
12514       Data.DeclToUniqueName.insert(std::make_pair(
12515           cast<DeclRefExpr>(Ref->IgnoreParenImpCasts())->getDecl(),
12516           SmallString<16>(generateUniqueName(CGM, "pl_cond", Ref))));
12517     }
12518   }
12519   Data.IVLVal = IVLVal;
12520   Data.Fn = CGF.CurFn;
12521 }
12522
12523 CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
12524     CodeGenFunction &CGF, const OMPExecutableDirective &S)
12525     : CGM(CGF.CGM), Action(ActionToDo::DoNotPush) {
12526   assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
12527   if (CGM.getLangOpts().OpenMP < 50)
12528     return;
12529   llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToAddForLPCsAsDisabled;
12530   tryToDisableInnerAnalysis(S, NeedToAddForLPCsAsDisabled);
12531   if (!NeedToAddForLPCsAsDisabled.empty()) {
12532     Action = ActionToDo::DisableLastprivateConditional;
12533     LastprivateConditionalData &Data =
12534         CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
12535     for (const Decl *VD : NeedToAddForLPCsAsDisabled)
12536       Data.DeclToUniqueName.insert(std::make_pair(VD, SmallString<16>()));
12537     Data.Fn = CGF.CurFn;
12538     Data.Disabled = true;
12539   }
12540 }
12541
12542 CGOpenMPRuntime::LastprivateConditionalRAII
12543 CGOpenMPRuntime::LastprivateConditionalRAII::disable(
12544     CodeGenFunction &CGF, const OMPExecutableDirective &S) {
12545   return LastprivateConditionalRAII(CGF, S);
12546 }
12547
12548 CGOpenMPRuntime::LastprivateConditionalRAII::~LastprivateConditionalRAII() {
12549   if (CGM.getLangOpts().OpenMP < 50)
12550     return;
12551   if (Action == ActionToDo::DisableLastprivateConditional) {
12552     assert(CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
12553            "Expected list of disabled private vars.");
12554     CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
12555   }
12556   if (Action == ActionToDo::PushAsLastprivateConditional) {
12557     assert(
12558         !CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
12559         "Expected list of lastprivate conditional vars.");
12560     CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
12561   }
12562 }
12563
12564 Address CGOpenMPRuntime::emitLastprivateConditionalInit(CodeGenFunction &CGF,
12565                                                         const VarDecl *VD) {
12566   ASTContext &C = CGM.getContext();
12567   auto I = LastprivateConditionalToTypes.find(CGF.CurFn);
12568   if (I == LastprivateConditionalToTypes.end())
12569     I = LastprivateConditionalToTypes.try_emplace(CGF.CurFn).first;
12570   QualType NewType;
12571   const FieldDecl *VDField;
12572   const FieldDecl *FiredField;
12573   LValue BaseLVal;
12574   auto VI = I->getSecond().find(VD);
12575   if (VI == I->getSecond().end()) {
12576     RecordDecl *RD = C.buildImplicitRecord("lastprivate.conditional");
12577     RD->startDefinition();
12578     VDField = addFieldToRecordDecl(C, RD, VD->getType().getNonReferenceType());
12579     FiredField = addFieldToRecordDecl(C, RD, C.CharTy);
12580     RD->completeDefinition();
12581
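    // The implicit record built above has, schematically, the layout
    //   struct lastprivate.conditional { <type of VD> var; char Fired; };
    // where the Fired byte records whether the privatized variable was
    // assigned in the region (see checkAndEmitLastprivateConditional below).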
NewType = C.getRecordType(RD); 12582 Address Addr = CGF.CreateMemTemp(NewType, C.getDeclAlign(VD), VD->getName()); 12583 BaseLVal = CGF.MakeAddrLValue(Addr, NewType, AlignmentSource::Decl); 12584 I->getSecond().try_emplace(VD, NewType, VDField, FiredField, BaseLVal); 12585 } else { 12586 NewType = std::get<0>(VI->getSecond()); 12587 VDField = std::get<1>(VI->getSecond()); 12588 FiredField = std::get<2>(VI->getSecond()); 12589 BaseLVal = std::get<3>(VI->getSecond()); 12590 } 12591 LValue FiredLVal = 12592 CGF.EmitLValueForField(BaseLVal, FiredField); 12593 CGF.EmitStoreOfScalar( 12594 llvm::ConstantInt::getNullValue(CGF.ConvertTypeForMem(C.CharTy)), 12595 FiredLVal); 12596 return CGF.EmitLValueForField(BaseLVal, VDField).getAddress(CGF); 12597 } 12598 12599 namespace { 12600 /// Checks if the lastprivate conditional variable is referenced in LHS. 12601 class LastprivateConditionalRefChecker final 12602 : public ConstStmtVisitor<LastprivateConditionalRefChecker, bool> { 12603 ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM; 12604 const Expr *FoundE = nullptr; 12605 const Decl *FoundD = nullptr; 12606 StringRef UniqueDeclName; 12607 LValue IVLVal; 12608 llvm::Function *FoundFn = nullptr; 12609 SourceLocation Loc; 12610 12611 public: 12612 bool VisitDeclRefExpr(const DeclRefExpr *E) { 12613 for (const CGOpenMPRuntime::LastprivateConditionalData &D : 12614 llvm::reverse(LPM)) { 12615 auto It = D.DeclToUniqueName.find(E->getDecl()); 12616 if (It == D.DeclToUniqueName.end()) 12617 continue; 12618 if (D.Disabled) 12619 return false; 12620 FoundE = E; 12621 FoundD = E->getDecl()->getCanonicalDecl(); 12622 UniqueDeclName = It->second; 12623 IVLVal = D.IVLVal; 12624 FoundFn = D.Fn; 12625 break; 12626 } 12627 return FoundE == E; 12628 } 12629 bool VisitMemberExpr(const MemberExpr *E) { 12630 if (!CodeGenFunction::IsWrappedCXXThis(E->getBase())) 12631 return false; 12632 for (const CGOpenMPRuntime::LastprivateConditionalData &D : 12633 llvm::reverse(LPM)) { 12634 auto It = D.DeclToUniqueName.find(E->getMemberDecl()); 12635 if (It == D.DeclToUniqueName.end()) 12636 continue; 12637 if (D.Disabled) 12638 return false; 12639 FoundE = E; 12640 FoundD = E->getMemberDecl()->getCanonicalDecl(); 12641 UniqueDeclName = It->second; 12642 IVLVal = D.IVLVal; 12643 FoundFn = D.Fn; 12644 break; 12645 } 12646 return FoundE == E; 12647 } 12648 bool VisitStmt(const Stmt *S) { 12649 for (const Stmt *Child : S->children()) { 12650 if (!Child) 12651 continue; 12652 if (const auto *E = dyn_cast<Expr>(Child)) 12653 if (!E->isGLValue()) 12654 continue; 12655 if (Visit(Child)) 12656 return true; 12657 } 12658 return false; 12659 } 12660 explicit LastprivateConditionalRefChecker( 12661 ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM) 12662 : LPM(LPM) {} 12663 std::tuple<const Expr *, const Decl *, StringRef, LValue, llvm::Function *> 12664 getFoundData() const { 12665 return std::make_tuple(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn); 12666 } 12667 }; 12668 } // namespace 12669 12670 void CGOpenMPRuntime::emitLastprivateConditionalUpdate(CodeGenFunction &CGF, 12671 LValue IVLVal, 12672 StringRef UniqueDeclName, 12673 LValue LVal, 12674 SourceLocation Loc) { 12675 // Last updated loop counter for the lastprivate conditional var. 
12676 // int<xx> last_iv = 0; 12677 llvm::Type *LLIVTy = CGF.ConvertTypeForMem(IVLVal.getType()); 12678 llvm::Constant *LastIV = 12679 getOrCreateInternalVariable(LLIVTy, getName({UniqueDeclName, "iv"})); 12680 cast<llvm::GlobalVariable>(LastIV)->setAlignment( 12681 IVLVal.getAlignment().getAsAlign()); 12682 LValue LastIVLVal = CGF.MakeNaturalAlignAddrLValue(LastIV, IVLVal.getType()); 12683 12684 // Last value of the lastprivate conditional. 12685 // decltype(priv_a) last_a; 12686 llvm::Constant *Last = getOrCreateInternalVariable( 12687 CGF.ConvertTypeForMem(LVal.getType()), UniqueDeclName); 12688 cast<llvm::GlobalVariable>(Last)->setAlignment( 12689 LVal.getAlignment().getAsAlign()); 12690 LValue LastLVal = 12691 CGF.MakeAddrLValue(Last, LVal.getType(), LVal.getAlignment()); 12692 12693 // Global loop counter. Required to handle inner parallel-for regions. 12694 // iv 12695 llvm::Value *IVVal = CGF.EmitLoadOfScalar(IVLVal, Loc); 12696 12697 // #pragma omp critical(a) 12698 // if (last_iv <= iv) { 12699 // last_iv = iv; 12700 // last_a = priv_a; 12701 // } 12702 auto &&CodeGen = [&LastIVLVal, &IVLVal, IVVal, &LVal, &LastLVal, 12703 Loc](CodeGenFunction &CGF, PrePostActionTy &Action) { 12704 Action.Enter(CGF); 12705 llvm::Value *LastIVVal = CGF.EmitLoadOfScalar(LastIVLVal, Loc); 12706 // (last_iv <= iv) ? Check if the variable is updated and store new 12707 // value in global var. 12708 llvm::Value *CmpRes; 12709 if (IVLVal.getType()->isSignedIntegerType()) { 12710 CmpRes = CGF.Builder.CreateICmpSLE(LastIVVal, IVVal); 12711 } else { 12712 assert(IVLVal.getType()->isUnsignedIntegerType() && 12713 "Loop iteration variable must be integer."); 12714 CmpRes = CGF.Builder.CreateICmpULE(LastIVVal, IVVal); 12715 } 12716 llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lp_cond_then"); 12717 llvm::BasicBlock *ExitBB = CGF.createBasicBlock("lp_cond_exit"); 12718 CGF.Builder.CreateCondBr(CmpRes, ThenBB, ExitBB); 12719 // { 12720 CGF.EmitBlock(ThenBB); 12721 12722 // last_iv = iv; 12723 CGF.EmitStoreOfScalar(IVVal, LastIVLVal); 12724 12725 // last_a = priv_a; 12726 switch (CGF.getEvaluationKind(LVal.getType())) { 12727 case TEK_Scalar: { 12728 llvm::Value *PrivVal = CGF.EmitLoadOfScalar(LVal, Loc); 12729 CGF.EmitStoreOfScalar(PrivVal, LastLVal); 12730 break; 12731 } 12732 case TEK_Complex: { 12733 CodeGenFunction::ComplexPairTy PrivVal = CGF.EmitLoadOfComplex(LVal, Loc); 12734 CGF.EmitStoreOfComplex(PrivVal, LastLVal, /*isInit=*/false); 12735 break; 12736 } 12737 case TEK_Aggregate: 12738 llvm_unreachable( 12739 "Aggregates are not supported in lastprivate conditional."); 12740 } 12741 // } 12742 CGF.EmitBranch(ExitBB); 12743 // There is no need to emit line number for unconditional branch. 12744 (void)ApplyDebugLocation::CreateEmpty(CGF); 12745 CGF.EmitBlock(ExitBB, /*IsFinished=*/true); 12746 }; 12747 12748 if (CGM.getLangOpts().OpenMPSimd) { 12749 // Do not emit as a critical region as no parallel region could be emitted. 
12750 RegionCodeGenTy ThenRCG(CodeGen); 12751 ThenRCG(CGF); 12752 } else { 12753 emitCriticalRegion(CGF, UniqueDeclName, CodeGen, Loc); 12754 } 12755 } 12756 12757 void CGOpenMPRuntime::checkAndEmitLastprivateConditional(CodeGenFunction &CGF, 12758 const Expr *LHS) { 12759 if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty()) 12760 return; 12761 LastprivateConditionalRefChecker Checker(LastprivateConditionalStack); 12762 if (!Checker.Visit(LHS)) 12763 return; 12764 const Expr *FoundE; 12765 const Decl *FoundD; 12766 StringRef UniqueDeclName; 12767 LValue IVLVal; 12768 llvm::Function *FoundFn; 12769 std::tie(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn) = 12770 Checker.getFoundData(); 12771 if (FoundFn != CGF.CurFn) { 12772 // Special codegen for inner parallel regions. 12773 // ((struct.lastprivate.conditional*)&priv_a)->Fired = 1; 12774 auto It = LastprivateConditionalToTypes[FoundFn].find(FoundD); 12775 assert(It != LastprivateConditionalToTypes[FoundFn].end() && 12776 "Lastprivate conditional is not found in outer region."); 12777 QualType StructTy = std::get<0>(It->getSecond()); 12778 const FieldDecl* FiredDecl = std::get<2>(It->getSecond()); 12779 LValue PrivLVal = CGF.EmitLValue(FoundE); 12780 Address StructAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 12781 PrivLVal.getAddress(CGF), 12782 CGF.ConvertTypeForMem(CGF.getContext().getPointerType(StructTy))); 12783 LValue BaseLVal = 12784 CGF.MakeAddrLValue(StructAddr, StructTy, AlignmentSource::Decl); 12785 LValue FiredLVal = CGF.EmitLValueForField(BaseLVal, FiredDecl); 12786 CGF.EmitAtomicStore(RValue::get(llvm::ConstantInt::get( 12787 CGF.ConvertTypeForMem(FiredDecl->getType()), 1)), 12788 FiredLVal, llvm::AtomicOrdering::Unordered, 12789 /*IsVolatile=*/true, /*isInit=*/false); 12790 return; 12791 } 12792 12793 // Private address of the lastprivate conditional in the current context. 
void CGOpenMPRuntime::checkAndEmitSharedLastprivateConditional(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    const llvm::DenseSet<CanonicalDeclPtr<const VarDecl>> &IgnoredDecls) {
  if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
    return;
  auto Range = llvm::reverse(LastprivateConditionalStack);
  auto It = llvm::find_if(
      Range, [](const LastprivateConditionalData &D) { return !D.Disabled; });
  if (It == Range.end() || It->Fn != CGF.CurFn)
    return;
  auto LPCI = LastprivateConditionalToTypes.find(It->Fn);
  assert(LPCI != LastprivateConditionalToTypes.end() &&
         "Lastprivates must be registered already.");
  SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
  getOpenMPCaptureRegions(CaptureRegions, D.getDirectiveKind());
  const CapturedStmt *CS = D.getCapturedStmt(CaptureRegions.back());
  for (const auto &Pair : It->DeclToUniqueName) {
    const auto *VD = cast<VarDecl>(Pair.first->getCanonicalDecl());
    if (!CS->capturesVariable(VD) || IgnoredDecls.count(VD) > 0)
      continue;
    auto I = LPCI->getSecond().find(Pair.first);
    assert(I != LPCI->getSecond().end() &&
           "Lastprivate must be registered already.");
    // bool Cmp = priv_a.Fired != 0;
    LValue BaseLVal = std::get<3>(I->getSecond());
    LValue FiredLVal =
        CGF.EmitLValueForField(BaseLVal, std::get<2>(I->getSecond()));
    llvm::Value *Res = CGF.EmitLoadOfScalar(FiredLVal, D.getBeginLoc());
    llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Res);
    llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lpc.then");
    llvm::BasicBlock *DoneBB = CGF.createBasicBlock("lpc.done");
    // if (Cmp) {
    CGF.Builder.CreateCondBr(Cmp, ThenBB, DoneBB);
    CGF.EmitBlock(ThenBB);
    Address Addr = CGF.GetAddrOfLocalVar(VD);
    LValue LVal;
    if (VD->getType()->isReferenceType())
      LVal = CGF.EmitLoadOfReferenceLValue(Addr, VD->getType(),
                                           AlignmentSource::Decl);
    else
      LVal = CGF.MakeAddrLValue(Addr, VD->getType().getNonReferenceType(),
                                AlignmentSource::Decl);
    emitLastprivateConditionalUpdate(CGF, It->IVLVal, Pair.second, LVal,
                                     D.getBeginLoc());
    auto AL = ApplyDebugLocation::CreateArtificial(CGF);
    CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
    // }
  }
}

void CGOpenMPRuntime::emitLastprivateConditionalFinalUpdate(
    CodeGenFunction &CGF, LValue PrivLVal, const VarDecl *VD,
    SourceLocation Loc) {
  if (CGF.getLangOpts().OpenMP < 50)
    return;
  auto It = LastprivateConditionalStack.back().DeclToUniqueName.find(VD);
  assert(It != LastprivateConditionalStack.back().DeclToUniqueName.end() &&
         "Unknown lastprivate conditional variable.");
  StringRef UniqueName = It->second;
  llvm::GlobalVariable *GV = CGM.getModule().getNamedGlobal(UniqueName);
  // The variable was not updated in the region - exit.
  if (!GV)
    return;
  LValue LPLVal = CGF.MakeAddrLValue(
      GV, PrivLVal.getType().getNonReferenceType(), PrivLVal.getAlignment());
  llvm::Value *Res = CGF.EmitLoadOfScalar(LPLVal, Loc);
  CGF.EmitStoreOfScalar(Res, PrivLVal);
}
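// Note: emitLastprivateConditionalFinalUpdate completes the protocol sketched
// above by copying the tracked global back into the private copy once the
// region ends; if the named global was never created, no conditional update
// fired in the region and the private value is left untouched.
//
// Everything below implements CGOpenMPSIMDRuntime, used for -fopenmp-simd
// (SIMD-only) compilation. In this mode only 'simd' constructs are code
// generated, so entry points that would need the OpenMP runtime library are
// expected to be unreachable and are guarded by llvm_unreachable.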
llvm::Function *CGOpenMPSIMDRuntime::emitParallelOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Function *CGOpenMPSIMDRuntime::emitTeamsOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Function *CGOpenMPSIMDRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitParallelCall(CodeGenFunction &CGF,
                                           SourceLocation Loc,
                                           llvm::Function *OutlinedFn,
                                           ArrayRef<llvm::Value *> CapturedVars,
                                           const Expr *IfCond) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitCriticalRegion(
    CodeGenFunction &CGF, StringRef CriticalName,
    const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc,
    const Expr *Hint) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitMasterRegion(CodeGenFunction &CGF,
                                           const RegionCodeGenTy &MasterOpGen,
                                           SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitMaskedRegion(CodeGenFunction &CGF,
                                           const RegionCodeGenTy &MasterOpGen,
                                           SourceLocation Loc,
                                           const Expr *Filter) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
                                            SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskgroupRegion(
    CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen,
    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitSingleRegion(
    CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen,
    SourceLocation Loc, ArrayRef<const Expr *> CopyprivateVars,
    ArrayRef<const Expr *> DestExprs, ArrayRef<const Expr *> SrcExprs,
    ArrayRef<const Expr *> AssignmentOps) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitOrderedRegion(CodeGenFunction &CGF,
                                            const RegionCodeGenTy &OrderedOpGen,
                                            SourceLocation Loc,
                                            bool IsThreads) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitBarrierCall(CodeGenFunction &CGF,
                                          SourceLocation Loc,
                                          OpenMPDirectiveKind Kind,
                                          bool EmitChecks,
                                          bool ForceSimpleCall) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForDispatchInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
    bool Ordered, const DispatchRTInput &DispatchValues) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
void CGOpenMPSIMDRuntime::emitForStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind,
    const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitDistributeStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDistScheduleClauseKind SchedKind, const StaticRTInput &Values) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
                                                     SourceLocation Loc,
                                                     unsigned IVSize,
                                                     bool IVSigned) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForStaticFinish(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              OpenMPDirectiveKind DKind) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Value *CGOpenMPSIMDRuntime::emitForNext(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              unsigned IVSize, bool IVSigned,
                                              Address IL, Address LB,
                                              Address UB, Address ST) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
                                               llvm::Value *NumThreads,
                                               SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitProcBindClause(CodeGenFunction &CGF,
                                             ProcBindKind ProcBind,
                                             SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

Address CGOpenMPSIMDRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
                                                    const VarDecl *VD,
                                                    Address VDAddr,
                                                    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Function *CGOpenMPSIMDRuntime::emitThreadPrivateVarDefinition(
    const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit,
    CodeGenFunction *CGF) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

Address CGOpenMPSIMDRuntime::getAddrOfArtificialThreadPrivate(
    CodeGenFunction &CGF, QualType VarType, StringRef Name) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitFlush(CodeGenFunction &CGF,
                                    ArrayRef<const Expr *> Vars,
                                    SourceLocation Loc,
                                    llvm::AtomicOrdering AO) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPExecutableDirective &D,
                                       llvm::Function *TaskFunction,
                                       QualType SharedsTy, Address Shareds,
                                       const Expr *IfCond,
                                       const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskLoopCall(
    CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D,
    llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds,
    const Expr *IfCond, const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitReduction(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> Privates,
    ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
    ArrayRef<const Expr *> ReductionOps, ReductionOptionsTy Options) {
  assert(Options.SimpleReduction && "Only simple reduction is expected.");
  CGOpenMPRuntime::emitReduction(CGF, Loc, Privates, LHSExprs, RHSExprs,
                                 ReductionOps, Options);
}
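// Example (illustrative only, placeholder names): in SIMD-only mode the only
// reductions expected to reach the override above come from constructs such as
//
//   #pragma omp simd reduction(+ : sum)
//   for (int i = 0; i < N; ++i)
//     sum += a[i];
//
// These need no runtime calls: Options.SimpleReduction is set and the base
// CGOpenMPRuntime::emitReduction emits the final combiner expressions inline.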
llvm::Value *CGOpenMPSIMDRuntime::emitTaskReductionInit(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
    ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
                                                SourceLocation Loc,
                                                bool IsWorksharingReduction) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  ReductionCodeGen &RCG,
                                                  unsigned N) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

Address CGOpenMPSIMDRuntime::getTaskReductionItem(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  llvm::Value *ReductionsPtr,
                                                  LValue SharedLVal) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
                                           SourceLocation Loc,
                                           const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitCancellationPointCall(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDirectiveKind CancelRegion) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitCancelCall(CodeGenFunction &CGF,
                                         SourceLocation Loc, const Expr *IfCond,
                                         OpenMPDirectiveKind CancelRegion) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTargetOutlinedFunction(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTargetCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
    llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
    llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
                                     const OMPLoopDirective &D)>
        SizeEmitter) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

bool CGOpenMPSIMDRuntime::emitTargetFunctions(GlobalDecl GD) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

bool CGOpenMPSIMDRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

bool CGOpenMPSIMDRuntime::emitTargetGlobal(GlobalDecl GD) {
  return false;
}

void CGOpenMPSIMDRuntime::emitTeamsCall(CodeGenFunction &CGF,
                                        const OMPExecutableDirective &D,
                                        SourceLocation Loc,
                                        llvm::Function *OutlinedFn,
                                        ArrayRef<llvm::Value *> CapturedVars) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
void CGOpenMPSIMDRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
                                             const Expr *NumTeams,
                                             const Expr *ThreadLimit,
                                             SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTargetDataCalls(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTargetDataStandAloneCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitDoacrossInit(CodeGenFunction &CGF,
                                           const OMPLoopDirective &D,
                                           ArrayRef<Expr *> NumIterations) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
                                              const OMPDependClause *C) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

const VarDecl *
CGOpenMPSIMDRuntime::translateParameter(const FieldDecl *FD,
                                        const VarDecl *NativeParam) const {
  llvm_unreachable("Not supported in SIMD-only mode");
}

Address
CGOpenMPSIMDRuntime::getParameterAddress(CodeGenFunction &CGF,
                                         const VarDecl *NativeParam,
                                         const VarDecl *TargetParam) const {
  llvm_unreachable("Not supported in SIMD-only mode");
}