//===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This provides a class for OpenMP runtime code generation.
//
//===----------------------------------------------------------------------===//

#include "CGOpenMPRuntime.h"
#include "CGCXXABI.h"
#include "CGCleanup.h"
#include "CGRecordLayout.h"
#include "CodeGenFunction.h"
#include "clang/AST/APValue.h"
#include "clang/AST/Attr.h"
#include "clang/AST/Decl.h"
#include "clang/AST/OpenMPClause.h"
#include "clang/AST/StmtOpenMP.h"
#include "clang/AST/StmtVisitor.h"
#include "clang/Basic/BitmaskEnum.h"
#include "clang/Basic/FileManager.h"
#include "clang/Basic/OpenMPKinds.h"
#include "clang/Basic/SourceManager.h"
#include "clang/CodeGen/ConstantInitBuilder.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SetOperations.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Bitcode/BitcodeReader.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>
#include <numeric>

using namespace clang;
using namespace CodeGen;
using namespace llvm::omp;

namespace {
/// Base class for handling code generation inside OpenMP regions.
class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
public:
  /// Kinds of OpenMP regions used in codegen.
  enum CGOpenMPRegionKind {
    /// Region with outlined function for standalone 'parallel'
    /// directive.
    ParallelOutlinedRegion,
    /// Region with outlined function for standalone 'task' directive.
    TaskOutlinedRegion,
    /// Region for constructs that do not require function outlining,
    /// like 'for', 'sections', 'atomic' etc. directives.
    InlinedRegion,
    /// Region with outlined function for standalone 'target' directive.
    TargetRegion,
  };

  CGOpenMPRegionInfo(const CapturedStmt &CS,
                     const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind),
        CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {}

  CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen),
        Kind(Kind), HasCancel(HasCancel) {}

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  virtual const VarDecl *getThreadIDVariable() const = 0;

  /// Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override;

  /// Get an LValue for the current ThreadID variable.
  /// \return LValue for thread id variable. This LValue always has type int32*.
  virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF);

  virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {}

  CGOpenMPRegionKind getRegionKind() const { return RegionKind; }

  OpenMPDirectiveKind getDirectiveKind() const { return Kind; }

  bool hasCancel() const { return HasCancel; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return Info->getKind() == CR_OpenMP;
  }

  ~CGOpenMPRegionInfo() override = default;

protected:
  CGOpenMPRegionKind RegionKind;
  RegionCodeGenTy CodeGen;
  OpenMPDirectiveKind Kind;
  bool HasCancel;
};

/// API for captured statement code generation in OpenMP constructs.
class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar,
                             const RegionCodeGenTy &CodeGen,
                             OpenMPDirectiveKind Kind, bool HasCancel,
                             StringRef HelperName)
      : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind,
                           HasCancel),
        ThreadIDVar(ThreadIDVar), HelperName(HelperName) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return HelperName; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               ParallelOutlinedRegion;
  }

private:
  /// A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  StringRef HelperName;
};

/// API for captured statement code generation in OpenMP constructs.
class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  class UntiedTaskActionTy final : public PrePostActionTy {
    bool Untied;
    const VarDecl *PartIDVar;
    const RegionCodeGenTy UntiedCodeGen;
    llvm::SwitchInst *UntiedSwitch = nullptr;

  public:
    UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar,
                       const RegionCodeGenTy &UntiedCodeGen)
        : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {}
    void Enter(CodeGenFunction &CGF) override {
      if (Untied) {
        // Emit task switching point.
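        // For an untied task the runtime may re-enter the task entry function
        // after each switching point, so dispatch on the stored part id: each
        // switch case jumps to the block that resumes the corresponding task
        // part, and case 0 resumes at the very first part.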
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        llvm::Value *Res =
            CGF.EmitLoadOfScalar(PartIdLVal, PartIDVar->getLocation());
        llvm::BasicBlock *DoneBB = CGF.createBasicBlock(".untied.done.");
        UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB);
        CGF.EmitBlock(DoneBB);
        CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        UntiedSwitch->addCase(CGF.Builder.getInt32(0),
                              CGF.Builder.GetInsertBlock());
        emitUntiedSwitch(CGF);
      }
    }
    void emitUntiedSwitch(CodeGenFunction &CGF) const {
      if (Untied) {
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              PartIdLVal);
        UntiedCodeGen(CGF);
        CodeGenFunction::JumpDest CurPoint =
            CGF.getJumpDestInCurrentScope(".untied.next.");
        CGF.EmitBranch(CGF.ReturnBlock.getBlock());
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              CGF.Builder.GetInsertBlock());
        CGF.EmitBranchThroughCleanup(CurPoint);
        CGF.EmitBlock(CurPoint.getBlock());
      }
    }
    unsigned getNumberOfParts() const { return UntiedSwitch->getNumCases(); }
  };
  CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS,
                                 const VarDecl *ThreadIDVar,
                                 const RegionCodeGenTy &CodeGen,
                                 OpenMPDirectiveKind Kind, bool HasCancel,
                                 const UntiedTaskActionTy &Action)
      : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel),
        ThreadIDVar(ThreadIDVar), Action(Action) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// Get an LValue for the current ThreadID variable.
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override;

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return ".omp_outlined."; }

  void emitUntiedSwitch(CodeGenFunction &CGF) override {
    Action.emitUntiedSwitch(CGF);
  }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               TaskOutlinedRegion;
  }

private:
  /// A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  /// Action for emitting code for untied tasks.
  const UntiedTaskActionTy &Action;
};

/// API for inlined captured statement code generation in OpenMP
/// constructs.
class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo {
public:
  CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI,
                            const RegionCodeGenTy &CodeGen,
                            OpenMPDirectiveKind Kind, bool HasCancel)
      : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel),
        OldCSI(OldCSI),
        OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {}

  // Retrieve the value of the context parameter.
  llvm::Value *getContextValue() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getContextValue();
    llvm_unreachable("No context value for inlined OpenMP region");
  }

  void setContextValue(llvm::Value *V) override {
    if (OuterRegionInfo) {
      OuterRegionInfo->setContextValue(V);
      return;
    }
    llvm_unreachable("No context value for inlined OpenMP region");
  }

  /// Lookup the captured field decl for a variable.
  const FieldDecl *lookup(const VarDecl *VD) const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->lookup(VD);
    // If there is no outer outlined region, there is no need to look up the
    // variable in a list of captured variables; we can use the original one.
    return nullptr;
  }

  FieldDecl *getThisFieldDecl() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThisFieldDecl();
    return nullptr;
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThreadIDVariable();
    return nullptr;
  }

  /// Get an LValue for the current ThreadID variable.
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThreadIDVariableLValue(CGF);
    llvm_unreachable("No LValue for inlined OpenMP construct");
  }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override {
    if (auto *OuterRegionInfo = getOldCSI())
      return OuterRegionInfo->getHelperName();
    llvm_unreachable("No helper name for inlined OpenMP construct");
  }

  void emitUntiedSwitch(CodeGenFunction &CGF) override {
    if (OuterRegionInfo)
      OuterRegionInfo->emitUntiedSwitch(CGF);
  }

  CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion;
  }

  ~CGOpenMPInlinedRegionInfo() override = default;

private:
  /// CodeGen info about outer OpenMP region.
  CodeGenFunction::CGCapturedStmtInfo *OldCSI;
  CGOpenMPRegionInfo *OuterRegionInfo;
};

/// API for captured statement code generation in OpenMP target
/// constructs. For these captures, implicit parameters are used instead of the
/// captured fields. The name of the target region has to be unique in a given
/// application, so it is provided by the client, because only the client has
/// the information needed to generate it.
class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo {
public:
  CGOpenMPTargetRegionInfo(const CapturedStmt &CS,
                           const RegionCodeGenTy &CodeGen, StringRef HelperName)
      : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target,
                           /*HasCancel=*/false),
        HelperName(HelperName) {}

  /// This is unused for target regions because each starts executing
  /// with a single thread.
  const VarDecl *getThreadIDVariable() const override { return nullptr; }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return HelperName; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion;
  }

private:
  StringRef HelperName;
};

static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) {
  llvm_unreachable("No codegen for expressions");
}
/// API for generation of expressions captured in an innermost OpenMP
/// region.
class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo {
public:
  CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS)
      : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen,
                                  OMPD_unknown,
                                  /*HasCancel=*/false),
        PrivScope(CGF) {
    // Make sure the globals captured in the provided statement are local by
    // using the privatization logic. We assume the same variable is not
    // captured more than once.
    for (const auto &C : CS.captures()) {
      if (!C.capturesVariable() && !C.capturesVariableByCopy())
        continue;

      const VarDecl *VD = C.getCapturedVar();
      if (VD->isLocalVarDeclOrParm())
        continue;

      DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD),
                      /*RefersToEnclosingVariableOrCapture=*/false,
                      VD->getType().getNonReferenceType(), VK_LValue,
                      C.getLocation());
      PrivScope.addPrivate(
          VD, [&CGF, &DRE]() { return CGF.EmitLValue(&DRE).getAddress(CGF); });
    }
    (void)PrivScope.Privatize();
  }

  /// Lookup the captured field decl for a variable.
  const FieldDecl *lookup(const VarDecl *VD) const override {
    if (const FieldDecl *FD = CGOpenMPInlinedRegionInfo::lookup(VD))
      return FD;
    return nullptr;
  }

  /// Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override {
    llvm_unreachable("No body for expressions");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override {
    llvm_unreachable("No thread id for expressions");
  }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override {
    llvm_unreachable("No helper name for expressions");
  }

  static bool classof(const CGCapturedStmtInfo *Info) { return false; }

private:
  /// Private scope to capture global variables.
  CodeGenFunction::OMPPrivateScope PrivScope;
};

/// RAII for emitting code of OpenMP constructs.
class InlinedOpenMPRegionRAII {
  CodeGenFunction &CGF;
  llvm::DenseMap<const VarDecl *, FieldDecl *> LambdaCaptureFields;
  FieldDecl *LambdaThisCaptureField = nullptr;
  const CodeGen::CGBlockInfo *BlockInfo = nullptr;
  bool NoInheritance = false;

public:
  /// Constructs region for combined constructs.
  /// \param CodeGen Code generation sequence for combined directives. Includes
  /// a list of functions used for code generation of implicitly inlined
  /// regions.
  InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen,
                          OpenMPDirectiveKind Kind, bool HasCancel,
                          bool NoInheritance = true)
      : CGF(CGF), NoInheritance(NoInheritance) {
    // Start emission for the construct.
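    // Chain a new inlined region info in front of the existing
    // CapturedStmtInfo; the destructor below restores the old one. When
    // NoInheritance is set, the lambda/block capture state is stashed away so
    // the inlined region does not see the captures of an enclosing
    // lambda or block.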
    CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo(
        CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel);
    if (NoInheritance) {
      std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
      LambdaThisCaptureField = CGF.LambdaThisCaptureField;
      CGF.LambdaThisCaptureField = nullptr;
      BlockInfo = CGF.BlockInfo;
      CGF.BlockInfo = nullptr;
    }
  }

  ~InlinedOpenMPRegionRAII() {
    // Restore original CapturedStmtInfo only if we're done with code emission.
    auto *OldCSI =
        cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI();
    delete CGF.CapturedStmtInfo;
    CGF.CapturedStmtInfo = OldCSI;
    if (NoInheritance) {
      std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
      CGF.LambdaThisCaptureField = LambdaThisCaptureField;
      CGF.BlockInfo = BlockInfo;
    }
  }
};

/// Values for bit flags used in the ident_t to describe the fields.
/// All enumerated elements are named and described in accordance with the code
/// from https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h
enum OpenMPLocationFlags : unsigned {
  /// Use trampoline for internal microtask.
  OMP_IDENT_IMD = 0x01,
  /// Use c-style ident structure.
  OMP_IDENT_KMPC = 0x02,
  /// Atomic reduction option for kmpc_reduce.
  OMP_ATOMIC_REDUCE = 0x10,
  /// Explicit 'barrier' directive.
  OMP_IDENT_BARRIER_EXPL = 0x20,
  /// Implicit barrier in code.
  OMP_IDENT_BARRIER_IMPL = 0x40,
  /// Implicit barrier in 'for' directive.
  OMP_IDENT_BARRIER_IMPL_FOR = 0x40,
  /// Implicit barrier in 'sections' directive.
  OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0,
  /// Implicit barrier in 'single' directive.
  OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140,
  /// Call of __kmp_for_static_init for static loop.
  OMP_IDENT_WORK_LOOP = 0x200,
  /// Call of __kmp_for_static_init for sections.
  OMP_IDENT_WORK_SECTIONS = 0x400,
  /// Call of __kmp_for_static_init for distribute.
  OMP_IDENT_WORK_DISTRIBUTE = 0x800,
  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE)
};

namespace {
LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
/// Values for bit flags for marking which requires clauses have been used.
enum OpenMPOffloadingRequiresDirFlags : int64_t {
  /// flag undefined.
  OMP_REQ_UNDEFINED = 0x000,
  /// no requires clause present.
  OMP_REQ_NONE = 0x001,
  /// reverse_offload clause.
  OMP_REQ_REVERSE_OFFLOAD = 0x002,
  /// unified_address clause.
  OMP_REQ_UNIFIED_ADDRESS = 0x004,
  /// unified_shared_memory clause.
  OMP_REQ_UNIFIED_SHARED_MEMORY = 0x008,
  /// dynamic_allocators clause.
  OMP_REQ_DYNAMIC_ALLOCATORS = 0x010,
  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_REQ_DYNAMIC_ALLOCATORS)
};

enum OpenMPOffloadingReservedDeviceIDs {
  /// Device ID if the device was not defined, runtime should get it
  /// from environment variables in the spec.
  OMP_DEVICEID_UNDEF = -1,
};
} // anonymous namespace

/// Describes ident structure that describes a source location.
/// All descriptions are taken from
/// https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h
/// Original structure:
/// typedef struct ident {
///    kmp_int32 reserved_1;   /**<  might be used in Fortran;
///                                  see above  */
///    kmp_int32 flags;        /**<  also f.flags; KMP_IDENT_xxx flags;
///                                  KMP_IDENT_KMPC identifies this union
///                                  member  */
///    kmp_int32 reserved_2;   /**<  not really used in Fortran any more;
///                                  see above */
///#if USE_ITT_BUILD
///                            /*  but currently used for storing
///                                region-specific ITT */
///                            /*  contextual information. */
///#endif /* USE_ITT_BUILD */
///    kmp_int32 reserved_3;   /**< source[4] in Fortran, do not use for
///                                 C++  */
///    char const *psource;    /**< String describing the source location.
///                                 The string is composed of semi-colon
///                                 separated fields which describe the source
///                                 file, the function and a pair of line
///                                 numbers that delimit the construct.
///                             */
/// } ident_t;
enum IdentFieldIndex {
  /// might be used in Fortran
  IdentField_Reserved_1,
  /// OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member.
  IdentField_Flags,
  /// Not really used in Fortran any more
  IdentField_Reserved_2,
  /// Source[4] in Fortran, do not use for C++
  IdentField_Reserved_3,
  /// String describing the source location. The string is composed of
  /// semi-colon separated fields which describe the source file, the function
  /// and a pair of line numbers that delimit the construct.
  IdentField_PSource
};

/// Schedule types for 'omp for' loops (these enumerators are taken from
/// the enum sched_type in kmp.h).
enum OpenMPSchedType {
  /// Lower bound for default (unordered) versions.
  OMP_sch_lower = 32,
  OMP_sch_static_chunked = 33,
  OMP_sch_static = 34,
  OMP_sch_dynamic_chunked = 35,
  OMP_sch_guided_chunked = 36,
  OMP_sch_runtime = 37,
  OMP_sch_auto = 38,
  /// static with chunk adjustment (e.g., simd)
  OMP_sch_static_balanced_chunked = 45,
  /// Lower bound for 'ordered' versions.
  OMP_ord_lower = 64,
  OMP_ord_static_chunked = 65,
  OMP_ord_static = 66,
  OMP_ord_dynamic_chunked = 67,
  OMP_ord_guided_chunked = 68,
  OMP_ord_runtime = 69,
  OMP_ord_auto = 70,
  OMP_sch_default = OMP_sch_static,
  /// dist_schedule types
  OMP_dist_sch_static_chunked = 91,
  OMP_dist_sch_static = 92,
  /// Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers.
  /// Set if the monotonic schedule modifier was present.
  OMP_sch_modifier_monotonic = (1 << 29),
  /// Set if the nonmonotonic schedule modifier was present.
  OMP_sch_modifier_nonmonotonic = (1 << 30),
};

/// A basic class for pre|post-action for advanced codegen sequence for OpenMP
/// region.
class CleanupTy final : public EHScopeStack::Cleanup {
  PrePostActionTy *Action;

public:
  explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {}
  void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
    if (!CGF.HaveInsertPoint())
      return;
    Action->Exit(CGF);
  }
};

} // anonymous namespace

void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const {
  CodeGenFunction::RunCleanupsScope Scope(CGF);
  if (PrePostAction) {
    CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction);
    Callback(CodeGen, CGF, *PrePostAction);
  } else {
    PrePostActionTy Action;
    Callback(CodeGen, CGF, Action);
  }
}

/// Check if the combiner is a call to UDR combiner and if it is so return the
/// UDR decl used for reduction.
static const OMPDeclareReductionDecl *
getReductionInit(const Expr *ReductionOp) {
  if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
    if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
      if (const auto *DRE =
              dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
        if (const auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl()))
          return DRD;
  return nullptr;
}

static void emitInitWithReductionInitializer(CodeGenFunction &CGF,
                                             const OMPDeclareReductionDecl *DRD,
                                             const Expr *InitOp,
                                             Address Private, Address Original,
                                             QualType Ty) {
  if (DRD->getInitializer()) {
    std::pair<llvm::Function *, llvm::Function *> Reduction =
        CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
    const auto *CE = cast<CallExpr>(InitOp);
    const auto *OVE = cast<OpaqueValueExpr>(CE->getCallee());
    const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts();
    const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts();
    const auto *LHSDRE =
        cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr());
    const auto *RHSDRE =
        cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr());
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()),
                            [=]() { return Private; });
    PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()),
                            [=]() { return Original; });
    (void)PrivateScope.Privatize();
    RValue Func = RValue::get(Reduction.second);
    CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
    CGF.EmitIgnoredExpr(InitOp);
  } else {
    llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty);
    std::string Name = CGF.CGM.getOpenMPRuntime().getName({"init"});
    auto *GV = new llvm::GlobalVariable(
        CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true,
        llvm::GlobalValue::PrivateLinkage, Init, Name);
    LValue LV = CGF.MakeNaturalAlignAddrLValue(GV, Ty);
    RValue InitRVal;
    switch (CGF.getEvaluationKind(Ty)) {
    case TEK_Scalar:
      InitRVal = CGF.EmitLoadOfLValue(LV, DRD->getLocation());
      break;
    case TEK_Complex:
      InitRVal =
          RValue::getComplex(CGF.EmitLoadOfComplex(LV, DRD->getLocation()));
      break;
    case TEK_Aggregate: {
      OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_LValue);
      CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, LV);
      CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
                           /*IsInitializer=*/false);
      return;
    }
    }
    OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_PRValue);
    CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal);
    CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
                         /*IsInitializer=*/false);
  }
}

/// Emit initialization of arrays of complex types.
/// \param DestAddr Address of the array.
/// \param Type Type of array.
/// \param Init Initial expression of array.
/// \param SrcAddr Address of the original array.
static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr,
                                 QualType Type, bool EmitDeclareReductionInit,
                                 const Expr *Init,
                                 const OMPDeclareReductionDecl *DRD,
                                 Address SrcAddr = Address::invalid()) {
  // Perform element-by-element initialization.
  QualType ElementTy;

  // Drill down to the base element type on both arrays.
  const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr);
  DestAddr =
      CGF.Builder.CreateElementBitCast(DestAddr, DestAddr.getElementType());
  if (DRD)
    SrcAddr =
        CGF.Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType());

  llvm::Value *SrcBegin = nullptr;
  if (DRD)
    SrcBegin = SrcAddr.getPointer();
  llvm::Value *DestBegin = DestAddr.getPointer();
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *DestEnd =
      CGF.Builder.CreateGEP(DestAddr.getElementType(), DestBegin, NumElements);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arrayinit.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arrayinit.done");
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);

  llvm::PHINode *SrcElementPHI = nullptr;
  Address SrcElementCurrent = Address::invalid();
  if (DRD) {
    SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2,
                                          "omp.arraycpy.srcElementPast");
    SrcElementPHI->addIncoming(SrcBegin, EntryBB);
    SrcElementCurrent =
        Address(SrcElementPHI,
                SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));
  }
  llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI(
      DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
  DestElementPHI->addIncoming(DestBegin, EntryBB);
  Address DestElementCurrent =
      Address(DestElementPHI,
              DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  {
    CodeGenFunction::RunCleanupsScope InitScope(CGF);
    if (EmitDeclareReductionInit) {
      emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent,
                                       SrcElementCurrent, ElementTy);
    } else
      CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(),
                           /*IsInitializer=*/false);
  }

  if (DRD) {
    // Shift the address forward by one element.
    llvm::Value *SrcElementNext = CGF.Builder.CreateConstGEP1_32(
        SrcAddr.getElementType(), SrcElementPHI, /*Idx0=*/1,
        "omp.arraycpy.dest.element");
    SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock());
  }

  // Shift the address forward by one element.
  llvm::Value *DestElementNext = CGF.Builder.CreateConstGEP1_32(
      DestAddr.getElementType(), DestElementPHI, /*Idx0=*/1,
      "omp.arraycpy.dest.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}

LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) {
  return CGF.EmitOMPSharedLValue(E);
}

LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF,
                                            const Expr *E) {
  if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E))
    return CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false);
  return LValue();
}

void ReductionCodeGen::emitAggregateInitialization(
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal,
    const OMPDeclareReductionDecl *DRD) {
  // Emit VarDecl with copy init for arrays.
  // Get the address of the original variable captured in current
  // captured region.
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  bool EmitDeclareReductionInit =
      DRD && (DRD->getInitializer() || !PrivateVD->hasInit());
  EmitOMPAggregateInit(CGF, PrivateAddr, PrivateVD->getType(),
                       EmitDeclareReductionInit,
                       EmitDeclareReductionInit ? ClausesData[N].ReductionOp
                                                : PrivateVD->getInit(),
                       DRD, SharedLVal.getAddress(CGF));
}

ReductionCodeGen::ReductionCodeGen(ArrayRef<const Expr *> Shareds,
                                   ArrayRef<const Expr *> Origs,
                                   ArrayRef<const Expr *> Privates,
                                   ArrayRef<const Expr *> ReductionOps) {
  ClausesData.reserve(Shareds.size());
  SharedAddresses.reserve(Shareds.size());
  Sizes.reserve(Shareds.size());
  BaseDecls.reserve(Shareds.size());
  const auto *IOrig = Origs.begin();
  const auto *IPriv = Privates.begin();
  const auto *IRed = ReductionOps.begin();
  for (const Expr *Ref : Shareds) {
    ClausesData.emplace_back(Ref, *IOrig, *IPriv, *IRed);
    std::advance(IOrig, 1);
    std::advance(IPriv, 1);
    std::advance(IRed, 1);
  }
}

void ReductionCodeGen::emitSharedOrigLValue(CodeGenFunction &CGF, unsigned N) {
  assert(SharedAddresses.size() == N && OrigAddresses.size() == N &&
         "Number of generated lvalues must be exactly N.");
  LValue First = emitSharedLValue(CGF, ClausesData[N].Shared);
  LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Shared);
  SharedAddresses.emplace_back(First, Second);
  if (ClausesData[N].Shared == ClausesData[N].Ref) {
    OrigAddresses.emplace_back(First, Second);
  } else {
    LValue First = emitSharedLValue(CGF, ClausesData[N].Ref);
    LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Ref);
    OrigAddresses.emplace_back(First, Second);
  }
}

void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) {
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  QualType PrivateType = PrivateVD->getType();
  bool AsArraySection = isa<OMPArraySectionExpr>(ClausesData[N].Ref);
  if (!PrivateType->isVariablyModifiedType()) {
    Sizes.emplace_back(
        CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType()),
        nullptr);
    return;
  }
  llvm::Value *Size;
  llvm::Value *SizeInChars;
  auto *ElemType =
      cast<llvm::PointerType>(OrigAddresses[N].first.getPointer(CGF)->getType())
          ->getElementType();
  auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType);
  if (AsArraySection) {
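    // For an array section the element count is (UB - LB) + 1, computed from
    // the pointer difference between the upper- and lower-bound lvalues; the
    // size in chars is that count times the element size.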
    Size = CGF.Builder.CreatePtrDiff(OrigAddresses[N].second.getPointer(CGF),
                                     OrigAddresses[N].first.getPointer(CGF));
    Size = CGF.Builder.CreateNUWAdd(
        Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1));
    SizeInChars = CGF.Builder.CreateNUWMul(Size, ElemSizeOf);
  } else {
    SizeInChars =
        CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType());
    Size = CGF.Builder.CreateExactUDiv(SizeInChars, ElemSizeOf);
  }
  Sizes.emplace_back(SizeInChars, Size);
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}

void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N,
                                         llvm::Value *Size) {
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  QualType PrivateType = PrivateVD->getType();
  if (!PrivateType->isVariablyModifiedType()) {
    assert(!Size && !Sizes[N].second &&
           "Size should be nullptr for non-variably modified reduction "
           "items.");
    return;
  }
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}

void ReductionCodeGen::emitInitialization(
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal,
    llvm::function_ref<bool(CodeGenFunction &)> DefaultInit) {
  assert(SharedAddresses.size() > N && "No variable was generated");
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  const OMPDeclareReductionDecl *DRD =
      getReductionInit(ClausesData[N].ReductionOp);
  QualType PrivateType = PrivateVD->getType();
  PrivateAddr = CGF.Builder.CreateElementBitCast(
      PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
  QualType SharedType = SharedAddresses[N].first.getType();
  SharedLVal = CGF.MakeAddrLValue(
      CGF.Builder.CreateElementBitCast(SharedLVal.getAddress(CGF),
                                       CGF.ConvertTypeForMem(SharedType)),
      SharedType, SharedAddresses[N].first.getBaseInfo(),
      CGF.CGM.getTBAAInfoForSubobject(SharedAddresses[N].first, SharedType));
  if (CGF.getContext().getAsArrayType(PrivateVD->getType())) {
    if (DRD && DRD->getInitializer())
      (void)DefaultInit(CGF);
    emitAggregateInitialization(CGF, N, PrivateAddr, SharedLVal, DRD);
  } else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) {
    (void)DefaultInit(CGF);
    emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp,
                                     PrivateAddr, SharedLVal.getAddress(CGF),
                                     SharedLVal.getType());
  } else if (!DefaultInit(CGF) && PrivateVD->hasInit() &&
             !CGF.isTrivialInitializer(PrivateVD->getInit())) {
    CGF.EmitAnyExprToMem(PrivateVD->getInit(), PrivateAddr,
                         PrivateVD->getType().getQualifiers(),
                         /*IsInitializer=*/false);
  }
}

bool ReductionCodeGen::needCleanups(unsigned N) {
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  QualType PrivateType = PrivateVD->getType();
  QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
  return DTorKind != QualType::DK_none;
}

void ReductionCodeGen::emitCleanups(CodeGenFunction &CGF, unsigned N,
                                    Address PrivateAddr) {
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  QualType PrivateType = PrivateVD->getType();
  QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
  if (needCleanups(N)) {
    PrivateAddr = CGF.Builder.CreateElementBitCast(
        PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
    CGF.pushDestroy(DTorKind, PrivateAddr, PrivateType);
  }
}

static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          LValue BaseLV) {
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    if (const auto *PtrTy = BaseTy->getAs<PointerType>()) {
      BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(CGF), PtrTy);
    } else {
      LValue RefLVal = CGF.MakeAddrLValue(BaseLV.getAddress(CGF), BaseTy);
      BaseLV = CGF.EmitLoadOfReferenceLValue(RefLVal);
    }
    BaseTy = BaseTy->getPointeeType();
  }
  return CGF.MakeAddrLValue(
      CGF.Builder.CreateElementBitCast(BaseLV.getAddress(CGF),
                                       CGF.ConvertTypeForMem(ElTy)),
      BaseLV.getType(), BaseLV.getBaseInfo(),
      CGF.CGM.getTBAAInfoForSubobject(BaseLV, BaseLV.getType()));
}

static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          llvm::Type *BaseLVType, CharUnits BaseLVAlignment,
                          llvm::Value *Addr) {
  Address Tmp = Address::invalid();
  Address TopTmp = Address::invalid();
  Address MostTopTmp = Address::invalid();
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    Tmp = CGF.CreateMemTemp(BaseTy);
    if (TopTmp.isValid())
      CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp);
    else
      MostTopTmp = Tmp;
    TopTmp = Tmp;
    BaseTy = BaseTy->getPointeeType();
  }
  llvm::Type *Ty = BaseLVType;
  if (Tmp.isValid())
    Ty = Tmp.getElementType();
  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Addr, Ty);
  if (Tmp.isValid()) {
    CGF.Builder.CreateStore(Addr, Tmp);
    return MostTopTmp;
  }
  return Address(Addr, BaseLVAlignment);
}

static const VarDecl *getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE) {
  const VarDecl *OrigVD = nullptr;
  if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(Ref)) {
    const Expr *Base = OASE->getBase()->IgnoreParenImpCasts();
    while (const auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base))
      Base = TempOASE->getBase()->IgnoreParenImpCasts();
    while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
      Base = TempASE->getBase()->IgnoreParenImpCasts();
    DE = cast<DeclRefExpr>(Base);
    OrigVD = cast<VarDecl>(DE->getDecl());
  } else if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Ref)) {
    const Expr *Base = ASE->getBase()->IgnoreParenImpCasts();
    while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
      Base = TempASE->getBase()->IgnoreParenImpCasts();
    DE = cast<DeclRefExpr>(Base);
    OrigVD = cast<VarDecl>(DE->getDecl());
  }
  return OrigVD;
}

Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N,
                                               Address PrivateAddr) {
  const DeclRefExpr *DE;
  if (const VarDecl *OrigVD = ::getBaseDecl(ClausesData[N].Ref, DE)) {
    BaseDecls.emplace_back(OrigVD);
    LValue OriginalBaseLValue = CGF.EmitLValue(DE);
    LValue BaseLValue =
        loadToBegin(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(),
                    OriginalBaseLValue);
    Address SharedAddr = SharedAddresses[N].first.getAddress(CGF);
    llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff(
        BaseLValue.getPointer(CGF), SharedAddr.getPointer());
    llvm::Value *PrivatePointer =
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            PrivateAddr.getPointer(), SharedAddr.getType());
    llvm::Value *Ptr = CGF.Builder.CreateGEP(
        SharedAddr.getElementType(), PrivatePointer, Adjustment);
    return castToBase(CGF, OrigVD->getType(),
                      SharedAddresses[N].first.getType(),
                      OriginalBaseLValue.getAddress(CGF).getType(),
                      OriginalBaseLValue.getAlignment(), Ptr);
  }
  BaseDecls.emplace_back(
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Ref)->getDecl()));
  return PrivateAddr;
}

bool ReductionCodeGen::usesReductionInitializer(unsigned N) const {
  const OMPDeclareReductionDecl *DRD =
      getReductionInit(ClausesData[N].ReductionOp);
  return DRD && DRD->getInitializer();
}

LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) {
  return CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(getThreadIDVariable()),
      getThreadIDVariable()->getType()->castAs<PointerType>());
}

void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt *S) {
  if (!CGF.HaveInsertPoint())
    return;
  // 1.2.2 OpenMP Language Terminology
  // Structured block - An executable statement with a single entry at the
  // top and a single exit at the bottom.
  // The point of exit cannot be a branch out of the structured block.
  // longjmp() and throw() must not violate the entry/exit criteria.
  CGF.EHStack.pushTerminate();
  if (S)
    CGF.incrementProfileCounter(S);
  CodeGen(CGF);
  CGF.EHStack.popTerminate();
}

LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue(
    CodeGenFunction &CGF) {
  return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()),
                            getThreadIDVariable()->getType(),
                            AlignmentSource::Decl);
}

static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC,
                                       QualType FieldTy) {
  auto *Field = FieldDecl::Create(
      C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy,
      C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()),
      /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit);
  Field->setAccess(AS_public);
  DC->addDecl(Field);
  return Field;
}

CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM, StringRef FirstSeparator,
                                 StringRef Separator)
    : CGM(CGM), FirstSeparator(FirstSeparator), Separator(Separator),
      OMPBuilder(CGM.getModule()), OffloadEntriesInfoManager(CGM) {
  KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8);

  // Initialize Types used in OpenMPIRBuilder from OMPKinds.def
  OMPBuilder.initialize();
  loadOffloadInfoMetadata();
}

void CGOpenMPRuntime::clear() {
  InternalVars.clear();
  // Clean non-target variable declarations possibly used only in debug info.
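  // Only globals that are still declarations and have no remaining uses are
  // erased here; anything defined or still referenced must stay in the module.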
  for (const auto &Data : EmittedNonTargetVariables) {
    if (!Data.getValue().pointsToAliveValue())
      continue;
    auto *GV = dyn_cast<llvm::GlobalVariable>(Data.getValue());
    if (!GV)
      continue;
    if (!GV->isDeclaration() || GV->getNumUses() > 0)
      continue;
    GV->eraseFromParent();
  }
}

std::string CGOpenMPRuntime::getName(ArrayRef<StringRef> Parts) const {
  SmallString<128> Buffer;
  llvm::raw_svector_ostream OS(Buffer);
  StringRef Sep = FirstSeparator;
  for (StringRef Part : Parts) {
    OS << Sep << Part;
    Sep = Separator;
  }
  return std::string(OS.str());
}

static llvm::Function *
emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty,
                          const Expr *CombinerInitializer, const VarDecl *In,
                          const VarDecl *Out, bool IsCombiner) {
  // void .omp_combiner.(Ty *in, Ty *out);
  ASTContext &C = CGM.getContext();
  QualType PtrTy = C.getPointerType(Ty).withRestrict();
  FunctionArgList Args;
  ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(),
                               /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(),
                              /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
  Args.push_back(&OmpOutParm);
  Args.push_back(&OmpInParm);
  const CGFunctionInfo &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName(
      {IsCombiner ? "omp_combiner" : "omp_initializer", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  if (CGM.getLangOpts().Optimize) {
    Fn->removeFnAttr(llvm::Attribute::NoInline);
    Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
    Fn->addFnAttr(llvm::Attribute::AlwaysInline);
  }
  CodeGenFunction CGF(CGM);
  // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions.
  // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions.
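  // The privatization scope set up below redirects references to the declared
  // omp_in/omp_out variables to the pointees of the corresponding implicit
  // parameters, so the combiner/initializer body operates directly on the
  // storage the runtime passes in.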
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, In->getLocation(),
                    Out->getLocation());
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm);
  Scope.addPrivate(In, [&CGF, AddrIn, PtrTy]() {
    return CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>())
        .getAddress(CGF);
  });
  Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm);
  Scope.addPrivate(Out, [&CGF, AddrOut, PtrTy]() {
    return CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>())
        .getAddress(CGF);
  });
  (void)Scope.Privatize();
  if (!IsCombiner && Out->hasInit() &&
      !CGF.isTrivialInitializer(Out->getInit())) {
    CGF.EmitAnyExprToMem(Out->getInit(), CGF.GetAddrOfLocalVar(Out),
                         Out->getType().getQualifiers(),
                         /*IsInitializer=*/true);
  }
  if (CombinerInitializer)
    CGF.EmitIgnoredExpr(CombinerInitializer);
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}

void CGOpenMPRuntime::emitUserDefinedReduction(
    CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) {
  if (UDRMap.count(D) > 0)
    return;
  llvm::Function *Combiner = emitCombinerOrInitializer(
      CGM, D->getType(), D->getCombiner(),
      cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerIn())->getDecl()),
      cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerOut())->getDecl()),
      /*IsCombiner=*/true);
  llvm::Function *Initializer = nullptr;
  if (const Expr *Init = D->getInitializer()) {
    Initializer = emitCombinerOrInitializer(
        CGM, D->getType(),
        D->getInitializerKind() == OMPDeclareReductionDecl::CallInit ? Init
                                                                     : nullptr,
        cast<VarDecl>(cast<DeclRefExpr>(D->getInitOrig())->getDecl()),
        cast<VarDecl>(cast<DeclRefExpr>(D->getInitPriv())->getDecl()),
        /*IsCombiner=*/false);
  }
  UDRMap.try_emplace(D, Combiner, Initializer);
  if (CGF) {
    auto &Decls = FunctionUDRMap.FindAndConstruct(CGF->CurFn);
    Decls.second.push_back(D);
  }
}

std::pair<llvm::Function *, llvm::Function *>
CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) {
  auto I = UDRMap.find(D);
  if (I != UDRMap.end())
    return I->second;
  emitUserDefinedReduction(/*CGF=*/nullptr, D);
  return UDRMap.lookup(D);
}

namespace {
// Temporary RAII solution to perform a push/pop stack event on the OpenMP IR
// Builder if one is present.
struct PushAndPopStackRAII {
  PushAndPopStackRAII(llvm::OpenMPIRBuilder *OMPBuilder, CodeGenFunction &CGF,
                      bool HasCancel, llvm::omp::Directive Kind)
      : OMPBuilder(OMPBuilder) {
    if (!OMPBuilder)
      return;

    // The following callback is the crucial part of clang's cleanup process.
    //
    // NOTE:
    // Once the OpenMPIRBuilder is used to create parallel regions (and
    // similar), the cancellation destination (Dest below) is determined via
    // IP. That means if we have variables to finalize, we split the block at
    // IP, use the new block (=BB) as the destination to build a JumpDest (via
    // getJumpDestInCurrentScope(BB)), which is then fed to
    // EmitBranchThroughCleanup. Furthermore, there will not be the need
    // to push & pop a FinalizationInfo object.
    // The FiniCB will still be needed, but at the point where the
    // OpenMPIRBuilder is asked to construct a parallel (or similar) construct.
    auto FiniCB = [&CGF](llvm::OpenMPIRBuilder::InsertPointTy IP) {
      assert(IP.getBlock()->end() == IP.getPoint() &&
             "Clang CG should cause non-terminated block!");
      CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
      CGF.Builder.restoreIP(IP);
      CodeGenFunction::JumpDest Dest =
          CGF.getOMPCancelDestination(OMPD_parallel);
      CGF.EmitBranchThroughCleanup(Dest);
    };

    // TODO: Remove this once we emit parallel regions through the
    // OpenMPIRBuilder as it can do this setup internally.
    llvm::OpenMPIRBuilder::FinalizationInfo FI({FiniCB, Kind, HasCancel});
    OMPBuilder->pushFinalizationCB(std::move(FI));
  }
  ~PushAndPopStackRAII() {
    if (OMPBuilder)
      OMPBuilder->popFinalizationCB();
  }
  llvm::OpenMPIRBuilder *OMPBuilder;
};
} // namespace

static llvm::Function *emitParallelOrTeamsOutlinedFunction(
    CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS,
    const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
    const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen) {
  assert(ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 *");
  CodeGenFunction CGF(CGM, true);
  bool HasCancel = false;
  if (const auto *OPD = dyn_cast<OMPParallelDirective>(&D))
    HasCancel = OPD->hasCancel();
  else if (const auto *OPD = dyn_cast<OMPTargetParallelDirective>(&D))
    HasCancel = OPD->hasCancel();
  else if (const auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D))
    HasCancel = OPSD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPTargetParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD =
               dyn_cast<OMPTeamsDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD =
               dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();

  // TODO: Temporarily inform the OpenMPIRBuilder, if any, about the new
  // parallel region to make cancellation barriers work properly.
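  // The RAII object below pushes the finalization callback for the duration
  // of the outlining and pops it again when this function returns.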
  llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
  PushAndPopStackRAII PSR(&OMPBuilder, CGF, HasCancel, InnermostKind);
  CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind,
                                    HasCancel, OutlinedHelperName);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  return CGF.GenerateOpenMPCapturedStmtFunction(*CS, D.getBeginLoc());
}

llvm::Function *CGOpenMPRuntime::emitParallelOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  const CapturedStmt *CS = D.getCapturedStmt(OMPD_parallel);
  return emitParallelOrTeamsOutlinedFunction(
      CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
}

llvm::Function *CGOpenMPRuntime::emitTeamsOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  const CapturedStmt *CS = D.getCapturedStmt(OMPD_teams);
  return emitParallelOrTeamsOutlinedFunction(
      CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
}

llvm::Function *CGOpenMPRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF,
                                              PrePostActionTy &) {
    llvm::Value *ThreadID = getThreadID(CGF, D.getBeginLoc());
    llvm::Value *UpLoc = emitUpdateLocation(CGF, D.getBeginLoc());
    llvm::Value *TaskArgs[] = {
        UpLoc, ThreadID,
        CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar),
                                    TaskTVar->getType()->castAs<PointerType>())
            .getPointer(CGF)};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_omp_task),
                        TaskArgs);
  };
  CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar,
                                                            UntiedCodeGen);
  CodeGen.setAction(Action);
  assert(!ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 for tasks");
  const OpenMPDirectiveKind Region =
      isOpenMPTaskLoopDirective(D.getDirectiveKind()) ? OMPD_taskloop
                                                      : OMPD_task;
  const CapturedStmt *CS = D.getCapturedStmt(Region);
  bool HasCancel = false;
  if (const auto *TD = dyn_cast<OMPTaskDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPMasterTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPParallelMasterTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();

  CodeGenFunction CGF(CGM, true);
  CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen,
                                        InnermostKind, HasCancel, Action);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  llvm::Function *Res = CGF.GenerateCapturedStmtFunction(*CS);
  if (!Tied)
    NumberOfParts = Action.getNumberOfParts();
  return Res;
}

static void buildStructValue(ConstantStructBuilder &Fields, CodeGenModule &CGM,
                             const RecordDecl *RD, const CGRecordLayout &RL,
                             ArrayRef<llvm::Constant *> Data) {
  llvm::StructType *StructTy = RL.getLLVMType();
  unsigned PrevIdx = 0;
  ConstantInitBuilder CIBuilder(CGM);
  auto DI = Data.begin();
  for (const FieldDecl *FD : RD->fields()) {
    unsigned Idx = RL.getLLVMFieldNo(FD);
    // Fill the alignment.
    for (unsigned I = PrevIdx; I < Idx; ++I)
      Fields.add(llvm::Constant::getNullValue(StructTy->getElementType(I)));
    PrevIdx = Idx + 1;
    Fields.add(*DI);
    ++DI;
  }
}

template <class... As>
static llvm::GlobalVariable *
createGlobalStruct(CodeGenModule &CGM, QualType Ty, bool IsConstant,
                   ArrayRef<llvm::Constant *> Data, const Twine &Name,
                   As &&... Args) {
  const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl());
  const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD);
  ConstantInitBuilder CIBuilder(CGM);
  ConstantStructBuilder Fields = CIBuilder.beginStruct(RL.getLLVMType());
  buildStructValue(Fields, CGM, RD, RL, Data);
  return Fields.finishAndCreateGlobal(
      Name, CGM.getContext().getAlignOfGlobalVarInChars(Ty), IsConstant,
      std::forward<As>(Args)...);
}

template <typename T>
static void
createConstantGlobalStructAndAddToParent(CodeGenModule &CGM, QualType Ty,
                                         ArrayRef<llvm::Constant *> Data,
                                         T &Parent) {
  const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl());
  const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD);
  ConstantStructBuilder Fields = Parent.beginStruct(RL.getLLVMType());
  buildStructValue(Fields, CGM, RD, RL, Data);
  Fields.finishAndAddTo(Parent);
}

void CGOpenMPRuntime::setLocThreadIdInsertPt(CodeGenFunction &CGF,
                                             bool AtCurrentPoint) {
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
  assert(!Elem.second.ServiceInsertPt && "Insert point is set already.");

  llvm::Value *Undef = llvm::UndefValue::get(CGF.Int32Ty);
  if (AtCurrentPoint) {
    Elem.second.ServiceInsertPt = new llvm::BitCastInst(
        Undef, CGF.Int32Ty, "svcpt", CGF.Builder.GetInsertBlock());
  } else {
    Elem.second.ServiceInsertPt =
        new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt");
    Elem.second.ServiceInsertPt->insertAfter(CGF.AllocaInsertPt);
  }
}

void CGOpenMPRuntime::clearLocThreadIdInsertPt(CodeGenFunction &CGF) {
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
  if (Elem.second.ServiceInsertPt) {
    llvm::Instruction *Ptr = Elem.second.ServiceInsertPt;
    Elem.second.ServiceInsertPt = nullptr;
    Ptr->eraseFromParent();
  }
}

static StringRef getIdentStringFromSourceLocation(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  SmallString<128> &Buffer) {
  llvm::raw_svector_ostream OS(Buffer);
  // Build debug location
  PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
  OS << ";" << PLoc.getFilename() << ";";
  if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
    OS << FD->getQualifiedNameAsString();
  OS << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;";
  return OS.str();
}

llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF,
                                                 SourceLocation Loc,
                                                 unsigned Flags) {
  llvm::Constant *SrcLocStr;
  if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo ||
      Loc.isInvalid()) {
    SrcLocStr = OMPBuilder.getOrCreateDefaultSrcLocStr();
  } else {
    std::string FunctionName;
    if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
      FunctionName = FD->getQualifiedNameAsString();
    PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
    const char *FileName = PLoc.getFilename();
    unsigned Line = PLoc.getLine();
    unsigned Column = PLoc.getColumn();
    SrcLocStr =
        OMPBuilder.getOrCreateSrcLocStr(FunctionName, FileName, Line, Column);
  }
  unsigned Reserved2Flags = getDefaultLocationReserved2Flags();
  return OMPBuilder.getOrCreateIdent(SrcLocStr, llvm::omp::IdentFlag(Flags),
                                     Reserved2Flags);
}

llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF,
                                          SourceLocation Loc) {
  assert(CGF.CurFn && "No function in current CodeGenFunction.");
  // If the OpenMPIRBuilder is used we need to use it for all thread id calls as
  // the clang invariants used below might be broken.
  if (CGM.getLangOpts().OpenMPIRBuilder) {
    SmallString<128> Buffer;
    OMPBuilder.updateToLocation(CGF.Builder.saveIP());
    auto *SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(
        getIdentStringFromSourceLocation(CGF, Loc, Buffer));
    return OMPBuilder.getOrCreateThreadID(
        OMPBuilder.getOrCreateIdent(SrcLocStr));
  }

  llvm::Value *ThreadID = nullptr;
  // Check whether we've already cached a load of the thread id in this
  // function.
  auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
  if (I != OpenMPLocThreadIDMap.end()) {
    ThreadID = I->second.ThreadID;
    if (ThreadID != nullptr)
      return ThreadID;
  }
  // If exceptions are enabled, do not use the parameter to avoid possible
  // crashes.
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    if (OMPRegionInfo->getThreadIDVariable()) {
      // Check if this is an outlined function with the thread id passed as an
      // argument.
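      // The parameter load is reused only when it is known to be safe with
      // respect to EH: exceptions are disabled, we are still in the entry
      // block, or the pointer is defined in the entry or current block.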
      LValue LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF);
      llvm::BasicBlock *TopBlock = CGF.AllocaInsertPt->getParent();
      if (!CGF.EHStack.requiresLandingPad() || !CGF.getLangOpts().Exceptions ||
          !CGF.getLangOpts().CXXExceptions ||
          CGF.Builder.GetInsertBlock() == TopBlock ||
          !isa<llvm::Instruction>(LVal.getPointer(CGF)) ||
          cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
              TopBlock ||
          cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
              CGF.Builder.GetInsertBlock()) {
        ThreadID = CGF.EmitLoadOfScalar(LVal, Loc);
        // If value loaded in entry block, cache it and use it everywhere in
        // function.
        if (CGF.Builder.GetInsertBlock() == TopBlock) {
          auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
          Elem.second.ThreadID = ThreadID;
        }
        return ThreadID;
      }
    }
  }

  // This is not an outlined function region - need to call kmp_int32
  // __kmpc_global_thread_num(ident_t *loc).
  // Generate thread id value and cache this value for use across the
  // function.
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
  if (!Elem.second.ServiceInsertPt)
    setLocThreadIdInsertPt(CGF);
  CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
  CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt);
  llvm::CallInst *Call = CGF.Builder.CreateCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___kmpc_global_thread_num),
      emitUpdateLocation(CGF, Loc));
  Call->setCallingConv(CGF.getRuntimeCC());
  Elem.second.ThreadID = Call;
  return Call;
}

void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) {
  assert(CGF.CurFn && "No function in current CodeGenFunction.");
  if (OpenMPLocThreadIDMap.count(CGF.CurFn)) {
    clearLocThreadIdInsertPt(CGF);
    OpenMPLocThreadIDMap.erase(CGF.CurFn);
  }
  if (FunctionUDRMap.count(CGF.CurFn) > 0) {
    for (const auto *D : FunctionUDRMap[CGF.CurFn])
      UDRMap.erase(D);
    FunctionUDRMap.erase(CGF.CurFn);
  }
  auto I = FunctionUDMMap.find(CGF.CurFn);
  if (I != FunctionUDMMap.end()) {
    for (const auto *D : I->second)
      UDMMap.erase(D);
    FunctionUDMMap.erase(I);
  }
  LastprivateConditionalToTypes.erase(CGF.CurFn);
  FunctionToUntiedTaskStackMap.erase(CGF.CurFn);
}

llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() {
  return OMPBuilder.IdentPtr;
}

llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() {
  if (!Kmpc_MicroTy) {
    // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...)
    llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty),
                                 llvm::PointerType::getUnqual(CGM.Int32Ty)};
    Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true);
  }
  return llvm::PointerType::getUnqual(Kmpc_MicroTy);
}

llvm::FunctionCallee
CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize, bool IVSigned,
                                             bool IsGPUDistribute) {
  assert((IVSize == 32 || IVSize == 64) &&
         "IV size is not compatible with the omp runtime");
  StringRef Name;
  if (IsGPUDistribute)
    Name = IVSize == 32 ? (IVSigned ? "__kmpc_distribute_static_init_4"
                                    : "__kmpc_distribute_static_init_4u")
                        : (IVSigned ? "__kmpc_distribute_static_init_8"
                                    : "__kmpc_distribute_static_init_8u");
  else
    Name = IVSize == 32 ? (IVSigned ? "__kmpc_for_static_init_4"
                                    : "__kmpc_for_static_init_4u")
                        : (IVSigned ? "__kmpc_for_static_init_8"
                                    : "__kmpc_for_static_init_8u");

  llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
  auto *PtrTy = llvm::PointerType::getUnqual(ITy);
  llvm::Type *TypeParams[] = {
      getIdentTyPointerTy(),                     // loc
      CGM.Int32Ty,                               // tid
      CGM.Int32Ty,                               // schedtype
      llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
      PtrTy,                                     // p_lower
      PtrTy,                                     // p_upper
      PtrTy,                                     // p_stride
      ITy,                                       // incr
      ITy                                        // chunk
  };
  auto *FnTy =
      llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
  return CGM.CreateRuntimeFunction(FnTy, Name);
}

llvm::FunctionCallee
CGOpenMPRuntime::createDispatchInitFunction(unsigned IVSize, bool IVSigned) {
  assert((IVSize == 32 || IVSize == 64) &&
         "IV size is not compatible with the omp runtime");
  StringRef Name =
      IVSize == 32
          ? (IVSigned ? "__kmpc_dispatch_init_4" : "__kmpc_dispatch_init_4u")
          : (IVSigned ? "__kmpc_dispatch_init_8" : "__kmpc_dispatch_init_8u");
  llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
  llvm::Type *TypeParams[] = {
      getIdentTyPointerTy(), // loc
      CGM.Int32Ty,           // tid
      CGM.Int32Ty,           // schedtype
      ITy,                   // lower
      ITy,                   // upper
      ITy,                   // stride
      ITy                    // chunk
  };
  auto *FnTy =
      llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
  return CGM.CreateRuntimeFunction(FnTy, Name);
}

llvm::FunctionCallee
CGOpenMPRuntime::createDispatchFiniFunction(unsigned IVSize, bool IVSigned) {
  assert((IVSize == 32 || IVSize == 64) &&
         "IV size is not compatible with the omp runtime");
  StringRef Name =
      IVSize == 32
          ? (IVSigned ? "__kmpc_dispatch_fini_4" : "__kmpc_dispatch_fini_4u")
          : (IVSigned ? "__kmpc_dispatch_fini_8" : "__kmpc_dispatch_fini_8u");
  llvm::Type *TypeParams[] = {
      getIdentTyPointerTy(), // loc
      CGM.Int32Ty,           // tid
  };
  auto *FnTy =
      llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
  return CGM.CreateRuntimeFunction(FnTy, Name);
}

llvm::FunctionCallee
CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize, bool IVSigned) {
  assert((IVSize == 32 || IVSize == 64) &&
         "IV size is not compatible with the omp runtime");
  StringRef Name =
      IVSize == 32
          ? (IVSigned ? "__kmpc_dispatch_next_4" : "__kmpc_dispatch_next_4u")
          : (IVSigned ? "__kmpc_dispatch_next_8" : "__kmpc_dispatch_next_8u");
  llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
  auto *PtrTy = llvm::PointerType::getUnqual(ITy);
  llvm::Type *TypeParams[] = {
      getIdentTyPointerTy(),                     // loc
      CGM.Int32Ty,                               // tid
      llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
      PtrTy,                                     // p_lower
      PtrTy,                                     // p_upper
      PtrTy                                      // p_stride
  };
  auto *FnTy =
      llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
  return CGM.CreateRuntimeFunction(FnTy, Name);
}

/// Obtain information that uniquely identifies a target entry. This
/// consists of the file and device IDs as well as the line number associated
/// with the relevant entry source location.
static void getTargetEntryUniqueInfo(ASTContext &C, SourceLocation Loc,
                                     unsigned &DeviceID, unsigned &FileID,
                                     unsigned &LineNum) {
  SourceManager &SM = C.getSourceManager();

  // The loc must always be valid and have a file ID (the user cannot use
  // #pragma directives in macros).
  assert(Loc.isValid() && "Source location is expected to be always valid.");

  PresumedLoc PLoc = SM.getPresumedLoc(Loc);
  assert(PLoc.isValid() && "Source location is expected to be always valid.");

  llvm::sys::fs::UniqueID ID;
  if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID)) {
    PLoc = SM.getPresumedLoc(Loc, /*UseLineDirectives=*/false);
    assert(PLoc.isValid() && "Source location is expected to be always valid.");
    if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID))
      SM.getDiagnostics().Report(diag::err_cannot_open_file)
          << PLoc.getFilename() << EC.message();
  }

  DeviceID = ID.getDevice();
  FileID = ID.getFile();
  LineNum = PLoc.getLine();
}

Address CGOpenMPRuntime::getAddrOfDeclareTargetVar(const VarDecl *VD) {
  if (CGM.getLangOpts().OpenMPSimd)
    return Address::invalid();
  llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
  if (Res && (*Res == OMPDeclareTargetDeclAttr::MT_Link ||
              (*Res == OMPDeclareTargetDeclAttr::MT_To &&
               HasRequiresUnifiedSharedMemory))) {
    SmallString<64> PtrName;
    {
      llvm::raw_svector_ostream OS(PtrName);
      OS << CGM.getMangledName(GlobalDecl(VD));
      if (!VD->isExternallyVisible()) {
        unsigned DeviceID, FileID, Line;
        getTargetEntryUniqueInfo(CGM.getContext(),
                                 VD->getCanonicalDecl()->getBeginLoc(),
                                 DeviceID, FileID, Line);
        OS << llvm::format("_%x", FileID);
      }
      OS << "_decl_tgt_ref_ptr";
    }
    llvm::Value *Ptr = CGM.getModule().getNamedValue(PtrName);
    if (!Ptr) {
      QualType PtrTy = CGM.getContext().getPointerType(VD->getType());
      Ptr = getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(PtrTy),
                                        PtrName);

      auto *GV = cast<llvm::GlobalVariable>(Ptr);
      GV->setLinkage(llvm::GlobalValue::WeakAnyLinkage);

      if (!CGM.getLangOpts().OpenMPIsDevice)
        GV->setInitializer(CGM.GetAddrOfGlobal(VD));
      registerTargetGlobalVariable(VD, cast<llvm::Constant>(Ptr));
    }
    return Address(Ptr, CGM.getContext().getDeclAlign(VD));
  }
  return Address::invalid();
}

llvm::Constant *
CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) {
  assert(!CGM.getLangOpts().OpenMPUseTLS ||
         !CGM.getContext().getTargetInfo().isTLSSupported());
  // Lookup the entry, lazily creating it if necessary.
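  // For a variable whose mangled name is "x", the internal name is "x"
  // followed by getName({"cache", ""}). Assuming getName joins its parts
  // with '.', that yields "x.cache." (an illustration, not verified here);
  // the resulting i8** slot is what __kmpc_threadprivate_cached uses as its
  // per-variable cache.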
  std::string Suffix = getName({"cache", ""});
  return getOrCreateInternalVariable(
      CGM.Int8PtrPtrTy, Twine(CGM.getMangledName(VD)).concat(Suffix));
}

Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
                                                const VarDecl *VD,
                                                Address VDAddr,
                                                SourceLocation Loc) {
  if (CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getContext().getTargetInfo().isTLSSupported())
    return VDAddr;

  llvm::Type *VarTy = VDAddr.getElementType();
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
                         CGF.Builder.CreatePointerCast(VDAddr.getPointer(),
                                                       CGM.Int8PtrTy),
                         CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)),
                         getOrCreateThreadPrivateCache(VD)};
  return Address(CGF.EmitRuntimeCall(
                     OMPBuilder.getOrCreateRuntimeFunction(
                         CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
                     Args),
                 VDAddr.getAlignment());
}

void CGOpenMPRuntime::emitThreadPrivateVarInit(
    CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor,
    llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) {
  // Call kmp_int32 __kmpc_global_thread_num(&loc) to init the OpenMP runtime
  // library.
  llvm::Value *OMPLoc = emitUpdateLocation(CGF, Loc);
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_global_thread_num),
                      OMPLoc);
  // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor)
  // to register constructor/destructor for variable.
  llvm::Value *Args[] = {
      OMPLoc, CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.VoidPtrTy),
      Ctor, CopyCtor, Dtor};
  CGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(), OMPRTL___kmpc_threadprivate_register),
      Args);
}

llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition(
    const VarDecl *VD, Address VDAddr, SourceLocation Loc,
    bool PerformInit, CodeGenFunction *CGF) {
  if (CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getContext().getTargetInfo().isTLSSupported())
    return nullptr;

  VD = VD->getDefinition(CGM.getContext());
  if (VD && ThreadPrivateWithDefinition.insert(CGM.getMangledName(VD)).second) {
    QualType ASTTy = VD->getType();

    llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr;
    const Expr *Init = VD->getAnyInitializer();
    if (CGM.getLangOpts().CPlusPlus && PerformInit) {
      // Generate a function that re-emits the declaration's initializer into
      // the threadprivate copy of the variable VD.
      CodeGenFunction CtorCGF(CGM);
      FunctionArgList Args;
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
                            ImplicitParamDecl::Other);
      Args.push_back(&Dst);

      const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          CGM.getContext().VoidPtrTy, Args);
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      std::string Name = getName({"__kmpc_global_ctor_", ""});
      llvm::Function *Fn =
          CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
      CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI,
                            Args, Loc, Loc);
      llvm::Value *ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      Address Arg = Address(ArgVal, VDAddr.getAlignment());
      Arg = CtorCGF.Builder.CreateElementBitCast(
          Arg, CtorCGF.ConvertTypeForMem(ASTTy));
      CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(),
                               /*IsInitializer=*/true);
      ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue);
      CtorCGF.FinishFunction();
      Ctor = Fn;
    }
    if (VD->getType().isDestructedType() != QualType::DK_none) {
      // Generate a function that emits the destructor call for the
      // threadprivate copy of the variable VD.
      CodeGenFunction DtorCGF(CGM);
      FunctionArgList Args;
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
                            ImplicitParamDecl::Other);
      Args.push_back(&Dst);

      const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          CGM.getContext().VoidTy, Args);
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      std::string Name = getName({"__kmpc_global_dtor_", ""});
      llvm::Function *Fn =
          CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
      DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args,
                            Loc, Loc);
      // Create a scope with an artificial location for the body of this
      // function.
      auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
      llvm::Value *ArgVal = DtorCGF.EmitLoadOfScalar(
          DtorCGF.GetAddrOfLocalVar(&Dst),
          /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation());
      DtorCGF.emitDestroy(Address(ArgVal, VDAddr.getAlignment()), ASTTy,
                          DtorCGF.getDestroyer(ASTTy.isDestructedType()),
                          DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
      DtorCGF.FinishFunction();
      Dtor = Fn;
    }
    // Do not emit the init function if it is not required.
    if (!Ctor && !Dtor)
      return nullptr;

    llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
    auto *CopyCtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs,
                                               /*isVarArg=*/false)
                           ->getPointerTo();
    // Copying constructor for the threadprivate variable. This parameter is
    // reserved by the runtime, which currently requires it to always be
    // NULL; otherwise the runtime fires an assertion.
    CopyCtor = llvm::Constant::getNullValue(CopyCtorTy);
    if (Ctor == nullptr) {
      auto *CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
                                             /*isVarArg=*/false)
                         ->getPointerTo();
      Ctor = llvm::Constant::getNullValue(CtorTy);
    }
    if (Dtor == nullptr) {
      auto *DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy,
                                             /*isVarArg=*/false)
                         ->getPointerTo();
      Dtor = llvm::Constant::getNullValue(DtorTy);
    }
    if (!CGF) {
      auto *InitFunctionTy =
          llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false);
      std::string Name = getName({"__omp_threadprivate_init_", ""});
      llvm::Function *InitFunction = CGM.CreateGlobalInitOrCleanUpFunction(
          InitFunctionTy, Name, CGM.getTypes().arrangeNullaryFunction());
      CodeGenFunction InitCGF(CGM);
      FunctionArgList ArgList;
      InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction,
                            CGM.getTypes().arrangeNullaryFunction(), ArgList,
                            Loc, Loc);
      emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
      InitCGF.FinishFunction();
      return InitFunction;
    }
    emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
  }
  return nullptr;
}

bool CGOpenMPRuntime::emitDeclareTargetVarDefinition(const VarDecl *VD,
                                                     llvm::GlobalVariable *Addr,
                                                     bool PerformInit) {
  if (CGM.getLangOpts().OMPTargetTriples.empty() &&
      !CGM.getLangOpts().OpenMPIsDevice)
    return false;
  Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
  if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
      (*Res == OMPDeclareTargetDeclAttr::MT_To &&
       HasRequiresUnifiedSharedMemory))
    return CGM.getLangOpts().OpenMPIsDevice;
  VD = VD->getDefinition(CGM.getContext());
  assert(VD && "Unknown VarDecl");

  if (!DeclareTargetWithDefinition.insert(CGM.getMangledName(VD)).second)
    return CGM.getLangOpts().OpenMPIsDevice;

  QualType ASTTy = VD->getType();
  SourceLocation Loc = VD->getCanonicalDecl()->getBeginLoc();

  // Produce the unique prefix to identify the new target regions. We use
  // the source location of the variable declaration, which we know does not
  // conflict with any target region.
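  // For illustration, with made-up IDs: a variable 'x' declared at line 42
  // in a file with device ID 0x803 and file ID 0x4711 gets a prefix of the
  // form "__omp_offloading_<0x803>_<0x4711>_x_l42" below, and the ctor/dtor
  // entries registered further down append "_ctor" and "_dtor" to it.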
  unsigned DeviceID;
  unsigned FileID;
  unsigned Line;
  getTargetEntryUniqueInfo(CGM.getContext(), Loc, DeviceID, FileID, Line);
  SmallString<128> Buffer, Out;
  {
    llvm::raw_svector_ostream OS(Buffer);
    OS << "__omp_offloading_" << llvm::format("_%x", DeviceID)
       << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line;
  }

  const Expr *Init = VD->getAnyInitializer();
  if (CGM.getLangOpts().CPlusPlus && PerformInit) {
    llvm::Constant *Ctor;
    llvm::Constant *ID;
    if (CGM.getLangOpts().OpenMPIsDevice) {
      // Generate a function that re-emits the declaration's initializer into
      // the threadprivate copy of the variable VD.
      CodeGenFunction CtorCGF(CGM);

      const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction(
          FTy, Twine(Buffer, "_ctor"), FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(CtorCGF);
      CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
                            FunctionArgList(), Loc, Loc);
      auto AL = ApplyDebugLocation::CreateArtificial(CtorCGF);
      CtorCGF.EmitAnyExprToMem(Init,
                               Address(Addr, CGM.getContext().getDeclAlign(VD)),
                               Init->getType().getQualifiers(),
                               /*IsInitializer=*/true);
      CtorCGF.FinishFunction();
      Ctor = Fn;
      ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
      CGM.addUsedGlobal(cast<llvm::GlobalValue>(Ctor));
    } else {
      Ctor = new llvm::GlobalVariable(
          CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
          llvm::GlobalValue::PrivateLinkage,
          llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_ctor"));
      ID = Ctor;
    }

    // Register the information for the entry associated with the constructor.
    Out.clear();
    OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
        DeviceID, FileID, Twine(Buffer, "_ctor").toStringRef(Out), Line, Ctor,
        ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryCtor);
  }
  if (VD->getType().isDestructedType() != QualType::DK_none) {
    llvm::Constant *Dtor;
    llvm::Constant *ID;
    if (CGM.getLangOpts().OpenMPIsDevice) {
      // Generate a function that emits the destructor call for the
      // threadprivate copy of the variable VD.
      CodeGenFunction DtorCGF(CGM);

      const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction(
          FTy, Twine(Buffer, "_dtor"), FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
      DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
                            FunctionArgList(), Loc, Loc);
      // Create a scope with an artificial location for the body of this
      // function.
      auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
      DtorCGF.emitDestroy(Address(Addr, CGM.getContext().getDeclAlign(VD)),
                          ASTTy, DtorCGF.getDestroyer(ASTTy.isDestructedType()),
                          DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
      DtorCGF.FinishFunction();
      Dtor = Fn;
      ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
      CGM.addUsedGlobal(cast<llvm::GlobalValue>(Dtor));
    } else {
      Dtor = new llvm::GlobalVariable(
          CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
          llvm::GlobalValue::PrivateLinkage,
          llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_dtor"));
      ID = Dtor;
    }
    // Register the information for the entry associated with the destructor.
    Out.clear();
    OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
        DeviceID, FileID, Twine(Buffer, "_dtor").toStringRef(Out), Line, Dtor,
        ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryDtor);
  }
  return CGM.getLangOpts().OpenMPIsDevice;
}

Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF,
                                                          QualType VarType,
                                                          StringRef Name) {
  std::string Suffix = getName({"artificial", ""});
  llvm::Type *VarLVType = CGF.ConvertTypeForMem(VarType);
  llvm::Value *GAddr =
      getOrCreateInternalVariable(VarLVType, Twine(Name).concat(Suffix));
  if (CGM.getLangOpts().OpenMP && CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getTarget().isTLSSupported()) {
    cast<llvm::GlobalVariable>(GAddr)->setThreadLocal(/*Val=*/true);
    return Address(GAddr, CGM.getContext().getTypeAlignInChars(VarType));
  }
  std::string CacheSuffix = getName({"cache", ""});
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, SourceLocation()),
      getThreadID(CGF, SourceLocation()),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(GAddr, CGM.VoidPtrTy),
      CGF.Builder.CreateIntCast(CGF.getTypeSize(VarType), CGM.SizeTy,
                                /*isSigned=*/false),
      getOrCreateInternalVariable(
          CGM.VoidPtrPtrTy, Twine(Name).concat(Suffix).concat(CacheSuffix))};
  return Address(
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          CGF.EmitRuntimeCall(
              OMPBuilder.getOrCreateRuntimeFunction(
                  CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
              Args),
          VarLVType->getPointerTo(/*AddrSpace=*/0)),
      CGM.getContext().getTypeAlignInChars(VarType));
}

void CGOpenMPRuntime::emitIfClause(CodeGenFunction &CGF, const Expr *Cond,
                                   const RegionCodeGenTy &ThenGen,
                                   const RegionCodeGenTy &ElseGen) {
  CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange());

  // If the condition constant folds and can be elided, try to avoid emitting
  // the condition and the dead arm of the if/else.
  bool CondConstant;
  if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) {
    if (CondConstant)
      ThenGen(CGF);
    else
      ElseGen(CGF);
    return;
  }

  // Otherwise, the condition did not fold, or we couldn't elide it. Just
  // emit the conditional branch.
  llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("omp_if.then");
  llvm::BasicBlock *ElseBlock = CGF.createBasicBlock("omp_if.else");
  llvm::BasicBlock *ContBlock = CGF.createBasicBlock("omp_if.end");
  CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0);

  // Emit the 'then' code.
  CGF.EmitBlock(ThenBlock);
  ThenGen(CGF);
  CGF.EmitBranch(ContBlock);
  // Emit the 'else' code if present.
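  // For reference, the control flow emitted for a non-foldable condition is
  // roughly:
  //
  //     br i1 %cond, label %omp_if.then, label %omp_if.else
  //   omp_if.then:
  //     <ThenGen code> ; br label %omp_if.end
  //   omp_if.else:
  //     <ElseGen code> ; br label %omp_if.end
  //   omp_if.end: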
  // There is no need to emit line number for unconditional branch.
  (void)ApplyDebugLocation::CreateEmpty(CGF);
  CGF.EmitBlock(ElseBlock);
  ElseGen(CGF);
  // There is no need to emit line number for unconditional branch.
  (void)ApplyDebugLocation::CreateEmpty(CGF);
  CGF.EmitBranch(ContBlock);
  // Emit the continuation block for code after the if.
  CGF.EmitBlock(ContBlock, /*IsFinished=*/true);
}

void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       llvm::Function *OutlinedFn,
                                       ArrayRef<llvm::Value *> CapturedVars,
                                       const Expr *IfCond) {
  if (!CGF.HaveInsertPoint())
    return;
  llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
  auto &M = CGM.getModule();
  auto &&ThenGen = [&M, OutlinedFn, CapturedVars, RTLoc,
                    this](CodeGenFunction &CGF, PrePostActionTy &) {
    // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn);
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    llvm::Value *Args[] = {
        RTLoc,
        CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
        CGF.Builder.CreateBitCast(OutlinedFn, RT.getKmpc_MicroPointerTy())};
    llvm::SmallVector<llvm::Value *, 16> RealArgs;
    RealArgs.append(std::begin(Args), std::end(Args));
    RealArgs.append(CapturedVars.begin(), CapturedVars.end());

    llvm::FunctionCallee RTLFn =
        OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_fork_call);
    CGF.EmitRuntimeCall(RTLFn, RealArgs);
  };
  auto &&ElseGen = [&M, OutlinedFn, CapturedVars, RTLoc, Loc,
                    this](CodeGenFunction &CGF, PrePostActionTy &) {
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    llvm::Value *ThreadID = RT.getThreadID(CGF, Loc);
    // Build calls:
    // __kmpc_serialized_parallel(&Loc, GTid);
    llvm::Value *Args[] = {RTLoc, ThreadID};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            M, OMPRTL___kmpc_serialized_parallel),
                        Args);

    // OutlinedFn(&gtid, &zero_bound, CapturedStruct);
    Address ThreadIDAddr = RT.emitThreadIDAddress(CGF, Loc);
    Address ZeroAddrBound =
        CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty,
                                         /*Name=*/".bound.zero.addr");
    CGF.InitTempAlloca(ZeroAddrBound, CGF.Builder.getInt32(/*C*/ 0));
    llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
    // ThreadId for serialized parallels is 0.
    OutlinedFnArgs.push_back(ThreadIDAddr.getPointer());
    OutlinedFnArgs.push_back(ZeroAddrBound.getPointer());
    OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());

    // Ensure we do not inline the function. This is trivially true for the
    // ones passed to __kmpc_fork_call, but the ones called in serialized
    // regions could be inlined. This is not perfect, but it is closer to the
    // invariant we want, namely, every data environment starts with a new
    // function.
    // TODO: We should pass the if condition to the runtime function and do
    // the handling there. Much cleaner code.
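    // In other words, for '#pragma omp parallel if(cond)' with a false
    // condition, the sequence emitted by this lambda is roughly:
    //   __kmpc_serialized_parallel(&loc, gtid);
    //   outlined_fn(&gtid, &bound_zero, <captured vars>);
    //   __kmpc_end_serialized_parallel(&loc, gtid);
    // while a true condition takes the __kmpc_fork_call path above.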
    OutlinedFn->removeFnAttr(llvm::Attribute::AlwaysInline);
    OutlinedFn->addFnAttr(llvm::Attribute::NoInline);
    RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs);

    // __kmpc_end_serialized_parallel(&Loc, GTid);
    llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            M, OMPRTL___kmpc_end_serialized_parallel),
                        EndArgs);
  };
  if (IfCond) {
    emitIfClause(CGF, IfCond, ThenGen, ElseGen);
  } else {
    RegionCodeGenTy ThenRCG(ThenGen);
    ThenRCG(CGF);
  }
}

// If we're inside an (outlined) parallel region, use the region info's
// thread-ID variable (it is passed as the first argument of the outlined
// function as "kmp_int32 *gtid"). Otherwise, if we're not inside a parallel
// region but in a regular serial code region, get the thread ID by calling
// kmp_int32 __kmpc_global_thread_num(ident_t *loc), stash this thread ID in
// a temporary, and return the address of that temporary.
Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF,
                                             SourceLocation Loc) {
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
    if (OMPRegionInfo->getThreadIDVariable())
      return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress(CGF);

  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  QualType Int32Ty =
      CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true);
  Address ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp.");
  CGF.EmitStoreOfScalar(ThreadID,
                        CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty));

  return ThreadIDTemp;
}

llvm::Constant *CGOpenMPRuntime::getOrCreateInternalVariable(
    llvm::Type *Ty, const llvm::Twine &Name, unsigned AddressSpace) {
  SmallString<256> Buffer;
  llvm::raw_svector_ostream Out(Buffer);
  Out << Name;
  StringRef RuntimeName = Out.str();
  auto &Elem = *InternalVars.try_emplace(RuntimeName, nullptr).first;
  if (Elem.second) {
    assert(Elem.second->getType()->getPointerElementType() == Ty &&
           "OMP internal variable has different type than requested");
    return &*Elem.second;
  }

  return Elem.second = new llvm::GlobalVariable(
             CGM.getModule(), Ty, /*IsConstant*/ false,
             llvm::GlobalValue::CommonLinkage, llvm::Constant::getNullValue(Ty),
             Elem.first(), /*InsertBefore=*/nullptr,
             llvm::GlobalValue::NotThreadLocal, AddressSpace);
}

llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) {
  std::string Prefix = Twine("gomp_critical_user_", CriticalName).str();
  std::string Name = getName({Prefix, "var"});
  return getOrCreateInternalVariable(KmpCriticalNameTy, Name);
}

namespace {
/// Common pre(post)-action for different OpenMP constructs.
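///
/// Enter() emits the entry runtime call and, in conditional mode, only runs
/// the region body when that call returns nonzero; Exit() emits the exit
/// call. E.g. for 'master' the emitted shape is (a sketch):
///
///   if (__kmpc_master(&loc, gtid)) {
///     <region body>
///     __kmpc_end_master(&loc, gtid);
///   }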
class CommonActionTy final : public PrePostActionTy {
  llvm::FunctionCallee EnterCallee;
  ArrayRef<llvm::Value *> EnterArgs;
  llvm::FunctionCallee ExitCallee;
  ArrayRef<llvm::Value *> ExitArgs;
  bool Conditional;
  llvm::BasicBlock *ContBlock = nullptr;

public:
  CommonActionTy(llvm::FunctionCallee EnterCallee,
                 ArrayRef<llvm::Value *> EnterArgs,
                 llvm::FunctionCallee ExitCallee,
                 ArrayRef<llvm::Value *> ExitArgs, bool Conditional = false)
      : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee),
        ExitArgs(ExitArgs), Conditional(Conditional) {}
  void Enter(CodeGenFunction &CGF) override {
    llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs);
    if (Conditional) {
      llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes);
      auto *ThenBlock = CGF.createBasicBlock("omp_if.then");
      ContBlock = CGF.createBasicBlock("omp_if.end");
      // Generate the branch (If-stmt)
      CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock);
      CGF.EmitBlock(ThenBlock);
    }
  }
  void Done(CodeGenFunction &CGF) {
    // Emit the rest of blocks/branches
    CGF.EmitBranch(ContBlock);
    CGF.EmitBlock(ContBlock, true);
  }
  void Exit(CodeGenFunction &CGF) override {
    CGF.EmitRuntimeCall(ExitCallee, ExitArgs);
  }
};
} // anonymous namespace

void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF,
                                         StringRef CriticalName,
                                         const RegionCodeGenTy &CriticalOpGen,
                                         SourceLocation Loc, const Expr *Hint) {
  // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]);
  // CriticalOpGen();
  // __kmpc_end_critical(ident_t *, gtid, Lock);
  // Prepare arguments and build a call to __kmpc_critical
  if (!CGF.HaveInsertPoint())
    return;
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
                         getCriticalRegionLock(CriticalName)};
  llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args),
                                                std::end(Args));
  if (Hint) {
    EnterArgs.push_back(CGF.Builder.CreateIntCast(
        CGF.EmitScalarExpr(Hint), CGM.Int32Ty, /*isSigned=*/false));
  }
  CommonActionTy Action(
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(),
          Hint ? OMPRTL___kmpc_critical_with_hint : OMPRTL___kmpc_critical),
      EnterArgs,
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___kmpc_end_critical),
      Args);
  CriticalOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen);
}

void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF,
                                       const RegionCodeGenTy &MasterOpGen,
                                       SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // if(__kmpc_master(ident_t *, gtid)) {
  //   MasterOpGen();
  //   __kmpc_end_master(ident_t *, gtid);
  // }
  // Prepare arguments and build a call to __kmpc_master
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_master),
                        Args,
                        OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_end_master),
                        Args,
                        /*Conditional=*/true);
  MasterOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_master, MasterOpGen);
  Action.Done(CGF);
}

void CGOpenMPRuntime::emitMaskedRegion(CodeGenFunction &CGF,
                                       const RegionCodeGenTy &MaskedOpGen,
                                       SourceLocation Loc, const Expr *Filter) {
  if (!CGF.HaveInsertPoint())
    return;
  // if(__kmpc_masked(ident_t *, gtid, filter)) {
  //   MaskedOpGen();
  //   __kmpc_end_masked(ident_t *, gtid);
  // }
  // Prepare arguments and build a call to __kmpc_masked
  llvm::Value *FilterVal = Filter
                               ? CGF.EmitScalarExpr(Filter, CGF.Int32Ty)
                               : llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/0);
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
                         FilterVal};
  llvm::Value *ArgsEnd[] = {emitUpdateLocation(CGF, Loc),
                            getThreadID(CGF, Loc)};
  CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_masked),
                        Args,
                        OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_end_masked),
                        ArgsEnd,
                        /*Conditional=*/true);
  MaskedOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_masked, MaskedOpGen);
  Action.Done(CGF);
}

void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
                                        SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
    OMPBuilder.createTaskyield(CGF.Builder);
  } else {
    // Build call __kmpc_omp_taskyield(loc, thread_id, 0);
    llvm::Value *Args[] = {
        emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
        llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_omp_taskyield),
                        Args);
  }

  if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
    Region->emitUntiedSwitch(CGF);
}

void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF,
                                          const RegionCodeGenTy &TaskgroupOpGen,
                                          SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // __kmpc_taskgroup(ident_t *, gtid);
  // TaskgroupOpGen();
  // __kmpc_end_taskgroup(ident_t *, gtid);
  // Prepare arguments and build a call to __kmpc_taskgroup
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_taskgroup),
                        Args,
                        OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_end_taskgroup),
                        Args);
  TaskgroupOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen);
}

/// Given an array of pointers to variables, project the address of a
/// given variable.
static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array,
                                      unsigned Index, const VarDecl *Var) {
  // Pull out the pointer to the variable.
  Address PtrAddr = CGF.Builder.CreateConstArrayGEP(Array, Index);
  llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr);

  Address Addr = Address(Ptr, CGF.getContext().getDeclAlign(Var));
  Addr = CGF.Builder.CreateElementBitCast(
      Addr, CGF.ConvertTypeForMem(Var->getType()));
  return Addr;
}

static llvm::Value *emitCopyprivateCopyFunction(
    CodeGenModule &CGM, llvm::Type *ArgsType,
    ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs,
    ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps,
    SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  // void copy_func(void *LHSArg, void *RHSArg);
  FunctionArgList Args;
  ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  Args.push_back(&LHSArg);
  Args.push_back(&RHSArg);
  const auto &CGFI =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  std::string Name =
      CGM.getOpenMPRuntime().getName({"omp", "copyprivate", "copy_func"});
  auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
                                    llvm::GlobalValue::InternalLinkage, Name,
                                    &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
  // Dest = (void*[n])(LHSArg);
  // Src = (void*[n])(RHSArg);
  Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                  CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
                  ArgsType),
              CGF.getPointerAlign());
  Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                  CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
                  ArgsType),
              CGF.getPointerAlign());
  // *(Type0*)Dst[0] = *(Type0*)Src[0];
  // *(Type1*)Dst[1] = *(Type1*)Src[1];
  // ...
  // *(Typen*)Dst[n] = *(Typen*)Src[n];
  for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) {
    const auto *DestVar =
        cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl());
    Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar);

    const auto *SrcVar =
        cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl());
    Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar);

    const auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl();
    QualType Type = VD->getType();
    CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]);
  }
  CGF.FinishFunction();
  return Fn;
}

void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF,
                                       const RegionCodeGenTy &SingleOpGen,
                                       SourceLocation Loc,
                                       ArrayRef<const Expr *> CopyprivateVars,
                                       ArrayRef<const Expr *> SrcExprs,
                                       ArrayRef<const Expr *> DstExprs,
                                       ArrayRef<const Expr *> AssignmentOps) {
  if (!CGF.HaveInsertPoint())
    return;
  assert(CopyprivateVars.size() == SrcExprs.size() &&
         CopyprivateVars.size() == DstExprs.size() &&
         CopyprivateVars.size() == AssignmentOps.size());
  ASTContext &C = CGM.getContext();
  // int32 did_it = 0;
  // if(__kmpc_single(ident_t *, gtid)) {
  //   SingleOpGen();
  //   __kmpc_end_single(ident_t *, gtid);
  //   did_it = 1;
  // }
  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
  // <copy_func>, did_it);

  Address DidIt = Address::invalid();
  if (!CopyprivateVars.empty()) {
    // int32 did_it = 0;
    QualType KmpInt32Ty =
        C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
    DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it");
    CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt);
  }
  // Prepare arguments and build a call to __kmpc_single
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_single),
                        Args,
                        OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_end_single),
                        Args,
                        /*Conditional=*/true);
  SingleOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_single, SingleOpGen);
  if (DidIt.isValid()) {
    // did_it = 1;
    CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt);
  }
  Action.Done(CGF);
  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
  // <copy_func>, did_it);
  if (DidIt.isValid()) {
    llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size());
    QualType CopyprivateArrayTy = C.getConstantArrayType(
        C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal,
        /*IndexTypeQuals=*/0);
    // Create a list of all private variables for copyprivate.
    Address CopyprivateList =
        CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list");
    for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) {
      Address Elem = CGF.Builder.CreateConstArrayGEP(CopyprivateList, I);
      CGF.Builder.CreateStore(
          CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
              CGF.EmitLValue(CopyprivateVars[I]).getPointer(CGF),
              CGF.VoidPtrTy),
          Elem);
    }
    // Build a function that copies the private values from the single region
    // to all other threads in the corresponding parallel region.
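    // For example (a sketch, not the exact IR), for
    // '#pragma omp single copyprivate(a, b)' the generated copy_func
    // receives two void*[2] arrays and performs:
    //   *(TyA *)dst[0] = *(TyA *)src[0];  // a
    //   *(TyB *)dst[1] = *(TyB *)src[1];  // b
    // Every thread then calls __kmpc_copyprivate below, with did_it marking
    // the thread whose values are the copy source.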
    llvm::Value *CpyFn = emitCopyprivateCopyFunction(
        CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy)->getPointerTo(),
        CopyprivateVars, SrcExprs, DstExprs, AssignmentOps, Loc);
    llvm::Value *BufSize = CGF.getTypeSize(CopyprivateArrayTy);
    Address CL =
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(CopyprivateList,
                                                        CGF.VoidPtrTy);
    llvm::Value *DidItVal = CGF.Builder.CreateLoad(DidIt);
    llvm::Value *Args[] = {
        emitUpdateLocation(CGF, Loc), // ident_t *<loc>
        getThreadID(CGF, Loc),        // i32 <gtid>
        BufSize,                      // size_t <buf_size>
        CL.getPointer(),              // void *<copyprivate list>
        CpyFn,                        // void (*) (void *, void *) <copy_func>
        DidItVal                      // i32 did_it
    };
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_copyprivate),
                        Args);
  }
}

void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF,
                                        const RegionCodeGenTy &OrderedOpGen,
                                        SourceLocation Loc, bool IsThreads) {
  if (!CGF.HaveInsertPoint())
    return;
  // __kmpc_ordered(ident_t *, gtid);
  // OrderedOpGen();
  // __kmpc_end_ordered(ident_t *, gtid);
  // Prepare arguments and build a call to __kmpc_ordered
  if (IsThreads) {
    llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
    CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_ordered),
                          Args,
                          OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_end_ordered),
                          Args);
    OrderedOpGen.setAction(Action);
    emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
    return;
  }
  emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
}

unsigned CGOpenMPRuntime::getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind) {
  unsigned Flags;
  if (Kind == OMPD_for)
    Flags = OMP_IDENT_BARRIER_IMPL_FOR;
  else if (Kind == OMPD_sections)
    Flags = OMP_IDENT_BARRIER_IMPL_SECTIONS;
  else if (Kind == OMPD_single)
    Flags = OMP_IDENT_BARRIER_IMPL_SINGLE;
  else if (Kind == OMPD_barrier)
    Flags = OMP_IDENT_BARRIER_EXPL;
  else
    Flags = OMP_IDENT_BARRIER_IMPL;
  return Flags;
}

void CGOpenMPRuntime::getDefaultScheduleAndChunk(
    CodeGenFunction &CGF, const OMPLoopDirective &S,
    OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const {
  // Check if the loop directive is actually a doacross loop directive. In
  // this case choose static, 1 schedule.
  if (llvm::any_of(
          S.getClausesOfKind<OMPOrderedClause>(),
          [](const OMPOrderedClause *C) { return C->getNumForLoops(); })) {
    ScheduleKind = OMPC_SCHEDULE_static;
    // Chunk size is 1 in this case.
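    // E.g. '#pragma omp for ordered(2)' takes this path: handing out
    // iterations as static chunks of size 1 keeps the iteration-to-thread
    // mapping simple for the cross-iteration (doacross) dependences. (A
    // sketch of the rationale; the code below just forces static with
    // chunk 1.)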
    llvm::APInt ChunkSize(32, 1);
    ChunkExpr = IntegerLiteral::Create(
        CGF.getContext(), ChunkSize,
        CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/0),
        SourceLocation());
  }
}

void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
                                      OpenMPDirectiveKind Kind, bool EmitChecks,
                                      bool ForceSimpleCall) {
  // Check if we should use the OMPBuilder
  auto *OMPRegionInfo =
      dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo);
  if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
    CGF.Builder.restoreIP(OMPBuilder.createBarrier(
        CGF.Builder, Kind, ForceSimpleCall, EmitChecks));
    return;
  }

  if (!CGF.HaveInsertPoint())
    return;
  unsigned Flags = getDefaultFlagsForBarriers(Kind);
  // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc,
  // thread_id);
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags),
                         getThreadID(CGF, Loc)};
  if (OMPRegionInfo) {
    if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) {
      llvm::Value *Result = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                                OMPRTL___kmpc_cancel_barrier),
          Args);
      if (EmitChecks) {
        // if (__kmpc_cancel_barrier()) {
        //   exit from construct;
        // }
        llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
        llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
        llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
        CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
        CGF.EmitBlock(ExitBB);
        // exit from construct;
        CodeGenFunction::JumpDest CancelDestination =
            CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
        CGF.EmitBranchThroughCleanup(CancelDestination);
        CGF.EmitBlock(ContBB, /*IsFinished=*/true);
      }
      return;
    }
  }
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_barrier),
                      Args);
}

/// Map the OpenMP loop schedule to the runtime enumeration.
static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind,
                                          bool Chunked, bool Ordered) {
  switch (ScheduleKind) {
  case OMPC_SCHEDULE_static:
    return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked)
                   : (Ordered ? OMP_ord_static : OMP_sch_static);
  case OMPC_SCHEDULE_dynamic:
    return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked;
  case OMPC_SCHEDULE_guided:
    return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked;
  case OMPC_SCHEDULE_runtime:
    return Ordered ? OMP_ord_runtime : OMP_sch_runtime;
  case OMPC_SCHEDULE_auto:
    return Ordered ? OMP_ord_auto : OMP_sch_auto;
  case OMPC_SCHEDULE_unknown:
    assert(!Chunked && "chunk was specified but schedule kind not known");
    return Ordered ? OMP_ord_static : OMP_sch_static;
  }
  llvm_unreachable("Unexpected runtime schedule");
}

/// Map the OpenMP distribute schedule to the runtime enumeration.
static OpenMPSchedType
getRuntimeSchedule(OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) {
  // Only 'static' is allowed for dist_schedule.
  return Chunked ? OMP_dist_sch_static_chunked : OMP_dist_sch_static;
}

bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind,
                                         bool Chunked) const {
  OpenMPSchedType Schedule =
      getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
  return Schedule == OMP_sch_static;
}

bool CGOpenMPRuntime::isStaticNonchunked(
    OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
  OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
  return Schedule == OMP_dist_sch_static;
}

bool CGOpenMPRuntime::isStaticChunked(OpenMPScheduleClauseKind ScheduleKind,
                                      bool Chunked) const {
  OpenMPSchedType Schedule =
      getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
  return Schedule == OMP_sch_static_chunked;
}

bool CGOpenMPRuntime::isStaticChunked(
    OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
  OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
  return Schedule == OMP_dist_sch_static_chunked;
}

bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const {
  OpenMPSchedType Schedule =
      getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false);
  assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here");
  return Schedule != OMP_sch_static;
}

static int addMonoNonMonoModifier(CodeGenModule &CGM, OpenMPSchedType Schedule,
                                  OpenMPScheduleClauseModifier M1,
                                  OpenMPScheduleClauseModifier M2) {
  int Modifier = 0;
  switch (M1) {
  case OMPC_SCHEDULE_MODIFIER_monotonic:
    Modifier = OMP_sch_modifier_monotonic;
    break;
  case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
    Modifier = OMP_sch_modifier_nonmonotonic;
    break;
  case OMPC_SCHEDULE_MODIFIER_simd:
    if (Schedule == OMP_sch_static_chunked)
      Schedule = OMP_sch_static_balanced_chunked;
    break;
  case OMPC_SCHEDULE_MODIFIER_last:
  case OMPC_SCHEDULE_MODIFIER_unknown:
    break;
  }
  switch (M2) {
  case OMPC_SCHEDULE_MODIFIER_monotonic:
    Modifier = OMP_sch_modifier_monotonic;
    break;
  case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
    Modifier = OMP_sch_modifier_nonmonotonic;
    break;
  case OMPC_SCHEDULE_MODIFIER_simd:
    if (Schedule == OMP_sch_static_chunked)
      Schedule = OMP_sch_static_balanced_chunked;
    break;
  case OMPC_SCHEDULE_MODIFIER_last:
  case OMPC_SCHEDULE_MODIFIER_unknown:
    break;
  }
  // OpenMP 5.0, 2.9.2 Worksharing-Loop Construct, Description.
  // If the static schedule kind is specified or if the ordered clause is
  // specified, and if the nonmonotonic modifier is not specified, the effect
  // is as if the monotonic modifier is specified. Otherwise, unless the
  // monotonic modifier is specified, the effect is as if the nonmonotonic
  // modifier is specified.
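  // Concretely: with -fopenmp-version=50 (or newer), a bare
  // 'schedule(dynamic)' reaches the branch below with Modifier == 0 and a
  // non-static schedule, so OMP_sch_modifier_nonmonotonic is ORed into the
  // returned schedule word; a bare 'schedule(static)' matches the list of
  // static kinds and keeps Modifier == 0, i.e. monotonic behavior.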
  if (CGM.getLangOpts().OpenMP >= 50 && Modifier == 0) {
    if (!(Schedule == OMP_sch_static_chunked || Schedule == OMP_sch_static ||
          Schedule == OMP_sch_static_balanced_chunked ||
          Schedule == OMP_ord_static_chunked || Schedule == OMP_ord_static ||
          Schedule == OMP_dist_sch_static_chunked ||
          Schedule == OMP_dist_sch_static))
      Modifier = OMP_sch_modifier_nonmonotonic;
  }
  return Schedule | Modifier;
}

void CGOpenMPRuntime::emitForDispatchInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
    bool Ordered, const DispatchRTInput &DispatchValues) {
  if (!CGF.HaveInsertPoint())
    return;
  OpenMPSchedType Schedule = getRuntimeSchedule(
      ScheduleKind.Schedule, DispatchValues.Chunk != nullptr, Ordered);
  assert(Ordered ||
         (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked &&
          Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked &&
          Schedule != OMP_sch_static_balanced_chunked));
  // Call __kmpc_dispatch_init(
  //   ident_t *loc, kmp_int32 tid, kmp_int32 schedule,
  //   kmp_int[32|64] lower, kmp_int[32|64] upper,
  //   kmp_int[32|64] stride, kmp_int[32|64] chunk);

  // If the chunk was not specified in the clause, use the default value 1.
  llvm::Value *Chunk = DispatchValues.Chunk ? DispatchValues.Chunk
                                            : CGF.Builder.getIntN(IVSize, 1);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc),
      getThreadID(CGF, Loc),
      CGF.Builder.getInt32(addMonoNonMonoModifier(
          CGM, Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type
      DispatchValues.LB,                                     // Lower
      DispatchValues.UB,                                     // Upper
      CGF.Builder.getIntN(IVSize, 1),                        // Stride
      Chunk                                                  // Chunk
  };
  CGF.EmitRuntimeCall(createDispatchInitFunction(IVSize, IVSigned), Args);
}

static void emitForStaticInitCall(
    CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId,
    llvm::FunctionCallee ForStaticInitFunction, OpenMPSchedType Schedule,
    OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2,
    const CGOpenMPRuntime::StaticRTInput &Values) {
  if (!CGF.HaveInsertPoint())
    return;

  assert(!Values.Ordered);
  assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked ||
         Schedule == OMP_sch_static_balanced_chunked ||
         Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked ||
         Schedule == OMP_dist_sch_static ||
         Schedule == OMP_dist_sch_static_chunked);

  // Call __kmpc_for_static_init(
  //   ident_t *loc, kmp_int32 tid, kmp_int32 schedtype,
  //   kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower,
  //   kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
  //   kmp_int[32|64] incr, kmp_int[32|64] chunk);
  llvm::Value *Chunk = Values.Chunk;
  if (Chunk == nullptr) {
    assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static ||
            Schedule == OMP_dist_sch_static) &&
           "expected static non-chunked schedule");
    // If the chunk was not specified in the clause, use the default value 1.
    Chunk = CGF.Builder.getIntN(Values.IVSize, 1);
  } else {
    assert((Schedule == OMP_sch_static_chunked ||
            Schedule == OMP_sch_static_balanced_chunked ||
            Schedule == OMP_ord_static_chunked ||
            Schedule == OMP_dist_sch_static_chunked) &&
           "expected static chunked schedule");
  }
  llvm::Value *Args[] = {
      UpdateLocation,
      ThreadId,
      CGF.Builder.getInt32(addMonoNonMonoModifier(CGF.CGM, Schedule, M1,
                                                  M2)), // Schedule type
      Values.IL.getPointer(),                           // &isLastIter
      Values.LB.getPointer(),                           // &LB
      Values.UB.getPointer(),                           // &UB
      Values.ST.getPointer(),                           // &Stride
      CGF.Builder.getIntN(Values.IVSize, 1),            // Incr
      Chunk                                             // Chunk
  };
  CGF.EmitRuntimeCall(ForStaticInitFunction, Args);
}

void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF,
                                        SourceLocation Loc,
                                        OpenMPDirectiveKind DKind,
                                        const OpenMPScheduleTy &ScheduleKind,
                                        const StaticRTInput &Values) {
  OpenMPSchedType ScheduleNum = getRuntimeSchedule(
      ScheduleKind.Schedule, Values.Chunk != nullptr, Values.Ordered);
  assert(isOpenMPWorksharingDirective(DKind) &&
         "Expected loop-based or sections-based directive.");
  llvm::Value *UpdatedLocation =
      emitUpdateLocation(CGF, Loc,
                         isOpenMPLoopDirective(DKind)
                             ? OMP_IDENT_WORK_LOOP
                             : OMP_IDENT_WORK_SECTIONS);
  llvm::Value *ThreadId = getThreadID(CGF, Loc);
  llvm::FunctionCallee StaticInitFunction =
      createForStaticInitFunction(Values.IVSize, Values.IVSigned, false);
  auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
  emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
                        ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, Values);
}

void CGOpenMPRuntime::emitDistributeStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDistScheduleClauseKind SchedKind,
    const CGOpenMPRuntime::StaticRTInput &Values) {
  OpenMPSchedType ScheduleNum =
      getRuntimeSchedule(SchedKind, Values.Chunk != nullptr);
  llvm::Value *UpdatedLocation =
      emitUpdateLocation(CGF, Loc, OMP_IDENT_WORK_DISTRIBUTE);
  llvm::Value *ThreadId = getThreadID(CGF, Loc);
  llvm::FunctionCallee StaticInitFunction;
  bool isGPUDistribute =
      CGM.getLangOpts().OpenMPIsDevice &&
      (CGM.getTriple().isAMDGCN() || CGM.getTriple().isNVPTX());
  StaticInitFunction = createForStaticInitFunction(
      Values.IVSize, Values.IVSigned, isGPUDistribute);

  emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
                        ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown,
                        OMPC_SCHEDULE_MODIFIER_unknown, Values);
}

void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF,
                                          SourceLocation Loc,
                                          OpenMPDirectiveKind DKind) {
  if (!CGF.HaveInsertPoint())
    return;
  // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc,
                         isOpenMPDistributeDirective(DKind)
                             ? OMP_IDENT_WORK_DISTRIBUTE
                             : isOpenMPLoopDirective(DKind)
OMP_IDENT_WORK_LOOP 2877 : OMP_IDENT_WORK_SECTIONS), 2878 getThreadID(CGF, Loc)}; 2879 auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc); 2880 if (isOpenMPDistributeDirective(DKind) && CGM.getLangOpts().OpenMPIsDevice && 2881 (CGM.getTriple().isAMDGCN() || CGM.getTriple().isNVPTX())) 2882 CGF.EmitRuntimeCall( 2883 OMPBuilder.getOrCreateRuntimeFunction( 2884 CGM.getModule(), OMPRTL___kmpc_distribute_static_fini), 2885 Args); 2886 else 2887 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 2888 CGM.getModule(), OMPRTL___kmpc_for_static_fini), 2889 Args); 2890 } 2891 2892 void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF, 2893 SourceLocation Loc, 2894 unsigned IVSize, 2895 bool IVSigned) { 2896 if (!CGF.HaveInsertPoint()) 2897 return; 2898 // Call __kmpc_dispatch_fini_(4|8)[u](ident_t *loc, kmp_int32 tid); 2899 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 2900 CGF.EmitRuntimeCall(createDispatchFiniFunction(IVSize, IVSigned), Args); 2901 } 2902 2903 llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF, 2904 SourceLocation Loc, unsigned IVSize, 2905 bool IVSigned, Address IL, 2906 Address LB, Address UB, 2907 Address ST) { 2908 // Call __kmpc_dispatch_next( 2909 // ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter, 2910 // kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper, 2911 // kmp_int[32|64] *p_stride); 2912 llvm::Value *Args[] = { 2913 emitUpdateLocation(CGF, Loc), 2914 getThreadID(CGF, Loc), 2915 IL.getPointer(), // &isLastIter 2916 LB.getPointer(), // &Lower 2917 UB.getPointer(), // &Upper 2918 ST.getPointer() // &Stride 2919 }; 2920 llvm::Value *Call = 2921 CGF.EmitRuntimeCall(createDispatchNextFunction(IVSize, IVSigned), Args); 2922 return CGF.EmitScalarConversion( 2923 Call, CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/1), 2924 CGF.getContext().BoolTy, Loc); 2925 } 2926 2927 void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF, 2928 llvm::Value *NumThreads, 2929 SourceLocation Loc) { 2930 if (!CGF.HaveInsertPoint()) 2931 return; 2932 // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads) 2933 llvm::Value *Args[] = { 2934 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 2935 CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)}; 2936 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 2937 CGM.getModule(), OMPRTL___kmpc_push_num_threads), 2938 Args); 2939 } 2940 2941 void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF, 2942 ProcBindKind ProcBind, 2943 SourceLocation Loc) { 2944 if (!CGF.HaveInsertPoint()) 2945 return; 2946 assert(ProcBind != OMP_PROC_BIND_unknown && "Unsupported proc_bind value."); 2947 // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind) 2948 llvm::Value *Args[] = { 2949 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 2950 llvm::ConstantInt::get(CGM.IntTy, unsigned(ProcBind), /*isSigned=*/true)}; 2951 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 2952 CGM.getModule(), OMPRTL___kmpc_push_proc_bind), 2953 Args); 2954 } 2955 2956 void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>, 2957 SourceLocation Loc, llvm::AtomicOrdering AO) { 2958 if (CGF.CGM.getLangOpts().OpenMPIRBuilder) { 2959 OMPBuilder.createFlush(CGF.Builder); 2960 } else { 2961 if (!CGF.HaveInsertPoint()) 2962 return; 2963 // Build call void __kmpc_flush(ident_t *loc) 2964 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 2965 CGM.getModule(), OMPRTL___kmpc_flush), 2966
emitUpdateLocation(CGF, Loc)); 2967 } 2968 } 2969 2970 namespace { 2971 /// Indexes of fields for type kmp_task_t. 2972 enum KmpTaskTFields { 2973 /// List of shared variables. 2974 KmpTaskTShareds, 2975 /// Task routine. 2976 KmpTaskTRoutine, 2977 /// Partition id for the untied tasks. 2978 KmpTaskTPartId, 2979 /// Function that calls destructors for private variables. 2980 Data1, 2981 /// Task priority. 2982 Data2, 2983 /// (Taskloops only) Lower bound. 2984 KmpTaskTLowerBound, 2985 /// (Taskloops only) Upper bound. 2986 KmpTaskTUpperBound, 2987 /// (Taskloops only) Stride. 2988 KmpTaskTStride, 2989 /// (Taskloops only) Is last iteration flag. 2990 KmpTaskTLastIter, 2991 /// (Taskloops only) Reduction data. 2992 KmpTaskTReductions, 2993 }; 2994 } // anonymous namespace 2995 2996 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::empty() const { 2997 return OffloadEntriesTargetRegion.empty() && 2998 OffloadEntriesDeviceGlobalVar.empty(); 2999 } 3000 3001 /// Initialize target region entry. 3002 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: 3003 initializeTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID, 3004 StringRef ParentName, unsigned LineNum, 3005 unsigned Order) { 3006 assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is " 3007 "only required for the device " 3008 "code generation."); 3009 OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = 3010 OffloadEntryInfoTargetRegion(Order, /*Addr=*/nullptr, /*ID=*/nullptr, 3011 OMPTargetRegionEntryTargetRegion); 3012 ++OffloadingEntriesNum; 3013 } 3014 3015 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: 3016 registerTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID, 3017 StringRef ParentName, unsigned LineNum, 3018 llvm::Constant *Addr, llvm::Constant *ID, 3019 OMPTargetRegionEntryKind Flags) { 3020 // If we are emitting code for a target, the entry is already initialized 3021 // and only has to be registered. 3022 if (CGM.getLangOpts().OpenMPIsDevice) { 3023 // This could happen if the device compilation is invoked standalone.
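// In that case no entries were initialized from the host IR metadata, so there is nothing to update here and the registration is silently skipped.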
3024 if (!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum)) 3025 return; 3026 auto &Entry = 3027 OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum]; 3028 Entry.setAddress(Addr); 3029 Entry.setID(ID); 3030 Entry.setFlags(Flags); 3031 } else { 3032 if (Flags == 3033 OffloadEntriesInfoManagerTy::OMPTargetRegionEntryTargetRegion && 3034 hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum, 3035 /*IgnoreAddressId*/ true)) 3036 return; 3037 assert(!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum) && 3038 "Target region entry already registered!"); 3039 OffloadEntryInfoTargetRegion Entry(OffloadingEntriesNum, Addr, ID, Flags); 3040 OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = Entry; 3041 ++OffloadingEntriesNum; 3042 } 3043 } 3044 3045 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::hasTargetRegionEntryInfo( 3046 unsigned DeviceID, unsigned FileID, StringRef ParentName, unsigned LineNum, 3047 bool IgnoreAddressId) const { 3048 auto PerDevice = OffloadEntriesTargetRegion.find(DeviceID); 3049 if (PerDevice == OffloadEntriesTargetRegion.end()) 3050 return false; 3051 auto PerFile = PerDevice->second.find(FileID); 3052 if (PerFile == PerDevice->second.end()) 3053 return false; 3054 auto PerParentName = PerFile->second.find(ParentName); 3055 if (PerParentName == PerFile->second.end()) 3056 return false; 3057 auto PerLine = PerParentName->second.find(LineNum); 3058 if (PerLine == PerParentName->second.end()) 3059 return false; 3060 // Fail if this entry is already registered. 3061 if (!IgnoreAddressId && 3062 (PerLine->second.getAddress() || PerLine->second.getID())) 3063 return false; 3064 return true; 3065 } 3066 3067 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::actOnTargetRegionEntriesInfo( 3068 const OffloadTargetRegionEntryInfoActTy &Action) { 3069 // Scan all target region entries and perform the provided action. 3070 for (const auto &D : OffloadEntriesTargetRegion) 3071 for (const auto &F : D.second) 3072 for (const auto &P : F.second) 3073 for (const auto &L : P.second) 3074 Action(D.first, F.first, P.first(), L.first, L.second); 3075 } 3076 3077 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: 3078 initializeDeviceGlobalVarEntryInfo(StringRef Name, 3079 OMPTargetGlobalVarEntryKind Flags, 3080 unsigned Order) { 3081 assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is " 3082 "only required for the device " 3083 "code generation."); 3084 OffloadEntriesDeviceGlobalVar.try_emplace(Name, Order, Flags); 3085 ++OffloadingEntriesNum; 3086 } 3087 3088 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: 3089 registerDeviceGlobalVarEntryInfo(StringRef VarName, llvm::Constant *Addr, 3090 CharUnits VarSize, 3091 OMPTargetGlobalVarEntryKind Flags, 3092 llvm::GlobalValue::LinkageTypes Linkage) { 3093 if (CGM.getLangOpts().OpenMPIsDevice) { 3094 // This could happen if the device compilation is invoked standalone. 
3095 if (!hasDeviceGlobalVarEntryInfo(VarName)) 3096 return; 3097 auto &Entry = OffloadEntriesDeviceGlobalVar[VarName]; 3098 if (Entry.getAddress() && hasDeviceGlobalVarEntryInfo(VarName)) { 3099 if (Entry.getVarSize().isZero()) { 3100 Entry.setVarSize(VarSize); 3101 Entry.setLinkage(Linkage); 3102 } 3103 return; 3104 } 3105 Entry.setVarSize(VarSize); 3106 Entry.setLinkage(Linkage); 3107 Entry.setAddress(Addr); 3108 } else { 3109 if (hasDeviceGlobalVarEntryInfo(VarName)) { 3110 auto &Entry = OffloadEntriesDeviceGlobalVar[VarName]; 3111 assert(Entry.isValid() && Entry.getFlags() == Flags && 3112 "Entry not initialized!"); 3113 if (Entry.getVarSize().isZero()) { 3114 Entry.setVarSize(VarSize); 3115 Entry.setLinkage(Linkage); 3116 } 3117 return; 3118 } 3119 OffloadEntriesDeviceGlobalVar.try_emplace( 3120 VarName, OffloadingEntriesNum, Addr, VarSize, Flags, Linkage); 3121 ++OffloadingEntriesNum; 3122 } 3123 } 3124 3125 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: 3126 actOnDeviceGlobalVarEntriesInfo( 3127 const OffloadDeviceGlobalVarEntryInfoActTy &Action) { 3128 // Scan all device global variable entries and perform the provided action. 3129 for (const auto &E : OffloadEntriesDeviceGlobalVar) 3130 Action(E.getKey(), E.getValue()); 3131 } 3132 3133 void CGOpenMPRuntime::createOffloadEntry( 3134 llvm::Constant *ID, llvm::Constant *Addr, uint64_t Size, int32_t Flags, 3135 llvm::GlobalValue::LinkageTypes Linkage) { 3136 StringRef Name = Addr->getName(); 3137 llvm::Module &M = CGM.getModule(); 3138 llvm::LLVMContext &C = M.getContext(); 3139 3140 // Create constant string with the name. 3141 llvm::Constant *StrPtrInit = llvm::ConstantDataArray::getString(C, Name); 3142 3143 std::string StringName = getName({"omp_offloading", "entry_name"}); 3144 auto *Str = new llvm::GlobalVariable( 3145 M, StrPtrInit->getType(), /*isConstant=*/true, 3146 llvm::GlobalValue::InternalLinkage, StrPtrInit, StringName); 3147 Str->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); 3148 3149 llvm::Constant *Data[] = { 3150 llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast(ID, CGM.VoidPtrTy), 3151 llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast(Str, CGM.Int8PtrTy), 3152 llvm::ConstantInt::get(CGM.SizeTy, Size), 3153 llvm::ConstantInt::get(CGM.Int32Ty, Flags), 3154 llvm::ConstantInt::get(CGM.Int32Ty, 0)}; 3155 std::string EntryName = getName({"omp_offloading", "entry", ""}); 3156 llvm::GlobalVariable *Entry = createGlobalStruct( 3157 CGM, getTgtOffloadEntryQTy(), /*IsConstant=*/true, Data, 3158 Twine(EntryName).concat(Name), llvm::GlobalValue::WeakAnyLinkage); 3159 3160 // The entry has to be created in the section the linker expects it to be in. 3161 Entry->setSection("omp_offloading_entries"); 3162 } 3163 3164 void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() { 3165 // Emit the offloading entries and metadata so that the device codegen side 3166 // can easily figure out what to emit. The produced metadata looks like 3167 // this: 3168 // 3169 // !omp_offload.info = !{!1, ...} 3170 // 3171 // Right now we only generate metadata for functions that contain target 3172 // regions. 3173 3174 // If we are in simd mode or there are no entries, we don't need to do 3175 // anything.
3176 if (CGM.getLangOpts().OpenMPSimd || OffloadEntriesInfoManager.empty()) 3177 return; 3178 3179 llvm::Module &M = CGM.getModule(); 3180 llvm::LLVMContext &C = M.getContext(); 3181 SmallVector<std::tuple<const OffloadEntriesInfoManagerTy::OffloadEntryInfo *, 3182 SourceLocation, StringRef>, 3183 16> 3184 OrderedEntries(OffloadEntriesInfoManager.size()); 3185 llvm::SmallVector<StringRef, 16> ParentFunctions( 3186 OffloadEntriesInfoManager.size()); 3187 3188 // Auxiliary methods to create metadata values and strings. 3189 auto &&GetMDInt = [this](unsigned V) { 3190 return llvm::ConstantAsMetadata::get( 3191 llvm::ConstantInt::get(CGM.Int32Ty, V)); 3192 }; 3193 3194 auto &&GetMDString = [&C](StringRef V) { return llvm::MDString::get(C, V); }; 3195 3196 // Create the offloading info metadata node. 3197 llvm::NamedMDNode *MD = M.getOrInsertNamedMetadata("omp_offload.info"); 3198 3199 // Create function that emits metadata for each target region entry. 3200 auto &&TargetRegionMetadataEmitter = 3201 [this, &C, MD, &OrderedEntries, &ParentFunctions, &GetMDInt, 3202 &GetMDString]( 3203 unsigned DeviceID, unsigned FileID, StringRef ParentName, 3204 unsigned Line, 3205 const OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion &E) { 3206 // Generate metadata for target regions. Each entry of this metadata 3207 // contains: 3208 // - Entry 0 -> Kind of this type of metadata (0). 3209 // - Entry 1 -> Device ID of the file where the entry was identified. 3210 // - Entry 2 -> File ID of the file where the entry was identified. 3211 // - Entry 3 -> Mangled name of the function where the entry was 3212 // identified. 3213 // - Entry 4 -> Line in the file where the entry was identified. 3214 // - Entry 5 -> Order the entry was created. 3215 // The first element of the metadata node is the kind. 3216 llvm::Metadata *Ops[] = {GetMDInt(E.getKind()), GetMDInt(DeviceID), 3217 GetMDInt(FileID), GetMDString(ParentName), 3218 GetMDInt(Line), GetMDInt(E.getOrder())}; 3219 3220 SourceLocation Loc; 3221 for (auto I = CGM.getContext().getSourceManager().fileinfo_begin(), 3222 E = CGM.getContext().getSourceManager().fileinfo_end(); 3223 I != E; ++I) { 3224 if (I->getFirst()->getUniqueID().getDevice() == DeviceID && 3225 I->getFirst()->getUniqueID().getFile() == FileID) { 3226 Loc = CGM.getContext().getSourceManager().translateFileLineCol( 3227 I->getFirst(), Line, 1); 3228 break; 3229 } 3230 } 3231 // Save this entry in the right position of the ordered entries array. 3232 OrderedEntries[E.getOrder()] = std::make_tuple(&E, Loc, ParentName); 3233 ParentFunctions[E.getOrder()] = ParentName; 3234 3235 // Add metadata to the named metadata node. 3236 MD->addOperand(llvm::MDNode::get(C, Ops)); 3237 }; 3238 3239 OffloadEntriesInfoManager.actOnTargetRegionEntriesInfo( 3240 TargetRegionMetadataEmitter); 3241 3242 // Create function that emits metadata for each device global variable entry. 3243 auto &&DeviceGlobalVarMetadataEmitter = 3244 [&C, &OrderedEntries, &GetMDInt, &GetMDString, 3245 MD](StringRef MangledName, 3246 const OffloadEntriesInfoManagerTy::OffloadEntryInfoDeviceGlobalVar 3247 &E) { 3248 // Generate metadata for global variables. Each entry of this metadata 3249 // contains: 3250 // - Entry 0 -> Kind of this type of metadata (1). 3251 // - Entry 1 -> Mangled name of the variable. 3252 // - Entry 2 -> Declare target kind. 3253 // - Entry 3 -> Order the entry was created. 3254 // The first element of the metadata node is the kind.
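// A resulting node reads, e.g., !{i32 1, !"<mangled variable name>", i32 <flags>, i32 <order>} (placeholders shown in angle brackets).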
3255 llvm::Metadata *Ops[] = { 3256 GetMDInt(E.getKind()), GetMDString(MangledName), 3257 GetMDInt(E.getFlags()), GetMDInt(E.getOrder())}; 3258 3259 // Save this entry in the right position of the ordered entries array. 3260 OrderedEntries[E.getOrder()] = 3261 std::make_tuple(&E, SourceLocation(), MangledName); 3262 3263 // Add metadata to the named metadata node. 3264 MD->addOperand(llvm::MDNode::get(C, Ops)); 3265 }; 3266 3267 OffloadEntriesInfoManager.actOnDeviceGlobalVarEntriesInfo( 3268 DeviceGlobalVarMetadataEmitter); 3269 3270 for (const auto &E : OrderedEntries) { 3271 assert(std::get<0>(E) && "All ordered entries must exist!"); 3272 if (const auto *CE = 3273 dyn_cast<OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion>( 3274 std::get<0>(E))) { 3275 if (!CE->getID() || !CE->getAddress()) { 3276 // Do not blame the entry if the parent function is not emitted. 3277 StringRef FnName = ParentFunctions[CE->getOrder()]; 3278 if (!CGM.GetGlobalValue(FnName)) 3279 continue; 3280 unsigned DiagID = CGM.getDiags().getCustomDiagID( 3281 DiagnosticsEngine::Error, 3282 "Offloading entry for target region in %0 is incorrect: either the " 3283 "address or the ID is invalid."); 3284 CGM.getDiags().Report(std::get<1>(E), DiagID) << FnName; 3285 continue; 3286 } 3287 createOffloadEntry(CE->getID(), CE->getAddress(), /*Size=*/0, 3288 CE->getFlags(), llvm::GlobalValue::WeakAnyLinkage); 3289 } else if (const auto *CE = dyn_cast<OffloadEntriesInfoManagerTy:: 3290 OffloadEntryInfoDeviceGlobalVar>( 3291 std::get<0>(E))) { 3292 OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags = 3293 static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>( 3294 CE->getFlags()); 3295 switch (Flags) { 3296 case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo: { 3297 if (CGM.getLangOpts().OpenMPIsDevice && 3298 CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory()) 3299 continue; 3300 if (!CE->getAddress()) { 3301 unsigned DiagID = CGM.getDiags().getCustomDiagID( 3302 DiagnosticsEngine::Error, "Offloading entry for declare target " 3303 "variable %0 is incorrect: the " 3304 "address is invalid."); 3305 CGM.getDiags().Report(std::get<1>(E), DiagID) << std::get<2>(E); 3306 continue; 3307 } 3308 // The variable has no definition - no need to add the entry. 3309 if (CE->getVarSize().isZero()) 3310 continue; 3311 break; 3312 } 3313 case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink: 3314 assert(((CGM.getLangOpts().OpenMPIsDevice && !CE->getAddress()) || 3315 (!CGM.getLangOpts().OpenMPIsDevice && CE->getAddress())) && 3316 "Declare target link address is set."); 3317 if (CGM.getLangOpts().OpenMPIsDevice) 3318 continue; 3319 if (!CE->getAddress()) { 3320 unsigned DiagID = CGM.getDiags().getCustomDiagID( 3321 DiagnosticsEngine::Error, 3322 "Offloading entry for declare target variable is incorrect: the " 3323 "address is invalid."); 3324 CGM.getDiags().Report(DiagID); 3325 continue; 3326 } 3327 break; 3328 } 3329 createOffloadEntry(CE->getAddress(), CE->getAddress(), 3330 CE->getVarSize().getQuantity(), Flags, 3331 CE->getLinkage()); 3332 } else { 3333 llvm_unreachable("Unsupported entry kind."); 3334 } 3335 } 3336 } 3337 3338 /// Loads all the offload entries information from the host IR 3339 /// metadata. 3340 void CGOpenMPRuntime::loadOffloadInfoMetadata() { 3341 // If we are in target mode, load the metadata from the host IR. This code has 3342 // to match the metadata creation in createOffloadEntriesAndInfoMetadata().
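// For reference, the named metadata being parsed here has this shape (a sketch; actual IDs, names and counts depend on the translation unit): // !omp_offload.info = !{!0, !1, ...} // !0 = !{i32 0, i32 <device-id>, i32 <file-id>, !"<parent function>", i32 <line>, i32 <order>} // !1 = !{i32 1, !"<mangled variable name>", i32 <flags>, i32 <order>}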
3343 3344 if (!CGM.getLangOpts().OpenMPIsDevice) 3345 return; 3346 3347 if (CGM.getLangOpts().OMPHostIRFile.empty()) 3348 return; 3349 3350 auto Buf = llvm::MemoryBuffer::getFile(CGM.getLangOpts().OMPHostIRFile); 3351 if (auto EC = Buf.getError()) { 3352 CGM.getDiags().Report(diag::err_cannot_open_file) 3353 << CGM.getLangOpts().OMPHostIRFile << EC.message(); 3354 return; 3355 } 3356 3357 llvm::LLVMContext C; 3358 auto ME = expectedToErrorOrAndEmitErrors( 3359 C, llvm::parseBitcodeFile(Buf.get()->getMemBufferRef(), C)); 3360 3361 if (auto EC = ME.getError()) { 3362 unsigned DiagID = CGM.getDiags().getCustomDiagID( 3363 DiagnosticsEngine::Error, "Unable to parse host IR file '%0':'%1'"); 3364 CGM.getDiags().Report(DiagID) 3365 << CGM.getLangOpts().OMPHostIRFile << EC.message(); 3366 return; 3367 } 3368 3369 llvm::NamedMDNode *MD = ME.get()->getNamedMetadata("omp_offload.info"); 3370 if (!MD) 3371 return; 3372 3373 for (llvm::MDNode *MN : MD->operands()) { 3374 auto &&GetMDInt = [MN](unsigned Idx) { 3375 auto *V = cast<llvm::ConstantAsMetadata>(MN->getOperand(Idx)); 3376 return cast<llvm::ConstantInt>(V->getValue())->getZExtValue(); 3377 }; 3378 3379 auto &&GetMDString = [MN](unsigned Idx) { 3380 auto *V = cast<llvm::MDString>(MN->getOperand(Idx)); 3381 return V->getString(); 3382 }; 3383 3384 switch (GetMDInt(0)) { 3385 default: 3386 llvm_unreachable("Unexpected metadata!"); 3387 break; 3388 case OffloadEntriesInfoManagerTy::OffloadEntryInfo:: 3389 OffloadingEntryInfoTargetRegion: 3390 OffloadEntriesInfoManager.initializeTargetRegionEntryInfo( 3391 /*DeviceID=*/GetMDInt(1), /*FileID=*/GetMDInt(2), 3392 /*ParentName=*/GetMDString(3), /*Line=*/GetMDInt(4), 3393 /*Order=*/GetMDInt(5)); 3394 break; 3395 case OffloadEntriesInfoManagerTy::OffloadEntryInfo:: 3396 OffloadingEntryInfoDeviceGlobalVar: 3397 OffloadEntriesInfoManager.initializeDeviceGlobalVarEntryInfo( 3398 /*MangledName=*/GetMDString(1), 3399 static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>( 3400 /*Flags=*/GetMDInt(2)), 3401 /*Order=*/GetMDInt(3)); 3402 break; 3403 } 3404 } 3405 } 3406 3407 void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) { 3408 if (!KmpRoutineEntryPtrTy) { 3409 // Build the typedef kmp_int32 (*kmp_routine_entry_t)(kmp_int32, void *) type. 3410 ASTContext &C = CGM.getContext(); 3411 QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy}; 3412 FunctionProtoType::ExtProtoInfo EPI; 3413 KmpRoutineEntryPtrQTy = C.getPointerType( 3414 C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI)); 3415 KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy); 3416 } 3417 } 3418 3419 QualType CGOpenMPRuntime::getTgtOffloadEntryQTy() { 3420 // Make sure the type of the entry is already created. This is the type we 3421 // have to create: 3422 // struct __tgt_offload_entry { 3423 // void *addr; // Pointer to the offload entry info. 3424 // // (function or global) 3425 // char *name; // Name of the function or global. 3426 // size_t size; // Size of the entry info (0 if it is a function). 3427 // int32_t flags; // Flags associated with the entry, e.g. 'link'. 3428 // int32_t reserved; // Reserved, to be used by the runtime library.
3429 // }; 3430 if (TgtOffloadEntryQTy.isNull()) { 3431 ASTContext &C = CGM.getContext(); 3432 RecordDecl *RD = C.buildImplicitRecord("__tgt_offload_entry"); 3433 RD->startDefinition(); 3434 addFieldToRecordDecl(C, RD, C.VoidPtrTy); 3435 addFieldToRecordDecl(C, RD, C.getPointerType(C.CharTy)); 3436 addFieldToRecordDecl(C, RD, C.getSizeType()); 3437 addFieldToRecordDecl( 3438 C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true)); 3439 addFieldToRecordDecl( 3440 C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true)); 3441 RD->completeDefinition(); 3442 RD->addAttr(PackedAttr::CreateImplicit(C)); 3443 TgtOffloadEntryQTy = C.getRecordType(RD); 3444 } 3445 return TgtOffloadEntryQTy; 3446 } 3447 3448 namespace { 3449 struct PrivateHelpersTy { 3450 PrivateHelpersTy(const Expr *OriginalRef, const VarDecl *Original, 3451 const VarDecl *PrivateCopy, const VarDecl *PrivateElemInit) 3452 : OriginalRef(OriginalRef), Original(Original), PrivateCopy(PrivateCopy), 3453 PrivateElemInit(PrivateElemInit) {} 3454 PrivateHelpersTy(const VarDecl *Original) : Original(Original) {} 3455 const Expr *OriginalRef = nullptr; 3456 const VarDecl *Original = nullptr; 3457 const VarDecl *PrivateCopy = nullptr; 3458 const VarDecl *PrivateElemInit = nullptr; 3459 bool isLocalPrivate() const { 3460 return !OriginalRef && !PrivateCopy && !PrivateElemInit; 3461 } 3462 }; 3463 typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy; 3464 } // anonymous namespace 3465 3466 static bool isAllocatableDecl(const VarDecl *VD) { 3467 const VarDecl *CVD = VD->getCanonicalDecl(); 3468 if (!CVD->hasAttr<OMPAllocateDeclAttr>()) 3469 return false; 3470 const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>(); 3471 // Use the default allocation. 3472 return !((AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc || 3473 AA->getAllocatorType() == OMPAllocateDeclAttr::OMPNullMemAlloc) && 3474 !AA->getAllocator()); 3475 } 3476 3477 static RecordDecl * 3478 createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) { 3479 if (!Privates.empty()) { 3480 ASTContext &C = CGM.getContext(); 3481 // Build struct .kmp_privates_t. { 3482 // /* private vars */ 3483 // }; 3484 RecordDecl *RD = C.buildImplicitRecord(".kmp_privates.t"); 3485 RD->startDefinition(); 3486 for (const auto &Pair : Privates) { 3487 const VarDecl *VD = Pair.second.Original; 3488 QualType Type = VD->getType().getNonReferenceType(); 3489 // If the private variable is a local variable with lvalue ref type, 3490 // allocate the pointer instead of the pointee type. 
3491 if (Pair.second.isLocalPrivate()) { 3492 if (VD->getType()->isLValueReferenceType()) 3493 Type = C.getPointerType(Type); 3494 if (isAllocatableDecl(VD)) 3495 Type = C.getPointerType(Type); 3496 } 3497 FieldDecl *FD = addFieldToRecordDecl(C, RD, Type); 3498 if (VD->hasAttrs()) { 3499 for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()), 3500 E(VD->getAttrs().end()); 3501 I != E; ++I) 3502 FD->addAttr(*I); 3503 } 3504 } 3505 RD->completeDefinition(); 3506 return RD; 3507 } 3508 return nullptr; 3509 } 3510 3511 static RecordDecl * 3512 createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind, 3513 QualType KmpInt32Ty, 3514 QualType KmpRoutineEntryPointerQTy) { 3515 ASTContext &C = CGM.getContext(); 3516 // Build struct kmp_task_t { 3517 // void * shareds; 3518 // kmp_routine_entry_t routine; 3519 // kmp_int32 part_id; 3520 // kmp_cmplrdata_t data1; 3521 // kmp_cmplrdata_t data2; 3522 // For taskloops additional fields: 3523 // kmp_uint64 lb; 3524 // kmp_uint64 ub; 3525 // kmp_int64 st; 3526 // kmp_int32 liter; 3527 // void * reductions; 3528 // }; 3529 RecordDecl *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TTK_Union); 3530 UD->startDefinition(); 3531 addFieldToRecordDecl(C, UD, KmpInt32Ty); 3532 addFieldToRecordDecl(C, UD, KmpRoutineEntryPointerQTy); 3533 UD->completeDefinition(); 3534 QualType KmpCmplrdataTy = C.getRecordType(UD); 3535 RecordDecl *RD = C.buildImplicitRecord("kmp_task_t"); 3536 RD->startDefinition(); 3537 addFieldToRecordDecl(C, RD, C.VoidPtrTy); 3538 addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy); 3539 addFieldToRecordDecl(C, RD, KmpInt32Ty); 3540 addFieldToRecordDecl(C, RD, KmpCmplrdataTy); 3541 addFieldToRecordDecl(C, RD, KmpCmplrdataTy); 3542 if (isOpenMPTaskLoopDirective(Kind)) { 3543 QualType KmpUInt64Ty = 3544 CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0); 3545 QualType KmpInt64Ty = 3546 CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1); 3547 addFieldToRecordDecl(C, RD, KmpUInt64Ty); 3548 addFieldToRecordDecl(C, RD, KmpUInt64Ty); 3549 addFieldToRecordDecl(C, RD, KmpInt64Ty); 3550 addFieldToRecordDecl(C, RD, KmpInt32Ty); 3551 addFieldToRecordDecl(C, RD, C.VoidPtrTy); 3552 } 3553 RD->completeDefinition(); 3554 return RD; 3555 } 3556 3557 static RecordDecl * 3558 createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy, 3559 ArrayRef<PrivateDataTy> Privates) { 3560 ASTContext &C = CGM.getContext(); 3561 // Build struct kmp_task_t_with_privates { 3562 // kmp_task_t task_data; 3563 // .kmp_privates_t. privates; 3564 // }; 3565 RecordDecl *RD = C.buildImplicitRecord("kmp_task_t_with_privates"); 3566 RD->startDefinition(); 3567 addFieldToRecordDecl(C, RD, KmpTaskTQTy); 3568 if (const RecordDecl *PrivateRD = createPrivatesRecordDecl(CGM, Privates)) 3569 addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD)); 3570 RD->completeDefinition(); 3571 return RD; 3572 } 3573 3574 /// Emit a proxy function which accepts kmp_task_t as the second 3575 /// argument. 
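/// The proxy unpacks the task descriptor and forwards its fields (part id, privates, shareds and, for taskloops, the bounds, last-iteration flag and reduction data) to the outlined task function: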
3576 /// \code 3577 /// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) { 3578 /// TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, tt, 3579 /// For taskloops: 3580 /// tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter, 3581 /// tt->reductions, tt->shareds); 3582 /// return 0; 3583 /// } 3584 /// \endcode 3585 static llvm::Function * 3586 emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc, 3587 OpenMPDirectiveKind Kind, QualType KmpInt32Ty, 3588 QualType KmpTaskTWithPrivatesPtrQTy, 3589 QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy, 3590 QualType SharedsPtrTy, llvm::Function *TaskFunction, 3591 llvm::Value *TaskPrivatesMap) { 3592 ASTContext &C = CGM.getContext(); 3593 FunctionArgList Args; 3594 ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty, 3595 ImplicitParamDecl::Other); 3596 ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3597 KmpTaskTWithPrivatesPtrQTy.withRestrict(), 3598 ImplicitParamDecl::Other); 3599 Args.push_back(&GtidArg); 3600 Args.push_back(&TaskTypeArg); 3601 const auto &TaskEntryFnInfo = 3602 CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args); 3603 llvm::FunctionType *TaskEntryTy = 3604 CGM.getTypes().GetFunctionType(TaskEntryFnInfo); 3605 std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_entry", ""}); 3606 auto *TaskEntry = llvm::Function::Create( 3607 TaskEntryTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule()); 3608 CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskEntry, TaskEntryFnInfo); 3609 TaskEntry->setDoesNotRecurse(); 3610 CodeGenFunction CGF(CGM); 3611 CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args, 3612 Loc, Loc); 3613 3614 // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map, 3615 // tt, 3616 // For taskloops: 3617 // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter, 3618 // tt->task_data.shareds); 3619 llvm::Value *GtidParam = CGF.EmitLoadOfScalar( 3620 CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc); 3621 LValue TDBase = CGF.EmitLoadOfPointerLValue( 3622 CGF.GetAddrOfLocalVar(&TaskTypeArg), 3623 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>()); 3624 const auto *KmpTaskTWithPrivatesQTyRD = 3625 cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl()); 3626 LValue Base = 3627 CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin()); 3628 const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl()); 3629 auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId); 3630 LValue PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI); 3631 llvm::Value *PartidParam = PartIdLVal.getPointer(CGF); 3632 3633 auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds); 3634 LValue SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI); 3635 llvm::Value *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 3636 CGF.EmitLoadOfScalar(SharedsLVal, Loc), 3637 CGF.ConvertTypeForMem(SharedsPtrTy)); 3638 3639 auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1); 3640 llvm::Value *PrivatesParam; 3641 if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) { 3642 LValue PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI); 3643 PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 3644 PrivatesLVal.getPointer(CGF), CGF.VoidPtrTy); 3645 } else { 3646 PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); 
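// (The outlined function still takes the privates-map parameter even when no privates record was generated, hence the null placeholder above.)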
3647 } 3648 3649 llvm::Value *CommonArgs[] = {GtidParam, PartidParam, PrivatesParam, 3650 TaskPrivatesMap, 3651 CGF.Builder 3652 .CreatePointerBitCastOrAddrSpaceCast( 3653 TDBase.getAddress(CGF), CGF.VoidPtrTy) 3654 .getPointer()}; 3655 SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs), 3656 std::end(CommonArgs)); 3657 if (isOpenMPTaskLoopDirective(Kind)) { 3658 auto LBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound); 3659 LValue LBLVal = CGF.EmitLValueForField(Base, *LBFI); 3660 llvm::Value *LBParam = CGF.EmitLoadOfScalar(LBLVal, Loc); 3661 auto UBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound); 3662 LValue UBLVal = CGF.EmitLValueForField(Base, *UBFI); 3663 llvm::Value *UBParam = CGF.EmitLoadOfScalar(UBLVal, Loc); 3664 auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride); 3665 LValue StLVal = CGF.EmitLValueForField(Base, *StFI); 3666 llvm::Value *StParam = CGF.EmitLoadOfScalar(StLVal, Loc); 3667 auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter); 3668 LValue LILVal = CGF.EmitLValueForField(Base, *LIFI); 3669 llvm::Value *LIParam = CGF.EmitLoadOfScalar(LILVal, Loc); 3670 auto RFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTReductions); 3671 LValue RLVal = CGF.EmitLValueForField(Base, *RFI); 3672 llvm::Value *RParam = CGF.EmitLoadOfScalar(RLVal, Loc); 3673 CallArgs.push_back(LBParam); 3674 CallArgs.push_back(UBParam); 3675 CallArgs.push_back(StParam); 3676 CallArgs.push_back(LIParam); 3677 CallArgs.push_back(RParam); 3678 } 3679 CallArgs.push_back(SharedsParam); 3680 3681 CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskFunction, 3682 CallArgs); 3683 CGF.EmitStoreThroughLValue(RValue::get(CGF.Builder.getInt32(/*C=*/0)), 3684 CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty)); 3685 CGF.FinishFunction(); 3686 return TaskEntry; 3687 } 3688 3689 static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM, 3690 SourceLocation Loc, 3691 QualType KmpInt32Ty, 3692 QualType KmpTaskTWithPrivatesPtrQTy, 3693 QualType KmpTaskTWithPrivatesQTy) { 3694 ASTContext &C = CGM.getContext(); 3695 FunctionArgList Args; 3696 ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty, 3697 ImplicitParamDecl::Other); 3698 ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3699 KmpTaskTWithPrivatesPtrQTy.withRestrict(), 3700 ImplicitParamDecl::Other); 3701 Args.push_back(&GtidArg); 3702 Args.push_back(&TaskTypeArg); 3703 const auto &DestructorFnInfo = 3704 CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args); 3705 llvm::FunctionType *DestructorFnTy = 3706 CGM.getTypes().GetFunctionType(DestructorFnInfo); 3707 std::string Name = 3708 CGM.getOpenMPRuntime().getName({"omp_task_destructor", ""}); 3709 auto *DestructorFn = 3710 llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage, 3711 Name, &CGM.getModule()); 3712 CGM.SetInternalFunctionAttributes(GlobalDecl(), DestructorFn, 3713 DestructorFnInfo); 3714 DestructorFn->setDoesNotRecurse(); 3715 CodeGenFunction CGF(CGM); 3716 CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo, 3717 Args, Loc, Loc); 3718 3719 LValue Base = CGF.EmitLoadOfPointerLValue( 3720 CGF.GetAddrOfLocalVar(&TaskTypeArg), 3721 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>()); 3722 const auto *KmpTaskTWithPrivatesQTyRD = 3723 cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl()); 3724 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin()); 3725 Base = CGF.EmitLValueForField(Base, *FI); 3726 for 
(const auto *Field : 3727 cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) { 3728 if (QualType::DestructionKind DtorKind = 3729 Field->getType().isDestructedType()) { 3730 LValue FieldLValue = CGF.EmitLValueForField(Base, Field); 3731 CGF.pushDestroy(DtorKind, FieldLValue.getAddress(CGF), Field->getType()); 3732 } 3733 } 3734 CGF.FinishFunction(); 3735 return DestructorFn; 3736 } 3737 3738 /// Emit a privates mapping function for correct handling of private and 3739 /// firstprivate variables. 3740 /// \code 3741 /// void .omp_task_privates_map.(const .privates. *noalias privs, <ty1> 3742 /// **noalias priv1,..., <tyn> **noalias privn) { 3743 /// *priv1 = &.privates.priv1; 3744 /// ...; 3745 /// *privn = &.privates.privn; 3746 /// } 3747 /// \endcode 3748 static llvm::Value * 3749 emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc, 3750 const OMPTaskDataTy &Data, QualType PrivatesQTy, 3751 ArrayRef<PrivateDataTy> Privates) { 3752 ASTContext &C = CGM.getContext(); 3753 FunctionArgList Args; 3754 ImplicitParamDecl TaskPrivatesArg( 3755 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3756 C.getPointerType(PrivatesQTy).withConst().withRestrict(), 3757 ImplicitParamDecl::Other); 3758 Args.push_back(&TaskPrivatesArg); 3759 llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, unsigned> PrivateVarsPos; 3760 unsigned Counter = 1; 3761 for (const Expr *E : Data.PrivateVars) { 3762 Args.push_back(ImplicitParamDecl::Create( 3763 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3764 C.getPointerType(C.getPointerType(E->getType())) 3765 .withConst() 3766 .withRestrict(), 3767 ImplicitParamDecl::Other)); 3768 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 3769 PrivateVarsPos[VD] = Counter; 3770 ++Counter; 3771 } 3772 for (const Expr *E : Data.FirstprivateVars) { 3773 Args.push_back(ImplicitParamDecl::Create( 3774 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3775 C.getPointerType(C.getPointerType(E->getType())) 3776 .withConst() 3777 .withRestrict(), 3778 ImplicitParamDecl::Other)); 3779 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 3780 PrivateVarsPos[VD] = Counter; 3781 ++Counter; 3782 } 3783 for (const Expr *E : Data.LastprivateVars) { 3784 Args.push_back(ImplicitParamDecl::Create( 3785 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3786 C.getPointerType(C.getPointerType(E->getType())) 3787 .withConst() 3788 .withRestrict(), 3789 ImplicitParamDecl::Other)); 3790 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 3791 PrivateVarsPos[VD] = Counter; 3792 ++Counter; 3793 } 3794 for (const VarDecl *VD : Data.PrivateLocals) { 3795 QualType Ty = VD->getType().getNonReferenceType(); 3796 if (VD->getType()->isLValueReferenceType()) 3797 Ty = C.getPointerType(Ty); 3798 if (isAllocatableDecl(VD)) 3799 Ty = C.getPointerType(Ty); 3800 Args.push_back(ImplicitParamDecl::Create( 3801 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3802 C.getPointerType(C.getPointerType(Ty)).withConst().withRestrict(), 3803 ImplicitParamDecl::Other)); 3804 PrivateVarsPos[VD] = Counter; 3805 ++Counter; 3806 } 3807 const auto &TaskPrivatesMapFnInfo = 3808 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 3809 llvm::FunctionType *TaskPrivatesMapTy = 3810 CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo); 3811 std::string Name = 3812 CGM.getOpenMPRuntime().getName({"omp_task_privates_map", ""}); 3813 auto *TaskPrivatesMap = llvm::Function::Create( 3814 TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage, Name, 3815 &CGM.getModule()); 3816 
CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskPrivatesMap, 3817 TaskPrivatesMapFnInfo); 3818 if (CGM.getLangOpts().Optimize) { 3819 TaskPrivatesMap->removeFnAttr(llvm::Attribute::NoInline); 3820 TaskPrivatesMap->removeFnAttr(llvm::Attribute::OptimizeNone); 3821 TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline); 3822 } 3823 CodeGenFunction CGF(CGM); 3824 CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap, 3825 TaskPrivatesMapFnInfo, Args, Loc, Loc); 3826 3827 // *privi = &.privates.privi; 3828 LValue Base = CGF.EmitLoadOfPointerLValue( 3829 CGF.GetAddrOfLocalVar(&TaskPrivatesArg), 3830 TaskPrivatesArg.getType()->castAs<PointerType>()); 3831 const auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl()); 3832 Counter = 0; 3833 for (const FieldDecl *Field : PrivatesQTyRD->fields()) { 3834 LValue FieldLVal = CGF.EmitLValueForField(Base, Field); 3835 const VarDecl *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]]; 3836 LValue RefLVal = 3837 CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType()); 3838 LValue RefLoadLVal = CGF.EmitLoadOfPointerLValue( 3839 RefLVal.getAddress(CGF), RefLVal.getType()->castAs<PointerType>()); 3840 CGF.EmitStoreOfScalar(FieldLVal.getPointer(CGF), RefLoadLVal); 3841 ++Counter; 3842 } 3843 CGF.FinishFunction(); 3844 return TaskPrivatesMap; 3845 } 3846 3847 /// Emit initialization for private variables in task-based directives. 3848 static void emitPrivatesInit(CodeGenFunction &CGF, 3849 const OMPExecutableDirective &D, 3850 Address KmpTaskSharedsPtr, LValue TDBase, 3851 const RecordDecl *KmpTaskTWithPrivatesQTyRD, 3852 QualType SharedsTy, QualType SharedsPtrTy, 3853 const OMPTaskDataTy &Data, 3854 ArrayRef<PrivateDataTy> Privates, bool ForDup) { 3855 ASTContext &C = CGF.getContext(); 3856 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin()); 3857 LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI); 3858 OpenMPDirectiveKind Kind = isOpenMPTaskLoopDirective(D.getDirectiveKind()) 3859 ? OMPD_taskloop 3860 : OMPD_task; 3861 const CapturedStmt &CS = *D.getCapturedStmt(Kind); 3862 CodeGenFunction::CGCapturedStmtInfo CapturesInfo(CS); 3863 LValue SrcBase; 3864 bool IsTargetTask = 3865 isOpenMPTargetDataManagementDirective(D.getDirectiveKind()) || 3866 isOpenMPTargetExecutionDirective(D.getDirectiveKind()); 3867 // For target-based directives skip 4 firstprivate arrays BasePointersArray, 3868 // PointersArray, SizesArray, and MappersArray. The original variables for 3869 // these arrays are not captured and we get their addresses explicitly. 3870 if ((!IsTargetTask && !Data.FirstprivateVars.empty() && ForDup) || 3871 (IsTargetTask && KmpTaskSharedsPtr.isValid())) { 3872 SrcBase = CGF.MakeAddrLValue( 3873 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 3874 KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy)), 3875 SharedsTy); 3876 } 3877 FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin(); 3878 for (const PrivateDataTy &Pair : Privates) { 3879 // Do not initialize private locals. 
3880 if (Pair.second.isLocalPrivate()) { 3881 ++FI; 3882 continue; 3883 } 3884 const VarDecl *VD = Pair.second.PrivateCopy; 3885 const Expr *Init = VD->getAnyInitializer(); 3886 if (Init && (!ForDup || (isa<CXXConstructExpr>(Init) && 3887 !CGF.isTrivialInitializer(Init)))) { 3888 LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI); 3889 if (const VarDecl *Elem = Pair.second.PrivateElemInit) { 3890 const VarDecl *OriginalVD = Pair.second.Original; 3891 // Check if the variable is the target-based BasePointersArray, 3892 // PointersArray, SizesArray, or MappersArray. 3893 LValue SharedRefLValue; 3894 QualType Type = PrivateLValue.getType(); 3895 const FieldDecl *SharedField = CapturesInfo.lookup(OriginalVD); 3896 if (IsTargetTask && !SharedField) { 3897 assert(isa<ImplicitParamDecl>(OriginalVD) && 3898 isa<CapturedDecl>(OriginalVD->getDeclContext()) && 3899 cast<CapturedDecl>(OriginalVD->getDeclContext()) 3900 ->getNumParams() == 0 && 3901 isa<TranslationUnitDecl>( 3902 cast<CapturedDecl>(OriginalVD->getDeclContext()) 3903 ->getDeclContext()) && 3904 "Expected artificial target data variable."); 3905 SharedRefLValue = 3906 CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(OriginalVD), Type); 3907 } else if (ForDup) { 3908 SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField); 3909 SharedRefLValue = CGF.MakeAddrLValue( 3910 Address(SharedRefLValue.getPointer(CGF), 3911 C.getDeclAlign(OriginalVD)), 3912 SharedRefLValue.getType(), LValueBaseInfo(AlignmentSource::Decl), 3913 SharedRefLValue.getTBAAInfo()); 3914 } else if (CGF.LambdaCaptureFields.count( 3915 Pair.second.Original->getCanonicalDecl()) > 0 || 3916 dyn_cast_or_null<BlockDecl>(CGF.CurCodeDecl)) { 3917 SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef); 3918 } else { 3919 // Processing for implicitly captured variables. 3920 InlinedOpenMPRegionRAII Region( 3921 CGF, [](CodeGenFunction &, PrePostActionTy &) {}, OMPD_unknown, 3922 /*HasCancel=*/false, /*NoInheritance=*/true); 3923 SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef); 3924 } 3925 if (Type->isArrayType()) { 3926 // Initialize firstprivate array. 3927 if (!isa<CXXConstructExpr>(Init) || CGF.isTrivialInitializer(Init)) { 3928 // Perform simple memcpy. 3929 CGF.EmitAggregateAssign(PrivateLValue, SharedRefLValue, Type); 3930 } else { 3931 // Initialize firstprivate array using element-by-element 3932 // initialization. 3933 CGF.EmitOMPAggregateAssign( 3934 PrivateLValue.getAddress(CGF), SharedRefLValue.getAddress(CGF), 3935 Type, 3936 [&CGF, Elem, Init, &CapturesInfo](Address DestElement, 3937 Address SrcElement) { 3938 // Clean up any temporaries needed by the initialization. 3939 CodeGenFunction::OMPPrivateScope InitScope(CGF); 3940 InitScope.addPrivate( 3941 Elem, [SrcElement]() -> Address { return SrcElement; }); 3942 (void)InitScope.Privatize(); 3943 // Emit initialization for single element. 
3944 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII( 3945 CGF, &CapturesInfo); 3946 CGF.EmitAnyExprToMem(Init, DestElement, 3947 Init->getType().getQualifiers(), 3948 /*IsInitializer=*/false); 3949 }); 3950 } 3951 } else { 3952 CodeGenFunction::OMPPrivateScope InitScope(CGF); 3953 InitScope.addPrivate(Elem, [SharedRefLValue, &CGF]() -> Address { 3954 return SharedRefLValue.getAddress(CGF); 3955 }); 3956 (void)InitScope.Privatize(); 3957 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo); 3958 CGF.EmitExprAsInit(Init, VD, PrivateLValue, 3959 /*capturedByInit=*/false); 3960 } 3961 } else { 3962 CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false); 3963 } 3964 } 3965 ++FI; 3966 } 3967 } 3968 3969 /// Check if duplication function is required for taskloops. 3970 static bool checkInitIsRequired(CodeGenFunction &CGF, 3971 ArrayRef<PrivateDataTy> Privates) { 3972 bool InitRequired = false; 3973 for (const PrivateDataTy &Pair : Privates) { 3974 if (Pair.second.isLocalPrivate()) 3975 continue; 3976 const VarDecl *VD = Pair.second.PrivateCopy; 3977 const Expr *Init = VD->getAnyInitializer(); 3978 InitRequired = InitRequired || (Init && isa<CXXConstructExpr>(Init) && 3979 !CGF.isTrivialInitializer(Init)); 3980 if (InitRequired) 3981 break; 3982 } 3983 return InitRequired; 3984 } 3985 3986 3987 /// Emit task_dup function (for initialization of 3988 /// private/firstprivate/lastprivate vars and last_iter flag) 3989 /// \code 3990 /// void __task_dup_entry(kmp_task_t *task_dst, const kmp_task_t *task_src, int 3991 /// lastpriv) { 3992 /// // setup lastprivate flag 3993 /// task_dst->last = lastpriv; 3994 /// // could be constructor calls here... 3995 /// } 3996 /// \endcode 3997 static llvm::Value * 3998 emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc, 3999 const OMPExecutableDirective &D, 4000 QualType KmpTaskTWithPrivatesPtrQTy, 4001 const RecordDecl *KmpTaskTWithPrivatesQTyRD, 4002 const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy, 4003 QualType SharedsPtrTy, const OMPTaskDataTy &Data, 4004 ArrayRef<PrivateDataTy> Privates, bool WithLastIter) { 4005 ASTContext &C = CGM.getContext(); 4006 FunctionArgList Args; 4007 ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 4008 KmpTaskTWithPrivatesPtrQTy, 4009 ImplicitParamDecl::Other); 4010 ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 4011 KmpTaskTWithPrivatesPtrQTy, 4012 ImplicitParamDecl::Other); 4013 ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy, 4014 ImplicitParamDecl::Other); 4015 Args.push_back(&DstArg); 4016 Args.push_back(&SrcArg); 4017 Args.push_back(&LastprivArg); 4018 const auto &TaskDupFnInfo = 4019 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 4020 llvm::FunctionType *TaskDupTy = CGM.getTypes().GetFunctionType(TaskDupFnInfo); 4021 std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_dup", ""}); 4022 auto *TaskDup = llvm::Function::Create( 4023 TaskDupTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule()); 4024 CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskDup, TaskDupFnInfo); 4025 TaskDup->setDoesNotRecurse(); 4026 CodeGenFunction CGF(CGM); 4027 CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskDup, TaskDupFnInfo, Args, Loc, 4028 Loc); 4029 4030 LValue TDBase = CGF.EmitLoadOfPointerLValue( 4031 CGF.GetAddrOfLocalVar(&DstArg), 4032 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>()); 4033 // task_dst->liter = lastpriv; 4034 if (WithLastIter) { 4035 auto LIFI = 
std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter); 4036 LValue Base = CGF.EmitLValueForField( 4037 TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin()); 4038 LValue LILVal = CGF.EmitLValueForField(Base, *LIFI); 4039 llvm::Value *Lastpriv = CGF.EmitLoadOfScalar( 4040 CGF.GetAddrOfLocalVar(&LastprivArg), /*Volatile=*/false, C.IntTy, Loc); 4041 CGF.EmitStoreOfScalar(Lastpriv, LILVal); 4042 } 4043 4044 // Emit initial values for private copies (if any). 4045 assert(!Privates.empty()); 4046 Address KmpTaskSharedsPtr = Address::invalid(); 4047 if (!Data.FirstprivateVars.empty()) { 4048 LValue TDBase = CGF.EmitLoadOfPointerLValue( 4049 CGF.GetAddrOfLocalVar(&SrcArg), 4050 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>()); 4051 LValue Base = CGF.EmitLValueForField( 4052 TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin()); 4053 KmpTaskSharedsPtr = Address( 4054 CGF.EmitLoadOfScalar(CGF.EmitLValueForField( 4055 Base, *std::next(KmpTaskTQTyRD->field_begin(), 4056 KmpTaskTShareds)), 4057 Loc), 4058 CGM.getNaturalTypeAlignment(SharedsTy)); 4059 } 4060 emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase, KmpTaskTWithPrivatesQTyRD, 4061 SharedsTy, SharedsPtrTy, Data, Privates, /*ForDup=*/true); 4062 CGF.FinishFunction(); 4063 return TaskDup; 4064 } 4065 4066 /// Checks if destructor function is required to be generated. 4067 /// \return true if cleanups are required, false otherwise. 4068 static bool 4069 checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD, 4070 ArrayRef<PrivateDataTy> Privates) { 4071 for (const PrivateDataTy &P : Privates) { 4072 if (P.second.isLocalPrivate()) 4073 continue; 4074 QualType Ty = P.second.Original->getType().getNonReferenceType(); 4075 if (Ty.isDestructedType()) 4076 return true; 4077 } 4078 return false; 4079 } 4080 4081 namespace { 4082 /// Loop generator for OpenMP iterator expression. 
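/// For each iterator in, e.g., 'iterator(i = begin:end:step)', the emitted structure is roughly: /// \code /// counter = 0; /// cont: /// if (counter < N) goto body; else goto exit; /// body: /// i = begin + counter * step; /// <uses of the iterator> /// counter = counter + 1; // emitted by the destructor /// goto cont; /// exit: /// \endcode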
4083 class OMPIteratorGeneratorScope final 4084 : public CodeGenFunction::OMPPrivateScope { 4085 CodeGenFunction &CGF; 4086 const OMPIteratorExpr *E = nullptr; 4087 SmallVector<CodeGenFunction::JumpDest, 4> ContDests; 4088 SmallVector<CodeGenFunction::JumpDest, 4> ExitDests; 4089 OMPIteratorGeneratorScope() = delete; 4090 OMPIteratorGeneratorScope(OMPIteratorGeneratorScope &) = delete; 4091 4092 public: 4093 OMPIteratorGeneratorScope(CodeGenFunction &CGF, const OMPIteratorExpr *E) 4094 : CodeGenFunction::OMPPrivateScope(CGF), CGF(CGF), E(E) { 4095 if (!E) 4096 return; 4097 SmallVector<llvm::Value *, 4> Uppers; 4098 for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) { 4099 Uppers.push_back(CGF.EmitScalarExpr(E->getHelper(I).Upper)); 4100 const auto *VD = cast<VarDecl>(E->getIteratorDecl(I)); 4101 addPrivate(VD, [&CGF, VD]() { 4102 return CGF.CreateMemTemp(VD->getType(), VD->getName()); 4103 }); 4104 const OMPIteratorHelperData &HelperData = E->getHelper(I); 4105 addPrivate(HelperData.CounterVD, [&CGF, &HelperData]() { 4106 return CGF.CreateMemTemp(HelperData.CounterVD->getType(), 4107 "counter.addr"); 4108 }); 4109 } 4110 Privatize(); 4111 4112 for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) { 4113 const OMPIteratorHelperData &HelperData = E->getHelper(I); 4114 LValue CLVal = 4115 CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(HelperData.CounterVD), 4116 HelperData.CounterVD->getType()); 4117 // Counter = 0; 4118 CGF.EmitStoreOfScalar( 4119 llvm::ConstantInt::get(CLVal.getAddress(CGF).getElementType(), 0), 4120 CLVal); 4121 CodeGenFunction::JumpDest &ContDest = 4122 ContDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.cont")); 4123 CodeGenFunction::JumpDest &ExitDest = 4124 ExitDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.exit")); 4125 // N = <number-of-iterations>; 4126 llvm::Value *N = Uppers[I]; 4127 // cont: 4128 // if (Counter < N) goto body; else goto exit; 4129 CGF.EmitBlock(ContDest.getBlock()); 4130 auto *CVal = 4131 CGF.EmitLoadOfScalar(CLVal, HelperData.CounterVD->getLocation()); 4132 llvm::Value *Cmp = 4133 HelperData.CounterVD->getType()->isSignedIntegerOrEnumerationType() 4134 ?
CGF.Builder.CreateICmpSLT(CVal, N) 4135 : CGF.Builder.CreateICmpULT(CVal, N); 4136 llvm::BasicBlock *BodyBB = CGF.createBasicBlock("iter.body"); 4137 CGF.Builder.CreateCondBr(Cmp, BodyBB, ExitDest.getBlock()); 4138 // body: 4139 CGF.EmitBlock(BodyBB); 4140 // Iteri = Begini + Counter * Stepi; 4141 CGF.EmitIgnoredExpr(HelperData.Update); 4142 } 4143 } 4144 ~OMPIteratorGeneratorScope() { 4145 if (!E) 4146 return; 4147 for (unsigned I = E->numOfIterators(); I > 0; --I) { 4148 // Counter = Counter + 1; 4149 const OMPIteratorHelperData &HelperData = E->getHelper(I - 1); 4150 CGF.EmitIgnoredExpr(HelperData.CounterUpdate); 4151 // goto cont; 4152 CGF.EmitBranchThroughCleanup(ContDests[I - 1]); 4153 // exit: 4154 CGF.EmitBlock(ExitDests[I - 1].getBlock(), /*IsFinished=*/I == 1); 4155 } 4156 } 4157 }; 4158 } // namespace 4159 4160 static std::pair<llvm::Value *, llvm::Value *> 4161 getPointerAndSize(CodeGenFunction &CGF, const Expr *E) { 4162 const auto *OASE = dyn_cast<OMPArrayShapingExpr>(E); 4163 llvm::Value *Addr; 4164 if (OASE) { 4165 const Expr *Base = OASE->getBase(); 4166 Addr = CGF.EmitScalarExpr(Base); 4167 } else { 4168 Addr = CGF.EmitLValue(E).getPointer(CGF); 4169 } 4170 llvm::Value *SizeVal; 4171 QualType Ty = E->getType(); 4172 if (OASE) { 4173 SizeVal = CGF.getTypeSize(OASE->getBase()->getType()->getPointeeType()); 4174 for (const Expr *SE : OASE->getDimensions()) { 4175 llvm::Value *Sz = CGF.EmitScalarExpr(SE); 4176 Sz = CGF.EmitScalarConversion( 4177 Sz, SE->getType(), CGF.getContext().getSizeType(), SE->getExprLoc()); 4178 SizeVal = CGF.Builder.CreateNUWMul(SizeVal, Sz); 4179 } 4180 } else if (const auto *ASE = 4181 dyn_cast<OMPArraySectionExpr>(E->IgnoreParenImpCasts())) { 4182 LValue UpAddrLVal = 4183 CGF.EmitOMPArraySectionExpr(ASE, /*IsLowerBound=*/false); 4184 Address UpAddrAddress = UpAddrLVal.getAddress(CGF); 4185 llvm::Value *UpAddr = CGF.Builder.CreateConstGEP1_32( 4186 UpAddrAddress.getElementType(), UpAddrAddress.getPointer(), /*Idx0=*/1); 4187 llvm::Value *LowIntPtr = CGF.Builder.CreatePtrToInt(Addr, CGF.SizeTy); 4188 llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGF.SizeTy); 4189 SizeVal = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr); 4190 } else { 4191 SizeVal = CGF.getTypeSize(Ty); 4192 } 4193 return std::make_pair(Addr, SizeVal); 4194 } 4195 4196 /// Builds kmp_task_affinity_info_t, if it is not built yet, and builds flags type. 4197 static void getKmpAffinityType(ASTContext &C, QualType &KmpTaskAffinityInfoTy) { 4198 QualType FlagsTy = C.getIntTypeForBitwidth(32, /*Signed=*/false); 4199 if (KmpTaskAffinityInfoTy.isNull()) { 4200 RecordDecl *KmpAffinityInfoRD = 4201 C.buildImplicitRecord("kmp_task_affinity_info_t"); 4202 KmpAffinityInfoRD->startDefinition(); 4203 addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getIntPtrType()); 4204 addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getSizeType()); 4205 addFieldToRecordDecl(C, KmpAffinityInfoRD, FlagsTy); 4206 KmpAffinityInfoRD->completeDefinition(); 4207 KmpTaskAffinityInfoTy = C.getRecordType(KmpAffinityInfoRD); 4208 } 4209 } 4210 4211 CGOpenMPRuntime::TaskResultTy 4212 CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc, 4213 const OMPExecutableDirective &D, 4214 llvm::Function *TaskFunction, QualType SharedsTy, 4215 Address Shareds, const OMPTaskDataTy &Data) { 4216 ASTContext &C = CGM.getContext(); 4217 llvm::SmallVector<PrivateDataTy, 4> Privates; 4218 // Aggregate privates and sort them by alignment.
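// Sorting by decreasing alignment minimizes the padding needed in the .kmp_privates.t record that is built from this list.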
4219 const auto *I = Data.PrivateCopies.begin(); 4220 for (const Expr *E : Data.PrivateVars) { 4221 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 4222 Privates.emplace_back( 4223 C.getDeclAlign(VD), 4224 PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()), 4225 /*PrivateElemInit=*/nullptr)); 4226 ++I; 4227 } 4228 I = Data.FirstprivateCopies.begin(); 4229 const auto *IElemInitRef = Data.FirstprivateInits.begin(); 4230 for (const Expr *E : Data.FirstprivateVars) { 4231 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 4232 Privates.emplace_back( 4233 C.getDeclAlign(VD), 4234 PrivateHelpersTy( 4235 E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()), 4236 cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl()))); 4237 ++I; 4238 ++IElemInitRef; 4239 } 4240 I = Data.LastprivateCopies.begin(); 4241 for (const Expr *E : Data.LastprivateVars) { 4242 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 4243 Privates.emplace_back( 4244 C.getDeclAlign(VD), 4245 PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()), 4246 /*PrivateElemInit=*/nullptr)); 4247 ++I; 4248 } 4249 for (const VarDecl *VD : Data.PrivateLocals) { 4250 if (isAllocatableDecl(VD)) 4251 Privates.emplace_back(CGM.getPointerAlign(), PrivateHelpersTy(VD)); 4252 else 4253 Privates.emplace_back(C.getDeclAlign(VD), PrivateHelpersTy(VD)); 4254 } 4255 llvm::stable_sort(Privates, 4256 [](const PrivateDataTy &L, const PrivateDataTy &R) { 4257 return L.first > R.first; 4258 }); 4259 QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1); 4260 // Build type kmp_routine_entry_t (if not built yet). 4261 emitKmpRoutineEntryT(KmpInt32Ty); 4262 // Build type kmp_task_t (if not built yet). 4263 if (isOpenMPTaskLoopDirective(D.getDirectiveKind())) { 4264 if (SavedKmpTaskloopTQTy.isNull()) { 4265 SavedKmpTaskloopTQTy = C.getRecordType(createKmpTaskTRecordDecl( 4266 CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy)); 4267 } 4268 KmpTaskTQTy = SavedKmpTaskloopTQTy; 4269 } else { 4270 assert((D.getDirectiveKind() == OMPD_task || 4271 isOpenMPTargetExecutionDirective(D.getDirectiveKind()) || 4272 isOpenMPTargetDataManagementDirective(D.getDirectiveKind())) && 4273 "Expected taskloop, task or target directive"); 4274 if (SavedKmpTaskTQTy.isNull()) { 4275 SavedKmpTaskTQTy = C.getRecordType(createKmpTaskTRecordDecl( 4276 CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy)); 4277 } 4278 KmpTaskTQTy = SavedKmpTaskTQTy; 4279 } 4280 const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl()); 4281 // Build particular struct kmp_task_t for the given task. 4282 const RecordDecl *KmpTaskTWithPrivatesQTyRD = 4283 createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates); 4284 QualType KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD); 4285 QualType KmpTaskTWithPrivatesPtrQTy = 4286 C.getPointerType(KmpTaskTWithPrivatesQTy); 4287 llvm::Type *KmpTaskTWithPrivatesTy = CGF.ConvertType(KmpTaskTWithPrivatesQTy); 4288 llvm::Type *KmpTaskTWithPrivatesPtrTy = 4289 KmpTaskTWithPrivatesTy->getPointerTo(); 4290 llvm::Value *KmpTaskTWithPrivatesTySize = 4291 CGF.getTypeSize(KmpTaskTWithPrivatesQTy); 4292 QualType SharedsPtrTy = C.getPointerType(SharedsTy); 4293 4294 // Emit initial values for private copies (if any). 
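// The privates-map helper hands the task entry point the addresses of the private copies inside the privates record; when the task has no privates a null pointer is passed instead.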
4295 llvm::Value *TaskPrivatesMap = nullptr; 4296 llvm::Type *TaskPrivatesMapTy = 4297 std::next(TaskFunction->arg_begin(), 3)->getType(); 4298 if (!Privates.empty()) { 4299 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin()); 4300 TaskPrivatesMap = 4301 emitTaskPrivateMappingFunction(CGM, Loc, Data, FI->getType(), Privates); 4302 TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4303 TaskPrivatesMap, TaskPrivatesMapTy); 4304 } else { 4305 TaskPrivatesMap = llvm::ConstantPointerNull::get( 4306 cast<llvm::PointerType>(TaskPrivatesMapTy)); 4307 } 4308 // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid, 4309 // kmp_task_t *tt); 4310 llvm::Function *TaskEntry = emitProxyTaskFunction( 4311 CGM, Loc, D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy, 4312 KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction, 4313 TaskPrivatesMap); 4314 4315 // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid, 4316 // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, 4317 // kmp_routine_entry_t *task_entry); 4318 // Task flags. Format is taken from 4319 // https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h, 4320 // description of kmp_tasking_flags struct. 4321 enum { 4322 TiedFlag = 0x1, 4323 FinalFlag = 0x2, 4324 DestructorsFlag = 0x8, 4325 PriorityFlag = 0x20, 4326 DetachableFlag = 0x40, 4327 }; 4328 unsigned Flags = Data.Tied ? TiedFlag : 0; 4329 bool NeedsCleanup = false; 4330 if (!Privates.empty()) { 4331 NeedsCleanup = 4332 checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD, Privates); 4333 if (NeedsCleanup) 4334 Flags = Flags | DestructorsFlag; 4335 } 4336 if (Data.Priority.getInt()) 4337 Flags = Flags | PriorityFlag; 4338 if (D.hasClausesOfKind<OMPDetachClause>()) 4339 Flags = Flags | DetachableFlag; 4340 llvm::Value *TaskFlags = 4341 Data.Final.getPointer() 4342 ? CGF.Builder.CreateSelect(Data.Final.getPointer(), 4343 CGF.Builder.getInt32(FinalFlag), 4344 CGF.Builder.getInt32(/*C=*/0)) 4345 : CGF.Builder.getInt32(Data.Final.getInt() ? FinalFlag : 0); 4346 TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags)); 4347 llvm::Value *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy)); 4348 SmallVector<llvm::Value *, 8> AllocArgs = {emitUpdateLocation(CGF, Loc), 4349 getThreadID(CGF, Loc), TaskFlags, KmpTaskTWithPrivatesTySize, 4350 SharedsSize, CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4351 TaskEntry, KmpRoutineEntryPtrTy)}; 4352 llvm::Value *NewTask; 4353 if (D.hasClausesOfKind<OMPNowaitClause>()) { 4354 // Check if we have any device clause associated with the directive. 4355 const Expr *Device = nullptr; 4356 if (auto *C = D.getSingleClause<OMPDeviceClause>()) 4357 Device = C->getDevice(); 4358 // Emit device ID if any otherwise use default value. 4359 llvm::Value *DeviceID; 4360 if (Device) 4361 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device), 4362 CGF.Int64Ty, /*isSigned=*/true); 4363 else 4364 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF); 4365 AllocArgs.push_back(DeviceID); 4366 NewTask = CGF.EmitRuntimeCall( 4367 OMPBuilder.getOrCreateRuntimeFunction( 4368 CGM.getModule(), OMPRTL___kmpc_omp_target_task_alloc), 4369 AllocArgs); 4370 } else { 4371 NewTask = 4372 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 4373 CGM.getModule(), OMPRTL___kmpc_omp_task_alloc), 4374 AllocArgs); 4375 } 4376 // Emit detach clause initialization. 
4377 // evt = (typeof(evt))__kmpc_task_allow_completion_event(loc, tid, 4378 // task_descriptor); 4379 if (const auto *DC = D.getSingleClause<OMPDetachClause>()) { 4380 const Expr *Evt = DC->getEventHandler()->IgnoreParenImpCasts(); 4381 LValue EvtLVal = CGF.EmitLValue(Evt); 4382 4383 // Build kmp_event_t *__kmpc_task_allow_completion_event(ident_t *loc_ref, 4384 // int gtid, kmp_task_t *task); 4385 llvm::Value *Loc = emitUpdateLocation(CGF, DC->getBeginLoc()); 4386 llvm::Value *Tid = getThreadID(CGF, DC->getBeginLoc()); 4387 Tid = CGF.Builder.CreateIntCast(Tid, CGF.IntTy, /*isSigned=*/false); 4388 llvm::Value *EvtVal = CGF.EmitRuntimeCall( 4389 OMPBuilder.getOrCreateRuntimeFunction( 4390 CGM.getModule(), OMPRTL___kmpc_task_allow_completion_event), 4391 {Loc, Tid, NewTask}); 4392 EvtVal = CGF.EmitScalarConversion(EvtVal, C.VoidPtrTy, Evt->getType(), 4393 Evt->getExprLoc()); 4394 CGF.EmitStoreOfScalar(EvtVal, EvtLVal); 4395 } 4396 // Process affinity clauses. 4397 if (D.hasClausesOfKind<OMPAffinityClause>()) { 4398 // Process list of affinity data. 4399 ASTContext &C = CGM.getContext(); 4400 Address AffinitiesArray = Address::invalid(); 4401 // Calculate number of elements to form the array of affinity data. 4402 llvm::Value *NumOfElements = nullptr; 4403 unsigned NumAffinities = 0; 4404 for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) { 4405 if (const Expr *Modifier = C->getModifier()) { 4406 const auto *IE = cast<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts()); 4407 for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) { 4408 llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper); 4409 Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false); 4410 NumOfElements = 4411 NumOfElements ? CGF.Builder.CreateNUWMul(NumOfElements, Sz) : Sz; 4412 } 4413 } else { 4414 NumAffinities += C->varlist_size(); 4415 } 4416 } 4417 getKmpAffinityType(CGM.getContext(), KmpTaskAffinityInfoTy); 4418 // Fields ids in kmp_task_affinity_info record. 4419 enum RTLAffinityInfoFieldsTy { BaseAddr, Len, Flags }; 4420 4421 QualType KmpTaskAffinityInfoArrayTy; 4422 if (NumOfElements) { 4423 NumOfElements = CGF.Builder.CreateNUWAdd( 4424 llvm::ConstantInt::get(CGF.SizeTy, NumAffinities), NumOfElements); 4425 auto *OVE = new (C) OpaqueValueExpr( 4426 Loc, 4427 C.getIntTypeForBitwidth(C.getTypeSize(C.getSizeType()), /*Signed=*/0), 4428 VK_PRValue); 4429 CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, OVE, 4430 RValue::get(NumOfElements)); 4431 KmpTaskAffinityInfoArrayTy = 4432 C.getVariableArrayType(KmpTaskAffinityInfoTy, OVE, ArrayType::Normal, 4433 /*IndexTypeQuals=*/0, SourceRange(Loc, Loc)); 4434 // Properly emit variable-sized array. 
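    // The number of affinity entries is only known at run time here, so the
    // code below wraps NumOfElements in an OpaqueValueExpr, builds a
    // variable-length array type from it, and relies on EmitVarDecl to
    // produce the dynamically sized stack allocation for the array.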
4435 auto *PD = ImplicitParamDecl::Create(C, KmpTaskAffinityInfoArrayTy, 4436 ImplicitParamDecl::Other); 4437 CGF.EmitVarDecl(*PD); 4438 AffinitiesArray = CGF.GetAddrOfLocalVar(PD); 4439 NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty, 4440 /*isSigned=*/false); 4441 } else { 4442 KmpTaskAffinityInfoArrayTy = C.getConstantArrayType( 4443 KmpTaskAffinityInfoTy, 4444 llvm::APInt(C.getTypeSize(C.getSizeType()), NumAffinities), nullptr, 4445 ArrayType::Normal, /*IndexTypeQuals=*/0); 4446 AffinitiesArray = 4447 CGF.CreateMemTemp(KmpTaskAffinityInfoArrayTy, ".affs.arr.addr"); 4448 AffinitiesArray = CGF.Builder.CreateConstArrayGEP(AffinitiesArray, 0); 4449 NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumAffinities, 4450 /*isSigned=*/false); 4451 } 4452 4453 const auto *KmpAffinityInfoRD = KmpTaskAffinityInfoTy->getAsRecordDecl(); 4454 // Fill array by elements without iterators. 4455 unsigned Pos = 0; 4456 bool HasIterator = false; 4457 for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) { 4458 if (C->getModifier()) { 4459 HasIterator = true; 4460 continue; 4461 } 4462 for (const Expr *E : C->varlists()) { 4463 llvm::Value *Addr; 4464 llvm::Value *Size; 4465 std::tie(Addr, Size) = getPointerAndSize(CGF, E); 4466 LValue Base = 4467 CGF.MakeAddrLValue(CGF.Builder.CreateConstGEP(AffinitiesArray, Pos), 4468 KmpTaskAffinityInfoTy); 4469 // affs[i].base_addr = &<Affinities[i].second>; 4470 LValue BaseAddrLVal = CGF.EmitLValueForField( 4471 Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr)); 4472 CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy), 4473 BaseAddrLVal); 4474 // affs[i].len = sizeof(<Affinities[i].second>); 4475 LValue LenLVal = CGF.EmitLValueForField( 4476 Base, *std::next(KmpAffinityInfoRD->field_begin(), Len)); 4477 CGF.EmitStoreOfScalar(Size, LenLVal); 4478 ++Pos; 4479 } 4480 } 4481 LValue PosLVal; 4482 if (HasIterator) { 4483 PosLVal = CGF.MakeAddrLValue( 4484 CGF.CreateMemTemp(C.getSizeType(), "affs.counter.addr"), 4485 C.getSizeType()); 4486 CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal); 4487 } 4488 // Process elements with iterators. 
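    // Illustrative example (not from the original source): for
    //   #pragma omp task affinity(iterator(i=0:n): a[i])
    // the loop below runs once per iterator value, filling one
    // kmp_task_affinity_info_t entry per element and advancing the running
    // index kept in PosLVal.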
4489 for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) { 4490 const Expr *Modifier = C->getModifier(); 4491 if (!Modifier) 4492 continue; 4493 OMPIteratorGeneratorScope IteratorScope( 4494 CGF, cast_or_null<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts())); 4495 for (const Expr *E : C->varlists()) { 4496 llvm::Value *Addr; 4497 llvm::Value *Size; 4498 std::tie(Addr, Size) = getPointerAndSize(CGF, E); 4499 llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc()); 4500 LValue Base = CGF.MakeAddrLValue( 4501 Address(CGF.Builder.CreateGEP(AffinitiesArray.getElementType(), 4502 AffinitiesArray.getPointer(), Idx), 4503 AffinitiesArray.getAlignment()), 4504 KmpTaskAffinityInfoTy); 4505 // affs[i].base_addr = &<Affinities[i].second>; 4506 LValue BaseAddrLVal = CGF.EmitLValueForField( 4507 Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr)); 4508 CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy), 4509 BaseAddrLVal); 4510 // affs[i].len = sizeof(<Affinities[i].second>); 4511 LValue LenLVal = CGF.EmitLValueForField( 4512 Base, *std::next(KmpAffinityInfoRD->field_begin(), Len)); 4513 CGF.EmitStoreOfScalar(Size, LenLVal); 4514 Idx = CGF.Builder.CreateNUWAdd( 4515 Idx, llvm::ConstantInt::get(Idx->getType(), 1)); 4516 CGF.EmitStoreOfScalar(Idx, PosLVal); 4517 } 4518 } 4519 // Call to kmp_int32 __kmpc_omp_reg_task_with_affinity(ident_t *loc_ref, 4520 // kmp_int32 gtid, kmp_task_t *new_task, kmp_int32 4521 // naffins, kmp_task_affinity_info_t *affin_list); 4522 llvm::Value *LocRef = emitUpdateLocation(CGF, Loc); 4523 llvm::Value *GTid = getThreadID(CGF, Loc); 4524 llvm::Value *AffinListPtr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4525 AffinitiesArray.getPointer(), CGM.VoidPtrTy); 4526 // FIXME: Emit the function and ignore its result for now unless the 4527 // runtime function is properly implemented. 4528 (void)CGF.EmitRuntimeCall( 4529 OMPBuilder.getOrCreateRuntimeFunction( 4530 CGM.getModule(), OMPRTL___kmpc_omp_reg_task_with_affinity), 4531 {LocRef, GTid, NewTask, NumOfElements, AffinListPtr}); 4532 } 4533 llvm::Value *NewTaskNewTaskTTy = 4534 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4535 NewTask, KmpTaskTWithPrivatesPtrTy); 4536 LValue Base = CGF.MakeNaturalAlignAddrLValue(NewTaskNewTaskTTy, 4537 KmpTaskTWithPrivatesQTy); 4538 LValue TDBase = 4539 CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin()); 4540 // Fill the data in the resulting kmp_task_t record. 4541 // Copy shareds if there are any. 4542 Address KmpTaskSharedsPtr = Address::invalid(); 4543 if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) { 4544 KmpTaskSharedsPtr = 4545 Address(CGF.EmitLoadOfScalar( 4546 CGF.EmitLValueForField( 4547 TDBase, *std::next(KmpTaskTQTyRD->field_begin(), 4548 KmpTaskTShareds)), 4549 Loc), 4550 CGM.getNaturalTypeAlignment(SharedsTy)); 4551 LValue Dest = CGF.MakeAddrLValue(KmpTaskSharedsPtr, SharedsTy); 4552 LValue Src = CGF.MakeAddrLValue(Shareds, SharedsTy); 4553 CGF.EmitAggregateCopy(Dest, Src, SharedsTy, AggValueSlot::DoesNotOverlap); 4554 } 4555 // Emit initial values for private copies (if any). 
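  // For taskloop directives, if there are lastprivates or privates needing
  // initialization, a task-duplication helper is also emitted below; the
  // runtime calls it whenever it splits the taskloop into several task
  // copies, so each copy gets correctly initialized private data.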
4556 TaskResultTy Result; 4557 if (!Privates.empty()) { 4558 emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, Base, KmpTaskTWithPrivatesQTyRD, 4559 SharedsTy, SharedsPtrTy, Data, Privates, 4560 /*ForDup=*/false); 4561 if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) && 4562 (!Data.LastprivateVars.empty() || checkInitIsRequired(CGF, Privates))) { 4563 Result.TaskDupFn = emitTaskDupFunction( 4564 CGM, Loc, D, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTyRD, 4565 KmpTaskTQTyRD, SharedsTy, SharedsPtrTy, Data, Privates, 4566 /*WithLastIter=*/!Data.LastprivateVars.empty()); 4567 } 4568 } 4569 // Fields of union "kmp_cmplrdata_t" for destructors and priority. 4570 enum { Priority = 0, Destructors = 1 }; 4571 // Provide pointer to function with destructors for privates. 4572 auto FI = std::next(KmpTaskTQTyRD->field_begin(), Data1); 4573 const RecordDecl *KmpCmplrdataUD = 4574 (*FI)->getType()->getAsUnionType()->getDecl(); 4575 if (NeedsCleanup) { 4576 llvm::Value *DestructorFn = emitDestructorsFunction( 4577 CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy, 4578 KmpTaskTWithPrivatesQTy); 4579 LValue Data1LV = CGF.EmitLValueForField(TDBase, *FI); 4580 LValue DestructorsLV = CGF.EmitLValueForField( 4581 Data1LV, *std::next(KmpCmplrdataUD->field_begin(), Destructors)); 4582 CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4583 DestructorFn, KmpRoutineEntryPtrTy), 4584 DestructorsLV); 4585 } 4586 // Set priority. 4587 if (Data.Priority.getInt()) { 4588 LValue Data2LV = CGF.EmitLValueForField( 4589 TDBase, *std::next(KmpTaskTQTyRD->field_begin(), Data2)); 4590 LValue PriorityLV = CGF.EmitLValueForField( 4591 Data2LV, *std::next(KmpCmplrdataUD->field_begin(), Priority)); 4592 CGF.EmitStoreOfScalar(Data.Priority.getPointer(), PriorityLV); 4593 } 4594 Result.NewTask = NewTask; 4595 Result.TaskEntry = TaskEntry; 4596 Result.NewTaskNewTaskTTy = NewTaskNewTaskTTy; 4597 Result.TDBase = TDBase; 4598 Result.KmpTaskTQTyRD = KmpTaskTQTyRD; 4599 return Result; 4600 } 4601 4602 namespace { 4603 /// Dependence kind for RTL. 4604 enum RTLDependenceKindTy { 4605 DepIn = 0x01, 4606 DepInOut = 0x3, 4607 DepMutexInOutSet = 0x4 4608 }; 4609 /// Fields ids in kmp_depend_info record. 4610 enum RTLDependInfoFieldsTy { BaseAddr, Len, Flags }; 4611 } // namespace 4612 4613 /// Translates internal dependency kind into the runtime kind. 4614 static RTLDependenceKindTy translateDependencyKind(OpenMPDependClauseKind K) { 4615 RTLDependenceKindTy DepKind; 4616 switch (K) { 4617 case OMPC_DEPEND_in: 4618 DepKind = DepIn; 4619 break; 4620 // Out and InOut dependencies must use the same code. 4621 case OMPC_DEPEND_out: 4622 case OMPC_DEPEND_inout: 4623 DepKind = DepInOut; 4624 break; 4625 case OMPC_DEPEND_mutexinoutset: 4626 DepKind = DepMutexInOutSet; 4627 break; 4628 case OMPC_DEPEND_source: 4629 case OMPC_DEPEND_sink: 4630 case OMPC_DEPEND_depobj: 4631 case OMPC_DEPEND_unknown: 4632 llvm_unreachable("Unknown task dependence type"); 4633 } 4634 return DepKind; 4635 } 4636 4637 /// Builds kmp_depend_info, if it is not built yet, and builds flags type. 
4638 static void getDependTypes(ASTContext &C, QualType &KmpDependInfoTy, 4639 QualType &FlagsTy) { 4640 FlagsTy = C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false); 4641 if (KmpDependInfoTy.isNull()) { 4642 RecordDecl *KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info"); 4643 KmpDependInfoRD->startDefinition(); 4644 addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType()); 4645 addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType()); 4646 addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy); 4647 KmpDependInfoRD->completeDefinition(); 4648 KmpDependInfoTy = C.getRecordType(KmpDependInfoRD); 4649 } 4650 } 4651 4652 std::pair<llvm::Value *, LValue> 4653 CGOpenMPRuntime::getDepobjElements(CodeGenFunction &CGF, LValue DepobjLVal, 4654 SourceLocation Loc) { 4655 ASTContext &C = CGM.getContext(); 4656 QualType FlagsTy; 4657 getDependTypes(C, KmpDependInfoTy, FlagsTy); 4658 RecordDecl *KmpDependInfoRD = 4659 cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl()); 4660 LValue Base = CGF.EmitLoadOfPointerLValue( 4661 DepobjLVal.getAddress(CGF), 4662 C.getPointerType(C.VoidPtrTy).castAs<PointerType>()); 4663 QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy); 4664 Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4665 Base.getAddress(CGF), CGF.ConvertTypeForMem(KmpDependInfoPtrTy)); 4666 Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(), 4667 Base.getTBAAInfo()); 4668 llvm::Value *DepObjAddr = CGF.Builder.CreateGEP( 4669 Addr.getElementType(), Addr.getPointer(), 4670 llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true)); 4671 LValue NumDepsBase = CGF.MakeAddrLValue( 4672 Address(DepObjAddr, Addr.getAlignment()), KmpDependInfoTy, 4673 Base.getBaseInfo(), Base.getTBAAInfo()); 4674 // NumDeps = deps[i].base_addr; 4675 LValue BaseAddrLVal = CGF.EmitLValueForField( 4676 NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr)); 4677 llvm::Value *NumDeps = CGF.EmitLoadOfScalar(BaseAddrLVal, Loc); 4678 return std::make_pair(NumDeps, Base); 4679 } 4680 4681 static void emitDependData(CodeGenFunction &CGF, QualType &KmpDependInfoTy, 4682 llvm::PointerUnion<unsigned *, LValue *> Pos, 4683 const OMPTaskDataTy::DependData &Data, 4684 Address DependenciesArray) { 4685 CodeGenModule &CGM = CGF.CGM; 4686 ASTContext &C = CGM.getContext(); 4687 QualType FlagsTy; 4688 getDependTypes(C, KmpDependInfoTy, FlagsTy); 4689 RecordDecl *KmpDependInfoRD = 4690 cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl()); 4691 llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy); 4692 4693 OMPIteratorGeneratorScope IteratorScope( 4694 CGF, cast_or_null<OMPIteratorExpr>( 4695 Data.IteratorExpr ? 
Data.IteratorExpr->IgnoreParenImpCasts()
4696                             : nullptr));
4697   for (const Expr *E : Data.DepExprs) {
4698     llvm::Value *Addr;
4699     llvm::Value *Size;
4700     std::tie(Addr, Size) = getPointerAndSize(CGF, E);
4701     LValue Base;
4702     if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
4703       Base = CGF.MakeAddrLValue(
4704           CGF.Builder.CreateConstGEP(DependenciesArray, *P), KmpDependInfoTy);
4705     } else {
4706       LValue &PosLVal = *Pos.get<LValue *>();
4707       llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
4708       Base = CGF.MakeAddrLValue(
4709           Address(CGF.Builder.CreateGEP(DependenciesArray.getElementType(),
4710                                         DependenciesArray.getPointer(), Idx),
4711                   DependenciesArray.getAlignment()),
4712           KmpDependInfoTy);
4713     }
4714     // deps[i].base_addr = &<Dependencies[i].second>;
4715     LValue BaseAddrLVal = CGF.EmitLValueForField(
4716         Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
4717     CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
4718                           BaseAddrLVal);
4719     // deps[i].len = sizeof(<Dependencies[i].second>);
4720     LValue LenLVal = CGF.EmitLValueForField(
4721         Base, *std::next(KmpDependInfoRD->field_begin(), Len));
4722     CGF.EmitStoreOfScalar(Size, LenLVal);
4723     // deps[i].flags = <Dependencies[i].first>;
4724     RTLDependenceKindTy DepKind = translateDependencyKind(Data.DepKind);
4725     LValue FlagsLVal = CGF.EmitLValueForField(
4726         Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
4727     CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
4728                           FlagsLVal);
4729     if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
4730       ++(*P);
4731     } else {
4732       LValue &PosLVal = *Pos.get<LValue *>();
4733       llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
4734       Idx = CGF.Builder.CreateNUWAdd(Idx,
4735                                      llvm::ConstantInt::get(Idx->getType(), 1));
4736       CGF.EmitStoreOfScalar(Idx, PosLVal);
4737     }
4738   }
4739 }
4740 
4741 static SmallVector<llvm::Value *, 4>
4742 emitDepobjElementsSizes(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
4743                         const OMPTaskDataTy::DependData &Data) {
4744   assert(Data.DepKind == OMPC_DEPEND_depobj &&
4745          "Expected depobj dependency kind.");
4746   SmallVector<llvm::Value *, 4> Sizes;
4747   SmallVector<LValue, 4> SizeLVals;
4748   ASTContext &C = CGF.getContext();
4749   QualType FlagsTy;
4750   getDependTypes(C, KmpDependInfoTy, FlagsTy);
4751   RecordDecl *KmpDependInfoRD =
4752       cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
4753   QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
4754   llvm::Type *KmpDependInfoPtrT = CGF.ConvertTypeForMem(KmpDependInfoPtrTy);
4755   {
4756     OMPIteratorGeneratorScope IteratorScope(
4757         CGF, cast_or_null<OMPIteratorExpr>(
4758                  Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
4759                                    : nullptr));
4760     for (const Expr *E : Data.DepExprs) {
4761       LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
4762       LValue Base = CGF.EmitLoadOfPointerLValue(
4763           DepobjLVal.getAddress(CGF),
4764           C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
4765       Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4766           Base.getAddress(CGF), KmpDependInfoPtrT);
4767       Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(),
4768                                 Base.getTBAAInfo());
4769       llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
4770           Addr.getElementType(), Addr.getPointer(),
4771           llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
4772       LValue NumDepsBase = CGF.MakeAddrLValue(
4773           Address(DepObjAddr, Addr.getAlignment()), KmpDependInfoTy,
4774           Base.getBaseInfo(), Base.getTBAAInfo());
4775       // NumDeps = deps[i].base_addr;
4776       LValue BaseAddrLVal = CGF.EmitLValueForField(
4777           NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
4778       llvm::Value *NumDeps =
4779           CGF.EmitLoadOfScalar(BaseAddrLVal, E->getExprLoc());
4780       LValue NumLVal = CGF.MakeAddrLValue(
4781           CGF.CreateMemTemp(C.getUIntPtrType(), "depobj.size.addr"),
4782           C.getUIntPtrType());
4783       CGF.InitTempAlloca(NumLVal.getAddress(CGF),
4784                          llvm::ConstantInt::get(CGF.IntPtrTy, 0));
4785       llvm::Value *PrevVal = CGF.EmitLoadOfScalar(NumLVal, E->getExprLoc());
4786       llvm::Value *Add = CGF.Builder.CreateNUWAdd(PrevVal, NumDeps);
4787       CGF.EmitStoreOfScalar(Add, NumLVal);
4788       SizeLVals.push_back(NumLVal);
4789     }
4790   }
4791   for (unsigned I = 0, E = SizeLVals.size(); I < E; ++I) {
4792     llvm::Value *Size =
4793         CGF.EmitLoadOfScalar(SizeLVals[I], Data.DepExprs[I]->getExprLoc());
4794     Sizes.push_back(Size);
4795   }
4796   return Sizes;
4797 }
4798 
4799 static void emitDepobjElements(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
4800                                LValue PosLVal,
4801                                const OMPTaskDataTy::DependData &Data,
4802                                Address DependenciesArray) {
4803   assert(Data.DepKind == OMPC_DEPEND_depobj &&
4804          "Expected depobj dependency kind.");
4805   ASTContext &C = CGF.getContext();
4806   QualType FlagsTy;
4807   getDependTypes(C, KmpDependInfoTy, FlagsTy);
4808   RecordDecl *KmpDependInfoRD =
4809       cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
4810   QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
4811   llvm::Type *KmpDependInfoPtrT = CGF.ConvertTypeForMem(KmpDependInfoPtrTy);
4812   llvm::Value *ElSize = CGF.getTypeSize(KmpDependInfoTy);
4813   {
4814     OMPIteratorGeneratorScope IteratorScope(
4815         CGF, cast_or_null<OMPIteratorExpr>(
4816                  Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
4817                                    : nullptr));
4818     for (unsigned I = 0, End = Data.DepExprs.size(); I < End; ++I) {
4819       const Expr *E = Data.DepExprs[I];
4820       LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
4821       LValue Base = CGF.EmitLoadOfPointerLValue(
4822           DepobjLVal.getAddress(CGF),
4823           C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
4824       Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4825           Base.getAddress(CGF), KmpDependInfoPtrT);
4826       Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(),
4827                                 Base.getTBAAInfo());
4828 
4829       // Get number of elements in a single depobj.
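      // Note on the depobj layout assumed here: the handle stored in a depobj
      // variable points one element past a header record, and
      // deps[-1].base_addr holds the number of kmp_depend_info entries that
      // follow (the header is written by emitDepobjDependClause below).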
4830       llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
4831           Addr.getElementType(), Addr.getPointer(),
4832           llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
4833       LValue NumDepsBase = CGF.MakeAddrLValue(
4834           Address(DepObjAddr, Addr.getAlignment()), KmpDependInfoTy,
4835           Base.getBaseInfo(), Base.getTBAAInfo());
4836       // NumDeps = deps[i].base_addr;
4837       LValue BaseAddrLVal = CGF.EmitLValueForField(
4838           NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
4839       llvm::Value *NumDeps =
4840           CGF.EmitLoadOfScalar(BaseAddrLVal, E->getExprLoc());
4841 
4842       // memcopy dependency data.
4843       llvm::Value *Size = CGF.Builder.CreateNUWMul(
4844           ElSize,
4845           CGF.Builder.CreateIntCast(NumDeps, CGF.SizeTy, /*isSigned=*/false));
4846       llvm::Value *Pos = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
4847       Address DepAddr =
4848           Address(CGF.Builder.CreateGEP(DependenciesArray.getElementType(),
4849                                         DependenciesArray.getPointer(), Pos),
4850                   DependenciesArray.getAlignment());
4851       CGF.Builder.CreateMemCpy(DepAddr, Base.getAddress(CGF), Size);
4852 
4853       // Increase pos.
4854       // pos += size;
4855       llvm::Value *Add = CGF.Builder.CreateNUWAdd(Pos, NumDeps);
4856       CGF.EmitStoreOfScalar(Add, PosLVal);
4857     }
4858   }
4859 }
4860 
4861 std::pair<llvm::Value *, Address> CGOpenMPRuntime::emitDependClause(
4862     CodeGenFunction &CGF, ArrayRef<OMPTaskDataTy::DependData> Dependencies,
4863     SourceLocation Loc) {
4864   if (llvm::all_of(Dependencies, [](const OMPTaskDataTy::DependData &D) {
4865         return D.DepExprs.empty();
4866       }))
4867     return std::make_pair(nullptr, Address::invalid());
4868   // Process list of dependencies.
4869   ASTContext &C = CGM.getContext();
4870   Address DependenciesArray = Address::invalid();
4871   llvm::Value *NumOfElements = nullptr;
4872   unsigned NumDependencies = std::accumulate(
4873       Dependencies.begin(), Dependencies.end(), 0,
4874       [](unsigned V, const OMPTaskDataTy::DependData &D) {
4875         return D.DepKind == OMPC_DEPEND_depobj
4876                    ? V
4877                    : (V + (D.IteratorExpr ? 0 : D.DepExprs.size()));
4878       });
4879   QualType FlagsTy;
4880   getDependTypes(C, KmpDependInfoTy, FlagsTy);
4881   bool HasDepobjDeps = false;
4882   bool HasRegularWithIterators = false;
4883   llvm::Value *NumOfDepobjElements = llvm::ConstantInt::get(CGF.IntPtrTy, 0);
4884   llvm::Value *NumOfRegularWithIterators =
4885       llvm::ConstantInt::get(CGF.IntPtrTy, 1);
4886   // Calculate the number of depobj dependencies and regular deps with iterators.
4887   for (const OMPTaskDataTy::DependData &D : Dependencies) {
4888     if (D.DepKind == OMPC_DEPEND_depobj) {
4889       SmallVector<llvm::Value *, 4> Sizes =
4890           emitDepobjElementsSizes(CGF, KmpDependInfoTy, D);
4891       for (llvm::Value *Size : Sizes) {
4892         NumOfDepobjElements =
4893             CGF.Builder.CreateNUWAdd(NumOfDepobjElements, Size);
4894       }
4895       HasDepobjDeps = true;
4896       continue;
4897     }
4898     // Include number of iterations, if any.
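    // Illustrative example (not from the original source):
    //   #pragma omp task depend(iterator(i=0:n, j=0:m): a[i][j])
    // contributes n * m dependence records; the code below accumulates that
    // count as the product of the iterators' upper bounds, which is known
    // only at run time.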
4899     if (const auto *IE = cast_or_null<OMPIteratorExpr>(D.IteratorExpr)) {
4900       for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
4901         llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
4902         Sz = CGF.Builder.CreateIntCast(Sz, CGF.IntPtrTy, /*isSigned=*/false);
4903         NumOfRegularWithIterators =
4904             CGF.Builder.CreateNUWMul(NumOfRegularWithIterators, Sz);
4905       }
4906       HasRegularWithIterators = true;
4907       continue;
4908     }
4909   }
4910 
4911   QualType KmpDependInfoArrayTy;
4912   if (HasDepobjDeps || HasRegularWithIterators) {
4913     NumOfElements = llvm::ConstantInt::get(CGM.IntPtrTy, NumDependencies,
4914                                            /*isSigned=*/false);
4915     if (HasDepobjDeps) {
4916       NumOfElements =
4917           CGF.Builder.CreateNUWAdd(NumOfDepobjElements, NumOfElements);
4918     }
4919     if (HasRegularWithIterators) {
4920       NumOfElements =
4921           CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumOfElements);
4922     }
4923     auto *OVE = new (C) OpaqueValueExpr(
4924         Loc, C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0),
4925         VK_PRValue);
4926     CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, OVE,
4927                                                   RValue::get(NumOfElements));
4928     KmpDependInfoArrayTy =
4929         C.getVariableArrayType(KmpDependInfoTy, OVE, ArrayType::Normal,
4930                                /*IndexTypeQuals=*/0, SourceRange(Loc, Loc));
4931     // CGF.EmitVariablyModifiedType(KmpDependInfoArrayTy);
4932     // Properly emit variable-sized array.
4933     auto *PD = ImplicitParamDecl::Create(C, KmpDependInfoArrayTy,
4934                                          ImplicitParamDecl::Other);
4935     CGF.EmitVarDecl(*PD);
4936     DependenciesArray = CGF.GetAddrOfLocalVar(PD);
4937     NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
4938                                               /*isSigned=*/false);
4939   } else {
4940     KmpDependInfoArrayTy = C.getConstantArrayType(
4941         KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies), nullptr,
4942         ArrayType::Normal, /*IndexTypeQuals=*/0);
4943     DependenciesArray =
4944         CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr");
4945     DependenciesArray = CGF.Builder.CreateConstArrayGEP(DependenciesArray, 0);
4946     NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumDependencies,
4947                                            /*isSigned=*/false);
4948   }
4949   unsigned Pos = 0;
4950   for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
4951     if (Dependencies[I].DepKind == OMPC_DEPEND_depobj ||
4952         Dependencies[I].IteratorExpr)
4953       continue;
4954     emitDependData(CGF, KmpDependInfoTy, &Pos, Dependencies[I],
4955                    DependenciesArray);
4956   }
4957   // Copy regular dependencies with iterators.
4958   LValue PosLVal = CGF.MakeAddrLValue(
4959       CGF.CreateMemTemp(C.getSizeType(), "dep.counter.addr"), C.getSizeType());
4960   CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
4961   for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
4962     if (Dependencies[I].DepKind == OMPC_DEPEND_depobj ||
4963         !Dependencies[I].IteratorExpr)
4964       continue;
4965     emitDependData(CGF, KmpDependInfoTy, &PosLVal, Dependencies[I],
4966                    DependenciesArray);
4967   }
4968   // Copy final depobj arrays without iterators.
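  // The dependence array is filled in three passes: plain dependencies first
  // (at fixed positions), then iterator-expanded dependencies (at dynamic
  // positions tracked in PosLVal), and finally, below, the contents of any
  // depobj arrays are copied in wholesale.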
4969 if (HasDepobjDeps) { 4970 for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) { 4971 if (Dependencies[I].DepKind != OMPC_DEPEND_depobj) 4972 continue; 4973 emitDepobjElements(CGF, KmpDependInfoTy, PosLVal, Dependencies[I], 4974 DependenciesArray); 4975 } 4976 } 4977 DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4978 DependenciesArray, CGF.VoidPtrTy); 4979 return std::make_pair(NumOfElements, DependenciesArray); 4980 } 4981 4982 Address CGOpenMPRuntime::emitDepobjDependClause( 4983 CodeGenFunction &CGF, const OMPTaskDataTy::DependData &Dependencies, 4984 SourceLocation Loc) { 4985 if (Dependencies.DepExprs.empty()) 4986 return Address::invalid(); 4987 // Process list of dependencies. 4988 ASTContext &C = CGM.getContext(); 4989 Address DependenciesArray = Address::invalid(); 4990 unsigned NumDependencies = Dependencies.DepExprs.size(); 4991 QualType FlagsTy; 4992 getDependTypes(C, KmpDependInfoTy, FlagsTy); 4993 RecordDecl *KmpDependInfoRD = 4994 cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl()); 4995 4996 llvm::Value *Size; 4997 // Define type kmp_depend_info[<Dependencies.size()>]; 4998 // For depobj reserve one extra element to store the number of elements. 4999 // It is required to handle depobj(x) update(in) construct. 5000 // kmp_depend_info[<Dependencies.size()>] deps; 5001 llvm::Value *NumDepsVal; 5002 CharUnits Align = C.getTypeAlignInChars(KmpDependInfoTy); 5003 if (const auto *IE = 5004 cast_or_null<OMPIteratorExpr>(Dependencies.IteratorExpr)) { 5005 NumDepsVal = llvm::ConstantInt::get(CGF.SizeTy, 1); 5006 for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) { 5007 llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper); 5008 Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false); 5009 NumDepsVal = CGF.Builder.CreateNUWMul(NumDepsVal, Sz); 5010 } 5011 Size = CGF.Builder.CreateNUWAdd(llvm::ConstantInt::get(CGF.SizeTy, 1), 5012 NumDepsVal); 5013 CharUnits SizeInBytes = 5014 C.getTypeSizeInChars(KmpDependInfoTy).alignTo(Align); 5015 llvm::Value *RecSize = CGM.getSize(SizeInBytes); 5016 Size = CGF.Builder.CreateNUWMul(Size, RecSize); 5017 NumDepsVal = 5018 CGF.Builder.CreateIntCast(NumDepsVal, CGF.IntPtrTy, /*isSigned=*/false); 5019 } else { 5020 QualType KmpDependInfoArrayTy = C.getConstantArrayType( 5021 KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies + 1), 5022 nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0); 5023 CharUnits Sz = C.getTypeSizeInChars(KmpDependInfoArrayTy); 5024 Size = CGM.getSize(Sz.alignTo(Align)); 5025 NumDepsVal = llvm::ConstantInt::get(CGF.IntPtrTy, NumDependencies); 5026 } 5027 // Need to allocate on the dynamic memory. 5028 llvm::Value *ThreadID = getThreadID(CGF, Loc); 5029 // Use default allocator. 5030 llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); 5031 llvm::Value *Args[] = {ThreadID, Size, Allocator}; 5032 5033 llvm::Value *Addr = 5034 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 5035 CGM.getModule(), OMPRTL___kmpc_alloc), 5036 Args, ".dep.arr.addr"); 5037 Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5038 Addr, CGF.ConvertTypeForMem(KmpDependInfoTy)->getPointerTo()); 5039 DependenciesArray = Address(Addr, Align); 5040 // Write number of elements in the first element of array for depobj. 
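  // Resulting layout (illustrative):
  //   deps[0].base_addr = <number of entries>;  // header read back by the RTL
  //   deps[1..N]        = the actual kmp_depend_info records
  // The pointer handed back to the caller below is &deps[1], one element past
  // the header.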
5041   LValue Base = CGF.MakeAddrLValue(DependenciesArray, KmpDependInfoTy);
5042   // deps[i].base_addr = NumDependencies;
5043   LValue BaseAddrLVal = CGF.EmitLValueForField(
5044       Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
5045   CGF.EmitStoreOfScalar(NumDepsVal, BaseAddrLVal);
5046   llvm::PointerUnion<unsigned *, LValue *> Pos;
5047   unsigned Idx = 1;
5048   LValue PosLVal;
5049   if (Dependencies.IteratorExpr) {
5050     PosLVal = CGF.MakeAddrLValue(
5051         CGF.CreateMemTemp(C.getSizeType(), "iterator.counter.addr"),
5052         C.getSizeType());
5053     CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Idx), PosLVal,
5054                           /*IsInit=*/true);
5055     Pos = &PosLVal;
5056   } else {
5057     Pos = &Idx;
5058   }
5059   emitDependData(CGF, KmpDependInfoTy, Pos, Dependencies, DependenciesArray);
5060   DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5061       CGF.Builder.CreateConstGEP(DependenciesArray, 1), CGF.VoidPtrTy);
5062   return DependenciesArray;
5063 }
5064 
5065 void CGOpenMPRuntime::emitDestroyClause(CodeGenFunction &CGF, LValue DepobjLVal,
5066                                         SourceLocation Loc) {
5067   ASTContext &C = CGM.getContext();
5068   QualType FlagsTy;
5069   getDependTypes(C, KmpDependInfoTy, FlagsTy);
5070   LValue Base = CGF.EmitLoadOfPointerLValue(
5071       DepobjLVal.getAddress(CGF),
5072       C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
5073   QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
5074   Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5075       Base.getAddress(CGF), CGF.ConvertTypeForMem(KmpDependInfoPtrTy));
5076   llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
5077       Addr.getElementType(), Addr.getPointer(),
5078       llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
5079   DepObjAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(DepObjAddr,
5080                                                                CGF.VoidPtrTy);
5081   llvm::Value *ThreadID = getThreadID(CGF, Loc);
5082   // Use default allocator.
5083   llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
5084   llvm::Value *Args[] = {ThreadID, DepObjAddr, Allocator};
5085 
5086   // __kmpc_free(gtid, addr, nullptr);
5087   (void)CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
5088                                 CGM.getModule(), OMPRTL___kmpc_free),
5089                             Args);
5090 }
5091 
5092 void CGOpenMPRuntime::emitUpdateClause(CodeGenFunction &CGF, LValue DepobjLVal,
5093                                        OpenMPDependClauseKind NewDepKind,
5094                                        SourceLocation Loc) {
5095   ASTContext &C = CGM.getContext();
5096   QualType FlagsTy;
5097   getDependTypes(C, KmpDependInfoTy, FlagsTy);
5098   RecordDecl *KmpDependInfoRD =
5099       cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
5100   llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
5101   llvm::Value *NumDeps;
5102   LValue Base;
5103   std::tie(NumDeps, Base) = getDepobjElements(CGF, DepobjLVal, Loc);
5104 
5105   Address Begin = Base.getAddress(CGF);
5106   // Cast from pointer to array type to pointer to single element.
5107   llvm::Value *End = CGF.Builder.CreateGEP(
5108       Begin.getElementType(), Begin.getPointer(), NumDeps);
5109   // The basic structure here is a do-while loop; the exit test follows the body.
5110 llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.body"); 5111 llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.done"); 5112 llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock(); 5113 CGF.EmitBlock(BodyBB); 5114 llvm::PHINode *ElementPHI = 5115 CGF.Builder.CreatePHI(Begin.getType(), 2, "omp.elementPast"); 5116 ElementPHI->addIncoming(Begin.getPointer(), EntryBB); 5117 Begin = Address(ElementPHI, Begin.getAlignment()); 5118 Base = CGF.MakeAddrLValue(Begin, KmpDependInfoTy, Base.getBaseInfo(), 5119 Base.getTBAAInfo()); 5120 // deps[i].flags = NewDepKind; 5121 RTLDependenceKindTy DepKind = translateDependencyKind(NewDepKind); 5122 LValue FlagsLVal = CGF.EmitLValueForField( 5123 Base, *std::next(KmpDependInfoRD->field_begin(), Flags)); 5124 CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind), 5125 FlagsLVal); 5126 5127 // Shift the address forward by one element. 5128 Address ElementNext = 5129 CGF.Builder.CreateConstGEP(Begin, /*Index=*/1, "omp.elementNext"); 5130 ElementPHI->addIncoming(ElementNext.getPointer(), 5131 CGF.Builder.GetInsertBlock()); 5132 llvm::Value *IsEmpty = 5133 CGF.Builder.CreateICmpEQ(ElementNext.getPointer(), End, "omp.isempty"); 5134 CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB); 5135 // Done. 5136 CGF.EmitBlock(DoneBB, /*IsFinished=*/true); 5137 } 5138 5139 void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc, 5140 const OMPExecutableDirective &D, 5141 llvm::Function *TaskFunction, 5142 QualType SharedsTy, Address Shareds, 5143 const Expr *IfCond, 5144 const OMPTaskDataTy &Data) { 5145 if (!CGF.HaveInsertPoint()) 5146 return; 5147 5148 TaskResultTy Result = 5149 emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data); 5150 llvm::Value *NewTask = Result.NewTask; 5151 llvm::Function *TaskEntry = Result.TaskEntry; 5152 llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy; 5153 LValue TDBase = Result.TDBase; 5154 const RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD; 5155 // Process list of dependences. 5156 Address DependenciesArray = Address::invalid(); 5157 llvm::Value *NumOfElements; 5158 std::tie(NumOfElements, DependenciesArray) = 5159 emitDependClause(CGF, Data.Dependences, Loc); 5160 5161 // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc() 5162 // libcall. 
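  // Hedged sketch of the overall sequence emitted for, e.g.,
  //   #pragma omp task depend(in: x) if(cond)
  // (names are illustrative, matching the then/else paths built below):
  //   task = __kmpc_omp_task_alloc(loc, gtid, flags, sizeof_task,
  //                                sizeof_shareds, &.omp_task_entry.);
  //   if (cond) {
  //     __kmpc_omp_task_with_deps(loc, gtid, task, ndeps, deplist, 0, null);
  //   } else {
  //     __kmpc_omp_wait_deps(loc, gtid, ndeps, deplist, 0, null);
  //     __kmpc_omp_task_begin_if0(loc, gtid, task);
  //     .omp_task_entry.(gtid, task);
  //     __kmpc_omp_task_complete_if0(loc, gtid, task);
  //   }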
5163 // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid, 5164 // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list, 5165 // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if dependence 5166 // list is not empty 5167 llvm::Value *ThreadID = getThreadID(CGF, Loc); 5168 llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc); 5169 llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask }; 5170 llvm::Value *DepTaskArgs[7]; 5171 if (!Data.Dependences.empty()) { 5172 DepTaskArgs[0] = UpLoc; 5173 DepTaskArgs[1] = ThreadID; 5174 DepTaskArgs[2] = NewTask; 5175 DepTaskArgs[3] = NumOfElements; 5176 DepTaskArgs[4] = DependenciesArray.getPointer(); 5177 DepTaskArgs[5] = CGF.Builder.getInt32(0); 5178 DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); 5179 } 5180 auto &&ThenCodeGen = [this, &Data, TDBase, KmpTaskTQTyRD, &TaskArgs, 5181 &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) { 5182 if (!Data.Tied) { 5183 auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId); 5184 LValue PartIdLVal = CGF.EmitLValueForField(TDBase, *PartIdFI); 5185 CGF.EmitStoreOfScalar(CGF.Builder.getInt32(0), PartIdLVal); 5186 } 5187 if (!Data.Dependences.empty()) { 5188 CGF.EmitRuntimeCall( 5189 OMPBuilder.getOrCreateRuntimeFunction( 5190 CGM.getModule(), OMPRTL___kmpc_omp_task_with_deps), 5191 DepTaskArgs); 5192 } else { 5193 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 5194 CGM.getModule(), OMPRTL___kmpc_omp_task), 5195 TaskArgs); 5196 } 5197 // Check if parent region is untied and build return for untied task; 5198 if (auto *Region = 5199 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) 5200 Region->emitUntiedSwitch(CGF); 5201 }; 5202 5203 llvm::Value *DepWaitTaskArgs[6]; 5204 if (!Data.Dependences.empty()) { 5205 DepWaitTaskArgs[0] = UpLoc; 5206 DepWaitTaskArgs[1] = ThreadID; 5207 DepWaitTaskArgs[2] = NumOfElements; 5208 DepWaitTaskArgs[3] = DependenciesArray.getPointer(); 5209 DepWaitTaskArgs[4] = CGF.Builder.getInt32(0); 5210 DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); 5211 } 5212 auto &M = CGM.getModule(); 5213 auto &&ElseCodeGen = [this, &M, &TaskArgs, ThreadID, NewTaskNewTaskTTy, 5214 TaskEntry, &Data, &DepWaitTaskArgs, 5215 Loc](CodeGenFunction &CGF, PrePostActionTy &) { 5216 CodeGenFunction::RunCleanupsScope LocalScope(CGF); 5217 // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid, 5218 // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 5219 // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info 5220 // is specified. 
5221 if (!Data.Dependences.empty()) 5222 CGF.EmitRuntimeCall( 5223 OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_omp_wait_deps), 5224 DepWaitTaskArgs); 5225 // Call proxy_task_entry(gtid, new_task); 5226 auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy, 5227 Loc](CodeGenFunction &CGF, PrePostActionTy &Action) { 5228 Action.Enter(CGF); 5229 llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy}; 5230 CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskEntry, 5231 OutlinedFnArgs); 5232 }; 5233 5234 // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid, 5235 // kmp_task_t *new_task); 5236 // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid, 5237 // kmp_task_t *new_task); 5238 RegionCodeGenTy RCG(CodeGen); 5239 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction( 5240 M, OMPRTL___kmpc_omp_task_begin_if0), 5241 TaskArgs, 5242 OMPBuilder.getOrCreateRuntimeFunction( 5243 M, OMPRTL___kmpc_omp_task_complete_if0), 5244 TaskArgs); 5245 RCG.setAction(Action); 5246 RCG(CGF); 5247 }; 5248 5249 if (IfCond) { 5250 emitIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen); 5251 } else { 5252 RegionCodeGenTy ThenRCG(ThenCodeGen); 5253 ThenRCG(CGF); 5254 } 5255 } 5256 5257 void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc, 5258 const OMPLoopDirective &D, 5259 llvm::Function *TaskFunction, 5260 QualType SharedsTy, Address Shareds, 5261 const Expr *IfCond, 5262 const OMPTaskDataTy &Data) { 5263 if (!CGF.HaveInsertPoint()) 5264 return; 5265 TaskResultTy Result = 5266 emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data); 5267 // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc() 5268 // libcall. 5269 // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int 5270 // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int 5271 // sched, kmp_uint64 grainsize, void *task_dup); 5272 llvm::Value *ThreadID = getThreadID(CGF, Loc); 5273 llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc); 5274 llvm::Value *IfVal; 5275 if (IfCond) { 5276 IfVal = CGF.Builder.CreateIntCast(CGF.EvaluateExprAsBool(IfCond), CGF.IntTy, 5277 /*isSigned=*/true); 5278 } else { 5279 IfVal = llvm::ConstantInt::getSigned(CGF.IntTy, /*V=*/1); 5280 } 5281 5282 LValue LBLVal = CGF.EmitLValueForField( 5283 Result.TDBase, 5284 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound)); 5285 const auto *LBVar = 5286 cast<VarDecl>(cast<DeclRefExpr>(D.getLowerBoundVariable())->getDecl()); 5287 CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(CGF), 5288 LBLVal.getQuals(), 5289 /*IsInitializer=*/true); 5290 LValue UBLVal = CGF.EmitLValueForField( 5291 Result.TDBase, 5292 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound)); 5293 const auto *UBVar = 5294 cast<VarDecl>(cast<DeclRefExpr>(D.getUpperBoundVariable())->getDecl()); 5295 CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(CGF), 5296 UBLVal.getQuals(), 5297 /*IsInitializer=*/true); 5298 LValue StLVal = CGF.EmitLValueForField( 5299 Result.TDBase, 5300 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTStride)); 5301 const auto *StVar = 5302 cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl()); 5303 CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(CGF), 5304 StLVal.getQuals(), 5305 /*IsInitializer=*/true); 5306 // Store reductions address. 
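  // For 'taskloop' with an enclosing taskgroup reduction, the reduction
  // descriptor produced earlier is stashed in the kmp_task_t 'reductions'
  // field (or zero-initialized when absent) so each generated task can reach
  // the shared reduction data.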
5307 LValue RedLVal = CGF.EmitLValueForField( 5308 Result.TDBase, 5309 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTReductions)); 5310 if (Data.Reductions) { 5311 CGF.EmitStoreOfScalar(Data.Reductions, RedLVal); 5312 } else { 5313 CGF.EmitNullInitialization(RedLVal.getAddress(CGF), 5314 CGF.getContext().VoidPtrTy); 5315 } 5316 enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 }; 5317 llvm::Value *TaskArgs[] = { 5318 UpLoc, 5319 ThreadID, 5320 Result.NewTask, 5321 IfVal, 5322 LBLVal.getPointer(CGF), 5323 UBLVal.getPointer(CGF), 5324 CGF.EmitLoadOfScalar(StLVal, Loc), 5325 llvm::ConstantInt::getSigned( 5326 CGF.IntTy, 1), // Always 1 because taskgroup emitted by the compiler 5327 llvm::ConstantInt::getSigned( 5328 CGF.IntTy, Data.Schedule.getPointer() 5329 ? Data.Schedule.getInt() ? NumTasks : Grainsize 5330 : NoSchedule), 5331 Data.Schedule.getPointer() 5332 ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty, 5333 /*isSigned=*/false) 5334 : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0), 5335 Result.TaskDupFn ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5336 Result.TaskDupFn, CGF.VoidPtrTy) 5337 : llvm::ConstantPointerNull::get(CGF.VoidPtrTy)}; 5338 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 5339 CGM.getModule(), OMPRTL___kmpc_taskloop), 5340 TaskArgs); 5341 } 5342 5343 /// Emit reduction operation for each element of array (required for 5344 /// array sections) LHS op = RHS. 5345 /// \param Type Type of array. 5346 /// \param LHSVar Variable on the left side of the reduction operation 5347 /// (references element of array in original variable). 5348 /// \param RHSVar Variable on the right side of the reduction operation 5349 /// (references element of array in original variable). 5350 /// \param RedOpGen Generator of reduction operation with use of LHSVar and 5351 /// RHSVar. 5352 static void EmitOMPAggregateReduction( 5353 CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar, 5354 const VarDecl *RHSVar, 5355 const llvm::function_ref<void(CodeGenFunction &CGF, const Expr *, 5356 const Expr *, const Expr *)> &RedOpGen, 5357 const Expr *XExpr = nullptr, const Expr *EExpr = nullptr, 5358 const Expr *UpExpr = nullptr) { 5359 // Perform element-by-element initialization. 5360 QualType ElementTy; 5361 Address LHSAddr = CGF.GetAddrOfLocalVar(LHSVar); 5362 Address RHSAddr = CGF.GetAddrOfLocalVar(RHSVar); 5363 5364 // Drill down to the base element type on both arrays. 5365 const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe(); 5366 llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, LHSAddr); 5367 5368 llvm::Value *RHSBegin = RHSAddr.getPointer(); 5369 llvm::Value *LHSBegin = LHSAddr.getPointer(); 5370 // Cast from pointer to array type to pointer to single element. 5371 llvm::Value *LHSEnd = 5372 CGF.Builder.CreateGEP(LHSAddr.getElementType(), LHSBegin, NumElements); 5373 // The basic structure here is a while-do loop. 5374 llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arraycpy.body"); 5375 llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arraycpy.done"); 5376 llvm::Value *IsEmpty = 5377 CGF.Builder.CreateICmpEQ(LHSBegin, LHSEnd, "omp.arraycpy.isempty"); 5378 CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB); 5379 5380 // Enter the loop body, making that address the current address. 
5381 llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock(); 5382 CGF.EmitBlock(BodyBB); 5383 5384 CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy); 5385 5386 llvm::PHINode *RHSElementPHI = CGF.Builder.CreatePHI( 5387 RHSBegin->getType(), 2, "omp.arraycpy.srcElementPast"); 5388 RHSElementPHI->addIncoming(RHSBegin, EntryBB); 5389 Address RHSElementCurrent = 5390 Address(RHSElementPHI, 5391 RHSAddr.getAlignment().alignmentOfArrayElement(ElementSize)); 5392 5393 llvm::PHINode *LHSElementPHI = CGF.Builder.CreatePHI( 5394 LHSBegin->getType(), 2, "omp.arraycpy.destElementPast"); 5395 LHSElementPHI->addIncoming(LHSBegin, EntryBB); 5396 Address LHSElementCurrent = 5397 Address(LHSElementPHI, 5398 LHSAddr.getAlignment().alignmentOfArrayElement(ElementSize)); 5399 5400 // Emit copy. 5401 CodeGenFunction::OMPPrivateScope Scope(CGF); 5402 Scope.addPrivate(LHSVar, [=]() { return LHSElementCurrent; }); 5403 Scope.addPrivate(RHSVar, [=]() { return RHSElementCurrent; }); 5404 Scope.Privatize(); 5405 RedOpGen(CGF, XExpr, EExpr, UpExpr); 5406 Scope.ForceCleanup(); 5407 5408 // Shift the address forward by one element. 5409 llvm::Value *LHSElementNext = CGF.Builder.CreateConstGEP1_32( 5410 LHSAddr.getElementType(), LHSElementPHI, /*Idx0=*/1, 5411 "omp.arraycpy.dest.element"); 5412 llvm::Value *RHSElementNext = CGF.Builder.CreateConstGEP1_32( 5413 RHSAddr.getElementType(), RHSElementPHI, /*Idx0=*/1, 5414 "omp.arraycpy.src.element"); 5415 // Check whether we've reached the end. 5416 llvm::Value *Done = 5417 CGF.Builder.CreateICmpEQ(LHSElementNext, LHSEnd, "omp.arraycpy.done"); 5418 CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB); 5419 LHSElementPHI->addIncoming(LHSElementNext, CGF.Builder.GetInsertBlock()); 5420 RHSElementPHI->addIncoming(RHSElementNext, CGF.Builder.GetInsertBlock()); 5421 5422 // Done. 5423 CGF.EmitBlock(DoneBB, /*IsFinished=*/true); 5424 } 5425 5426 /// Emit reduction combiner. If the combiner is a simple expression emit it as 5427 /// is, otherwise consider it as combiner of UDR decl and emit it as a call of 5428 /// UDR combiner function. 
5429 static void emitReductionCombiner(CodeGenFunction &CGF, 5430 const Expr *ReductionOp) { 5431 if (const auto *CE = dyn_cast<CallExpr>(ReductionOp)) 5432 if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee())) 5433 if (const auto *DRE = 5434 dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts())) 5435 if (const auto *DRD = 5436 dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) { 5437 std::pair<llvm::Function *, llvm::Function *> Reduction = 5438 CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD); 5439 RValue Func = RValue::get(Reduction.first); 5440 CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func); 5441 CGF.EmitIgnoredExpr(ReductionOp); 5442 return; 5443 } 5444 CGF.EmitIgnoredExpr(ReductionOp); 5445 } 5446 5447 llvm::Function *CGOpenMPRuntime::emitReductionFunction( 5448 SourceLocation Loc, llvm::Type *ArgsType, ArrayRef<const Expr *> Privates, 5449 ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs, 5450 ArrayRef<const Expr *> ReductionOps) { 5451 ASTContext &C = CGM.getContext(); 5452 5453 // void reduction_func(void *LHSArg, void *RHSArg); 5454 FunctionArgList Args; 5455 ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 5456 ImplicitParamDecl::Other); 5457 ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 5458 ImplicitParamDecl::Other); 5459 Args.push_back(&LHSArg); 5460 Args.push_back(&RHSArg); 5461 const auto &CGFI = 5462 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 5463 std::string Name = getName({"omp", "reduction", "reduction_func"}); 5464 auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI), 5465 llvm::GlobalValue::InternalLinkage, Name, 5466 &CGM.getModule()); 5467 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI); 5468 Fn->setDoesNotRecurse(); 5469 CodeGenFunction CGF(CGM); 5470 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc); 5471 5472 // Dst = (void*[n])(LHSArg); 5473 // Src = (void*[n])(RHSArg); 5474 Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5475 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)), 5476 ArgsType), CGF.getPointerAlign()); 5477 Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5478 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)), 5479 ArgsType), CGF.getPointerAlign()); 5480 5481 // ... 5482 // *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]); 5483 // ... 5484 CodeGenFunction::OMPPrivateScope Scope(CGF); 5485 auto IPriv = Privates.begin(); 5486 unsigned Idx = 0; 5487 for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) { 5488 const auto *RHSVar = 5489 cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl()); 5490 Scope.addPrivate(RHSVar, [&CGF, RHS, Idx, RHSVar]() { 5491 return emitAddrOfVarFromArray(CGF, RHS, Idx, RHSVar); 5492 }); 5493 const auto *LHSVar = 5494 cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl()); 5495 Scope.addPrivate(LHSVar, [&CGF, LHS, Idx, LHSVar]() { 5496 return emitAddrOfVarFromArray(CGF, LHS, Idx, LHSVar); 5497 }); 5498 QualType PrivTy = (*IPriv)->getType(); 5499 if (PrivTy->isVariablyModifiedType()) { 5500 // Get array size and emit VLA type. 
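      // For a variably modified private, emitReduction (below) stored the
      // element count in an extra pointer-sized slot of the RedList array;
      // reload it here and bind it to the VLA's size expression through an
      // OpaqueValueMapping before emitting the variably modified type.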
5501 ++Idx; 5502 Address Elem = CGF.Builder.CreateConstArrayGEP(LHS, Idx); 5503 llvm::Value *Ptr = CGF.Builder.CreateLoad(Elem); 5504 const VariableArrayType *VLA = 5505 CGF.getContext().getAsVariableArrayType(PrivTy); 5506 const auto *OVE = cast<OpaqueValueExpr>(VLA->getSizeExpr()); 5507 CodeGenFunction::OpaqueValueMapping OpaqueMap( 5508 CGF, OVE, RValue::get(CGF.Builder.CreatePtrToInt(Ptr, CGF.SizeTy))); 5509 CGF.EmitVariablyModifiedType(PrivTy); 5510 } 5511 } 5512 Scope.Privatize(); 5513 IPriv = Privates.begin(); 5514 auto ILHS = LHSExprs.begin(); 5515 auto IRHS = RHSExprs.begin(); 5516 for (const Expr *E : ReductionOps) { 5517 if ((*IPriv)->getType()->isArrayType()) { 5518 // Emit reduction for array section. 5519 const auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl()); 5520 const auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl()); 5521 EmitOMPAggregateReduction( 5522 CGF, (*IPriv)->getType(), LHSVar, RHSVar, 5523 [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) { 5524 emitReductionCombiner(CGF, E); 5525 }); 5526 } else { 5527 // Emit reduction for array subscript or single variable. 5528 emitReductionCombiner(CGF, E); 5529 } 5530 ++IPriv; 5531 ++ILHS; 5532 ++IRHS; 5533 } 5534 Scope.ForceCleanup(); 5535 CGF.FinishFunction(); 5536 return Fn; 5537 } 5538 5539 void CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF, 5540 const Expr *ReductionOp, 5541 const Expr *PrivateRef, 5542 const DeclRefExpr *LHS, 5543 const DeclRefExpr *RHS) { 5544 if (PrivateRef->getType()->isArrayType()) { 5545 // Emit reduction for array section. 5546 const auto *LHSVar = cast<VarDecl>(LHS->getDecl()); 5547 const auto *RHSVar = cast<VarDecl>(RHS->getDecl()); 5548 EmitOMPAggregateReduction( 5549 CGF, PrivateRef->getType(), LHSVar, RHSVar, 5550 [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) { 5551 emitReductionCombiner(CGF, ReductionOp); 5552 }); 5553 } else { 5554 // Emit reduction for array subscript or single variable. 5555 emitReductionCombiner(CGF, ReductionOp); 5556 } 5557 } 5558 5559 void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc, 5560 ArrayRef<const Expr *> Privates, 5561 ArrayRef<const Expr *> LHSExprs, 5562 ArrayRef<const Expr *> RHSExprs, 5563 ArrayRef<const Expr *> ReductionOps, 5564 ReductionOptionsTy Options) { 5565 if (!CGF.HaveInsertPoint()) 5566 return; 5567 5568 bool WithNowait = Options.WithNowait; 5569 bool SimpleReduction = Options.SimpleReduction; 5570 5571 // Next code should be emitted for reduction: 5572 // 5573 // static kmp_critical_name lock = { 0 }; 5574 // 5575 // void reduce_func(void *lhs[<n>], void *rhs[<n>]) { 5576 // *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]); 5577 // ... 5578 // *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1], 5579 // *(Type<n>-1*)rhs[<n>-1]); 5580 // } 5581 // 5582 // ... 5583 // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]}; 5584 // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList), 5585 // RedList, reduce_func, &<lock>)) { 5586 // case 1: 5587 // ... 5588 // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]); 5589 // ... 5590 // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>); 5591 // break; 5592 // case 2: 5593 // ... 5594 // Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i])); 5595 // ... 
5596 // [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);] 5597 // break; 5598 // default:; 5599 // } 5600 // 5601 // if SimpleReduction is true, only the next code is generated: 5602 // ... 5603 // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]); 5604 // ... 5605 5606 ASTContext &C = CGM.getContext(); 5607 5608 if (SimpleReduction) { 5609 CodeGenFunction::RunCleanupsScope Scope(CGF); 5610 auto IPriv = Privates.begin(); 5611 auto ILHS = LHSExprs.begin(); 5612 auto IRHS = RHSExprs.begin(); 5613 for (const Expr *E : ReductionOps) { 5614 emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS), 5615 cast<DeclRefExpr>(*IRHS)); 5616 ++IPriv; 5617 ++ILHS; 5618 ++IRHS; 5619 } 5620 return; 5621 } 5622 5623 // 1. Build a list of reduction variables. 5624 // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]}; 5625 auto Size = RHSExprs.size(); 5626 for (const Expr *E : Privates) { 5627 if (E->getType()->isVariablyModifiedType()) 5628 // Reserve place for array size. 5629 ++Size; 5630 } 5631 llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size); 5632 QualType ReductionArrayTy = 5633 C.getConstantArrayType(C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal, 5634 /*IndexTypeQuals=*/0); 5635 Address ReductionList = 5636 CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list"); 5637 auto IPriv = Privates.begin(); 5638 unsigned Idx = 0; 5639 for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) { 5640 Address Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx); 5641 CGF.Builder.CreateStore( 5642 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5643 CGF.EmitLValue(RHSExprs[I]).getPointer(CGF), CGF.VoidPtrTy), 5644 Elem); 5645 if ((*IPriv)->getType()->isVariablyModifiedType()) { 5646 // Store array size. 5647 ++Idx; 5648 Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx); 5649 llvm::Value *Size = CGF.Builder.CreateIntCast( 5650 CGF.getVLASize( 5651 CGF.getContext().getAsVariableArrayType((*IPriv)->getType())) 5652 .NumElts, 5653 CGF.SizeTy, /*isSigned=*/false); 5654 CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy), 5655 Elem); 5656 } 5657 } 5658 5659 // 2. Emit reduce_func(). 5660 llvm::Function *ReductionFn = emitReductionFunction( 5661 Loc, CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo(), Privates, 5662 LHSExprs, RHSExprs, ReductionOps); 5663 5664 // 3. Create static kmp_critical_name lock = { 0 }; 5665 std::string Name = getName({"reduction"}); 5666 llvm::Value *Lock = getCriticalRegionLock(Name); 5667 5668 // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList), 5669 // RedList, reduce_func, &<lock>); 5670 llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE); 5671 llvm::Value *ThreadId = getThreadID(CGF, Loc); 5672 llvm::Value *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy); 5673 llvm::Value *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5674 ReductionList.getPointer(), CGF.VoidPtrTy); 5675 llvm::Value *Args[] = { 5676 IdentTLoc, // ident_t *<loc> 5677 ThreadId, // i32 <gtid> 5678 CGF.Builder.getInt32(RHSExprs.size()), // i32 <n> 5679 ReductionArrayTySize, // size_type sizeof(RedList) 5680 RL, // void *RedList 5681 ReductionFn, // void (*) (void *, void *) <reduce_func> 5682 Lock // kmp_critical_name *&<lock> 5683 }; 5684 llvm::Value *Res = CGF.EmitRuntimeCall( 5685 OMPBuilder.getOrCreateRuntimeFunction( 5686 CGM.getModule(), 5687 WithNowait ? OMPRTL___kmpc_reduce_nowait : OMPRTL___kmpc_reduce), 5688 Args); 5689 5690 // 5. 
Build switch(res) 5691 llvm::BasicBlock *DefaultBB = CGF.createBasicBlock(".omp.reduction.default"); 5692 llvm::SwitchInst *SwInst = 5693 CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2); 5694 5695 // 6. Build case 1: 5696 // ... 5697 // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]); 5698 // ... 5699 // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>); 5700 // break; 5701 llvm::BasicBlock *Case1BB = CGF.createBasicBlock(".omp.reduction.case1"); 5702 SwInst->addCase(CGF.Builder.getInt32(1), Case1BB); 5703 CGF.EmitBlock(Case1BB); 5704 5705 // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>); 5706 llvm::Value *EndArgs[] = { 5707 IdentTLoc, // ident_t *<loc> 5708 ThreadId, // i32 <gtid> 5709 Lock // kmp_critical_name *&<lock> 5710 }; 5711 auto &&CodeGen = [Privates, LHSExprs, RHSExprs, ReductionOps]( 5712 CodeGenFunction &CGF, PrePostActionTy &Action) { 5713 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime(); 5714 auto IPriv = Privates.begin(); 5715 auto ILHS = LHSExprs.begin(); 5716 auto IRHS = RHSExprs.begin(); 5717 for (const Expr *E : ReductionOps) { 5718 RT.emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS), 5719 cast<DeclRefExpr>(*IRHS)); 5720 ++IPriv; 5721 ++ILHS; 5722 ++IRHS; 5723 } 5724 }; 5725 RegionCodeGenTy RCG(CodeGen); 5726 CommonActionTy Action( 5727 nullptr, llvm::None, 5728 OMPBuilder.getOrCreateRuntimeFunction( 5729 CGM.getModule(), WithNowait ? OMPRTL___kmpc_end_reduce_nowait 5730 : OMPRTL___kmpc_end_reduce), 5731 EndArgs); 5732 RCG.setAction(Action); 5733 RCG(CGF); 5734 5735 CGF.EmitBranch(DefaultBB); 5736 5737 // 7. Build case 2: 5738 // ... 5739 // Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i])); 5740 // ... 5741 // break; 5742 llvm::BasicBlock *Case2BB = CGF.createBasicBlock(".omp.reduction.case2"); 5743 SwInst->addCase(CGF.Builder.getInt32(2), Case2BB); 5744 CGF.EmitBlock(Case2BB); 5745 5746 auto &&AtomicCodeGen = [Loc, Privates, LHSExprs, RHSExprs, ReductionOps]( 5747 CodeGenFunction &CGF, PrePostActionTy &Action) { 5748 auto ILHS = LHSExprs.begin(); 5749 auto IRHS = RHSExprs.begin(); 5750 auto IPriv = Privates.begin(); 5751 for (const Expr *E : ReductionOps) { 5752 const Expr *XExpr = nullptr; 5753 const Expr *EExpr = nullptr; 5754 const Expr *UpExpr = nullptr; 5755 BinaryOperatorKind BO = BO_Comma; 5756 if (const auto *BO = dyn_cast<BinaryOperator>(E)) { 5757 if (BO->getOpcode() == BO_Assign) { 5758 XExpr = BO->getLHS(); 5759 UpExpr = BO->getRHS(); 5760 } 5761 } 5762 // Try to emit update expression as a simple atomic. 5763 const Expr *RHSExpr = UpExpr; 5764 if (RHSExpr) { 5765 // Analyze RHS part of the whole expression. 5766 if (const auto *ACO = dyn_cast<AbstractConditionalOperator>( 5767 RHSExpr->IgnoreParenImpCasts())) { 5768 // If this is a conditional operator, analyze its condition for 5769 // min/max reduction operator. 
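// Illustrative note (combiner shape taken from typical Sema output, not
// verbatim): for 'reduction(min : x)' the combiner looks like
// 'x = x < rhs ? x : rhs', so the condition's binary operator provides both
// the comparison opcode (BO_LT here) and the operand for the atomic update.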
5770 RHSExpr = ACO->getCond(); 5771 } 5772 if (const auto *BORHS = 5773 dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) { 5774 EExpr = BORHS->getRHS(); 5775 BO = BORHS->getOpcode(); 5776 } 5777 } 5778 if (XExpr) { 5779 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl()); 5780 auto &&AtomicRedGen = [BO, VD, 5781 Loc](CodeGenFunction &CGF, const Expr *XExpr, 5782 const Expr *EExpr, const Expr *UpExpr) { 5783 LValue X = CGF.EmitLValue(XExpr); 5784 RValue E; 5785 if (EExpr) 5786 E = CGF.EmitAnyExpr(EExpr); 5787 CGF.EmitOMPAtomicSimpleUpdateExpr( 5788 X, E, BO, /*IsXLHSInRHSPart=*/true, 5789 llvm::AtomicOrdering::Monotonic, Loc, 5790 [&CGF, UpExpr, VD, Loc](RValue XRValue) { 5791 CodeGenFunction::OMPPrivateScope PrivateScope(CGF); 5792 PrivateScope.addPrivate( 5793 VD, [&CGF, VD, XRValue, Loc]() { 5794 Address LHSTemp = CGF.CreateMemTemp(VD->getType()); 5795 CGF.emitOMPSimpleStore( 5796 CGF.MakeAddrLValue(LHSTemp, VD->getType()), XRValue, 5797 VD->getType().getNonReferenceType(), Loc); 5798 return LHSTemp; 5799 }); 5800 (void)PrivateScope.Privatize(); 5801 return CGF.EmitAnyExpr(UpExpr); 5802 }); 5803 }; 5804 if ((*IPriv)->getType()->isArrayType()) { 5805 // Emit atomic reduction for array section. 5806 const auto *RHSVar = 5807 cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl()); 5808 EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar, 5809 AtomicRedGen, XExpr, EExpr, UpExpr); 5810 } else { 5811 // Emit atomic reduction for array subscript or single variable. 5812 AtomicRedGen(CGF, XExpr, EExpr, UpExpr); 5813 } 5814 } else { 5815 // Emit as a critical region. 5816 auto &&CritRedGen = [E, Loc](CodeGenFunction &CGF, const Expr *, 5817 const Expr *, const Expr *) { 5818 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime(); 5819 std::string Name = RT.getName({"atomic_reduction"}); 5820 RT.emitCriticalRegion( 5821 CGF, Name, 5822 [=](CodeGenFunction &CGF, PrePostActionTy &Action) { 5823 Action.Enter(CGF); 5824 emitReductionCombiner(CGF, E); 5825 }, 5826 Loc); 5827 }; 5828 if ((*IPriv)->getType()->isArrayType()) { 5829 const auto *LHSVar = 5830 cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl()); 5831 const auto *RHSVar = 5832 cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl()); 5833 EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar, 5834 CritRedGen); 5835 } else { 5836 CritRedGen(CGF, nullptr, nullptr, nullptr); 5837 } 5838 } 5839 ++ILHS; 5840 ++IRHS; 5841 ++IPriv; 5842 } 5843 }; 5844 RegionCodeGenTy AtomicRCG(AtomicCodeGen); 5845 if (!WithNowait) { 5846 // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>); 5847 llvm::Value *EndArgs[] = { 5848 IdentTLoc, // ident_t *<loc> 5849 ThreadId, // i32 <gtid> 5850 Lock // kmp_critical_name *&<lock> 5851 }; 5852 CommonActionTy Action(nullptr, llvm::None, 5853 OMPBuilder.getOrCreateRuntimeFunction( 5854 CGM.getModule(), OMPRTL___kmpc_end_reduce), 5855 EndArgs); 5856 AtomicRCG.setAction(Action); 5857 AtomicRCG(CGF); 5858 } else { 5859 AtomicRCG(CGF); 5860 } 5861 5862 CGF.EmitBranch(DefaultBB); 5863 CGF.EmitBlock(DefaultBB, /*IsFinished=*/true); 5864 } 5865 5866 /// Generates unique name for artificial threadprivate variables. 5867 /// Format is: <Prefix> "." 
<Decl_mangled_name> "_" "<Decl_start_loc_raw_enc>" 5868 static std::string generateUniqueName(CodeGenModule &CGM, StringRef Prefix, 5869 const Expr *Ref) { 5870 SmallString<256> Buffer; 5871 llvm::raw_svector_ostream Out(Buffer); 5872 const clang::DeclRefExpr *DE; 5873 const VarDecl *D = ::getBaseDecl(Ref, DE); 5874 if (!D) 5875 D = cast<VarDecl>(cast<DeclRefExpr>(Ref)->getDecl()); 5876 D = D->getCanonicalDecl(); 5877 std::string Name = CGM.getOpenMPRuntime().getName( 5878 {D->isLocalVarDeclOrParm() ? D->getName() : CGM.getMangledName(D)}); 5879 Out << Prefix << Name << "_" 5880 << D->getCanonicalDecl()->getBeginLoc().getRawEncoding(); 5881 return std::string(Out.str()); 5882 } 5883 5884 /// Emits reduction initializer function: 5885 /// \code 5886 /// void @.red_init(void* %arg, void* %orig) { 5887 /// %0 = bitcast void* %arg to <type>* 5888 /// store <type> <init>, <type>* %0 5889 /// ret void 5890 /// } 5891 /// \endcode 5892 static llvm::Value *emitReduceInitFunction(CodeGenModule &CGM, 5893 SourceLocation Loc, 5894 ReductionCodeGen &RCG, unsigned N) { 5895 ASTContext &C = CGM.getContext(); 5896 QualType VoidPtrTy = C.VoidPtrTy; 5897 VoidPtrTy.addRestrict(); 5898 FunctionArgList Args; 5899 ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy, 5900 ImplicitParamDecl::Other); 5901 ImplicitParamDecl ParamOrig(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy, 5902 ImplicitParamDecl::Other); 5903 Args.emplace_back(&Param); 5904 Args.emplace_back(&ParamOrig); 5905 const auto &FnInfo = 5906 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 5907 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo); 5908 std::string Name = CGM.getOpenMPRuntime().getName({"red_init", ""}); 5909 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage, 5910 Name, &CGM.getModule()); 5911 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo); 5912 Fn->setDoesNotRecurse(); 5913 CodeGenFunction CGF(CGM); 5914 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc); 5915 Address PrivateAddr = CGF.EmitLoadOfPointer( 5916 CGF.GetAddrOfLocalVar(&Param), 5917 C.getPointerType(C.VoidPtrTy).castAs<PointerType>()); 5918 llvm::Value *Size = nullptr; 5919 // If the size of the reduction item is non-constant, load it from global 5920 // threadprivate variable. 
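// Note: for the non-constant case the size is stored beforehand by
// emitTaskReductionFixups under a name produced by generateUniqueName(CGM,
// "reduction_size", ...); the resulting spelling (roughly
// "reduction_size.<decl>_<raw-loc>") is illustrative only.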
5921 if (RCG.getSizes(N).second) {
5922 Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
5923 CGF, CGM.getContext().getSizeType(),
5924 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
5925 Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
5926 CGM.getContext().getSizeType(), Loc);
5927 }
5928 RCG.emitAggregateType(CGF, N, Size);
5929 LValue OrigLVal;
5930 // If the initializer uses the initializer from the 'declare reduction'
5931 // construct, emit a pointer to the address of the original reduction item
5932 // (required by the reduction initializer).
5933 if (RCG.usesReductionInitializer(N)) {
5934 Address SharedAddr = CGF.GetAddrOfLocalVar(&ParamOrig);
5935 SharedAddr = CGF.EmitLoadOfPointer(
5936 SharedAddr,
5937 CGM.getContext().VoidPtrTy.castAs<PointerType>()->getTypePtr());
5938 OrigLVal = CGF.MakeAddrLValue(SharedAddr, CGM.getContext().VoidPtrTy);
5939 } else {
5940 OrigLVal = CGF.MakeNaturalAlignAddrLValue(
5941 llvm::ConstantPointerNull::get(CGM.VoidPtrTy),
5942 CGM.getContext().VoidPtrTy);
5943 }
5944 // Emit the initializer:
5945 // %0 = bitcast void* %arg to <type>*
5946 // store <type> <init>, <type>* %0
5947 RCG.emitInitialization(CGF, N, PrivateAddr, OrigLVal,
5948 [](CodeGenFunction &) { return false; });
5949 CGF.FinishFunction();
5950 return Fn;
5951 }
5952
5953 /// Emits reduction combiner function:
5954 /// \code
5955 /// void @.red_comb(void* %arg0, void* %arg1) {
5956 /// %lhs = bitcast void* %arg0 to <type>*
5957 /// %rhs = bitcast void* %arg1 to <type>*
5958 /// %2 = <ReductionOp>(<type>* %lhs, <type>* %rhs)
5959 /// store <type> %2, <type>* %lhs
5960 /// ret void
5961 /// }
5962 /// \endcode
5963 static llvm::Value *emitReduceCombFunction(CodeGenModule &CGM,
5964 SourceLocation Loc,
5965 ReductionCodeGen &RCG, unsigned N,
5966 const Expr *ReductionOp,
5967 const Expr *LHS, const Expr *RHS,
5968 const Expr *PrivateRef) {
5969 ASTContext &C = CGM.getContext();
5970 const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(LHS)->getDecl());
5971 const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(RHS)->getDecl());
5972 FunctionArgList Args;
5973 ImplicitParamDecl ParamInOut(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
5974 C.VoidPtrTy, ImplicitParamDecl::Other);
5975 ImplicitParamDecl ParamIn(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
5976 ImplicitParamDecl::Other);
5977 Args.emplace_back(&ParamInOut);
5978 Args.emplace_back(&ParamIn);
5979 const auto &FnInfo =
5980 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
5981 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
5982 std::string Name = CGM.getOpenMPRuntime().getName({"red_comb", ""});
5983 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
5984 Name, &CGM.getModule());
5985 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
5986 Fn->setDoesNotRecurse();
5987 CodeGenFunction CGF(CGM);
5988 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
5989 llvm::Value *Size = nullptr;
5990 // If the size of the reduction item is non-constant, load it from global
5991 // threadprivate variable.
5992 if (RCG.getSizes(N).second) { 5993 Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate( 5994 CGF, CGM.getContext().getSizeType(), 5995 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N))); 5996 Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false, 5997 CGM.getContext().getSizeType(), Loc); 5998 } 5999 RCG.emitAggregateType(CGF, N, Size); 6000 // Remap lhs and rhs variables to the addresses of the function arguments. 6001 // %lhs = bitcast void* %arg0 to <type>* 6002 // %rhs = bitcast void* %arg1 to <type>* 6003 CodeGenFunction::OMPPrivateScope PrivateScope(CGF); 6004 PrivateScope.addPrivate(LHSVD, [&C, &CGF, &ParamInOut, LHSVD]() { 6005 // Pull out the pointer to the variable. 6006 Address PtrAddr = CGF.EmitLoadOfPointer( 6007 CGF.GetAddrOfLocalVar(&ParamInOut), 6008 C.getPointerType(C.VoidPtrTy).castAs<PointerType>()); 6009 return CGF.Builder.CreateElementBitCast( 6010 PtrAddr, CGF.ConvertTypeForMem(LHSVD->getType())); 6011 }); 6012 PrivateScope.addPrivate(RHSVD, [&C, &CGF, &ParamIn, RHSVD]() { 6013 // Pull out the pointer to the variable. 6014 Address PtrAddr = CGF.EmitLoadOfPointer( 6015 CGF.GetAddrOfLocalVar(&ParamIn), 6016 C.getPointerType(C.VoidPtrTy).castAs<PointerType>()); 6017 return CGF.Builder.CreateElementBitCast( 6018 PtrAddr, CGF.ConvertTypeForMem(RHSVD->getType())); 6019 }); 6020 PrivateScope.Privatize(); 6021 // Emit the combiner body: 6022 // %2 = <ReductionOp>(<type> *%lhs, <type> *%rhs) 6023 // store <type> %2, <type>* %lhs 6024 CGM.getOpenMPRuntime().emitSingleReductionCombiner( 6025 CGF, ReductionOp, PrivateRef, cast<DeclRefExpr>(LHS), 6026 cast<DeclRefExpr>(RHS)); 6027 CGF.FinishFunction(); 6028 return Fn; 6029 } 6030 6031 /// Emits reduction finalizer function: 6032 /// \code 6033 /// void @.red_fini(void* %arg) { 6034 /// %0 = bitcast void* %arg to <type>* 6035 /// <destroy>(<type>* %0) 6036 /// ret void 6037 /// } 6038 /// \endcode 6039 static llvm::Value *emitReduceFiniFunction(CodeGenModule &CGM, 6040 SourceLocation Loc, 6041 ReductionCodeGen &RCG, unsigned N) { 6042 if (!RCG.needCleanups(N)) 6043 return nullptr; 6044 ASTContext &C = CGM.getContext(); 6045 FunctionArgList Args; 6046 ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 6047 ImplicitParamDecl::Other); 6048 Args.emplace_back(&Param); 6049 const auto &FnInfo = 6050 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 6051 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo); 6052 std::string Name = CGM.getOpenMPRuntime().getName({"red_fini", ""}); 6053 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage, 6054 Name, &CGM.getModule()); 6055 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo); 6056 Fn->setDoesNotRecurse(); 6057 CodeGenFunction CGF(CGM); 6058 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc); 6059 Address PrivateAddr = CGF.EmitLoadOfPointer( 6060 CGF.GetAddrOfLocalVar(&Param), 6061 C.getPointerType(C.VoidPtrTy).castAs<PointerType>()); 6062 llvm::Value *Size = nullptr; 6063 // If the size of the reduction item is non-constant, load it from global 6064 // threadprivate variable. 
6065 if (RCG.getSizes(N).second) { 6066 Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate( 6067 CGF, CGM.getContext().getSizeType(), 6068 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N))); 6069 Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false, 6070 CGM.getContext().getSizeType(), Loc); 6071 } 6072 RCG.emitAggregateType(CGF, N, Size); 6073 // Emit the finalizer body: 6074 // <destroy>(<type>* %0) 6075 RCG.emitCleanups(CGF, N, PrivateAddr); 6076 CGF.FinishFunction(Loc); 6077 return Fn; 6078 } 6079 6080 llvm::Value *CGOpenMPRuntime::emitTaskReductionInit( 6081 CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs, 6082 ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) { 6083 if (!CGF.HaveInsertPoint() || Data.ReductionVars.empty()) 6084 return nullptr; 6085 6086 // Build typedef struct: 6087 // kmp_taskred_input { 6088 // void *reduce_shar; // shared reduction item 6089 // void *reduce_orig; // original reduction item used for initialization 6090 // size_t reduce_size; // size of data item 6091 // void *reduce_init; // data initialization routine 6092 // void *reduce_fini; // data finalization routine 6093 // void *reduce_comb; // data combiner routine 6094 // kmp_task_red_flags_t flags; // flags for additional info from compiler 6095 // } kmp_taskred_input_t; 6096 ASTContext &C = CGM.getContext(); 6097 RecordDecl *RD = C.buildImplicitRecord("kmp_taskred_input_t"); 6098 RD->startDefinition(); 6099 const FieldDecl *SharedFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy); 6100 const FieldDecl *OrigFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy); 6101 const FieldDecl *SizeFD = addFieldToRecordDecl(C, RD, C.getSizeType()); 6102 const FieldDecl *InitFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy); 6103 const FieldDecl *FiniFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy); 6104 const FieldDecl *CombFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy); 6105 const FieldDecl *FlagsFD = addFieldToRecordDecl( 6106 C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/false)); 6107 RD->completeDefinition(); 6108 QualType RDType = C.getRecordType(RD); 6109 unsigned Size = Data.ReductionVars.size(); 6110 llvm::APInt ArraySize(/*numBits=*/64, Size); 6111 QualType ArrayRDType = C.getConstantArrayType( 6112 RDType, ArraySize, nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0); 6113 // kmp_task_red_input_t .rd_input.[Size]; 6114 Address TaskRedInput = CGF.CreateMemTemp(ArrayRDType, ".rd_input."); 6115 ReductionCodeGen RCG(Data.ReductionVars, Data.ReductionOrigs, 6116 Data.ReductionCopies, Data.ReductionOps); 6117 for (unsigned Cnt = 0; Cnt < Size; ++Cnt) { 6118 // kmp_task_red_input_t &ElemLVal = .rd_input.[Cnt]; 6119 llvm::Value *Idxs[] = {llvm::ConstantInt::get(CGM.SizeTy, /*V=*/0), 6120 llvm::ConstantInt::get(CGM.SizeTy, Cnt)}; 6121 llvm::Value *GEP = CGF.EmitCheckedInBoundsGEP( 6122 TaskRedInput.getPointer(), Idxs, 6123 /*SignedIndices=*/false, /*IsSubtraction=*/false, Loc, 6124 ".rd_input.gep."); 6125 LValue ElemLVal = CGF.MakeNaturalAlignAddrLValue(GEP, RDType); 6126 // ElemLVal.reduce_shar = &Shareds[Cnt]; 6127 LValue SharedLVal = CGF.EmitLValueForField(ElemLVal, SharedFD); 6128 RCG.emitSharedOrigLValue(CGF, Cnt); 6129 llvm::Value *CastedShared = 6130 CGF.EmitCastToVoidPtr(RCG.getSharedLValue(Cnt).getPointer(CGF)); 6131 CGF.EmitStoreOfScalar(CastedShared, SharedLVal); 6132 // ElemLVal.reduce_orig = &Origs[Cnt]; 6133 LValue OrigLVal = CGF.EmitLValueForField(ElemLVal, OrigFD); 6134 llvm::Value *CastedOrig = 6135 
CGF.EmitCastToVoidPtr(RCG.getOrigLValue(Cnt).getPointer(CGF));
6136 CGF.EmitStoreOfScalar(CastedOrig, OrigLVal);
6137 RCG.emitAggregateType(CGF, Cnt);
6138 llvm::Value *SizeValInChars;
6139 llvm::Value *SizeVal;
6140 std::tie(SizeValInChars, SizeVal) = RCG.getSizes(Cnt);
6141 // We use delayed creation/initialization for VLAs and array sections. It is
6142 // required because the runtime does not provide a way to pass the sizes of
6143 // VLAs/array sections to the initializer/combiner/finalizer functions.
6144 // Instead, threadprivate global variables are used to store these values,
6145 // and the functions read them from there.
6146 bool DelayedCreation = !!SizeVal;
6147 SizeValInChars = CGF.Builder.CreateIntCast(SizeValInChars, CGM.SizeTy,
6148 /*isSigned=*/false);
6149 LValue SizeLVal = CGF.EmitLValueForField(ElemLVal, SizeFD);
6150 CGF.EmitStoreOfScalar(SizeValInChars, SizeLVal);
6151 // ElemLVal.reduce_init = init;
6152 LValue InitLVal = CGF.EmitLValueForField(ElemLVal, InitFD);
6153 llvm::Value *InitAddr =
6154 CGF.EmitCastToVoidPtr(emitReduceInitFunction(CGM, Loc, RCG, Cnt));
6155 CGF.EmitStoreOfScalar(InitAddr, InitLVal);
6156 // ElemLVal.reduce_fini = fini;
6157 LValue FiniLVal = CGF.EmitLValueForField(ElemLVal, FiniFD);
6158 llvm::Value *Fini = emitReduceFiniFunction(CGM, Loc, RCG, Cnt);
6159 llvm::Value *FiniAddr = Fini
6160 ? CGF.EmitCastToVoidPtr(Fini)
6161 : llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
6162 CGF.EmitStoreOfScalar(FiniAddr, FiniLVal);
6163 // ElemLVal.reduce_comb = comb;
6164 LValue CombLVal = CGF.EmitLValueForField(ElemLVal, CombFD);
6165 llvm::Value *CombAddr = CGF.EmitCastToVoidPtr(emitReduceCombFunction(
6166 CGM, Loc, RCG, Cnt, Data.ReductionOps[Cnt], LHSExprs[Cnt],
6167 RHSExprs[Cnt], Data.ReductionCopies[Cnt]));
6168 CGF.EmitStoreOfScalar(CombAddr, CombLVal);
6169 // ElemLVal.flags = DelayedCreation ? 1 : 0;
6170 LValue FlagsLVal = CGF.EmitLValueForField(ElemLVal, FlagsFD);
6171 if (DelayedCreation) {
6172 CGF.EmitStoreOfScalar(
6173 llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/1, /*isSigned=*/true),
6174 FlagsLVal);
6175 } else
6176 CGF.EmitNullInitialization(FlagsLVal.getAddress(CGF),
6177 FlagsLVal.getType());
6178 }
6179 if (Data.IsReductionWithTaskMod) {
6180 // Build call void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int
6181 // is_ws, int num, void *data);
6182 llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
6183 llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
6184 CGM.IntTy, /*isSigned=*/true);
6185 llvm::Value *Args[] = {
6186 IdentTLoc, GTid,
6187 llvm::ConstantInt::get(CGM.IntTy, Data.IsWorksharingReduction ?
1 : 0,
6188 /*isSigned=*/true),
6189 llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
6190 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
6191 TaskRedInput.getPointer(), CGM.VoidPtrTy)};
6192 return CGF.EmitRuntimeCall(
6193 OMPBuilder.getOrCreateRuntimeFunction(
6194 CGM.getModule(), OMPRTL___kmpc_taskred_modifier_init),
6195 Args);
6196 }
6197 // Build call void *__kmpc_taskred_init(int gtid, int num_data, void *data);
6198 llvm::Value *Args[] = {
6199 CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy,
6200 /*isSigned=*/true),
6201 llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
6202 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TaskRedInput.getPointer(),
6203 CGM.VoidPtrTy)};
6204 return CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
6205 CGM.getModule(), OMPRTL___kmpc_taskred_init),
6206 Args);
6207 }
6208
6209 void CGOpenMPRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
6210 SourceLocation Loc,
6211 bool IsWorksharingReduction) {
6212 // Build call void __kmpc_task_reduction_modifier_fini(ident_t *loc, int
6213 // gtid, int is_ws);
6214 llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
6215 llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
6216 CGM.IntTy, /*isSigned=*/true);
6217 llvm::Value *Args[] = {IdentTLoc, GTid,
6218 llvm::ConstantInt::get(CGM.IntTy,
6219 IsWorksharingReduction ? 1 : 0,
6220 /*isSigned=*/true)};
6221 (void)CGF.EmitRuntimeCall(
6222 OMPBuilder.getOrCreateRuntimeFunction(
6223 CGM.getModule(), OMPRTL___kmpc_task_reduction_modifier_fini),
6224 Args);
6225 }
6226
6227 void CGOpenMPRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
6228 SourceLocation Loc,
6229 ReductionCodeGen &RCG,
6230 unsigned N) {
6231 auto Sizes = RCG.getSizes(N);
6232 // Emit threadprivate global variable if the type is non-constant
6233 // (Sizes.second != nullptr).
6234 if (Sizes.second) {
6235 llvm::Value *SizeVal = CGF.Builder.CreateIntCast(Sizes.second, CGM.SizeTy,
6236 /*isSigned=*/false);
6237 Address SizeAddr = getAddrOfArtificialThreadPrivate(
6238 CGF, CGM.getContext().getSizeType(),
6239 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
6240 CGF.Builder.CreateStore(SizeVal, SizeAddr, /*IsVolatile=*/false);
6241 }
6242 }
6243
6244 Address CGOpenMPRuntime::getTaskReductionItem(CodeGenFunction &CGF,
6245 SourceLocation Loc,
6246 llvm::Value *ReductionsPtr,
6247 LValue SharedLVal) {
6248 // Build call void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
6249 // *d);
6250 llvm::Value *Args[] = {CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
6251 CGM.IntTy,
6252 /*isSigned=*/true),
6253 ReductionsPtr,
6254 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
6255 SharedLVal.getPointer(CGF), CGM.VoidPtrTy)};
6256 return Address(
6257 CGF.EmitRuntimeCall(
6258 OMPBuilder.getOrCreateRuntimeFunction(
6259 CGM.getModule(), OMPRTL___kmpc_task_reduction_get_th_data),
6260 Args),
6261 SharedLVal.getAlignment());
6262 }
6263
6264 void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
6265 SourceLocation Loc) {
6266 if (!CGF.HaveInsertPoint())
6267 return;
6268
6269 if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
6270 OMPBuilder.createTaskwait(CGF.Builder);
6271 } else {
6272 // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
6273 // global_tid);
6274 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
6275 // Ignore return result until untied tasks are supported.
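// The emitted call has roughly this shape (illustrative IR, names inexact):
//   %res = call i32 @__kmpc_omp_taskwait(%struct.ident_t* @loc, i32 %gtid)
// with %res currently discarded.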
6276 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 6277 CGM.getModule(), OMPRTL___kmpc_omp_taskwait), 6278 Args); 6279 } 6280 6281 if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) 6282 Region->emitUntiedSwitch(CGF); 6283 } 6284 6285 void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF, 6286 OpenMPDirectiveKind InnerKind, 6287 const RegionCodeGenTy &CodeGen, 6288 bool HasCancel) { 6289 if (!CGF.HaveInsertPoint()) 6290 return; 6291 InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel, 6292 InnerKind != OMPD_critical && 6293 InnerKind != OMPD_master && 6294 InnerKind != OMPD_masked); 6295 CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr); 6296 } 6297 6298 namespace { 6299 enum RTCancelKind { 6300 CancelNoreq = 0, 6301 CancelParallel = 1, 6302 CancelLoop = 2, 6303 CancelSections = 3, 6304 CancelTaskgroup = 4 6305 }; 6306 } // anonymous namespace 6307 6308 static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) { 6309 RTCancelKind CancelKind = CancelNoreq; 6310 if (CancelRegion == OMPD_parallel) 6311 CancelKind = CancelParallel; 6312 else if (CancelRegion == OMPD_for) 6313 CancelKind = CancelLoop; 6314 else if (CancelRegion == OMPD_sections) 6315 CancelKind = CancelSections; 6316 else { 6317 assert(CancelRegion == OMPD_taskgroup); 6318 CancelKind = CancelTaskgroup; 6319 } 6320 return CancelKind; 6321 } 6322 6323 void CGOpenMPRuntime::emitCancellationPointCall( 6324 CodeGenFunction &CGF, SourceLocation Loc, 6325 OpenMPDirectiveKind CancelRegion) { 6326 if (!CGF.HaveInsertPoint()) 6327 return; 6328 // Build call kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32 6329 // global_tid, kmp_int32 cncl_kind); 6330 if (auto *OMPRegionInfo = 6331 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) { 6332 // For 'cancellation point taskgroup', the task region info may not have a 6333 // cancel. This may instead happen in another adjacent task. 6334 if (CancelRegion == OMPD_taskgroup || OMPRegionInfo->hasCancel()) { 6335 llvm::Value *Args[] = { 6336 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 6337 CGF.Builder.getInt32(getCancellationKind(CancelRegion))}; 6338 // Ignore return result until untied tasks are supported. 
6339 llvm::Value *Result = CGF.EmitRuntimeCall( 6340 OMPBuilder.getOrCreateRuntimeFunction( 6341 CGM.getModule(), OMPRTL___kmpc_cancellationpoint), 6342 Args); 6343 // if (__kmpc_cancellationpoint()) { 6344 // call i32 @__kmpc_cancel_barrier( // for parallel cancellation only 6345 // exit from construct; 6346 // } 6347 llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit"); 6348 llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue"); 6349 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result); 6350 CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB); 6351 CGF.EmitBlock(ExitBB); 6352 if (CancelRegion == OMPD_parallel) 6353 emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false); 6354 // exit from construct; 6355 CodeGenFunction::JumpDest CancelDest = 6356 CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind()); 6357 CGF.EmitBranchThroughCleanup(CancelDest); 6358 CGF.EmitBlock(ContBB, /*IsFinished=*/true); 6359 } 6360 } 6361 } 6362 6363 void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc, 6364 const Expr *IfCond, 6365 OpenMPDirectiveKind CancelRegion) { 6366 if (!CGF.HaveInsertPoint()) 6367 return; 6368 // Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid, 6369 // kmp_int32 cncl_kind); 6370 auto &M = CGM.getModule(); 6371 if (auto *OMPRegionInfo = 6372 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) { 6373 auto &&ThenGen = [this, &M, Loc, CancelRegion, 6374 OMPRegionInfo](CodeGenFunction &CGF, PrePostActionTy &) { 6375 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime(); 6376 llvm::Value *Args[] = { 6377 RT.emitUpdateLocation(CGF, Loc), RT.getThreadID(CGF, Loc), 6378 CGF.Builder.getInt32(getCancellationKind(CancelRegion))}; 6379 // Ignore return result until untied tasks are supported. 6380 llvm::Value *Result = CGF.EmitRuntimeCall( 6381 OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_cancel), Args); 6382 // if (__kmpc_cancel()) { 6383 // call i32 @__kmpc_cancel_barrier( // for parallel cancellation only 6384 // exit from construct; 6385 // } 6386 llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit"); 6387 llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue"); 6388 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result); 6389 CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB); 6390 CGF.EmitBlock(ExitBB); 6391 if (CancelRegion == OMPD_parallel) 6392 RT.emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false); 6393 // exit from construct; 6394 CodeGenFunction::JumpDest CancelDest = 6395 CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind()); 6396 CGF.EmitBranchThroughCleanup(CancelDest); 6397 CGF.EmitBlock(ContBB, /*IsFinished=*/true); 6398 }; 6399 if (IfCond) { 6400 emitIfClause(CGF, IfCond, ThenGen, 6401 [](CodeGenFunction &, PrePostActionTy &) {}); 6402 } else { 6403 RegionCodeGenTy ThenRCG(ThenGen); 6404 ThenRCG(CGF); 6405 } 6406 } 6407 } 6408 6409 namespace { 6410 /// Cleanup action for uses_allocators support. 
6411 class OMPUsesAllocatorsActionTy final : public PrePostActionTy { 6412 ArrayRef<std::pair<const Expr *, const Expr *>> Allocators; 6413 6414 public: 6415 OMPUsesAllocatorsActionTy( 6416 ArrayRef<std::pair<const Expr *, const Expr *>> Allocators) 6417 : Allocators(Allocators) {} 6418 void Enter(CodeGenFunction &CGF) override { 6419 if (!CGF.HaveInsertPoint()) 6420 return; 6421 for (const auto &AllocatorData : Allocators) { 6422 CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsInit( 6423 CGF, AllocatorData.first, AllocatorData.second); 6424 } 6425 } 6426 void Exit(CodeGenFunction &CGF) override { 6427 if (!CGF.HaveInsertPoint()) 6428 return; 6429 for (const auto &AllocatorData : Allocators) { 6430 CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsFini(CGF, 6431 AllocatorData.first); 6432 } 6433 } 6434 }; 6435 } // namespace 6436 6437 void CGOpenMPRuntime::emitTargetOutlinedFunction( 6438 const OMPExecutableDirective &D, StringRef ParentName, 6439 llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, 6440 bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) { 6441 assert(!ParentName.empty() && "Invalid target region parent name!"); 6442 HasEmittedTargetRegion = true; 6443 SmallVector<std::pair<const Expr *, const Expr *>, 4> Allocators; 6444 for (const auto *C : D.getClausesOfKind<OMPUsesAllocatorsClause>()) { 6445 for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) { 6446 const OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I); 6447 if (!D.AllocatorTraits) 6448 continue; 6449 Allocators.emplace_back(D.Allocator, D.AllocatorTraits); 6450 } 6451 } 6452 OMPUsesAllocatorsActionTy UsesAllocatorAction(Allocators); 6453 CodeGen.setAction(UsesAllocatorAction); 6454 emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID, 6455 IsOffloadEntry, CodeGen); 6456 } 6457 6458 void CGOpenMPRuntime::emitUsesAllocatorsInit(CodeGenFunction &CGF, 6459 const Expr *Allocator, 6460 const Expr *AllocatorTraits) { 6461 llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc()); 6462 ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true); 6463 // Use default memspace handle. 6464 llvm::Value *MemSpaceHandle = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); 6465 llvm::Value *NumTraits = llvm::ConstantInt::get( 6466 CGF.IntTy, cast<ConstantArrayType>( 6467 AllocatorTraits->getType()->getAsArrayTypeUnsafe()) 6468 ->getSize() 6469 .getLimitedValue()); 6470 LValue AllocatorTraitsLVal = CGF.EmitLValue(AllocatorTraits); 6471 Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 6472 AllocatorTraitsLVal.getAddress(CGF), CGF.VoidPtrPtrTy); 6473 AllocatorTraitsLVal = CGF.MakeAddrLValue(Addr, CGF.getContext().VoidPtrTy, 6474 AllocatorTraitsLVal.getBaseInfo(), 6475 AllocatorTraitsLVal.getTBAAInfo()); 6476 llvm::Value *Traits = 6477 CGF.EmitLoadOfScalar(AllocatorTraitsLVal, AllocatorTraits->getExprLoc()); 6478 6479 llvm::Value *AllocatorVal = 6480 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 6481 CGM.getModule(), OMPRTL___kmpc_init_allocator), 6482 {ThreadId, MemSpaceHandle, NumTraits, Traits}); 6483 // Store to allocator. 
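// That is, declare the allocator variable named in the uses_allocators
// clause and initialize it with the handle returned by
// __kmpc_init_allocator, converted from void* back to the variable's
// declared type (typically omp_allocator_handle_t).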
CGF.EmitVarDecl(*cast<VarDecl>(
6485 cast<DeclRefExpr>(Allocator->IgnoreParenImpCasts())->getDecl()));
6486 LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
6487 AllocatorVal =
6488 CGF.EmitScalarConversion(AllocatorVal, CGF.getContext().VoidPtrTy,
6489 Allocator->getType(), Allocator->getExprLoc());
6490 CGF.EmitStoreOfScalar(AllocatorVal, AllocatorLVal);
6491 }
6492
6493 void CGOpenMPRuntime::emitUsesAllocatorsFini(CodeGenFunction &CGF,
6494 const Expr *Allocator) {
6495 llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc());
6496 ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true);
6497 LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
6498 llvm::Value *AllocatorVal =
6499 CGF.EmitLoadOfScalar(AllocatorLVal, Allocator->getExprLoc());
6500 AllocatorVal = CGF.EmitScalarConversion(AllocatorVal, Allocator->getType(),
6501 CGF.getContext().VoidPtrTy,
6502 Allocator->getExprLoc());
6503 (void)CGF.EmitRuntimeCall(
6504 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
6505 OMPRTL___kmpc_destroy_allocator),
6506 {ThreadId, AllocatorVal});
6507 }
6508
6509 void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper(
6510 const OMPExecutableDirective &D, StringRef ParentName,
6511 llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
6512 bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
6513 // Create a unique name for the entry function using the source location
6514 // information of the current target region. The name will be something like:
6515 //
6516 // __omp_offloading_DD_FFFF_PP_lBB
6517 //
6518 // where DD_FFFF is an ID unique to the file (device and file IDs), PP is the
6519 // mangled name of the function that encloses the target region and BB is the
6520 // line number of the target region.
6521
6522 unsigned DeviceID;
6523 unsigned FileID;
6524 unsigned Line;
6525 getTargetEntryUniqueInfo(CGM.getContext(), D.getBeginLoc(), DeviceID, FileID,
6526 Line);
6527 SmallString<64> EntryFnName;
6528 {
6529 llvm::raw_svector_ostream OS(EntryFnName);
6530 OS << "__omp_offloading" << llvm::format("_%x", DeviceID)
6531 << llvm::format("_%x_", FileID) << ParentName << "_l" << Line;
6532 }
6533
6534 const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
6535
6536 CodeGenFunction CGF(CGM, true);
6537 CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName);
6538 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6539
6540 OutlinedFn = CGF.GenerateOpenMPCapturedStmtFunction(CS, D.getBeginLoc());
6541
6542 // If this target outlined function is not an offload entry, we don't need to
6543 // register it.
6544 if (!IsOffloadEntry)
6545 return;
6546
6547 // The target region ID is used by the runtime library to identify the current
6548 // target region, so it only has to be unique and not necessarily point to
6549 // anything. It could be the pointer to the outlined function that implements
6550 // the target region, but we don't use it, so that the compiler doesn't need
6551 // to keep it alive and can therefore inline the host function if that proves
6552 // worthwhile during optimization. On the other hand, if emitting code for the
6553 // device, the ID has to be the function address so that it can be retrieved
6554 // from the offloading entry and launched by the runtime library. We also mark
6555 // the outlined function to have external linkage in case we are emitting code
6556 // for the device, because these functions will be entry points to the device.
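// Illustratively (exact names vary), the host ends up with something like
//   @<entry>.region_id = weak constant i8 0
// while on the device the ID is simply a bitcast of the outlined kernel
// function itself.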
6557
6558 if (CGM.getLangOpts().OpenMPIsDevice) {
6559 OutlinedFnID = llvm::ConstantExpr::getBitCast(OutlinedFn, CGM.Int8PtrTy);
6560 OutlinedFn->setLinkage(llvm::GlobalValue::WeakAnyLinkage);
6561 OutlinedFn->setDSOLocal(false);
6562 if (CGM.getTriple().isAMDGCN())
6563 OutlinedFn->setCallingConv(llvm::CallingConv::AMDGPU_KERNEL);
6564 } else {
6565 std::string Name = getName({EntryFnName, "region_id"});
6566 OutlinedFnID = new llvm::GlobalVariable(
6567 CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
6568 llvm::GlobalValue::WeakAnyLinkage,
6569 llvm::Constant::getNullValue(CGM.Int8Ty), Name);
6570 }
6571
6572 // Register the information for the entry associated with this target region.
6573 OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
6574 DeviceID, FileID, ParentName, Line, OutlinedFn, OutlinedFnID,
6575 OffloadEntriesInfoManagerTy::OMPTargetRegionEntryTargetRegion);
6576
6577 // Add NumTeams and ThreadLimit attributes to the outlined GPU function.
6578 int32_t DefaultValTeams = -1;
6579 getNumTeamsExprForTargetDirective(CGF, D, DefaultValTeams);
6580 if (DefaultValTeams > 0) {
6581 OutlinedFn->addFnAttr("omp_target_num_teams",
6582 std::to_string(DefaultValTeams));
6583 }
6584 int32_t DefaultValThreads = -1;
6585 getNumThreadsExprForTargetDirective(CGF, D, DefaultValThreads);
6586 if (DefaultValThreads > 0) {
6587 OutlinedFn->addFnAttr("omp_target_thread_limit",
6588 std::to_string(DefaultValThreads));
6589 }
6590 }
6591
6592 /// Checks if the expression is constant or does not have non-trivial function
6593 /// calls.
6594 static bool isTrivial(ASTContext &Ctx, const Expr *E) {
6595 // We can skip constant expressions.
6596 // We can skip expressions with trivial calls or simple expressions.
6597 return (E->isEvaluatable(Ctx, Expr::SE_AllowUndefinedBehavior) ||
6598 !E->hasNonTrivialCall(Ctx)) &&
6599 !E->HasSideEffects(Ctx, /*IncludePossibleEffects=*/true);
6600 }
6601
6602 const Stmt *CGOpenMPRuntime::getSingleCompoundChild(ASTContext &Ctx,
6603 const Stmt *Body) {
6604 const Stmt *Child = Body->IgnoreContainers();
6605 while (const auto *C = dyn_cast_or_null<CompoundStmt>(Child)) {
6606 Child = nullptr;
6607 for (const Stmt *S : C->body()) {
6608 if (const auto *E = dyn_cast<Expr>(S)) {
6609 if (isTrivial(Ctx, E))
6610 continue;
6611 }
6612 // Some of the statements can be ignored.
6613 if (isa<AsmStmt>(S) || isa<NullStmt>(S) || isa<OMPFlushDirective>(S) ||
6614 isa<OMPBarrierDirective>(S) || isa<OMPTaskyieldDirective>(S))
6615 continue;
6616 // Analyze declarations.
6617 if (const auto *DS = dyn_cast<DeclStmt>(S)) {
6618 if (llvm::all_of(DS->decls(), [](const Decl *D) {
6619 if (isa<EmptyDecl>(D) || isa<DeclContext>(D) ||
6620 isa<TypeDecl>(D) || isa<PragmaCommentDecl>(D) ||
6621 isa<PragmaDetectMismatchDecl>(D) || isa<UsingDecl>(D) ||
6622 isa<UsingDirectiveDecl>(D) ||
6623 isa<OMPDeclareReductionDecl>(D) ||
6624 isa<OMPThreadPrivateDecl>(D) || isa<OMPAllocateDecl>(D))
6625 return true;
6626 const auto *VD = dyn_cast<VarDecl>(D);
6627 if (!VD)
6628 return false;
6629 return VD->hasGlobalStorage() || !VD->isUsed();
6630 }))
6631 continue;
6632 }
6633 // Found a second significant child - a single child cannot be returned.
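// E.g. a captured body that contains a single '#pragma omp teams' plus only
// trivial expressions or ignorable declarations yields that directive; any
// second significant statement makes this function return nullptr.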
6634 if (Child) 6635 return nullptr; 6636 Child = S; 6637 } 6638 if (Child) 6639 Child = Child->IgnoreContainers(); 6640 } 6641 return Child; 6642 } 6643 6644 const Expr *CGOpenMPRuntime::getNumTeamsExprForTargetDirective( 6645 CodeGenFunction &CGF, const OMPExecutableDirective &D, 6646 int32_t &DefaultVal) { 6647 6648 OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind(); 6649 assert(isOpenMPTargetExecutionDirective(DirectiveKind) && 6650 "Expected target-based executable directive."); 6651 switch (DirectiveKind) { 6652 case OMPD_target: { 6653 const auto *CS = D.getInnermostCapturedStmt(); 6654 const auto *Body = 6655 CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true); 6656 const Stmt *ChildStmt = 6657 CGOpenMPRuntime::getSingleCompoundChild(CGF.getContext(), Body); 6658 if (const auto *NestedDir = 6659 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) { 6660 if (isOpenMPTeamsDirective(NestedDir->getDirectiveKind())) { 6661 if (NestedDir->hasClausesOfKind<OMPNumTeamsClause>()) { 6662 const Expr *NumTeams = 6663 NestedDir->getSingleClause<OMPNumTeamsClause>()->getNumTeams(); 6664 if (NumTeams->isIntegerConstantExpr(CGF.getContext())) 6665 if (auto Constant = 6666 NumTeams->getIntegerConstantExpr(CGF.getContext())) 6667 DefaultVal = Constant->getExtValue(); 6668 return NumTeams; 6669 } 6670 DefaultVal = 0; 6671 return nullptr; 6672 } 6673 if (isOpenMPParallelDirective(NestedDir->getDirectiveKind()) || 6674 isOpenMPSimdDirective(NestedDir->getDirectiveKind())) { 6675 DefaultVal = 1; 6676 return nullptr; 6677 } 6678 DefaultVal = 1; 6679 return nullptr; 6680 } 6681 // A value of -1 is used to check if we need to emit no teams region 6682 DefaultVal = -1; 6683 return nullptr; 6684 } 6685 case OMPD_target_teams: 6686 case OMPD_target_teams_distribute: 6687 case OMPD_target_teams_distribute_simd: 6688 case OMPD_target_teams_distribute_parallel_for: 6689 case OMPD_target_teams_distribute_parallel_for_simd: { 6690 if (D.hasClausesOfKind<OMPNumTeamsClause>()) { 6691 const Expr *NumTeams = 6692 D.getSingleClause<OMPNumTeamsClause>()->getNumTeams(); 6693 if (NumTeams->isIntegerConstantExpr(CGF.getContext())) 6694 if (auto Constant = NumTeams->getIntegerConstantExpr(CGF.getContext())) 6695 DefaultVal = Constant->getExtValue(); 6696 return NumTeams; 6697 } 6698 DefaultVal = 0; 6699 return nullptr; 6700 } 6701 case OMPD_target_parallel: 6702 case OMPD_target_parallel_for: 6703 case OMPD_target_parallel_for_simd: 6704 case OMPD_target_simd: 6705 DefaultVal = 1; 6706 return nullptr; 6707 case OMPD_parallel: 6708 case OMPD_for: 6709 case OMPD_parallel_for: 6710 case OMPD_parallel_master: 6711 case OMPD_parallel_sections: 6712 case OMPD_for_simd: 6713 case OMPD_parallel_for_simd: 6714 case OMPD_cancel: 6715 case OMPD_cancellation_point: 6716 case OMPD_ordered: 6717 case OMPD_threadprivate: 6718 case OMPD_allocate: 6719 case OMPD_task: 6720 case OMPD_simd: 6721 case OMPD_tile: 6722 case OMPD_unroll: 6723 case OMPD_sections: 6724 case OMPD_section: 6725 case OMPD_single: 6726 case OMPD_master: 6727 case OMPD_critical: 6728 case OMPD_taskyield: 6729 case OMPD_barrier: 6730 case OMPD_taskwait: 6731 case OMPD_taskgroup: 6732 case OMPD_atomic: 6733 case OMPD_flush: 6734 case OMPD_depobj: 6735 case OMPD_scan: 6736 case OMPD_teams: 6737 case OMPD_target_data: 6738 case OMPD_target_exit_data: 6739 case OMPD_target_enter_data: 6740 case OMPD_distribute: 6741 case OMPD_distribute_simd: 6742 case OMPD_distribute_parallel_for: 6743 case OMPD_distribute_parallel_for_simd: 6744 case 
OMPD_teams_distribute:
6745 case OMPD_teams_distribute_simd:
6746 case OMPD_teams_distribute_parallel_for:
6747 case OMPD_teams_distribute_parallel_for_simd:
6748 case OMPD_target_update:
6749 case OMPD_declare_simd:
6750 case OMPD_declare_variant:
6751 case OMPD_begin_declare_variant:
6752 case OMPD_end_declare_variant:
6753 case OMPD_declare_target:
6754 case OMPD_end_declare_target:
6755 case OMPD_declare_reduction:
6756 case OMPD_declare_mapper:
6757 case OMPD_taskloop:
6758 case OMPD_taskloop_simd:
6759 case OMPD_master_taskloop:
6760 case OMPD_master_taskloop_simd:
6761 case OMPD_parallel_master_taskloop:
6762 case OMPD_parallel_master_taskloop_simd:
6763 case OMPD_requires:
6764 case OMPD_metadirective:
6765 case OMPD_unknown:
6766 break;
6767 default:
6768 break;
6769 }
6770 llvm_unreachable("Unexpected directive kind.");
6771 }
6772
6773 llvm::Value *CGOpenMPRuntime::emitNumTeamsForTargetDirective(
6774 CodeGenFunction &CGF, const OMPExecutableDirective &D) {
6775 assert(!CGF.getLangOpts().OpenMPIsDevice &&
6776 "Clauses associated with the teams directive expected to be emitted "
6777 "only for the host!");
6778 CGBuilderTy &Bld = CGF.Builder;
6779 int32_t DefaultNT = -1;
6780 const Expr *NumTeams = getNumTeamsExprForTargetDirective(CGF, D, DefaultNT);
6781 if (NumTeams != nullptr) {
6782 OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
6783
6784 switch (DirectiveKind) {
6785 case OMPD_target: {
6786 const auto *CS = D.getInnermostCapturedStmt();
6787 CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6788 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6789 llvm::Value *NumTeamsVal = CGF.EmitScalarExpr(NumTeams,
6790 /*IgnoreResultAssign*/ true);
6791 return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
6792 /*isSigned=*/true);
6793 }
6794 case OMPD_target_teams:
6795 case OMPD_target_teams_distribute:
6796 case OMPD_target_teams_distribute_simd:
6797 case OMPD_target_teams_distribute_parallel_for:
6798 case OMPD_target_teams_distribute_parallel_for_simd: {
6799 CodeGenFunction::RunCleanupsScope NumTeamsScope(CGF);
6800 llvm::Value *NumTeamsVal = CGF.EmitScalarExpr(NumTeams,
6801 /*IgnoreResultAssign*/ true);
6802 return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
6803 /*isSigned=*/true);
6804 }
6805 default:
6806 break;
6807 }
6808 } else if (DefaultNT == -1) {
6809 return nullptr;
6810 }
6811
6812 return Bld.getInt32(DefaultNT);
6813 }
6814
6815 static llvm::Value *getNumThreads(CodeGenFunction &CGF, const CapturedStmt *CS,
6816 llvm::Value *DefaultThreadLimitVal) {
6817 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
6818 CGF.getContext(), CS->getCapturedStmt());
6819 if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
6820 if (isOpenMPParallelDirective(Dir->getDirectiveKind())) {
6821 llvm::Value *NumThreads = nullptr;
6822 llvm::Value *CondVal = nullptr;
6823 // Handle the if clause. If an if clause is present, the number of threads
6824 // is calculated as <cond> ? (<numthreads> ? <numthreads> : 0) : 1.
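// For instance (illustrative), '#pragma omp parallel if(c) num_threads(n)'
// nested in a target region yields 'c ? umin(n, <default limit>) : 1',
// where the umin is applied only when a default thread limit is known.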
6825 if (Dir->hasClausesOfKind<OMPIfClause>()) {
6826 CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6827 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6828 const OMPIfClause *IfClause = nullptr;
6829 for (const auto *C : Dir->getClausesOfKind<OMPIfClause>()) {
6830 if (C->getNameModifier() == OMPD_unknown ||
6831 C->getNameModifier() == OMPD_parallel) {
6832 IfClause = C;
6833 break;
6834 }
6835 }
6836 if (IfClause) {
6837 const Expr *Cond = IfClause->getCondition();
6838 bool Result;
6839 if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
6840 if (!Result)
6841 return CGF.Builder.getInt32(1);
6842 } else {
6843 CodeGenFunction::LexicalScope Scope(CGF, Cond->getSourceRange());
6844 if (const auto *PreInit =
6845 cast_or_null<DeclStmt>(IfClause->getPreInitStmt())) {
6846 for (const auto *I : PreInit->decls()) {
6847 if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
6848 CGF.EmitVarDecl(cast<VarDecl>(*I));
6849 } else {
6850 CodeGenFunction::AutoVarEmission Emission =
6851 CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
6852 CGF.EmitAutoVarCleanups(Emission);
6853 }
6854 }
6855 }
6856 CondVal = CGF.EvaluateExprAsBool(Cond);
6857 }
6858 }
6859 }
6860 // Check the value of the num_threads clause iff the if clause was not
6861 // specified or does not evaluate to false.
6862 if (Dir->hasClausesOfKind<OMPNumThreadsClause>()) {
6863 CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6864 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6865 const auto *NumThreadsClause =
6866 Dir->getSingleClause<OMPNumThreadsClause>();
6867 CodeGenFunction::LexicalScope Scope(
6868 CGF, NumThreadsClause->getNumThreads()->getSourceRange());
6869 if (const auto *PreInit =
6870 cast_or_null<DeclStmt>(NumThreadsClause->getPreInitStmt())) {
6871 for (const auto *I : PreInit->decls()) {
6872 if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
6873 CGF.EmitVarDecl(cast<VarDecl>(*I));
6874 } else {
6875 CodeGenFunction::AutoVarEmission Emission =
6876 CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
6877 CGF.EmitAutoVarCleanups(Emission);
6878 }
6879 }
6880 }
6881 NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads());
6882 NumThreads = CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty,
6883 /*isSigned=*/false);
6884 if (DefaultThreadLimitVal)
6885 NumThreads = CGF.Builder.CreateSelect(
6886 CGF.Builder.CreateICmpULT(DefaultThreadLimitVal, NumThreads),
6887 DefaultThreadLimitVal, NumThreads);
6888 } else {
6889 NumThreads = DefaultThreadLimitVal ? DefaultThreadLimitVal
6890 : CGF.Builder.getInt32(0);
6891 }
6892 // Process condition of the if clause.
6893 if (CondVal) {
6894 NumThreads = CGF.Builder.CreateSelect(CondVal, NumThreads,
6895 CGF.Builder.getInt32(1));
6896 }
6897 return NumThreads;
6898 }
6899 if (isOpenMPSimdDirective(Dir->getDirectiveKind()))
6900 return CGF.Builder.getInt32(1);
6901 return DefaultThreadLimitVal;
6902 }
6903 return DefaultThreadLimitVal ?
DefaultThreadLimitVal 6904 : CGF.Builder.getInt32(0); 6905 } 6906 6907 const Expr *CGOpenMPRuntime::getNumThreadsExprForTargetDirective( 6908 CodeGenFunction &CGF, const OMPExecutableDirective &D, 6909 int32_t &DefaultVal) { 6910 OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind(); 6911 assert(isOpenMPTargetExecutionDirective(DirectiveKind) && 6912 "Expected target-based executable directive."); 6913 6914 switch (DirectiveKind) { 6915 case OMPD_target: 6916 // Teams have no clause thread_limit 6917 return nullptr; 6918 case OMPD_target_teams: 6919 case OMPD_target_teams_distribute: 6920 if (D.hasClausesOfKind<OMPThreadLimitClause>()) { 6921 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>(); 6922 const Expr *ThreadLimit = ThreadLimitClause->getThreadLimit(); 6923 if (ThreadLimit->isIntegerConstantExpr(CGF.getContext())) 6924 if (auto Constant = 6925 ThreadLimit->getIntegerConstantExpr(CGF.getContext())) 6926 DefaultVal = Constant->getExtValue(); 6927 return ThreadLimit; 6928 } 6929 return nullptr; 6930 case OMPD_target_parallel: 6931 case OMPD_target_parallel_for: 6932 case OMPD_target_parallel_for_simd: 6933 case OMPD_target_teams_distribute_parallel_for: 6934 case OMPD_target_teams_distribute_parallel_for_simd: { 6935 Expr *ThreadLimit = nullptr; 6936 Expr *NumThreads = nullptr; 6937 if (D.hasClausesOfKind<OMPThreadLimitClause>()) { 6938 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>(); 6939 ThreadLimit = ThreadLimitClause->getThreadLimit(); 6940 if (ThreadLimit->isIntegerConstantExpr(CGF.getContext())) 6941 if (auto Constant = 6942 ThreadLimit->getIntegerConstantExpr(CGF.getContext())) 6943 DefaultVal = Constant->getExtValue(); 6944 } 6945 if (D.hasClausesOfKind<OMPNumThreadsClause>()) { 6946 const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>(); 6947 NumThreads = NumThreadsClause->getNumThreads(); 6948 if (NumThreads->isIntegerConstantExpr(CGF.getContext())) { 6949 if (auto Constant = 6950 NumThreads->getIntegerConstantExpr(CGF.getContext())) { 6951 if (Constant->getExtValue() < DefaultVal) { 6952 DefaultVal = Constant->getExtValue(); 6953 ThreadLimit = NumThreads; 6954 } 6955 } 6956 } 6957 } 6958 return ThreadLimit; 6959 } 6960 case OMPD_target_teams_distribute_simd: 6961 case OMPD_target_simd: 6962 DefaultVal = 1; 6963 return nullptr; 6964 case OMPD_parallel: 6965 case OMPD_for: 6966 case OMPD_parallel_for: 6967 case OMPD_parallel_master: 6968 case OMPD_parallel_sections: 6969 case OMPD_for_simd: 6970 case OMPD_parallel_for_simd: 6971 case OMPD_cancel: 6972 case OMPD_cancellation_point: 6973 case OMPD_ordered: 6974 case OMPD_threadprivate: 6975 case OMPD_allocate: 6976 case OMPD_task: 6977 case OMPD_simd: 6978 case OMPD_tile: 6979 case OMPD_unroll: 6980 case OMPD_sections: 6981 case OMPD_section: 6982 case OMPD_single: 6983 case OMPD_master: 6984 case OMPD_critical: 6985 case OMPD_taskyield: 6986 case OMPD_barrier: 6987 case OMPD_taskwait: 6988 case OMPD_taskgroup: 6989 case OMPD_atomic: 6990 case OMPD_flush: 6991 case OMPD_depobj: 6992 case OMPD_scan: 6993 case OMPD_teams: 6994 case OMPD_target_data: 6995 case OMPD_target_exit_data: 6996 case OMPD_target_enter_data: 6997 case OMPD_distribute: 6998 case OMPD_distribute_simd: 6999 case OMPD_distribute_parallel_for: 7000 case OMPD_distribute_parallel_for_simd: 7001 case OMPD_teams_distribute: 7002 case OMPD_teams_distribute_simd: 7003 case OMPD_teams_distribute_parallel_for: 7004 case OMPD_teams_distribute_parallel_for_simd: 7005 case OMPD_target_update: 7006 case 
OMPD_declare_simd: 7007 case OMPD_declare_variant: 7008 case OMPD_begin_declare_variant: 7009 case OMPD_end_declare_variant: 7010 case OMPD_declare_target: 7011 case OMPD_end_declare_target: 7012 case OMPD_declare_reduction: 7013 case OMPD_declare_mapper: 7014 case OMPD_taskloop: 7015 case OMPD_taskloop_simd: 7016 case OMPD_master_taskloop: 7017 case OMPD_master_taskloop_simd: 7018 case OMPD_parallel_master_taskloop: 7019 case OMPD_parallel_master_taskloop_simd: 7020 case OMPD_requires: 7021 case OMPD_unknown: 7022 break; 7023 default: 7024 break; 7025 } 7026 llvm_unreachable("Unsupported directive kind."); 7027 } 7028 7029 llvm::Value *CGOpenMPRuntime::emitNumThreadsForTargetDirective( 7030 CodeGenFunction &CGF, const OMPExecutableDirective &D) { 7031 assert(!CGF.getLangOpts().OpenMPIsDevice && 7032 "Clauses associated with the teams directive expected to be emitted " 7033 "only for the host!"); 7034 OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind(); 7035 assert(isOpenMPTargetExecutionDirective(DirectiveKind) && 7036 "Expected target-based executable directive."); 7037 CGBuilderTy &Bld = CGF.Builder; 7038 llvm::Value *ThreadLimitVal = nullptr; 7039 llvm::Value *NumThreadsVal = nullptr; 7040 switch (DirectiveKind) { 7041 case OMPD_target: { 7042 const CapturedStmt *CS = D.getInnermostCapturedStmt(); 7043 if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal)) 7044 return NumThreads; 7045 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild( 7046 CGF.getContext(), CS->getCapturedStmt()); 7047 if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) { 7048 if (Dir->hasClausesOfKind<OMPThreadLimitClause>()) { 7049 CGOpenMPInnerExprInfo CGInfo(CGF, *CS); 7050 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 7051 const auto *ThreadLimitClause = 7052 Dir->getSingleClause<OMPThreadLimitClause>(); 7053 CodeGenFunction::LexicalScope Scope( 7054 CGF, ThreadLimitClause->getThreadLimit()->getSourceRange()); 7055 if (const auto *PreInit = 7056 cast_or_null<DeclStmt>(ThreadLimitClause->getPreInitStmt())) { 7057 for (const auto *I : PreInit->decls()) { 7058 if (!I->hasAttr<OMPCaptureNoInitAttr>()) { 7059 CGF.EmitVarDecl(cast<VarDecl>(*I)); 7060 } else { 7061 CodeGenFunction::AutoVarEmission Emission = 7062 CGF.EmitAutoVarAlloca(cast<VarDecl>(*I)); 7063 CGF.EmitAutoVarCleanups(Emission); 7064 } 7065 } 7066 } 7067 llvm::Value *ThreadLimit = CGF.EmitScalarExpr( 7068 ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true); 7069 ThreadLimitVal = 7070 Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false); 7071 } 7072 if (isOpenMPTeamsDirective(Dir->getDirectiveKind()) && 7073 !isOpenMPDistributeDirective(Dir->getDirectiveKind())) { 7074 CS = Dir->getInnermostCapturedStmt(); 7075 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild( 7076 CGF.getContext(), CS->getCapturedStmt()); 7077 Dir = dyn_cast_or_null<OMPExecutableDirective>(Child); 7078 } 7079 if (Dir && isOpenMPDistributeDirective(Dir->getDirectiveKind()) && 7080 !isOpenMPSimdDirective(Dir->getDirectiveKind())) { 7081 CS = Dir->getInnermostCapturedStmt(); 7082 if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal)) 7083 return NumThreads; 7084 } 7085 if (Dir && isOpenMPSimdDirective(Dir->getDirectiveKind())) 7086 return Bld.getInt32(1); 7087 } 7088 return ThreadLimitVal ? 
ThreadLimitVal : Bld.getInt32(0);
7089 }
7090 case OMPD_target_teams: {
7091 if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
7092 CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
7093 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
7094 llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
7095 ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
7096 ThreadLimitVal =
7097 Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
7098 }
7099 const CapturedStmt *CS = D.getInnermostCapturedStmt();
7100 if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
7101 return NumThreads;
7102 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
7103 CGF.getContext(), CS->getCapturedStmt());
7104 if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
7105 if (Dir->getDirectiveKind() == OMPD_distribute) {
7106 CS = Dir->getInnermostCapturedStmt();
7107 if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
7108 return NumThreads;
7109 }
7110 }
7111 return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0);
7112 }
7113 case OMPD_target_teams_distribute:
7114 if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
7115 CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
7116 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
7117 llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
7118 ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
7119 ThreadLimitVal =
7120 Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
7121 }
7122 return getNumThreads(CGF, D.getInnermostCapturedStmt(), ThreadLimitVal);
7123 case OMPD_target_parallel:
7124 case OMPD_target_parallel_for:
7125 case OMPD_target_parallel_for_simd:
7126 case OMPD_target_teams_distribute_parallel_for:
7127 case OMPD_target_teams_distribute_parallel_for_simd: {
7128 llvm::Value *CondVal = nullptr;
7129 // Handle the if clause. If an if clause is present, the number of threads
7130 // is calculated as <cond> ? (<numthreads> ? <numthreads> : 0) : 1.
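// In addition, a thread_limit clause participates here: if both thread_limit
// and num_threads are present, the smaller value (unsigned comparison) wins
// before the if-clause select; if neither is present, 0 is used,
// conventionally meaning no explicit limit.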
    if (D.hasClausesOfKind<OMPIfClause>()) {
      const OMPIfClause *IfClause = nullptr;
      for (const auto *C : D.getClausesOfKind<OMPIfClause>()) {
        if (C->getNameModifier() == OMPD_unknown ||
            C->getNameModifier() == OMPD_parallel) {
          IfClause = C;
          break;
        }
      }
      if (IfClause) {
        const Expr *Cond = IfClause->getCondition();
        bool Result;
        if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
          if (!Result)
            return Bld.getInt32(1);
        } else {
          CodeGenFunction::RunCleanupsScope Scope(CGF);
          CondVal = CGF.EvaluateExprAsBool(Cond);
        }
      }
    }
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
    }
    if (D.hasClausesOfKind<OMPNumThreadsClause>()) {
      CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
      const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>();
      llvm::Value *NumThreads = CGF.EmitScalarExpr(
          NumThreadsClause->getNumThreads(), /*IgnoreResultAssign=*/true);
      NumThreadsVal =
          Bld.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned=*/false);
      ThreadLimitVal =
          ThreadLimitVal
              ? Bld.CreateSelect(
                    Bld.CreateICmpULT(NumThreadsVal, ThreadLimitVal),
                    NumThreadsVal, ThreadLimitVal)
              : NumThreadsVal;
    }
    if (!ThreadLimitVal)
      ThreadLimitVal = Bld.getInt32(0);
    if (CondVal)
      return Bld.CreateSelect(CondVal, ThreadLimitVal, Bld.getInt32(1));
    return ThreadLimitVal;
  }
  case OMPD_target_teams_distribute_simd:
  case OMPD_target_simd:
    return Bld.getInt32(1);
  case OMPD_parallel:
  case OMPD_for:
  case OMPD_parallel_for:
  case OMPD_parallel_master:
  case OMPD_parallel_sections:
  case OMPD_for_simd:
  case OMPD_parallel_for_simd:
  case OMPD_cancel:
  case OMPD_cancellation_point:
  case OMPD_ordered:
  case OMPD_threadprivate:
  case OMPD_allocate:
  case OMPD_task:
  case OMPD_simd:
  case OMPD_tile:
  case OMPD_unroll:
  case OMPD_sections:
  case OMPD_section:
  case OMPD_single:
  case OMPD_master:
  case OMPD_critical:
  case OMPD_taskyield:
  case OMPD_barrier:
  case OMPD_taskwait:
  case OMPD_taskgroup:
  case OMPD_atomic:
  case OMPD_flush:
  case OMPD_depobj:
  case OMPD_scan:
  case OMPD_teams:
  case OMPD_target_data:
  case OMPD_target_exit_data:
  case OMPD_target_enter_data:
  case OMPD_distribute:
  case OMPD_distribute_simd:
  case OMPD_distribute_parallel_for:
  case OMPD_distribute_parallel_for_simd:
  case OMPD_teams_distribute:
  case OMPD_teams_distribute_simd:
  case OMPD_teams_distribute_parallel_for:
  case OMPD_teams_distribute_parallel_for_simd:
  case OMPD_target_update:
  case OMPD_declare_simd:
  case OMPD_declare_variant:
  case OMPD_begin_declare_variant:
  case OMPD_end_declare_variant:
  case OMPD_declare_target:
  case OMPD_end_declare_target:
  case OMPD_declare_reduction:
  case OMPD_declare_mapper:
  case OMPD_taskloop:
  case OMPD_taskloop_simd:
  case OMPD_master_taskloop:
  case OMPD_master_taskloop_simd:
  case OMPD_parallel_master_taskloop:
  case OMPD_parallel_master_taskloop_simd:
  case OMPD_requires:
  case OMPD_metadirective:
  case OMPD_unknown:
    break;
  default:
    break;
  }
  llvm_unreachable("Unsupported directive kind.");
}
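
// A brief illustration of the logic above (hypothetical user code, not part
// of this file): for a directive such as
//
//   #pragma omp target parallel if(c) num_threads(n) thread_limit(t)
//
// the host-side value computed here is roughly
//
//   c ? (n < t ? n : t) : 1
//
// where an absent num_threads/thread_limit pair yields 0, meaning "let the
// runtime choose", and a compile-time-false if clause folds directly to 1.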

namespace {
LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();

// Utility to handle information from clauses associated with a given
// construct that use mappable expressions (e.g. 'map' clause, 'to' clause).
// It provides a convenient interface to obtain the information and generate
// code for that information.
class MappableExprsHandler {
public:
  /// Values for bit flags used to specify the mapping type for
  /// offloading.
  enum OpenMPOffloadMappingFlags : uint64_t {
    /// No flags
    OMP_MAP_NONE = 0x0,
    /// Allocate memory on the device and move data from host to device.
    OMP_MAP_TO = 0x01,
    /// Allocate memory on the device and move data from device to host.
    OMP_MAP_FROM = 0x02,
    /// Always perform the requested mapping action on the element, even
    /// if it was already mapped before.
    OMP_MAP_ALWAYS = 0x04,
    /// Delete the element from the device environment, ignoring the
    /// current reference count associated with the element.
    OMP_MAP_DELETE = 0x08,
    /// The element being mapped is a pointer-pointee pair; both the
    /// pointer and the pointee should be mapped.
    OMP_MAP_PTR_AND_OBJ = 0x10,
    /// This flag signals that the base address of an entry should be
    /// passed to the target kernel as an argument.
    OMP_MAP_TARGET_PARAM = 0x20,
    /// Signal that the runtime library has to return the device pointer
    /// in the current position for the data being mapped. Used when we have
    /// the use_device_ptr or use_device_addr clause.
    OMP_MAP_RETURN_PARAM = 0x40,
    /// This flag signals that the reference being passed is a pointer to
    /// private data.
    OMP_MAP_PRIVATE = 0x80,
    /// Pass the element to the device by value.
    OMP_MAP_LITERAL = 0x100,
    /// Implicit map
    OMP_MAP_IMPLICIT = 0x200,
    /// Close is a hint to the runtime to allocate memory close to
    /// the target device.
    OMP_MAP_CLOSE = 0x400,
    /// 0x800 is reserved for compatibility with XLC.
    /// Produce a runtime error if the data is not already allocated.
    OMP_MAP_PRESENT = 0x1000,
    // Increment and decrement a separate reference counter so that the data
    // cannot be unmapped within the associated region. Thus, this flag is
    // intended to be used on 'target' and 'target data' directives because
    // they are inherently structured. It is not intended to be used on
    // 'target enter data' and 'target exit data' directives because they are
    // inherently dynamic.
    // This is an OpenMP extension for the sake of OpenACC support.
    OMP_MAP_OMPX_HOLD = 0x2000,
    /// Signal that the runtime library should use args as an array of
    /// descriptor_dim pointers and use args_size as dims. Used when we have
    /// non-contiguous list items in the target update directive.
    OMP_MAP_NON_CONTIG = 0x100000000000,
    /// The 16 MSBs of the flags indicate whether the entry is member of some
    /// struct/class.
    OMP_MAP_MEMBER_OF = 0xffff000000000000,
    LLVM_MARK_AS_BITMASK_ENUM(/* LargestFlag = */ OMP_MAP_MEMBER_OF),
  };

  /// Get the offset of the OMP_MAP_MEMBER_OF field.
  static unsigned getFlagMemberOffset() {
    unsigned Offset = 0;
    for (uint64_t Remain = OMP_MAP_MEMBER_OF; !(Remain & 1);
         Remain = Remain >> 1)
      Offset++;
    return Offset;
  }
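
  // A minimal sketch of the MEMBER_OF encoding, assuming the enum values
  // above: 0xffff000000000000 has 48 trailing zero bits, so
  // getFlagMemberOffset() returns 48 and member positions live in the 16
  // most significant bits:
  //
  //   uint64_t Flags = OMP_MAP_TO | (((uint64_t)Position + 1) << 48);
  //
  // getMemberOfFlag() further down performs exactly this shift.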

  /// Class that holds debugging information for a data mapping to be passed
  /// to the runtime library.
  class MappingExprInfo {
    /// The variable declaration used for the data mapping.
    const ValueDecl *MapDecl = nullptr;
    /// The original expression used in the map clause, or null if there is
    /// none.
    const Expr *MapExpr = nullptr;

  public:
    MappingExprInfo(const ValueDecl *MapDecl, const Expr *MapExpr = nullptr)
        : MapDecl(MapDecl), MapExpr(MapExpr) {}

    const ValueDecl *getMapDecl() const { return MapDecl; }
    const Expr *getMapExpr() const { return MapExpr; }
  };

  /// Class that associates information with a base pointer to be passed to
  /// the runtime library.
  class BasePointerInfo {
    /// The base pointer.
    llvm::Value *Ptr = nullptr;
    /// The base declaration that refers to this device pointer, or null if
    /// there is none.
    const ValueDecl *DevPtrDecl = nullptr;

  public:
    BasePointerInfo(llvm::Value *Ptr, const ValueDecl *DevPtrDecl = nullptr)
        : Ptr(Ptr), DevPtrDecl(DevPtrDecl) {}
    llvm::Value *operator*() const { return Ptr; }
    const ValueDecl *getDevicePtrDecl() const { return DevPtrDecl; }
    void setDevicePtrDecl(const ValueDecl *D) { DevPtrDecl = D; }
  };

  using MapExprsArrayTy = SmallVector<MappingExprInfo, 4>;
  using MapBaseValuesArrayTy = SmallVector<BasePointerInfo, 4>;
  using MapValuesArrayTy = SmallVector<llvm::Value *, 4>;
  using MapFlagsArrayTy = SmallVector<OpenMPOffloadMappingFlags, 4>;
  using MapMappersArrayTy = SmallVector<const ValueDecl *, 4>;
  using MapDimArrayTy = SmallVector<uint64_t, 4>;
  using MapNonContiguousArrayTy = SmallVector<MapValuesArrayTy, 4>;

  /// This structure contains combined information generated for mappable
  /// clauses, including base pointers, pointers, sizes, map types,
  /// user-defined mappers, and non-contiguous information.
  struct MapCombinedInfoTy {
    struct StructNonContiguousInfo {
      bool IsNonContiguous = false;
      MapDimArrayTy Dims;
      MapNonContiguousArrayTy Offsets;
      MapNonContiguousArrayTy Counts;
      MapNonContiguousArrayTy Strides;
    };
    MapExprsArrayTy Exprs;
    MapBaseValuesArrayTy BasePointers;
    MapValuesArrayTy Pointers;
    MapValuesArrayTy Sizes;
    MapFlagsArrayTy Types;
    MapMappersArrayTy Mappers;
    StructNonContiguousInfo NonContigInfo;

    /// Append arrays in \a CurInfo.
    void append(MapCombinedInfoTy &CurInfo) {
      Exprs.append(CurInfo.Exprs.begin(), CurInfo.Exprs.end());
      BasePointers.append(CurInfo.BasePointers.begin(),
                          CurInfo.BasePointers.end());
      Pointers.append(CurInfo.Pointers.begin(), CurInfo.Pointers.end());
      Sizes.append(CurInfo.Sizes.begin(), CurInfo.Sizes.end());
      Types.append(CurInfo.Types.begin(), CurInfo.Types.end());
      Mappers.append(CurInfo.Mappers.begin(), CurInfo.Mappers.end());
      NonContigInfo.Dims.append(CurInfo.NonContigInfo.Dims.begin(),
                                CurInfo.NonContigInfo.Dims.end());
      NonContigInfo.Offsets.append(CurInfo.NonContigInfo.Offsets.begin(),
                                   CurInfo.NonContigInfo.Offsets.end());
      NonContigInfo.Counts.append(CurInfo.NonContigInfo.Counts.begin(),
                                  CurInfo.NonContigInfo.Counts.end());
      NonContigInfo.Strides.append(CurInfo.NonContigInfo.Strides.begin(),
                                   CurInfo.NonContigInfo.Strides.end());
    }
  };
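
  // Note on the layout of MapCombinedInfoTy: the containers are parallel
  // arrays, so for any index i, Exprs[i], BasePointers[i], Pointers[i],
  // Sizes[i], Types[i] and Mappers[i] all describe the same map entry and
  // must be grown in lockstep (as append() above does for all of them).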

  /// Map between a struct and its lowest & highest elements which have been
  /// mapped.
  /// [ValueDecl *] --> {LE(FieldIndex, Pointer),
  ///                    HE(FieldIndex, Pointer)}
  struct StructRangeInfoTy {
    MapCombinedInfoTy PreliminaryMapData;
    std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> LowestElem = {
        0, Address::invalid()};
    std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> HighestElem = {
        0, Address::invalid()};
    Address Base = Address::invalid();
    Address LB = Address::invalid();
    bool IsArraySection = false;
    bool HasCompleteRecord = false;
  };

private:
  /// Kind that defines how a device pointer has to be returned.
  struct MapInfo {
    OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
    OpenMPMapClauseKind MapType = OMPC_MAP_unknown;
    ArrayRef<OpenMPMapModifierKind> MapModifiers;
    ArrayRef<OpenMPMotionModifierKind> MotionModifiers;
    bool ReturnDevicePointer = false;
    bool IsImplicit = false;
    const ValueDecl *Mapper = nullptr;
    const Expr *VarRef = nullptr;
    bool ForDeviceAddr = false;

    MapInfo() = default;
    MapInfo(
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
        OpenMPMapClauseKind MapType,
        ArrayRef<OpenMPMapModifierKind> MapModifiers,
        ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
        bool ReturnDevicePointer, bool IsImplicit,
        const ValueDecl *Mapper = nullptr, const Expr *VarRef = nullptr,
        bool ForDeviceAddr = false)
        : Components(Components), MapType(MapType), MapModifiers(MapModifiers),
          MotionModifiers(MotionModifiers),
          ReturnDevicePointer(ReturnDevicePointer), IsImplicit(IsImplicit),
          Mapper(Mapper), VarRef(VarRef), ForDeviceAddr(ForDeviceAddr) {}
  };

  /// If use_device_ptr or use_device_addr is used on a decl which is a struct
  /// member and there is no map information about it, then emission of that
  /// entry is deferred until the whole struct has been processed.
  struct DeferredDevicePtrEntryTy {
    const Expr *IE = nullptr;
    const ValueDecl *VD = nullptr;
    bool ForDeviceAddr = false;

    DeferredDevicePtrEntryTy(const Expr *IE, const ValueDecl *VD,
                             bool ForDeviceAddr)
        : IE(IE), VD(VD), ForDeviceAddr(ForDeviceAddr) {}
  };

  /// The target directive from where the mappable clauses were extracted. It
  /// is either an executable directive or a user-defined mapper directive.
  llvm::PointerUnion<const OMPExecutableDirective *,
                     const OMPDeclareMapperDecl *>
      CurDir;

  /// Function the directive is being generated for.
  CodeGenFunction &CGF;

  /// Set of all first private variables in the current directive.
  /// bool data is set to true if the variable is implicitly marked as
  /// firstprivate, false otherwise.
  llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, bool> FirstPrivateDecls;

  /// Map between device pointer declarations and their expression components.
  /// The key value for declarations in 'this' is null.
  llvm::DenseMap<
      const ValueDecl *,
      SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>>
      DevPointersMap;

  llvm::Value *getExprTypeSize(const Expr *E) const {
    QualType ExprTy = E->getType().getCanonicalType();

    // Calculate the size for array shaping expression.
    if (const auto *OAE = dyn_cast<OMPArrayShapingExpr>(E)) {
      llvm::Value *Size =
          CGF.getTypeSize(OAE->getBase()->getType()->getPointeeType());
      for (const Expr *SE : OAE->getDimensions()) {
        llvm::Value *Sz = CGF.EmitScalarExpr(SE);
        Sz = CGF.EmitScalarConversion(Sz, SE->getType(),
                                      CGF.getContext().getSizeType(),
                                      SE->getExprLoc());
        Size = CGF.Builder.CreateNUWMul(Size, Sz);
      }
      return Size;
    }

    // Reference types are ignored for mapping purposes.
    if (const auto *RefTy = ExprTy->getAs<ReferenceType>())
      ExprTy = RefTy->getPointeeType().getCanonicalType();

    // Given that an array section is considered a built-in type, we need to
    // do the calculation based on the length of the section instead of relying
    // on CGF.getTypeSize(E->getType()).
    if (const auto *OAE = dyn_cast<OMPArraySectionExpr>(E)) {
      QualType BaseTy = OMPArraySectionExpr::getBaseOriginalType(
                            OAE->getBase()->IgnoreParenImpCasts())
                            .getCanonicalType();

      // If there is no length associated with the expression and the lower
      // bound is not specified either, that means we are using the whole
      // length of the base.
      if (!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
          !OAE->getLowerBound())
        return CGF.getTypeSize(BaseTy);

      llvm::Value *ElemSize;
      if (const auto *PTy = BaseTy->getAs<PointerType>()) {
        ElemSize = CGF.getTypeSize(PTy->getPointeeType().getCanonicalType());
      } else {
        const auto *ATy = cast<ArrayType>(BaseTy.getTypePtr());
        assert(ATy && "Expecting array type if not a pointer type.");
        ElemSize = CGF.getTypeSize(ATy->getElementType().getCanonicalType());
      }

      // If we don't have a length at this point, that is because we have an
      // array section with a single element.
      if (!OAE->getLength() && OAE->getColonLocFirst().isInvalid())
        return ElemSize;

      if (const Expr *LenExpr = OAE->getLength()) {
        llvm::Value *LengthVal = CGF.EmitScalarExpr(LenExpr);
        LengthVal = CGF.EmitScalarConversion(LengthVal, LenExpr->getType(),
                                             CGF.getContext().getSizeType(),
                                             LenExpr->getExprLoc());
        return CGF.Builder.CreateNUWMul(LengthVal, ElemSize);
      }
      assert(!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
             OAE->getLowerBound() && "expected array_section[lb:].");
      // Size = sizetype - lb * elemtype;
      llvm::Value *LengthVal = CGF.getTypeSize(BaseTy);
      llvm::Value *LBVal = CGF.EmitScalarExpr(OAE->getLowerBound());
      LBVal = CGF.EmitScalarConversion(LBVal, OAE->getLowerBound()->getType(),
                                       CGF.getContext().getSizeType(),
                                       OAE->getLowerBound()->getExprLoc());
      LBVal = CGF.Builder.CreateNUWMul(LBVal, ElemSize);
      llvm::Value *Cmp = CGF.Builder.CreateICmpUGT(LengthVal, LBVal);
      llvm::Value *TrueVal = CGF.Builder.CreateNUWSub(LengthVal, LBVal);
      LengthVal = CGF.Builder.CreateSelect(
          Cmp, TrueVal, llvm::ConstantInt::get(CGF.SizeTy, 0));
      return LengthVal;
    }
    return CGF.getTypeSize(ExprTy);
  }

  /// Return the corresponding bits for a given map clause modifier. Add
  /// a flag marking the map as a pointer if requested. Add a flag marking the
  /// map as the first one of a series of maps that relate to the same map
  /// expression.
  OpenMPOffloadMappingFlags getMapTypeBits(
      OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
      ArrayRef<OpenMPMotionModifierKind> MotionModifiers, bool IsImplicit,
      bool AddPtrFlag, bool AddIsTargetParamFlag, bool IsNonContiguous) const {
    OpenMPOffloadMappingFlags Bits =
        IsImplicit ? OMP_MAP_IMPLICIT : OMP_MAP_NONE;
    switch (MapType) {
    case OMPC_MAP_alloc:
    case OMPC_MAP_release:
      // alloc and release are the default behavior in the runtime library,
      // i.e. if we don't pass any bits, alloc/release is what the runtime is
      // going to do. Therefore, we don't need to signal anything for these two
      // type modifiers.
      break;
    case OMPC_MAP_to:
      Bits |= OMP_MAP_TO;
      break;
    case OMPC_MAP_from:
      Bits |= OMP_MAP_FROM;
      break;
    case OMPC_MAP_tofrom:
      Bits |= OMP_MAP_TO | OMP_MAP_FROM;
      break;
    case OMPC_MAP_delete:
      Bits |= OMP_MAP_DELETE;
      break;
    case OMPC_MAP_unknown:
      llvm_unreachable("Unexpected map type!");
    }
    if (AddPtrFlag)
      Bits |= OMP_MAP_PTR_AND_OBJ;
    if (AddIsTargetParamFlag)
      Bits |= OMP_MAP_TARGET_PARAM;
    if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_always) !=
        MapModifiers.end())
      Bits |= OMP_MAP_ALWAYS;
    if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_close) !=
        MapModifiers.end())
      Bits |= OMP_MAP_CLOSE;
    if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_present) !=
            MapModifiers.end() ||
        llvm::find(MotionModifiers, OMPC_MOTION_MODIFIER_present) !=
            MotionModifiers.end())
      Bits |= OMP_MAP_PRESENT;
    if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_ompx_hold) !=
        MapModifiers.end())
      Bits |= OMP_MAP_OMPX_HOLD;
    if (IsNonContiguous)
      Bits |= OMP_MAP_NON_CONTIG;
    return Bits;
  }
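
  // Worked example for getMapTypeBits(), assuming a hypothetical explicit
  // clause map(always, close, tofrom: x): the returned bits are
  //
  //   OMP_MAP_TO | OMP_MAP_FROM | OMP_MAP_ALWAYS | OMP_MAP_CLOSE
  //
  // with OMP_MAP_PTR_AND_OBJ and/or OMP_MAP_TARGET_PARAM OR'd in only when
  // the caller passes AddPtrFlag/AddIsTargetParamFlag, and OMP_MAP_IMPLICIT
  // only for implicit maps.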

  /// Return true if the provided expression is a final array section. A
  /// final array section is one whose length can't be proved to be one.
  bool isFinalArraySectionExpression(const Expr *E) const {
    const auto *OASE = dyn_cast<OMPArraySectionExpr>(E);

    // It is not an array section and therefore not a unity-size one.
    if (!OASE)
      return false;

    // An array section with no colon always refers to a single element.
    if (OASE->getColonLocFirst().isInvalid())
      return false;

    const Expr *Length = OASE->getLength();

    // If we don't have a length we have to check if the array has size 1
    // for this dimension. Also, we should always expect a length if the
    // base type is pointer.
    if (!Length) {
      QualType BaseQTy = OMPArraySectionExpr::getBaseOriginalType(
                             OASE->getBase()->IgnoreParenImpCasts())
                             .getCanonicalType();
      if (const auto *ATy = dyn_cast<ConstantArrayType>(BaseQTy.getTypePtr()))
        return ATy->getSize().getSExtValue() != 1;
      // If we don't have a constant dimension length, we have to consider
      // the current section as having any size, so it is not necessarily
      // unitary. If it happens to be unity size, that's the user's fault.
      return true;
    }

    // Check if the length evaluates to 1.
    Expr::EvalResult Result;
    if (!Length->EvaluateAsInt(Result, CGF.getContext()))
      return true; // Can have a size other than 1.

    llvm::APSInt ConstLength = Result.Val.getInt();
    return ConstLength.getSExtValue() != 1;
  }
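
  // For intuition, with hypothetical declarations `int a[10];` and `int *p;`:
  //   a[3]    - not an array section, so not final
  //   a[0:1]  - length provably 1, so not final
  //   a[2:]   - no length, but the dimension size 10 != 1, treated as final
  //   p[0:n]  - length not provably 1, treated as final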

  /// Generate the base pointers, section pointers, sizes, map type bits, and
  /// user-defined mappers (all included in \a CombinedInfo) for the provided
  /// map type, map or motion modifiers, and expression components.
  /// \a IsFirstComponentList should be set to true if the provided set of
  /// components is the first associated with a capture.
  void generateInfoForComponentList(
      OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
      ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
      MapCombinedInfoTy &CombinedInfo, StructRangeInfoTy &PartialStruct,
      bool IsFirstComponentList, bool IsImplicit,
      const ValueDecl *Mapper = nullptr, bool ForDeviceAddr = false,
      const ValueDecl *BaseDecl = nullptr, const Expr *MapExpr = nullptr,
      ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
          OverlappedElements = llvm::None) const {
    // The following summarizes what has to be generated for each map and the
    // types below. The generated information is expressed in this order:
    // base pointer, section pointer, size, flags
    // (to add to the ones that come from the map type and modifier).
    //
    // double d;
    // int i[100];
    // float *p;
    //
    // struct S1 {
    //   int i;
    //   float f[50];
    // }
    // struct S2 {
    //   int i;
    //   float f[50];
    //   S1 s;
    //   double *p;
    //   struct S2 *ps;
    //   int &ref;
    // }
    // S2 s;
    // S2 *ps;
    //
    // map(d)
    // &d, &d, sizeof(double), TARGET_PARAM | TO | FROM
    //
    // map(i)
    // &i, &i, 100*sizeof(int), TARGET_PARAM | TO | FROM
    //
    // map(i[1:23])
    // &i(=&i[0]), &i[1], 23*sizeof(int), TARGET_PARAM | TO | FROM
    //
    // map(p)
    // &p, &p, sizeof(float*), TARGET_PARAM | TO | FROM
    //
    // map(p[1:24])
    // &p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM | PTR_AND_OBJ
    // in unified shared memory mode or for local pointers
    // p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM
    //
    // map(s)
    // &s, &s, sizeof(S2), TARGET_PARAM | TO | FROM
    //
    // map(s.i)
    // &s, &(s.i), sizeof(int), TARGET_PARAM | TO | FROM
    //
    // map(s.s.f)
    // &s, &(s.s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
    //
    // map(s.p)
    // &s, &(s.p), sizeof(double*), TARGET_PARAM | TO | FROM
    //
    // map(to: s.p[:22])
    // &s, &(s.p), sizeof(double*), TARGET_PARAM (*)
    // &s, &(s.p), sizeof(double*), MEMBER_OF(1) (**)
    // &(s.p), &(s.p[0]), 22*sizeof(double),
    //   MEMBER_OF(1) | PTR_AND_OBJ | TO (***)
    // (*) alloc space for struct members, only this is a target parameter
    // (**) map the pointer (nothing to be mapped in this example) (the
    //      compiler optimizes this entry out, same in the examples below)
    // (***) map the pointee (map: to)
    //
    // map(to: s.ref)
    // &s, &(s.ref), sizeof(int*), TARGET_PARAM (*)
    // &s, &(s.ref), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | TO (***)
    // (*) alloc space for struct members, only this is a target parameter
    // (**) map the pointer (nothing to be mapped in this example) (the
    //      compiler optimizes this entry out, same in the examples below)
    // (***) map the pointee (map: to)
    //
    // map(s.ps)
    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM | TO | FROM
    //
    // map(from: s.ps->s.i)
    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
    // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
    // &(s.ps), &(s.ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM
    //
    // map(to: s.ps->ps)
    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
    // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
    // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | TO
    //
    // map(s.ps->ps->ps)
    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
    // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
    // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
    // &(s.ps->ps), &(s.ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
    //
    // map(to: s.ps->ps->s.f[:22])
    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
    // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
    // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
    // &(s.ps->ps), &(s.ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
    //
    // map(ps)
    // &ps, &ps, sizeof(S2*), TARGET_PARAM | TO | FROM
    //
    // map(ps->i)
    // ps, &(ps->i), sizeof(int), TARGET_PARAM | TO | FROM
    //
    // map(ps->s.f)
    // ps, &(ps->s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
    //
    // map(from: ps->p)
    // ps, &(ps->p), sizeof(double*), TARGET_PARAM | FROM
    //
    // map(to: ps->p[:22])
    // ps, &(ps->p), sizeof(double*), TARGET_PARAM
    // ps, &(ps->p), sizeof(double*), MEMBER_OF(1)
    // &(ps->p), &(ps->p[0]), 22*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | TO
    //
    // map(ps->ps)
    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM | TO | FROM
    //
    // map(from: ps->ps->s.i)
    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
    // &(ps->ps), &(ps->ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM
    //
    // map(from: ps->ps->ps)
    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
    // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | FROM
    //
    // map(ps->ps->ps->ps)
    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
    // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
    // &(ps->ps->ps), &(ps->ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
    //
    // map(to: ps->ps->ps->s.f[:22])
    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
    // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
    // &(ps->ps->ps), &(ps->ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
    //
    // map(to: s.f[:22]) map(from: s.p[:33])
    // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1) +
    //     sizeof(double*) (**), TARGET_PARAM
    // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | TO
    // &s, &(s.p), sizeof(double*), MEMBER_OF(1)
    // &(s.p), &(s.p[0]), 33*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | FROM
    // (*) allocate contiguous space needed to fit all mapped members even if
    //     we allocate space for members not mapped (in this example,
    //     s.f[22..49] and s.s are not mapped, yet we must allocate space for
    //     them as well because they fall between &s.f[0] and &s.p)
    //
    // map(from: s.f[:22]) map(to: ps->p[:33])
    // &s, &(s.f[0]), 22*sizeof(float), TARGET_PARAM | FROM
    // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->p), sizeof(double*), MEMBER_OF(2) (*)
    // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(2) | PTR_AND_OBJ | TO
    // (*) the struct this entry pertains to is the 2nd element in the list of
    //     arguments, hence MEMBER_OF(2)
    //
    // map(from: s.f[:22], s.s) map(to: ps->p[:33])
    // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1), TARGET_PARAM
    // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | FROM
    // &s, &(s.s), sizeof(struct S1), MEMBER_OF(1) | FROM
    // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->p), sizeof(double*), MEMBER_OF(4) (*)
    // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(4) | PTR_AND_OBJ | TO
    // (*) the struct this entry pertains to is the 4th element in the list
    //     of arguments, hence MEMBER_OF(4)

    // Track if the map information being generated is the first for a capture.
    bool IsCaptureFirstInfo = IsFirstComponentList;
    // When the variable is on a declare target link or in a to clause with
    // unified memory, a reference is needed to hold the host/device address
    // of the variable.
    bool RequiresReference = false;

    // Scan the components from the base to the complete expression.
    auto CI = Components.rbegin();
    auto CE = Components.rend();
    auto I = CI;

    // Track if the map information being generated is the first for a list of
    // components.
    bool IsExpressionFirstInfo = true;
    bool FirstPointerInComplexData = false;
    Address BP = Address::invalid();
    const Expr *AssocExpr = I->getAssociatedExpression();
    const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr);
    const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);
    const auto *OAShE = dyn_cast<OMPArrayShapingExpr>(AssocExpr);

    if (isa<MemberExpr>(AssocExpr)) {
      // The base is the 'this' pointer. The content of the pointer is going
      // to be the base of the field being mapped.
      BP = CGF.LoadCXXThisAddress();
    } else if ((AE && isa<CXXThisExpr>(AE->getBase()->IgnoreParenImpCasts())) ||
               (OASE &&
                isa<CXXThisExpr>(OASE->getBase()->IgnoreParenImpCasts()))) {
      BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF);
    } else if (OAShE &&
               isa<CXXThisExpr>(OAShE->getBase()->IgnoreParenCasts())) {
      BP = Address(
          CGF.EmitScalarExpr(OAShE->getBase()),
          CGF.getContext().getTypeAlignInChars(OAShE->getBase()->getType()));
    } else {
      // The base is the reference to the variable.
      // BP = &Var.
      BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF);
      if (const auto *VD =
              dyn_cast_or_null<VarDecl>(I->getAssociatedDeclaration())) {
        if (llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
                OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD)) {
          if ((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
              (*Res == OMPDeclareTargetDeclAttr::MT_To &&
               CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())) {
            RequiresReference = true;
            BP = CGF.CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
          }
        }
      }

      // If the variable is a pointer and is being dereferenced (i.e. is not
      // the last component), the base has to be the pointer itself, not its
      // reference. References are ignored for mapping purposes.
      QualType Ty =
          I->getAssociatedDeclaration()->getType().getNonReferenceType();
      if (Ty->isAnyPointerType() && std::next(I) != CE) {
        // No need to generate individual map information for the pointer, it
        // can be associated with the combined storage if shared memory mode is
        // active or the base declaration is not a global variable.
        const auto *VD = dyn_cast<VarDecl>(I->getAssociatedDeclaration());
        if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
            !VD || VD->hasLocalStorage())
          BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
        else
          FirstPointerInComplexData = true;
        ++I;
      }
    }

    // Track whether a component of the list should be marked as MEMBER_OF some
    // combined entry (for partial structs). Only the first PTR_AND_OBJ entry
    // in a component list should be marked as MEMBER_OF; all subsequent
    // entries do not belong to the base struct. E.g.
    // struct S2 s;
    // s.ps->ps->ps->f[:]
    //   (1)  (2)  (3)  (4)
    // ps(1) is a member pointer, ps(2) is a pointee of ps(1), so it is a
    // PTR_AND_OBJ entry; the PTR is ps(1), so MEMBER_OF the base struct. ps(3)
    // is the pointee of ps(2), which is not a member of struct s, so it should
    // not be marked as such (it is still PTR_AND_OBJ).
    // The variable is initialized to false so that PTR_AND_OBJ entries which
    // are not struct members are not considered (e.g. array of pointers to
    // data).
    bool ShouldBeMemberOf = false;

    // Variable keeping track of whether or not we have encountered a component
    // in the component list which is a member expression. Useful when we have
    // a pointer or a final array section, in which case it is the previous
    // component in the list which tells us whether we have a member expression.
    // E.g. X.f[:]
    // While processing the final array section "[:]" it is "f" which tells us
    // whether we are dealing with a member of a declared struct.
    const MemberExpr *EncounteredME = nullptr;

    // Track the total number of dimensions. Start from one for the dummy
    // dimension.
    uint64_t DimSize = 1;

    bool IsNonContiguous = CombinedInfo.NonContigInfo.IsNonContiguous;
    bool IsPrevMemberReference = false;

    for (; I != CE; ++I) {
      // If the current component is a member of a struct (parent struct),
      // mark it.
      if (!EncounteredME) {
        EncounteredME = dyn_cast<MemberExpr>(I->getAssociatedExpression());
        // If we encounter a PTR_AND_OBJ entry from now on it should be marked
        // as MEMBER_OF the parent struct.
        if (EncounteredME) {
          ShouldBeMemberOf = true;
          // Do not emit as complex pointer if this is actually not an
          // array-like expression.
          if (FirstPointerInComplexData) {
            QualType Ty = std::prev(I)
                              ->getAssociatedDeclaration()
                              ->getType()
                              .getNonReferenceType();
            BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
            FirstPointerInComplexData = false;
          }
        }
      }

      auto Next = std::next(I);

      // We need to generate the addresses and sizes if this is the last
      // component, if the component is a pointer, or if it is an array section
      // whose length can't be proved to be one. If this is a pointer, it
      // becomes the base address for the following components.

      // A final array section is one whose length can't be proved to be one.
      // If the map item is non-contiguous then we don't treat any array
      // section as a final array section.
      bool IsFinalArraySection =
          !IsNonContiguous &&
          isFinalArraySectionExpression(I->getAssociatedExpression());

      // If we have a declaration for the mapping use that, otherwise use
      // the base declaration of the map clause.
      const ValueDecl *MapDecl = (I->getAssociatedDeclaration())
                                     ? I->getAssociatedDeclaration()
                                     : BaseDecl;
      MapExpr = (I->getAssociatedExpression()) ? I->getAssociatedExpression()
                                               : MapExpr;

      // Get information on whether the element is a pointer. Have to do a
      // special treatment for array sections given that they are built-in
      // types.
      const auto *OASE =
          dyn_cast<OMPArraySectionExpr>(I->getAssociatedExpression());
      const auto *OAShE =
          dyn_cast<OMPArrayShapingExpr>(I->getAssociatedExpression());
      const auto *UO = dyn_cast<UnaryOperator>(I->getAssociatedExpression());
      const auto *BO = dyn_cast<BinaryOperator>(I->getAssociatedExpression());
      bool IsPointer =
          OAShE ||
          (OASE && OMPArraySectionExpr::getBaseOriginalType(OASE)
                       .getCanonicalType()
                       ->isAnyPointerType()) ||
          I->getAssociatedExpression()->getType()->isAnyPointerType();
      bool IsMemberReference = isa<MemberExpr>(I->getAssociatedExpression()) &&
                               MapDecl &&
                               MapDecl->getType()->isLValueReferenceType();
      bool IsNonDerefPointer = IsPointer && !UO && !BO && !IsNonContiguous;

      if (OASE)
        ++DimSize;

      if (Next == CE || IsMemberReference || IsNonDerefPointer ||
          IsFinalArraySection) {
        // If this is not the last component, we expect the pointer to be
        // associated with an array expression or member expression.
        assert((Next == CE ||
                isa<MemberExpr>(Next->getAssociatedExpression()) ||
                isa<ArraySubscriptExpr>(Next->getAssociatedExpression()) ||
                isa<OMPArraySectionExpr>(Next->getAssociatedExpression()) ||
                isa<OMPArrayShapingExpr>(Next->getAssociatedExpression()) ||
                isa<UnaryOperator>(Next->getAssociatedExpression()) ||
                isa<BinaryOperator>(Next->getAssociatedExpression())) &&
               "Unexpected expression");

        Address LB = Address::invalid();
        Address LowestElem = Address::invalid();
        auto &&EmitMemberExprBase = [](CodeGenFunction &CGF,
                                       const MemberExpr *E) {
          const Expr *BaseExpr = E->getBase();
          // If this is s.x, emit s as an lvalue. If it is s->x, emit s as a
          // scalar.
          LValue BaseLV;
          if (E->isArrow()) {
            LValueBaseInfo BaseInfo;
            TBAAAccessInfo TBAAInfo;
            Address Addr =
                CGF.EmitPointerWithAlignment(BaseExpr, &BaseInfo, &TBAAInfo);
            QualType PtrTy = BaseExpr->getType()->getPointeeType();
            BaseLV = CGF.MakeAddrLValue(Addr, PtrTy, BaseInfo, TBAAInfo);
          } else {
            BaseLV = CGF.EmitOMPSharedLValue(BaseExpr);
          }
          return BaseLV;
        };
        if (OAShE) {
          LowestElem = LB = Address(CGF.EmitScalarExpr(OAShE->getBase()),
                                    CGF.getContext().getTypeAlignInChars(
                                        OAShE->getBase()->getType()));
        } else if (IsMemberReference) {
          const auto *ME = cast<MemberExpr>(I->getAssociatedExpression());
          LValue BaseLVal = EmitMemberExprBase(CGF, ME);
          LowestElem = CGF.EmitLValueForFieldInitialization(
                              BaseLVal, cast<FieldDecl>(MapDecl))
                           .getAddress(CGF);
          LB = CGF.EmitLoadOfReferenceLValue(LowestElem, MapDecl->getType())
                   .getAddress(CGF);
        } else {
          LowestElem = LB =
              CGF.EmitOMPSharedLValue(I->getAssociatedExpression())
                  .getAddress(CGF);
        }

        // If this component is a pointer inside the base struct then we don't
        // need to create any entry for it - it will be combined with the
        // object it is pointing to into a single PTR_AND_OBJ entry.
        bool IsMemberPointerOrAddr =
            EncounteredME &&
            (((IsPointer || ForDeviceAddr) &&
              I->getAssociatedExpression() == EncounteredME) ||
             (IsPrevMemberReference && !IsPointer) ||
             (IsMemberReference && Next != CE &&
              !Next->getAssociatedExpression()->getType()->isPointerType()));
        if (!OverlappedElements.empty() && Next == CE) {
          // Handle base element with the info for overlapped elements.
          assert(!PartialStruct.Base.isValid() && "The base element is set.");
          assert(!IsPointer &&
                 "Unexpected base element with the pointer type.");
          // Mark the whole struct as the struct that requires allocation on
          // the device.
          PartialStruct.LowestElem = {0, LowestElem};
          CharUnits TypeSize = CGF.getContext().getTypeSizeInChars(
              I->getAssociatedExpression()->getType());
          Address HB = CGF.Builder.CreateConstGEP(
              CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(LowestElem,
                                                              CGF.VoidPtrTy),
              TypeSize.getQuantity() - 1);
          PartialStruct.HighestElem = {
              std::numeric_limits<decltype(
                  PartialStruct.HighestElem.first)>::max(),
              HB};
          PartialStruct.Base = BP;
          PartialStruct.LB = LB;
          assert(
              PartialStruct.PreliminaryMapData.BasePointers.empty() &&
              "Overlapped elements must be used only once for the variable.");
          std::swap(PartialStruct.PreliminaryMapData, CombinedInfo);
          // Emit data for non-overlapped data.
          OpenMPOffloadMappingFlags Flags =
              OMP_MAP_MEMBER_OF |
              getMapTypeBits(MapType, MapModifiers, MotionModifiers, IsImplicit,
                             /*AddPtrFlag=*/false,
                             /*AddIsTargetParamFlag=*/false, IsNonContiguous);
          llvm::Value *Size = nullptr;
          // Do bitcopy of all non-overlapped structure elements.
          for (OMPClauseMappableExprCommon::MappableExprComponentListRef
                   Component : OverlappedElements) {
            Address ComponentLB = Address::invalid();
            for (const OMPClauseMappableExprCommon::MappableComponent &MC :
                 Component) {
              if (const ValueDecl *VD = MC.getAssociatedDeclaration()) {
                const auto *FD = dyn_cast<FieldDecl>(VD);
                if (FD && FD->getType()->isLValueReferenceType()) {
                  const auto *ME =
                      cast<MemberExpr>(MC.getAssociatedExpression());
                  LValue BaseLVal = EmitMemberExprBase(CGF, ME);
                  ComponentLB =
                      CGF.EmitLValueForFieldInitialization(BaseLVal, FD)
                          .getAddress(CGF);
                } else {
                  ComponentLB =
                      CGF.EmitOMPSharedLValue(MC.getAssociatedExpression())
                          .getAddress(CGF);
                }
                Size = CGF.Builder.CreatePtrDiff(
                    CGF.EmitCastToVoidPtr(ComponentLB.getPointer()),
                    CGF.EmitCastToVoidPtr(LB.getPointer()));
                break;
              }
            }
            assert(Size && "Failed to determine structure size");
            CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
            CombinedInfo.BasePointers.push_back(BP.getPointer());
            CombinedInfo.Pointers.push_back(LB.getPointer());
            CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
                Size, CGF.Int64Ty, /*isSigned=*/true));
            CombinedInfo.Types.push_back(Flags);
            CombinedInfo.Mappers.push_back(nullptr);
            CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
                                                                      : 1);
            LB = CGF.Builder.CreateConstGEP(ComponentLB, 1);
          }
          CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
          CombinedInfo.BasePointers.push_back(BP.getPointer());
          CombinedInfo.Pointers.push_back(LB.getPointer());
          Size = CGF.Builder.CreatePtrDiff(
              CGF.Builder.CreateConstGEP(HB, 1).getPointer(),
              CGF.EmitCastToVoidPtr(LB.getPointer()));
          CombinedInfo.Sizes.push_back(
              CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));
          CombinedInfo.Types.push_back(Flags);
          CombinedInfo.Mappers.push_back(nullptr);
          CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
                                                                    : 1);
          break;
        }
        llvm::Value *Size = getExprTypeSize(I->getAssociatedExpression());
        if (!IsMemberPointerOrAddr ||
            (Next == CE && MapType != OMPC_MAP_unknown)) {
          CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
          CombinedInfo.BasePointers.push_back(BP.getPointer());
          CombinedInfo.Pointers.push_back(LB.getPointer());
          CombinedInfo.Sizes.push_back(
              CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));
          CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
                                                                    : 1);

          // If Mapper is valid, the last component inherits the mapper.
          bool HasMapper = Mapper && Next == CE;
          CombinedInfo.Mappers.push_back(HasMapper ? Mapper : nullptr);

          // We need to add a pointer flag for each map that comes from the
          // same expression except for the first one. We also need to signal
          // this map is the first one that relates with the current capture
          // (there is a set of entries for each capture).
          OpenMPOffloadMappingFlags Flags = getMapTypeBits(
              MapType, MapModifiers, MotionModifiers, IsImplicit,
              !IsExpressionFirstInfo || RequiresReference ||
                  FirstPointerInComplexData || IsMemberReference,
              IsCaptureFirstInfo && !RequiresReference, IsNonContiguous);

          if (!IsExpressionFirstInfo || IsMemberReference) {
            // If we have a PTR_AND_OBJ pair where the OBJ is a pointer as
            // well, then we reset the TO/FROM/ALWAYS/DELETE/CLOSE flags.
            if (IsPointer || (IsMemberReference && Next != CE))
              Flags &= ~(OMP_MAP_TO | OMP_MAP_FROM | OMP_MAP_ALWAYS |
                         OMP_MAP_DELETE | OMP_MAP_CLOSE);

            if (ShouldBeMemberOf) {
              // Set placeholder value MEMBER_OF=FFFF to indicate that the flag
              // should be later updated with the correct value of MEMBER_OF.
              Flags |= OMP_MAP_MEMBER_OF;
              // From now on, all subsequent PTR_AND_OBJ entries should not be
              // marked as MEMBER_OF.
              ShouldBeMemberOf = false;
            }
          }

          CombinedInfo.Types.push_back(Flags);
        }

        // If we have encountered a member expression so far, keep track of the
        // mapped member. If the parent is "*this", then the value declaration
        // is nullptr.
        if (EncounteredME) {
          const auto *FD = cast<FieldDecl>(EncounteredME->getMemberDecl());
          unsigned FieldIndex = FD->getFieldIndex();

          // Update info about the lowest and highest elements for this struct.
          if (!PartialStruct.Base.isValid()) {
            PartialStruct.LowestElem = {FieldIndex, LowestElem};
            if (IsFinalArraySection) {
              Address HB =
                  CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false)
                      .getAddress(CGF);
              PartialStruct.HighestElem = {FieldIndex, HB};
            } else {
              PartialStruct.HighestElem = {FieldIndex, LowestElem};
            }
            PartialStruct.Base = BP;
            PartialStruct.LB = BP;
          } else if (FieldIndex < PartialStruct.LowestElem.first) {
            PartialStruct.LowestElem = {FieldIndex, LowestElem};
          } else if (FieldIndex > PartialStruct.HighestElem.first) {
            PartialStruct.HighestElem = {FieldIndex, LowestElem};
          }
        }

        // Need to emit combined struct for array sections.
        if (IsFinalArraySection || IsNonContiguous)
          PartialStruct.IsArraySection = true;

        // If we have a final array section, we are done with this expression.
        if (IsFinalArraySection)
          break;

        // The pointer becomes the base for the next element.
        if (Next != CE)
          BP = IsMemberReference ? LowestElem : LB;

        IsExpressionFirstInfo = false;
        IsCaptureFirstInfo = false;
        FirstPointerInComplexData = false;
        IsPrevMemberReference = IsMemberReference;
      } else if (FirstPointerInComplexData) {
        QualType Ty = Components.rbegin()
                          ->getAssociatedDeclaration()
                          ->getType()
                          .getNonReferenceType();
        BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
        FirstPointerInComplexData = false;
      }
    }
    // If we ran over the whole component list, allocate the space for the
    // whole record.
    if (!EncounteredME)
      PartialStruct.HasCompleteRecord = true;

    if (!IsNonContiguous)
      return;

    const ASTContext &Context = CGF.getContext();

    // For supporting strides in array sections, we need to initialize the
    // first dimension size as 1, the first offset as 0, and the first count
    // as 1.
    MapValuesArrayTy CurOffsets = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 0)};
    MapValuesArrayTy CurCounts = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)};
    MapValuesArrayTy CurStrides;
    MapValuesArrayTy DimSizes{llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)};
    uint64_t ElementTypeSize;

    // Collect size information for each dimension and get the element size as
    // the first stride. For example, for `int arr[10][10]`, the DimSizes
    // should be [10, 10] and the first stride is 4 bytes.
    for (const OMPClauseMappableExprCommon::MappableComponent &Component :
         Components) {
      const Expr *AssocExpr = Component.getAssociatedExpression();
      const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);

      if (!OASE)
        continue;

      QualType Ty = OMPArraySectionExpr::getBaseOriginalType(OASE->getBase());
      auto *CAT = Context.getAsConstantArrayType(Ty);
      auto *VAT = Context.getAsVariableArrayType(Ty);

      // We need all the dimension sizes except for the last dimension.
      assert((VAT || CAT || &Component == &*Components.begin()) &&
             "Should be either ConstantArray or VariableArray if not the "
             "first Component");

      // Get element size if CurStrides is empty.
      if (CurStrides.empty()) {
        const Type *ElementType = nullptr;
        if (CAT)
          ElementType = CAT->getElementType().getTypePtr();
        else if (VAT)
          ElementType = VAT->getElementType().getTypePtr();
        else
          assert(&Component == &*Components.begin() &&
                 "Only expect pointer (non CAT or VAT) when this is the "
                 "first Component");
        // If ElementType is null, then it means the base is a pointer
        // (neither CAT nor VAT) and we'll attempt to get ElementType again
        // for the next iteration.
        if (ElementType) {
          // For the case of having a pointer as the base, we need to remove
          // one level of indirection.
          if (&Component != &*Components.begin())
            ElementType = ElementType->getPointeeOrArrayElementType();
          ElementTypeSize =
              Context.getTypeSizeInChars(ElementType).getQuantity();
          CurStrides.push_back(
              llvm::ConstantInt::get(CGF.Int64Ty, ElementTypeSize));
        }
      }
      // Get the dimension value, except for the last dimension since we don't
      // need it.
      if (DimSizes.size() < Components.size() - 1) {
        if (CAT)
          DimSizes.push_back(llvm::ConstantInt::get(
              CGF.Int64Ty, CAT->getSize().getZExtValue()));
        else if (VAT)
          DimSizes.push_back(CGF.Builder.CreateIntCast(
              CGF.EmitScalarExpr(VAT->getSizeExpr()), CGF.Int64Ty,
              /*IsSigned=*/false));
      }
    }

    // Skip the dummy dimension since we already have its information.
    auto DI = DimSizes.begin() + 1;
    // Product of dimensions.
    llvm::Value *DimProd =
        llvm::ConstantInt::get(CGF.CGM.Int64Ty, ElementTypeSize);

    // Collect info for non-contiguous maps. Notice that offset, count, and
    // stride are only meaningful for array sections, so we insert a null for
    // anything other than an array section.
    // Also, the sizes of offset, count, and stride are not the same as those
    // of pointers, base_pointers, sizes, or dims. Instead, the sizes of
    // offset, count, and stride equal the number of non-contiguous
    // declarations in the target update to/from clause.
    for (const OMPClauseMappableExprCommon::MappableComponent &Component :
         Components) {
      const Expr *AssocExpr = Component.getAssociatedExpression();

      if (const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr)) {
        llvm::Value *Offset = CGF.Builder.CreateIntCast(
            CGF.EmitScalarExpr(AE->getIdx()), CGF.Int64Ty,
            /*isSigned=*/false);
        CurOffsets.push_back(Offset);
        CurCounts.push_back(llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/1));
        CurStrides.push_back(CurStrides.back());
        continue;
      }

      const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);

      if (!OASE)
        continue;

      // Offset
      const Expr *OffsetExpr = OASE->getLowerBound();
      llvm::Value *Offset = nullptr;
      if (!OffsetExpr) {
        // If the offset is absent, then we just set it to zero.
        Offset = llvm::ConstantInt::get(CGF.Int64Ty, 0);
      } else {
        Offset = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(OffsetExpr),
                                           CGF.Int64Ty,
                                           /*isSigned=*/false);
      }
      CurOffsets.push_back(Offset);

      // Count
      const Expr *CountExpr = OASE->getLength();
      llvm::Value *Count = nullptr;
      if (!CountExpr) {
        // In Clang, once a higher dimension is an array section, we construct
        // all the lower dimensions as array sections. However, for a case like
        // arr[0:2][2], Clang constructs the inner dimension as an array
        // section, even though it actually is not in array section form
        // according to the spec.
        if (!OASE->getColonLocFirst().isValid() &&
            !OASE->getColonLocSecond().isValid()) {
          Count = llvm::ConstantInt::get(CGF.Int64Ty, 1);
        } else {
          // OpenMP 5.0, 2.1.5 Array Sections, Description.
          // When the length is absent it defaults to ⌈(size −
          // lower-bound)/stride⌉, where size is the size of the array
          // dimension.
          const Expr *StrideExpr = OASE->getStride();
          llvm::Value *Stride =
              StrideExpr
                  ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr),
                                              CGF.Int64Ty, /*isSigned=*/false)
                  : nullptr;
          if (Stride)
            Count = CGF.Builder.CreateUDiv(
                CGF.Builder.CreateNUWSub(*DI, Offset), Stride);
          else
            Count = CGF.Builder.CreateNUWSub(*DI, Offset);
        }
      } else {
        Count = CGF.EmitScalarExpr(CountExpr);
      }
      Count = CGF.Builder.CreateIntCast(Count, CGF.Int64Ty, /*isSigned=*/false);
      CurCounts.push_back(Count);

      // Stride_n' = Stride_n * (D_0 * D_1 ... * D_n-1) * Unit size
      // Take `int arr[5][5][5]` and `arr[0:2:2][1:2:1][0:2:2]` as an example:
      //              Offset Count     Stride
      //    D0          0      1         4    (int)    <- dummy dimension
      //    D1          0      2         8    (2 * (1) * 4)
      //    D2          1      2        20    (1 * (1 * 5) * 4)
      //    D3          0      2       200    (2 * (1 * 5 * 4) * 4)
      const Expr *StrideExpr = OASE->getStride();
      llvm::Value *Stride =
          StrideExpr
              ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr),
                                          CGF.Int64Ty, /*isSigned=*/false)
              : nullptr;
      DimProd = CGF.Builder.CreateNUWMul(DimProd, *(DI - 1));
      if (Stride)
        CurStrides.push_back(CGF.Builder.CreateNUWMul(DimProd, Stride));
      else
        CurStrides.push_back(DimProd);
      if (DI != DimSizes.end())
        ++DI;
    }

    CombinedInfo.NonContigInfo.Offsets.push_back(CurOffsets);
    CombinedInfo.NonContigInfo.Counts.push_back(CurCounts);
    CombinedInfo.NonContigInfo.Strides.push_back(CurStrides);
  }

  /// Return the adjusted map modifiers if the declaration a capture refers to
  /// appears in a first-private clause. This is expected to be used only with
  /// directives that start with 'target'.
  MappableExprsHandler::OpenMPOffloadMappingFlags
  getMapModifiersForPrivateClauses(const CapturedStmt::Capture &Cap) const {
    assert(Cap.capturesVariable() && "Expected capture by reference only!");

    // A first private variable captured by reference will use only the
    // 'private ptr' and 'map to' flag. Return the right flags if the captured
    // declaration is known as first-private in this handler.
    if (FirstPrivateDecls.count(Cap.getCapturedVar())) {
      if (Cap.getCapturedVar()->getType()->isAnyPointerType())
        return MappableExprsHandler::OMP_MAP_TO |
               MappableExprsHandler::OMP_MAP_PTR_AND_OBJ;
      return MappableExprsHandler::OMP_MAP_PRIVATE |
             MappableExprsHandler::OMP_MAP_TO;
    }
    return MappableExprsHandler::OMP_MAP_TO |
           MappableExprsHandler::OMP_MAP_FROM;
  }

  static OpenMPOffloadMappingFlags getMemberOfFlag(unsigned Position) {
    // Rotate by getFlagMemberOffset() bits.
    return static_cast<OpenMPOffloadMappingFlags>(((uint64_t)Position + 1)
                                                  << getFlagMemberOffset());
  }

  static void setCorrectMemberOfFlag(OpenMPOffloadMappingFlags &Flags,
                                     OpenMPOffloadMappingFlags MemberOfFlag) {
    // If the entry is PTR_AND_OBJ but has not been marked with the special
    // placeholder value 0xFFFF in the MEMBER_OF field, then it should not be
    // marked as MEMBER_OF.
    if ((Flags & OMP_MAP_PTR_AND_OBJ) &&
        ((Flags & OMP_MAP_MEMBER_OF) != OMP_MAP_MEMBER_OF))
      return;

    // Reset the placeholder value to prepare the flag for the assignment of
    // the proper MEMBER_OF value.
    Flags &= ~OMP_MAP_MEMBER_OF;
    Flags |= MemberOfFlag;
  }
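
  // Sketch of how the two helpers above interact, using the enum values from
  // this file: for the third argument in the arguments list (Position == 2),
  // getMemberOfFlag() yields ((uint64_t)3) << getFlagMemberOffset(), i.e.
  // MEMBER_OF(3). setCorrectMemberOfFlag() then swaps the 0xffff placeholder
  // for that value, while PTR_AND_OBJ entries that were never given the
  // placeholder are deliberately left untouched.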

  void getPlainLayout(const CXXRecordDecl *RD,
                      llvm::SmallVectorImpl<const FieldDecl *> &Layout,
                      bool AsBase) const {
    const CGRecordLayout &RL = CGF.getTypes().getCGRecordLayout(RD);

    llvm::StructType *St =
        AsBase ? RL.getBaseSubobjectLLVMType() : RL.getLLVMType();

    unsigned NumElements = St->getNumElements();
    llvm::SmallVector<
        llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>, 4>
        RecordLayout(NumElements);

    // Fill bases.
    for (const auto &I : RD->bases()) {
      if (I.isVirtual())
        continue;
      const auto *Base = I.getType()->getAsCXXRecordDecl();
      // Ignore empty bases.
      if (Base->isEmpty() || CGF.getContext()
                                 .getASTRecordLayout(Base)
                                 .getNonVirtualSize()
                                 .isZero())
        continue;

      unsigned FieldIndex = RL.getNonVirtualBaseLLVMFieldNo(Base);
      RecordLayout[FieldIndex] = Base;
    }
    // Fill in virtual bases.
    for (const auto &I : RD->vbases()) {
      const auto *Base = I.getType()->getAsCXXRecordDecl();
      // Ignore empty bases.
      if (Base->isEmpty())
        continue;
      unsigned FieldIndex = RL.getVirtualBaseIndex(Base);
      if (RecordLayout[FieldIndex])
        continue;
      RecordLayout[FieldIndex] = Base;
    }
    // Fill in all the fields.
    assert(!RD->isUnion() && "Unexpected union.");
    for (const auto *Field : RD->fields()) {
      // Fill in non-bitfields. (Bitfields always use a zero pattern, which we
      // will fill in later.)
      if (!Field->isBitField() && !Field->isZeroSize(CGF.getContext())) {
        unsigned FieldIndex = RL.getLLVMFieldNo(Field);
        RecordLayout[FieldIndex] = Field;
      }
    }
    for (const llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>
             &Data : RecordLayout) {
      if (Data.isNull())
        continue;
      if (const auto *Base = Data.dyn_cast<const CXXRecordDecl *>())
        getPlainLayout(Base, Layout, /*AsBase=*/true);
      else
        Layout.push_back(Data.get<const FieldDecl *>());
    }
  }
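
  // Illustrative result for getPlainLayout(), with hypothetical types that
  // are not part of this file:
  //
  //   struct B { int x; };
  //   struct D : B { int y; int z; };
  //
  // getPlainLayout(D, Layout, /*AsBase=*/false) first recurses into the
  // non-virtual base B (appending B::x) and then appends D::y and D::z, i.e.
  // it flattens the record in LLVM field order while skipping empty bases,
  // bit-fields, and zero-size fields.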
8546 auto &&InfoGen =
8547 [&Info, &SkipVarSet](
8548 const ValueDecl *D, MapKind Kind,
8549 OMPClauseMappableExprCommon::MappableExprComponentListRef L,
8550 OpenMPMapClauseKind MapType,
8551 ArrayRef<OpenMPMapModifierKind> MapModifiers,
8552 ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
8553 bool ReturnDevicePointer, bool IsImplicit, const ValueDecl *Mapper,
8554 const Expr *VarRef = nullptr, bool ForDeviceAddr = false) {
8555 if (SkipVarSet.contains(D))
8556 return;
8557 auto It = Info.find(D);
8558 if (It == Info.end())
8559 It = Info
8560 .insert(std::make_pair(
8561 D, SmallVector<SmallVector<MapInfo, 8>, 4>(Total)))
8562 .first;
8563 It->second[Kind].emplace_back(
8564 L, MapType, MapModifiers, MotionModifiers, ReturnDevicePointer,
8565 IsImplicit, Mapper, VarRef, ForDeviceAddr);
8566 };
8567
8568 for (const auto *Cl : Clauses) {
8569 const auto *C = dyn_cast<OMPMapClause>(Cl);
8570 if (!C)
8571 continue;
8572 MapKind Kind = Other;
8573 if (!C->getMapTypeModifiers().empty() &&
8574 llvm::any_of(C->getMapTypeModifiers(), [](OpenMPMapModifierKind K) {
8575 return K == OMPC_MAP_MODIFIER_present;
8576 }))
8577 Kind = Present;
8578 else if (C->getMapType() == OMPC_MAP_alloc)
8579 Kind = Allocs;
8580 const auto *EI = C->getVarRefs().begin();
8581 for (const auto L : C->component_lists()) {
8582 const Expr *E = (C->getMapLoc().isValid()) ? *EI : nullptr;
8583 InfoGen(std::get<0>(L), Kind, std::get<1>(L), C->getMapType(),
8584 C->getMapTypeModifiers(), llvm::None,
8585 /*ReturnDevicePointer=*/false, C->isImplicit(), std::get<2>(L),
8586 E);
8587 ++EI;
8588 }
8589 }
8590 for (const auto *Cl : Clauses) {
8591 const auto *C = dyn_cast<OMPToClause>(Cl);
8592 if (!C)
8593 continue;
8594 MapKind Kind = Other;
8595 if (!C->getMotionModifiers().empty() &&
8596 llvm::any_of(C->getMotionModifiers(), [](OpenMPMotionModifierKind K) {
8597 return K == OMPC_MOTION_MODIFIER_present;
8598 }))
8599 Kind = Present;
8600 const auto *EI = C->getVarRefs().begin();
8601 for (const auto L : C->component_lists()) {
8602 InfoGen(std::get<0>(L), Kind, std::get<1>(L), OMPC_MAP_to, llvm::None,
8603 C->getMotionModifiers(), /*ReturnDevicePointer=*/false,
8604 C->isImplicit(), std::get<2>(L), *EI);
8605 ++EI;
8606 }
8607 }
8608 for (const auto *Cl : Clauses) {
8609 const auto *C = dyn_cast<OMPFromClause>(Cl);
8610 if (!C)
8611 continue;
8612 MapKind Kind = Other;
8613 if (!C->getMotionModifiers().empty() &&
8614 llvm::any_of(C->getMotionModifiers(), [](OpenMPMotionModifierKind K) {
8615 return K == OMPC_MOTION_MODIFIER_present;
8616 }))
8617 Kind = Present;
8618 const auto *EI = C->getVarRefs().begin();
8619 for (const auto L : C->component_lists()) {
8620 InfoGen(std::get<0>(L), Kind, std::get<1>(L), OMPC_MAP_from, llvm::None,
8621 C->getMotionModifiers(), /*ReturnDevicePointer=*/false,
8622 C->isImplicit(), std::get<2>(L), *EI);
8623 ++EI;
8624 }
8625 }
8626
8627 // Look at the use_device_ptr clause information and mark the existing map
8628 // entries as such. If there is no map information for an entry in the
8629 // use_device_ptr list, we create one with map type 'alloc' and zero size
8630 // section. It is the user's fault if that was not mapped before. If there is
8631 // no map information and the pointer is a struct member, then we defer the
8632 // emission of that entry until the whole struct has been processed.
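// For instance (an illustrative case), with
//   #pragma omp target data map(tofrom : s) use_device_ptr(s.p)
// the entry for 's.p' cannot be finalized until every mapped member of 's'
// has been seen, so it is recorded in the DeferredInfo map declared below
// and only emitted once the enclosing struct has been processed.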
8633 llvm::MapVector<CanonicalDeclPtr<const Decl>, 8634 SmallVector<DeferredDevicePtrEntryTy, 4>> 8635 DeferredInfo; 8636 MapCombinedInfoTy UseDevicePtrCombinedInfo; 8637 8638 for (const auto *Cl : Clauses) { 8639 const auto *C = dyn_cast<OMPUseDevicePtrClause>(Cl); 8640 if (!C) 8641 continue; 8642 for (const auto L : C->component_lists()) { 8643 OMPClauseMappableExprCommon::MappableExprComponentListRef Components = 8644 std::get<1>(L); 8645 assert(!Components.empty() && 8646 "Not expecting empty list of components!"); 8647 const ValueDecl *VD = Components.back().getAssociatedDeclaration(); 8648 VD = cast<ValueDecl>(VD->getCanonicalDecl()); 8649 const Expr *IE = Components.back().getAssociatedExpression(); 8650 // If the first component is a member expression, we have to look into 8651 // 'this', which maps to null in the map of map information. Otherwise 8652 // look directly for the information. 8653 auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD); 8654 8655 // We potentially have map information for this declaration already. 8656 // Look for the first set of components that refer to it. 8657 if (It != Info.end()) { 8658 bool Found = false; 8659 for (auto &Data : It->second) { 8660 auto *CI = llvm::find_if(Data, [VD](const MapInfo &MI) { 8661 return MI.Components.back().getAssociatedDeclaration() == VD; 8662 }); 8663 // If we found a map entry, signal that the pointer has to be 8664 // returned and move on to the next declaration. Exclude cases where 8665 // the base pointer is mapped as array subscript, array section or 8666 // array shaping. The base address is passed as a pointer to base in 8667 // this case and cannot be used as a base for use_device_ptr list 8668 // item. 8669 if (CI != Data.end()) { 8670 auto PrevCI = std::next(CI->Components.rbegin()); 8671 const auto *VarD = dyn_cast<VarDecl>(VD); 8672 if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() || 8673 isa<MemberExpr>(IE) || 8674 !VD->getType().getNonReferenceType()->isPointerType() || 8675 PrevCI == CI->Components.rend() || 8676 isa<MemberExpr>(PrevCI->getAssociatedExpression()) || !VarD || 8677 VarD->hasLocalStorage()) { 8678 CI->ReturnDevicePointer = true; 8679 Found = true; 8680 break; 8681 } 8682 } 8683 } 8684 if (Found) 8685 continue; 8686 } 8687 8688 // We didn't find any match in our map information - generate a zero 8689 // size array section - if the pointer is a struct member we defer this 8690 // action until the whole struct has been processed. 8691 if (isa<MemberExpr>(IE)) { 8692 // Insert the pointer into Info to be processed by 8693 // generateInfoForComponentList. Because it is a member pointer 8694 // without a pointee, no entry will be generated for it, therefore 8695 // we need to generate one after the whole struct has been processed. 8696 // Nonetheless, generateInfoForComponentList must be called to take 8697 // the pointer into account for the calculation of the range of the 8698 // partial struct. 
8699 InfoGen(nullptr, Other, Components, OMPC_MAP_unknown, llvm::None,
8700 llvm::None, /*ReturnDevicePointer=*/false, C->isImplicit(),
8701 nullptr);
8702 DeferredInfo[nullptr].emplace_back(IE, VD, /*ForDeviceAddr=*/false);
8703 } else {
8704 llvm::Value *Ptr =
8705 CGF.EmitLoadOfScalar(CGF.EmitLValue(IE), IE->getExprLoc());
8706 UseDevicePtrCombinedInfo.Exprs.push_back(VD);
8707 UseDevicePtrCombinedInfo.BasePointers.emplace_back(Ptr, VD);
8708 UseDevicePtrCombinedInfo.Pointers.push_back(Ptr);
8709 UseDevicePtrCombinedInfo.Sizes.push_back(
8710 llvm::Constant::getNullValue(CGF.Int64Ty));
8711 UseDevicePtrCombinedInfo.Types.push_back(OMP_MAP_RETURN_PARAM);
8712 UseDevicePtrCombinedInfo.Mappers.push_back(nullptr);
8713 }
8714 }
8715 }
8716
8717 // Look at the use_device_addr clause information and mark the existing map
8718 // entries as such. If there is no map information for an entry in the
8719 // use_device_addr list, we create one with map type 'alloc' and zero size
8720 // section. It is the user's fault if that was not mapped before. If there is
8721 // no map information and the pointer is a struct member, then we defer the
8722 // emission of that entry until the whole struct has been processed.
8723 llvm::SmallDenseSet<CanonicalDeclPtr<const Decl>, 4> Processed;
8724 for (const auto *Cl : Clauses) {
8725 const auto *C = dyn_cast<OMPUseDeviceAddrClause>(Cl);
8726 if (!C)
8727 continue;
8728 for (const auto L : C->component_lists()) {
8729 assert(!std::get<1>(L).empty() &&
8730 "Not expecting empty list of components!");
8731 const ValueDecl *VD = std::get<1>(L).back().getAssociatedDeclaration();
8732 if (!Processed.insert(VD).second)
8733 continue;
8734 VD = cast<ValueDecl>(VD->getCanonicalDecl());
8735 const Expr *IE = std::get<1>(L).back().getAssociatedExpression();
8736 // If the first component is a member expression, we have to look into
8737 // 'this', which maps to null in the map of map information. Otherwise
8738 // look directly for the information.
8739 auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);
8740
8741 // We potentially have map information for this declaration already.
8742 // Look for the first set of components that refer to it.
8743 if (It != Info.end()) {
8744 bool Found = false;
8745 for (auto &Data : It->second) {
8746 auto *CI = llvm::find_if(Data, [VD](const MapInfo &MI) {
8747 return MI.Components.back().getAssociatedDeclaration() == VD;
8748 });
8749 // If we found a map entry, signal that the pointer has to be
8750 // returned and move on to the next declaration.
8751 if (CI != Data.end()) {
8752 CI->ReturnDevicePointer = true;
8753 Found = true;
8754 break;
8755 }
8756 }
8757 if (Found)
8758 continue;
8759 }
8760
8761 // We didn't find any match in our map information - generate a zero
8762 // size array section - if the pointer is a struct member we defer this
8763 // action until the whole struct has been processed.
8764 if (isa<MemberExpr>(IE)) {
8765 // Insert the pointer into Info to be processed by
8766 // generateInfoForComponentList. Because it is a member pointer
8767 // without a pointee, no entry will be generated for it, therefore
8768 // we need to generate one after the whole struct has been processed.
8769 // Nonetheless, generateInfoForComponentList must be called to take
8770 // the pointer into account for the calculation of the range of the
8771 // partial struct.
8772 InfoGen(nullptr, Other, std::get<1>(L), OMPC_MAP_unknown, llvm::None, 8773 llvm::None, /*ReturnDevicePointer=*/false, C->isImplicit(), 8774 nullptr, nullptr, /*ForDeviceAddr=*/true); 8775 DeferredInfo[nullptr].emplace_back(IE, VD, /*ForDeviceAddr=*/true); 8776 } else { 8777 llvm::Value *Ptr; 8778 if (IE->isGLValue()) 8779 Ptr = CGF.EmitLValue(IE).getPointer(CGF); 8780 else 8781 Ptr = CGF.EmitScalarExpr(IE); 8782 CombinedInfo.Exprs.push_back(VD); 8783 CombinedInfo.BasePointers.emplace_back(Ptr, VD); 8784 CombinedInfo.Pointers.push_back(Ptr); 8785 CombinedInfo.Sizes.push_back( 8786 llvm::Constant::getNullValue(CGF.Int64Ty)); 8787 CombinedInfo.Types.push_back(OMP_MAP_RETURN_PARAM); 8788 CombinedInfo.Mappers.push_back(nullptr); 8789 } 8790 } 8791 } 8792 8793 for (const auto &Data : Info) { 8794 StructRangeInfoTy PartialStruct; 8795 // Temporary generated information. 8796 MapCombinedInfoTy CurInfo; 8797 const Decl *D = Data.first; 8798 const ValueDecl *VD = cast_or_null<ValueDecl>(D); 8799 for (const auto &M : Data.second) { 8800 for (const MapInfo &L : M) { 8801 assert(!L.Components.empty() && 8802 "Not expecting declaration with no component lists."); 8803 8804 // Remember the current base pointer index. 8805 unsigned CurrentBasePointersIdx = CurInfo.BasePointers.size(); 8806 CurInfo.NonContigInfo.IsNonContiguous = 8807 L.Components.back().isNonContiguous(); 8808 generateInfoForComponentList( 8809 L.MapType, L.MapModifiers, L.MotionModifiers, L.Components, 8810 CurInfo, PartialStruct, /*IsFirstComponentList=*/false, 8811 L.IsImplicit, L.Mapper, L.ForDeviceAddr, VD, L.VarRef); 8812 8813 // If this entry relates with a device pointer, set the relevant 8814 // declaration and add the 'return pointer' flag. 8815 if (L.ReturnDevicePointer) { 8816 assert(CurInfo.BasePointers.size() > CurrentBasePointersIdx && 8817 "Unexpected number of mapped base pointers."); 8818 8819 const ValueDecl *RelevantVD = 8820 L.Components.back().getAssociatedDeclaration(); 8821 assert(RelevantVD && 8822 "No relevant declaration related with device pointer??"); 8823 8824 CurInfo.BasePointers[CurrentBasePointersIdx].setDevicePtrDecl( 8825 RelevantVD); 8826 CurInfo.Types[CurrentBasePointersIdx] |= OMP_MAP_RETURN_PARAM; 8827 } 8828 } 8829 } 8830 8831 // Append any pending zero-length pointers which are struct members and 8832 // used with use_device_ptr or use_device_addr. 8833 auto CI = DeferredInfo.find(Data.first); 8834 if (CI != DeferredInfo.end()) { 8835 for (const DeferredDevicePtrEntryTy &L : CI->second) { 8836 llvm::Value *BasePtr; 8837 llvm::Value *Ptr; 8838 if (L.ForDeviceAddr) { 8839 if (L.IE->isGLValue()) 8840 Ptr = this->CGF.EmitLValue(L.IE).getPointer(CGF); 8841 else 8842 Ptr = this->CGF.EmitScalarExpr(L.IE); 8843 BasePtr = Ptr; 8844 // Entry is RETURN_PARAM. Also, set the placeholder value 8845 // MEMBER_OF=FFFF so that the entry is later updated with the 8846 // correct value of MEMBER_OF. 8847 CurInfo.Types.push_back(OMP_MAP_RETURN_PARAM | OMP_MAP_MEMBER_OF); 8848 } else { 8849 BasePtr = this->CGF.EmitLValue(L.IE).getPointer(CGF); 8850 Ptr = this->CGF.EmitLoadOfScalar(this->CGF.EmitLValue(L.IE), 8851 L.IE->getExprLoc()); 8852 // Entry is PTR_AND_OBJ and RETURN_PARAM. Also, set the 8853 // placeholder value MEMBER_OF=FFFF so that the entry is later 8854 // updated with the correct value of MEMBER_OF. 
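// (Note: MEMBER_OF occupies the high bits of the flag word. getMemberOfFlag
// above encodes member index I as I + 1 shifted left by
// getFlagMemberOffset() bits, and an all-ones field is the "not yet
// assigned" placeholder that setCorrectMemberOfFlag later overwrites.)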
8855 CurInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_RETURN_PARAM | 8856 OMP_MAP_MEMBER_OF); 8857 } 8858 CurInfo.Exprs.push_back(L.VD); 8859 CurInfo.BasePointers.emplace_back(BasePtr, L.VD); 8860 CurInfo.Pointers.push_back(Ptr); 8861 CurInfo.Sizes.push_back( 8862 llvm::Constant::getNullValue(this->CGF.Int64Ty)); 8863 CurInfo.Mappers.push_back(nullptr); 8864 } 8865 } 8866 // If there is an entry in PartialStruct it means we have a struct with 8867 // individual members mapped. Emit an extra combined entry. 8868 if (PartialStruct.Base.isValid()) { 8869 CurInfo.NonContigInfo.Dims.push_back(0); 8870 emitCombinedEntry(CombinedInfo, CurInfo.Types, PartialStruct, VD); 8871 } 8872 8873 // We need to append the results of this capture to what we already 8874 // have. 8875 CombinedInfo.append(CurInfo); 8876 } 8877 // Append data for use_device_ptr clauses. 8878 CombinedInfo.append(UseDevicePtrCombinedInfo); 8879 } 8880 8881 public: 8882 MappableExprsHandler(const OMPExecutableDirective &Dir, CodeGenFunction &CGF) 8883 : CurDir(&Dir), CGF(CGF) { 8884 // Extract firstprivate clause information. 8885 for (const auto *C : Dir.getClausesOfKind<OMPFirstprivateClause>()) 8886 for (const auto *D : C->varlists()) 8887 FirstPrivateDecls.try_emplace( 8888 cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl()), C->isImplicit()); 8889 // Extract implicit firstprivates from uses_allocators clauses. 8890 for (const auto *C : Dir.getClausesOfKind<OMPUsesAllocatorsClause>()) { 8891 for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) { 8892 OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I); 8893 if (const auto *DRE = dyn_cast_or_null<DeclRefExpr>(D.AllocatorTraits)) 8894 FirstPrivateDecls.try_emplace(cast<VarDecl>(DRE->getDecl()), 8895 /*Implicit=*/true); 8896 else if (const auto *VD = dyn_cast<VarDecl>( 8897 cast<DeclRefExpr>(D.Allocator->IgnoreParenImpCasts()) 8898 ->getDecl())) 8899 FirstPrivateDecls.try_emplace(VD, /*Implicit=*/true); 8900 } 8901 } 8902 // Extract device pointer clause information. 8903 for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>()) 8904 for (auto L : C->component_lists()) 8905 DevPointersMap[std::get<0>(L)].push_back(std::get<1>(L)); 8906 } 8907 8908 /// Constructor for the declare mapper directive. 8909 MappableExprsHandler(const OMPDeclareMapperDecl &Dir, CodeGenFunction &CGF) 8910 : CurDir(&Dir), CGF(CGF) {} 8911 8912 /// Generate code for the combined entry if we have a partially mapped struct 8913 /// and take care of the mapping flags of the arguments corresponding to 8914 /// individual struct members. 8915 void emitCombinedEntry(MapCombinedInfoTy &CombinedInfo, 8916 MapFlagsArrayTy &CurTypes, 8917 const StructRangeInfoTy &PartialStruct, 8918 const ValueDecl *VD = nullptr, 8919 bool NotTargetParams = true) const { 8920 if (CurTypes.size() == 1 && 8921 ((CurTypes.back() & OMP_MAP_MEMBER_OF) != OMP_MAP_MEMBER_OF) && 8922 !PartialStruct.IsArraySection) 8923 return; 8924 Address LBAddr = PartialStruct.LowestElem.second; 8925 Address HBAddr = PartialStruct.HighestElem.second; 8926 if (PartialStruct.HasCompleteRecord) { 8927 LBAddr = PartialStruct.LB; 8928 HBAddr = PartialStruct.LB; 8929 } 8930 CombinedInfo.Exprs.push_back(VD); 8931 // Base is the base of the struct 8932 CombinedInfo.BasePointers.push_back(PartialStruct.Base.getPointer()); 8933 // Pointer is the address of the lowest element 8934 llvm::Value *LB = LBAddr.getPointer(); 8935 CombinedInfo.Pointers.push_back(LB); 8936 // There should not be a mapper for a combined entry. 
8937 CombinedInfo.Mappers.push_back(nullptr);
8938 // Size is (addr of {highest+1} element) - (addr of lowest element)
8939 llvm::Value *HB = HBAddr.getPointer();
8940 llvm::Value *HAddr =
8941 CGF.Builder.CreateConstGEP1_32(HBAddr.getElementType(), HB, /*Idx0=*/1);
8942 llvm::Value *CLAddr = CGF.Builder.CreatePointerCast(LB, CGF.VoidPtrTy);
8943 llvm::Value *CHAddr = CGF.Builder.CreatePointerCast(HAddr, CGF.VoidPtrTy);
8944 llvm::Value *Diff = CGF.Builder.CreatePtrDiff(CHAddr, CLAddr);
8945 llvm::Value *Size = CGF.Builder.CreateIntCast(Diff, CGF.Int64Ty,
8946 /*isSigned=*/false);
8947 CombinedInfo.Sizes.push_back(Size);
8948 // The map type is TARGET_PARAM only when generating info for captures.
8949 CombinedInfo.Types.push_back(NotTargetParams ? OMP_MAP_NONE
8950 : OMP_MAP_TARGET_PARAM);
8951 // If any element has the present modifier, then make sure the runtime
8952 // doesn't attempt to allocate the struct.
8953 if (CurTypes.end() !=
8954 llvm::find_if(CurTypes, [](OpenMPOffloadMappingFlags Type) {
8955 return Type & OMP_MAP_PRESENT;
8956 }))
8957 CombinedInfo.Types.back() |= OMP_MAP_PRESENT;
8958 // Remove the TARGET_PARAM flag from the first element.
8959 (*CurTypes.begin()) &= ~OMP_MAP_TARGET_PARAM;
8960 // If any element has the ompx_hold modifier, then make sure the runtime
8961 // uses the hold reference count for the struct as a whole so that it won't
8962 // be unmapped by an extra dynamic reference count decrement. Add it to all
8963 // elements as well so the runtime knows which reference count to check
8964 // when determining whether it's time for device-to-host transfers of
8965 // individual elements.
8966 if (CurTypes.end() !=
8967 llvm::find_if(CurTypes, [](OpenMPOffloadMappingFlags Type) {
8968 return Type & OMP_MAP_OMPX_HOLD;
8969 })) {
8970 CombinedInfo.Types.back() |= OMP_MAP_OMPX_HOLD;
8971 for (auto &M : CurTypes)
8972 M |= OMP_MAP_OMPX_HOLD;
8973 }
8974
8975 // All other current entries will be MEMBER_OF the combined entry
8976 // (except for PTR_AND_OBJ entries which do not have a placeholder value
8977 // 0xFFFF in the MEMBER_OF field).
8978 OpenMPOffloadMappingFlags MemberOfFlag =
8979 getMemberOfFlag(CombinedInfo.BasePointers.size() - 1);
8980 for (auto &M : CurTypes)
8981 setCorrectMemberOfFlag(M, MemberOfFlag);
8982 }
8983
8984 /// Generate all the base pointers, section pointers, sizes, map types, and
8985 /// mappers for the extracted mappable expressions (all included in \a
8986 /// CombinedInfo). Also, for each item that relates with a device pointer, a
8987 /// pair of the relevant declaration and index where it occurs is appended to
8988 /// the device pointers info array.
8989 void generateAllInfo(
8990 MapCombinedInfoTy &CombinedInfo,
8991 const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet =
8992 llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const {
8993 assert(CurDir.is<const OMPExecutableDirective *>() &&
8994 "Expect an executable directive");
8995 const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
8996 generateAllInfoForClauses(CurExecDir->clauses(), CombinedInfo, SkipVarSet);
8997 }
8998
8999 /// Generate all the base pointers, section pointers, sizes, map types, and
9000 /// mappers for the extracted map clauses of a user-defined mapper (all
9001 /// included in \a CombinedInfo).
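/// For example (illustrative), for a mapper such as
/// \code
/// #pragma omp declare mapper(id : struct S s) map(s.a) map(s.b[0:8])
/// \endcode
/// the map clauses attached to the mapper declaration are the clauses
/// processed here.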
9002 void generateAllInfoForMapper(MapCombinedInfoTy &CombinedInfo) const {
9003 assert(CurDir.is<const OMPDeclareMapperDecl *>() &&
9004 "Expect a declare mapper directive");
9005 const auto *CurMapperDir = CurDir.get<const OMPDeclareMapperDecl *>();
9006 generateAllInfoForClauses(CurMapperDir->clauses(), CombinedInfo);
9007 }
9008
9009 /// Emit capture info for lambdas for variables captured by reference.
9010 void generateInfoForLambdaCaptures(
9011 const ValueDecl *VD, llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo,
9012 llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers) const {
9013 const auto *RD = VD->getType()
9014 .getCanonicalType()
9015 .getNonReferenceType()
9016 ->getAsCXXRecordDecl();
9017 if (!RD || !RD->isLambda())
9018 return;
9019 Address VDAddr = Address(Arg, CGF.getContext().getDeclAlign(VD));
9020 LValue VDLVal = CGF.MakeAddrLValue(
9021 VDAddr, VD->getType().getCanonicalType().getNonReferenceType());
9022 llvm::DenseMap<const VarDecl *, FieldDecl *> Captures;
9023 FieldDecl *ThisCapture = nullptr;
9024 RD->getCaptureFields(Captures, ThisCapture);
9025 if (ThisCapture) {
9026 LValue ThisLVal =
9027 CGF.EmitLValueForFieldInitialization(VDLVal, ThisCapture);
9028 LValue ThisLValVal = CGF.EmitLValueForField(VDLVal, ThisCapture);
9029 LambdaPointers.try_emplace(ThisLVal.getPointer(CGF),
9030 VDLVal.getPointer(CGF));
9031 CombinedInfo.Exprs.push_back(VD);
9032 CombinedInfo.BasePointers.push_back(ThisLVal.getPointer(CGF));
9033 CombinedInfo.Pointers.push_back(ThisLValVal.getPointer(CGF));
9034 CombinedInfo.Sizes.push_back(
9035 CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy),
9036 CGF.Int64Ty, /*isSigned=*/true));
9037 CombinedInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
9038 OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
9039 CombinedInfo.Mappers.push_back(nullptr);
9040 }
9041 for (const LambdaCapture &LC : RD->captures()) {
9042 if (!LC.capturesVariable())
9043 continue;
9044 const VarDecl *VD = LC.getCapturedVar();
9045 if (LC.getCaptureKind() != LCK_ByRef && !VD->getType()->isPointerType())
9046 continue;
9047 auto It = Captures.find(VD);
9048 assert(It != Captures.end() && "Found lambda capture without field.");
9049 LValue VarLVal = CGF.EmitLValueForFieldInitialization(VDLVal, It->second);
9050 if (LC.getCaptureKind() == LCK_ByRef) {
9051 LValue VarLValVal = CGF.EmitLValueForField(VDLVal, It->second);
9052 LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
9053 VDLVal.getPointer(CGF));
9054 CombinedInfo.Exprs.push_back(VD);
9055 CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF));
9056 CombinedInfo.Pointers.push_back(VarLValVal.getPointer(CGF));
9057 CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
9058 CGF.getTypeSize(
9059 VD->getType().getCanonicalType().getNonReferenceType()),
9060 CGF.Int64Ty, /*isSigned=*/true));
9061 } else {
9062 RValue VarRVal = CGF.EmitLoadOfLValue(VarLVal, RD->getLocation());
9063 LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
9064 VDLVal.getPointer(CGF));
9065 CombinedInfo.Exprs.push_back(VD);
9066 CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF));
9067 CombinedInfo.Pointers.push_back(VarRVal.getScalarVal());
9068 CombinedInfo.Sizes.push_back(llvm::ConstantInt::get(CGF.Int64Ty, 0));
9069 }
9070 CombinedInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
9071 OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
9072 CombinedInfo.Mappers.push_back(nullptr);
9073 }
9074 }
9075
9076 /// Set correct indices for lambda captures.
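/// After generateInfoForLambdaCaptures has emitted the capture entries, each
/// implicit lambda-capture entry still carries the MEMBER_OF placeholder;
/// this pass looks up the enclosing lambda object through \p LambdaPointers
/// and patches the placeholder to the index of the entry that maps the
/// lambda object itself.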
9077 void adjustMemberOfForLambdaCaptures(
9078 const llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers,
9079 MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
9080 MapFlagsArrayTy &Types) const {
9081 for (unsigned I = 0, E = Types.size(); I < E; ++I) {
9082 // Set correct member_of idx for all implicit lambda captures.
9083 if (Types[I] != (OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
9084 OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT))
9085 continue;
9086 llvm::Value *BasePtr = LambdaPointers.lookup(*BasePointers[I]);
9087 assert(BasePtr && "Unable to find base lambda address.");
9088 int TgtIdx = -1;
9089 for (unsigned J = I; J > 0; --J) {
9090 unsigned Idx = J - 1;
9091 if (Pointers[Idx] != BasePtr)
9092 continue;
9093 TgtIdx = Idx;
9094 break;
9095 }
9096 assert(TgtIdx != -1 && "Unable to find parent lambda.");
9097 // All other current entries will be MEMBER_OF the combined entry
9098 // (except for PTR_AND_OBJ entries which do not have a placeholder value
9099 // 0xFFFF in the MEMBER_OF field).
9100 OpenMPOffloadMappingFlags MemberOfFlag = getMemberOfFlag(TgtIdx);
9101 setCorrectMemberOfFlag(Types[I], MemberOfFlag);
9102 }
9103 }
9104
9105 /// Generate the base pointers, section pointers, sizes, map types, and
9106 /// mappers associated to a given capture (all included in \a CombinedInfo).
9107 void generateInfoForCapture(const CapturedStmt::Capture *Cap,
9108 llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo,
9109 StructRangeInfoTy &PartialStruct) const {
9110 assert(!Cap->capturesVariableArrayType() &&
9111 "Not expecting to generate map info for a variable array type!");
9112
9113 // We need to know when we are generating information for the first component.
9114 const ValueDecl *VD = Cap->capturesThis()
9115 ? nullptr
9116 : Cap->getCapturedVar()->getCanonicalDecl();
9117
9118 // If this declaration appears in an is_device_ptr clause we just have to
9119 // pass the pointer by value. If it is a reference to a declaration, we just
9120 // pass its value.
9121 if (DevPointersMap.count(VD)) {
9122 CombinedInfo.Exprs.push_back(VD);
9123 CombinedInfo.BasePointers.emplace_back(Arg, VD);
9124 CombinedInfo.Pointers.push_back(Arg);
9125 CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
9126 CGF.getTypeSize(CGF.getContext().VoidPtrTy), CGF.Int64Ty,
9127 /*isSigned=*/true));
9128 CombinedInfo.Types.push_back(
9129 (Cap->capturesVariable() ? OMP_MAP_TO : OMP_MAP_LITERAL) |
9130 OMP_MAP_TARGET_PARAM);
9131 CombinedInfo.Mappers.push_back(nullptr);
9132 return;
9133 }
9134
9135 using MapData =
9136 std::tuple<OMPClauseMappableExprCommon::MappableExprComponentListRef,
9137 OpenMPMapClauseKind, ArrayRef<OpenMPMapModifierKind>, bool,
9138 const ValueDecl *, const Expr *>;
9139 SmallVector<MapData, 4> DeclComponentLists;
9140 assert(CurDir.is<const OMPExecutableDirective *>() &&
9141 "Expect an executable directive");
9142 const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
9143 for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) {
9144 const auto *EI = C->getVarRefs().begin();
9145 for (const auto L : C->decl_component_lists(VD)) {
9146 const ValueDecl *VDecl, *Mapper;
9147 // The expression is not correct if the mapping is implicit.
9148 const Expr *E = (C->getMapLoc().isValid()) ?
*EI : nullptr;
9149 OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
9150 std::tie(VDecl, Components, Mapper) = L;
9151 assert(VDecl == VD && "We got information for the wrong declaration??");
9152 assert(!Components.empty() &&
9153 "Not expecting declaration with no component lists.");
9154 DeclComponentLists.emplace_back(Components, C->getMapType(),
9155 C->getMapTypeModifiers(),
9156 C->isImplicit(), Mapper, E);
9157 ++EI;
9158 }
9159 }
9160 llvm::stable_sort(DeclComponentLists, [](const MapData &LHS,
9161 const MapData &RHS) {
9162 ArrayRef<OpenMPMapModifierKind> MapModifiers = std::get<2>(LHS);
9163 OpenMPMapClauseKind MapType = std::get<1>(RHS);
9164 bool HasPresent = !MapModifiers.empty() &&
9165 llvm::any_of(MapModifiers, [](OpenMPMapModifierKind K) {
9166 return K == clang::OMPC_MAP_MODIFIER_present;
9167 });
9168 bool HasAllocs = MapType == OMPC_MAP_alloc;
9169 MapModifiers = std::get<2>(RHS);
9170 MapType = std::get<1>(LHS);
9171 bool HasPresentR =
9172 !MapModifiers.empty() &&
9173 llvm::any_of(MapModifiers, [](OpenMPMapModifierKind K) {
9174 return K == clang::OMPC_MAP_MODIFIER_present;
9175 });
9176 bool HasAllocsR = MapType == OMPC_MAP_alloc;
9177 return (HasPresent && !HasPresentR) || (HasAllocs && !HasAllocsR);
9178 });
9179
9180 // Find overlapping elements (including the offset from the base element).
9181 llvm::SmallDenseMap<
9182 const MapData *,
9183 llvm::SmallVector<
9184 OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>,
9185 4>
9186 OverlappedData;
9187 size_t Count = 0;
9188 for (const MapData &L : DeclComponentLists) {
9189 OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
9190 OpenMPMapClauseKind MapType;
9191 ArrayRef<OpenMPMapModifierKind> MapModifiers;
9192 bool IsImplicit;
9193 const ValueDecl *Mapper;
9194 const Expr *VarRef;
9195 std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
9196 L;
9197 ++Count;
9198 for (const MapData &L1 : makeArrayRef(DeclComponentLists).slice(Count)) {
9199 OMPClauseMappableExprCommon::MappableExprComponentListRef Components1;
9200 std::tie(Components1, MapType, MapModifiers, IsImplicit, Mapper,
9201 VarRef) = L1;
9202 auto CI = Components.rbegin();
9203 auto CE = Components.rend();
9204 auto SI = Components1.rbegin();
9205 auto SE = Components1.rend();
9206 for (; CI != CE && SI != SE; ++CI, ++SI) {
9207 if (CI->getAssociatedExpression()->getStmtClass() !=
9208 SI->getAssociatedExpression()->getStmtClass())
9209 break;
9210 // Are we dealing with different variables/fields?
9211 if (CI->getAssociatedDeclaration() != SI->getAssociatedDeclaration())
9212 break;
9213 }
9214 // We found an overlap if, for at least one of the lists, we reached the
9215 // head of the components list.
9216 if (CI == CE || SI == SE) {
9217 // Ignore it if it is the same component.
9218 if (CI == CE && SI == SE)
9219 continue;
9220 const auto It = (SI == SE) ? CI : SI;
9221 // If one component is a pointer and another one is a kind of
9222 // dereference of this pointer (array subscript, section, dereference,
9223 // etc.), it is not an overlap.
9224 // The same applies if one component is a base and another component
9225 // is a dereferenced pointer MemberExpr with the same base.
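// For instance (illustrative), map(p) together with map(p[0:n]) is not
// treated as an overlap: the first list ends at the pointer itself, the
// second at the storage it points to.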
9226 if (!isa<MemberExpr>(It->getAssociatedExpression()) || 9227 (std::prev(It)->getAssociatedDeclaration() && 9228 std::prev(It) 9229 ->getAssociatedDeclaration() 9230 ->getType() 9231 ->isPointerType()) || 9232 (It->getAssociatedDeclaration() && 9233 It->getAssociatedDeclaration()->getType()->isPointerType() && 9234 std::next(It) != CE && std::next(It) != SE)) 9235 continue; 9236 const MapData &BaseData = CI == CE ? L : L1; 9237 OMPClauseMappableExprCommon::MappableExprComponentListRef SubData = 9238 SI == SE ? Components : Components1; 9239 auto &OverlappedElements = OverlappedData.FindAndConstruct(&BaseData); 9240 OverlappedElements.getSecond().push_back(SubData); 9241 } 9242 } 9243 } 9244 // Sort the overlapped elements for each item. 9245 llvm::SmallVector<const FieldDecl *, 4> Layout; 9246 if (!OverlappedData.empty()) { 9247 const Type *BaseType = VD->getType().getCanonicalType().getTypePtr(); 9248 const Type *OrigType = BaseType->getPointeeOrArrayElementType(); 9249 while (BaseType != OrigType) { 9250 BaseType = OrigType->getCanonicalTypeInternal().getTypePtr(); 9251 OrigType = BaseType->getPointeeOrArrayElementType(); 9252 } 9253 9254 if (const auto *CRD = BaseType->getAsCXXRecordDecl()) 9255 getPlainLayout(CRD, Layout, /*AsBase=*/false); 9256 else { 9257 const auto *RD = BaseType->getAsRecordDecl(); 9258 Layout.append(RD->field_begin(), RD->field_end()); 9259 } 9260 } 9261 for (auto &Pair : OverlappedData) { 9262 llvm::stable_sort( 9263 Pair.getSecond(), 9264 [&Layout]( 9265 OMPClauseMappableExprCommon::MappableExprComponentListRef First, 9266 OMPClauseMappableExprCommon::MappableExprComponentListRef 9267 Second) { 9268 auto CI = First.rbegin(); 9269 auto CE = First.rend(); 9270 auto SI = Second.rbegin(); 9271 auto SE = Second.rend(); 9272 for (; CI != CE && SI != SE; ++CI, ++SI) { 9273 if (CI->getAssociatedExpression()->getStmtClass() != 9274 SI->getAssociatedExpression()->getStmtClass()) 9275 break; 9276 // Are we dealing with different variables/fields? 9277 if (CI->getAssociatedDeclaration() != 9278 SI->getAssociatedDeclaration()) 9279 break; 9280 } 9281 9282 // Lists contain the same elements. 9283 if (CI == CE && SI == SE) 9284 return false; 9285 9286 // List with less elements is less than list with more elements. 9287 if (CI == CE || SI == SE) 9288 return CI == CE; 9289 9290 const auto *FD1 = cast<FieldDecl>(CI->getAssociatedDeclaration()); 9291 const auto *FD2 = cast<FieldDecl>(SI->getAssociatedDeclaration()); 9292 if (FD1->getParent() == FD2->getParent()) 9293 return FD1->getFieldIndex() < FD2->getFieldIndex(); 9294 const auto *It = 9295 llvm::find_if(Layout, [FD1, FD2](const FieldDecl *FD) { 9296 return FD == FD1 || FD == FD2; 9297 }); 9298 return *It == FD1; 9299 }); 9300 } 9301 9302 // Associated with a capture, because the mapping flags depend on it. 9303 // Go through all of the elements with the overlapped elements. 
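// For example (an illustrative case), with
//   #pragma omp target map(tofrom : s) map(to : s.x)
// the list for 's' is the base and the list for 's.x' is recorded as its
// overlapped component: 's' is emitted first with the 's.x' region handled
// through the overlapped-components path, and 's.x' then gets its own
// entry with its own map type.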
9304 bool IsFirstComponentList = true; 9305 for (const auto &Pair : OverlappedData) { 9306 const MapData &L = *Pair.getFirst(); 9307 OMPClauseMappableExprCommon::MappableExprComponentListRef Components; 9308 OpenMPMapClauseKind MapType; 9309 ArrayRef<OpenMPMapModifierKind> MapModifiers; 9310 bool IsImplicit; 9311 const ValueDecl *Mapper; 9312 const Expr *VarRef; 9313 std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) = 9314 L; 9315 ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef> 9316 OverlappedComponents = Pair.getSecond(); 9317 generateInfoForComponentList( 9318 MapType, MapModifiers, llvm::None, Components, CombinedInfo, 9319 PartialStruct, IsFirstComponentList, IsImplicit, Mapper, 9320 /*ForDeviceAddr=*/false, VD, VarRef, OverlappedComponents); 9321 IsFirstComponentList = false; 9322 } 9323 // Go through other elements without overlapped elements. 9324 for (const MapData &L : DeclComponentLists) { 9325 OMPClauseMappableExprCommon::MappableExprComponentListRef Components; 9326 OpenMPMapClauseKind MapType; 9327 ArrayRef<OpenMPMapModifierKind> MapModifiers; 9328 bool IsImplicit; 9329 const ValueDecl *Mapper; 9330 const Expr *VarRef; 9331 std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) = 9332 L; 9333 auto It = OverlappedData.find(&L); 9334 if (It == OverlappedData.end()) 9335 generateInfoForComponentList(MapType, MapModifiers, llvm::None, 9336 Components, CombinedInfo, PartialStruct, 9337 IsFirstComponentList, IsImplicit, Mapper, 9338 /*ForDeviceAddr=*/false, VD, VarRef); 9339 IsFirstComponentList = false; 9340 } 9341 } 9342 9343 /// Generate the default map information for a given capture \a CI, 9344 /// record field declaration \a RI and captured value \a CV. 9345 void generateDefaultMapInfo(const CapturedStmt::Capture &CI, 9346 const FieldDecl &RI, llvm::Value *CV, 9347 MapCombinedInfoTy &CombinedInfo) const { 9348 bool IsImplicit = true; 9349 // Do the default mapping. 9350 if (CI.capturesThis()) { 9351 CombinedInfo.Exprs.push_back(nullptr); 9352 CombinedInfo.BasePointers.push_back(CV); 9353 CombinedInfo.Pointers.push_back(CV); 9354 const auto *PtrTy = cast<PointerType>(RI.getType().getTypePtr()); 9355 CombinedInfo.Sizes.push_back( 9356 CGF.Builder.CreateIntCast(CGF.getTypeSize(PtrTy->getPointeeType()), 9357 CGF.Int64Ty, /*isSigned=*/true)); 9358 // Default map type. 9359 CombinedInfo.Types.push_back(OMP_MAP_TO | OMP_MAP_FROM); 9360 } else if (CI.capturesVariableByCopy()) { 9361 const VarDecl *VD = CI.getCapturedVar(); 9362 CombinedInfo.Exprs.push_back(VD->getCanonicalDecl()); 9363 CombinedInfo.BasePointers.push_back(CV); 9364 CombinedInfo.Pointers.push_back(CV); 9365 if (!RI.getType()->isAnyPointerType()) { 9366 // We have to signal to the runtime captures passed by value that are 9367 // not pointers. 9368 CombinedInfo.Types.push_back(OMP_MAP_LITERAL); 9369 CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast( 9370 CGF.getTypeSize(RI.getType()), CGF.Int64Ty, /*isSigned=*/true)); 9371 } else { 9372 // Pointers are implicitly mapped with a zero size and no flags 9373 // (other than first map that is added for all implicit maps). 
9374 CombinedInfo.Types.push_back(OMP_MAP_NONE);
9375 CombinedInfo.Sizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty));
9376 }
9377 auto I = FirstPrivateDecls.find(VD);
9378 if (I != FirstPrivateDecls.end())
9379 IsImplicit = I->getSecond();
9380 } else {
9381 assert(CI.capturesVariable() && "Expected captured reference.");
9382 const auto *PtrTy = cast<ReferenceType>(RI.getType().getTypePtr());
9383 QualType ElementType = PtrTy->getPointeeType();
9384 CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
9385 CGF.getTypeSize(ElementType), CGF.Int64Ty, /*isSigned=*/true));
9386 // The default map type for a scalar/complex type is 'to' because by
9387 // default the value doesn't have to be retrieved. For an aggregate
9388 // type, the default is 'tofrom'.
9389 CombinedInfo.Types.push_back(getMapModifiersForPrivateClauses(CI));
9390 const VarDecl *VD = CI.getCapturedVar();
9391 auto I = FirstPrivateDecls.find(VD);
9392 CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
9393 CombinedInfo.BasePointers.push_back(CV);
9394 if (I != FirstPrivateDecls.end() && ElementType->isAnyPointerType()) {
9395 Address PtrAddr = CGF.EmitLoadOfReference(CGF.MakeAddrLValue(
9396 CV, ElementType, CGF.getContext().getDeclAlign(VD),
9397 AlignmentSource::Decl));
9398 CombinedInfo.Pointers.push_back(PtrAddr.getPointer());
9399 } else {
9400 CombinedInfo.Pointers.push_back(CV);
9401 }
9402 if (I != FirstPrivateDecls.end())
9403 IsImplicit = I->getSecond();
9404 }
9405 // Every default map produces a single argument which is a target parameter.
9406 CombinedInfo.Types.back() |= OMP_MAP_TARGET_PARAM;
9407
9408 // Add flag stating this is an implicit map.
9409 if (IsImplicit)
9410 CombinedInfo.Types.back() |= OMP_MAP_IMPLICIT;
9411
9412 // No user-defined mapper for default mapping.
9413 CombinedInfo.Mappers.push_back(nullptr);
9414 }
9415 };
9416 } // anonymous namespace
9417
9418 static void emitNonContiguousDescriptor(
9419 CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo,
9420 CGOpenMPRuntime::TargetDataInfo &Info) {
9421 CodeGenModule &CGM = CGF.CGM;
9422 MappableExprsHandler::MapCombinedInfoTy::StructNonContiguousInfo
9423 &NonContigInfo = CombinedInfo.NonContigInfo;
9424
9425 // Build an array of struct descriptor_dim and then assign it to
9426 // offload_args.
9427 //
9428 // struct descriptor_dim {
9429 // uint64_t offset;
9430 // uint64_t count;
9431 // uint64_t stride;
9432 // };
9433 ASTContext &C = CGF.getContext();
9434 QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
9435 RecordDecl *RD;
9436 RD = C.buildImplicitRecord("descriptor_dim");
9437 RD->startDefinition();
9438 addFieldToRecordDecl(C, RD, Int64Ty);
9439 addFieldToRecordDecl(C, RD, Int64Ty);
9440 addFieldToRecordDecl(C, RD, Int64Ty);
9441 RD->completeDefinition();
9442 QualType DimTy = C.getRecordType(RD);
9443
9444 enum { OffsetFD = 0, CountFD, StrideFD };
9445 // We need two index variables here since the size of "Dims" is the same as
9446 // the size of Components; however, the sizes of the offset, count, and
9447 // stride arrays equal the number of non-contiguous base declarations.
9448 for (unsigned I = 0, L = 0, E = NonContigInfo.Dims.size(); I < E; ++I) {
9449 // Skip emitting IR if the dimension size is 1, since it cannot be
9450 // non-contiguous.
9451 if (NonContigInfo.Dims[I] == 1) 9452 continue; 9453 llvm::APInt Size(/*numBits=*/32, NonContigInfo.Dims[I]); 9454 QualType ArrayTy = 9455 C.getConstantArrayType(DimTy, Size, nullptr, ArrayType::Normal, 0); 9456 Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims"); 9457 for (unsigned II = 0, EE = NonContigInfo.Dims[I]; II < EE; ++II) { 9458 unsigned RevIdx = EE - II - 1; 9459 LValue DimsLVal = CGF.MakeAddrLValue( 9460 CGF.Builder.CreateConstArrayGEP(DimsAddr, II), DimTy); 9461 // Offset 9462 LValue OffsetLVal = CGF.EmitLValueForField( 9463 DimsLVal, *std::next(RD->field_begin(), OffsetFD)); 9464 CGF.EmitStoreOfScalar(NonContigInfo.Offsets[L][RevIdx], OffsetLVal); 9465 // Count 9466 LValue CountLVal = CGF.EmitLValueForField( 9467 DimsLVal, *std::next(RD->field_begin(), CountFD)); 9468 CGF.EmitStoreOfScalar(NonContigInfo.Counts[L][RevIdx], CountLVal); 9469 // Stride 9470 LValue StrideLVal = CGF.EmitLValueForField( 9471 DimsLVal, *std::next(RD->field_begin(), StrideFD)); 9472 CGF.EmitStoreOfScalar(NonContigInfo.Strides[L][RevIdx], StrideLVal); 9473 } 9474 // args[I] = &dims 9475 Address DAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 9476 DimsAddr, CGM.Int8PtrTy); 9477 llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32( 9478 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs), 9479 Info.PointersArray, 0, I); 9480 Address PAddr(P, CGF.getPointerAlign()); 9481 CGF.Builder.CreateStore(DAddr.getPointer(), PAddr); 9482 ++L; 9483 } 9484 } 9485 9486 // Try to extract the base declaration from a `this->x` expression if possible. 9487 static ValueDecl *getDeclFromThisExpr(const Expr *E) { 9488 if (!E) 9489 return nullptr; 9490 9491 if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E->IgnoreParenCasts())) 9492 if (const MemberExpr *ME = 9493 dyn_cast<MemberExpr>(OASE->getBase()->IgnoreParenImpCasts())) 9494 return ME->getMemberDecl(); 9495 return nullptr; 9496 } 9497 9498 /// Emit a string constant containing the names of the values mapped to the 9499 /// offloading runtime library. 9500 llvm::Constant * 9501 emitMappingInformation(CodeGenFunction &CGF, llvm::OpenMPIRBuilder &OMPBuilder, 9502 MappableExprsHandler::MappingExprInfo &MapExprs) { 9503 9504 if (!MapExprs.getMapDecl() && !MapExprs.getMapExpr()) 9505 return OMPBuilder.getOrCreateDefaultSrcLocStr(); 9506 9507 SourceLocation Loc; 9508 if (!MapExprs.getMapDecl() && MapExprs.getMapExpr()) { 9509 if (const ValueDecl *VD = getDeclFromThisExpr(MapExprs.getMapExpr())) 9510 Loc = VD->getLocation(); 9511 else 9512 Loc = MapExprs.getMapExpr()->getExprLoc(); 9513 } else { 9514 Loc = MapExprs.getMapDecl()->getLocation(); 9515 } 9516 9517 std::string ExprName = ""; 9518 if (MapExprs.getMapExpr()) { 9519 PrintingPolicy P(CGF.getContext().getLangOpts()); 9520 llvm::raw_string_ostream OS(ExprName); 9521 MapExprs.getMapExpr()->printPretty(OS, nullptr, P); 9522 OS.flush(); 9523 } else { 9524 ExprName = MapExprs.getMapDecl()->getNameAsString(); 9525 } 9526 9527 PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc); 9528 return OMPBuilder.getOrCreateSrcLocStr(PLoc.getFilename(), ExprName.c_str(), 9529 PLoc.getLine(), PLoc.getColumn()); 9530 } 9531 9532 /// Emit the arrays used to pass the captures and map information to the 9533 /// offloading runtime library. If there is no map or capture information, 9534 /// return nullptr by reference. 
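/// For a typical target region this materializes, e.g., the
/// .offload_baseptrs, .offload_ptrs and .offload_mappers temporaries, an
/// .offload_sizes array (emitted as a private constant global when every
/// size is a compile-time constant), and the constant .offload_maptypes
/// array.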
9535 static void emitOffloadingArrays( 9536 CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo, 9537 CGOpenMPRuntime::TargetDataInfo &Info, llvm::OpenMPIRBuilder &OMPBuilder, 9538 bool IsNonContiguous = false) { 9539 CodeGenModule &CGM = CGF.CGM; 9540 ASTContext &Ctx = CGF.getContext(); 9541 9542 // Reset the array information. 9543 Info.clearArrayInfo(); 9544 Info.NumberOfPtrs = CombinedInfo.BasePointers.size(); 9545 9546 if (Info.NumberOfPtrs) { 9547 // Detect if we have any capture size requiring runtime evaluation of the 9548 // size so that a constant array could be eventually used. 9549 bool hasRuntimeEvaluationCaptureSize = false; 9550 for (llvm::Value *S : CombinedInfo.Sizes) 9551 if (!isa<llvm::Constant>(S)) { 9552 hasRuntimeEvaluationCaptureSize = true; 9553 break; 9554 } 9555 9556 llvm::APInt PointerNumAP(32, Info.NumberOfPtrs, /*isSigned=*/true); 9557 QualType PointerArrayType = Ctx.getConstantArrayType( 9558 Ctx.VoidPtrTy, PointerNumAP, nullptr, ArrayType::Normal, 9559 /*IndexTypeQuals=*/0); 9560 9561 Info.BasePointersArray = 9562 CGF.CreateMemTemp(PointerArrayType, ".offload_baseptrs").getPointer(); 9563 Info.PointersArray = 9564 CGF.CreateMemTemp(PointerArrayType, ".offload_ptrs").getPointer(); 9565 Address MappersArray = 9566 CGF.CreateMemTemp(PointerArrayType, ".offload_mappers"); 9567 Info.MappersArray = MappersArray.getPointer(); 9568 9569 // If we don't have any VLA types or other types that require runtime 9570 // evaluation, we can use a constant array for the map sizes, otherwise we 9571 // need to fill up the arrays as we do for the pointers. 9572 QualType Int64Ty = 9573 Ctx.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1); 9574 if (hasRuntimeEvaluationCaptureSize) { 9575 QualType SizeArrayType = Ctx.getConstantArrayType( 9576 Int64Ty, PointerNumAP, nullptr, ArrayType::Normal, 9577 /*IndexTypeQuals=*/0); 9578 Info.SizesArray = 9579 CGF.CreateMemTemp(SizeArrayType, ".offload_sizes").getPointer(); 9580 } else { 9581 // We expect all the sizes to be constant, so we collect them to create 9582 // a constant array. 9583 SmallVector<llvm::Constant *, 16> ConstSizes; 9584 for (unsigned I = 0, E = CombinedInfo.Sizes.size(); I < E; ++I) { 9585 if (IsNonContiguous && 9586 (CombinedInfo.Types[I] & MappableExprsHandler::OMP_MAP_NON_CONTIG)) { 9587 ConstSizes.push_back(llvm::ConstantInt::get( 9588 CGF.Int64Ty, CombinedInfo.NonContigInfo.Dims[I])); 9589 } else { 9590 ConstSizes.push_back(cast<llvm::Constant>(CombinedInfo.Sizes[I])); 9591 } 9592 } 9593 9594 auto *SizesArrayInit = llvm::ConstantArray::get( 9595 llvm::ArrayType::get(CGM.Int64Ty, ConstSizes.size()), ConstSizes); 9596 std::string Name = CGM.getOpenMPRuntime().getName({"offload_sizes"}); 9597 auto *SizesArrayGbl = new llvm::GlobalVariable( 9598 CGM.getModule(), SizesArrayInit->getType(), 9599 /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage, 9600 SizesArrayInit, Name); 9601 SizesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); 9602 Info.SizesArray = SizesArrayGbl; 9603 } 9604 9605 // The map types are always constant so we don't need to generate code to 9606 // fill arrays. Instead, we create an array constant. 
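// As an illustrative example, 'map(to : a) map(tofrom : b)' yields roughly
//   @.offload_maptypes = private unnamed_addr constant [2 x i64]
//       [i64 33, i64 35]
// where 33 = TO | TARGET_PARAM (0x1 | 0x20) and 35 = TO | FROM |
// TARGET_PARAM (0x1 | 0x2 | 0x20).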
9607 SmallVector<uint64_t, 4> Mapping(CombinedInfo.Types.size(), 0);
9608 llvm::copy(CombinedInfo.Types, Mapping.begin());
9609 std::string MaptypesName =
9610 CGM.getOpenMPRuntime().getName({"offload_maptypes"});
9611 auto *MapTypesArrayGbl =
9612 OMPBuilder.createOffloadMaptypes(Mapping, MaptypesName);
9613 Info.MapTypesArray = MapTypesArrayGbl;
9614
9615 // The map name information is only built if debug information is
9616 // requested.
9617 if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo) {
9618 Info.MapNamesArray = llvm::Constant::getNullValue(
9619 llvm::Type::getInt8Ty(CGF.Builder.getContext())->getPointerTo());
9620 } else {
9621 auto fillInfoMap = [&](MappableExprsHandler::MappingExprInfo &MapExpr) {
9622 return emitMappingInformation(CGF, OMPBuilder, MapExpr);
9623 };
9624 SmallVector<llvm::Constant *, 4> InfoMap(CombinedInfo.Exprs.size());
9625 llvm::transform(CombinedInfo.Exprs, InfoMap.begin(), fillInfoMap);
9626 std::string MapnamesName =
9627 CGM.getOpenMPRuntime().getName({"offload_mapnames"});
9628 auto *MapNamesArrayGbl =
9629 OMPBuilder.createOffloadMapnames(InfoMap, MapnamesName);
9630 Info.MapNamesArray = MapNamesArrayGbl;
9631 }
9632
9633 // If there's a present map type modifier, it must not be applied to the end
9634 // of a region, so generate a separate map type array in that case.
9635 if (Info.separateBeginEndCalls()) {
9636 bool EndMapTypesDiffer = false;
9637 for (uint64_t &Type : Mapping) {
9638 if (Type & MappableExprsHandler::OMP_MAP_PRESENT) {
9639 Type &= ~MappableExprsHandler::OMP_MAP_PRESENT;
9640 EndMapTypesDiffer = true;
9641 }
9642 }
9643 if (EndMapTypesDiffer) {
9644 MapTypesArrayGbl =
9645 OMPBuilder.createOffloadMaptypes(Mapping, MaptypesName);
9646 Info.MapTypesArrayEnd = MapTypesArrayGbl;
9647 }
9648 }
9649
9650 for (unsigned I = 0; I < Info.NumberOfPtrs; ++I) {
9651 llvm::Value *BPVal = *CombinedInfo.BasePointers[I];
9652 llvm::Value *BP = CGF.Builder.CreateConstInBoundsGEP2_32(
9653 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
9654 Info.BasePointersArray, 0, I);
9655 BP = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
9656 BP, BPVal->getType()->getPointerTo(/*AddrSpace=*/0));
9657 Address BPAddr(BP, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
9658 CGF.Builder.CreateStore(BPVal, BPAddr);
9659
9660 if (Info.requiresDevicePointerInfo())
9661 if (const ValueDecl *DevVD =
9662 CombinedInfo.BasePointers[I].getDevicePtrDecl())
9663 Info.CaptureDeviceAddrMap.try_emplace(DevVD, BPAddr);
9664
9665 llvm::Value *PVal = CombinedInfo.Pointers[I];
9666 llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32(
9667 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
9668 Info.PointersArray, 0, I);
9669 P = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
9670 P, PVal->getType()->getPointerTo(/*AddrSpace=*/0));
9671 Address PAddr(P, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
9672 CGF.Builder.CreateStore(PVal, PAddr);
9673
9674 if (hasRuntimeEvaluationCaptureSize) {
9675 llvm::Value *S = CGF.Builder.CreateConstInBoundsGEP2_32(
9676 llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs),
9677 Info.SizesArray,
9678 /*Idx0=*/0,
9679 /*Idx1=*/I);
9680 Address SAddr(S, Ctx.getTypeAlignInChars(Int64Ty));
9681 CGF.Builder.CreateStore(CGF.Builder.CreateIntCast(CombinedInfo.Sizes[I],
9682 CGM.Int64Ty,
9683 /*isSigned=*/true),
9684 SAddr);
9685 }
9686
9687 // Fill up the mapper array.
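// (Each slot holds either a pointer to the function emitted for the
// user-defined mapper of that component, or a null pointer when the
// default mapper applies.)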
9688 llvm::Value *MFunc = llvm::ConstantPointerNull::get(CGM.VoidPtrTy); 9689 if (CombinedInfo.Mappers[I]) { 9690 MFunc = CGM.getOpenMPRuntime().getOrCreateUserDefinedMapperFunc( 9691 cast<OMPDeclareMapperDecl>(CombinedInfo.Mappers[I])); 9692 MFunc = CGF.Builder.CreatePointerCast(MFunc, CGM.VoidPtrTy); 9693 Info.HasMapper = true; 9694 } 9695 Address MAddr = CGF.Builder.CreateConstArrayGEP(MappersArray, I); 9696 CGF.Builder.CreateStore(MFunc, MAddr); 9697 } 9698 } 9699 9700 if (!IsNonContiguous || CombinedInfo.NonContigInfo.Offsets.empty() || 9701 Info.NumberOfPtrs == 0) 9702 return; 9703 9704 emitNonContiguousDescriptor(CGF, CombinedInfo, Info); 9705 } 9706 9707 namespace { 9708 /// Additional arguments for emitOffloadingArraysArgument function. 9709 struct ArgumentsOptions { 9710 bool ForEndCall = false; 9711 ArgumentsOptions() = default; 9712 ArgumentsOptions(bool ForEndCall) : ForEndCall(ForEndCall) {} 9713 }; 9714 } // namespace 9715 9716 /// Emit the arguments to be passed to the runtime library based on the 9717 /// arrays of base pointers, pointers, sizes, map types, and mappers. If 9718 /// ForEndCall, emit map types to be passed for the end of the region instead of 9719 /// the beginning. 9720 static void emitOffloadingArraysArgument( 9721 CodeGenFunction &CGF, llvm::Value *&BasePointersArrayArg, 9722 llvm::Value *&PointersArrayArg, llvm::Value *&SizesArrayArg, 9723 llvm::Value *&MapTypesArrayArg, llvm::Value *&MapNamesArrayArg, 9724 llvm::Value *&MappersArrayArg, CGOpenMPRuntime::TargetDataInfo &Info, 9725 const ArgumentsOptions &Options = ArgumentsOptions()) { 9726 assert((!Options.ForEndCall || Info.separateBeginEndCalls()) && 9727 "expected region end call to runtime only when end call is separate"); 9728 CodeGenModule &CGM = CGF.CGM; 9729 if (Info.NumberOfPtrs) { 9730 BasePointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( 9731 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs), 9732 Info.BasePointersArray, 9733 /*Idx0=*/0, /*Idx1=*/0); 9734 PointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( 9735 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs), 9736 Info.PointersArray, 9737 /*Idx0=*/0, 9738 /*Idx1=*/0); 9739 SizesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( 9740 llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs), Info.SizesArray, 9741 /*Idx0=*/0, /*Idx1=*/0); 9742 MapTypesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( 9743 llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs), 9744 Options.ForEndCall && Info.MapTypesArrayEnd ? Info.MapTypesArrayEnd 9745 : Info.MapTypesArray, 9746 /*Idx0=*/0, 9747 /*Idx1=*/0); 9748 9749 // Only emit the mapper information arrays if debug information is 9750 // requested. 
9751 if (CGF.CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo)
9752 MapNamesArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
9753 else
9754 MapNamesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
9755 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
9756 Info.MapNamesArray,
9757 /*Idx0=*/0,
9758 /*Idx1=*/0);
9759 // If there is no user-defined mapper, set the mapper array to nullptr to
9760 // avoid an unnecessary data privatization.
9761 if (!Info.HasMapper)
9762 MappersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
9763 else
9764 MappersArrayArg =
9765 CGF.Builder.CreatePointerCast(Info.MappersArray, CGM.VoidPtrPtrTy);
9766 } else {
9767 BasePointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
9768 PointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
9769 SizesArrayArg = llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo());
9770 MapTypesArrayArg =
9771 llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo());
9772 MapNamesArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
9773 MappersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
9774 }
9775 }
9776
9777 /// Check for inner distribute directive.
9778 static const OMPExecutableDirective *
9779 getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) {
9780 const auto *CS = D.getInnermostCapturedStmt();
9781 const auto *Body =
9782 CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
9783 const Stmt *ChildStmt =
9784 CGOpenMPRuntime::getSingleCompoundChild(Ctx, Body);
9785
9786 if (const auto *NestedDir =
9787 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
9788 OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind();
9789 switch (D.getDirectiveKind()) {
9790 case OMPD_target:
9791 if (isOpenMPDistributeDirective(DKind))
9792 return NestedDir;
9793 if (DKind == OMPD_teams) {
9794 Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers(
9795 /*IgnoreCaptured=*/true);
9796 if (!Body)
9797 return nullptr;
9798 ChildStmt = CGOpenMPRuntime::getSingleCompoundChild(Ctx, Body);
9799 if (const auto *NND =
9800 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
9801 DKind = NND->getDirectiveKind();
9802 if (isOpenMPDistributeDirective(DKind))
9803 return NND;
9804 }
9805 }
9806 return nullptr;
9807 case OMPD_target_teams:
9808 if (isOpenMPDistributeDirective(DKind))
9809 return NestedDir;
9810 return nullptr;
9811 case OMPD_target_parallel:
9812 case OMPD_target_simd:
9813 case OMPD_target_parallel_for:
9814 case OMPD_target_parallel_for_simd:
9815 return nullptr;
9816 case OMPD_target_teams_distribute:
9817 case OMPD_target_teams_distribute_simd:
9818 case OMPD_target_teams_distribute_parallel_for:
9819 case OMPD_target_teams_distribute_parallel_for_simd:
9820 case OMPD_parallel:
9821 case OMPD_for:
9822 case OMPD_parallel_for:
9823 case OMPD_parallel_master:
9824 case OMPD_parallel_sections:
9825 case OMPD_for_simd:
9826 case OMPD_parallel_for_simd:
9827 case OMPD_cancel:
9828 case OMPD_cancellation_point:
9829 case OMPD_ordered:
9830 case OMPD_threadprivate:
9831 case OMPD_allocate:
9832 case OMPD_task:
9833 case OMPD_simd:
9834 case OMPD_tile:
9835 case OMPD_unroll:
9836 case OMPD_sections:
9837 case OMPD_section:
9838 case OMPD_single:
9839 case OMPD_master:
9840 case OMPD_critical:
9841 case OMPD_taskyield:
9842 case OMPD_barrier:
9843 case OMPD_taskwait:
9844 case OMPD_taskgroup:
9845 case OMPD_atomic:
9846 case OMPD_flush:
9847 case OMPD_depobj:
9848 case
OMPD_scan: 9849 case OMPD_teams: 9850 case OMPD_target_data: 9851 case OMPD_target_exit_data: 9852 case OMPD_target_enter_data: 9853 case OMPD_distribute: 9854 case OMPD_distribute_simd: 9855 case OMPD_distribute_parallel_for: 9856 case OMPD_distribute_parallel_for_simd: 9857 case OMPD_teams_distribute: 9858 case OMPD_teams_distribute_simd: 9859 case OMPD_teams_distribute_parallel_for: 9860 case OMPD_teams_distribute_parallel_for_simd: 9861 case OMPD_target_update: 9862 case OMPD_declare_simd: 9863 case OMPD_declare_variant: 9864 case OMPD_begin_declare_variant: 9865 case OMPD_end_declare_variant: 9866 case OMPD_declare_target: 9867 case OMPD_end_declare_target: 9868 case OMPD_declare_reduction: 9869 case OMPD_declare_mapper: 9870 case OMPD_taskloop: 9871 case OMPD_taskloop_simd: 9872 case OMPD_master_taskloop: 9873 case OMPD_master_taskloop_simd: 9874 case OMPD_parallel_master_taskloop: 9875 case OMPD_parallel_master_taskloop_simd: 9876 case OMPD_requires: 9877 case OMPD_metadirective: 9878 case OMPD_unknown: 9879 default: 9880 llvm_unreachable("Unexpected directive."); 9881 } 9882 } 9883 9884 return nullptr; 9885 } 9886 9887 /// Emit the user-defined mapper function. The code generation follows the 9888 /// pattern in the example below. 9889 /// \code 9890 /// void .omp_mapper.<type_name>.<mapper_id>.(void *rt_mapper_handle, 9891 /// void *base, void *begin, 9892 /// int64_t size, int64_t type, 9893 /// void *name = nullptr) { 9894 /// // Allocate space for an array section first or add a base/begin for 9895 /// // pointer dereference. 9896 /// if ((size > 1 || (base != begin && maptype.IsPtrAndObj)) && 9897 /// !maptype.IsDelete) 9898 /// __tgt_push_mapper_component(rt_mapper_handle, base, begin, 9899 /// size*sizeof(Ty), clearToFromMember(type)); 9900 /// // Map members. 9901 /// for (unsigned i = 0; i < size; i++) { 9902 /// // For each component specified by this mapper: 9903 /// for (auto c : begin[i]->all_components) { 9904 /// if (c.hasMapper()) 9905 /// (*c.Mapper())(rt_mapper_handle, c.arg_base, c.arg_begin, c.arg_size, 9906 /// c.arg_type, c.arg_name); 9907 /// else 9908 /// __tgt_push_mapper_component(rt_mapper_handle, c.arg_base, 9909 /// c.arg_begin, c.arg_size, c.arg_type, 9910 /// c.arg_name); 9911 /// } 9912 /// } 9913 /// // Delete the array section. 9914 /// if (size > 1 && maptype.IsDelete) 9915 /// __tgt_push_mapper_component(rt_mapper_handle, base, begin, 9916 /// size*sizeof(Ty), clearToFromMember(type)); 9917 /// } 9918 /// \endcode 9919 void CGOpenMPRuntime::emitUserDefinedMapper(const OMPDeclareMapperDecl *D, 9920 CodeGenFunction *CGF) { 9921 if (UDMMap.count(D) > 0) 9922 return; 9923 ASTContext &C = CGM.getContext(); 9924 QualType Ty = D->getType(); 9925 QualType PtrTy = C.getPointerType(Ty).withRestrict(); 9926 QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true); 9927 auto *MapperVarDecl = 9928 cast<VarDecl>(cast<DeclRefExpr>(D->getMapperVarRef())->getDecl()); 9929 SourceLocation Loc = D->getLocation(); 9930 CharUnits ElementSize = C.getTypeSizeInChars(Ty); 9931 9932 // Prepare mapper function arguments and attributes. 
9933 ImplicitParamDecl HandleArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 9934 C.VoidPtrTy, ImplicitParamDecl::Other); 9935 ImplicitParamDecl BaseArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 9936 ImplicitParamDecl::Other); 9937 ImplicitParamDecl BeginArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 9938 C.VoidPtrTy, ImplicitParamDecl::Other); 9939 ImplicitParamDecl SizeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty, 9940 ImplicitParamDecl::Other); 9941 ImplicitParamDecl TypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty, 9942 ImplicitParamDecl::Other); 9943 ImplicitParamDecl NameArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 9944 ImplicitParamDecl::Other); 9945 FunctionArgList Args; 9946 Args.push_back(&HandleArg); 9947 Args.push_back(&BaseArg); 9948 Args.push_back(&BeginArg); 9949 Args.push_back(&SizeArg); 9950 Args.push_back(&TypeArg); 9951 Args.push_back(&NameArg); 9952 const CGFunctionInfo &FnInfo = 9953 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 9954 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo); 9955 SmallString<64> TyStr; 9956 llvm::raw_svector_ostream Out(TyStr); 9957 CGM.getCXXABI().getMangleContext().mangleTypeName(Ty, Out); 9958 std::string Name = getName({"omp_mapper", TyStr, D->getName()}); 9959 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage, 9960 Name, &CGM.getModule()); 9961 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo); 9962 Fn->removeFnAttr(llvm::Attribute::OptimizeNone); 9963 // Start the mapper function code generation. 9964 CodeGenFunction MapperCGF(CGM); 9965 MapperCGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc); 9966 // Compute the starting and end addresses of array elements. 9967 llvm::Value *Size = MapperCGF.EmitLoadOfScalar( 9968 MapperCGF.GetAddrOfLocalVar(&SizeArg), /*Volatile=*/false, 9969 C.getPointerType(Int64Ty), Loc); 9970 // Prepare common arguments for array initiation and deletion. 9971 llvm::Value *Handle = MapperCGF.EmitLoadOfScalar( 9972 MapperCGF.GetAddrOfLocalVar(&HandleArg), 9973 /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc); 9974 llvm::Value *BaseIn = MapperCGF.EmitLoadOfScalar( 9975 MapperCGF.GetAddrOfLocalVar(&BaseArg), 9976 /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc); 9977 llvm::Value *BeginIn = MapperCGF.EmitLoadOfScalar( 9978 MapperCGF.GetAddrOfLocalVar(&BeginArg), 9979 /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc); 9980 // Convert the size in bytes into the number of array elements. 9981 Size = MapperCGF.Builder.CreateExactUDiv( 9982 Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity())); 9983 llvm::Value *PtrBegin = MapperCGF.Builder.CreateBitCast( 9984 BeginIn, CGM.getTypes().ConvertTypeForMem(PtrTy)); 9985 llvm::Value *PtrEnd = MapperCGF.Builder.CreateGEP( 9986 PtrBegin->getType()->getPointerElementType(), PtrBegin, Size); 9987 llvm::Value *MapType = MapperCGF.EmitLoadOfScalar( 9988 MapperCGF.GetAddrOfLocalVar(&TypeArg), /*Volatile=*/false, 9989 C.getPointerType(Int64Ty), Loc); 9990 llvm::Value *MapName = MapperCGF.EmitLoadOfScalar( 9991 MapperCGF.GetAddrOfLocalVar(&NameArg), 9992 /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc); 9993 9994 // Emit array initiation if this is an array section and \p MapType indicates 9995 // that memory allocation is required. 
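  // Roadmap for the code that follows, mirroring the \code comment on this
  // function (an informal C-like sketch; 'is_array_section'/'is_delete' are
  // placeholders, not emitted names):
  //   if (is_array_section && !is_delete)
  //     __tgt_push_mapper_component(handle, base, begin,
  //                                 size * sizeof(Ty), alloc_flags); // init
  //   for (Ty *p = begin; p != end; ++p)
  //     <push or recursively map each component of *p>;              // loop
  //   if (is_array_section && is_delete)
  //     __tgt_push_mapper_component(...);                            // del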
9996 llvm::BasicBlock *HeadBB = MapperCGF.createBasicBlock("omp.arraymap.head"); 9997 emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType, 9998 MapName, ElementSize, HeadBB, /*IsInit=*/true); 9999 10000 // Emit a for loop to iterate through SizeArg of elements and map all of them. 10001 10002 // Emit the loop header block. 10003 MapperCGF.EmitBlock(HeadBB); 10004 llvm::BasicBlock *BodyBB = MapperCGF.createBasicBlock("omp.arraymap.body"); 10005 llvm::BasicBlock *DoneBB = MapperCGF.createBasicBlock("omp.done"); 10006 // Evaluate whether the initial condition is satisfied. 10007 llvm::Value *IsEmpty = 10008 MapperCGF.Builder.CreateICmpEQ(PtrBegin, PtrEnd, "omp.arraymap.isempty"); 10009 MapperCGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB); 10010 llvm::BasicBlock *EntryBB = MapperCGF.Builder.GetInsertBlock(); 10011 10012 // Emit the loop body block. 10013 MapperCGF.EmitBlock(BodyBB); 10014 llvm::BasicBlock *LastBB = BodyBB; 10015 llvm::PHINode *PtrPHI = MapperCGF.Builder.CreatePHI( 10016 PtrBegin->getType(), 2, "omp.arraymap.ptrcurrent"); 10017 PtrPHI->addIncoming(PtrBegin, EntryBB); 10018 Address PtrCurrent = 10019 Address(PtrPHI, MapperCGF.GetAddrOfLocalVar(&BeginArg) 10020 .getAlignment() 10021 .alignmentOfArrayElement(ElementSize)); 10022 // Privatize the declared variable of mapper to be the current array element. 10023 CodeGenFunction::OMPPrivateScope Scope(MapperCGF); 10024 Scope.addPrivate(MapperVarDecl, [PtrCurrent]() { return PtrCurrent; }); 10025 (void)Scope.Privatize(); 10026 10027 // Get map clause information. Fill up the arrays with all mapped variables. 10028 MappableExprsHandler::MapCombinedInfoTy Info; 10029 MappableExprsHandler MEHandler(*D, MapperCGF); 10030 MEHandler.generateAllInfoForMapper(Info); 10031 10032 // Call the runtime API __tgt_mapper_num_components to get the number of 10033 // pre-existing components. 10034 llvm::Value *OffloadingArgs[] = {Handle}; 10035 llvm::Value *PreviousSize = MapperCGF.EmitRuntimeCall( 10036 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), 10037 OMPRTL___tgt_mapper_num_components), 10038 OffloadingArgs); 10039 llvm::Value *ShiftedPreviousSize = MapperCGF.Builder.CreateShl( 10040 PreviousSize, 10041 MapperCGF.Builder.getInt64(MappableExprsHandler::getFlagMemberOffset())); 10042 10043 // Fill up the runtime mapper handle for all components. 10044 for (unsigned I = 0; I < Info.BasePointers.size(); ++I) { 10045 llvm::Value *CurBaseArg = MapperCGF.Builder.CreateBitCast( 10046 *Info.BasePointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy)); 10047 llvm::Value *CurBeginArg = MapperCGF.Builder.CreateBitCast( 10048 Info.Pointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy)); 10049 llvm::Value *CurSizeArg = Info.Sizes[I]; 10050 llvm::Value *CurNameArg = 10051 (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo) 10052 ? llvm::ConstantPointerNull::get(CGM.VoidPtrTy) 10053 : emitMappingInformation(MapperCGF, OMPBuilder, Info.Exprs[I]); 10054 10055 // Extract the MEMBER_OF field from the map type. 10056 llvm::Value *OriMapType = MapperCGF.Builder.getInt64(Info.Types[I]); 10057 llvm::Value *MemberMapType = 10058 MapperCGF.Builder.CreateNUWAdd(OriMapType, ShiftedPreviousSize); 10059 10060 // Combine the map type inherited from user-defined mapper with that 10061 // specified in the program. 
    // According to the OMP_MAP_TO and OMP_MAP_FROM
    // bits of the \a MapType, which is the input argument of the mapper
    // function, the following code will set the OMP_MAP_TO and OMP_MAP_FROM
    // bits of MemberMapType.
    // [OpenMP 5.0], 1.2.6. map-type decay.
    //        | alloc |  to   | from  | tofrom | release | delete
    // ----------------------------------------------------------
    // alloc  | alloc | alloc | alloc | alloc  | release | delete
    // to     | alloc |  to   | alloc |   to   | release | delete
    // from   | alloc | alloc | from  |  from  | release | delete
    // tofrom | alloc |  to   | from  | tofrom | release | delete
    llvm::Value *LeftToFrom = MapperCGF.Builder.CreateAnd(
        MapType,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO |
                                   MappableExprsHandler::OMP_MAP_FROM));
    llvm::BasicBlock *AllocBB = MapperCGF.createBasicBlock("omp.type.alloc");
    llvm::BasicBlock *AllocElseBB =
        MapperCGF.createBasicBlock("omp.type.alloc.else");
    llvm::BasicBlock *ToBB = MapperCGF.createBasicBlock("omp.type.to");
    llvm::BasicBlock *ToElseBB = MapperCGF.createBasicBlock("omp.type.to.else");
    llvm::BasicBlock *FromBB = MapperCGF.createBasicBlock("omp.type.from");
    llvm::BasicBlock *EndBB = MapperCGF.createBasicBlock("omp.type.end");
    llvm::Value *IsAlloc = MapperCGF.Builder.CreateIsNull(LeftToFrom);
    MapperCGF.Builder.CreateCondBr(IsAlloc, AllocBB, AllocElseBB);
    // In case of alloc, clear OMP_MAP_TO and OMP_MAP_FROM.
    MapperCGF.EmitBlock(AllocBB);
    llvm::Value *AllocMapType = MapperCGF.Builder.CreateAnd(
        MemberMapType,
        MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO |
                                     MappableExprsHandler::OMP_MAP_FROM)));
    MapperCGF.Builder.CreateBr(EndBB);
    MapperCGF.EmitBlock(AllocElseBB);
    llvm::Value *IsTo = MapperCGF.Builder.CreateICmpEQ(
        LeftToFrom,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO));
    MapperCGF.Builder.CreateCondBr(IsTo, ToBB, ToElseBB);
    // In case of to, clear OMP_MAP_FROM.
    MapperCGF.EmitBlock(ToBB);
    llvm::Value *ToMapType = MapperCGF.Builder.CreateAnd(
        MemberMapType,
        MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_FROM));
    MapperCGF.Builder.CreateBr(EndBB);
    MapperCGF.EmitBlock(ToElseBB);
    llvm::Value *IsFrom = MapperCGF.Builder.CreateICmpEQ(
        LeftToFrom,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_FROM));
    MapperCGF.Builder.CreateCondBr(IsFrom, FromBB, EndBB);
    // In case of from, clear OMP_MAP_TO.
    MapperCGF.EmitBlock(FromBB);
    llvm::Value *FromMapType = MapperCGF.Builder.CreateAnd(
        MemberMapType,
        MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_TO));
    // In case of tofrom, do nothing.
    MapperCGF.EmitBlock(EndBB);
    LastBB = EndBB;
    llvm::PHINode *CurMapType =
        MapperCGF.Builder.CreatePHI(CGM.Int64Ty, 4, "omp.maptype");
    CurMapType->addIncoming(AllocMapType, AllocBB);
    CurMapType->addIncoming(ToMapType, ToBB);
    CurMapType->addIncoming(FromMapType, FromBB);
    CurMapType->addIncoming(MemberMapType, ToElseBB);

    llvm::Value *OffloadingArgs[] = {Handle,     CurBaseArg, CurBeginArg,
                                     CurSizeArg, CurMapType, CurNameArg};
    if (Info.Mappers[I]) {
      // Call the corresponding mapper function.
      llvm::Function *MapperFunc = getOrCreateUserDefinedMapperFunc(
          cast<OMPDeclareMapperDecl>(Info.Mappers[I]));
      assert(MapperFunc && "Expected a valid mapper function to be available.");
      MapperCGF.EmitNounwindRuntimeCall(MapperFunc, OffloadingArgs);
    } else {
      // Call the runtime API __tgt_push_mapper_component to fill up the
      // runtime data structure.
      MapperCGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___tgt_push_mapper_component),
          OffloadingArgs);
    }
  }

  // Update the pointer to point to the next element that needs to be mapped,
  // and check whether we have mapped all elements.
  llvm::Type *ElemTy = PtrPHI->getType()->getPointerElementType();
  llvm::Value *PtrNext = MapperCGF.Builder.CreateConstGEP1_32(
      ElemTy, PtrPHI, /*Idx0=*/1, "omp.arraymap.next");
  PtrPHI->addIncoming(PtrNext, LastBB);
  llvm::Value *IsDone =
      MapperCGF.Builder.CreateICmpEQ(PtrNext, PtrEnd, "omp.arraymap.isdone");
  llvm::BasicBlock *ExitBB = MapperCGF.createBasicBlock("omp.arraymap.exit");
  MapperCGF.Builder.CreateCondBr(IsDone, ExitBB, BodyBB);

  MapperCGF.EmitBlock(ExitBB);
  // Emit array deletion if this is an array section and \p MapType indicates
  // that deletion is required.
  emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
                             MapName, ElementSize, DoneBB, /*IsInit=*/false);

  // Emit the function exit block.
  MapperCGF.EmitBlock(DoneBB, /*IsFinished=*/true);
  MapperCGF.FinishFunction();
  UDMMap.try_emplace(D, Fn);
  if (CGF) {
    auto &Decls = FunctionUDMMap.FindAndConstruct(CGF->CurFn);
    Decls.second.push_back(D);
  }
}

/// Emit the array initialization or deletion portion for user-defined mapper
/// code generation. First, it evaluates whether an array section is mapped and
/// whether the \a MapType instructs to delete this section. If \a IsInit is
/// true, and \a MapType indicates to not delete this array, array
/// initialization code is generated. If \a IsInit is false, and \a MapType
/// indicates to delete this array, array deletion code is generated.
void CGOpenMPRuntime::emitUDMapperArrayInitOrDel(
    CodeGenFunction &MapperCGF, llvm::Value *Handle, llvm::Value *Base,
    llvm::Value *Begin, llvm::Value *Size, llvm::Value *MapType,
    llvm::Value *MapName, CharUnits ElementSize, llvm::BasicBlock *ExitBB,
    bool IsInit) {
  StringRef Prefix = IsInit ? ".init" : ".del";

  // Evaluate if this is an array section.
  llvm::BasicBlock *BodyBB =
      MapperCGF.createBasicBlock(getName({"omp.array", Prefix}));
  llvm::Value *IsArray = MapperCGF.Builder.CreateICmpSGT(
      Size, MapperCGF.Builder.getInt64(1), "omp.arrayinit.isarray");
  llvm::Value *DeleteBit = MapperCGF.Builder.CreateAnd(
      MapType,
      MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_DELETE));
  llvm::Value *DeleteCond;
  llvm::Value *Cond;
  if (IsInit) {
    // base != begin?
    llvm::Value *BaseIsBegin = MapperCGF.Builder.CreateIsNotNull(
        MapperCGF.Builder.CreatePtrDiff(Base, Begin));
    // IsPtrAndObj?
10196 llvm::Value *PtrAndObjBit = MapperCGF.Builder.CreateAnd( 10197 MapType, 10198 MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_PTR_AND_OBJ)); 10199 PtrAndObjBit = MapperCGF.Builder.CreateIsNotNull(PtrAndObjBit); 10200 BaseIsBegin = MapperCGF.Builder.CreateAnd(BaseIsBegin, PtrAndObjBit); 10201 Cond = MapperCGF.Builder.CreateOr(IsArray, BaseIsBegin); 10202 DeleteCond = MapperCGF.Builder.CreateIsNull( 10203 DeleteBit, getName({"omp.array", Prefix, ".delete"})); 10204 } else { 10205 Cond = IsArray; 10206 DeleteCond = MapperCGF.Builder.CreateIsNotNull( 10207 DeleteBit, getName({"omp.array", Prefix, ".delete"})); 10208 } 10209 Cond = MapperCGF.Builder.CreateAnd(Cond, DeleteCond); 10210 MapperCGF.Builder.CreateCondBr(Cond, BodyBB, ExitBB); 10211 10212 MapperCGF.EmitBlock(BodyBB); 10213 // Get the array size by multiplying element size and element number (i.e., \p 10214 // Size). 10215 llvm::Value *ArraySize = MapperCGF.Builder.CreateNUWMul( 10216 Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity())); 10217 // Remove OMP_MAP_TO and OMP_MAP_FROM from the map type, so that it achieves 10218 // memory allocation/deletion purpose only. 10219 llvm::Value *MapTypeArg = MapperCGF.Builder.CreateAnd( 10220 MapType, 10221 MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO | 10222 MappableExprsHandler::OMP_MAP_FROM))); 10223 MapTypeArg = MapperCGF.Builder.CreateOr( 10224 MapTypeArg, 10225 MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_IMPLICIT)); 10226 10227 // Call the runtime API __tgt_push_mapper_component to fill up the runtime 10228 // data structure. 10229 llvm::Value *OffloadingArgs[] = {Handle, Base, Begin, 10230 ArraySize, MapTypeArg, MapName}; 10231 MapperCGF.EmitRuntimeCall( 10232 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), 10233 OMPRTL___tgt_push_mapper_component), 10234 OffloadingArgs); 10235 } 10236 10237 llvm::Function *CGOpenMPRuntime::getOrCreateUserDefinedMapperFunc( 10238 const OMPDeclareMapperDecl *D) { 10239 auto I = UDMMap.find(D); 10240 if (I != UDMMap.end()) 10241 return I->second; 10242 emitUserDefinedMapper(D); 10243 return UDMMap.lookup(D); 10244 } 10245 10246 void CGOpenMPRuntime::emitTargetNumIterationsCall( 10247 CodeGenFunction &CGF, const OMPExecutableDirective &D, 10248 llvm::Value *DeviceID, 10249 llvm::function_ref<llvm::Value *(CodeGenFunction &CGF, 10250 const OMPLoopDirective &D)> 10251 SizeEmitter) { 10252 OpenMPDirectiveKind Kind = D.getDirectiveKind(); 10253 const OMPExecutableDirective *TD = &D; 10254 // Get nested teams distribute kind directive, if any. 
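  // For example, for
  //   #pragma omp target
  //   #pragma omp teams distribute parallel for
  //   for (int i = 0; i < N; ++i) ...
  // the nested 'distribute' directive is the one carrying the loop bounds
  // used to compute the trip count that is pushed to the runtime below.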
10255 if (!isOpenMPDistributeDirective(Kind) || !isOpenMPTeamsDirective(Kind)) 10256 TD = getNestedDistributeDirective(CGM.getContext(), D); 10257 if (!TD) 10258 return; 10259 const auto *LD = cast<OMPLoopDirective>(TD); 10260 auto &&CodeGen = [LD, DeviceID, SizeEmitter, &D, this](CodeGenFunction &CGF, 10261 PrePostActionTy &) { 10262 if (llvm::Value *NumIterations = SizeEmitter(CGF, *LD)) { 10263 llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc()); 10264 llvm::Value *Args[] = {RTLoc, DeviceID, NumIterations}; 10265 CGF.EmitRuntimeCall( 10266 OMPBuilder.getOrCreateRuntimeFunction( 10267 CGM.getModule(), OMPRTL___kmpc_push_target_tripcount_mapper), 10268 Args); 10269 } 10270 }; 10271 emitInlinedDirective(CGF, OMPD_unknown, CodeGen); 10272 } 10273 10274 void CGOpenMPRuntime::emitTargetCall( 10275 CodeGenFunction &CGF, const OMPExecutableDirective &D, 10276 llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond, 10277 llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device, 10278 llvm::function_ref<llvm::Value *(CodeGenFunction &CGF, 10279 const OMPLoopDirective &D)> 10280 SizeEmitter) { 10281 if (!CGF.HaveInsertPoint()) 10282 return; 10283 10284 assert(OutlinedFn && "Invalid outlined function!"); 10285 10286 const bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>() || 10287 D.hasClausesOfKind<OMPNowaitClause>(); 10288 llvm::SmallVector<llvm::Value *, 16> CapturedVars; 10289 const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target); 10290 auto &&ArgsCodegen = [&CS, &CapturedVars](CodeGenFunction &CGF, 10291 PrePostActionTy &) { 10292 CGF.GenerateOpenMPCapturedVars(CS, CapturedVars); 10293 }; 10294 emitInlinedDirective(CGF, OMPD_unknown, ArgsCodegen); 10295 10296 CodeGenFunction::OMPTargetDataInfo InputInfo; 10297 llvm::Value *MapTypesArray = nullptr; 10298 llvm::Value *MapNamesArray = nullptr; 10299 // Fill up the pointer arrays and transfer execution to the device. 10300 auto &&ThenGen = [this, Device, OutlinedFn, OutlinedFnID, &D, &InputInfo, 10301 &MapTypesArray, &MapNamesArray, &CS, RequiresOuterTask, 10302 &CapturedVars, 10303 SizeEmitter](CodeGenFunction &CGF, PrePostActionTy &) { 10304 if (Device.getInt() == OMPC_DEVICE_ancestor) { 10305 // Reverse offloading is not supported, so just execute on the host. 10306 if (RequiresOuterTask) { 10307 CapturedVars.clear(); 10308 CGF.GenerateOpenMPCapturedVars(CS, CapturedVars); 10309 } 10310 emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars); 10311 return; 10312 } 10313 10314 // On top of the arrays that were filled up, the target offloading call 10315 // takes as arguments the device id as well as the host pointer. The host 10316 // pointer is used by the runtime library to identify the current target 10317 // region, so it only has to be unique and not necessarily point to 10318 // anything. It could be the pointer to the outlined function that 10319 // implements the target region, but we aren't using that so that the 10320 // compiler doesn't need to keep that, and could therefore inline the host 10321 // function if proven worthwhile during optimization. 10322 10323 // From this point on, we need to have an ID of the target region defined. 10324 assert(OutlinedFnID && "Invalid outlined function ID!"); 10325 10326 // Emit device ID if any. 
    llvm::Value *DeviceID;
    if (Device.getPointer()) {
      assert((Device.getInt() == OMPC_DEVICE_unknown ||
              Device.getInt() == OMPC_DEVICE_device_num) &&
             "Expected device_num modifier.");
      llvm::Value *DevVal = CGF.EmitScalarExpr(Device.getPointer());
      DeviceID =
          CGF.Builder.CreateIntCast(DevVal, CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Value *PointerNum =
        CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);

    // Return value of the runtime offloading call.
    llvm::Value *Return;

    llvm::Value *NumTeams = emitNumTeamsForTargetDirective(CGF, D);
    llvm::Value *NumThreads = emitNumThreadsForTargetDirective(CGF, D);

    // Source location for the ident struct.
    llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());

    // Emit the tripcount for the target loop-based directive.
    emitTargetNumIterationsCall(CGF, D, DeviceID, SizeEmitter);

    bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
    // The target region is an outlined function launched by the runtime
    // via calls to __tgt_target() or __tgt_target_teams().
    //
    // __tgt_target() launches a target region with one team and one thread,
    // executing a serial region. This master thread may in turn launch
    // more threads within its team upon encountering a parallel region;
    // however, no additional teams can be launched on the device.
    //
    // __tgt_target_teams() launches a target region with one or more teams,
    // each with one or more threads. This call is required for target
    // constructs such as:
    //   'target teams'
    //   'target' / 'teams'
    //   'target teams distribute parallel for'
    //   'target parallel'
    // and so on.
    //
    // Note that on the host and CPU targets, the runtime implementation of
    // these calls simply calls the outlined function without forking threads.
    // The outlined functions themselves have runtime calls to
    // __kmpc_fork_teams() and __kmpc_fork() for this purpose, codegen'd by
    // the compiler in emitTeamsCall() and emitParallelCall().
    //
    // In contrast, on the NVPTX target, the implementation of
    // __tgt_target_teams() launches a GPU kernel with the requested number
    // of teams and threads, so no additional calls to the runtime are
    // required.
    if (NumTeams) {
      // If we have NumTeams defined, it means that we have an enclosed teams
      // region. Therefore we also expect to have NumThreads defined. These
      // two values should be defined in the presence of a teams directive,
      // regardless of having any clauses associated. If the user is using
      // teams but no clauses, these two values will be the default that
      // should be passed to the runtime library - a 32-bit integer with the
      // value zero.
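      // Shape of the runtime call emitted below (sketch; the '_nowait'
      // variant appends four zero/null dependence arguments):
      //   __tgt_target_teams_mapper(loc, device_id, host_fn_id, arg_num,
      //                             base_ptrs, ptrs, sizes, map_types,
      //                             map_names, mappers, num_teams,
      //                             thread_limit)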
10389 assert(NumThreads && "Thread limit expression should be available along " 10390 "with number of teams."); 10391 SmallVector<llvm::Value *> OffloadingArgs = { 10392 RTLoc, 10393 DeviceID, 10394 OutlinedFnID, 10395 PointerNum, 10396 InputInfo.BasePointersArray.getPointer(), 10397 InputInfo.PointersArray.getPointer(), 10398 InputInfo.SizesArray.getPointer(), 10399 MapTypesArray, 10400 MapNamesArray, 10401 InputInfo.MappersArray.getPointer(), 10402 NumTeams, 10403 NumThreads}; 10404 if (HasNowait) { 10405 // Add int32_t depNum = 0, void *depList = nullptr, int32_t 10406 // noAliasDepNum = 0, void *noAliasDepList = nullptr. 10407 OffloadingArgs.push_back(CGF.Builder.getInt32(0)); 10408 OffloadingArgs.push_back(llvm::ConstantPointerNull::get(CGM.VoidPtrTy)); 10409 OffloadingArgs.push_back(CGF.Builder.getInt32(0)); 10410 OffloadingArgs.push_back(llvm::ConstantPointerNull::get(CGM.VoidPtrTy)); 10411 } 10412 Return = CGF.EmitRuntimeCall( 10413 OMPBuilder.getOrCreateRuntimeFunction( 10414 CGM.getModule(), HasNowait 10415 ? OMPRTL___tgt_target_teams_nowait_mapper 10416 : OMPRTL___tgt_target_teams_mapper), 10417 OffloadingArgs); 10418 } else { 10419 SmallVector<llvm::Value *> OffloadingArgs = { 10420 RTLoc, 10421 DeviceID, 10422 OutlinedFnID, 10423 PointerNum, 10424 InputInfo.BasePointersArray.getPointer(), 10425 InputInfo.PointersArray.getPointer(), 10426 InputInfo.SizesArray.getPointer(), 10427 MapTypesArray, 10428 MapNamesArray, 10429 InputInfo.MappersArray.getPointer()}; 10430 if (HasNowait) { 10431 // Add int32_t depNum = 0, void *depList = nullptr, int32_t 10432 // noAliasDepNum = 0, void *noAliasDepList = nullptr. 10433 OffloadingArgs.push_back(CGF.Builder.getInt32(0)); 10434 OffloadingArgs.push_back(llvm::ConstantPointerNull::get(CGM.VoidPtrTy)); 10435 OffloadingArgs.push_back(CGF.Builder.getInt32(0)); 10436 OffloadingArgs.push_back(llvm::ConstantPointerNull::get(CGM.VoidPtrTy)); 10437 } 10438 Return = CGF.EmitRuntimeCall( 10439 OMPBuilder.getOrCreateRuntimeFunction( 10440 CGM.getModule(), HasNowait ? OMPRTL___tgt_target_nowait_mapper 10441 : OMPRTL___tgt_target_mapper), 10442 OffloadingArgs); 10443 } 10444 10445 // Check the error code and execute the host version if required. 10446 llvm::BasicBlock *OffloadFailedBlock = 10447 CGF.createBasicBlock("omp_offload.failed"); 10448 llvm::BasicBlock *OffloadContBlock = 10449 CGF.createBasicBlock("omp_offload.cont"); 10450 llvm::Value *Failed = CGF.Builder.CreateIsNotNull(Return); 10451 CGF.Builder.CreateCondBr(Failed, OffloadFailedBlock, OffloadContBlock); 10452 10453 CGF.EmitBlock(OffloadFailedBlock); 10454 if (RequiresOuterTask) { 10455 CapturedVars.clear(); 10456 CGF.GenerateOpenMPCapturedVars(CS, CapturedVars); 10457 } 10458 emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars); 10459 CGF.EmitBranch(OffloadContBlock); 10460 10461 CGF.EmitBlock(OffloadContBlock, /*IsFinished=*/true); 10462 }; 10463 10464 // Notify that the host version must be executed. 
10465 auto &&ElseGen = [this, &D, OutlinedFn, &CS, &CapturedVars, 10466 RequiresOuterTask](CodeGenFunction &CGF, 10467 PrePostActionTy &) { 10468 if (RequiresOuterTask) { 10469 CapturedVars.clear(); 10470 CGF.GenerateOpenMPCapturedVars(CS, CapturedVars); 10471 } 10472 emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars); 10473 }; 10474 10475 auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray, 10476 &MapNamesArray, &CapturedVars, RequiresOuterTask, 10477 &CS](CodeGenFunction &CGF, PrePostActionTy &) { 10478 // Fill up the arrays with all the captured variables. 10479 MappableExprsHandler::MapCombinedInfoTy CombinedInfo; 10480 10481 // Get mappable expression information. 10482 MappableExprsHandler MEHandler(D, CGF); 10483 llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers; 10484 llvm::DenseSet<CanonicalDeclPtr<const Decl>> MappedVarSet; 10485 10486 auto RI = CS.getCapturedRecordDecl()->field_begin(); 10487 auto *CV = CapturedVars.begin(); 10488 for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(), 10489 CE = CS.capture_end(); 10490 CI != CE; ++CI, ++RI, ++CV) { 10491 MappableExprsHandler::MapCombinedInfoTy CurInfo; 10492 MappableExprsHandler::StructRangeInfoTy PartialStruct; 10493 10494 // VLA sizes are passed to the outlined region by copy and do not have map 10495 // information associated. 10496 if (CI->capturesVariableArrayType()) { 10497 CurInfo.Exprs.push_back(nullptr); 10498 CurInfo.BasePointers.push_back(*CV); 10499 CurInfo.Pointers.push_back(*CV); 10500 CurInfo.Sizes.push_back(CGF.Builder.CreateIntCast( 10501 CGF.getTypeSize(RI->getType()), CGF.Int64Ty, /*isSigned=*/true)); 10502 // Copy to the device as an argument. No need to retrieve it. 10503 CurInfo.Types.push_back(MappableExprsHandler::OMP_MAP_LITERAL | 10504 MappableExprsHandler::OMP_MAP_TARGET_PARAM | 10505 MappableExprsHandler::OMP_MAP_IMPLICIT); 10506 CurInfo.Mappers.push_back(nullptr); 10507 } else { 10508 // If we have any information in the map clause, we use it, otherwise we 10509 // just do a default mapping. 10510 MEHandler.generateInfoForCapture(CI, *CV, CurInfo, PartialStruct); 10511 if (!CI->capturesThis()) 10512 MappedVarSet.insert(CI->getCapturedVar()); 10513 else 10514 MappedVarSet.insert(nullptr); 10515 if (CurInfo.BasePointers.empty() && !PartialStruct.Base.isValid()) 10516 MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurInfo); 10517 // Generate correct mapping for variables captured by reference in 10518 // lambdas. 10519 if (CI->capturesVariable()) 10520 MEHandler.generateInfoForLambdaCaptures(CI->getCapturedVar(), *CV, 10521 CurInfo, LambdaPointers); 10522 } 10523 // We expect to have at least an element of information for this capture. 10524 assert((!CurInfo.BasePointers.empty() || PartialStruct.Base.isValid()) && 10525 "Non-existing map pointer for capture!"); 10526 assert(CurInfo.BasePointers.size() == CurInfo.Pointers.size() && 10527 CurInfo.BasePointers.size() == CurInfo.Sizes.size() && 10528 CurInfo.BasePointers.size() == CurInfo.Types.size() && 10529 CurInfo.BasePointers.size() == CurInfo.Mappers.size() && 10530 "Inconsistent map information sizes!"); 10531 10532 // If there is an entry in PartialStruct it means we have a struct with 10533 // individual members mapped. Emit an extra combined entry. 
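      // Illustrative case: for 'map(s.a, s.b)' the runtime receives, besides
      // the entries for 's.a' and 's.b', one combined entry covering 's'
      // itself so the struct is allocated and transferred as a single object.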
      if (PartialStruct.Base.isValid()) {
        CombinedInfo.append(PartialStruct.PreliminaryMapData);
        MEHandler.emitCombinedEntry(
            CombinedInfo, CurInfo.Types, PartialStruct, nullptr,
            !PartialStruct.PreliminaryMapData.BasePointers.empty());
      }

      // We need to append the results of this capture to what we already
      // have.
      CombinedInfo.append(CurInfo);
    }
    // Adjust MEMBER_OF flags for the lambda captures.
    MEHandler.adjustMemberOfForLambdaCaptures(
        LambdaPointers, CombinedInfo.BasePointers, CombinedInfo.Pointers,
        CombinedInfo.Types);
    // Map any list items in a map clause that were not captured because they
    // weren't referenced within the construct.
    MEHandler.generateAllInfo(CombinedInfo, MappedVarSet);

    TargetDataInfo Info;
    // Fill up the arrays and create the arguments.
    emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder);
    emitOffloadingArraysArgument(
        CGF, Info.BasePointersArray, Info.PointersArray, Info.SizesArray,
        Info.MapTypesArray, Info.MapNamesArray, Info.MappersArray, Info,
        {/*ForEndTask=*/false});

    InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
    InputInfo.BasePointersArray =
        Address(Info.BasePointersArray, CGM.getPointerAlign());
    InputInfo.PointersArray =
        Address(Info.PointersArray, CGM.getPointerAlign());
    InputInfo.SizesArray = Address(Info.SizesArray, CGM.getPointerAlign());
    InputInfo.MappersArray = Address(Info.MappersArray, CGM.getPointerAlign());
    MapTypesArray = Info.MapTypesArray;
    MapNamesArray = Info.MapNamesArray;
    if (RequiresOuterTask)
      CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
    else
      emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
  };

  auto &&TargetElseGen = [this, &ElseGen, &D, RequiresOuterTask](
                             CodeGenFunction &CGF, PrePostActionTy &) {
    if (RequiresOuterTask) {
      CodeGenFunction::OMPTargetDataInfo InputInfo;
      CGF.EmitOMPTargetTaskBasedDirective(D, ElseGen, InputInfo);
    } else {
      emitInlinedDirective(CGF, D.getDirectiveKind(), ElseGen);
    }
  };

  // If we have a target function ID it means that we need to support
  // offloading; otherwise, just execute on the host. We need to execute on
  // the host regardless of the conditional in the if clause if, e.g., the
  // user does not specify target triples.
  if (OutlinedFnID) {
    if (IfCond) {
      emitIfClause(CGF, IfCond, TargetThenGen, TargetElseGen);
    } else {
      RegionCodeGenTy ThenRCG(TargetThenGen);
      ThenRCG(CGF);
    }
  } else {
    RegionCodeGenTy ElseRCG(TargetElseGen);
    ElseRCG(CGF);
  }
}

void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S,
                                                    StringRef ParentName) {
  if (!S)
    return;

  // Codegen OMP target directives that offload compute to the device.
  bool RequiresDeviceCodegen =
      isa<OMPExecutableDirective>(S) &&
      isOpenMPTargetExecutionDirective(
          cast<OMPExecutableDirective>(S)->getDirectiveKind());

  if (RequiresDeviceCodegen) {
    const auto &E = *cast<OMPExecutableDirective>(S);
    unsigned DeviceID;
    unsigned FileID;
    unsigned Line;
    getTargetEntryUniqueInfo(CGM.getContext(), E.getBeginLoc(), DeviceID,
                             FileID, Line);

    // Is this a target region that should not be emitted as an entry point?
If 10622 // so just signal we are done with this target region. 10623 if (!OffloadEntriesInfoManager.hasTargetRegionEntryInfo(DeviceID, FileID, 10624 ParentName, Line)) 10625 return; 10626 10627 switch (E.getDirectiveKind()) { 10628 case OMPD_target: 10629 CodeGenFunction::EmitOMPTargetDeviceFunction(CGM, ParentName, 10630 cast<OMPTargetDirective>(E)); 10631 break; 10632 case OMPD_target_parallel: 10633 CodeGenFunction::EmitOMPTargetParallelDeviceFunction( 10634 CGM, ParentName, cast<OMPTargetParallelDirective>(E)); 10635 break; 10636 case OMPD_target_teams: 10637 CodeGenFunction::EmitOMPTargetTeamsDeviceFunction( 10638 CGM, ParentName, cast<OMPTargetTeamsDirective>(E)); 10639 break; 10640 case OMPD_target_teams_distribute: 10641 CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction( 10642 CGM, ParentName, cast<OMPTargetTeamsDistributeDirective>(E)); 10643 break; 10644 case OMPD_target_teams_distribute_simd: 10645 CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction( 10646 CGM, ParentName, cast<OMPTargetTeamsDistributeSimdDirective>(E)); 10647 break; 10648 case OMPD_target_parallel_for: 10649 CodeGenFunction::EmitOMPTargetParallelForDeviceFunction( 10650 CGM, ParentName, cast<OMPTargetParallelForDirective>(E)); 10651 break; 10652 case OMPD_target_parallel_for_simd: 10653 CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction( 10654 CGM, ParentName, cast<OMPTargetParallelForSimdDirective>(E)); 10655 break; 10656 case OMPD_target_simd: 10657 CodeGenFunction::EmitOMPTargetSimdDeviceFunction( 10658 CGM, ParentName, cast<OMPTargetSimdDirective>(E)); 10659 break; 10660 case OMPD_target_teams_distribute_parallel_for: 10661 CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction( 10662 CGM, ParentName, 10663 cast<OMPTargetTeamsDistributeParallelForDirective>(E)); 10664 break; 10665 case OMPD_target_teams_distribute_parallel_for_simd: 10666 CodeGenFunction:: 10667 EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction( 10668 CGM, ParentName, 10669 cast<OMPTargetTeamsDistributeParallelForSimdDirective>(E)); 10670 break; 10671 case OMPD_parallel: 10672 case OMPD_for: 10673 case OMPD_parallel_for: 10674 case OMPD_parallel_master: 10675 case OMPD_parallel_sections: 10676 case OMPD_for_simd: 10677 case OMPD_parallel_for_simd: 10678 case OMPD_cancel: 10679 case OMPD_cancellation_point: 10680 case OMPD_ordered: 10681 case OMPD_threadprivate: 10682 case OMPD_allocate: 10683 case OMPD_task: 10684 case OMPD_simd: 10685 case OMPD_tile: 10686 case OMPD_unroll: 10687 case OMPD_sections: 10688 case OMPD_section: 10689 case OMPD_single: 10690 case OMPD_master: 10691 case OMPD_critical: 10692 case OMPD_taskyield: 10693 case OMPD_barrier: 10694 case OMPD_taskwait: 10695 case OMPD_taskgroup: 10696 case OMPD_atomic: 10697 case OMPD_flush: 10698 case OMPD_depobj: 10699 case OMPD_scan: 10700 case OMPD_teams: 10701 case OMPD_target_data: 10702 case OMPD_target_exit_data: 10703 case OMPD_target_enter_data: 10704 case OMPD_distribute: 10705 case OMPD_distribute_simd: 10706 case OMPD_distribute_parallel_for: 10707 case OMPD_distribute_parallel_for_simd: 10708 case OMPD_teams_distribute: 10709 case OMPD_teams_distribute_simd: 10710 case OMPD_teams_distribute_parallel_for: 10711 case OMPD_teams_distribute_parallel_for_simd: 10712 case OMPD_target_update: 10713 case OMPD_declare_simd: 10714 case OMPD_declare_variant: 10715 case OMPD_begin_declare_variant: 10716 case OMPD_end_declare_variant: 10717 case OMPD_declare_target: 10718 case OMPD_end_declare_target: 10719 case 
        OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_master_taskloop:
    case OMPD_master_taskloop_simd:
    case OMPD_parallel_master_taskloop:
    case OMPD_parallel_master_taskloop_simd:
    case OMPD_requires:
    case OMPD_metadirective:
    case OMPD_unknown:
    default:
      llvm_unreachable("Unknown target directive for OpenMP device codegen.");
    }
    return;
  }

  if (const auto *E = dyn_cast<OMPExecutableDirective>(S)) {
    if (!E->hasAssociatedStmt() || !E->getAssociatedStmt())
      return;

    scanForTargetRegionsFunctions(E->getRawStmt(), ParentName);
    return;
  }

  // If this is a lambda function, look into its body.
  if (const auto *L = dyn_cast<LambdaExpr>(S))
    S = L->getBody();

  // Keep looking for target regions recursively.
  for (const Stmt *II : S->children())
    scanForTargetRegionsFunctions(II, ParentName);
}

static bool isAssumedToBeNotEmitted(const ValueDecl *VD, bool IsDevice) {
  Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
      OMPDeclareTargetDeclAttr::getDeviceType(VD);
  if (!DevTy)
    return false;
  // Do not emit device_type(nohost) functions for the host.
  if (!IsDevice && DevTy == OMPDeclareTargetDeclAttr::DT_NoHost)
    return true;
  // Do not emit device_type(host) functions for the device.
  if (IsDevice && DevTy == OMPDeclareTargetDeclAttr::DT_Host)
    return true;
  return false;
}

bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) {
  // If emitting code for the host, we do not process FD here. Instead we do
  // the normal code generation.
  if (!CGM.getLangOpts().OpenMPIsDevice) {
    if (const auto *FD = dyn_cast<FunctionDecl>(GD.getDecl()))
      if (isAssumedToBeNotEmitted(cast<ValueDecl>(FD),
                                  CGM.getLangOpts().OpenMPIsDevice))
        return true;
    return false;
  }

  const ValueDecl *VD = cast<ValueDecl>(GD.getDecl());
  // Try to detect target regions in the function.
  if (const auto *FD = dyn_cast<FunctionDecl>(VD)) {
    StringRef Name = CGM.getMangledName(GD);
    scanForTargetRegionsFunctions(FD->getBody(), Name);
    if (isAssumedToBeNotEmitted(cast<ValueDecl>(FD),
                                CGM.getLangOpts().OpenMPIsDevice))
      return true;
  }

  // Do not emit the function if it is not marked as declare target.
  return !OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD) &&
         AlreadyEmittedTargetDecls.count(VD) == 0;
}

bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
  if (isAssumedToBeNotEmitted(cast<ValueDecl>(GD.getDecl()),
                              CGM.getLangOpts().OpenMPIsDevice))
    return true;

  if (!CGM.getLangOpts().OpenMPIsDevice)
    return false;

  // Check if there are Ctors/Dtors in this declaration and look for target
  // regions in it. We use the complete variant to produce the kernel name
  // mangling.
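  // Illustrative case: for a file-scope 'S s;' whose constructor body
  // contains
  //   #pragma omp target
  //   { ... }
  // the region is discovered here and registered under the mangled name of
  // the complete constructor, which serves as the kernel's parent name.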
  QualType RDTy = cast<VarDecl>(GD.getDecl())->getType();
  if (const auto *RD = RDTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl()) {
    for (const CXXConstructorDecl *Ctor : RD->ctors()) {
      StringRef ParentName =
          CGM.getMangledName(GlobalDecl(Ctor, Ctor_Complete));
      scanForTargetRegionsFunctions(Ctor->getBody(), ParentName);
    }
    if (const CXXDestructorDecl *Dtor = RD->getDestructor()) {
      StringRef ParentName =
          CGM.getMangledName(GlobalDecl(Dtor, Dtor_Complete));
      scanForTargetRegionsFunctions(Dtor->getBody(), ParentName);
    }
  }

  // Do not emit the variable if it is not marked as declare target.
  llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(
          cast<VarDecl>(GD.getDecl()));
  if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
      (*Res == OMPDeclareTargetDeclAttr::MT_To &&
       HasRequiresUnifiedSharedMemory)) {
    DeferredGlobalVariables.insert(cast<VarDecl>(GD.getDecl()));
    return true;
  }
  return false;
}

void CGOpenMPRuntime::registerTargetGlobalVariable(const VarDecl *VD,
                                                   llvm::Constant *Addr) {
  if (CGM.getLangOpts().OMPTargetTriples.empty() &&
      !CGM.getLangOpts().OpenMPIsDevice)
    return;

  // If we have host/nohost variables, they do not need to be registered.
  Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
      OMPDeclareTargetDeclAttr::getDeviceType(VD);
  if (DevTy && DevTy.getValue() != OMPDeclareTargetDeclAttr::DT_Any)
    return;

  llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
  if (!Res) {
    if (CGM.getLangOpts().OpenMPIsDevice) {
      // Register non-target variables being emitted in device code (debug
      // info may cause this).
      StringRef VarName = CGM.getMangledName(VD);
      EmittedNonTargetVariables.try_emplace(VarName, Addr);
    }
    return;
  }
  // Register declare target variables.
  OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags;
  StringRef VarName;
  CharUnits VarSize;
  llvm::GlobalValue::LinkageTypes Linkage;

  if (*Res == OMPDeclareTargetDeclAttr::MT_To &&
      !HasRequiresUnifiedSharedMemory) {
    Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo;
    VarName = CGM.getMangledName(VD);
    if (VD->hasDefinition(CGM.getContext()) != VarDecl::DeclarationOnly) {
      VarSize = CGM.getContext().getTypeSizeInChars(VD->getType());
      assert(!VarSize.isZero() && "Expected non-zero size of the variable");
    } else {
      VarSize = CharUnits::Zero();
    }
    Linkage = CGM.getLLVMLinkageVarDefinition(VD, /*IsConstant=*/false);
    // Temporary solution to prevent optimization of the internal variables.
    if (CGM.getLangOpts().OpenMPIsDevice && !VD->isExternallyVisible()) {
      // Do not create a "ref-variable" if the original is not also available
      // on the host.
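      // A sketch of the IR produced by the code below for an internal
      // variable 'x' (illustrative only; assumes typed pointers):
      //   @x.ref = internal constant i32* @x
      // together with an entry in llvm.compiler.used so the reference
      // survives optimization.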
10875 if (!OffloadEntriesInfoManager.hasDeviceGlobalVarEntryInfo(VarName)) 10876 return; 10877 std::string RefName = getName({VarName, "ref"}); 10878 if (!CGM.GetGlobalValue(RefName)) { 10879 llvm::Constant *AddrRef = 10880 getOrCreateInternalVariable(Addr->getType(), RefName); 10881 auto *GVAddrRef = cast<llvm::GlobalVariable>(AddrRef); 10882 GVAddrRef->setConstant(/*Val=*/true); 10883 GVAddrRef->setLinkage(llvm::GlobalValue::InternalLinkage); 10884 GVAddrRef->setInitializer(Addr); 10885 CGM.addCompilerUsedGlobal(GVAddrRef); 10886 } 10887 } 10888 } else { 10889 assert(((*Res == OMPDeclareTargetDeclAttr::MT_Link) || 10890 (*Res == OMPDeclareTargetDeclAttr::MT_To && 10891 HasRequiresUnifiedSharedMemory)) && 10892 "Declare target attribute must link or to with unified memory."); 10893 if (*Res == OMPDeclareTargetDeclAttr::MT_Link) 10894 Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink; 10895 else 10896 Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo; 10897 10898 if (CGM.getLangOpts().OpenMPIsDevice) { 10899 VarName = Addr->getName(); 10900 Addr = nullptr; 10901 } else { 10902 VarName = getAddrOfDeclareTargetVar(VD).getName(); 10903 Addr = cast<llvm::Constant>(getAddrOfDeclareTargetVar(VD).getPointer()); 10904 } 10905 VarSize = CGM.getPointerSize(); 10906 Linkage = llvm::GlobalValue::WeakAnyLinkage; 10907 } 10908 10909 OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo( 10910 VarName, Addr, VarSize, Flags, Linkage); 10911 } 10912 10913 bool CGOpenMPRuntime::emitTargetGlobal(GlobalDecl GD) { 10914 if (isa<FunctionDecl>(GD.getDecl()) || 10915 isa<OMPDeclareReductionDecl>(GD.getDecl())) 10916 return emitTargetFunctions(GD); 10917 10918 return emitTargetGlobalVariable(GD); 10919 } 10920 10921 void CGOpenMPRuntime::emitDeferredTargetDecls() const { 10922 for (const VarDecl *VD : DeferredGlobalVariables) { 10923 llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 10924 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD); 10925 if (!Res) 10926 continue; 10927 if (*Res == OMPDeclareTargetDeclAttr::MT_To && 10928 !HasRequiresUnifiedSharedMemory) { 10929 CGM.EmitGlobal(VD); 10930 } else { 10931 assert((*Res == OMPDeclareTargetDeclAttr::MT_Link || 10932 (*Res == OMPDeclareTargetDeclAttr::MT_To && 10933 HasRequiresUnifiedSharedMemory)) && 10934 "Expected link clause or to clause with unified memory."); 10935 (void)CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD); 10936 } 10937 } 10938 } 10939 10940 void CGOpenMPRuntime::adjustTargetSpecificDataForLambdas( 10941 CodeGenFunction &CGF, const OMPExecutableDirective &D) const { 10942 assert(isOpenMPTargetExecutionDirective(D.getDirectiveKind()) && 10943 " Expected target-based directive."); 10944 } 10945 10946 void CGOpenMPRuntime::processRequiresDirective(const OMPRequiresDecl *D) { 10947 for (const OMPClause *Clause : D->clauselists()) { 10948 if (Clause->getClauseKind() == OMPC_unified_shared_memory) { 10949 HasRequiresUnifiedSharedMemory = true; 10950 } else if (const auto *AC = 10951 dyn_cast<OMPAtomicDefaultMemOrderClause>(Clause)) { 10952 switch (AC->getAtomicDefaultMemOrderKind()) { 10953 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_acq_rel: 10954 RequiresAtomicOrdering = llvm::AtomicOrdering::AcquireRelease; 10955 break; 10956 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_seq_cst: 10957 RequiresAtomicOrdering = llvm::AtomicOrdering::SequentiallyConsistent; 10958 break; 10959 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_relaxed: 10960 RequiresAtomicOrdering = llvm::AtomicOrdering::Monotonic; 10961 break; 10962 case 
      OMPC_ATOMIC_DEFAULT_MEM_ORDER_unknown:
        break;
      }
    }
  }
}

llvm::AtomicOrdering CGOpenMPRuntime::getDefaultMemoryOrdering() const {
  return RequiresAtomicOrdering;
}

bool CGOpenMPRuntime::hasAllocateAttributeForGlobalVar(const VarDecl *VD,
                                                       LangAS &AS) {
  if (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())
    return false;
  const auto *A = VD->getAttr<OMPAllocateDeclAttr>();
  switch (A->getAllocatorType()) {
  case OMPAllocateDeclAttr::OMPNullMemAlloc:
  case OMPAllocateDeclAttr::OMPDefaultMemAlloc:
    // Not supported, fallback to the default mem space.
  case OMPAllocateDeclAttr::OMPLargeCapMemAlloc:
  case OMPAllocateDeclAttr::OMPCGroupMemAlloc:
  case OMPAllocateDeclAttr::OMPHighBWMemAlloc:
  case OMPAllocateDeclAttr::OMPLowLatMemAlloc:
  case OMPAllocateDeclAttr::OMPThreadMemAlloc:
  case OMPAllocateDeclAttr::OMPConstMemAlloc:
  case OMPAllocateDeclAttr::OMPPTeamMemAlloc:
    AS = LangAS::Default;
    return true;
  case OMPAllocateDeclAttr::OMPUserDefinedMemAlloc:
    llvm_unreachable("Expected predefined allocator for the variables with "
                     "static storage.");
  }
  return false;
}

bool CGOpenMPRuntime::hasRequiresUnifiedSharedMemory() const {
  return HasRequiresUnifiedSharedMemory;
}

CGOpenMPRuntime::DisableAutoDeclareTargetRAII::DisableAutoDeclareTargetRAII(
    CodeGenModule &CGM)
    : CGM(CGM) {
  if (CGM.getLangOpts().OpenMPIsDevice) {
    SavedShouldMarkAsGlobal = CGM.getOpenMPRuntime().ShouldMarkAsGlobal;
    CGM.getOpenMPRuntime().ShouldMarkAsGlobal = false;
  }
}

CGOpenMPRuntime::DisableAutoDeclareTargetRAII::~DisableAutoDeclareTargetRAII() {
  if (CGM.getLangOpts().OpenMPIsDevice)
    CGM.getOpenMPRuntime().ShouldMarkAsGlobal = SavedShouldMarkAsGlobal;
}

bool CGOpenMPRuntime::markAsGlobalTarget(GlobalDecl GD) {
  if (!CGM.getLangOpts().OpenMPIsDevice || !ShouldMarkAsGlobal)
    return true;

  const auto *D = cast<FunctionDecl>(GD.getDecl());
  // Do not emit the function if it is marked as declare target, as it was
  // already emitted.
  if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(D)) {
    if (D->hasBody() && AlreadyEmittedTargetDecls.count(D) == 0) {
      if (auto *F = dyn_cast_or_null<llvm::Function>(
              CGM.GetGlobalValue(CGM.getMangledName(GD))))
        return !F->isDeclaration();
      return false;
    }
    return true;
  }

  return !AlreadyEmittedTargetDecls.insert(D).second;
}

llvm::Function *CGOpenMPRuntime::emitRequiresDirectiveRegFun() {
  // If we don't have entries or if we are emitting code for the device, we
  // don't need to do anything.
  if (CGM.getLangOpts().OMPTargetTriples.empty() ||
      CGM.getLangOpts().OpenMPSimd || CGM.getLangOpts().OpenMPIsDevice ||
      (OffloadEntriesInfoManager.empty() &&
       !HasEmittedDeclareTargetRegion && !HasEmittedTargetRegion))
    return nullptr;

  // Create and register the function that handles the requires directives.
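  // In pseudo-C, the function built below amounts to (sketch):
  //   void omp_offloading.requires_reg(void) {
  //     __tgt_register_requires(flags);
  //   }
  // The caller is expected to register it to run at program startup, before
  // any offloading entry point executes.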
  ASTContext &C = CGM.getContext();

  llvm::Function *RequiresRegFn;
  {
    CodeGenFunction CGF(CGM);
    const auto &FI = CGM.getTypes().arrangeNullaryFunction();
    llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
    std::string ReqName = getName({"omp_offloading", "requires_reg"});
    RequiresRegFn = CGM.CreateGlobalInitOrCleanUpFunction(FTy, ReqName, FI);
    CGF.StartFunction(GlobalDecl(), C.VoidTy, RequiresRegFn, FI, {});
    OpenMPOffloadingRequiresDirFlags Flags = OMP_REQ_NONE;
    // TODO: check for other requires clauses.
    // The requires directive takes effect only when a target region is
    // present in the compilation unit. Otherwise it is ignored and not
    // passed to the runtime. This prevents the runtime from throwing an
    // error for mismatching requires clauses across compilation units that
    // don't contain at least one target region.
    assert((HasEmittedTargetRegion || HasEmittedDeclareTargetRegion ||
            !OffloadEntriesInfoManager.empty()) &&
           "Target or declare target region expected.");
    if (HasRequiresUnifiedSharedMemory)
      Flags = OMP_REQ_UNIFIED_SHARED_MEMORY;
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___tgt_register_requires),
                        llvm::ConstantInt::get(CGM.Int64Ty, Flags));
    CGF.FinishFunction();
  }
  return RequiresRegFn;
}

void CGOpenMPRuntime::emitTeamsCall(CodeGenFunction &CGF,
                                    const OMPExecutableDirective &D,
                                    SourceLocation Loc,
                                    llvm::Function *OutlinedFn,
                                    ArrayRef<llvm::Value *> CapturedVars) {
  if (!CGF.HaveInsertPoint())
    return;

  llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
  CodeGenFunction::RunCleanupsScope Scope(CGF);

  // Build call __kmpc_fork_teams(loc, n, microtask, var1, .., varn);
  llvm::Value *Args[] = {
      RTLoc,
      CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
      CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy())};
  llvm::SmallVector<llvm::Value *, 16> RealArgs;
  RealArgs.append(std::begin(Args), std::end(Args));
  RealArgs.append(CapturedVars.begin(), CapturedVars.end());

  llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(
      CGM.getModule(), OMPRTL___kmpc_fork_teams);
  CGF.EmitRuntimeCall(RTLFn, RealArgs);
}

void CGOpenMPRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
                                         const Expr *NumTeams,
                                         const Expr *ThreadLimit,
                                         SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;

  llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);

  llvm::Value *NumTeamsVal =
      NumTeams
          ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(NumTeams),
                                      CGF.CGM.Int32Ty, /*isSigned=*/true)
          : CGF.Builder.getInt32(0);

  llvm::Value *ThreadLimitVal =
      ThreadLimit
          ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit),
                                      CGF.CGM.Int32Ty, /*isSigned=*/true)
          : CGF.Builder.getInt32(0);

  // Build call __kmpc_push_num_teams(&loc, global_tid, num_teams,
  // thread_limit).
  llvm::Value *PushNumTeamsArgs[] = {RTLoc, getThreadID(CGF, Loc), NumTeamsVal,
                                     ThreadLimitVal};
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_push_num_teams),
                      PushNumTeamsArgs);
}

void CGOpenMPRuntime::emitTargetDataCalls(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) {
  if (!CGF.HaveInsertPoint())
    return;

  // Action used to replace the default codegen action and turn privatization
  // off.
  PrePostActionTy NoPrivAction;

  // Generate the code for the opening of the data environment. Capture all
  // the arguments of the runtime call by reference because they are used in
  // the closing of the region.
  auto &&BeginThenGen = [this, &D, Device, &Info,
                         &CodeGen](CodeGenFunction &CGF, PrePostActionTy &) {
    // Fill up the arrays with all the mapped variables.
    MappableExprsHandler::MapCombinedInfoTy CombinedInfo;

    // Get map clause information.
    MappableExprsHandler MEHandler(D, CGF);
    MEHandler.generateAllInfo(CombinedInfo);

    // Fill up the arrays and create the arguments.
    emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder,
                         /*IsNonContiguous=*/true);

    llvm::Value *BasePointersArrayArg = nullptr;
    llvm::Value *PointersArrayArg = nullptr;
    llvm::Value *SizesArrayArg = nullptr;
    llvm::Value *MapTypesArrayArg = nullptr;
    llvm::Value *MapNamesArrayArg = nullptr;
    llvm::Value *MappersArrayArg = nullptr;
    emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
                                 SizesArrayArg, MapTypesArrayArg,
                                 MapNamesArrayArg, MappersArrayArg, Info);

    // Emit device ID if any.
    llvm::Value *DeviceID = nullptr;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);

    // Source location for the ident struct.
    llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());

    llvm::Value *OffloadingArgs[] = {RTLoc,
                                     DeviceID,
                                     PointerNum,
                                     BasePointersArrayArg,
                                     PointersArrayArg,
                                     SizesArrayArg,
                                     MapTypesArrayArg,
                                     MapNamesArrayArg,
                                     MappersArrayArg};
    CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___tgt_target_data_begin_mapper),
        OffloadingArgs);

    // If device pointer privatization is required, emit the body of the
    // region here. It will have to be duplicated: with and without
    // privatization.
    if (!Info.CaptureDeviceAddrMap.empty())
      CodeGen(CGF);
  };

  // Generate code for the closing of the data region.
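  // (Taken together with BeginThenGen above, the net effect for
  //   #pragma omp target data map(...)
  // is, in sketch form:
  //   __tgt_target_data_begin_mapper(...); <region body>;
  //   __tgt_target_data_end_mapper(...);
  // with the body possibly emitted twice when device pointer privatization
  // is required.)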
11204 auto &&EndThenGen = [this, Device, &Info, &D](CodeGenFunction &CGF, 11205 PrePostActionTy &) { 11206 assert(Info.isValid() && "Invalid data environment closing arguments."); 11207 11208 llvm::Value *BasePointersArrayArg = nullptr; 11209 llvm::Value *PointersArrayArg = nullptr; 11210 llvm::Value *SizesArrayArg = nullptr; 11211 llvm::Value *MapTypesArrayArg = nullptr; 11212 llvm::Value *MapNamesArrayArg = nullptr; 11213 llvm::Value *MappersArrayArg = nullptr; 11214 emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg, 11215 SizesArrayArg, MapTypesArrayArg, 11216 MapNamesArrayArg, MappersArrayArg, Info, 11217 {/*ForEndCall=*/true}); 11218 11219 // Emit device ID if any. 11220 llvm::Value *DeviceID = nullptr; 11221 if (Device) { 11222 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device), 11223 CGF.Int64Ty, /*isSigned=*/true); 11224 } else { 11225 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF); 11226 } 11227 11228 // Emit the number of elements in the offloading arrays. 11229 llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs); 11230 11231 // Source location for the ident struct 11232 llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc()); 11233 11234 llvm::Value *OffloadingArgs[] = {RTLoc, 11235 DeviceID, 11236 PointerNum, 11237 BasePointersArrayArg, 11238 PointersArrayArg, 11239 SizesArrayArg, 11240 MapTypesArrayArg, 11241 MapNamesArrayArg, 11242 MappersArrayArg}; 11243 CGF.EmitRuntimeCall( 11244 OMPBuilder.getOrCreateRuntimeFunction( 11245 CGM.getModule(), OMPRTL___tgt_target_data_end_mapper), 11246 OffloadingArgs); 11247 }; 11248 11249 // If we need device pointer privatization, we need to emit the body of the 11250 // region with no privatization in the 'else' branch of the conditional. 11251 // Otherwise, we don't have to do anything. 11252 auto &&BeginElseGen = [&Info, &CodeGen, &NoPrivAction](CodeGenFunction &CGF, 11253 PrePostActionTy &) { 11254 if (!Info.CaptureDeviceAddrMap.empty()) { 11255 CodeGen.setAction(NoPrivAction); 11256 CodeGen(CGF); 11257 } 11258 }; 11259 11260 // We don't have to do anything to close the region if the if clause evaluates 11261 // to false. 11262 auto &&EndElseGen = [](CodeGenFunction &CGF, PrePostActionTy &) {}; 11263 11264 if (IfCond) { 11265 emitIfClause(CGF, IfCond, BeginThenGen, BeginElseGen); 11266 } else { 11267 RegionCodeGenTy RCG(BeginThenGen); 11268 RCG(CGF); 11269 } 11270 11271 // If we don't require privatization of device pointers, we emit the body in 11272 // between the runtime calls. This avoids duplicating the body code. 11273 if (Info.CaptureDeviceAddrMap.empty()) { 11274 CodeGen.setAction(NoPrivAction); 11275 CodeGen(CGF); 11276 } 11277 11278 if (IfCond) { 11279 emitIfClause(CGF, IfCond, EndThenGen, EndElseGen); 11280 } else { 11281 RegionCodeGenTy RCG(EndThenGen); 11282 RCG(CGF); 11283 } 11284 } 11285 11286 void CGOpenMPRuntime::emitTargetDataStandAloneCall( 11287 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, 11288 const Expr *Device) { 11289 if (!CGF.HaveInsertPoint()) 11290 return; 11291 11292 assert((isa<OMPTargetEnterDataDirective>(D) || 11293 isa<OMPTargetExitDataDirective>(D) || 11294 isa<OMPTargetUpdateDirective>(D)) && 11295 "Expecting either target enter, exit data, or update directives."); 11296 11297 CodeGenFunction::OMPTargetDataInfo InputInfo; 11298 llvm::Value *MapTypesArray = nullptr; 11299 llvm::Value *MapNamesArray = nullptr; 11300 // Generate the code for the opening of the data environment. 
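  // These standalone directives have no structured block: each lowers to a
  // single runtime call. For instance (sketch), '#pragma omp target update
  // to(x)' becomes a call to __tgt_target_data_update_mapper with one entry
  // describing 'x' in the offloading arrays.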

  // Generate the code for the opening of the data environment.
  auto &&ThenGen = [this, &D, Device, &InputInfo, &MapTypesArray,
                    &MapNamesArray](CodeGenFunction &CGF, PrePostActionTy &) {
    // Emit device ID if any.
    llvm::Value *DeviceID = nullptr;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Constant *PointerNum =
        CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);

    // Source location for the ident struct.
    llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());

    llvm::Value *OffloadingArgs[] = {RTLoc,
                                     DeviceID,
                                     PointerNum,
                                     InputInfo.BasePointersArray.getPointer(),
                                     InputInfo.PointersArray.getPointer(),
                                     InputInfo.SizesArray.getPointer(),
                                     MapTypesArray,
                                     MapNamesArray,
                                     InputInfo.MappersArray.getPointer()};

    // Select the right runtime function call for each standalone
    // directive.
    const bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
    RuntimeFunction RTLFn;
    switch (D.getDirectiveKind()) {
    case OMPD_target_enter_data:
      RTLFn = HasNowait ? OMPRTL___tgt_target_data_begin_nowait_mapper
                        : OMPRTL___tgt_target_data_begin_mapper;
      break;
    case OMPD_target_exit_data:
      RTLFn = HasNowait ? OMPRTL___tgt_target_data_end_nowait_mapper
                        : OMPRTL___tgt_target_data_end_mapper;
      break;
    case OMPD_target_update:
      RTLFn = HasNowait ? OMPRTL___tgt_target_data_update_nowait_mapper
                        : OMPRTL___tgt_target_data_update_mapper;
      break;
    case OMPD_parallel:
    case OMPD_for:
    case OMPD_parallel_for:
    case OMPD_parallel_master:
    case OMPD_parallel_sections:
    case OMPD_for_simd:
    case OMPD_parallel_for_simd:
    case OMPD_cancel:
    case OMPD_cancellation_point:
    case OMPD_ordered:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_task:
    case OMPD_simd:
    case OMPD_tile:
    case OMPD_unroll:
    case OMPD_sections:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_taskyield:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_atomic:
    case OMPD_flush:
    case OMPD_depobj:
    case OMPD_scan:
    case OMPD_teams:
    case OMPD_target_data:
    case OMPD_distribute:
    case OMPD_distribute_simd:
    case OMPD_distribute_parallel_for:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_declare_simd:
    case OMPD_declare_variant:
    case OMPD_begin_declare_variant:
    case OMPD_end_declare_variant:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_master_taskloop:
    case OMPD_master_taskloop_simd:
    case OMPD_parallel_master_taskloop:
    case OMPD_parallel_master_taskloop_simd:
    case OMPD_target:
    case OMPD_target_simd:
    case OMPD_target_teams_distribute:
    case OMPD_target_teams_distribute_simd:
    case OMPD_target_teams_distribute_parallel_for:
    case OMPD_target_teams_distribute_parallel_for_simd:
    case OMPD_target_teams:
    case OMPD_target_parallel:
    case OMPD_target_parallel_for:
    case OMPD_target_parallel_for_simd:
    case OMPD_requires:
    case OMPD_metadirective:
    case OMPD_unknown:
    default:
      llvm_unreachable("Unexpected standalone target data directive.");
      break;
    }
    CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), RTLFn),
        OffloadingArgs);
  };

  auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray,
                          &MapNamesArray](CodeGenFunction &CGF,
                                          PrePostActionTy &) {
    // Fill up the arrays with all the mapped variables.
    MappableExprsHandler::MapCombinedInfoTy CombinedInfo;

    // Get map clause information.
    MappableExprsHandler MEHandler(D, CGF);
    MEHandler.generateAllInfo(CombinedInfo);

    TargetDataInfo Info;
    // Fill up the arrays and create the arguments.
    emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder,
                         /*IsNonContiguous=*/true);
    bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>() ||
                             D.hasClausesOfKind<OMPNowaitClause>();
    emitOffloadingArraysArgument(
        CGF, Info.BasePointersArray, Info.PointersArray, Info.SizesArray,
        Info.MapTypesArray, Info.MapNamesArray, Info.MappersArray, Info,
        {/*ForEndCall=*/false});
    InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
    InputInfo.BasePointersArray =
        Address(Info.BasePointersArray, CGM.getPointerAlign());
    InputInfo.PointersArray =
        Address(Info.PointersArray, CGM.getPointerAlign());
    InputInfo.SizesArray = Address(Info.SizesArray, CGM.getPointerAlign());
    InputInfo.MappersArray = Address(Info.MappersArray, CGM.getPointerAlign());
    MapTypesArray = Info.MapTypesArray;
    MapNamesArray = Info.MapNamesArray;
    if (RequiresOuterTask)
      CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
    else
      emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
  };

  if (IfCond) {
    emitIfClause(CGF, IfCond, TargetThenGen,
                 [](CodeGenFunction &CGF, PrePostActionTy &) {});
  } else {
    RegionCodeGenTy ThenRCG(TargetThenGen);
    ThenRCG(CGF);
  }
}

namespace {
/// Kind of parameter in a function with 'declare simd' directive.
enum ParamKindTy { LinearWithVarStride, Linear, Uniform, Vector };
/// Attribute set of the parameter.
struct ParamAttrTy {
  ParamKindTy Kind = Vector;
  llvm::APSInt StrideOrArg;
  llvm::APSInt Alignment;
};
} // namespace

static unsigned evaluateCDTSize(const FunctionDecl *FD,
                                ArrayRef<ParamAttrTy> ParamAttrs) {
  // Every vector variant of a SIMD-enabled function has a vector length
  // (VLEN). If the OpenMP clause "simdlen" is used, the VLEN is the value of
  // the argument of that clause. The VLEN value must be a power of 2.
  // Otherwise, the notion of the function's "characteristic data type" (CDT)
  // is used to compute the vector length.
  // CDT is defined in the following order:
  //   a) For a non-void function, the CDT is the return type.
  //   b) If the function has any non-uniform, non-linear parameters, then the
  //      CDT is the type of the first such parameter.
  //   c) If the CDT determined by a) or b) above is a struct, union, or class
  //      type which is pass-by-value (except for the type that maps to the
  //      built-in complex data type), the CDT is int.
  //   d) If none of the above three cases is applicable, the CDT is int.
  // The VLEN is then determined based on the CDT and the size of the vector
  // register of the ISA for which the current vector version is generated.
  // The VLEN is computed using the formula:
  //   VLEN = sizeof(vector_register) / sizeof(CDT),
  // where the vector register size is specified in section 3.2.1 "Registers
  // and the Stack Frame" of the original AMD64 ABI document.
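  // For example, for a hypothetical 'double foo(double x, int n)' with 'n'
  // uniform, rule a) gives CDT = double, so with a 256-bit AVX register the
  // vector length is VLEN = 256 / 64 = 4 lanes.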
  QualType RetType = FD->getReturnType();
  if (RetType.isNull())
    return 0;
  ASTContext &C = FD->getASTContext();
  QualType CDT;
  if (!RetType->isVoidType()) {
    CDT = RetType;
  } else {
    unsigned Offset = 0;
    if (const auto *MD = dyn_cast<CXXMethodDecl>(FD)) {
      if (ParamAttrs[Offset].Kind == Vector)
        CDT = C.getPointerType(C.getRecordType(MD->getParent()));
      ++Offset;
    }
    if (CDT.isNull()) {
      for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
        if (ParamAttrs[I + Offset].Kind == Vector) {
          CDT = FD->getParamDecl(I)->getType();
          break;
        }
      }
    }
  }
  if (CDT.isNull())
    CDT = C.IntTy;
  CDT = CDT->getCanonicalTypeUnqualified();
  if (CDT->isRecordType() || CDT->isUnionType())
    CDT = C.IntTy;
  return C.getTypeSize(CDT);
}

static void
emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn,
                           const llvm::APSInt &VLENVal,
                           ArrayRef<ParamAttrTy> ParamAttrs,
                           OMPDeclareSimdDeclAttr::BranchStateTy State) {
  struct ISADataTy {
    char ISA;
    unsigned VecRegSize;
  };
  ISADataTy ISAData[] = {
      {'b', 128}, // SSE
      {'c', 256}, // AVX
      {'d', 256}, // AVX2
      {'e', 512}, // AVX512
  };
  llvm::SmallVector<char, 2> Masked;
  switch (State) {
  case OMPDeclareSimdDeclAttr::BS_Undefined:
    Masked.push_back('N');
    Masked.push_back('M');
    break;
  case OMPDeclareSimdDeclAttr::BS_Notinbranch:
    Masked.push_back('N');
    break;
  case OMPDeclareSimdDeclAttr::BS_Inbranch:
    Masked.push_back('M');
    break;
  }
  for (char Mask : Masked) {
    for (const ISADataTy &Data : ISAData) {
      SmallString<256> Buffer;
      llvm::raw_svector_ostream Out(Buffer);
      Out << "_ZGV" << Data.ISA << Mask;
      if (!VLENVal) {
        unsigned NumElts = evaluateCDTSize(FD, ParamAttrs);
        assert(NumElts && "Non-zero simdlen/cdtsize expected");
        Out << llvm::APSInt::getUnsigned(Data.VecRegSize / NumElts);
      } else {
        Out << VLENVal;
      }
      for (const ParamAttrTy &ParamAttr : ParamAttrs) {
        switch (ParamAttr.Kind) {
        case LinearWithVarStride:
          Out << 's' << ParamAttr.StrideOrArg;
          break;
        case Linear:
          Out << 'l';
          if (ParamAttr.StrideOrArg != 1)
            Out << ParamAttr.StrideOrArg;
          break;
        case Uniform:
          Out << 'u';
          break;
        case Vector:
          Out << 'v';
          break;
        }
        if (!!ParamAttr.Alignment)
          Out << 'a' << ParamAttr.Alignment;
      }
      Out << '_' << Fn->getName();
      Fn->addFnAttr(Out.str());
    }
  }
}
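
// For example, a (hypothetical) declaration such as:
//   #pragma omp declare simd notinbranch uniform(b) linear(i)
//   double foo(double *b, int i);
// has CDT double (64 bits), so, assuming C linkage (Fn->getName() == "foo"),
// the loop above attaches "_ZGVbN2ul_foo" (SSE, VLEN = 128/64 = 2) up to
// "_ZGVeN8ul_foo" (AVX512, VLEN = 512/64 = 8) as function attributes.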

// These are the functions that are needed to mangle the name of the
// vector functions generated by the compiler, according to the rules
// defined in the "Vector Function ABI specifications for AArch64",
// available at
// https://developer.arm.com/products/software-development-tools/hpc/arm-compiler-for-hpc/vector-function-abi.

/// Maps To Vector (MTV), as defined in 3.1.1 of the AAVFABI.
///
/// TODO: Need to implement the behavior for references marked with a
/// var or no linear modifiers (1.b in the section). For this, we
/// need to extend ParamKindTy to support the linear modifiers.
static bool getAArch64MTV(QualType QT, ParamKindTy Kind) {
  QT = QT.getCanonicalType();

  if (QT->isVoidType())
    return false;

  if (Kind == ParamKindTy::Uniform)
    return false;

  if (Kind == ParamKindTy::Linear)
    return false;

  // TODO: Handle linear references with modifiers.

  if (Kind == ParamKindTy::LinearWithVarStride)
    return false;

  return true;
}

/// Pass By Value (PBV), as defined in 3.1.2 of the AAVFABI.
static bool getAArch64PBV(QualType QT, ASTContext &C) {
  QT = QT.getCanonicalType();
  unsigned Size = C.getTypeSize(QT);

  // Only scalars and complex types up to 16 bytes wide set PBV to true.
  if (Size != 8 && Size != 16 && Size != 32 && Size != 64 && Size != 128)
    return false;

  if (QT->isFloatingType())
    return true;

  if (QT->isIntegerType())
    return true;

  if (QT->isPointerType())
    return true;

  // TODO: Add support for complex types (section 3.1.2, item 2).

  return false;
}

/// Computes the lane size (LS) of a return type or of an input parameter,
/// as defined by `LS(P)` in 3.2.1 of the AAVFABI.
/// TODO: Add support for references, section 3.2.1, item 1.
static unsigned getAArch64LS(QualType QT, ParamKindTy Kind, ASTContext &C) {
  if (!getAArch64MTV(QT, Kind) && QT.getCanonicalType()->isPointerType()) {
    QualType PTy = QT.getCanonicalType()->getPointeeType();
    if (getAArch64PBV(PTy, C))
      return C.getTypeSize(PTy);
  }
  if (getAArch64PBV(QT, C))
    return C.getTypeSize(QT);

  return C.getTypeSize(C.getUIntPtrType());
}
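
// For instance, LS(int32_t) == 32 for a value parameter, while for a uniform
// 'double *' parameter (not MTV) the lane size is that of the pointee,
// LS == 64; anything else falls back to the width of a pointer.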

// Get Narrowest Data Size (NDS) and Widest Data Size (WDS) from the
// signature of the scalar function, as defined in 3.2.2 of the
// AAVFABI.
static std::tuple<unsigned, unsigned, bool>
getNDSWDS(const FunctionDecl *FD, ArrayRef<ParamAttrTy> ParamAttrs) {
  QualType RetType = FD->getReturnType().getCanonicalType();

  ASTContext &C = FD->getASTContext();

  bool OutputBecomesInput = false;

  llvm::SmallVector<unsigned, 8> Sizes;
  if (!RetType->isVoidType()) {
    Sizes.push_back(getAArch64LS(RetType, ParamKindTy::Vector, C));
    if (!getAArch64PBV(RetType, C) && getAArch64MTV(RetType, {}))
      OutputBecomesInput = true;
  }
  for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
    QualType QT = FD->getParamDecl(I)->getType().getCanonicalType();
    Sizes.push_back(getAArch64LS(QT, ParamAttrs[I].Kind, C));
  }

  assert(!Sizes.empty() && "Unable to determine NDS and WDS.");
  // The LS of a function parameter / return value can only be a power
  // of 2, starting from 8 bits, up to 128.
  assert(std::all_of(Sizes.begin(), Sizes.end(),
                     [](unsigned Size) {
                       return Size == 8 || Size == 16 || Size == 32 ||
                              Size == 64 || Size == 128;
                     }) &&
         "Invalid size");

  return std::make_tuple(*std::min_element(std::begin(Sizes), std::end(Sizes)),
                         *std::max_element(std::begin(Sizes), std::end(Sizes)),
                         OutputBecomesInput);
}

/// Mangle the parameter part of the vector function name according to
/// their OpenMP classification. The mangling function is defined in
/// section 3.5 of the AAVFABI.
static std::string mangleVectorParameters(ArrayRef<ParamAttrTy> ParamAttrs) {
  SmallString<256> Buffer;
  llvm::raw_svector_ostream Out(Buffer);
  for (const auto &ParamAttr : ParamAttrs) {
    switch (ParamAttr.Kind) {
    case LinearWithVarStride:
      Out << "ls" << ParamAttr.StrideOrArg;
      break;
    case Linear:
      Out << 'l';
      // Don't print the step value if it is not present or if it is
      // equal to 1.
      if (ParamAttr.StrideOrArg != 1)
        Out << ParamAttr.StrideOrArg;
      break;
    case Uniform:
      Out << 'u';
      break;
    case Vector:
      Out << 'v';
      break;
    }

    if (!!ParamAttr.Alignment)
      Out << 'a' << ParamAttr.Alignment;
  }

  return std::string(Out.str());
}

// Function used to add the attribute. The parameter `VLEN` is
// templated to allow the use of "x" when targeting scalable functions
// for SVE.
template <typename T>
static void addAArch64VectorName(T VLEN, StringRef LMask, StringRef Prefix,
                                 char ISA, StringRef ParSeq,
                                 StringRef MangledName, bool OutputBecomesInput,
                                 llvm::Function *Fn) {
  SmallString<256> Buffer;
  llvm::raw_svector_ostream Out(Buffer);
  Out << Prefix << ISA << LMask << VLEN;
  if (OutputBecomesInput)
    Out << "v";
  Out << ParSeq << "_" << MangledName;
  Fn->addFnAttr(Out.str());
}

// Helper function to generate the Advanced SIMD names depending on
// the value of the NDS when simdlen is not present.
static void addAArch64AdvSIMDNDSNames(unsigned NDS, StringRef Mask,
                                      StringRef Prefix, char ISA,
                                      StringRef ParSeq, StringRef MangledName,
                                      bool OutputBecomesInput,
                                      llvm::Function *Fn) {
  switch (NDS) {
  case 8:
    addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
                         OutputBecomesInput, Fn);
    addAArch64VectorName(16, Mask, Prefix, ISA, ParSeq, MangledName,
                         OutputBecomesInput, Fn);
    break;
  case 16:
    addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
                         OutputBecomesInput, Fn);
    addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
                         OutputBecomesInput, Fn);
    break;
  case 32:
    addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
                         OutputBecomesInput, Fn);
    addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
                         OutputBecomesInput, Fn);
    break;
  case 64:
  case 128:
    addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
                         OutputBecomesInput, Fn);
    break;
  default:
    llvm_unreachable("Scalar type is too wide.");
  }
}
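
// For example, for a (hypothetical) scalar function 'double foo(double)'
// (NDS == 64), the helpers above produce vector names such as "_ZGVnN2v_foo"
// for Advanced SIMD and "_ZGVsMxv_foo" for SVE, where 'x' denotes a scalable
// vector length.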

/// Emit vector function attributes for AArch64, as defined in the AAVFABI.
static void emitAArch64DeclareSimdFunction(
    CodeGenModule &CGM, const FunctionDecl *FD, unsigned UserVLEN,
    ArrayRef<ParamAttrTy> ParamAttrs,
    OMPDeclareSimdDeclAttr::BranchStateTy State, StringRef MangledName,
    char ISA, unsigned VecRegSize, llvm::Function *Fn, SourceLocation SLoc) {

  // Get basic data for building the vector signature.
  const auto Data = getNDSWDS(FD, ParamAttrs);
  const unsigned NDS = std::get<0>(Data);
  const unsigned WDS = std::get<1>(Data);
  const bool OutputBecomesInput = std::get<2>(Data);

  // Check the values provided via `simdlen` by the user.
  // 1. A `simdlen(1)` doesn't produce vector signatures.
  if (UserVLEN == 1) {
    unsigned DiagID = CGM.getDiags().getCustomDiagID(
        DiagnosticsEngine::Warning,
        "The clause simdlen(1) has no effect when targeting aarch64.");
    CGM.getDiags().Report(SLoc, DiagID);
    return;
  }

  // 2. Section 3.3.1, item 1: user input must be a power of 2 for
  // Advanced SIMD output.
  if (ISA == 'n' && UserVLEN && !llvm::isPowerOf2_32(UserVLEN)) {
    unsigned DiagID = CGM.getDiags().getCustomDiagID(
        DiagnosticsEngine::Warning, "The value specified in simdlen must be a "
                                    "power of 2 when targeting Advanced SIMD.");
    CGM.getDiags().Report(SLoc, DiagID);
    return;
  }

  // 3. Section 3.4.1: SVE fixed lengths must obey the architectural
  // limits.
  if (ISA == 's' && UserVLEN != 0) {
    if ((UserVLEN * WDS > 2048) || (UserVLEN * WDS % 128 != 0)) {
      unsigned DiagID = CGM.getDiags().getCustomDiagID(
          DiagnosticsEngine::Warning, "The clause simdlen must fit the %0-bit "
                                      "lanes in the architectural constraints "
                                      "for SVE (min is 128-bit, max is "
                                      "2048-bit, by steps of 128-bit)");
      CGM.getDiags().Report(SLoc, DiagID) << WDS;
      return;
    }
  }

  // Sort out the parameter sequence.
  const std::string ParSeq = mangleVectorParameters(ParamAttrs);
  StringRef Prefix = "_ZGV";
  // Generate simdlen from user input (if any).
  if (UserVLEN) {
    if (ISA == 's') {
      // SVE generates only a masked function.
      addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
                           OutputBecomesInput, Fn);
    } else {
      assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
      // Advanced SIMD generates one or two functions, depending on
      // the `[not]inbranch` clause.
      switch (State) {
      case OMPDeclareSimdDeclAttr::BS_Undefined:
        addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Notinbranch:
        addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Inbranch:
        addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        break;
      }
    }
  } else {
    // If no user simdlen is provided, follow the AAVFABI rules for
    // generating the vector length.
    if (ISA == 's') {
      // SVE, section 3.4.1, item 1.
      addAArch64VectorName("x", "M", Prefix, ISA, ParSeq, MangledName,
                           OutputBecomesInput, Fn);
    } else {
      assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
      // Advanced SIMD, Section 3.3.1 of the AAVFABI, generates one or
      // two vector names depending on the use of the clause
      // `[not]inbranch`.
      switch (State) {
      case OMPDeclareSimdDeclAttr::BS_Undefined:
        addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Notinbranch:
        addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Inbranch:
        addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        break;
      }
    }
  }
}

void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD,
                                              llvm::Function *Fn) {
  ASTContext &C = CGM.getContext();
  FD = FD->getMostRecentDecl();
  // Map params to their positions in function decl.
  llvm::DenseMap<const Decl *, unsigned> ParamPositions;
  if (isa<CXXMethodDecl>(FD))
    ParamPositions.try_emplace(FD, 0);
  unsigned ParamPos = ParamPositions.size();
  for (const ParmVarDecl *P : FD->parameters()) {
    ParamPositions.try_emplace(P->getCanonicalDecl(), ParamPos);
    ++ParamPos;
  }
  while (FD) {
    for (const auto *Attr : FD->specific_attrs<OMPDeclareSimdDeclAttr>()) {
      llvm::SmallVector<ParamAttrTy, 8> ParamAttrs(ParamPositions.size());
      // Mark uniform parameters.
      for (const Expr *E : Attr->uniforms()) {
        E = E->IgnoreParenImpCasts();
        unsigned Pos;
        if (isa<CXXThisExpr>(E)) {
          Pos = ParamPositions[FD];
        } else {
          const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
                                ->getCanonicalDecl();
          Pos = ParamPositions[PVD];
        }
        ParamAttrs[Pos].Kind = Uniform;
      }
      // Get alignment info.
      auto NI = Attr->alignments_begin();
      for (const Expr *E : Attr->aligneds()) {
        E = E->IgnoreParenImpCasts();
        unsigned Pos;
        QualType ParmTy;
        if (isa<CXXThisExpr>(E)) {
          Pos = ParamPositions[FD];
          ParmTy = E->getType();
        } else {
          const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
                                ->getCanonicalDecl();
          Pos = ParamPositions[PVD];
          ParmTy = PVD->getType();
        }
        ParamAttrs[Pos].Alignment =
            (*NI)
                ? (*NI)->EvaluateKnownConstInt(C)
                : llvm::APSInt::getUnsigned(
                      C.toCharUnitsFromBits(C.getOpenMPDefaultSimdAlign(ParmTy))
                          .getQuantity());
        ++NI;
      }
      // Mark linear parameters.
      auto SI = Attr->steps_begin();
      auto MI = Attr->modifiers_begin();
      for (const Expr *E : Attr->linears()) {
        E = E->IgnoreParenImpCasts();
        unsigned Pos;
        // Rescaling factor needed to compute the linear parameter
        // value in the mangled name.
        unsigned PtrRescalingFactor = 1;
        if (isa<CXXThisExpr>(E)) {
          Pos = ParamPositions[FD];
        } else {
          const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
                                ->getCanonicalDecl();
          Pos = ParamPositions[PVD];
          if (auto *P = dyn_cast<PointerType>(PVD->getType()))
            PtrRescalingFactor = CGM.getContext()
                                     .getTypeSizeInChars(P->getPointeeType())
                                     .getQuantity();
        }
        ParamAttrTy &ParamAttr = ParamAttrs[Pos];
        ParamAttr.Kind = Linear;
        // Assuming a stride of 1, for `linear` without modifiers.
        ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(1);
        if (*SI) {
          Expr::EvalResult Result;
          if (!(*SI)->EvaluateAsInt(Result, C, Expr::SE_AllowSideEffects)) {
            // Use dyn_cast here: a non-constant step need not be a direct
            // reference to a parameter.
            if (const auto *DRE =
                    dyn_cast<DeclRefExpr>((*SI)->IgnoreParenImpCasts())) {
              if (const auto *StridePVD =
                      dyn_cast<ParmVarDecl>(DRE->getDecl())) {
                ParamAttr.Kind = LinearWithVarStride;
                ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(
                    ParamPositions[StridePVD->getCanonicalDecl()]);
              }
            }
          } else {
            ParamAttr.StrideOrArg = Result.Val.getInt();
          }
        }
        // If we are using a linear clause on a pointer, we need to
        // rescale the value of linear_step with the byte size of the
        // pointee type.
        if (Linear == ParamAttr.Kind)
          ParamAttr.StrideOrArg = ParamAttr.StrideOrArg * PtrRescalingFactor;
        ++SI;
        ++MI;
      }
      llvm::APSInt VLENVal;
      SourceLocation ExprLoc;
      const Expr *VLENExpr = Attr->getSimdlen();
      if (VLENExpr) {
        VLENVal = VLENExpr->EvaluateKnownConstInt(C);
        ExprLoc = VLENExpr->getExprLoc();
      }
      OMPDeclareSimdDeclAttr::BranchStateTy State = Attr->getBranchState();
      if (CGM.getTriple().isX86()) {
        emitX86DeclareSimdFunction(FD, Fn, VLENVal, ParamAttrs, State);
      } else if (CGM.getTriple().getArch() == llvm::Triple::aarch64) {
        unsigned VLEN = VLENVal.getExtValue();
        StringRef MangledName = Fn->getName();
        if (CGM.getTarget().hasFeature("sve"))
          emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
                                         MangledName, 's', 128, Fn, ExprLoc);
        if (CGM.getTarget().hasFeature("neon"))
          emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
                                         MangledName, 'n', 128, Fn, ExprLoc);
      }
    }
    FD = FD->getPreviousDecl();
  }
}
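
// Note the pointer rescaling above: for a (hypothetical) declaration
//   #pragma omp declare simd linear(p)
//   void bar(double *p);
// the unit step of 'p' is rescaled by sizeof(double), so the parameter is
// mangled as "l8" rather than "l" in the generated variant names.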

namespace {
/// Cleanup action for doacross support.
class DoacrossCleanupTy final : public EHScopeStack::Cleanup {
public:
  static const int DoacrossFinArgs = 2;

private:
  llvm::FunctionCallee RTLFn;
  llvm::Value *Args[DoacrossFinArgs];

public:
  DoacrossCleanupTy(llvm::FunctionCallee RTLFn,
                    ArrayRef<llvm::Value *> CallArgs)
      : RTLFn(RTLFn) {
    assert(CallArgs.size() == DoacrossFinArgs);
    std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args));
  }
  void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
    if (!CGF.HaveInsertPoint())
      return;
    CGF.EmitRuntimeCall(RTLFn, Args);
  }
};
} // namespace

void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF,
                                       const OMPLoopDirective &D,
                                       ArrayRef<Expr *> NumIterations) {
  if (!CGF.HaveInsertPoint())
    return;

  ASTContext &C = CGM.getContext();
  QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
  RecordDecl *RD;
  if (KmpDimTy.isNull()) {
    // Build struct kmp_dim { // loop bounds info cast to kmp_int64
    //   kmp_int64 lo; // lower
    //   kmp_int64 up; // upper
    //   kmp_int64 st; // stride
    // };
    RD = C.buildImplicitRecord("kmp_dim");
    RD->startDefinition();
    addFieldToRecordDecl(C, RD, Int64Ty);
    addFieldToRecordDecl(C, RD, Int64Ty);
    addFieldToRecordDecl(C, RD, Int64Ty);
    RD->completeDefinition();
    KmpDimTy = C.getRecordType(RD);
  } else {
    RD = cast<RecordDecl>(KmpDimTy->getAsTagDecl());
  }
  llvm::APInt Size(/*numBits=*/32, NumIterations.size());
  QualType ArrayTy =
      C.getConstantArrayType(KmpDimTy, Size, nullptr, ArrayType::Normal, 0);

  Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims");
  CGF.EmitNullInitialization(DimsAddr, ArrayTy);
  enum { LowerFD = 0, UpperFD, StrideFD };
  // Fill dims with data.
  for (unsigned I = 0, E = NumIterations.size(); I < E; ++I) {
    LValue DimsLVal = CGF.MakeAddrLValue(
        CGF.Builder.CreateConstArrayGEP(DimsAddr, I), KmpDimTy);
    // dims.upper = num_iterations;
    LValue UpperLVal = CGF.EmitLValueForField(
        DimsLVal, *std::next(RD->field_begin(), UpperFD));
    llvm::Value *NumIterVal = CGF.EmitScalarConversion(
        CGF.EmitScalarExpr(NumIterations[I]), NumIterations[I]->getType(),
        Int64Ty, NumIterations[I]->getExprLoc());
    CGF.EmitStoreOfScalar(NumIterVal, UpperLVal);
    // dims.stride = 1;
    LValue StrideLVal = CGF.EmitLValueForField(
        DimsLVal, *std::next(RD->field_begin(), StrideFD));
    CGF.EmitStoreOfScalar(llvm::ConstantInt::getSigned(CGM.Int64Ty, /*V=*/1),
                          StrideLVal);
  }

  // Build call void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid,
  // kmp_int32 num_dims, struct kmp_dim * dims);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, D.getBeginLoc()),
      getThreadID(CGF, D.getBeginLoc()),
      llvm::ConstantInt::getSigned(CGM.Int32Ty, NumIterations.size()),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          CGF.Builder.CreateConstArrayGEP(DimsAddr, 0).getPointer(),
          CGM.VoidPtrTy)};

  llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(
      CGM.getModule(), OMPRTL___kmpc_doacross_init);
  CGF.EmitRuntimeCall(RTLFn, Args);
  llvm::Value *FiniArgs[DoacrossCleanupTy::DoacrossFinArgs] = {
      emitUpdateLocation(CGF, D.getEndLoc()), getThreadID(CGF, D.getEndLoc())};
  llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
      CGM.getModule(), OMPRTL___kmpc_doacross_fini);
  CGF.EHStack.pushCleanup<DoacrossCleanupTy>(NormalAndEHCleanup, FiniRTLFn,
                                             llvm::makeArrayRef(FiniArgs));
}

void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
                                          const OMPDependClause *C) {
  QualType Int64Ty =
      CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
  llvm::APInt Size(/*numBits=*/32, C->getNumLoops());
  QualType ArrayTy = CGM.getContext().getConstantArrayType(
      Int64Ty, Size, nullptr, ArrayType::Normal, 0);
  Address CntAddr = CGF.CreateMemTemp(ArrayTy, ".cnt.addr");
  for (unsigned I = 0, E = C->getNumLoops(); I < E; ++I) {
    const Expr *CounterVal = C->getLoopData(I);
    assert(CounterVal);
    llvm::Value *CntVal = CGF.EmitScalarConversion(
        CGF.EmitScalarExpr(CounterVal), CounterVal->getType(), Int64Ty,
        CounterVal->getExprLoc());
    CGF.EmitStoreOfScalar(CntVal, CGF.Builder.CreateConstArrayGEP(CntAddr, I),
                          /*Volatile=*/false, Int64Ty);
  }
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, C->getBeginLoc()),
      getThreadID(CGF, C->getBeginLoc()),
      CGF.Builder.CreateConstArrayGEP(CntAddr, 0).getPointer()};
  llvm::FunctionCallee RTLFn;
  if (C->getDependencyKind() == OMPC_DEPEND_source) {
    RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                                  OMPRTL___kmpc_doacross_post);
  } else {
    assert(C->getDependencyKind() == OMPC_DEPEND_sink);
    RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                                  OMPRTL___kmpc_doacross_wait);
  }
  CGF.EmitRuntimeCall(RTLFn, Args);
}
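
// For reference, a (hypothetical) doacross loop nest such as:
//   #pragma omp for ordered(1)
//   for (int i = 0; i < n; ++i) {
//     #pragma omp ordered depend(sink: i - 1)
//     ...
//     #pragma omp ordered depend(source)
//   }
// is lowered to __kmpc_doacross_init/__kmpc_doacross_fini around the loop,
// with __kmpc_doacross_wait emitted for the 'sink' clause and
// __kmpc_doacross_post for the 'source' clause.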
must be valid."); 12161 auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc); 12162 12163 if (auto *Fn = dyn_cast<llvm::Function>(Callee.getCallee())) { 12164 if (Fn->doesNotThrow()) { 12165 CGF.EmitNounwindRuntimeCall(Fn, Args); 12166 return; 12167 } 12168 } 12169 CGF.EmitRuntimeCall(Callee, Args); 12170 } 12171 12172 void CGOpenMPRuntime::emitOutlinedFunctionCall( 12173 CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn, 12174 ArrayRef<llvm::Value *> Args) const { 12175 emitCall(CGF, Loc, OutlinedFn, Args); 12176 } 12177 12178 void CGOpenMPRuntime::emitFunctionProlog(CodeGenFunction &CGF, const Decl *D) { 12179 if (const auto *FD = dyn_cast<FunctionDecl>(D)) 12180 if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(FD)) 12181 HasEmittedDeclareTargetRegion = true; 12182 } 12183 12184 Address CGOpenMPRuntime::getParameterAddress(CodeGenFunction &CGF, 12185 const VarDecl *NativeParam, 12186 const VarDecl *TargetParam) const { 12187 return CGF.GetAddrOfLocalVar(NativeParam); 12188 } 12189 12190 Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF, 12191 const VarDecl *VD) { 12192 if (!VD) 12193 return Address::invalid(); 12194 Address UntiedAddr = Address::invalid(); 12195 Address UntiedRealAddr = Address::invalid(); 12196 auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn); 12197 if (It != FunctionToUntiedTaskStackMap.end()) { 12198 const UntiedLocalVarsAddressesMap &UntiedData = 12199 UntiedLocalVarsStack[It->second]; 12200 auto I = UntiedData.find(VD); 12201 if (I != UntiedData.end()) { 12202 UntiedAddr = I->second.first; 12203 UntiedRealAddr = I->second.second; 12204 } 12205 } 12206 const VarDecl *CVD = VD->getCanonicalDecl(); 12207 if (CVD->hasAttr<OMPAllocateDeclAttr>()) { 12208 // Use the default allocation. 12209 if (!isAllocatableDecl(VD)) 12210 return UntiedAddr; 12211 llvm::Value *Size; 12212 CharUnits Align = CGM.getContext().getDeclAlign(CVD); 12213 if (CVD->getType()->isVariablyModifiedType()) { 12214 Size = CGF.getTypeSize(CVD->getType()); 12215 // Align the size: ((size + align - 1) / align) * align 12216 Size = CGF.Builder.CreateNUWAdd( 12217 Size, CGM.getSize(Align - CharUnits::fromQuantity(1))); 12218 Size = CGF.Builder.CreateUDiv(Size, CGM.getSize(Align)); 12219 Size = CGF.Builder.CreateNUWMul(Size, CGM.getSize(Align)); 12220 } else { 12221 CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType()); 12222 Size = CGM.getSize(Sz.alignTo(Align)); 12223 } 12224 llvm::Value *ThreadID = getThreadID(CGF, CVD->getBeginLoc()); 12225 const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>(); 12226 assert(AA->getAllocator() && 12227 "Expected allocator expression for non-default allocator."); 12228 llvm::Value *Allocator = CGF.EmitScalarExpr(AA->getAllocator()); 12229 // According to the standard, the original allocator type is a enum 12230 // (integer). Convert to pointer type, if required. 
    Allocator = CGF.EmitScalarConversion(
        Allocator, AA->getAllocator()->getType(), CGF.getContext().VoidPtrTy,
        AA->getAllocator()->getExprLoc());
    llvm::Value *Args[] = {ThreadID, Size, Allocator};

    llvm::Value *Addr =
        CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                                CGM.getModule(), OMPRTL___kmpc_alloc),
                            Args, getName({CVD->getName(), ".void.addr"}));
    llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
        CGM.getModule(), OMPRTL___kmpc_free);
    QualType Ty = CGM.getContext().getPointerType(CVD->getType());
    Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        Addr, CGF.ConvertTypeForMem(Ty), getName({CVD->getName(), ".addr"}));
    if (UntiedAddr.isValid())
      CGF.EmitStoreOfScalar(Addr, UntiedAddr, /*Volatile=*/false, Ty);

    // Cleanup action for allocate support.
    class OMPAllocateCleanupTy final : public EHScopeStack::Cleanup {
      llvm::FunctionCallee RTLFn;
      SourceLocation::UIntTy LocEncoding;
      Address Addr;
      const Expr *Allocator;

    public:
      OMPAllocateCleanupTy(llvm::FunctionCallee RTLFn,
                           SourceLocation::UIntTy LocEncoding, Address Addr,
                           const Expr *Allocator)
          : RTLFn(RTLFn), LocEncoding(LocEncoding), Addr(Addr),
            Allocator(Allocator) {}
      void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
        if (!CGF.HaveInsertPoint())
          return;
        llvm::Value *Args[3];
        Args[0] = CGF.CGM.getOpenMPRuntime().getThreadID(
            CGF, SourceLocation::getFromRawEncoding(LocEncoding));
        Args[1] = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            Addr.getPointer(), CGF.VoidPtrTy);
        llvm::Value *AllocVal = CGF.EmitScalarExpr(Allocator);
        // According to the standard, the original allocator type is an enum
        // (integer). Convert to pointer type, if required.
        AllocVal = CGF.EmitScalarConversion(AllocVal, Allocator->getType(),
                                            CGF.getContext().VoidPtrTy,
                                            Allocator->getExprLoc());
        Args[2] = AllocVal;

        CGF.EmitRuntimeCall(RTLFn, Args);
      }
    };
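
    // In effect (a sketch), a variable declared with
    //   #pragma omp allocate(v) allocator(omp_high_bw_mem_alloc)
    // is backed by the __kmpc_alloc(gtid, size, allocator) call emitted
    // above, and the cleanup pushed below emits the matching __kmpc_free
    // when the variable goes out of scope.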
    Address VDAddr =
        UntiedRealAddr.isValid() ? UntiedRealAddr : Address(Addr, Align);
    CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(
        NormalAndEHCleanup, FiniRTLFn, CVD->getLocation().getRawEncoding(),
        VDAddr, AA->getAllocator());
    if (UntiedRealAddr.isValid())
      if (auto *Region =
              dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
        Region->emitUntiedSwitch(CGF);
    return VDAddr;
  }
  return UntiedAddr;
}

bool CGOpenMPRuntime::isLocalVarInUntiedTask(CodeGenFunction &CGF,
                                             const VarDecl *VD) const {
  auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn);
  if (It == FunctionToUntiedTaskStackMap.end())
    return false;
  return UntiedLocalVarsStack[It->second].count(VD) > 0;
}

CGOpenMPRuntime::NontemporalDeclsRAII::NontemporalDeclsRAII(
    CodeGenModule &CGM, const OMPLoopDirective &S)
    : CGM(CGM), NeedToPush(S.hasClausesOfKind<OMPNontemporalClause>()) {
  assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
  if (!NeedToPush)
    return;
  NontemporalDeclsSet &DS =
      CGM.getOpenMPRuntime().NontemporalDeclsStack.emplace_back();
  for (const auto *C : S.getClausesOfKind<OMPNontemporalClause>()) {
    for (const Stmt *Ref : C->private_refs()) {
      const auto *SimpleRefExpr = cast<Expr>(Ref)->IgnoreParenImpCasts();
      const ValueDecl *VD;
      if (const auto *DRE = dyn_cast<DeclRefExpr>(SimpleRefExpr)) {
        VD = DRE->getDecl();
      } else {
        const auto *ME = cast<MemberExpr>(SimpleRefExpr);
        assert((ME->isImplicitCXXThis() ||
                isa<CXXThisExpr>(ME->getBase()->IgnoreParenImpCasts())) &&
               "Expected member of current class.");
        VD = ME->getMemberDecl();
      }
      DS.insert(VD);
    }
  }
}

CGOpenMPRuntime::NontemporalDeclsRAII::~NontemporalDeclsRAII() {
  if (!NeedToPush)
    return;
  CGM.getOpenMPRuntime().NontemporalDeclsStack.pop_back();
}

CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::UntiedTaskLocalDeclsRAII(
    CodeGenFunction &CGF,
    const llvm::MapVector<CanonicalDeclPtr<const VarDecl>,
                          std::pair<Address, Address>> &LocalVars)
    : CGM(CGF.CGM), NeedToPush(!LocalVars.empty()) {
  if (!NeedToPush)
    return;
  CGM.getOpenMPRuntime().FunctionToUntiedTaskStackMap.try_emplace(
      CGF.CurFn, CGM.getOpenMPRuntime().UntiedLocalVarsStack.size());
  CGM.getOpenMPRuntime().UntiedLocalVarsStack.push_back(LocalVars);
}

CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::~UntiedTaskLocalDeclsRAII() {
  if (!NeedToPush)
    return;
  CGM.getOpenMPRuntime().UntiedLocalVarsStack.pop_back();
}

bool CGOpenMPRuntime::isNontemporalDecl(const ValueDecl *VD) const {
  assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");

  return llvm::any_of(
      CGM.getOpenMPRuntime().NontemporalDeclsStack,
      [VD](const NontemporalDeclsSet &Set) { return Set.count(VD) > 0; });
}
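
// As a usage sketch, a loop such as:
//   #pragma omp simd nontemporal(a)
//   for (...) a[i] = ...;
// pushes 'a' onto NontemporalDeclsStack for the duration of the loop's
// codegen; isNontemporalDecl() is then consulted so that accesses to 'a' can
// be given nontemporal treatment.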

void CGOpenMPRuntime::LastprivateConditionalRAII::tryToDisableInnerAnalysis(
    const OMPExecutableDirective &S,
    llvm::DenseSet<CanonicalDeclPtr<const Decl>> &NeedToAddForLPCsAsDisabled)
    const {
  llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToCheckForLPCs;
  // Vars in target/task regions must be excluded completely.
  if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()) ||
      isOpenMPTaskingDirective(S.getDirectiveKind())) {
    SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
    getOpenMPCaptureRegions(CaptureRegions, S.getDirectiveKind());
    const CapturedStmt *CS = S.getCapturedStmt(CaptureRegions.front());
    for (const CapturedStmt::Capture &Cap : CS->captures()) {
      if (Cap.capturesVariable() || Cap.capturesVariableByCopy())
        NeedToCheckForLPCs.insert(Cap.getCapturedVar());
    }
  }
  // Exclude vars in private clauses.
  for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) {
    for (const Expr *Ref : C->varlists()) {
      if (!Ref->getType()->isScalarType())
        continue;
      const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
      if (!DRE)
        continue;
      NeedToCheckForLPCs.insert(DRE->getDecl());
    }
  }
  for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
    for (const Expr *Ref : C->varlists()) {
      if (!Ref->getType()->isScalarType())
        continue;
      const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
      if (!DRE)
        continue;
      NeedToCheckForLPCs.insert(DRE->getDecl());
    }
  }
  for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
    for (const Expr *Ref : C->varlists()) {
      if (!Ref->getType()->isScalarType())
        continue;
      const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
      if (!DRE)
        continue;
      NeedToCheckForLPCs.insert(DRE->getDecl());
    }
  }
  for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
    for (const Expr *Ref : C->varlists()) {
      if (!Ref->getType()->isScalarType())
        continue;
      const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
      if (!DRE)
        continue;
      NeedToCheckForLPCs.insert(DRE->getDecl());
    }
  }
  for (const auto *C : S.getClausesOfKind<OMPLinearClause>()) {
    for (const Expr *Ref : C->varlists()) {
      if (!Ref->getType()->isScalarType())
        continue;
      const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
      if (!DRE)
        continue;
      NeedToCheckForLPCs.insert(DRE->getDecl());
    }
  }
  for (const Decl *VD : NeedToCheckForLPCs) {
    for (const LastprivateConditionalData &Data :
         llvm::reverse(CGM.getOpenMPRuntime().LastprivateConditionalStack)) {
      if (Data.DeclToUniqueName.count(VD) > 0) {
        if (!Data.Disabled)
          NeedToAddForLPCsAsDisabled.insert(VD);
        break;
      }
    }
  }
}

CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
    CodeGenFunction &CGF, const OMPExecutableDirective &S, LValue IVLVal)
    : CGM(CGF.CGM),
      Action((CGM.getLangOpts().OpenMP >= 50 &&
              llvm::any_of(S.getClausesOfKind<OMPLastprivateClause>(),
                           [](const OMPLastprivateClause *C) {
                             return C->getKind() ==
                                    OMPC_LASTPRIVATE_conditional;
                           }))
                 ? ActionToDo::PushAsLastprivateConditional
                 : ActionToDo::DoNotPush) {
  assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
  if (CGM.getLangOpts().OpenMP < 50 || Action == ActionToDo::DoNotPush)
    return;
  assert(Action == ActionToDo::PushAsLastprivateConditional &&
         "Expected a push action.");
  LastprivateConditionalData &Data =
      CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
  for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
    if (C->getKind() != OMPC_LASTPRIVATE_conditional)
      continue;

    for (const Expr *Ref : C->varlists()) {
      Data.DeclToUniqueName.insert(std::make_pair(
          cast<DeclRefExpr>(Ref->IgnoreParenImpCasts())->getDecl(),
          SmallString<16>(generateUniqueName(CGM, "pl_cond", Ref))));
    }
  }
  Data.IVLVal = IVLVal;
  Data.Fn = CGF.CurFn;
}

CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
    CodeGenFunction &CGF, const OMPExecutableDirective &S)
    : CGM(CGF.CGM), Action(ActionToDo::DoNotPush) {
  assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
  if (CGM.getLangOpts().OpenMP < 50)
    return;
  llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToAddForLPCsAsDisabled;
  tryToDisableInnerAnalysis(S, NeedToAddForLPCsAsDisabled);
  if (!NeedToAddForLPCsAsDisabled.empty()) {
    Action = ActionToDo::DisableLastprivateConditional;
    LastprivateConditionalData &Data =
        CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
    for (const Decl *VD : NeedToAddForLPCsAsDisabled)
      Data.DeclToUniqueName.insert(std::make_pair(VD, SmallString<16>()));
    Data.Fn = CGF.CurFn;
    Data.Disabled = true;
  }
}

CGOpenMPRuntime::LastprivateConditionalRAII
CGOpenMPRuntime::LastprivateConditionalRAII::disable(
    CodeGenFunction &CGF, const OMPExecutableDirective &S) {
  return LastprivateConditionalRAII(CGF, S);
}

CGOpenMPRuntime::LastprivateConditionalRAII::~LastprivateConditionalRAII() {
  if (CGM.getLangOpts().OpenMP < 50)
    return;
  if (Action == ActionToDo::DisableLastprivateConditional) {
    assert(CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
           "Expected list of disabled private vars.");
    CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
  }
  if (Action == ActionToDo::PushAsLastprivateConditional) {
    assert(
        !CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
        "Expected list of lastprivate conditional vars.");
    CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
  }
}

Address CGOpenMPRuntime::emitLastprivateConditionalInit(CodeGenFunction &CGF,
                                                        const VarDecl *VD) {
  ASTContext &C = CGM.getContext();
  auto I = LastprivateConditionalToTypes.find(CGF.CurFn);
  if (I == LastprivateConditionalToTypes.end())
    I = LastprivateConditionalToTypes.try_emplace(CGF.CurFn).first;
  QualType NewType;
  const FieldDecl *VDField;
  const FieldDecl *FiredField;
  LValue BaseLVal;
  auto VI = I->getSecond().find(VD);
  if (VI == I->getSecond().end()) {
    RecordDecl *RD = C.buildImplicitRecord("lastprivate.conditional");
    RD->startDefinition();
    VDField = addFieldToRecordDecl(C, RD, VD->getType().getNonReferenceType());
    FiredField = addFieldToRecordDecl(C, RD, C.CharTy);
    RD->completeDefinition();
    NewType = C.getRecordType(RD);
    Address Addr =
        CGF.CreateMemTemp(NewType, C.getDeclAlign(VD), VD->getName());
    BaseLVal = CGF.MakeAddrLValue(Addr, NewType, AlignmentSource::Decl);
    I->getSecond().try_emplace(VD, NewType, VDField, FiredField, BaseLVal);
  } else {
    NewType = std::get<0>(VI->getSecond());
    VDField = std::get<1>(VI->getSecond());
    FiredField = std::get<2>(VI->getSecond());
    BaseLVal = std::get<3>(VI->getSecond());
  }
  LValue FiredLVal = CGF.EmitLValueForField(BaseLVal, FiredField);
  CGF.EmitStoreOfScalar(
      llvm::ConstantInt::getNullValue(CGF.ConvertTypeForMem(C.CharTy)),
      FiredLVal);
  return CGF.EmitLValueForField(BaseLVal, VDField).getAddress(CGF);
}

namespace {
/// Checks if the lastprivate conditional variable is referenced in LHS.
class LastprivateConditionalRefChecker final
    : public ConstStmtVisitor<LastprivateConditionalRefChecker, bool> {
  ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM;
  const Expr *FoundE = nullptr;
  const Decl *FoundD = nullptr;
  StringRef UniqueDeclName;
  LValue IVLVal;
  llvm::Function *FoundFn = nullptr;
  SourceLocation Loc;

public:
  bool VisitDeclRefExpr(const DeclRefExpr *E) {
    for (const CGOpenMPRuntime::LastprivateConditionalData &D :
         llvm::reverse(LPM)) {
      auto It = D.DeclToUniqueName.find(E->getDecl());
      if (It == D.DeclToUniqueName.end())
        continue;
      if (D.Disabled)
        return false;
      FoundE = E;
      FoundD = E->getDecl()->getCanonicalDecl();
      UniqueDeclName = It->second;
      IVLVal = D.IVLVal;
      FoundFn = D.Fn;
      break;
    }
    return FoundE == E;
  }
  bool VisitMemberExpr(const MemberExpr *E) {
    if (!CodeGenFunction::IsWrappedCXXThis(E->getBase()))
      return false;
    for (const CGOpenMPRuntime::LastprivateConditionalData &D :
         llvm::reverse(LPM)) {
      auto It = D.DeclToUniqueName.find(E->getMemberDecl());
      if (It == D.DeclToUniqueName.end())
        continue;
      if (D.Disabled)
        return false;
      FoundE = E;
      FoundD = E->getMemberDecl()->getCanonicalDecl();
      UniqueDeclName = It->second;
      IVLVal = D.IVLVal;
      FoundFn = D.Fn;
      break;
    }
    return FoundE == E;
  }
  bool VisitStmt(const Stmt *S) {
    for (const Stmt *Child : S->children()) {
      if (!Child)
        continue;
      if (const auto *E = dyn_cast<Expr>(Child))
        if (!E->isGLValue())
          continue;
      if (Visit(Child))
        return true;
    }
    return false;
  }
  explicit LastprivateConditionalRefChecker(
      ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM)
      : LPM(LPM) {}
  std::tuple<const Expr *, const Decl *, StringRef, LValue, llvm::Function *>
  getFoundData() const {
    return std::make_tuple(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn);
  }
};
} // namespace
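
// To make the mechanism concrete, for a (hypothetical) loop:
//   #pragma omp parallel for lastprivate(conditional: a)
//   for (int i = 0; i < n; ++i)
//     if (cond(i)) a = f(i);
// each thread keeps {priv_a, Fired} plus the globals 'last_a' and 'last_iv';
// updates of 'a' record the iteration in 'last_iv' under a critical section
// (see below), so the value from the largest updating iteration wins.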

void CGOpenMPRuntime::emitLastprivateConditionalUpdate(CodeGenFunction &CGF,
                                                       LValue IVLVal,
                                                       StringRef UniqueDeclName,
                                                       LValue LVal,
                                                       SourceLocation Loc) {
  // Last updated loop counter for the lastprivate conditional var.
  // int<xx> last_iv = 0;
  llvm::Type *LLIVTy = CGF.ConvertTypeForMem(IVLVal.getType());
  llvm::Constant *LastIV =
      getOrCreateInternalVariable(LLIVTy, getName({UniqueDeclName, "iv"}));
  cast<llvm::GlobalVariable>(LastIV)->setAlignment(
      IVLVal.getAlignment().getAsAlign());
  LValue LastIVLVal = CGF.MakeNaturalAlignAddrLValue(LastIV, IVLVal.getType());

  // Last value of the lastprivate conditional.
  // decltype(priv_a) last_a;
  llvm::Constant *Last = getOrCreateInternalVariable(
      CGF.ConvertTypeForMem(LVal.getType()), UniqueDeclName);
  cast<llvm::GlobalVariable>(Last)->setAlignment(
      LVal.getAlignment().getAsAlign());
  LValue LastLVal =
      CGF.MakeAddrLValue(Last, LVal.getType(), LVal.getAlignment());

  // Global loop counter. Required to handle inner parallel-for regions.
  // iv
  llvm::Value *IVVal = CGF.EmitLoadOfScalar(IVLVal, Loc);

  // #pragma omp critical(a)
  // if (last_iv <= iv) {
  //   last_iv = iv;
  //   last_a = priv_a;
  // }
  auto &&CodeGen = [&LastIVLVal, &IVLVal, IVVal, &LVal, &LastLVal,
                    Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    llvm::Value *LastIVVal = CGF.EmitLoadOfScalar(LastIVLVal, Loc);
    // (last_iv <= iv): check whether this iteration is the latest one seen so
    // far and, if so, store the new value in the global var.
    llvm::Value *CmpRes;
    if (IVLVal.getType()->isSignedIntegerType()) {
      CmpRes = CGF.Builder.CreateICmpSLE(LastIVVal, IVVal);
    } else {
      assert(IVLVal.getType()->isUnsignedIntegerType() &&
             "Loop iteration variable must be integer.");
      CmpRes = CGF.Builder.CreateICmpULE(LastIVVal, IVVal);
    }
    llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lp_cond_then");
    llvm::BasicBlock *ExitBB = CGF.createBasicBlock("lp_cond_exit");
    CGF.Builder.CreateCondBr(CmpRes, ThenBB, ExitBB);
    // {
    CGF.EmitBlock(ThenBB);

    // last_iv = iv;
    CGF.EmitStoreOfScalar(IVVal, LastIVLVal);

    // last_a = priv_a;
    switch (CGF.getEvaluationKind(LVal.getType())) {
    case TEK_Scalar: {
      llvm::Value *PrivVal = CGF.EmitLoadOfScalar(LVal, Loc);
      CGF.EmitStoreOfScalar(PrivVal, LastLVal);
      break;
    }
    case TEK_Complex: {
      CodeGenFunction::ComplexPairTy PrivVal = CGF.EmitLoadOfComplex(LVal, Loc);
      CGF.EmitStoreOfComplex(PrivVal, LastLVal, /*isInit=*/false);
      break;
    }
    case TEK_Aggregate:
      llvm_unreachable(
          "Aggregates are not supported in lastprivate conditional.");
    }
    // }
    CGF.EmitBranch(ExitBB);
    // There is no need to emit a line number for the unconditional branch.
    (void)ApplyDebugLocation::CreateEmpty(CGF);
    CGF.EmitBlock(ExitBB, /*IsFinished=*/true);
  };

  if (CGM.getLangOpts().OpenMPSimd) {
    // Do not emit as a critical region: no parallel region can be emitted in
    // SIMD-only mode.
    RegionCodeGenTy ThenRCG(CodeGen);
    ThenRCG(CGF);
  } else {
    emitCriticalRegion(CGF, UniqueDeclName, CodeGen, Loc);
  }
}

void CGOpenMPRuntime::checkAndEmitLastprivateConditional(CodeGenFunction &CGF,
                                                         const Expr *LHS) {
  if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
    return;
  LastprivateConditionalRefChecker Checker(LastprivateConditionalStack);
  if (!Checker.Visit(LHS))
    return;
  const Expr *FoundE;
  const Decl *FoundD;
  StringRef UniqueDeclName;
  LValue IVLVal;
  llvm::Function *FoundFn;
  std::tie(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn) =
      Checker.getFoundData();
  if (FoundFn != CGF.CurFn) {
    // Special codegen for inner parallel regions.
    // ((struct.lastprivate.conditional*)&priv_a)->Fired = 1;
    auto It = LastprivateConditionalToTypes[FoundFn].find(FoundD);
    assert(It != LastprivateConditionalToTypes[FoundFn].end() &&
           "Lastprivate conditional is not found in outer region.");
    QualType StructTy = std::get<0>(It->getSecond());
    const FieldDecl *FiredDecl = std::get<2>(It->getSecond());
    LValue PrivLVal = CGF.EmitLValue(FoundE);
    Address StructAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        PrivLVal.getAddress(CGF),
        CGF.ConvertTypeForMem(CGF.getContext().getPointerType(StructTy)));
    LValue BaseLVal =
        CGF.MakeAddrLValue(StructAddr, StructTy, AlignmentSource::Decl);
    LValue FiredLVal = CGF.EmitLValueForField(BaseLVal, FiredDecl);
    CGF.EmitAtomicStore(RValue::get(llvm::ConstantInt::get(
                            CGF.ConvertTypeForMem(FiredDecl->getType()), 1)),
                        FiredLVal, llvm::AtomicOrdering::Unordered,
                        /*IsVolatile=*/true, /*isInit=*/false);
    return;
  }

  // Private address of the lastprivate conditional in the current context.
  // priv_a
  LValue LVal = CGF.EmitLValue(FoundE);
  emitLastprivateConditionalUpdate(CGF, IVLVal, UniqueDeclName, LVal,
                                   FoundE->getExprLoc());
}

void CGOpenMPRuntime::checkAndEmitSharedLastprivateConditional(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    const llvm::DenseSet<CanonicalDeclPtr<const VarDecl>> &IgnoredDecls) {
  if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
    return;
  auto Range = llvm::reverse(LastprivateConditionalStack);
  auto It = llvm::find_if(
      Range, [](const LastprivateConditionalData &D) { return !D.Disabled; });
  if (It == Range.end() || It->Fn != CGF.CurFn)
    return;
  auto LPCI = LastprivateConditionalToTypes.find(It->Fn);
  assert(LPCI != LastprivateConditionalToTypes.end() &&
         "Lastprivates must be registered already.");
  SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
  getOpenMPCaptureRegions(CaptureRegions, D.getDirectiveKind());
  const CapturedStmt *CS = D.getCapturedStmt(CaptureRegions.back());
  for (const auto &Pair : It->DeclToUniqueName) {
    const auto *VD = cast<VarDecl>(Pair.first->getCanonicalDecl());
    if (!CS->capturesVariable(VD) || IgnoredDecls.count(VD) > 0)
      continue;
    auto I = LPCI->getSecond().find(Pair.first);
    assert(I != LPCI->getSecond().end() &&
           "Lastprivate must be registered already.");
    // bool Cmp = priv_a.Fired != 0;
    LValue BaseLVal = std::get<3>(I->getSecond());
    LValue FiredLVal =
        CGF.EmitLValueForField(BaseLVal, std::get<2>(I->getSecond()));
    llvm::Value *Res = CGF.EmitLoadOfScalar(FiredLVal, D.getBeginLoc());
    llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Res);
    llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lpc.then");
    llvm::BasicBlock *DoneBB = CGF.createBasicBlock("lpc.done");
    // if (Cmp) {
    CGF.Builder.CreateCondBr(Cmp, ThenBB, DoneBB);
    CGF.EmitBlock(ThenBB);
    Address Addr = CGF.GetAddrOfLocalVar(VD);
    LValue LVal;
    if (VD->getType()->isReferenceType())
      LVal = CGF.EmitLoadOfReferenceLValue(Addr, VD->getType(),
                                           AlignmentSource::Decl);
    else
      LVal = CGF.MakeAddrLValue(Addr, VD->getType().getNonReferenceType(),
                                AlignmentSource::Decl);
    emitLastprivateConditionalUpdate(CGF, It->IVLVal, Pair.second, LVal,
                                     D.getBeginLoc());
    auto AL = ApplyDebugLocation::CreateArtificial(CGF);
    CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
    // }
  }
}

void CGOpenMPRuntime::emitLastprivateConditionalFinalUpdate(
    CodeGenFunction &CGF, LValue PrivLVal, const VarDecl *VD,
    SourceLocation Loc) {
  if (CGF.getLangOpts().OpenMP < 50)
    return;
  auto It = LastprivateConditionalStack.back().DeclToUniqueName.find(VD);
  assert(It != LastprivateConditionalStack.back().DeclToUniqueName.end() &&
         "Unknown lastprivate conditional variable.");
  StringRef UniqueName = It->second;
  llvm::GlobalVariable *GV = CGM.getModule().getNamedGlobal(UniqueName);
  // The variable was not updated in the region; exit.
void CGOpenMPRuntime::emitLastprivateConditionalFinalUpdate(
    CodeGenFunction &CGF, LValue PrivLVal, const VarDecl *VD,
    SourceLocation Loc) {
  if (CGF.getLangOpts().OpenMP < 50)
    return;
  auto It = LastprivateConditionalStack.back().DeclToUniqueName.find(VD);
  assert(It != LastprivateConditionalStack.back().DeclToUniqueName.end() &&
         "Unknown lastprivate conditional variable.");
  StringRef UniqueName = It->second;
  llvm::GlobalVariable *GV = CGM.getModule().getNamedGlobal(UniqueName);
  // The variable was not updated in the region - exit.
  if (!GV)
    return;
  LValue LPLVal = CGF.MakeAddrLValue(
      GV, PrivLVal.getType().getNonReferenceType(), PrivLVal.getAlignment());
  llvm::Value *Res = CGF.EmitLoadOfScalar(LPLVal, Loc);
  CGF.EmitStoreOfScalar(Res, PrivLVal);
}
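
// At the end of the region, emitLastprivateConditionalFinalUpdate above copies
// the tracked last value back into the destination variable, roughly (sketch):
//
//   a = <unique global last value for 'a'>; // skipped entirely when no update
//                                           // fired, i.e. the global was
//                                           // never created.
//
// The remainder of this file implements CGOpenMPSIMDRuntime, the runtime used
// in SIMD-only mode (e.g. under -fopenmp-simd), where only 'simd'-related
// constructs are code-generated and no OpenMP runtime library calls may be
// emitted. The front end is expected to ignore everything else, so nearly
// every override below is llvm_unreachable. An illustrative (hypothetical)
// example of the intended behavior:
//
//   #pragma omp simd                 // still vectorized in SIMD-only mode
//   for (int i = 0; i < N; ++i)
//     A[i] = B[i] + C[i];
//
//   #pragma omp parallel for         // 'parallel' is not honored here; if
//   for (int i = 0; i < N; ++i)      // codegen ever reached one of the stubs
//     A[i] = B[i] + C[i];            // below for it, that would indicate a
//                                    // bug earlier in the pipeline.
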
llvm::Function *CGOpenMPSIMDRuntime::emitParallelOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Function *CGOpenMPSIMDRuntime::emitTeamsOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Function *CGOpenMPSIMDRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitParallelCall(CodeGenFunction &CGF,
                                           SourceLocation Loc,
                                           llvm::Function *OutlinedFn,
                                           ArrayRef<llvm::Value *> CapturedVars,
                                           const Expr *IfCond) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitCriticalRegion(
    CodeGenFunction &CGF, StringRef CriticalName,
    const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc,
    const Expr *Hint) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitMasterRegion(CodeGenFunction &CGF,
                                           const RegionCodeGenTy &MasterOpGen,
                                           SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitMaskedRegion(CodeGenFunction &CGF,
                                           const RegionCodeGenTy &MasterOpGen,
                                           SourceLocation Loc,
                                           const Expr *Filter) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
                                            SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskgroupRegion(
    CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen,
    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitSingleRegion(
    CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen,
    SourceLocation Loc, ArrayRef<const Expr *> CopyprivateVars,
    ArrayRef<const Expr *> DestExprs, ArrayRef<const Expr *> SrcExprs,
    ArrayRef<const Expr *> AssignmentOps) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitOrderedRegion(CodeGenFunction &CGF,
                                            const RegionCodeGenTy &OrderedOpGen,
                                            SourceLocation Loc,
                                            bool IsThreads) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitBarrierCall(CodeGenFunction &CGF,
                                          SourceLocation Loc,
                                          OpenMPDirectiveKind Kind,
                                          bool EmitChecks,
                                          bool ForceSimpleCall) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForDispatchInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
    bool Ordered, const DispatchRTInput &DispatchValues) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind,
    const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitDistributeStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDistScheduleClauseKind SchedKind, const StaticRTInput &Values) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
                                                     SourceLocation Loc,
                                                     unsigned IVSize,
                                                     bool IVSigned) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForStaticFinish(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              OpenMPDirectiveKind DKind) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Value *CGOpenMPSIMDRuntime::emitForNext(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              unsigned IVSize, bool IVSigned,
                                              Address IL, Address LB,
                                              Address UB, Address ST) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
                                               llvm::Value *NumThreads,
                                               SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitProcBindClause(CodeGenFunction &CGF,
                                             ProcBindKind ProcBind,
                                             SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

Address CGOpenMPSIMDRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
                                                    const VarDecl *VD,
                                                    Address VDAddr,
                                                    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Function *CGOpenMPSIMDRuntime::emitThreadPrivateVarDefinition(
    const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit,
    CodeGenFunction *CGF) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

Address CGOpenMPSIMDRuntime::getAddrOfArtificialThreadPrivate(
    CodeGenFunction &CGF, QualType VarType, StringRef Name) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitFlush(CodeGenFunction &CGF,
                                    ArrayRef<const Expr *> Vars,
                                    SourceLocation Loc,
                                    llvm::AtomicOrdering AO) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPExecutableDirective &D,
                                       llvm::Function *TaskFunction,
                                       QualType SharedsTy, Address Shareds,
                                       const Expr *IfCond,
                                       const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskLoopCall(
    CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D,
    llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds,
    const Expr *IfCond, const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitReduction(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> Privates,
    ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
    ArrayRef<const Expr *> ReductionOps, ReductionOptionsTy Options) {
  assert(Options.SimpleReduction && "Only simple reduction is expected.");
  CGOpenMPRuntime::emitReduction(CGF, Loc, Privates, LHSExprs, RHSExprs,
                                 ReductionOps, Options);
}
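
// emitReduction above is one of the few entry points with a real body in the
// SIMD-only runtime: a 'simple' reduction needs no runtime library support,
// so it is forwarded to the base class, which combines the private copy into
// the original variable inline. A sketch of qualifying (hypothetical) code:
//
//   int Sum = 0;
//   #pragma omp simd reduction(+ : Sum)
//   for (int i = 0; i < N; ++i)
//     Sum += A[i]; // finalized with plain loads/stores, no libomp calls
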
llvm::Value *CGOpenMPSIMDRuntime::emitTaskReductionInit(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
    ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
                                                SourceLocation Loc,
                                                bool IsWorksharingReduction) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  ReductionCodeGen &RCG,
                                                  unsigned N) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

Address CGOpenMPSIMDRuntime::getTaskReductionItem(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  llvm::Value *ReductionsPtr,
                                                  LValue SharedLVal) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
                                           SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitCancellationPointCall(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDirectiveKind CancelRegion) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitCancelCall(CodeGenFunction &CGF,
                                         SourceLocation Loc, const Expr *IfCond,
                                         OpenMPDirectiveKind CancelRegion) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTargetOutlinedFunction(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTargetCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
    llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
    llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
                                     const OMPLoopDirective &D)>
        SizeEmitter) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

bool CGOpenMPSIMDRuntime::emitTargetFunctions(GlobalDecl GD) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

bool CGOpenMPSIMDRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

bool CGOpenMPSIMDRuntime::emitTargetGlobal(GlobalDecl GD) {
  return false;
}

void CGOpenMPSIMDRuntime::emitTeamsCall(CodeGenFunction &CGF,
                                        const OMPExecutableDirective &D,
                                        SourceLocation Loc,
                                        llvm::Function *OutlinedFn,
                                        ArrayRef<llvm::Value *> CapturedVars) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
                                             const Expr *NumTeams,
                                             const Expr *ThreadLimit,
                                             SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTargetDataCalls(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTargetDataStandAloneCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitDoacrossInit(CodeGenFunction &CGF,
                                           const OMPLoopDirective &D,
                                           ArrayRef<Expr *> NumIterations) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
                                              const OMPDependClause *C) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

const VarDecl *
CGOpenMPSIMDRuntime::translateParameter(const FieldDecl *FD,
                                        const VarDecl *NativeParam) const {
  llvm_unreachable("Not supported in SIMD-only mode");
}

Address
CGOpenMPSIMDRuntime::getParameterAddress(CodeGenFunction &CGF,
                                         const VarDecl *NativeParam,
                                         const VarDecl *TargetParam) const {
  llvm_unreachable("Not supported in SIMD-only mode");
}