1 //===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This provides a class for OpenMP runtime code generation. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "CGOpenMPRuntime.h" 14 #include "CGCXXABI.h" 15 #include "CGCleanup.h" 16 #include "CGRecordLayout.h" 17 #include "CodeGenFunction.h" 18 #include "clang/AST/APValue.h" 19 #include "clang/AST/Attr.h" 20 #include "clang/AST/Decl.h" 21 #include "clang/AST/OpenMPClause.h" 22 #include "clang/AST/StmtOpenMP.h" 23 #include "clang/AST/StmtVisitor.h" 24 #include "clang/Basic/BitmaskEnum.h" 25 #include "clang/Basic/FileManager.h" 26 #include "clang/Basic/OpenMPKinds.h" 27 #include "clang/Basic/SourceManager.h" 28 #include "clang/CodeGen/ConstantInitBuilder.h" 29 #include "llvm/ADT/ArrayRef.h" 30 #include "llvm/ADT/SetOperations.h" 31 #include "llvm/ADT/StringExtras.h" 32 #include "llvm/Bitcode/BitcodeReader.h" 33 #include "llvm/IR/Constants.h" 34 #include "llvm/IR/DerivedTypes.h" 35 #include "llvm/IR/GlobalValue.h" 36 #include "llvm/IR/Value.h" 37 #include "llvm/Support/AtomicOrdering.h" 38 #include "llvm/Support/Format.h" 39 #include "llvm/Support/raw_ostream.h" 40 #include <cassert> 41 #include <numeric> 42 43 using namespace clang; 44 using namespace CodeGen; 45 using namespace llvm::omp; 46 47 namespace { 48 /// Base class for handling code generation inside OpenMP regions. 49 class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo { 50 public: 51 /// Kinds of OpenMP regions used in codegen. 52 enum CGOpenMPRegionKind { 53 /// Region with outlined function for standalone 'parallel' 54 /// directive. 
55 ParallelOutlinedRegion, 56 /// Region with outlined function for standalone 'task' directive. 57 TaskOutlinedRegion, 58 /// Region for constructs that do not require function outlining, 59 /// like 'for', 'sections', 'atomic' etc. directives. 60 InlinedRegion, 61 /// Region with outlined function for standalone 'target' directive. 62 TargetRegion, 63 }; 64 65 CGOpenMPRegionInfo(const CapturedStmt &CS, 66 const CGOpenMPRegionKind RegionKind, 67 const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind, 68 bool HasCancel) 69 : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind), 70 CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {} 71 72 CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind, 73 const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind, 74 bool HasCancel) 75 : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen), 76 Kind(Kind), HasCancel(HasCancel) {} 77 78 /// Get a variable or parameter for storing global thread id 79 /// inside OpenMP construct. 80 virtual const VarDecl *getThreadIDVariable() const = 0; 81 82 /// Emit the captured statement body. 83 void EmitBody(CodeGenFunction &CGF, const Stmt *S) override; 84 85 /// Get an LValue for the current ThreadID variable. 86 /// \return LValue for thread id variable. This LValue always has type int32*. 
87 virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF); 88 89 virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {} 90 91 CGOpenMPRegionKind getRegionKind() const { return RegionKind; } 92 93 OpenMPDirectiveKind getDirectiveKind() const { return Kind; } 94 95 bool hasCancel() const { return HasCancel; } 96 97 static bool classof(const CGCapturedStmtInfo *Info) { 98 return Info->getKind() == CR_OpenMP; 99 } 100 101 ~CGOpenMPRegionInfo() override = default; 102 103 protected: 104 CGOpenMPRegionKind RegionKind; 105 RegionCodeGenTy CodeGen; 106 OpenMPDirectiveKind Kind; 107 bool HasCancel; 108 }; 109 110 /// API for captured statement code generation in OpenMP constructs. 111 class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo { 112 public: 113 CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar, 114 const RegionCodeGenTy &CodeGen, 115 OpenMPDirectiveKind Kind, bool HasCancel, 116 StringRef HelperName) 117 : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind, 118 HasCancel), 119 ThreadIDVar(ThreadIDVar), HelperName(HelperName) { 120 assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region."); 121 } 122 123 /// Get a variable or parameter for storing global thread id 124 /// inside OpenMP construct. 125 const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; } 126 127 /// Get the name of the capture helper. 128 StringRef getHelperName() const override { return HelperName; } 129 130 static bool classof(const CGCapturedStmtInfo *Info) { 131 return CGOpenMPRegionInfo::classof(Info) && 132 cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == 133 ParallelOutlinedRegion; 134 } 135 136 private: 137 /// A variable or parameter storing global thread id for OpenMP 138 /// constructs. 139 const VarDecl *ThreadIDVar; 140 StringRef HelperName; 141 }; 142 143 /// API for captured statement code generation in OpenMP constructs. 
144 class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo { 145 public: 146 class UntiedTaskActionTy final : public PrePostActionTy { 147 bool Untied; 148 const VarDecl *PartIDVar; 149 const RegionCodeGenTy UntiedCodeGen; 150 llvm::SwitchInst *UntiedSwitch = nullptr; 151 152 public: 153 UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar, 154 const RegionCodeGenTy &UntiedCodeGen) 155 : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {} 156 void Enter(CodeGenFunction &CGF) override { 157 if (Untied) { 158 // Emit task switching point. 159 LValue PartIdLVal = CGF.EmitLoadOfPointerLValue( 160 CGF.GetAddrOfLocalVar(PartIDVar), 161 PartIDVar->getType()->castAs<PointerType>()); 162 llvm::Value *Res = 163 CGF.EmitLoadOfScalar(PartIdLVal, PartIDVar->getLocation()); 164 llvm::BasicBlock *DoneBB = CGF.createBasicBlock(".untied.done."); 165 UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB); 166 CGF.EmitBlock(DoneBB); 167 CGF.EmitBranchThroughCleanup(CGF.ReturnBlock); 168 CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp.")); 169 UntiedSwitch->addCase(CGF.Builder.getInt32(0), 170 CGF.Builder.GetInsertBlock()); 171 emitUntiedSwitch(CGF); 172 } 173 } 174 void emitUntiedSwitch(CodeGenFunction &CGF) const { 175 if (Untied) { 176 LValue PartIdLVal = CGF.EmitLoadOfPointerLValue( 177 CGF.GetAddrOfLocalVar(PartIDVar), 178 PartIDVar->getType()->castAs<PointerType>()); 179 CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()), 180 PartIdLVal); 181 UntiedCodeGen(CGF); 182 CodeGenFunction::JumpDest CurPoint = 183 CGF.getJumpDestInCurrentScope(".untied.next."); 184 CGF.EmitBranch(CGF.ReturnBlock.getBlock()); 185 CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp.")); 186 UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()), 187 CGF.Builder.GetInsertBlock()); 188 CGF.EmitBranchThroughCleanup(CurPoint); 189 CGF.EmitBlock(CurPoint.getBlock()); 190 } 191 } 192 unsigned getNumberOfParts() const { return 
UntiedSwitch->getNumCases(); } 193 }; 194 CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS, 195 const VarDecl *ThreadIDVar, 196 const RegionCodeGenTy &CodeGen, 197 OpenMPDirectiveKind Kind, bool HasCancel, 198 const UntiedTaskActionTy &Action) 199 : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel), 200 ThreadIDVar(ThreadIDVar), Action(Action) { 201 assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region."); 202 } 203 204 /// Get a variable or parameter for storing global thread id 205 /// inside OpenMP construct. 206 const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; } 207 208 /// Get an LValue for the current ThreadID variable. 209 LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override; 210 211 /// Get the name of the capture helper. 212 StringRef getHelperName() const override { return ".omp_outlined."; } 213 214 void emitUntiedSwitch(CodeGenFunction &CGF) override { 215 Action.emitUntiedSwitch(CGF); 216 } 217 218 static bool classof(const CGCapturedStmtInfo *Info) { 219 return CGOpenMPRegionInfo::classof(Info) && 220 cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == 221 TaskOutlinedRegion; 222 } 223 224 private: 225 /// A variable or parameter storing global thread id for OpenMP 226 /// constructs. 227 const VarDecl *ThreadIDVar; 228 /// Action for emitting code for untied tasks. 229 const UntiedTaskActionTy &Action; 230 }; 231 232 /// API for inlined captured statement code generation in OpenMP 233 /// constructs. 234 class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo { 235 public: 236 CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI, 237 const RegionCodeGenTy &CodeGen, 238 OpenMPDirectiveKind Kind, bool HasCancel) 239 : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel), 240 OldCSI(OldCSI), 241 OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {} 242 243 // Retrieve the value of the context parameter. 
244 llvm::Value *getContextValue() const override { 245 if (OuterRegionInfo) 246 return OuterRegionInfo->getContextValue(); 247 llvm_unreachable("No context value for inlined OpenMP region"); 248 } 249 250 void setContextValue(llvm::Value *V) override { 251 if (OuterRegionInfo) { 252 OuterRegionInfo->setContextValue(V); 253 return; 254 } 255 llvm_unreachable("No context value for inlined OpenMP region"); 256 } 257 258 /// Lookup the captured field decl for a variable. 259 const FieldDecl *lookup(const VarDecl *VD) const override { 260 if (OuterRegionInfo) 261 return OuterRegionInfo->lookup(VD); 262 // If there is no outer outlined region,no need to lookup in a list of 263 // captured variables, we can use the original one. 264 return nullptr; 265 } 266 267 FieldDecl *getThisFieldDecl() const override { 268 if (OuterRegionInfo) 269 return OuterRegionInfo->getThisFieldDecl(); 270 return nullptr; 271 } 272 273 /// Get a variable or parameter for storing global thread id 274 /// inside OpenMP construct. 275 const VarDecl *getThreadIDVariable() const override { 276 if (OuterRegionInfo) 277 return OuterRegionInfo->getThreadIDVariable(); 278 return nullptr; 279 } 280 281 /// Get an LValue for the current ThreadID variable. 282 LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override { 283 if (OuterRegionInfo) 284 return OuterRegionInfo->getThreadIDVariableLValue(CGF); 285 llvm_unreachable("No LValue for inlined OpenMP construct"); 286 } 287 288 /// Get the name of the capture helper. 
289 StringRef getHelperName() const override { 290 if (auto *OuterRegionInfo = getOldCSI()) 291 return OuterRegionInfo->getHelperName(); 292 llvm_unreachable("No helper name for inlined OpenMP construct"); 293 } 294 295 void emitUntiedSwitch(CodeGenFunction &CGF) override { 296 if (OuterRegionInfo) 297 OuterRegionInfo->emitUntiedSwitch(CGF); 298 } 299 300 CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; } 301 302 static bool classof(const CGCapturedStmtInfo *Info) { 303 return CGOpenMPRegionInfo::classof(Info) && 304 cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion; 305 } 306 307 ~CGOpenMPInlinedRegionInfo() override = default; 308 309 private: 310 /// CodeGen info about outer OpenMP region. 311 CodeGenFunction::CGCapturedStmtInfo *OldCSI; 312 CGOpenMPRegionInfo *OuterRegionInfo; 313 }; 314 315 /// API for captured statement code generation in OpenMP target 316 /// constructs. For this captures, implicit parameters are used instead of the 317 /// captured fields. The name of the target region has to be unique in a given 318 /// application so it is provided by the client, because only the client has 319 /// the information to generate that. 320 class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo { 321 public: 322 CGOpenMPTargetRegionInfo(const CapturedStmt &CS, 323 const RegionCodeGenTy &CodeGen, StringRef HelperName) 324 : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target, 325 /*HasCancel=*/false), 326 HelperName(HelperName) {} 327 328 /// This is unused for target regions because each starts executing 329 /// with a single thread. 330 const VarDecl *getThreadIDVariable() const override { return nullptr; } 331 332 /// Get the name of the capture helper. 
333 StringRef getHelperName() const override { return HelperName; } 334 335 static bool classof(const CGCapturedStmtInfo *Info) { 336 return CGOpenMPRegionInfo::classof(Info) && 337 cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion; 338 } 339 340 private: 341 StringRef HelperName; 342 }; 343 344 static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) { 345 llvm_unreachable("No codegen for expressions"); 346 } 347 /// API for generation of expressions captured in a innermost OpenMP 348 /// region. 349 class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo { 350 public: 351 CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS) 352 : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen, 353 OMPD_unknown, 354 /*HasCancel=*/false), 355 PrivScope(CGF) { 356 // Make sure the globals captured in the provided statement are local by 357 // using the privatization logic. We assume the same variable is not 358 // captured more than once. 359 for (const auto &C : CS.captures()) { 360 if (!C.capturesVariable() && !C.capturesVariableByCopy()) 361 continue; 362 363 const VarDecl *VD = C.getCapturedVar(); 364 if (VD->isLocalVarDeclOrParm()) 365 continue; 366 367 DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD), 368 /*RefersToEnclosingVariableOrCapture=*/false, 369 VD->getType().getNonReferenceType(), VK_LValue, 370 C.getLocation()); 371 PrivScope.addPrivate( 372 VD, [&CGF, &DRE]() { return CGF.EmitLValue(&DRE).getAddress(CGF); }); 373 } 374 (void)PrivScope.Privatize(); 375 } 376 377 /// Lookup the captured field decl for a variable. 378 const FieldDecl *lookup(const VarDecl *VD) const override { 379 if (const FieldDecl *FD = CGOpenMPInlinedRegionInfo::lookup(VD)) 380 return FD; 381 return nullptr; 382 } 383 384 /// Emit the captured statement body. 
385 void EmitBody(CodeGenFunction &CGF, const Stmt *S) override { 386 llvm_unreachable("No body for expressions"); 387 } 388 389 /// Get a variable or parameter for storing global thread id 390 /// inside OpenMP construct. 391 const VarDecl *getThreadIDVariable() const override { 392 llvm_unreachable("No thread id for expressions"); 393 } 394 395 /// Get the name of the capture helper. 396 StringRef getHelperName() const override { 397 llvm_unreachable("No helper name for expressions"); 398 } 399 400 static bool classof(const CGCapturedStmtInfo *Info) { return false; } 401 402 private: 403 /// Private scope to capture global variables. 404 CodeGenFunction::OMPPrivateScope PrivScope; 405 }; 406 407 /// RAII for emitting code of OpenMP constructs. 408 class InlinedOpenMPRegionRAII { 409 CodeGenFunction &CGF; 410 llvm::DenseMap<const VarDecl *, FieldDecl *> LambdaCaptureFields; 411 FieldDecl *LambdaThisCaptureField = nullptr; 412 const CodeGen::CGBlockInfo *BlockInfo = nullptr; 413 bool NoInheritance = false; 414 415 public: 416 /// Constructs region for combined constructs. 417 /// \param CodeGen Code generation sequence for combined directives. Includes 418 /// a list of functions used for code generation of implicitly inlined 419 /// regions. 420 InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen, 421 OpenMPDirectiveKind Kind, bool HasCancel, 422 bool NoInheritance = true) 423 : CGF(CGF), NoInheritance(NoInheritance) { 424 // Start emission for the construct. 425 CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo( 426 CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel); 427 if (NoInheritance) { 428 std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields); 429 LambdaThisCaptureField = CGF.LambdaThisCaptureField; 430 CGF.LambdaThisCaptureField = nullptr; 431 BlockInfo = CGF.BlockInfo; 432 CGF.BlockInfo = nullptr; 433 } 434 } 435 436 ~InlinedOpenMPRegionRAII() { 437 // Restore original CapturedStmtInfo only if we're done with code emission. 
438 auto *OldCSI = 439 cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI(); 440 delete CGF.CapturedStmtInfo; 441 CGF.CapturedStmtInfo = OldCSI; 442 if (NoInheritance) { 443 std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields); 444 CGF.LambdaThisCaptureField = LambdaThisCaptureField; 445 CGF.BlockInfo = BlockInfo; 446 } 447 } 448 }; 449 450 /// Values for bit flags used in the ident_t to describe the fields. 451 /// All enumeric elements are named and described in accordance with the code 452 /// from https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h 453 enum OpenMPLocationFlags : unsigned { 454 /// Use trampoline for internal microtask. 455 OMP_IDENT_IMD = 0x01, 456 /// Use c-style ident structure. 457 OMP_IDENT_KMPC = 0x02, 458 /// Atomic reduction option for kmpc_reduce. 459 OMP_ATOMIC_REDUCE = 0x10, 460 /// Explicit 'barrier' directive. 461 OMP_IDENT_BARRIER_EXPL = 0x20, 462 /// Implicit barrier in code. 463 OMP_IDENT_BARRIER_IMPL = 0x40, 464 /// Implicit barrier in 'for' directive. 465 OMP_IDENT_BARRIER_IMPL_FOR = 0x40, 466 /// Implicit barrier in 'sections' directive. 467 OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0, 468 /// Implicit barrier in 'single' directive. 469 OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140, 470 /// Call of __kmp_for_static_init for static loop. 471 OMP_IDENT_WORK_LOOP = 0x200, 472 /// Call of __kmp_for_static_init for sections. 473 OMP_IDENT_WORK_SECTIONS = 0x400, 474 /// Call of __kmp_for_static_init for distribute. 475 OMP_IDENT_WORK_DISTRIBUTE = 0x800, 476 LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE) 477 }; 478 479 namespace { 480 LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE(); 481 /// Values for bit flags for marking which requires clauses have been used. 482 enum OpenMPOffloadingRequiresDirFlags : int64_t { 483 /// flag undefined. 484 OMP_REQ_UNDEFINED = 0x000, 485 /// no requires clause present. 486 OMP_REQ_NONE = 0x001, 487 /// reverse_offload clause. 
488 OMP_REQ_REVERSE_OFFLOAD = 0x002, 489 /// unified_address clause. 490 OMP_REQ_UNIFIED_ADDRESS = 0x004, 491 /// unified_shared_memory clause. 492 OMP_REQ_UNIFIED_SHARED_MEMORY = 0x008, 493 /// dynamic_allocators clause. 494 OMP_REQ_DYNAMIC_ALLOCATORS = 0x010, 495 LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_REQ_DYNAMIC_ALLOCATORS) 496 }; 497 498 enum OpenMPOffloadingReservedDeviceIDs { 499 /// Device ID if the device was not defined, runtime should get it 500 /// from environment variables in the spec. 501 OMP_DEVICEID_UNDEF = -1, 502 }; 503 } // anonymous namespace 504 505 /// Describes ident structure that describes a source location. 506 /// All descriptions are taken from 507 /// https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h 508 /// Original structure: 509 /// typedef struct ident { 510 /// kmp_int32 reserved_1; /**< might be used in Fortran; 511 /// see above */ 512 /// kmp_int32 flags; /**< also f.flags; KMP_IDENT_xxx flags; 513 /// KMP_IDENT_KMPC identifies this union 514 /// member */ 515 /// kmp_int32 reserved_2; /**< not really used in Fortran any more; 516 /// see above */ 517 ///#if USE_ITT_BUILD 518 /// /* but currently used for storing 519 /// region-specific ITT */ 520 /// /* contextual information. */ 521 ///#endif /* USE_ITT_BUILD */ 522 /// kmp_int32 reserved_3; /**< source[4] in Fortran, do not use for 523 /// C++ */ 524 /// char const *psource; /**< String describing the source location. 525 /// The string is composed of semi-colon separated 526 // fields which describe the source file, 527 /// the function and a pair of line numbers that 528 /// delimit the construct. 529 /// */ 530 /// } ident_t; 531 enum IdentFieldIndex { 532 /// might be used in Fortran 533 IdentField_Reserved_1, 534 /// OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member. 
535 IdentField_Flags, 536 /// Not really used in Fortran any more 537 IdentField_Reserved_2, 538 /// Source[4] in Fortran, do not use for C++ 539 IdentField_Reserved_3, 540 /// String describing the source location. The string is composed of 541 /// semi-colon separated fields which describe the source file, the function 542 /// and a pair of line numbers that delimit the construct. 543 IdentField_PSource 544 }; 545 546 /// Schedule types for 'omp for' loops (these enumerators are taken from 547 /// the enum sched_type in kmp.h). 548 enum OpenMPSchedType { 549 /// Lower bound for default (unordered) versions. 550 OMP_sch_lower = 32, 551 OMP_sch_static_chunked = 33, 552 OMP_sch_static = 34, 553 OMP_sch_dynamic_chunked = 35, 554 OMP_sch_guided_chunked = 36, 555 OMP_sch_runtime = 37, 556 OMP_sch_auto = 38, 557 /// static with chunk adjustment (e.g., simd) 558 OMP_sch_static_balanced_chunked = 45, 559 /// Lower bound for 'ordered' versions. 560 OMP_ord_lower = 64, 561 OMP_ord_static_chunked = 65, 562 OMP_ord_static = 66, 563 OMP_ord_dynamic_chunked = 67, 564 OMP_ord_guided_chunked = 68, 565 OMP_ord_runtime = 69, 566 OMP_ord_auto = 70, 567 OMP_sch_default = OMP_sch_static, 568 /// dist_schedule types 569 OMP_dist_sch_static_chunked = 91, 570 OMP_dist_sch_static = 92, 571 /// Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers. 572 /// Set if the monotonic schedule modifier was present. 573 OMP_sch_modifier_monotonic = (1 << 29), 574 /// Set if the nonmonotonic schedule modifier was present. 575 OMP_sch_modifier_nonmonotonic = (1 << 30), 576 }; 577 578 /// A basic class for pre|post-action for advanced codegen sequence for OpenMP 579 /// region. 
580 class CleanupTy final : public EHScopeStack::Cleanup { 581 PrePostActionTy *Action; 582 583 public: 584 explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {} 585 void Emit(CodeGenFunction &CGF, Flags /*flags*/) override { 586 if (!CGF.HaveInsertPoint()) 587 return; 588 Action->Exit(CGF); 589 } 590 }; 591 592 } // anonymous namespace 593 594 void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const { 595 CodeGenFunction::RunCleanupsScope Scope(CGF); 596 if (PrePostAction) { 597 CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction); 598 Callback(CodeGen, CGF, *PrePostAction); 599 } else { 600 PrePostActionTy Action; 601 Callback(CodeGen, CGF, Action); 602 } 603 } 604 605 /// Check if the combiner is a call to UDR combiner and if it is so return the 606 /// UDR decl used for reduction. 607 static const OMPDeclareReductionDecl * 608 getReductionInit(const Expr *ReductionOp) { 609 if (const auto *CE = dyn_cast<CallExpr>(ReductionOp)) 610 if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee())) 611 if (const auto *DRE = 612 dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts())) 613 if (const auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) 614 return DRD; 615 return nullptr; 616 } 617 618 static void emitInitWithReductionInitializer(CodeGenFunction &CGF, 619 const OMPDeclareReductionDecl *DRD, 620 const Expr *InitOp, 621 Address Private, Address Original, 622 QualType Ty) { 623 if (DRD->getInitializer()) { 624 std::pair<llvm::Function *, llvm::Function *> Reduction = 625 CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD); 626 const auto *CE = cast<CallExpr>(InitOp); 627 const auto *OVE = cast<OpaqueValueExpr>(CE->getCallee()); 628 const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts(); 629 const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts(); 630 const auto *LHSDRE = 631 cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr()); 632 const auto *RHSDRE = 633 
cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr()); 634 CodeGenFunction::OMPPrivateScope PrivateScope(CGF); 635 PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()), 636 [=]() { return Private; }); 637 PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()), 638 [=]() { return Original; }); 639 (void)PrivateScope.Privatize(); 640 RValue Func = RValue::get(Reduction.second); 641 CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func); 642 CGF.EmitIgnoredExpr(InitOp); 643 } else { 644 llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty); 645 std::string Name = CGF.CGM.getOpenMPRuntime().getName({"init"}); 646 auto *GV = new llvm::GlobalVariable( 647 CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true, 648 llvm::GlobalValue::PrivateLinkage, Init, Name); 649 LValue LV = CGF.MakeNaturalAlignAddrLValue(GV, Ty); 650 RValue InitRVal; 651 switch (CGF.getEvaluationKind(Ty)) { 652 case TEK_Scalar: 653 InitRVal = CGF.EmitLoadOfLValue(LV, DRD->getLocation()); 654 break; 655 case TEK_Complex: 656 InitRVal = 657 RValue::getComplex(CGF.EmitLoadOfComplex(LV, DRD->getLocation())); 658 break; 659 case TEK_Aggregate: { 660 OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_LValue); 661 CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, LV); 662 CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(), 663 /*IsInitializer=*/false); 664 return; 665 } 666 } 667 OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_PRValue); 668 CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal); 669 CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(), 670 /*IsInitializer=*/false); 671 } 672 } 673 674 /// Emit initialization of arrays of complex types. 675 /// \param DestAddr Address of the array. 676 /// \param Type Type of array. 677 /// \param Init Initial expression of array. 678 /// \param SrcAddr Address of the original array. 
679 static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr, 680 QualType Type, bool EmitDeclareReductionInit, 681 const Expr *Init, 682 const OMPDeclareReductionDecl *DRD, 683 Address SrcAddr = Address::invalid()) { 684 // Perform element-by-element initialization. 685 QualType ElementTy; 686 687 // Drill down to the base element type on both arrays. 688 const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe(); 689 llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr); 690 DestAddr = 691 CGF.Builder.CreateElementBitCast(DestAddr, DestAddr.getElementType()); 692 if (DRD) 693 SrcAddr = 694 CGF.Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType()); 695 696 llvm::Value *SrcBegin = nullptr; 697 if (DRD) 698 SrcBegin = SrcAddr.getPointer(); 699 llvm::Value *DestBegin = DestAddr.getPointer(); 700 // Cast from pointer to array type to pointer to single element. 701 llvm::Value *DestEnd = 702 CGF.Builder.CreateGEP(DestAddr.getElementType(), DestBegin, NumElements); 703 // The basic structure here is a while-do loop. 704 llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arrayinit.body"); 705 llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arrayinit.done"); 706 llvm::Value *IsEmpty = 707 CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty"); 708 CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB); 709 710 // Enter the loop body, making that address the current address. 
711 llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock(); 712 CGF.EmitBlock(BodyBB); 713 714 CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy); 715 716 llvm::PHINode *SrcElementPHI = nullptr; 717 Address SrcElementCurrent = Address::invalid(); 718 if (DRD) { 719 SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2, 720 "omp.arraycpy.srcElementPast"); 721 SrcElementPHI->addIncoming(SrcBegin, EntryBB); 722 SrcElementCurrent = 723 Address(SrcElementPHI, 724 SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize)); 725 } 726 llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI( 727 DestBegin->getType(), 2, "omp.arraycpy.destElementPast"); 728 DestElementPHI->addIncoming(DestBegin, EntryBB); 729 Address DestElementCurrent = 730 Address(DestElementPHI, 731 DestAddr.getAlignment().alignmentOfArrayElement(ElementSize)); 732 733 // Emit copy. 734 { 735 CodeGenFunction::RunCleanupsScope InitScope(CGF); 736 if (EmitDeclareReductionInit) { 737 emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent, 738 SrcElementCurrent, ElementTy); 739 } else 740 CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(), 741 /*IsInitializer=*/false); 742 } 743 744 if (DRD) { 745 // Shift the address forward by one element. 746 llvm::Value *SrcElementNext = CGF.Builder.CreateConstGEP1_32( 747 SrcAddr.getElementType(), SrcElementPHI, /*Idx0=*/1, 748 "omp.arraycpy.dest.element"); 749 SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock()); 750 } 751 752 // Shift the address forward by one element. 753 llvm::Value *DestElementNext = CGF.Builder.CreateConstGEP1_32( 754 DestAddr.getElementType(), DestElementPHI, /*Idx0=*/1, 755 "omp.arraycpy.dest.element"); 756 // Check whether we've reached the end. 
757 llvm::Value *Done = 758 CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done"); 759 CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB); 760 DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock()); 761 762 // Done. 763 CGF.EmitBlock(DoneBB, /*IsFinished=*/true); 764 } 765 766 LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) { 767 return CGF.EmitOMPSharedLValue(E); 768 } 769 770 LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF, 771 const Expr *E) { 772 if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E)) 773 return CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false); 774 return LValue(); 775 } 776 777 void ReductionCodeGen::emitAggregateInitialization( 778 CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal, 779 const OMPDeclareReductionDecl *DRD) { 780 // Emit VarDecl with copy init for arrays. 781 // Get the address of the original variable captured in current 782 // captured region. 783 const auto *PrivateVD = 784 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl()); 785 bool EmitDeclareReductionInit = 786 DRD && (DRD->getInitializer() || !PrivateVD->hasInit()); 787 EmitOMPAggregateInit(CGF, PrivateAddr, PrivateVD->getType(), 788 EmitDeclareReductionInit, 789 EmitDeclareReductionInit ? 
ClausesData[N].ReductionOp 790 : PrivateVD->getInit(), 791 DRD, SharedLVal.getAddress(CGF)); 792 } 793 794 ReductionCodeGen::ReductionCodeGen(ArrayRef<const Expr *> Shareds, 795 ArrayRef<const Expr *> Origs, 796 ArrayRef<const Expr *> Privates, 797 ArrayRef<const Expr *> ReductionOps) { 798 ClausesData.reserve(Shareds.size()); 799 SharedAddresses.reserve(Shareds.size()); 800 Sizes.reserve(Shareds.size()); 801 BaseDecls.reserve(Shareds.size()); 802 const auto *IOrig = Origs.begin(); 803 const auto *IPriv = Privates.begin(); 804 const auto *IRed = ReductionOps.begin(); 805 for (const Expr *Ref : Shareds) { 806 ClausesData.emplace_back(Ref, *IOrig, *IPriv, *IRed); 807 std::advance(IOrig, 1); 808 std::advance(IPriv, 1); 809 std::advance(IRed, 1); 810 } 811 } 812 813 void ReductionCodeGen::emitSharedOrigLValue(CodeGenFunction &CGF, unsigned N) { 814 assert(SharedAddresses.size() == N && OrigAddresses.size() == N && 815 "Number of generated lvalues must be exactly N."); 816 LValue First = emitSharedLValue(CGF, ClausesData[N].Shared); 817 LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Shared); 818 SharedAddresses.emplace_back(First, Second); 819 if (ClausesData[N].Shared == ClausesData[N].Ref) { 820 OrigAddresses.emplace_back(First, Second); 821 } else { 822 LValue First = emitSharedLValue(CGF, ClausesData[N].Ref); 823 LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Ref); 824 OrigAddresses.emplace_back(First, Second); 825 } 826 } 827 828 void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) { 829 const auto *PrivateVD = 830 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl()); 831 QualType PrivateType = PrivateVD->getType(); 832 bool AsArraySection = isa<OMPArraySectionExpr>(ClausesData[N].Ref); 833 if (!PrivateType->isVariablyModifiedType()) { 834 Sizes.emplace_back( 835 CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType()), 836 nullptr); 837 return; 838 } 839 llvm::Value *Size; 840 llvm::Value 
*SizeInChars; 841 auto *ElemType = 842 cast<llvm::PointerType>(OrigAddresses[N].first.getPointer(CGF)->getType()) 843 ->getElementType(); 844 auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType); 845 if (AsArraySection) { 846 Size = CGF.Builder.CreatePtrDiff(OrigAddresses[N].second.getPointer(CGF), 847 OrigAddresses[N].first.getPointer(CGF)); 848 Size = CGF.Builder.CreateNUWAdd( 849 Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1)); 850 SizeInChars = CGF.Builder.CreateNUWMul(Size, ElemSizeOf); 851 } else { 852 SizeInChars = 853 CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType()); 854 Size = CGF.Builder.CreateExactUDiv(SizeInChars, ElemSizeOf); 855 } 856 Sizes.emplace_back(SizeInChars, Size); 857 CodeGenFunction::OpaqueValueMapping OpaqueMap( 858 CGF, 859 cast<OpaqueValueExpr>( 860 CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()), 861 RValue::get(Size)); 862 CGF.EmitVariablyModifiedType(PrivateType); 863 } 864 865 void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N, 866 llvm::Value *Size) { 867 const auto *PrivateVD = 868 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl()); 869 QualType PrivateType = PrivateVD->getType(); 870 if (!PrivateType->isVariablyModifiedType()) { 871 assert(!Size && !Sizes[N].second && 872 "Size should be nullptr for non-variably modified reduction " 873 "items."); 874 return; 875 } 876 CodeGenFunction::OpaqueValueMapping OpaqueMap( 877 CGF, 878 cast<OpaqueValueExpr>( 879 CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()), 880 RValue::get(Size)); 881 CGF.EmitVariablyModifiedType(PrivateType); 882 } 883 884 void ReductionCodeGen::emitInitialization( 885 CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal, 886 llvm::function_ref<bool(CodeGenFunction &)> DefaultInit) { 887 assert(SharedAddresses.size() > N && "No variable was generated"); 888 const auto *PrivateVD = 889 
cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl()); 890 const OMPDeclareReductionDecl *DRD = 891 getReductionInit(ClausesData[N].ReductionOp); 892 QualType PrivateType = PrivateVD->getType(); 893 PrivateAddr = CGF.Builder.CreateElementBitCast( 894 PrivateAddr, CGF.ConvertTypeForMem(PrivateType)); 895 QualType SharedType = SharedAddresses[N].first.getType(); 896 SharedLVal = CGF.MakeAddrLValue( 897 CGF.Builder.CreateElementBitCast(SharedLVal.getAddress(CGF), 898 CGF.ConvertTypeForMem(SharedType)), 899 SharedType, SharedAddresses[N].first.getBaseInfo(), 900 CGF.CGM.getTBAAInfoForSubobject(SharedAddresses[N].first, SharedType)); 901 if (CGF.getContext().getAsArrayType(PrivateVD->getType())) { 902 if (DRD && DRD->getInitializer()) 903 (void)DefaultInit(CGF); 904 emitAggregateInitialization(CGF, N, PrivateAddr, SharedLVal, DRD); 905 } else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) { 906 (void)DefaultInit(CGF); 907 emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp, 908 PrivateAddr, SharedLVal.getAddress(CGF), 909 SharedLVal.getType()); 910 } else if (!DefaultInit(CGF) && PrivateVD->hasInit() && 911 !CGF.isTrivialInitializer(PrivateVD->getInit())) { 912 CGF.EmitAnyExprToMem(PrivateVD->getInit(), PrivateAddr, 913 PrivateVD->getType().getQualifiers(), 914 /*IsInitializer=*/false); 915 } 916 } 917 918 bool ReductionCodeGen::needCleanups(unsigned N) { 919 const auto *PrivateVD = 920 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl()); 921 QualType PrivateType = PrivateVD->getType(); 922 QualType::DestructionKind DTorKind = PrivateType.isDestructedType(); 923 return DTorKind != QualType::DK_none; 924 } 925 926 void ReductionCodeGen::emitCleanups(CodeGenFunction &CGF, unsigned N, 927 Address PrivateAddr) { 928 const auto *PrivateVD = 929 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl()); 930 QualType PrivateType = PrivateVD->getType(); 931 QualType::DestructionKind DTorKind = 
PrivateType.isDestructedType(); 932 if (needCleanups(N)) { 933 PrivateAddr = CGF.Builder.CreateElementBitCast( 934 PrivateAddr, CGF.ConvertTypeForMem(PrivateType)); 935 CGF.pushDestroy(DTorKind, PrivateAddr, PrivateType); 936 } 937 } 938 939 static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy, 940 LValue BaseLV) { 941 BaseTy = BaseTy.getNonReferenceType(); 942 while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) && 943 !CGF.getContext().hasSameType(BaseTy, ElTy)) { 944 if (const auto *PtrTy = BaseTy->getAs<PointerType>()) { 945 BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(CGF), PtrTy); 946 } else { 947 LValue RefLVal = CGF.MakeAddrLValue(BaseLV.getAddress(CGF), BaseTy); 948 BaseLV = CGF.EmitLoadOfReferenceLValue(RefLVal); 949 } 950 BaseTy = BaseTy->getPointeeType(); 951 } 952 return CGF.MakeAddrLValue( 953 CGF.Builder.CreateElementBitCast(BaseLV.getAddress(CGF), 954 CGF.ConvertTypeForMem(ElTy)), 955 BaseLV.getType(), BaseLV.getBaseInfo(), 956 CGF.CGM.getTBAAInfoForSubobject(BaseLV, BaseLV.getType())); 957 } 958 959 static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy, 960 llvm::Type *BaseLVType, CharUnits BaseLVAlignment, 961 llvm::Value *Addr) { 962 Address Tmp = Address::invalid(); 963 Address TopTmp = Address::invalid(); 964 Address MostTopTmp = Address::invalid(); 965 BaseTy = BaseTy.getNonReferenceType(); 966 while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) && 967 !CGF.getContext().hasSameType(BaseTy, ElTy)) { 968 Tmp = CGF.CreateMemTemp(BaseTy); 969 if (TopTmp.isValid()) 970 CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp); 971 else 972 MostTopTmp = Tmp; 973 TopTmp = Tmp; 974 BaseTy = BaseTy->getPointeeType(); 975 } 976 llvm::Type *Ty = BaseLVType; 977 if (Tmp.isValid()) 978 Ty = Tmp.getElementType(); 979 Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Addr, Ty); 980 if (Tmp.isValid()) { 981 CGF.Builder.CreateStore(Addr, Tmp); 982 return MostTopTmp; 983 } 984 return 
Address(Addr, BaseLVAlignment); 985 } 986 987 static const VarDecl *getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE) { 988 const VarDecl *OrigVD = nullptr; 989 if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(Ref)) { 990 const Expr *Base = OASE->getBase()->IgnoreParenImpCasts(); 991 while (const auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base)) 992 Base = TempOASE->getBase()->IgnoreParenImpCasts(); 993 while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base)) 994 Base = TempASE->getBase()->IgnoreParenImpCasts(); 995 DE = cast<DeclRefExpr>(Base); 996 OrigVD = cast<VarDecl>(DE->getDecl()); 997 } else if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Ref)) { 998 const Expr *Base = ASE->getBase()->IgnoreParenImpCasts(); 999 while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base)) 1000 Base = TempASE->getBase()->IgnoreParenImpCasts(); 1001 DE = cast<DeclRefExpr>(Base); 1002 OrigVD = cast<VarDecl>(DE->getDecl()); 1003 } 1004 return OrigVD; 1005 } 1006 1007 Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N, 1008 Address PrivateAddr) { 1009 const DeclRefExpr *DE; 1010 if (const VarDecl *OrigVD = ::getBaseDecl(ClausesData[N].Ref, DE)) { 1011 BaseDecls.emplace_back(OrigVD); 1012 LValue OriginalBaseLValue = CGF.EmitLValue(DE); 1013 LValue BaseLValue = 1014 loadToBegin(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(), 1015 OriginalBaseLValue); 1016 Address SharedAddr = SharedAddresses[N].first.getAddress(CGF); 1017 llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff( 1018 BaseLValue.getPointer(CGF), SharedAddr.getPointer()); 1019 llvm::Value *PrivatePointer = 1020 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 1021 PrivateAddr.getPointer(), SharedAddr.getType()); 1022 llvm::Value *Ptr = CGF.Builder.CreateGEP( 1023 SharedAddr.getElementType(), PrivatePointer, Adjustment); 1024 return castToBase(CGF, OrigVD->getType(), 1025 SharedAddresses[N].first.getType(), 1026 OriginalBaseLValue.getAddress(CGF).getType(), 
1027 OriginalBaseLValue.getAlignment(), Ptr); 1028 } 1029 BaseDecls.emplace_back( 1030 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Ref)->getDecl())); 1031 return PrivateAddr; 1032 } 1033 1034 bool ReductionCodeGen::usesReductionInitializer(unsigned N) const { 1035 const OMPDeclareReductionDecl *DRD = 1036 getReductionInit(ClausesData[N].ReductionOp); 1037 return DRD && DRD->getInitializer(); 1038 } 1039 1040 LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) { 1041 return CGF.EmitLoadOfPointerLValue( 1042 CGF.GetAddrOfLocalVar(getThreadIDVariable()), 1043 getThreadIDVariable()->getType()->castAs<PointerType>()); 1044 } 1045 1046 void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt *S) { 1047 if (!CGF.HaveInsertPoint()) 1048 return; 1049 // 1.2.2 OpenMP Language Terminology 1050 // Structured block - An executable statement with a single entry at the 1051 // top and a single exit at the bottom. 1052 // The point of exit cannot be a branch out of the structured block. 1053 // longjmp() and throw() must not violate the entry/exit criteria. 
1054 CGF.EHStack.pushTerminate(); 1055 if (S) 1056 CGF.incrementProfileCounter(S); 1057 CodeGen(CGF); 1058 CGF.EHStack.popTerminate(); 1059 } 1060 1061 LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue( 1062 CodeGenFunction &CGF) { 1063 return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()), 1064 getThreadIDVariable()->getType(), 1065 AlignmentSource::Decl); 1066 } 1067 1068 static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC, 1069 QualType FieldTy) { 1070 auto *Field = FieldDecl::Create( 1071 C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy, 1072 C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()), 1073 /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit); 1074 Field->setAccess(AS_public); 1075 DC->addDecl(Field); 1076 return Field; 1077 } 1078 1079 CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM, StringRef FirstSeparator, 1080 StringRef Separator) 1081 : CGM(CGM), FirstSeparator(FirstSeparator), Separator(Separator), 1082 OMPBuilder(CGM.getModule()), OffloadEntriesInfoManager(CGM) { 1083 KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8); 1084 1085 // Initialize Types used in OpenMPIRBuilder from OMPKinds.def 1086 OMPBuilder.initialize(); 1087 loadOffloadInfoMetadata(); 1088 } 1089 1090 void CGOpenMPRuntime::clear() { 1091 InternalVars.clear(); 1092 // Clean non-target variable declarations possibly used only in debug info. 
1093 for (const auto &Data : EmittedNonTargetVariables) { 1094 if (!Data.getValue().pointsToAliveValue()) 1095 continue; 1096 auto *GV = dyn_cast<llvm::GlobalVariable>(Data.getValue()); 1097 if (!GV) 1098 continue; 1099 if (!GV->isDeclaration() || GV->getNumUses() > 0) 1100 continue; 1101 GV->eraseFromParent(); 1102 } 1103 } 1104 1105 std::string CGOpenMPRuntime::getName(ArrayRef<StringRef> Parts) const { 1106 SmallString<128> Buffer; 1107 llvm::raw_svector_ostream OS(Buffer); 1108 StringRef Sep = FirstSeparator; 1109 for (StringRef Part : Parts) { 1110 OS << Sep << Part; 1111 Sep = Separator; 1112 } 1113 return std::string(OS.str()); 1114 } 1115 1116 static llvm::Function * 1117 emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty, 1118 const Expr *CombinerInitializer, const VarDecl *In, 1119 const VarDecl *Out, bool IsCombiner) { 1120 // void .omp_combiner.(Ty *in, Ty *out); 1121 ASTContext &C = CGM.getContext(); 1122 QualType PtrTy = C.getPointerType(Ty).withRestrict(); 1123 FunctionArgList Args; 1124 ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(), 1125 /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other); 1126 ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(), 1127 /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other); 1128 Args.push_back(&OmpOutParm); 1129 Args.push_back(&OmpInParm); 1130 const CGFunctionInfo &FnInfo = 1131 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 1132 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo); 1133 std::string Name = CGM.getOpenMPRuntime().getName( 1134 {IsCombiner ? 
"omp_combiner" : "omp_initializer", ""}); 1135 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage, 1136 Name, &CGM.getModule()); 1137 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo); 1138 if (CGM.getLangOpts().Optimize) { 1139 Fn->removeFnAttr(llvm::Attribute::NoInline); 1140 Fn->removeFnAttr(llvm::Attribute::OptimizeNone); 1141 Fn->addFnAttr(llvm::Attribute::AlwaysInline); 1142 } 1143 CodeGenFunction CGF(CGM); 1144 // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions. 1145 // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions. 1146 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, In->getLocation(), 1147 Out->getLocation()); 1148 CodeGenFunction::OMPPrivateScope Scope(CGF); 1149 Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm); 1150 Scope.addPrivate(In, [&CGF, AddrIn, PtrTy]() { 1151 return CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>()) 1152 .getAddress(CGF); 1153 }); 1154 Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm); 1155 Scope.addPrivate(Out, [&CGF, AddrOut, PtrTy]() { 1156 return CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>()) 1157 .getAddress(CGF); 1158 }); 1159 (void)Scope.Privatize(); 1160 if (!IsCombiner && Out->hasInit() && 1161 !CGF.isTrivialInitializer(Out->getInit())) { 1162 CGF.EmitAnyExprToMem(Out->getInit(), CGF.GetAddrOfLocalVar(Out), 1163 Out->getType().getQualifiers(), 1164 /*IsInitializer=*/true); 1165 } 1166 if (CombinerInitializer) 1167 CGF.EmitIgnoredExpr(CombinerInitializer); 1168 Scope.ForceCleanup(); 1169 CGF.FinishFunction(); 1170 return Fn; 1171 } 1172 1173 void CGOpenMPRuntime::emitUserDefinedReduction( 1174 CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) { 1175 if (UDRMap.count(D) > 0) 1176 return; 1177 llvm::Function *Combiner = emitCombinerOrInitializer( 1178 CGM, D->getType(), D->getCombiner(), 1179 cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerIn())->getDecl()), 1180 
cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerOut())->getDecl()), 1181 /*IsCombiner=*/true); 1182 llvm::Function *Initializer = nullptr; 1183 if (const Expr *Init = D->getInitializer()) { 1184 Initializer = emitCombinerOrInitializer( 1185 CGM, D->getType(), 1186 D->getInitializerKind() == OMPDeclareReductionDecl::CallInit ? Init 1187 : nullptr, 1188 cast<VarDecl>(cast<DeclRefExpr>(D->getInitOrig())->getDecl()), 1189 cast<VarDecl>(cast<DeclRefExpr>(D->getInitPriv())->getDecl()), 1190 /*IsCombiner=*/false); 1191 } 1192 UDRMap.try_emplace(D, Combiner, Initializer); 1193 if (CGF) { 1194 auto &Decls = FunctionUDRMap.FindAndConstruct(CGF->CurFn); 1195 Decls.second.push_back(D); 1196 } 1197 } 1198 1199 std::pair<llvm::Function *, llvm::Function *> 1200 CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) { 1201 auto I = UDRMap.find(D); 1202 if (I != UDRMap.end()) 1203 return I->second; 1204 emitUserDefinedReduction(/*CGF=*/nullptr, D); 1205 return UDRMap.lookup(D); 1206 } 1207 1208 namespace { 1209 // Temporary RAII solution to perform a push/pop stack event on the OpenMP IR 1210 // Builder if one is present. 1211 struct PushAndPopStackRAII { 1212 PushAndPopStackRAII(llvm::OpenMPIRBuilder *OMPBuilder, CodeGenFunction &CGF, 1213 bool HasCancel, llvm::omp::Directive Kind) 1214 : OMPBuilder(OMPBuilder) { 1215 if (!OMPBuilder) 1216 return; 1217 1218 // The following callback is the crucial part of clangs cleanup process. 1219 // 1220 // NOTE: 1221 // Once the OpenMPIRBuilder is used to create parallel regions (and 1222 // similar), the cancellation destination (Dest below) is determined via 1223 // IP. That means if we have variables to finalize we split the block at IP, 1224 // use the new block (=BB) as destination to build a JumpDest (via 1225 // getJumpDestInCurrentScope(BB)) which then is fed to 1226 // EmitBranchThroughCleanup. Furthermore, there will not be the need 1227 // to push & pop an FinalizationInfo object. 
1228 // The FiniCB will still be needed but at the point where the 1229 // OpenMPIRBuilder is asked to construct a parallel (or similar) construct. 1230 auto FiniCB = [&CGF](llvm::OpenMPIRBuilder::InsertPointTy IP) { 1231 assert(IP.getBlock()->end() == IP.getPoint() && 1232 "Clang CG should cause non-terminated block!"); 1233 CGBuilderTy::InsertPointGuard IPG(CGF.Builder); 1234 CGF.Builder.restoreIP(IP); 1235 CodeGenFunction::JumpDest Dest = 1236 CGF.getOMPCancelDestination(OMPD_parallel); 1237 CGF.EmitBranchThroughCleanup(Dest); 1238 }; 1239 1240 // TODO: Remove this once we emit parallel regions through the 1241 // OpenMPIRBuilder as it can do this setup internally. 1242 llvm::OpenMPIRBuilder::FinalizationInfo FI({FiniCB, Kind, HasCancel}); 1243 OMPBuilder->pushFinalizationCB(std::move(FI)); 1244 } 1245 ~PushAndPopStackRAII() { 1246 if (OMPBuilder) 1247 OMPBuilder->popFinalizationCB(); 1248 } 1249 llvm::OpenMPIRBuilder *OMPBuilder; 1250 }; 1251 } // namespace 1252 1253 static llvm::Function *emitParallelOrTeamsOutlinedFunction( 1254 CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS, 1255 const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, 1256 const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen) { 1257 assert(ThreadIDVar->getType()->isPointerType() && 1258 "thread id variable must be of type kmp_int32 *"); 1259 CodeGenFunction CGF(CGM, true); 1260 bool HasCancel = false; 1261 if (const auto *OPD = dyn_cast<OMPParallelDirective>(&D)) 1262 HasCancel = OPD->hasCancel(); 1263 else if (const auto *OPD = dyn_cast<OMPTargetParallelDirective>(&D)) 1264 HasCancel = OPD->hasCancel(); 1265 else if (const auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D)) 1266 HasCancel = OPSD->hasCancel(); 1267 else if (const auto *OPFD = dyn_cast<OMPParallelForDirective>(&D)) 1268 HasCancel = OPFD->hasCancel(); 1269 else if (const auto *OPFD = dyn_cast<OMPTargetParallelForDirective>(&D)) 1270 HasCancel = OPFD->hasCancel(); 1271 else 
if (const auto *OPFD = dyn_cast<OMPDistributeParallelForDirective>(&D)) 1272 HasCancel = OPFD->hasCancel(); 1273 else if (const auto *OPFD = 1274 dyn_cast<OMPTeamsDistributeParallelForDirective>(&D)) 1275 HasCancel = OPFD->hasCancel(); 1276 else if (const auto *OPFD = 1277 dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&D)) 1278 HasCancel = OPFD->hasCancel(); 1279 1280 // TODO: Temporarily inform the OpenMPIRBuilder, if any, about the new 1281 // parallel region to make cancellation barriers work properly. 1282 llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder(); 1283 PushAndPopStackRAII PSR(&OMPBuilder, CGF, HasCancel, InnermostKind); 1284 CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind, 1285 HasCancel, OutlinedHelperName); 1286 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 1287 return CGF.GenerateOpenMPCapturedStmtFunction(*CS, D.getBeginLoc()); 1288 } 1289 1290 llvm::Function *CGOpenMPRuntime::emitParallelOutlinedFunction( 1291 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, 1292 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { 1293 const CapturedStmt *CS = D.getCapturedStmt(OMPD_parallel); 1294 return emitParallelOrTeamsOutlinedFunction( 1295 CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen); 1296 } 1297 1298 llvm::Function *CGOpenMPRuntime::emitTeamsOutlinedFunction( 1299 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, 1300 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { 1301 const CapturedStmt *CS = D.getCapturedStmt(OMPD_teams); 1302 return emitParallelOrTeamsOutlinedFunction( 1303 CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen); 1304 } 1305 1306 llvm::Function *CGOpenMPRuntime::emitTaskOutlinedFunction( 1307 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, 1308 const VarDecl *PartIDVar, const VarDecl *TaskTVar, 1309 OpenMPDirectiveKind InnermostKind, const 
RegionCodeGenTy &CodeGen, 1310 bool Tied, unsigned &NumberOfParts) { 1311 auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF, 1312 PrePostActionTy &) { 1313 llvm::Value *ThreadID = getThreadID(CGF, D.getBeginLoc()); 1314 llvm::Value *UpLoc = emitUpdateLocation(CGF, D.getBeginLoc()); 1315 llvm::Value *TaskArgs[] = { 1316 UpLoc, ThreadID, 1317 CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar), 1318 TaskTVar->getType()->castAs<PointerType>()) 1319 .getPointer(CGF)}; 1320 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 1321 CGM.getModule(), OMPRTL___kmpc_omp_task), 1322 TaskArgs); 1323 }; 1324 CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar, 1325 UntiedCodeGen); 1326 CodeGen.setAction(Action); 1327 assert(!ThreadIDVar->getType()->isPointerType() && 1328 "thread id variable must be of type kmp_int32 for tasks"); 1329 const OpenMPDirectiveKind Region = 1330 isOpenMPTaskLoopDirective(D.getDirectiveKind()) ? OMPD_taskloop 1331 : OMPD_task; 1332 const CapturedStmt *CS = D.getCapturedStmt(Region); 1333 bool HasCancel = false; 1334 if (const auto *TD = dyn_cast<OMPTaskDirective>(&D)) 1335 HasCancel = TD->hasCancel(); 1336 else if (const auto *TD = dyn_cast<OMPTaskLoopDirective>(&D)) 1337 HasCancel = TD->hasCancel(); 1338 else if (const auto *TD = dyn_cast<OMPMasterTaskLoopDirective>(&D)) 1339 HasCancel = TD->hasCancel(); 1340 else if (const auto *TD = dyn_cast<OMPParallelMasterTaskLoopDirective>(&D)) 1341 HasCancel = TD->hasCancel(); 1342 1343 CodeGenFunction CGF(CGM, true); 1344 CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, 1345 InnermostKind, HasCancel, Action); 1346 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 1347 llvm::Function *Res = CGF.GenerateCapturedStmtFunction(*CS); 1348 if (!Tied) 1349 NumberOfParts = Action.getNumberOfParts(); 1350 return Res; 1351 } 1352 1353 static void buildStructValue(ConstantStructBuilder &Fields, CodeGenModule &CGM, 1354 const RecordDecl *RD, 
const CGRecordLayout &RL, 1355 ArrayRef<llvm::Constant *> Data) { 1356 llvm::StructType *StructTy = RL.getLLVMType(); 1357 unsigned PrevIdx = 0; 1358 ConstantInitBuilder CIBuilder(CGM); 1359 auto DI = Data.begin(); 1360 for (const FieldDecl *FD : RD->fields()) { 1361 unsigned Idx = RL.getLLVMFieldNo(FD); 1362 // Fill the alignment. 1363 for (unsigned I = PrevIdx; I < Idx; ++I) 1364 Fields.add(llvm::Constant::getNullValue(StructTy->getElementType(I))); 1365 PrevIdx = Idx + 1; 1366 Fields.add(*DI); 1367 ++DI; 1368 } 1369 } 1370 1371 template <class... As> 1372 static llvm::GlobalVariable * 1373 createGlobalStruct(CodeGenModule &CGM, QualType Ty, bool IsConstant, 1374 ArrayRef<llvm::Constant *> Data, const Twine &Name, 1375 As &&... Args) { 1376 const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl()); 1377 const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD); 1378 ConstantInitBuilder CIBuilder(CGM); 1379 ConstantStructBuilder Fields = CIBuilder.beginStruct(RL.getLLVMType()); 1380 buildStructValue(Fields, CGM, RD, RL, Data); 1381 return Fields.finishAndCreateGlobal( 1382 Name, CGM.getContext().getAlignOfGlobalVarInChars(Ty), IsConstant, 1383 std::forward<As>(Args)...); 1384 } 1385 1386 template <typename T> 1387 static void 1388 createConstantGlobalStructAndAddToParent(CodeGenModule &CGM, QualType Ty, 1389 ArrayRef<llvm::Constant *> Data, 1390 T &Parent) { 1391 const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl()); 1392 const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD); 1393 ConstantStructBuilder Fields = Parent.beginStruct(RL.getLLVMType()); 1394 buildStructValue(Fields, CGM, RD, RL, Data); 1395 Fields.finishAndAddTo(Parent); 1396 } 1397 1398 void CGOpenMPRuntime::setLocThreadIdInsertPt(CodeGenFunction &CGF, 1399 bool AtCurrentPoint) { 1400 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); 1401 assert(!Elem.second.ServiceInsertPt && "Insert point is set already."); 1402 1403 llvm::Value *Undef = llvm::UndefValue::get(CGF.Int32Ty); 
1404 if (AtCurrentPoint) { 1405 Elem.second.ServiceInsertPt = new llvm::BitCastInst( 1406 Undef, CGF.Int32Ty, "svcpt", CGF.Builder.GetInsertBlock()); 1407 } else { 1408 Elem.second.ServiceInsertPt = 1409 new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt"); 1410 Elem.second.ServiceInsertPt->insertAfter(CGF.AllocaInsertPt); 1411 } 1412 } 1413 1414 void CGOpenMPRuntime::clearLocThreadIdInsertPt(CodeGenFunction &CGF) { 1415 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); 1416 if (Elem.second.ServiceInsertPt) { 1417 llvm::Instruction *Ptr = Elem.second.ServiceInsertPt; 1418 Elem.second.ServiceInsertPt = nullptr; 1419 Ptr->eraseFromParent(); 1420 } 1421 } 1422 1423 static StringRef getIdentStringFromSourceLocation(CodeGenFunction &CGF, 1424 SourceLocation Loc, 1425 SmallString<128> &Buffer) { 1426 llvm::raw_svector_ostream OS(Buffer); 1427 // Build debug location 1428 PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc); 1429 OS << ";" << PLoc.getFilename() << ";"; 1430 if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl)) 1431 OS << FD->getQualifiedNameAsString(); 1432 OS << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;"; 1433 return OS.str(); 1434 } 1435 1436 llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF, 1437 SourceLocation Loc, 1438 unsigned Flags) { 1439 llvm::Constant *SrcLocStr; 1440 if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo || 1441 Loc.isInvalid()) { 1442 SrcLocStr = OMPBuilder.getOrCreateDefaultSrcLocStr(); 1443 } else { 1444 std::string FunctionName = ""; 1445 if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl)) 1446 FunctionName = FD->getQualifiedNameAsString(); 1447 PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc); 1448 const char *FileName = PLoc.getFilename(); 1449 unsigned Line = PLoc.getLine(); 1450 unsigned Column = PLoc.getColumn(); 1451 SrcLocStr = 1452 
OMPBuilder.getOrCreateSrcLocStr(FunctionName, FileName, Line, Column); 1453 } 1454 unsigned Reserved2Flags = getDefaultLocationReserved2Flags(); 1455 return OMPBuilder.getOrCreateIdent(SrcLocStr, llvm::omp::IdentFlag(Flags), 1456 Reserved2Flags); 1457 } 1458 1459 llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF, 1460 SourceLocation Loc) { 1461 assert(CGF.CurFn && "No function in current CodeGenFunction."); 1462 // If the OpenMPIRBuilder is used we need to use it for all thread id calls as 1463 // the clang invariants used below might be broken. 1464 if (CGM.getLangOpts().OpenMPIRBuilder) { 1465 SmallString<128> Buffer; 1466 OMPBuilder.updateToLocation(CGF.Builder.saveIP()); 1467 auto *SrcLocStr = OMPBuilder.getOrCreateSrcLocStr( 1468 getIdentStringFromSourceLocation(CGF, Loc, Buffer)); 1469 return OMPBuilder.getOrCreateThreadID( 1470 OMPBuilder.getOrCreateIdent(SrcLocStr)); 1471 } 1472 1473 llvm::Value *ThreadID = nullptr; 1474 // Check whether we've already cached a load of the thread id in this 1475 // function. 1476 auto I = OpenMPLocThreadIDMap.find(CGF.CurFn); 1477 if (I != OpenMPLocThreadIDMap.end()) { 1478 ThreadID = I->second.ThreadID; 1479 if (ThreadID != nullptr) 1480 return ThreadID; 1481 } 1482 // If exceptions are enabled, do not use parameter to avoid possible crash. 1483 if (auto *OMPRegionInfo = 1484 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) { 1485 if (OMPRegionInfo->getThreadIDVariable()) { 1486 // Check if this an outlined function with thread id passed as argument. 
1487 LValue LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF); 1488 llvm::BasicBlock *TopBlock = CGF.AllocaInsertPt->getParent(); 1489 if (!CGF.EHStack.requiresLandingPad() || !CGF.getLangOpts().Exceptions || 1490 !CGF.getLangOpts().CXXExceptions || 1491 CGF.Builder.GetInsertBlock() == TopBlock || 1492 !isa<llvm::Instruction>(LVal.getPointer(CGF)) || 1493 cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() == 1494 TopBlock || 1495 cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() == 1496 CGF.Builder.GetInsertBlock()) { 1497 ThreadID = CGF.EmitLoadOfScalar(LVal, Loc); 1498 // If value loaded in entry block, cache it and use it everywhere in 1499 // function. 1500 if (CGF.Builder.GetInsertBlock() == TopBlock) { 1501 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); 1502 Elem.second.ThreadID = ThreadID; 1503 } 1504 return ThreadID; 1505 } 1506 } 1507 } 1508 1509 // This is not an outlined function region - need to call __kmpc_int32 1510 // kmpc_global_thread_num(ident_t *loc). 1511 // Generate thread id value and cache this value for use across the 1512 // function. 
1513 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); 1514 if (!Elem.second.ServiceInsertPt) 1515 setLocThreadIdInsertPt(CGF); 1516 CGBuilderTy::InsertPointGuard IPG(CGF.Builder); 1517 CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt); 1518 llvm::CallInst *Call = CGF.Builder.CreateCall( 1519 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), 1520 OMPRTL___kmpc_global_thread_num), 1521 emitUpdateLocation(CGF, Loc)); 1522 Call->setCallingConv(CGF.getRuntimeCC()); 1523 Elem.second.ThreadID = Call; 1524 return Call; 1525 } 1526 1527 void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) { 1528 assert(CGF.CurFn && "No function in current CodeGenFunction."); 1529 if (OpenMPLocThreadIDMap.count(CGF.CurFn)) { 1530 clearLocThreadIdInsertPt(CGF); 1531 OpenMPLocThreadIDMap.erase(CGF.CurFn); 1532 } 1533 if (FunctionUDRMap.count(CGF.CurFn) > 0) { 1534 for(const auto *D : FunctionUDRMap[CGF.CurFn]) 1535 UDRMap.erase(D); 1536 FunctionUDRMap.erase(CGF.CurFn); 1537 } 1538 auto I = FunctionUDMMap.find(CGF.CurFn); 1539 if (I != FunctionUDMMap.end()) { 1540 for(const auto *D : I->second) 1541 UDMMap.erase(D); 1542 FunctionUDMMap.erase(I); 1543 } 1544 LastprivateConditionalToTypes.erase(CGF.CurFn); 1545 FunctionToUntiedTaskStackMap.erase(CGF.CurFn); 1546 } 1547 1548 llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() { 1549 return OMPBuilder.IdentPtr; 1550 } 1551 1552 llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() { 1553 if (!Kmpc_MicroTy) { 1554 // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...) 
1555 llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty), 1556 llvm::PointerType::getUnqual(CGM.Int32Ty)}; 1557 Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true); 1558 } 1559 return llvm::PointerType::getUnqual(Kmpc_MicroTy); 1560 } 1561 1562 llvm::FunctionCallee 1563 CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize, bool IVSigned, 1564 bool IsGPUDistribute) { 1565 assert((IVSize == 32 || IVSize == 64) && 1566 "IV size is not compatible with the omp runtime"); 1567 StringRef Name; 1568 if (IsGPUDistribute) 1569 Name = IVSize == 32 ? (IVSigned ? "__kmpc_distribute_static_init_4" 1570 : "__kmpc_distribute_static_init_4u") 1571 : (IVSigned ? "__kmpc_distribute_static_init_8" 1572 : "__kmpc_distribute_static_init_8u"); 1573 else 1574 Name = IVSize == 32 ? (IVSigned ? "__kmpc_for_static_init_4" 1575 : "__kmpc_for_static_init_4u") 1576 : (IVSigned ? "__kmpc_for_static_init_8" 1577 : "__kmpc_for_static_init_8u"); 1578 1579 llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty; 1580 auto *PtrTy = llvm::PointerType::getUnqual(ITy); 1581 llvm::Type *TypeParams[] = { 1582 getIdentTyPointerTy(), // loc 1583 CGM.Int32Ty, // tid 1584 CGM.Int32Ty, // schedtype 1585 llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter 1586 PtrTy, // p_lower 1587 PtrTy, // p_upper 1588 PtrTy, // p_stride 1589 ITy, // incr 1590 ITy // chunk 1591 }; 1592 auto *FnTy = 1593 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1594 return CGM.CreateRuntimeFunction(FnTy, Name); 1595 } 1596 1597 llvm::FunctionCallee 1598 CGOpenMPRuntime::createDispatchInitFunction(unsigned IVSize, bool IVSigned) { 1599 assert((IVSize == 32 || IVSize == 64) && 1600 "IV size is not compatible with the omp runtime"); 1601 StringRef Name = 1602 IVSize == 32 1603 ? (IVSigned ? "__kmpc_dispatch_init_4" : "__kmpc_dispatch_init_4u") 1604 : (IVSigned ? "__kmpc_dispatch_init_8" : "__kmpc_dispatch_init_8u"); 1605 llvm::Type *ITy = IVSize == 32 ? 
CGM.Int32Ty : CGM.Int64Ty; 1606 llvm::Type *TypeParams[] = { getIdentTyPointerTy(), // loc 1607 CGM.Int32Ty, // tid 1608 CGM.Int32Ty, // schedtype 1609 ITy, // lower 1610 ITy, // upper 1611 ITy, // stride 1612 ITy // chunk 1613 }; 1614 auto *FnTy = 1615 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1616 return CGM.CreateRuntimeFunction(FnTy, Name); 1617 } 1618 1619 llvm::FunctionCallee 1620 CGOpenMPRuntime::createDispatchFiniFunction(unsigned IVSize, bool IVSigned) { 1621 assert((IVSize == 32 || IVSize == 64) && 1622 "IV size is not compatible with the omp runtime"); 1623 StringRef Name = 1624 IVSize == 32 1625 ? (IVSigned ? "__kmpc_dispatch_fini_4" : "__kmpc_dispatch_fini_4u") 1626 : (IVSigned ? "__kmpc_dispatch_fini_8" : "__kmpc_dispatch_fini_8u"); 1627 llvm::Type *TypeParams[] = { 1628 getIdentTyPointerTy(), // loc 1629 CGM.Int32Ty, // tid 1630 }; 1631 auto *FnTy = 1632 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 1633 return CGM.CreateRuntimeFunction(FnTy, Name); 1634 } 1635 1636 llvm::FunctionCallee 1637 CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize, bool IVSigned) { 1638 assert((IVSize == 32 || IVSize == 64) && 1639 "IV size is not compatible with the omp runtime"); 1640 StringRef Name = 1641 IVSize == 32 1642 ? (IVSigned ? "__kmpc_dispatch_next_4" : "__kmpc_dispatch_next_4u") 1643 : (IVSigned ? "__kmpc_dispatch_next_8" : "__kmpc_dispatch_next_8u"); 1644 llvm::Type *ITy = IVSize == 32 ? 
CGM.Int32Ty : CGM.Int64Ty; 1645 auto *PtrTy = llvm::PointerType::getUnqual(ITy); 1646 llvm::Type *TypeParams[] = { 1647 getIdentTyPointerTy(), // loc 1648 CGM.Int32Ty, // tid 1649 llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter 1650 PtrTy, // p_lower 1651 PtrTy, // p_upper 1652 PtrTy // p_stride 1653 }; 1654 auto *FnTy = 1655 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 1656 return CGM.CreateRuntimeFunction(FnTy, Name); 1657 } 1658 1659 /// Obtain information that uniquely identifies a target entry. This 1660 /// consists of the file and device IDs as well as line number associated with 1661 /// the relevant entry source location. 1662 static void getTargetEntryUniqueInfo(ASTContext &C, SourceLocation Loc, 1663 unsigned &DeviceID, unsigned &FileID, 1664 unsigned &LineNum) { 1665 SourceManager &SM = C.getSourceManager(); 1666 1667 // The loc should be always valid and have a file ID (the user cannot use 1668 // #pragma directives in macros) 1669 1670 assert(Loc.isValid() && "Source location is expected to be always valid."); 1671 1672 PresumedLoc PLoc = SM.getPresumedLoc(Loc); 1673 assert(PLoc.isValid() && "Source location is expected to be always valid."); 1674 1675 llvm::sys::fs::UniqueID ID; 1676 if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID)) { 1677 PLoc = SM.getPresumedLoc(Loc, /*UseLineDirectives=*/false); 1678 assert(PLoc.isValid() && "Source location is expected to be always valid."); 1679 if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID)) 1680 SM.getDiagnostics().Report(diag::err_cannot_open_file) 1681 << PLoc.getFilename() << EC.message(); 1682 } 1683 1684 DeviceID = ID.getDevice(); 1685 FileID = ID.getFile(); 1686 LineNum = PLoc.getLine(); 1687 } 1688 1689 Address CGOpenMPRuntime::getAddrOfDeclareTargetVar(const VarDecl *VD) { 1690 if (CGM.getLangOpts().OpenMPSimd) 1691 return Address::invalid(); 1692 llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 1693 
OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD); 1694 if (Res && (*Res == OMPDeclareTargetDeclAttr::MT_Link || 1695 (*Res == OMPDeclareTargetDeclAttr::MT_To && 1696 HasRequiresUnifiedSharedMemory))) { 1697 SmallString<64> PtrName; 1698 { 1699 llvm::raw_svector_ostream OS(PtrName); 1700 OS << CGM.getMangledName(GlobalDecl(VD)); 1701 if (!VD->isExternallyVisible()) { 1702 unsigned DeviceID, FileID, Line; 1703 getTargetEntryUniqueInfo(CGM.getContext(), 1704 VD->getCanonicalDecl()->getBeginLoc(), 1705 DeviceID, FileID, Line); 1706 OS << llvm::format("_%x", FileID); 1707 } 1708 OS << "_decl_tgt_ref_ptr"; 1709 } 1710 llvm::Value *Ptr = CGM.getModule().getNamedValue(PtrName); 1711 if (!Ptr) { 1712 QualType PtrTy = CGM.getContext().getPointerType(VD->getType()); 1713 Ptr = getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(PtrTy), 1714 PtrName); 1715 1716 auto *GV = cast<llvm::GlobalVariable>(Ptr); 1717 GV->setLinkage(llvm::GlobalValue::WeakAnyLinkage); 1718 1719 if (!CGM.getLangOpts().OpenMPIsDevice) 1720 GV->setInitializer(CGM.GetAddrOfGlobal(VD)); 1721 registerTargetGlobalVariable(VD, cast<llvm::Constant>(Ptr)); 1722 } 1723 return Address(Ptr, CGM.getContext().getDeclAlign(VD)); 1724 } 1725 return Address::invalid(); 1726 } 1727 1728 llvm::Constant * 1729 CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) { 1730 assert(!CGM.getLangOpts().OpenMPUseTLS || 1731 !CGM.getContext().getTargetInfo().isTLSSupported()); 1732 // Lookup the entry, lazily creating it if necessary. 
1733 std::string Suffix = getName({"cache", ""}); 1734 return getOrCreateInternalVariable( 1735 CGM.Int8PtrPtrTy, Twine(CGM.getMangledName(VD)).concat(Suffix)); 1736 } 1737 1738 Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF, 1739 const VarDecl *VD, 1740 Address VDAddr, 1741 SourceLocation Loc) { 1742 if (CGM.getLangOpts().OpenMPUseTLS && 1743 CGM.getContext().getTargetInfo().isTLSSupported()) 1744 return VDAddr; 1745 1746 llvm::Type *VarTy = VDAddr.getElementType(); 1747 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 1748 CGF.Builder.CreatePointerCast(VDAddr.getPointer(), 1749 CGM.Int8PtrTy), 1750 CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)), 1751 getOrCreateThreadPrivateCache(VD)}; 1752 return Address(CGF.EmitRuntimeCall( 1753 OMPBuilder.getOrCreateRuntimeFunction( 1754 CGM.getModule(), OMPRTL___kmpc_threadprivate_cached), 1755 Args), 1756 VDAddr.getAlignment()); 1757 } 1758 1759 void CGOpenMPRuntime::emitThreadPrivateVarInit( 1760 CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor, 1761 llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) { 1762 // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime 1763 // library. 1764 llvm::Value *OMPLoc = emitUpdateLocation(CGF, Loc); 1765 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 1766 CGM.getModule(), OMPRTL___kmpc_global_thread_num), 1767 OMPLoc); 1768 // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor) 1769 // to register constructor/destructor for variable. 
llvm::Value *Args[] = {
      OMPLoc, CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.VoidPtrTy),
      Ctor, CopyCtor, Dtor};
  CGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(), OMPRTL___kmpc_threadprivate_register),
      Args);
}

/// Emit the constructor/destructor helpers for threadprivate variable \p VD
/// and register them with the runtime.
///
/// Nothing is emitted (and nullptr is returned) when TLS is used, when \p VD
/// has no definition, when this variable was already handled, or when neither
/// a constructor nor a destructor helper is needed. If \p CGF is null, a
/// standalone "__omp_threadprivate_init_" function performing the
/// registration is created and returned; otherwise the registration is emitted
/// into \p CGF and nullptr is returned.
llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition(
    const VarDecl *VD, Address VDAddr, SourceLocation Loc,
    bool PerformInit, CodeGenFunction *CGF) {
  // With native TLS no runtime registration is emitted.
  if (CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getContext().getTargetInfo().isTLSSupported())
    return nullptr;

  VD = VD->getDefinition(CGM.getContext());
  // The insert() doubles as a "first time we see this variable" check.
  if (VD && ThreadPrivateWithDefinition.insert(CGM.getMangledName(VD)).second) {
    QualType ASTTy = VD->getType();

    llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr;
    const Expr *Init = VD->getAnyInitializer();
    if (CGM.getLangOpts().CPlusPlus && PerformInit) {
      // Generate function that re-emits the declaration's initializer into the
      // threadprivate copy of the variable VD
      CodeGenFunction CtorCGF(CGM);
      FunctionArgList Args;
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
                            ImplicitParamDecl::Other);
      Args.push_back(&Dst);

      // The helper has the shape: void *ctor(void *dst).
      const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          CGM.getContext().VoidPtrTy, Args);
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      std::string Name = getName({"__kmpc_global_ctor_", ""});
      llvm::Function *Fn =
          CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
      CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI,
                            Args, Loc, Loc);
      // Load the destination pointer and view it as a pointer to VD's type.
      llvm::Value *ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      Address Arg = Address(ArgVal, VDAddr.getAlignment());
      Arg = CtorCGF.Builder.CreateElementBitCast(
          Arg, CtorCGF.ConvertTypeForMem(ASTTy));
      CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(),
                               /*IsInitializer=*/true);
      // Reload the argument and hand it back as the helper's return value.
      ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue);
      CtorCGF.FinishFunction();
      Ctor = Fn;
    }
    if (VD->getType().isDestructedType() != QualType::DK_none) {
      // Generate function that emits destructor call for the threadprivate copy
      // of the variable VD
      CodeGenFunction DtorCGF(CGM);
      FunctionArgList Args;
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
                            ImplicitParamDecl::Other);
      Args.push_back(&Dst);

      // The helper has the shape: void dtor(void *dst).
      const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          CGM.getContext().VoidTy, Args);
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      std::string Name = getName({"__kmpc_global_dtor_", ""});
      llvm::Function *Fn =
          CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
      // Named guard: the empty location stays applied while the function is
      // being started.
      auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
      DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args,
                            Loc, Loc);
      // Create a scope with an artificial location for the body of this function.
      auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
      llvm::Value *ArgVal = DtorCGF.EmitLoadOfScalar(
          DtorCGF.GetAddrOfLocalVar(&Dst),
          /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation());
      DtorCGF.emitDestroy(Address(ArgVal, VDAddr.getAlignment()), ASTTy,
                          DtorCGF.getDestroyer(ASTTy.isDestructedType()),
                          DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
      DtorCGF.FinishFunction();
      Dtor = Fn;
    }
    // Do not emit init function if it is not required.
    if (!Ctor && !Dtor)
      return nullptr;

    llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
    auto *CopyCtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs,
                                               /*isVarArg=*/false)
                           ->getPointerTo();
    // Copying constructor for the threadprivate variable.
    // Must be NULL - reserved by runtime, but currently it requires that this
    // parameter is always NULL. Otherwise it fires assertion.
    CopyCtor = llvm::Constant::getNullValue(CopyCtorTy);
    // Whichever helper was not generated is passed as a typed null pointer.
    if (Ctor == nullptr) {
      auto *CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
                                             /*isVarArg=*/false)
                         ->getPointerTo();
      Ctor = llvm::Constant::getNullValue(CtorTy);
    }
    if (Dtor == nullptr) {
      auto *DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy,
                                             /*isVarArg=*/false)
                         ->getPointerTo();
      Dtor = llvm::Constant::getNullValue(DtorTy);
    }
    if (!CGF) {
      // No enclosing function was supplied: wrap the registration in its own
      // nullary init function and return that to the caller.
      auto *InitFunctionTy =
          llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false);
      std::string Name = getName({"__omp_threadprivate_init_", ""});
      llvm::Function *InitFunction = CGM.CreateGlobalInitOrCleanUpFunction(
          InitFunctionTy, Name, CGM.getTypes().arrangeNullaryFunction());
      CodeGenFunction InitCGF(CGM);
      FunctionArgList ArgList;
      InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction,
                            CGM.getTypes().arrangeNullaryFunction(), ArgList,
                            Loc, Loc);
      emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
      InitCGF.FinishFunction();
      return InitFunction;
    }
    // Otherwise emit the registration into the caller-provided function.
    emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
  }
  return nullptr;
}

bool CGOpenMPRuntime::emitDeclareTargetVarDefinition(const VarDecl *VD,
                                                     llvm::GlobalVariable *Addr,
                                                     bool PerformInit) {
  if (CGM.getLangOpts().OMPTargetTriples.empty() &&
      !CGM.getLangOpts().OpenMPIsDevice)
    return false;
Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
  // Not a declare target variable, or one with 'link' map type (or 'to' under
  // unified shared memory): no ctor/dtor entries are generated here.
  if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
      (*Res == OMPDeclareTargetDeclAttr::MT_To &&
       HasRequiresUnifiedSharedMemory))
    return CGM.getLangOpts().OpenMPIsDevice;
  VD = VD->getDefinition(CGM.getContext());
  assert(VD && "Unknown VarDecl");

  // The insert() doubles as a "first time we see this variable" check.
  if (!DeclareTargetWithDefinition.insert(CGM.getMangledName(VD)).second)
    return CGM.getLangOpts().OpenMPIsDevice;

  QualType ASTTy = VD->getType();
  SourceLocation Loc = VD->getCanonicalDecl()->getBeginLoc();

  // Produce the unique prefix to identify the new target regions. We use
  // the source location of the variable declaration which we know to not
  // conflict with any target region.
  unsigned DeviceID;
  unsigned FileID;
  unsigned Line;
  getTargetEntryUniqueInfo(CGM.getContext(), Loc, DeviceID, FileID, Line);
  SmallString<128> Buffer, Out;
  {
    llvm::raw_svector_ostream OS(Buffer);
    OS << "__omp_offloading_" << llvm::format("_%x", DeviceID)
       << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line;
  }

  const Expr *Init = VD->getAnyInitializer();
  if (CGM.getLangOpts().CPlusPlus && PerformInit) {
    llvm::Constant *Ctor;
    llvm::Constant *ID;
    if (CGM.getLangOpts().OpenMPIsDevice) {
      // Generate function that re-emits the declaration's initializer into
      // the declare target variable at Addr.
      CodeGenFunction CtorCGF(CGM);

      const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction(
          FTy, Twine(Buffer, "_ctor"), FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(CtorCGF);
      CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
                            FunctionArgList(), Loc, Loc);
      auto AL = ApplyDebugLocation::CreateArtificial(CtorCGF);
      CtorCGF.EmitAnyExprToMem(Init,
                               Address(Addr, CGM.getContext().getDeclAlign(VD)),
                               Init->getType().getQualifiers(),
                               /*IsInitializer=*/true);
      CtorCGF.FinishFunction();
      Ctor = Fn;
      ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
      CGM.addUsedGlobal(cast<llvm::GlobalValue>(Ctor));
    } else {
      // Host side: a private i8 placeholder global stands in for the function
      // and also serves as the entry ID.
      Ctor = new llvm::GlobalVariable(
          CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
          llvm::GlobalValue::PrivateLinkage,
          llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_ctor"));
      ID = Ctor;
    }

    // Register the information for the entry associated with the constructor.
    Out.clear();
    OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
        DeviceID, FileID, Twine(Buffer, "_ctor").toStringRef(Out), Line, Ctor,
        ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryCtor);
  }
  if (VD->getType().isDestructedType() != QualType::DK_none) {
    llvm::Constant *Dtor;
    llvm::Constant *ID;
    if (CGM.getLangOpts().OpenMPIsDevice) {
      // Generate function that emits destructor call for the declare target
      // variable at Addr.
      CodeGenFunction DtorCGF(CGM);

      const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction(
          FTy, Twine(Buffer, "_dtor"), FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
      DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
                            FunctionArgList(), Loc, Loc);
      // Create a scope with an artificial location for the body of this
      // function.
      auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
      DtorCGF.emitDestroy(Address(Addr, CGM.getContext().getDeclAlign(VD)),
                          ASTTy, DtorCGF.getDestroyer(ASTTy.isDestructedType()),
                          DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
      DtorCGF.FinishFunction();
      Dtor = Fn;
      ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
      CGM.addUsedGlobal(cast<llvm::GlobalValue>(Dtor));
    } else {
      // Host side: placeholder global, as for the constructor entry above.
      Dtor = new llvm::GlobalVariable(
          CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
          llvm::GlobalValue::PrivateLinkage,
          llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_dtor"));
      ID = Dtor;
    }
    // Register the information for the entry associated with the destructor.
    Out.clear();
    OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
        DeviceID, FileID, Twine(Buffer, "_dtor").toStringRef(Out), Line, Dtor,
        ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryDtor);
  }
  return CGM.getLangOpts().OpenMPIsDevice;
}

Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF,
                                                          QualType VarType,
                                                          StringRef Name) {
  std::string Suffix = getName({"artificial", ""});
  llvm::Type *VarLVType = CGF.ConvertTypeForMem(VarType);
  llvm::Value *GAddr =
      getOrCreateInternalVariable(VarLVType, Twine(Name).concat(Suffix));
  if (CGM.getLangOpts().OpenMP && CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getTarget().isTLSSupported()) {
    cast<llvm::GlobalVariable>(GAddr)->setThreadLocal(/*Val=*/true);
    return Address(GAddr, CGM.getContext().getTypeAlignInChars(VarType));
  }
  std::string CacheSuffix = getName({"cache", ""});
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, SourceLocation()),
      getThreadID(CGF, SourceLocation()),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(GAddr, CGM.VoidPtrTy),
      CGF.Builder.CreateIntCast(CGF.getTypeSize(VarType), CGM.SizeTy,
                                /*isSigned=*/false),
getOrCreateInternalVariable( 2034 CGM.VoidPtrPtrTy, Twine(Name).concat(Suffix).concat(CacheSuffix))}; 2035 return Address( 2036 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 2037 CGF.EmitRuntimeCall( 2038 OMPBuilder.getOrCreateRuntimeFunction( 2039 CGM.getModule(), OMPRTL___kmpc_threadprivate_cached), 2040 Args), 2041 VarLVType->getPointerTo(/*AddrSpace=*/0)), 2042 CGM.getContext().getTypeAlignInChars(VarType)); 2043 } 2044 2045 void CGOpenMPRuntime::emitIfClause(CodeGenFunction &CGF, const Expr *Cond, 2046 const RegionCodeGenTy &ThenGen, 2047 const RegionCodeGenTy &ElseGen) { 2048 CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange()); 2049 2050 // If the condition constant folds and can be elided, try to avoid emitting 2051 // the condition and the dead arm of the if/else. 2052 bool CondConstant; 2053 if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) { 2054 if (CondConstant) 2055 ThenGen(CGF); 2056 else 2057 ElseGen(CGF); 2058 return; 2059 } 2060 2061 // Otherwise, the condition did not fold, or we couldn't elide it. Just 2062 // emit the conditional branch. 2063 llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("omp_if.then"); 2064 llvm::BasicBlock *ElseBlock = CGF.createBasicBlock("omp_if.else"); 2065 llvm::BasicBlock *ContBlock = CGF.createBasicBlock("omp_if.end"); 2066 CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0); 2067 2068 // Emit the 'then' code. 2069 CGF.EmitBlock(ThenBlock); 2070 ThenGen(CGF); 2071 CGF.EmitBranch(ContBlock); 2072 // Emit the 'else' code if present. 2073 // There is no need to emit line number for unconditional branch. 2074 (void)ApplyDebugLocation::CreateEmpty(CGF); 2075 CGF.EmitBlock(ElseBlock); 2076 ElseGen(CGF); 2077 // There is no need to emit line number for unconditional branch. 2078 (void)ApplyDebugLocation::CreateEmpty(CGF); 2079 CGF.EmitBranch(ContBlock); 2080 // Emit the continuation block for code after the if. 
CGF.EmitBlock(ContBlock, /*IsFinished=*/true);
}

/// Emit the invocation of the outlined parallel region \p OutlinedFn.
///
/// Without an 'if' clause (\p IfCond is null) the region is always forked via
/// __kmpc_fork_call. With one, emitIfClause() selects between the fork and a
/// serialized execution in which \p OutlinedFn is called directly, bracketed
/// by __kmpc_serialized_parallel / __kmpc_end_serialized_parallel.
/// \p CapturedVars are forwarded to the outlined function in both cases.
void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       llvm::Function *OutlinedFn,
                                       ArrayRef<llvm::Value *> CapturedVars,
                                       const Expr *IfCond) {
  if (!CGF.HaveInsertPoint())
    return;
  llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
  auto &M = CGM.getModule();
  // Parallel arm: hand the outlined function to the runtime.
  auto &&ThenGen = [&M, OutlinedFn, CapturedVars, RTLoc,
                    this](CodeGenFunction &CGF, PrePostActionTy &) {
    // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn);
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    llvm::Value *Args[] = {
        RTLoc,
        CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
        CGF.Builder.CreateBitCast(OutlinedFn, RT.getKmpc_MicroPointerTy())};
    llvm::SmallVector<llvm::Value *, 16> RealArgs;
    RealArgs.append(std::begin(Args), std::end(Args));
    RealArgs.append(CapturedVars.begin(), CapturedVars.end());

    llvm::FunctionCallee RTLFn =
        OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_fork_call);
    CGF.EmitRuntimeCall(RTLFn, RealArgs);
  };
  // Serialized arm: call the outlined function directly on this thread.
  auto &&ElseGen = [&M, OutlinedFn, CapturedVars, RTLoc, Loc,
                    this](CodeGenFunction &CGF, PrePostActionTy &) {
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    llvm::Value *ThreadID = RT.getThreadID(CGF, Loc);
    // Build calls:
    // __kmpc_serialized_parallel(&Loc, GTid);
    llvm::Value *Args[] = {RTLoc, ThreadID};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            M, OMPRTL___kmpc_serialized_parallel),
                        Args);

    // OutlinedFn(&gtid, &zero_bound, CapturedStruct);
    Address ThreadIDAddr = RT.emitThreadIDAddress(CGF, Loc);
    Address ZeroAddrBound =
        CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty,
                                         /*Name=*/".bound.zero.addr");
    CGF.Builder.CreateStore(CGF.Builder.getInt32(/*C*/ 0), ZeroAddrBound);
    llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
    // ThreadId for serialized parallels is 0.
    OutlinedFnArgs.push_back(ThreadIDAddr.getPointer());
    OutlinedFnArgs.push_back(ZeroAddrBound.getPointer());
    OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());

    // Ensure we do not inline the function. This is trivially true for the ones
    // passed to __kmpc_fork_call but the ones called in serialized regions
    // could be inlined. This is not perfect but it is closer to the invariant
    // we want, namely, every data environment starts with a new function.
    // TODO: We should pass the if condition to the runtime function and do the
    //       handling there. Much cleaner code.
    OutlinedFn->removeFnAttr(llvm::Attribute::AlwaysInline);
    OutlinedFn->addFnAttr(llvm::Attribute::NoInline);
    RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs);

    // __kmpc_end_serialized_parallel(&Loc, GTid);
    llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            M, OMPRTL___kmpc_end_serialized_parallel),
                        EndArgs);
  };
  if (IfCond) {
    emitIfClause(CGF, IfCond, ThenGen, ElseGen);
  } else {
    // No 'if' clause: always take the parallel arm.
    RegionCodeGenTy ThenRCG(ThenGen);
    ThenRCG(CGF);
  }
}

// If we're inside an (outlined) parallel region, use the region info's
// thread-ID variable (it is passed in a first argument of the outlined function
// as "kmp_int32 *gtid"). Otherwise, if we're not inside parallel region, but in
// regular serial code region, get thread ID by calling kmp_int32
// kmpc_global_thread_num(ident_t *loc), stash this thread ID in a temporary and
// return the address of that temp.
2161 Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF, 2162 SourceLocation Loc) { 2163 if (auto *OMPRegionInfo = 2164 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) 2165 if (OMPRegionInfo->getThreadIDVariable()) 2166 return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress(CGF); 2167 2168 llvm::Value *ThreadID = getThreadID(CGF, Loc); 2169 QualType Int32Ty = 2170 CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true); 2171 Address ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp."); 2172 CGF.EmitStoreOfScalar(ThreadID, 2173 CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty)); 2174 2175 return ThreadIDTemp; 2176 } 2177 2178 llvm::Constant *CGOpenMPRuntime::getOrCreateInternalVariable( 2179 llvm::Type *Ty, const llvm::Twine &Name, unsigned AddressSpace) { 2180 SmallString<256> Buffer; 2181 llvm::raw_svector_ostream Out(Buffer); 2182 Out << Name; 2183 StringRef RuntimeName = Out.str(); 2184 auto &Elem = *InternalVars.try_emplace(RuntimeName, nullptr).first; 2185 if (Elem.second) { 2186 assert(Elem.second->getType()->getPointerElementType() == Ty && 2187 "OMP internal variable has different type than requested"); 2188 return &*Elem.second; 2189 } 2190 2191 return Elem.second = new llvm::GlobalVariable( 2192 CGM.getModule(), Ty, /*IsConstant*/ false, 2193 llvm::GlobalValue::CommonLinkage, llvm::Constant::getNullValue(Ty), 2194 Elem.first(), /*InsertBefore=*/nullptr, 2195 llvm::GlobalValue::NotThreadLocal, AddressSpace); 2196 } 2197 2198 llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) { 2199 std::string Prefix = Twine("gomp_critical_user_", CriticalName).str(); 2200 std::string Name = getName({Prefix, "var"}); 2201 return getOrCreateInternalVariable(KmpCriticalNameTy, Name); 2202 } 2203 2204 namespace { 2205 /// Common pre(post)-action for different OpenMP constructs. 
2206 class CommonActionTy final : public PrePostActionTy { 2207 llvm::FunctionCallee EnterCallee; 2208 ArrayRef<llvm::Value *> EnterArgs; 2209 llvm::FunctionCallee ExitCallee; 2210 ArrayRef<llvm::Value *> ExitArgs; 2211 bool Conditional; 2212 llvm::BasicBlock *ContBlock = nullptr; 2213 2214 public: 2215 CommonActionTy(llvm::FunctionCallee EnterCallee, 2216 ArrayRef<llvm::Value *> EnterArgs, 2217 llvm::FunctionCallee ExitCallee, 2218 ArrayRef<llvm::Value *> ExitArgs, bool Conditional = false) 2219 : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee), 2220 ExitArgs(ExitArgs), Conditional(Conditional) {} 2221 void Enter(CodeGenFunction &CGF) override { 2222 llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs); 2223 if (Conditional) { 2224 llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes); 2225 auto *ThenBlock = CGF.createBasicBlock("omp_if.then"); 2226 ContBlock = CGF.createBasicBlock("omp_if.end"); 2227 // Generate the branch (If-stmt) 2228 CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock); 2229 CGF.EmitBlock(ThenBlock); 2230 } 2231 } 2232 void Done(CodeGenFunction &CGF) { 2233 // Emit the rest of blocks/branches 2234 CGF.EmitBranch(ContBlock); 2235 CGF.EmitBlock(ContBlock, true); 2236 } 2237 void Exit(CodeGenFunction &CGF) override { 2238 CGF.EmitRuntimeCall(ExitCallee, ExitArgs); 2239 } 2240 }; 2241 } // anonymous namespace 2242 2243 void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF, 2244 StringRef CriticalName, 2245 const RegionCodeGenTy &CriticalOpGen, 2246 SourceLocation Loc, const Expr *Hint) { 2247 // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]); 2248 // CriticalOpGen(); 2249 // __kmpc_end_critical(ident_t *, gtid, Lock); 2250 // Prepare arguments and build a call to __kmpc_critical 2251 if (!CGF.HaveInsertPoint()) 2252 return; 2253 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 2254 getCriticalRegionLock(CriticalName)}; 2255 
llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args), 2256 std::end(Args)); 2257 if (Hint) { 2258 EnterArgs.push_back(CGF.Builder.CreateIntCast( 2259 CGF.EmitScalarExpr(Hint), CGM.Int32Ty, /*isSigned=*/false)); 2260 } 2261 CommonActionTy Action( 2262 OMPBuilder.getOrCreateRuntimeFunction( 2263 CGM.getModule(), 2264 Hint ? OMPRTL___kmpc_critical_with_hint : OMPRTL___kmpc_critical), 2265 EnterArgs, 2266 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), 2267 OMPRTL___kmpc_end_critical), 2268 Args); 2269 CriticalOpGen.setAction(Action); 2270 emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen); 2271 } 2272 2273 void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF, 2274 const RegionCodeGenTy &MasterOpGen, 2275 SourceLocation Loc) { 2276 if (!CGF.HaveInsertPoint()) 2277 return; 2278 // if(__kmpc_master(ident_t *, gtid)) { 2279 // MasterOpGen(); 2280 // __kmpc_end_master(ident_t *, gtid); 2281 // } 2282 // Prepare arguments and build a call to __kmpc_master 2283 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 2284 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction( 2285 CGM.getModule(), OMPRTL___kmpc_master), 2286 Args, 2287 OMPBuilder.getOrCreateRuntimeFunction( 2288 CGM.getModule(), OMPRTL___kmpc_end_master), 2289 Args, 2290 /*Conditional=*/true); 2291 MasterOpGen.setAction(Action); 2292 emitInlinedDirective(CGF, OMPD_master, MasterOpGen); 2293 Action.Done(CGF); 2294 } 2295 2296 void CGOpenMPRuntime::emitMaskedRegion(CodeGenFunction &CGF, 2297 const RegionCodeGenTy &MaskedOpGen, 2298 SourceLocation Loc, const Expr *Filter) { 2299 if (!CGF.HaveInsertPoint()) 2300 return; 2301 // if(__kmpc_masked(ident_t *, gtid, filter)) { 2302 // MaskedOpGen(); 2303 // __kmpc_end_masked(iden_t *, gtid); 2304 // } 2305 // Prepare arguments and build a call to __kmpc_masked 2306 llvm::Value *FilterVal = Filter 2307 ? 
CGF.EmitScalarExpr(Filter, CGF.Int32Ty) 2308 : llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/0); 2309 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 2310 FilterVal}; 2311 llvm::Value *ArgsEnd[] = {emitUpdateLocation(CGF, Loc), 2312 getThreadID(CGF, Loc)}; 2313 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction( 2314 CGM.getModule(), OMPRTL___kmpc_masked), 2315 Args, 2316 OMPBuilder.getOrCreateRuntimeFunction( 2317 CGM.getModule(), OMPRTL___kmpc_end_masked), 2318 ArgsEnd, 2319 /*Conditional=*/true); 2320 MaskedOpGen.setAction(Action); 2321 emitInlinedDirective(CGF, OMPD_masked, MaskedOpGen); 2322 Action.Done(CGF); 2323 } 2324 2325 void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF, 2326 SourceLocation Loc) { 2327 if (!CGF.HaveInsertPoint()) 2328 return; 2329 if (CGF.CGM.getLangOpts().OpenMPIRBuilder) { 2330 OMPBuilder.createTaskyield(CGF.Builder); 2331 } else { 2332 // Build call __kmpc_omp_taskyield(loc, thread_id, 0); 2333 llvm::Value *Args[] = { 2334 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 2335 llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)}; 2336 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 2337 CGM.getModule(), OMPRTL___kmpc_omp_taskyield), 2338 Args); 2339 } 2340 2341 if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) 2342 Region->emitUntiedSwitch(CGF); 2343 } 2344 2345 void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF, 2346 const RegionCodeGenTy &TaskgroupOpGen, 2347 SourceLocation Loc) { 2348 if (!CGF.HaveInsertPoint()) 2349 return; 2350 // __kmpc_taskgroup(ident_t *, gtid); 2351 // TaskgroupOpGen(); 2352 // __kmpc_end_taskgroup(ident_t *, gtid); 2353 // Prepare arguments and build a call to __kmpc_taskgroup 2354 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 2355 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction( 2356 CGM.getModule(), OMPRTL___kmpc_taskgroup), 2357 Args, 2358 
OMPBuilder.getOrCreateRuntimeFunction( 2359 CGM.getModule(), OMPRTL___kmpc_end_taskgroup), 2360 Args); 2361 TaskgroupOpGen.setAction(Action); 2362 emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen); 2363 } 2364 2365 /// Given an array of pointers to variables, project the address of a 2366 /// given variable. 2367 static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array, 2368 unsigned Index, const VarDecl *Var) { 2369 // Pull out the pointer to the variable. 2370 Address PtrAddr = CGF.Builder.CreateConstArrayGEP(Array, Index); 2371 llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr); 2372 2373 Address Addr = Address(Ptr, CGF.getContext().getDeclAlign(Var)); 2374 Addr = CGF.Builder.CreateElementBitCast( 2375 Addr, CGF.ConvertTypeForMem(Var->getType())); 2376 return Addr; 2377 } 2378 2379 static llvm::Value *emitCopyprivateCopyFunction( 2380 CodeGenModule &CGM, llvm::Type *ArgsType, 2381 ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs, 2382 ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps, 2383 SourceLocation Loc) { 2384 ASTContext &C = CGM.getContext(); 2385 // void copy_func(void *LHSArg, void *RHSArg); 2386 FunctionArgList Args; 2387 ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 2388 ImplicitParamDecl::Other); 2389 ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 2390 ImplicitParamDecl::Other); 2391 Args.push_back(&LHSArg); 2392 Args.push_back(&RHSArg); 2393 const auto &CGFI = 2394 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 2395 std::string Name = 2396 CGM.getOpenMPRuntime().getName({"omp", "copyprivate", "copy_func"}); 2397 auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI), 2398 llvm::GlobalValue::InternalLinkage, Name, 2399 &CGM.getModule()); 2400 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI); 2401 Fn->setDoesNotRecurse(); 2402 CodeGenFunction CGF(CGM); 2403 
CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc); 2404 // Dest = (void*[n])(LHSArg); 2405 // Src = (void*[n])(RHSArg); 2406 Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 2407 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)), 2408 ArgsType), CGF.getPointerAlign()); 2409 Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 2410 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)), 2411 ArgsType), CGF.getPointerAlign()); 2412 // *(Type0*)Dst[0] = *(Type0*)Src[0]; 2413 // *(Type1*)Dst[1] = *(Type1*)Src[1]; 2414 // ... 2415 // *(Typen*)Dst[n] = *(Typen*)Src[n]; 2416 for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) { 2417 const auto *DestVar = 2418 cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl()); 2419 Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar); 2420 2421 const auto *SrcVar = 2422 cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl()); 2423 Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar); 2424 2425 const auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl(); 2426 QualType Type = VD->getType(); 2427 CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]); 2428 } 2429 CGF.FinishFunction(); 2430 return Fn; 2431 } 2432 2433 void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF, 2434 const RegionCodeGenTy &SingleOpGen, 2435 SourceLocation Loc, 2436 ArrayRef<const Expr *> CopyprivateVars, 2437 ArrayRef<const Expr *> SrcExprs, 2438 ArrayRef<const Expr *> DstExprs, 2439 ArrayRef<const Expr *> AssignmentOps) { 2440 if (!CGF.HaveInsertPoint()) 2441 return; 2442 assert(CopyprivateVars.size() == SrcExprs.size() && 2443 CopyprivateVars.size() == DstExprs.size() && 2444 CopyprivateVars.size() == AssignmentOps.size()); 2445 ASTContext &C = CGM.getContext(); 2446 // int32 did_it = 0; 2447 // if(__kmpc_single(ident_t *, gtid)) { 2448 // SingleOpGen(); 2449 // __kmpc_end_single(ident_t *, gtid); 2450 // did_it = 1; 2451 // } 2452 // call 
__kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>, 2453 // <copy_func>, did_it); 2454 2455 Address DidIt = Address::invalid(); 2456 if (!CopyprivateVars.empty()) { 2457 // int32 did_it = 0; 2458 QualType KmpInt32Ty = 2459 C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1); 2460 DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it"); 2461 CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt); 2462 } 2463 // Prepare arguments and build a call to __kmpc_single 2464 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 2465 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction( 2466 CGM.getModule(), OMPRTL___kmpc_single), 2467 Args, 2468 OMPBuilder.getOrCreateRuntimeFunction( 2469 CGM.getModule(), OMPRTL___kmpc_end_single), 2470 Args, 2471 /*Conditional=*/true); 2472 SingleOpGen.setAction(Action); 2473 emitInlinedDirective(CGF, OMPD_single, SingleOpGen); 2474 if (DidIt.isValid()) { 2475 // did_it = 1; 2476 CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt); 2477 } 2478 Action.Done(CGF); 2479 // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>, 2480 // <copy_func>, did_it); 2481 if (DidIt.isValid()) { 2482 llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size()); 2483 QualType CopyprivateArrayTy = C.getConstantArrayType( 2484 C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal, 2485 /*IndexTypeQuals=*/0); 2486 // Create a list of all private variables for copyprivate. 
2487 Address CopyprivateList = 2488 CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list"); 2489 for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) { 2490 Address Elem = CGF.Builder.CreateConstArrayGEP(CopyprivateList, I); 2491 CGF.Builder.CreateStore( 2492 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 2493 CGF.EmitLValue(CopyprivateVars[I]).getPointer(CGF), 2494 CGF.VoidPtrTy), 2495 Elem); 2496 } 2497 // Build function that copies private values from single region to all other 2498 // threads in the corresponding parallel region. 2499 llvm::Value *CpyFn = emitCopyprivateCopyFunction( 2500 CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy)->getPointerTo(), 2501 CopyprivateVars, SrcExprs, DstExprs, AssignmentOps, Loc); 2502 llvm::Value *BufSize = CGF.getTypeSize(CopyprivateArrayTy); 2503 Address CL = 2504 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(CopyprivateList, 2505 CGF.VoidPtrTy); 2506 llvm::Value *DidItVal = CGF.Builder.CreateLoad(DidIt); 2507 llvm::Value *Args[] = { 2508 emitUpdateLocation(CGF, Loc), // ident_t *<loc> 2509 getThreadID(CGF, Loc), // i32 <gtid> 2510 BufSize, // size_t <buf_size> 2511 CL.getPointer(), // void *<copyprivate list> 2512 CpyFn, // void (*) (void *, void *) <copy_func> 2513 DidItVal // i32 did_it 2514 }; 2515 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 2516 CGM.getModule(), OMPRTL___kmpc_copyprivate), 2517 Args); 2518 } 2519 } 2520 2521 void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF, 2522 const RegionCodeGenTy &OrderedOpGen, 2523 SourceLocation Loc, bool IsThreads) { 2524 if (!CGF.HaveInsertPoint()) 2525 return; 2526 // __kmpc_ordered(ident_t *, gtid); 2527 // OrderedOpGen(); 2528 // __kmpc_end_ordered(ident_t *, gtid); 2529 // Prepare arguments and build a call to __kmpc_ordered 2530 if (IsThreads) { 2531 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 2532 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction( 2533 CGM.getModule(), 
OMPRTL___kmpc_ordered), 2534 Args, 2535 OMPBuilder.getOrCreateRuntimeFunction( 2536 CGM.getModule(), OMPRTL___kmpc_end_ordered), 2537 Args); 2538 OrderedOpGen.setAction(Action); 2539 emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen); 2540 return; 2541 } 2542 emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen); 2543 } 2544 2545 unsigned CGOpenMPRuntime::getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind) { 2546 unsigned Flags; 2547 if (Kind == OMPD_for) 2548 Flags = OMP_IDENT_BARRIER_IMPL_FOR; 2549 else if (Kind == OMPD_sections) 2550 Flags = OMP_IDENT_BARRIER_IMPL_SECTIONS; 2551 else if (Kind == OMPD_single) 2552 Flags = OMP_IDENT_BARRIER_IMPL_SINGLE; 2553 else if (Kind == OMPD_barrier) 2554 Flags = OMP_IDENT_BARRIER_EXPL; 2555 else 2556 Flags = OMP_IDENT_BARRIER_IMPL; 2557 return Flags; 2558 } 2559 2560 void CGOpenMPRuntime::getDefaultScheduleAndChunk( 2561 CodeGenFunction &CGF, const OMPLoopDirective &S, 2562 OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const { 2563 // Check if the loop directive is actually a doacross loop directive. In this 2564 // case choose static, 1 schedule. 2565 if (llvm::any_of( 2566 S.getClausesOfKind<OMPOrderedClause>(), 2567 [](const OMPOrderedClause *C) { return C->getNumForLoops(); })) { 2568 ScheduleKind = OMPC_SCHEDULE_static; 2569 // Chunk size is 1 in this case. 
2570 llvm::APInt ChunkSize(32, 1); 2571 ChunkExpr = IntegerLiteral::Create( 2572 CGF.getContext(), ChunkSize, 2573 CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/0), 2574 SourceLocation()); 2575 } 2576 } 2577 2578 void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc, 2579 OpenMPDirectiveKind Kind, bool EmitChecks, 2580 bool ForceSimpleCall) { 2581 // Check if we should use the OMPBuilder 2582 auto *OMPRegionInfo = 2583 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo); 2584 if (CGF.CGM.getLangOpts().OpenMPIRBuilder) { 2585 CGF.Builder.restoreIP(OMPBuilder.createBarrier( 2586 CGF.Builder, Kind, ForceSimpleCall, EmitChecks)); 2587 return; 2588 } 2589 2590 if (!CGF.HaveInsertPoint()) 2591 return; 2592 // Build call __kmpc_cancel_barrier(loc, thread_id); 2593 // Build call __kmpc_barrier(loc, thread_id); 2594 unsigned Flags = getDefaultFlagsForBarriers(Kind); 2595 // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc, 2596 // thread_id); 2597 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags), 2598 getThreadID(CGF, Loc)}; 2599 if (OMPRegionInfo) { 2600 if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) { 2601 llvm::Value *Result = CGF.EmitRuntimeCall( 2602 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), 2603 OMPRTL___kmpc_cancel_barrier), 2604 Args); 2605 if (EmitChecks) { 2606 // if (__kmpc_cancel_barrier()) { 2607 // exit from construct; 2608 // } 2609 llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit"); 2610 llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue"); 2611 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result); 2612 CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB); 2613 CGF.EmitBlock(ExitBB); 2614 // exit from construct; 2615 CodeGenFunction::JumpDest CancelDestination = 2616 CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind()); 2617 CGF.EmitBranchThroughCleanup(CancelDestination); 2618 CGF.EmitBlock(ContBB, /*IsFinished=*/true); 2619 } 2620 
return; 2621 } 2622 } 2623 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 2624 CGM.getModule(), OMPRTL___kmpc_barrier), 2625 Args); 2626 } 2627 2628 /// Map the OpenMP loop schedule to the runtime enumeration. 2629 static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind, 2630 bool Chunked, bool Ordered) { 2631 switch (ScheduleKind) { 2632 case OMPC_SCHEDULE_static: 2633 return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked) 2634 : (Ordered ? OMP_ord_static : OMP_sch_static); 2635 case OMPC_SCHEDULE_dynamic: 2636 return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked; 2637 case OMPC_SCHEDULE_guided: 2638 return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked; 2639 case OMPC_SCHEDULE_runtime: 2640 return Ordered ? OMP_ord_runtime : OMP_sch_runtime; 2641 case OMPC_SCHEDULE_auto: 2642 return Ordered ? OMP_ord_auto : OMP_sch_auto; 2643 case OMPC_SCHEDULE_unknown: 2644 assert(!Chunked && "chunk was specified but schedule kind not known"); 2645 return Ordered ? OMP_ord_static : OMP_sch_static; 2646 } 2647 llvm_unreachable("Unexpected runtime schedule"); 2648 } 2649 2650 /// Map the OpenMP distribute schedule to the runtime enumeration. 2651 static OpenMPSchedType 2652 getRuntimeSchedule(OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) { 2653 // only static is allowed for dist_schedule 2654 return Chunked ? 
OMP_dist_sch_static_chunked : OMP_dist_sch_static; 2655 } 2656 2657 bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind, 2658 bool Chunked) const { 2659 OpenMPSchedType Schedule = 2660 getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false); 2661 return Schedule == OMP_sch_static; 2662 } 2663 2664 bool CGOpenMPRuntime::isStaticNonchunked( 2665 OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const { 2666 OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked); 2667 return Schedule == OMP_dist_sch_static; 2668 } 2669 2670 bool CGOpenMPRuntime::isStaticChunked(OpenMPScheduleClauseKind ScheduleKind, 2671 bool Chunked) const { 2672 OpenMPSchedType Schedule = 2673 getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false); 2674 return Schedule == OMP_sch_static_chunked; 2675 } 2676 2677 bool CGOpenMPRuntime::isStaticChunked( 2678 OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const { 2679 OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked); 2680 return Schedule == OMP_dist_sch_static_chunked; 2681 } 2682 2683 bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const { 2684 OpenMPSchedType Schedule = 2685 getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false); 2686 assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here"); 2687 return Schedule != OMP_sch_static; 2688 } 2689 2690 static int addMonoNonMonoModifier(CodeGenModule &CGM, OpenMPSchedType Schedule, 2691 OpenMPScheduleClauseModifier M1, 2692 OpenMPScheduleClauseModifier M2) { 2693 int Modifier = 0; 2694 switch (M1) { 2695 case OMPC_SCHEDULE_MODIFIER_monotonic: 2696 Modifier = OMP_sch_modifier_monotonic; 2697 break; 2698 case OMPC_SCHEDULE_MODIFIER_nonmonotonic: 2699 Modifier = OMP_sch_modifier_nonmonotonic; 2700 break; 2701 case OMPC_SCHEDULE_MODIFIER_simd: 2702 if (Schedule == OMP_sch_static_chunked) 2703 Schedule = OMP_sch_static_balanced_chunked; 2704 break; 2705 case 
OMPC_SCHEDULE_MODIFIER_last: 2706 case OMPC_SCHEDULE_MODIFIER_unknown: 2707 break; 2708 } 2709 switch (M2) { 2710 case OMPC_SCHEDULE_MODIFIER_monotonic: 2711 Modifier = OMP_sch_modifier_monotonic; 2712 break; 2713 case OMPC_SCHEDULE_MODIFIER_nonmonotonic: 2714 Modifier = OMP_sch_modifier_nonmonotonic; 2715 break; 2716 case OMPC_SCHEDULE_MODIFIER_simd: 2717 if (Schedule == OMP_sch_static_chunked) 2718 Schedule = OMP_sch_static_balanced_chunked; 2719 break; 2720 case OMPC_SCHEDULE_MODIFIER_last: 2721 case OMPC_SCHEDULE_MODIFIER_unknown: 2722 break; 2723 } 2724 // OpenMP 5.0, 2.9.2 Worksharing-Loop Construct, Desription. 2725 // If the static schedule kind is specified or if the ordered clause is 2726 // specified, and if the nonmonotonic modifier is not specified, the effect is 2727 // as if the monotonic modifier is specified. Otherwise, unless the monotonic 2728 // modifier is specified, the effect is as if the nonmonotonic modifier is 2729 // specified. 2730 if (CGM.getLangOpts().OpenMP >= 50 && Modifier == 0) { 2731 if (!(Schedule == OMP_sch_static_chunked || Schedule == OMP_sch_static || 2732 Schedule == OMP_sch_static_balanced_chunked || 2733 Schedule == OMP_ord_static_chunked || Schedule == OMP_ord_static || 2734 Schedule == OMP_dist_sch_static_chunked || 2735 Schedule == OMP_dist_sch_static)) 2736 Modifier = OMP_sch_modifier_nonmonotonic; 2737 } 2738 return Schedule | Modifier; 2739 } 2740 2741 void CGOpenMPRuntime::emitForDispatchInit( 2742 CodeGenFunction &CGF, SourceLocation Loc, 2743 const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned, 2744 bool Ordered, const DispatchRTInput &DispatchValues) { 2745 if (!CGF.HaveInsertPoint()) 2746 return; 2747 OpenMPSchedType Schedule = getRuntimeSchedule( 2748 ScheduleKind.Schedule, DispatchValues.Chunk != nullptr, Ordered); 2749 assert(Ordered || 2750 (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked && 2751 Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked && 2752 
Schedule != OMP_sch_static_balanced_chunked)); 2753 // Call __kmpc_dispatch_init( 2754 // ident_t *loc, kmp_int32 tid, kmp_int32 schedule, 2755 // kmp_int[32|64] lower, kmp_int[32|64] upper, 2756 // kmp_int[32|64] stride, kmp_int[32|64] chunk); 2757 2758 // If the Chunk was not specified in the clause - use default value 1. 2759 llvm::Value *Chunk = DispatchValues.Chunk ? DispatchValues.Chunk 2760 : CGF.Builder.getIntN(IVSize, 1); 2761 llvm::Value *Args[] = { 2762 emitUpdateLocation(CGF, Loc), 2763 getThreadID(CGF, Loc), 2764 CGF.Builder.getInt32(addMonoNonMonoModifier( 2765 CGM, Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type 2766 DispatchValues.LB, // Lower 2767 DispatchValues.UB, // Upper 2768 CGF.Builder.getIntN(IVSize, 1), // Stride 2769 Chunk // Chunk 2770 }; 2771 CGF.EmitRuntimeCall(createDispatchInitFunction(IVSize, IVSigned), Args); 2772 } 2773 2774 static void emitForStaticInitCall( 2775 CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId, 2776 llvm::FunctionCallee ForStaticInitFunction, OpenMPSchedType Schedule, 2777 OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2, 2778 const CGOpenMPRuntime::StaticRTInput &Values) { 2779 if (!CGF.HaveInsertPoint()) 2780 return; 2781 2782 assert(!Values.Ordered); 2783 assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked || 2784 Schedule == OMP_sch_static_balanced_chunked || 2785 Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked || 2786 Schedule == OMP_dist_sch_static || 2787 Schedule == OMP_dist_sch_static_chunked); 2788 2789 // Call __kmpc_for_static_init( 2790 // ident_t *loc, kmp_int32 tid, kmp_int32 schedtype, 2791 // kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower, 2792 // kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride, 2793 // kmp_int[32|64] incr, kmp_int[32|64] chunk); 2794 llvm::Value *Chunk = Values.Chunk; 2795 if (Chunk == nullptr) { 2796 assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static || 2797 Schedule == 
OMP_dist_sch_static) && 2798 "expected static non-chunked schedule"); 2799 // If the Chunk was not specified in the clause - use default value 1. 2800 Chunk = CGF.Builder.getIntN(Values.IVSize, 1); 2801 } else { 2802 assert((Schedule == OMP_sch_static_chunked || 2803 Schedule == OMP_sch_static_balanced_chunked || 2804 Schedule == OMP_ord_static_chunked || 2805 Schedule == OMP_dist_sch_static_chunked) && 2806 "expected static chunked schedule"); 2807 } 2808 llvm::Value *Args[] = { 2809 UpdateLocation, 2810 ThreadId, 2811 CGF.Builder.getInt32(addMonoNonMonoModifier(CGF.CGM, Schedule, M1, 2812 M2)), // Schedule type 2813 Values.IL.getPointer(), // &isLastIter 2814 Values.LB.getPointer(), // &LB 2815 Values.UB.getPointer(), // &UB 2816 Values.ST.getPointer(), // &Stride 2817 CGF.Builder.getIntN(Values.IVSize, 1), // Incr 2818 Chunk // Chunk 2819 }; 2820 CGF.EmitRuntimeCall(ForStaticInitFunction, Args); 2821 } 2822 2823 void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF, 2824 SourceLocation Loc, 2825 OpenMPDirectiveKind DKind, 2826 const OpenMPScheduleTy &ScheduleKind, 2827 const StaticRTInput &Values) { 2828 OpenMPSchedType ScheduleNum = getRuntimeSchedule( 2829 ScheduleKind.Schedule, Values.Chunk != nullptr, Values.Ordered); 2830 assert(isOpenMPWorksharingDirective(DKind) && 2831 "Expected loop-based or sections-based directive."); 2832 llvm::Value *UpdatedLocation = emitUpdateLocation(CGF, Loc, 2833 isOpenMPLoopDirective(DKind) 2834 ? 
OMP_IDENT_WORK_LOOP 2835 : OMP_IDENT_WORK_SECTIONS); 2836 llvm::Value *ThreadId = getThreadID(CGF, Loc); 2837 llvm::FunctionCallee StaticInitFunction = 2838 createForStaticInitFunction(Values.IVSize, Values.IVSigned, false); 2839 auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc); 2840 emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction, 2841 ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, Values); 2842 } 2843 2844 void CGOpenMPRuntime::emitDistributeStaticInit( 2845 CodeGenFunction &CGF, SourceLocation Loc, 2846 OpenMPDistScheduleClauseKind SchedKind, 2847 const CGOpenMPRuntime::StaticRTInput &Values) { 2848 OpenMPSchedType ScheduleNum = 2849 getRuntimeSchedule(SchedKind, Values.Chunk != nullptr); 2850 llvm::Value *UpdatedLocation = 2851 emitUpdateLocation(CGF, Loc, OMP_IDENT_WORK_DISTRIBUTE); 2852 llvm::Value *ThreadId = getThreadID(CGF, Loc); 2853 llvm::FunctionCallee StaticInitFunction; 2854 bool isGPUDistribute = 2855 CGM.getLangOpts().OpenMPIsDevice && 2856 (CGM.getTriple().isAMDGCN() || CGM.getTriple().isNVPTX()); 2857 StaticInitFunction = createForStaticInitFunction( 2858 Values.IVSize, Values.IVSigned, isGPUDistribute); 2859 2860 emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction, 2861 ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown, 2862 OMPC_SCHEDULE_MODIFIER_unknown, Values); 2863 } 2864 2865 void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF, 2866 SourceLocation Loc, 2867 OpenMPDirectiveKind DKind) { 2868 if (!CGF.HaveInsertPoint()) 2869 return; 2870 // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid); 2871 llvm::Value *Args[] = { 2872 emitUpdateLocation(CGF, Loc, 2873 isOpenMPDistributeDirective(DKind) 2874 ? OMP_IDENT_WORK_DISTRIBUTE 2875 : isOpenMPLoopDirective(DKind) 2876 ? 
OMP_IDENT_WORK_LOOP 2877 : OMP_IDENT_WORK_SECTIONS), 2878 getThreadID(CGF, Loc)}; 2879 auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc); 2880 if (isOpenMPDistributeDirective(DKind) && CGM.getLangOpts().OpenMPIsDevice && 2881 (CGM.getTriple().isAMDGCN() || CGM.getTriple().isNVPTX())) 2882 CGF.EmitRuntimeCall( 2883 OMPBuilder.getOrCreateRuntimeFunction( 2884 CGM.getModule(), OMPRTL___kmpc_distribute_static_fini), 2885 Args); 2886 else 2887 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 2888 CGM.getModule(), OMPRTL___kmpc_for_static_fini), 2889 Args); 2890 } 2891 2892 void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF, 2893 SourceLocation Loc, 2894 unsigned IVSize, 2895 bool IVSigned) { 2896 if (!CGF.HaveInsertPoint()) 2897 return; 2898 // Call __kmpc_for_dynamic_fini_(4|8)[u](ident_t *loc, kmp_int32 tid); 2899 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 2900 CGF.EmitRuntimeCall(createDispatchFiniFunction(IVSize, IVSigned), Args); 2901 } 2902 2903 llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF, 2904 SourceLocation Loc, unsigned IVSize, 2905 bool IVSigned, Address IL, 2906 Address LB, Address UB, 2907 Address ST) { 2908 // Call __kmpc_dispatch_next( 2909 // ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter, 2910 // kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper, 2911 // kmp_int[32|64] *p_stride); 2912 llvm::Value *Args[] = { 2913 emitUpdateLocation(CGF, Loc), 2914 getThreadID(CGF, Loc), 2915 IL.getPointer(), // &isLastIter 2916 LB.getPointer(), // &Lower 2917 UB.getPointer(), // &Upper 2918 ST.getPointer() // &Stride 2919 }; 2920 llvm::Value *Call = 2921 CGF.EmitRuntimeCall(createDispatchNextFunction(IVSize, IVSigned), Args); 2922 return CGF.EmitScalarConversion( 2923 Call, CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/1), 2924 CGF.getContext().BoolTy, Loc); 2925 } 2926 2927 void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF, 2928 llvm::Value 
*NumThreads, 2929 SourceLocation Loc) { 2930 if (!CGF.HaveInsertPoint()) 2931 return; 2932 // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads) 2933 llvm::Value *Args[] = { 2934 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 2935 CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)}; 2936 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 2937 CGM.getModule(), OMPRTL___kmpc_push_num_threads), 2938 Args); 2939 } 2940 2941 void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF, 2942 ProcBindKind ProcBind, 2943 SourceLocation Loc) { 2944 if (!CGF.HaveInsertPoint()) 2945 return; 2946 assert(ProcBind != OMP_PROC_BIND_unknown && "Unsupported proc_bind value."); 2947 // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind) 2948 llvm::Value *Args[] = { 2949 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 2950 llvm::ConstantInt::get(CGM.IntTy, unsigned(ProcBind), /*isSigned=*/true)}; 2951 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 2952 CGM.getModule(), OMPRTL___kmpc_push_proc_bind), 2953 Args); 2954 } 2955 2956 void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>, 2957 SourceLocation Loc, llvm::AtomicOrdering AO) { 2958 if (CGF.CGM.getLangOpts().OpenMPIRBuilder) { 2959 OMPBuilder.createFlush(CGF.Builder); 2960 } else { 2961 if (!CGF.HaveInsertPoint()) 2962 return; 2963 // Build call void __kmpc_flush(ident_t *loc) 2964 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 2965 CGM.getModule(), OMPRTL___kmpc_flush), 2966 emitUpdateLocation(CGF, Loc)); 2967 } 2968 } 2969 2970 namespace { 2971 /// Indexes of fields for type kmp_task_t. 2972 enum KmpTaskTFields { 2973 /// List of shared variables. 2974 KmpTaskTShareds, 2975 /// Task routine. 2976 KmpTaskTRoutine, 2977 /// Partition id for the untied tasks. 2978 KmpTaskTPartId, 2979 /// Function with call of destructors for private variables. 2980 Data1, 2981 /// Task priority. 
2982 Data2, 2983 /// (Taskloops only) Lower bound. 2984 KmpTaskTLowerBound, 2985 /// (Taskloops only) Upper bound. 2986 KmpTaskTUpperBound, 2987 /// (Taskloops only) Stride. 2988 KmpTaskTStride, 2989 /// (Taskloops only) Is last iteration flag. 2990 KmpTaskTLastIter, 2991 /// (Taskloops only) Reduction data. 2992 KmpTaskTReductions, 2993 }; 2994 } // anonymous namespace 2995 2996 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::empty() const { 2997 return OffloadEntriesTargetRegion.empty() && 2998 OffloadEntriesDeviceGlobalVar.empty(); 2999 } 3000 3001 /// Initialize target region entry. 3002 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: 3003 initializeTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID, 3004 StringRef ParentName, unsigned LineNum, 3005 unsigned Order) { 3006 assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is " 3007 "only required for the device " 3008 "code generation."); 3009 OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = 3010 OffloadEntryInfoTargetRegion(Order, /*Addr=*/nullptr, /*ID=*/nullptr, 3011 OMPTargetRegionEntryTargetRegion); 3012 ++OffloadingEntriesNum; 3013 } 3014 3015 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: 3016 registerTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID, 3017 StringRef ParentName, unsigned LineNum, 3018 llvm::Constant *Addr, llvm::Constant *ID, 3019 OMPTargetRegionEntryKind Flags) { 3020 // If we are emitting code for a target, the entry is already initialized, 3021 // only has to be registered. 3022 if (CGM.getLangOpts().OpenMPIsDevice) { 3023 // This could happen if the device compilation is invoked standalone. 
3024 if (!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum)) 3025 return; 3026 auto &Entry = 3027 OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum]; 3028 Entry.setAddress(Addr); 3029 Entry.setID(ID); 3030 Entry.setFlags(Flags); 3031 } else { 3032 if (Flags == 3033 OffloadEntriesInfoManagerTy::OMPTargetRegionEntryTargetRegion && 3034 hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum, 3035 /*IgnoreAddressId*/ true)) 3036 return; 3037 assert(!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum) && 3038 "Target region entry already registered!"); 3039 OffloadEntryInfoTargetRegion Entry(OffloadingEntriesNum, Addr, ID, Flags); 3040 OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = Entry; 3041 ++OffloadingEntriesNum; 3042 } 3043 } 3044 3045 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::hasTargetRegionEntryInfo( 3046 unsigned DeviceID, unsigned FileID, StringRef ParentName, unsigned LineNum, 3047 bool IgnoreAddressId) const { 3048 auto PerDevice = OffloadEntriesTargetRegion.find(DeviceID); 3049 if (PerDevice == OffloadEntriesTargetRegion.end()) 3050 return false; 3051 auto PerFile = PerDevice->second.find(FileID); 3052 if (PerFile == PerDevice->second.end()) 3053 return false; 3054 auto PerParentName = PerFile->second.find(ParentName); 3055 if (PerParentName == PerFile->second.end()) 3056 return false; 3057 auto PerLine = PerParentName->second.find(LineNum); 3058 if (PerLine == PerParentName->second.end()) 3059 return false; 3060 // Fail if this entry is already registered. 3061 if (!IgnoreAddressId && 3062 (PerLine->second.getAddress() || PerLine->second.getID())) 3063 return false; 3064 return true; 3065 } 3066 3067 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::actOnTargetRegionEntriesInfo( 3068 const OffloadTargetRegionEntryInfoActTy &Action) { 3069 // Scan all target region entries and perform the provided action. 
3070 for (const auto &D : OffloadEntriesTargetRegion) 3071 for (const auto &F : D.second) 3072 for (const auto &P : F.second) 3073 for (const auto &L : P.second) 3074 Action(D.first, F.first, P.first(), L.first, L.second); 3075 } 3076 3077 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: 3078 initializeDeviceGlobalVarEntryInfo(StringRef Name, 3079 OMPTargetGlobalVarEntryKind Flags, 3080 unsigned Order) { 3081 assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is " 3082 "only required for the device " 3083 "code generation."); 3084 OffloadEntriesDeviceGlobalVar.try_emplace(Name, Order, Flags); 3085 ++OffloadingEntriesNum; 3086 } 3087 3088 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: 3089 registerDeviceGlobalVarEntryInfo(StringRef VarName, llvm::Constant *Addr, 3090 CharUnits VarSize, 3091 OMPTargetGlobalVarEntryKind Flags, 3092 llvm::GlobalValue::LinkageTypes Linkage) { 3093 if (CGM.getLangOpts().OpenMPIsDevice) { 3094 // This could happen if the device compilation is invoked standalone. 
3095 if (!hasDeviceGlobalVarEntryInfo(VarName)) 3096 return; 3097 auto &Entry = OffloadEntriesDeviceGlobalVar[VarName]; 3098 if (Entry.getAddress() && hasDeviceGlobalVarEntryInfo(VarName)) { 3099 if (Entry.getVarSize().isZero()) { 3100 Entry.setVarSize(VarSize); 3101 Entry.setLinkage(Linkage); 3102 } 3103 return; 3104 } 3105 Entry.setVarSize(VarSize); 3106 Entry.setLinkage(Linkage); 3107 Entry.setAddress(Addr); 3108 } else { 3109 if (hasDeviceGlobalVarEntryInfo(VarName)) { 3110 auto &Entry = OffloadEntriesDeviceGlobalVar[VarName]; 3111 assert(Entry.isValid() && Entry.getFlags() == Flags && 3112 "Entry not initialized!"); 3113 if (Entry.getVarSize().isZero()) { 3114 Entry.setVarSize(VarSize); 3115 Entry.setLinkage(Linkage); 3116 } 3117 return; 3118 } 3119 OffloadEntriesDeviceGlobalVar.try_emplace( 3120 VarName, OffloadingEntriesNum, Addr, VarSize, Flags, Linkage); 3121 ++OffloadingEntriesNum; 3122 } 3123 } 3124 3125 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: 3126 actOnDeviceGlobalVarEntriesInfo( 3127 const OffloadDeviceGlobalVarEntryInfoActTy &Action) { 3128 // Scan all target region entries and perform the provided action. 3129 for (const auto &E : OffloadEntriesDeviceGlobalVar) 3130 Action(E.getKey(), E.getValue()); 3131 } 3132 3133 void CGOpenMPRuntime::createOffloadEntry( 3134 llvm::Constant *ID, llvm::Constant *Addr, uint64_t Size, int32_t Flags, 3135 llvm::GlobalValue::LinkageTypes Linkage) { 3136 StringRef Name = Addr->getName(); 3137 llvm::Module &M = CGM.getModule(); 3138 llvm::LLVMContext &C = M.getContext(); 3139 3140 // Create constant string with the name. 
3141 llvm::Constant *StrPtrInit = llvm::ConstantDataArray::getString(C, Name); 3142 3143 std::string StringName = getName({"omp_offloading", "entry_name"}); 3144 auto *Str = new llvm::GlobalVariable( 3145 M, StrPtrInit->getType(), /*isConstant=*/true, 3146 llvm::GlobalValue::InternalLinkage, StrPtrInit, StringName); 3147 Str->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); 3148 3149 llvm::Constant *Data[] = { 3150 llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast(ID, CGM.VoidPtrTy), 3151 llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast(Str, CGM.Int8PtrTy), 3152 llvm::ConstantInt::get(CGM.SizeTy, Size), 3153 llvm::ConstantInt::get(CGM.Int32Ty, Flags), 3154 llvm::ConstantInt::get(CGM.Int32Ty, 0)}; 3155 std::string EntryName = getName({"omp_offloading", "entry", ""}); 3156 llvm::GlobalVariable *Entry = createGlobalStruct( 3157 CGM, getTgtOffloadEntryQTy(), /*IsConstant=*/true, Data, 3158 Twine(EntryName).concat(Name), llvm::GlobalValue::WeakAnyLinkage); 3159 3160 // The entry has to be created in the section the linker expects it to be. 3161 Entry->setSection("omp_offloading_entries"); 3162 } 3163 3164 void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() { 3165 // Emit the offloading entries and metadata so that the device codegen side 3166 // can easily figure out what to emit. The produced metadata looks like 3167 // this: 3168 // 3169 // !omp_offload.info = !{!1, ...} 3170 // 3171 // Right now we only generate metadata for function that contain target 3172 // regions. 3173 3174 // If we are in simd mode or there are no entries, we don't need to do 3175 // anything. 
3176 if (CGM.getLangOpts().OpenMPSimd || OffloadEntriesInfoManager.empty()) 3177 return; 3178 3179 llvm::Module &M = CGM.getModule(); 3180 llvm::LLVMContext &C = M.getContext(); 3181 SmallVector<std::tuple<const OffloadEntriesInfoManagerTy::OffloadEntryInfo *, 3182 SourceLocation, StringRef>, 3183 16> 3184 OrderedEntries(OffloadEntriesInfoManager.size()); 3185 llvm::SmallVector<StringRef, 16> ParentFunctions( 3186 OffloadEntriesInfoManager.size()); 3187 3188 // Auxiliary methods to create metadata values and strings. 3189 auto &&GetMDInt = [this](unsigned V) { 3190 return llvm::ConstantAsMetadata::get( 3191 llvm::ConstantInt::get(CGM.Int32Ty, V)); 3192 }; 3193 3194 auto &&GetMDString = [&C](StringRef V) { return llvm::MDString::get(C, V); }; 3195 3196 // Create the offloading info metadata node. 3197 llvm::NamedMDNode *MD = M.getOrInsertNamedMetadata("omp_offload.info"); 3198 3199 // Create function that emits metadata for each target region entry; 3200 auto &&TargetRegionMetadataEmitter = 3201 [this, &C, MD, &OrderedEntries, &ParentFunctions, &GetMDInt, 3202 &GetMDString]( 3203 unsigned DeviceID, unsigned FileID, StringRef ParentName, 3204 unsigned Line, 3205 const OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion &E) { 3206 // Generate metadata for target regions. Each entry of this metadata 3207 // contains: 3208 // - Entry 0 -> Kind of this type of metadata (0). 3209 // - Entry 1 -> Device ID of the file where the entry was identified. 3210 // - Entry 2 -> File ID of the file where the entry was identified. 3211 // - Entry 3 -> Mangled name of the function where the entry was 3212 // identified. 3213 // - Entry 4 -> Line in the file where the entry was identified. 3214 // - Entry 5 -> Order the entry was created. 3215 // The first element of the metadata node is the kind. 
3216 llvm::Metadata *Ops[] = {GetMDInt(E.getKind()), GetMDInt(DeviceID), 3217 GetMDInt(FileID), GetMDString(ParentName), 3218 GetMDInt(Line), GetMDInt(E.getOrder())}; 3219 3220 SourceLocation Loc; 3221 for (auto I = CGM.getContext().getSourceManager().fileinfo_begin(), 3222 E = CGM.getContext().getSourceManager().fileinfo_end(); 3223 I != E; ++I) { 3224 if (I->getFirst()->getUniqueID().getDevice() == DeviceID && 3225 I->getFirst()->getUniqueID().getFile() == FileID) { 3226 Loc = CGM.getContext().getSourceManager().translateFileLineCol( 3227 I->getFirst(), Line, 1); 3228 break; 3229 } 3230 } 3231 // Save this entry in the right position of the ordered entries array. 3232 OrderedEntries[E.getOrder()] = std::make_tuple(&E, Loc, ParentName); 3233 ParentFunctions[E.getOrder()] = ParentName; 3234 3235 // Add metadata to the named metadata node. 3236 MD->addOperand(llvm::MDNode::get(C, Ops)); 3237 }; 3238 3239 OffloadEntriesInfoManager.actOnTargetRegionEntriesInfo( 3240 TargetRegionMetadataEmitter); 3241 3242 // Create function that emits metadata for each device global variable entry; 3243 auto &&DeviceGlobalVarMetadataEmitter = 3244 [&C, &OrderedEntries, &GetMDInt, &GetMDString, 3245 MD](StringRef MangledName, 3246 const OffloadEntriesInfoManagerTy::OffloadEntryInfoDeviceGlobalVar 3247 &E) { 3248 // Generate metadata for global variables. Each entry of this metadata 3249 // contains: 3250 // - Entry 0 -> Kind of this type of metadata (1). 3251 // - Entry 1 -> Mangled name of the variable. 3252 // - Entry 2 -> Declare target kind. 3253 // - Entry 3 -> Order the entry was created. 3254 // The first element of the metadata node is the kind. 3255 llvm::Metadata *Ops[] = { 3256 GetMDInt(E.getKind()), GetMDString(MangledName), 3257 GetMDInt(E.getFlags()), GetMDInt(E.getOrder())}; 3258 3259 // Save this entry in the right position of the ordered entries array. 
3260 OrderedEntries[E.getOrder()] = 3261 std::make_tuple(&E, SourceLocation(), MangledName); 3262 3263 // Add metadata to the named metadata node. 3264 MD->addOperand(llvm::MDNode::get(C, Ops)); 3265 }; 3266 3267 OffloadEntriesInfoManager.actOnDeviceGlobalVarEntriesInfo( 3268 DeviceGlobalVarMetadataEmitter); 3269 3270 for (const auto &E : OrderedEntries) { 3271 assert(std::get<0>(E) && "All ordered entries must exist!"); 3272 if (const auto *CE = 3273 dyn_cast<OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion>( 3274 std::get<0>(E))) { 3275 if (!CE->getID() || !CE->getAddress()) { 3276 // Do not blame the entry if the parent funtion is not emitted. 3277 StringRef FnName = ParentFunctions[CE->getOrder()]; 3278 if (!CGM.GetGlobalValue(FnName)) 3279 continue; 3280 unsigned DiagID = CGM.getDiags().getCustomDiagID( 3281 DiagnosticsEngine::Error, 3282 "Offloading entry for target region in %0 is incorrect: either the " 3283 "address or the ID is invalid."); 3284 CGM.getDiags().Report(std::get<1>(E), DiagID) << FnName; 3285 continue; 3286 } 3287 createOffloadEntry(CE->getID(), CE->getAddress(), /*Size=*/0, 3288 CE->getFlags(), llvm::GlobalValue::WeakAnyLinkage); 3289 } else if (const auto *CE = dyn_cast<OffloadEntriesInfoManagerTy:: 3290 OffloadEntryInfoDeviceGlobalVar>( 3291 std::get<0>(E))) { 3292 OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags = 3293 static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>( 3294 CE->getFlags()); 3295 switch (Flags) { 3296 case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo: { 3297 if (CGM.getLangOpts().OpenMPIsDevice && 3298 CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory()) 3299 continue; 3300 if (!CE->getAddress()) { 3301 unsigned DiagID = CGM.getDiags().getCustomDiagID( 3302 DiagnosticsEngine::Error, "Offloading entry for declare target " 3303 "variable %0 is incorrect: the " 3304 "address is invalid."); 3305 CGM.getDiags().Report(std::get<1>(E), DiagID) << std::get<2>(E); 3306 
continue; 3307 } 3308 // The vaiable has no definition - no need to add the entry. 3309 if (CE->getVarSize().isZero()) 3310 continue; 3311 break; 3312 } 3313 case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink: 3314 assert(((CGM.getLangOpts().OpenMPIsDevice && !CE->getAddress()) || 3315 (!CGM.getLangOpts().OpenMPIsDevice && CE->getAddress())) && 3316 "Declaret target link address is set."); 3317 if (CGM.getLangOpts().OpenMPIsDevice) 3318 continue; 3319 if (!CE->getAddress()) { 3320 unsigned DiagID = CGM.getDiags().getCustomDiagID( 3321 DiagnosticsEngine::Error, 3322 "Offloading entry for declare target variable is incorrect: the " 3323 "address is invalid."); 3324 CGM.getDiags().Report(DiagID); 3325 continue; 3326 } 3327 break; 3328 } 3329 createOffloadEntry(CE->getAddress(), CE->getAddress(), 3330 CE->getVarSize().getQuantity(), Flags, 3331 CE->getLinkage()); 3332 } else { 3333 llvm_unreachable("Unsupported entry kind."); 3334 } 3335 } 3336 } 3337 3338 /// Loads all the offload entries information from the host IR 3339 /// metadata. 3340 void CGOpenMPRuntime::loadOffloadInfoMetadata() { 3341 // If we are in target mode, load the metadata from the host IR. This code has 3342 // to match the metadaata creation in createOffloadEntriesAndInfoMetadata(). 
3343 3344 if (!CGM.getLangOpts().OpenMPIsDevice) 3345 return; 3346 3347 if (CGM.getLangOpts().OMPHostIRFile.empty()) 3348 return; 3349 3350 auto Buf = llvm::MemoryBuffer::getFile(CGM.getLangOpts().OMPHostIRFile); 3351 if (auto EC = Buf.getError()) { 3352 CGM.getDiags().Report(diag::err_cannot_open_file) 3353 << CGM.getLangOpts().OMPHostIRFile << EC.message(); 3354 return; 3355 } 3356 3357 llvm::LLVMContext C; 3358 auto ME = expectedToErrorOrAndEmitErrors( 3359 C, llvm::parseBitcodeFile(Buf.get()->getMemBufferRef(), C)); 3360 3361 if (auto EC = ME.getError()) { 3362 unsigned DiagID = CGM.getDiags().getCustomDiagID( 3363 DiagnosticsEngine::Error, "Unable to parse host IR file '%0':'%1'"); 3364 CGM.getDiags().Report(DiagID) 3365 << CGM.getLangOpts().OMPHostIRFile << EC.message(); 3366 return; 3367 } 3368 3369 llvm::NamedMDNode *MD = ME.get()->getNamedMetadata("omp_offload.info"); 3370 if (!MD) 3371 return; 3372 3373 for (llvm::MDNode *MN : MD->operands()) { 3374 auto &&GetMDInt = [MN](unsigned Idx) { 3375 auto *V = cast<llvm::ConstantAsMetadata>(MN->getOperand(Idx)); 3376 return cast<llvm::ConstantInt>(V->getValue())->getZExtValue(); 3377 }; 3378 3379 auto &&GetMDString = [MN](unsigned Idx) { 3380 auto *V = cast<llvm::MDString>(MN->getOperand(Idx)); 3381 return V->getString(); 3382 }; 3383 3384 switch (GetMDInt(0)) { 3385 default: 3386 llvm_unreachable("Unexpected metadata!"); 3387 break; 3388 case OffloadEntriesInfoManagerTy::OffloadEntryInfo:: 3389 OffloadingEntryInfoTargetRegion: 3390 OffloadEntriesInfoManager.initializeTargetRegionEntryInfo( 3391 /*DeviceID=*/GetMDInt(1), /*FileID=*/GetMDInt(2), 3392 /*ParentName=*/GetMDString(3), /*Line=*/GetMDInt(4), 3393 /*Order=*/GetMDInt(5)); 3394 break; 3395 case OffloadEntriesInfoManagerTy::OffloadEntryInfo:: 3396 OffloadingEntryInfoDeviceGlobalVar: 3397 OffloadEntriesInfoManager.initializeDeviceGlobalVarEntryInfo( 3398 /*MangledName=*/GetMDString(1), 3399 
static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>( 3400 /*Flags=*/GetMDInt(2)), 3401 /*Order=*/GetMDInt(3)); 3402 break; 3403 } 3404 } 3405 } 3406 3407 void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) { 3408 if (!KmpRoutineEntryPtrTy) { 3409 // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type. 3410 ASTContext &C = CGM.getContext(); 3411 QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy}; 3412 FunctionProtoType::ExtProtoInfo EPI; 3413 KmpRoutineEntryPtrQTy = C.getPointerType( 3414 C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI)); 3415 KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy); 3416 } 3417 } 3418 3419 QualType CGOpenMPRuntime::getTgtOffloadEntryQTy() { 3420 // Make sure the type of the entry is already created. This is the type we 3421 // have to create: 3422 // struct __tgt_offload_entry{ 3423 // void *addr; // Pointer to the offload entry info. 3424 // // (function or global) 3425 // char *name; // Name of the function or global. 3426 // size_t size; // Size of the entry info (0 if it a function). 3427 // int32_t flags; // Flags associated with the entry, e.g. 'link'. 3428 // int32_t reserved; // Reserved, to use by the runtime library. 
3429 // }; 3430 if (TgtOffloadEntryQTy.isNull()) { 3431 ASTContext &C = CGM.getContext(); 3432 RecordDecl *RD = C.buildImplicitRecord("__tgt_offload_entry"); 3433 RD->startDefinition(); 3434 addFieldToRecordDecl(C, RD, C.VoidPtrTy); 3435 addFieldToRecordDecl(C, RD, C.getPointerType(C.CharTy)); 3436 addFieldToRecordDecl(C, RD, C.getSizeType()); 3437 addFieldToRecordDecl( 3438 C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true)); 3439 addFieldToRecordDecl( 3440 C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true)); 3441 RD->completeDefinition(); 3442 RD->addAttr(PackedAttr::CreateImplicit(C)); 3443 TgtOffloadEntryQTy = C.getRecordType(RD); 3444 } 3445 return TgtOffloadEntryQTy; 3446 } 3447 3448 namespace { 3449 struct PrivateHelpersTy { 3450 PrivateHelpersTy(const Expr *OriginalRef, const VarDecl *Original, 3451 const VarDecl *PrivateCopy, const VarDecl *PrivateElemInit) 3452 : OriginalRef(OriginalRef), Original(Original), PrivateCopy(PrivateCopy), 3453 PrivateElemInit(PrivateElemInit) {} 3454 PrivateHelpersTy(const VarDecl *Original) : Original(Original) {} 3455 const Expr *OriginalRef = nullptr; 3456 const VarDecl *Original = nullptr; 3457 const VarDecl *PrivateCopy = nullptr; 3458 const VarDecl *PrivateElemInit = nullptr; 3459 bool isLocalPrivate() const { 3460 return !OriginalRef && !PrivateCopy && !PrivateElemInit; 3461 } 3462 }; 3463 typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy; 3464 } // anonymous namespace 3465 3466 static bool isAllocatableDecl(const VarDecl *VD) { 3467 const VarDecl *CVD = VD->getCanonicalDecl(); 3468 if (!CVD->hasAttr<OMPAllocateDeclAttr>()) 3469 return false; 3470 const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>(); 3471 // Use the default allocation. 
3472 return !((AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc || 3473 AA->getAllocatorType() == OMPAllocateDeclAttr::OMPNullMemAlloc) && 3474 !AA->getAllocator()); 3475 } 3476 3477 static RecordDecl * 3478 createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) { 3479 if (!Privates.empty()) { 3480 ASTContext &C = CGM.getContext(); 3481 // Build struct .kmp_privates_t. { 3482 // /* private vars */ 3483 // }; 3484 RecordDecl *RD = C.buildImplicitRecord(".kmp_privates.t"); 3485 RD->startDefinition(); 3486 for (const auto &Pair : Privates) { 3487 const VarDecl *VD = Pair.second.Original; 3488 QualType Type = VD->getType().getNonReferenceType(); 3489 // If the private variable is a local variable with lvalue ref type, 3490 // allocate the pointer instead of the pointee type. 3491 if (Pair.second.isLocalPrivate()) { 3492 if (VD->getType()->isLValueReferenceType()) 3493 Type = C.getPointerType(Type); 3494 if (isAllocatableDecl(VD)) 3495 Type = C.getPointerType(Type); 3496 } 3497 FieldDecl *FD = addFieldToRecordDecl(C, RD, Type); 3498 if (VD->hasAttrs()) { 3499 for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()), 3500 E(VD->getAttrs().end()); 3501 I != E; ++I) 3502 FD->addAttr(*I); 3503 } 3504 } 3505 RD->completeDefinition(); 3506 return RD; 3507 } 3508 return nullptr; 3509 } 3510 3511 static RecordDecl * 3512 createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind, 3513 QualType KmpInt32Ty, 3514 QualType KmpRoutineEntryPointerQTy) { 3515 ASTContext &C = CGM.getContext(); 3516 // Build struct kmp_task_t { 3517 // void * shareds; 3518 // kmp_routine_entry_t routine; 3519 // kmp_int32 part_id; 3520 // kmp_cmplrdata_t data1; 3521 // kmp_cmplrdata_t data2; 3522 // For taskloops additional fields: 3523 // kmp_uint64 lb; 3524 // kmp_uint64 ub; 3525 // kmp_int64 st; 3526 // kmp_int32 liter; 3527 // void * reductions; 3528 // }; 3529 RecordDecl *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TTK_Union); 3530 
UD->startDefinition(); 3531 addFieldToRecordDecl(C, UD, KmpInt32Ty); 3532 addFieldToRecordDecl(C, UD, KmpRoutineEntryPointerQTy); 3533 UD->completeDefinition(); 3534 QualType KmpCmplrdataTy = C.getRecordType(UD); 3535 RecordDecl *RD = C.buildImplicitRecord("kmp_task_t"); 3536 RD->startDefinition(); 3537 addFieldToRecordDecl(C, RD, C.VoidPtrTy); 3538 addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy); 3539 addFieldToRecordDecl(C, RD, KmpInt32Ty); 3540 addFieldToRecordDecl(C, RD, KmpCmplrdataTy); 3541 addFieldToRecordDecl(C, RD, KmpCmplrdataTy); 3542 if (isOpenMPTaskLoopDirective(Kind)) { 3543 QualType KmpUInt64Ty = 3544 CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0); 3545 QualType KmpInt64Ty = 3546 CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1); 3547 addFieldToRecordDecl(C, RD, KmpUInt64Ty); 3548 addFieldToRecordDecl(C, RD, KmpUInt64Ty); 3549 addFieldToRecordDecl(C, RD, KmpInt64Ty); 3550 addFieldToRecordDecl(C, RD, KmpInt32Ty); 3551 addFieldToRecordDecl(C, RD, C.VoidPtrTy); 3552 } 3553 RD->completeDefinition(); 3554 return RD; 3555 } 3556 3557 static RecordDecl * 3558 createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy, 3559 ArrayRef<PrivateDataTy> Privates) { 3560 ASTContext &C = CGM.getContext(); 3561 // Build struct kmp_task_t_with_privates { 3562 // kmp_task_t task_data; 3563 // .kmp_privates_t. privates; 3564 // }; 3565 RecordDecl *RD = C.buildImplicitRecord("kmp_task_t_with_privates"); 3566 RD->startDefinition(); 3567 addFieldToRecordDecl(C, RD, KmpTaskTQTy); 3568 if (const RecordDecl *PrivateRD = createPrivatesRecordDecl(CGM, Privates)) 3569 addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD)); 3570 RD->completeDefinition(); 3571 return RD; 3572 } 3573 3574 /// Emit a proxy function which accepts kmp_task_t as the second 3575 /// argument. 
3576 /// \code 3577 /// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) { 3578 /// TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, tt, 3579 /// For taskloops: 3580 /// tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter, 3581 /// tt->reductions, tt->shareds); 3582 /// return 0; 3583 /// } 3584 /// \endcode 3585 static llvm::Function * 3586 emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc, 3587 OpenMPDirectiveKind Kind, QualType KmpInt32Ty, 3588 QualType KmpTaskTWithPrivatesPtrQTy, 3589 QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy, 3590 QualType SharedsPtrTy, llvm::Function *TaskFunction, 3591 llvm::Value *TaskPrivatesMap) { 3592 ASTContext &C = CGM.getContext(); 3593 FunctionArgList Args; 3594 ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty, 3595 ImplicitParamDecl::Other); 3596 ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3597 KmpTaskTWithPrivatesPtrQTy.withRestrict(), 3598 ImplicitParamDecl::Other); 3599 Args.push_back(&GtidArg); 3600 Args.push_back(&TaskTypeArg); 3601 const auto &TaskEntryFnInfo = 3602 CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args); 3603 llvm::FunctionType *TaskEntryTy = 3604 CGM.getTypes().GetFunctionType(TaskEntryFnInfo); 3605 std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_entry", ""}); 3606 auto *TaskEntry = llvm::Function::Create( 3607 TaskEntryTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule()); 3608 CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskEntry, TaskEntryFnInfo); 3609 TaskEntry->setDoesNotRecurse(); 3610 CodeGenFunction CGF(CGM); 3611 CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args, 3612 Loc, Loc); 3613 3614 // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map, 3615 // tt, 3616 // For taskloops: 3617 // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter, 3618 // 
tt->task_data.shareds); 3619 llvm::Value *GtidParam = CGF.EmitLoadOfScalar( 3620 CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc); 3621 LValue TDBase = CGF.EmitLoadOfPointerLValue( 3622 CGF.GetAddrOfLocalVar(&TaskTypeArg), 3623 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>()); 3624 const auto *KmpTaskTWithPrivatesQTyRD = 3625 cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl()); 3626 LValue Base = 3627 CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin()); 3628 const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl()); 3629 auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId); 3630 LValue PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI); 3631 llvm::Value *PartidParam = PartIdLVal.getPointer(CGF); 3632 3633 auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds); 3634 LValue SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI); 3635 llvm::Value *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 3636 CGF.EmitLoadOfScalar(SharedsLVal, Loc), 3637 CGF.ConvertTypeForMem(SharedsPtrTy)); 3638 3639 auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1); 3640 llvm::Value *PrivatesParam; 3641 if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) { 3642 LValue PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI); 3643 PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 3644 PrivatesLVal.getPointer(CGF), CGF.VoidPtrTy); 3645 } else { 3646 PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); 3647 } 3648 3649 llvm::Value *CommonArgs[] = {GtidParam, PartidParam, PrivatesParam, 3650 TaskPrivatesMap, 3651 CGF.Builder 3652 .CreatePointerBitCastOrAddrSpaceCast( 3653 TDBase.getAddress(CGF), CGF.VoidPtrTy) 3654 .getPointer()}; 3655 SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs), 3656 std::end(CommonArgs)); 3657 if (isOpenMPTaskLoopDirective(Kind)) { 3658 auto LBFI = 
std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound); 3659 LValue LBLVal = CGF.EmitLValueForField(Base, *LBFI); 3660 llvm::Value *LBParam = CGF.EmitLoadOfScalar(LBLVal, Loc); 3661 auto UBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound); 3662 LValue UBLVal = CGF.EmitLValueForField(Base, *UBFI); 3663 llvm::Value *UBParam = CGF.EmitLoadOfScalar(UBLVal, Loc); 3664 auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride); 3665 LValue StLVal = CGF.EmitLValueForField(Base, *StFI); 3666 llvm::Value *StParam = CGF.EmitLoadOfScalar(StLVal, Loc); 3667 auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter); 3668 LValue LILVal = CGF.EmitLValueForField(Base, *LIFI); 3669 llvm::Value *LIParam = CGF.EmitLoadOfScalar(LILVal, Loc); 3670 auto RFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTReductions); 3671 LValue RLVal = CGF.EmitLValueForField(Base, *RFI); 3672 llvm::Value *RParam = CGF.EmitLoadOfScalar(RLVal, Loc); 3673 CallArgs.push_back(LBParam); 3674 CallArgs.push_back(UBParam); 3675 CallArgs.push_back(StParam); 3676 CallArgs.push_back(LIParam); 3677 CallArgs.push_back(RParam); 3678 } 3679 CallArgs.push_back(SharedsParam); 3680 3681 CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskFunction, 3682 CallArgs); 3683 CGF.EmitStoreThroughLValue(RValue::get(CGF.Builder.getInt32(/*C=*/0)), 3684 CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty)); 3685 CGF.FinishFunction(); 3686 return TaskEntry; 3687 } 3688 3689 static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM, 3690 SourceLocation Loc, 3691 QualType KmpInt32Ty, 3692 QualType KmpTaskTWithPrivatesPtrQTy, 3693 QualType KmpTaskTWithPrivatesQTy) { 3694 ASTContext &C = CGM.getContext(); 3695 FunctionArgList Args; 3696 ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty, 3697 ImplicitParamDecl::Other); 3698 ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3699 KmpTaskTWithPrivatesPtrQTy.withRestrict(), 3700 
ImplicitParamDecl::Other); 3701 Args.push_back(&GtidArg); 3702 Args.push_back(&TaskTypeArg); 3703 const auto &DestructorFnInfo = 3704 CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args); 3705 llvm::FunctionType *DestructorFnTy = 3706 CGM.getTypes().GetFunctionType(DestructorFnInfo); 3707 std::string Name = 3708 CGM.getOpenMPRuntime().getName({"omp_task_destructor", ""}); 3709 auto *DestructorFn = 3710 llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage, 3711 Name, &CGM.getModule()); 3712 CGM.SetInternalFunctionAttributes(GlobalDecl(), DestructorFn, 3713 DestructorFnInfo); 3714 DestructorFn->setDoesNotRecurse(); 3715 CodeGenFunction CGF(CGM); 3716 CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo, 3717 Args, Loc, Loc); 3718 3719 LValue Base = CGF.EmitLoadOfPointerLValue( 3720 CGF.GetAddrOfLocalVar(&TaskTypeArg), 3721 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>()); 3722 const auto *KmpTaskTWithPrivatesQTyRD = 3723 cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl()); 3724 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin()); 3725 Base = CGF.EmitLValueForField(Base, *FI); 3726 for (const auto *Field : 3727 cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) { 3728 if (QualType::DestructionKind DtorKind = 3729 Field->getType().isDestructedType()) { 3730 LValue FieldLValue = CGF.EmitLValueForField(Base, Field); 3731 CGF.pushDestroy(DtorKind, FieldLValue.getAddress(CGF), Field->getType()); 3732 } 3733 } 3734 CGF.FinishFunction(); 3735 return DestructorFn; 3736 } 3737 3738 /// Emit a privates mapping function for correct handling of private and 3739 /// firstprivate variables. 3740 /// \code 3741 /// void .omp_task_privates_map.(const .privates. 
*noalias privs, <ty1> 3742 /// **noalias priv1,..., <tyn> **noalias privn) { 3743 /// *priv1 = &.privates.priv1; 3744 /// ...; 3745 /// *privn = &.privates.privn; 3746 /// } 3747 /// \endcode 3748 static llvm::Value * 3749 emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc, 3750 const OMPTaskDataTy &Data, QualType PrivatesQTy, 3751 ArrayRef<PrivateDataTy> Privates) { 3752 ASTContext &C = CGM.getContext(); 3753 FunctionArgList Args; 3754 ImplicitParamDecl TaskPrivatesArg( 3755 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3756 C.getPointerType(PrivatesQTy).withConst().withRestrict(), 3757 ImplicitParamDecl::Other); 3758 Args.push_back(&TaskPrivatesArg); 3759 llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, unsigned> PrivateVarsPos; 3760 unsigned Counter = 1; 3761 for (const Expr *E : Data.PrivateVars) { 3762 Args.push_back(ImplicitParamDecl::Create( 3763 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3764 C.getPointerType(C.getPointerType(E->getType())) 3765 .withConst() 3766 .withRestrict(), 3767 ImplicitParamDecl::Other)); 3768 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 3769 PrivateVarsPos[VD] = Counter; 3770 ++Counter; 3771 } 3772 for (const Expr *E : Data.FirstprivateVars) { 3773 Args.push_back(ImplicitParamDecl::Create( 3774 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3775 C.getPointerType(C.getPointerType(E->getType())) 3776 .withConst() 3777 .withRestrict(), 3778 ImplicitParamDecl::Other)); 3779 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 3780 PrivateVarsPos[VD] = Counter; 3781 ++Counter; 3782 } 3783 for (const Expr *E : Data.LastprivateVars) { 3784 Args.push_back(ImplicitParamDecl::Create( 3785 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3786 C.getPointerType(C.getPointerType(E->getType())) 3787 .withConst() 3788 .withRestrict(), 3789 ImplicitParamDecl::Other)); 3790 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 3791 PrivateVarsPos[VD] = Counter; 3792 ++Counter; 3793 } 3794 for (const VarDecl *VD : 
Data.PrivateLocals) { 3795 QualType Ty = VD->getType().getNonReferenceType(); 3796 if (VD->getType()->isLValueReferenceType()) 3797 Ty = C.getPointerType(Ty); 3798 if (isAllocatableDecl(VD)) 3799 Ty = C.getPointerType(Ty); 3800 Args.push_back(ImplicitParamDecl::Create( 3801 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3802 C.getPointerType(C.getPointerType(Ty)).withConst().withRestrict(), 3803 ImplicitParamDecl::Other)); 3804 PrivateVarsPos[VD] = Counter; 3805 ++Counter; 3806 } 3807 const auto &TaskPrivatesMapFnInfo = 3808 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 3809 llvm::FunctionType *TaskPrivatesMapTy = 3810 CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo); 3811 std::string Name = 3812 CGM.getOpenMPRuntime().getName({"omp_task_privates_map", ""}); 3813 auto *TaskPrivatesMap = llvm::Function::Create( 3814 TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage, Name, 3815 &CGM.getModule()); 3816 CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskPrivatesMap, 3817 TaskPrivatesMapFnInfo); 3818 if (CGM.getLangOpts().Optimize) { 3819 TaskPrivatesMap->removeFnAttr(llvm::Attribute::NoInline); 3820 TaskPrivatesMap->removeFnAttr(llvm::Attribute::OptimizeNone); 3821 TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline); 3822 } 3823 CodeGenFunction CGF(CGM); 3824 CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap, 3825 TaskPrivatesMapFnInfo, Args, Loc, Loc); 3826 3827 // *privi = &.privates.privi; 3828 LValue Base = CGF.EmitLoadOfPointerLValue( 3829 CGF.GetAddrOfLocalVar(&TaskPrivatesArg), 3830 TaskPrivatesArg.getType()->castAs<PointerType>()); 3831 const auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl()); 3832 Counter = 0; 3833 for (const FieldDecl *Field : PrivatesQTyRD->fields()) { 3834 LValue FieldLVal = CGF.EmitLValueForField(Base, Field); 3835 const VarDecl *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]]; 3836 LValue RefLVal = 3837 CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType()); 3838 
LValue RefLoadLVal = CGF.EmitLoadOfPointerLValue( 3839 RefLVal.getAddress(CGF), RefLVal.getType()->castAs<PointerType>()); 3840 CGF.EmitStoreOfScalar(FieldLVal.getPointer(CGF), RefLoadLVal); 3841 ++Counter; 3842 } 3843 CGF.FinishFunction(); 3844 return TaskPrivatesMap; 3845 } 3846 3847 /// Emit initialization for private variables in task-based directives. 3848 static void emitPrivatesInit(CodeGenFunction &CGF, 3849 const OMPExecutableDirective &D, 3850 Address KmpTaskSharedsPtr, LValue TDBase, 3851 const RecordDecl *KmpTaskTWithPrivatesQTyRD, 3852 QualType SharedsTy, QualType SharedsPtrTy, 3853 const OMPTaskDataTy &Data, 3854 ArrayRef<PrivateDataTy> Privates, bool ForDup) { 3855 ASTContext &C = CGF.getContext(); 3856 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin()); 3857 LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI); 3858 OpenMPDirectiveKind Kind = isOpenMPTaskLoopDirective(D.getDirectiveKind()) 3859 ? OMPD_taskloop 3860 : OMPD_task; 3861 const CapturedStmt &CS = *D.getCapturedStmt(Kind); 3862 CodeGenFunction::CGCapturedStmtInfo CapturesInfo(CS); 3863 LValue SrcBase; 3864 bool IsTargetTask = 3865 isOpenMPTargetDataManagementDirective(D.getDirectiveKind()) || 3866 isOpenMPTargetExecutionDirective(D.getDirectiveKind()); 3867 // For target-based directives skip 4 firstprivate arrays BasePointersArray, 3868 // PointersArray, SizesArray, and MappersArray. The original variables for 3869 // these arrays are not captured and we get their addresses explicitly. 3870 if ((!IsTargetTask && !Data.FirstprivateVars.empty() && ForDup) || 3871 (IsTargetTask && KmpTaskSharedsPtr.isValid())) { 3872 SrcBase = CGF.MakeAddrLValue( 3873 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 3874 KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy)), 3875 SharedsTy); 3876 } 3877 FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin(); 3878 for (const PrivateDataTy &Pair : Privates) { 3879 // Do not initialize private locals. 
3880 if (Pair.second.isLocalPrivate()) { 3881 ++FI; 3882 continue; 3883 } 3884 const VarDecl *VD = Pair.second.PrivateCopy; 3885 const Expr *Init = VD->getAnyInitializer(); 3886 if (Init && (!ForDup || (isa<CXXConstructExpr>(Init) && 3887 !CGF.isTrivialInitializer(Init)))) { 3888 LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI); 3889 if (const VarDecl *Elem = Pair.second.PrivateElemInit) { 3890 const VarDecl *OriginalVD = Pair.second.Original; 3891 // Check if the variable is the target-based BasePointersArray, 3892 // PointersArray, SizesArray, or MappersArray. 3893 LValue SharedRefLValue; 3894 QualType Type = PrivateLValue.getType(); 3895 const FieldDecl *SharedField = CapturesInfo.lookup(OriginalVD); 3896 if (IsTargetTask && !SharedField) { 3897 assert(isa<ImplicitParamDecl>(OriginalVD) && 3898 isa<CapturedDecl>(OriginalVD->getDeclContext()) && 3899 cast<CapturedDecl>(OriginalVD->getDeclContext()) 3900 ->getNumParams() == 0 && 3901 isa<TranslationUnitDecl>( 3902 cast<CapturedDecl>(OriginalVD->getDeclContext()) 3903 ->getDeclContext()) && 3904 "Expected artificial target data variable."); 3905 SharedRefLValue = 3906 CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(OriginalVD), Type); 3907 } else if (ForDup) { 3908 SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField); 3909 SharedRefLValue = CGF.MakeAddrLValue( 3910 Address(SharedRefLValue.getPointer(CGF), 3911 C.getDeclAlign(OriginalVD)), 3912 SharedRefLValue.getType(), LValueBaseInfo(AlignmentSource::Decl), 3913 SharedRefLValue.getTBAAInfo()); 3914 } else if (CGF.LambdaCaptureFields.count( 3915 Pair.second.Original->getCanonicalDecl()) > 0 || 3916 dyn_cast_or_null<BlockDecl>(CGF.CurCodeDecl)) { 3917 SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef); 3918 } else { 3919 // Processing for implicitly captured variables. 
3920 InlinedOpenMPRegionRAII Region( 3921 CGF, [](CodeGenFunction &, PrePostActionTy &) {}, OMPD_unknown, 3922 /*HasCancel=*/false, /*NoInheritance=*/true); 3923 SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef); 3924 } 3925 if (Type->isArrayType()) { 3926 // Initialize firstprivate array. 3927 if (!isa<CXXConstructExpr>(Init) || CGF.isTrivialInitializer(Init)) { 3928 // Perform simple memcpy. 3929 CGF.EmitAggregateAssign(PrivateLValue, SharedRefLValue, Type); 3930 } else { 3931 // Initialize firstprivate array using element-by-element 3932 // initialization. 3933 CGF.EmitOMPAggregateAssign( 3934 PrivateLValue.getAddress(CGF), SharedRefLValue.getAddress(CGF), 3935 Type, 3936 [&CGF, Elem, Init, &CapturesInfo](Address DestElement, 3937 Address SrcElement) { 3938 // Clean up any temporaries needed by the initialization. 3939 CodeGenFunction::OMPPrivateScope InitScope(CGF); 3940 InitScope.addPrivate( 3941 Elem, [SrcElement]() -> Address { return SrcElement; }); 3942 (void)InitScope.Privatize(); 3943 // Emit initialization for single element. 3944 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII( 3945 CGF, &CapturesInfo); 3946 CGF.EmitAnyExprToMem(Init, DestElement, 3947 Init->getType().getQualifiers(), 3948 /*IsInitializer=*/false); 3949 }); 3950 } 3951 } else { 3952 CodeGenFunction::OMPPrivateScope InitScope(CGF); 3953 InitScope.addPrivate(Elem, [SharedRefLValue, &CGF]() -> Address { 3954 return SharedRefLValue.getAddress(CGF); 3955 }); 3956 (void)InitScope.Privatize(); 3957 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo); 3958 CGF.EmitExprAsInit(Init, VD, PrivateLValue, 3959 /*capturedByInit=*/false); 3960 } 3961 } else { 3962 CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false); 3963 } 3964 } 3965 ++FI; 3966 } 3967 } 3968 3969 /// Check if duplication function is required for taskloops. 
3970 static bool checkInitIsRequired(CodeGenFunction &CGF, 3971 ArrayRef<PrivateDataTy> Privates) { 3972 bool InitRequired = false; 3973 for (const PrivateDataTy &Pair : Privates) { 3974 if (Pair.second.isLocalPrivate()) 3975 continue; 3976 const VarDecl *VD = Pair.second.PrivateCopy; 3977 const Expr *Init = VD->getAnyInitializer(); 3978 InitRequired = InitRequired || (Init && isa<CXXConstructExpr>(Init) && 3979 !CGF.isTrivialInitializer(Init)); 3980 if (InitRequired) 3981 break; 3982 } 3983 return InitRequired; 3984 } 3985 3986 3987 /// Emit task_dup function (for initialization of 3988 /// private/firstprivate/lastprivate vars and last_iter flag) 3989 /// \code 3990 /// void __task_dup_entry(kmp_task_t *task_dst, const kmp_task_t *task_src, int 3991 /// lastpriv) { 3992 /// // setup lastprivate flag 3993 /// task_dst->last = lastpriv; 3994 /// // could be constructor calls here... 3995 /// } 3996 /// \endcode 3997 static llvm::Value * 3998 emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc, 3999 const OMPExecutableDirective &D, 4000 QualType KmpTaskTWithPrivatesPtrQTy, 4001 const RecordDecl *KmpTaskTWithPrivatesQTyRD, 4002 const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy, 4003 QualType SharedsPtrTy, const OMPTaskDataTy &Data, 4004 ArrayRef<PrivateDataTy> Privates, bool WithLastIter) { 4005 ASTContext &C = CGM.getContext(); 4006 FunctionArgList Args; 4007 ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 4008 KmpTaskTWithPrivatesPtrQTy, 4009 ImplicitParamDecl::Other); 4010 ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 4011 KmpTaskTWithPrivatesPtrQTy, 4012 ImplicitParamDecl::Other); 4013 ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy, 4014 ImplicitParamDecl::Other); 4015 Args.push_back(&DstArg); 4016 Args.push_back(&SrcArg); 4017 Args.push_back(&LastprivArg); 4018 const auto &TaskDupFnInfo = 4019 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 4020 llvm::FunctionType 
*TaskDupTy = CGM.getTypes().GetFunctionType(TaskDupFnInfo); 4021 std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_dup", ""}); 4022 auto *TaskDup = llvm::Function::Create( 4023 TaskDupTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule()); 4024 CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskDup, TaskDupFnInfo); 4025 TaskDup->setDoesNotRecurse(); 4026 CodeGenFunction CGF(CGM); 4027 CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskDup, TaskDupFnInfo, Args, Loc, 4028 Loc); 4029 4030 LValue TDBase = CGF.EmitLoadOfPointerLValue( 4031 CGF.GetAddrOfLocalVar(&DstArg), 4032 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>()); 4033 // task_dst->liter = lastpriv; 4034 if (WithLastIter) { 4035 auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter); 4036 LValue Base = CGF.EmitLValueForField( 4037 TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin()); 4038 LValue LILVal = CGF.EmitLValueForField(Base, *LIFI); 4039 llvm::Value *Lastpriv = CGF.EmitLoadOfScalar( 4040 CGF.GetAddrOfLocalVar(&LastprivArg), /*Volatile=*/false, C.IntTy, Loc); 4041 CGF.EmitStoreOfScalar(Lastpriv, LILVal); 4042 } 4043 4044 // Emit initial values for private copies (if any). 
4045 assert(!Privates.empty()); 4046 Address KmpTaskSharedsPtr = Address::invalid(); 4047 if (!Data.FirstprivateVars.empty()) { 4048 LValue TDBase = CGF.EmitLoadOfPointerLValue( 4049 CGF.GetAddrOfLocalVar(&SrcArg), 4050 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>()); 4051 LValue Base = CGF.EmitLValueForField( 4052 TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin()); 4053 KmpTaskSharedsPtr = Address( 4054 CGF.EmitLoadOfScalar(CGF.EmitLValueForField( 4055 Base, *std::next(KmpTaskTQTyRD->field_begin(), 4056 KmpTaskTShareds)), 4057 Loc), 4058 CGM.getNaturalTypeAlignment(SharedsTy)); 4059 } 4060 emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase, KmpTaskTWithPrivatesQTyRD, 4061 SharedsTy, SharedsPtrTy, Data, Privates, /*ForDup=*/true); 4062 CGF.FinishFunction(); 4063 return TaskDup; 4064 } 4065 4066 /// Checks if destructor function is required to be generated. 4067 /// \return true if cleanups are required, false otherwise. 4068 static bool 4069 checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD, 4070 ArrayRef<PrivateDataTy> Privates) { 4071 for (const PrivateDataTy &P : Privates) { 4072 if (P.second.isLocalPrivate()) 4073 continue; 4074 QualType Ty = P.second.Original->getType().getNonReferenceType(); 4075 if (Ty.isDestructedType()) 4076 return true; 4077 } 4078 return false; 4079 } 4080 4081 namespace { 4082 /// Loop generator for OpenMP iterator expression. 
///
/// On construction, for every iterator of \p E this scope evaluates the
/// iterator's Upper helper expression, privatizes the iterator variable and
/// its counter variable as fresh temporaries, and then opens one loop per
/// iterator:
/// \code
///   counter = 0;
/// iter.cont:
///   if (counter < upper) goto iter.body; else goto iter.exit;
/// iter.body:
///   <Update helper expression>
/// \endcode
/// The destructor closes the loops innermost-first by emitting the
/// CounterUpdate helper expression, a branch back to iter.cont and the
/// iter.exit block. Code emitted while the scope is alive therefore ends up
/// in the innermost loop body. With a null \p E both the constructor and the
/// destructor emit nothing.
class OMPIteratorGeneratorScope final
    : public CodeGenFunction::OMPPrivateScope {
  CodeGenFunction &CGF;
  const OMPIteratorExpr *E = nullptr;
  // Per-iterator jump destinations, indexed like the iterators of E.
  SmallVector<CodeGenFunction::JumpDest, 4> ContDests;
  SmallVector<CodeGenFunction::JumpDest, 4> ExitDests;
  OMPIteratorGeneratorScope() = delete;
  OMPIteratorGeneratorScope(OMPIteratorGeneratorScope &) = delete;

public:
  OMPIteratorGeneratorScope(CodeGenFunction &CGF, const OMPIteratorExpr *E)
      : CodeGenFunction::OMPPrivateScope(CGF), CGF(CGF), E(E) {
    if (!E)
      return;
    // First pass: evaluate all upper bounds and register the private copies
    // before any loop block is emitted.
    SmallVector<llvm::Value *, 4> Uppers;
    for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
      Uppers.push_back(CGF.EmitScalarExpr(E->getHelper(I).Upper));
      const auto *VD = cast<VarDecl>(E->getIteratorDecl(I));
      addPrivate(VD, [&CGF, VD]() {
        return CGF.CreateMemTemp(VD->getType(), VD->getName());
      });
      const OMPIteratorHelperData &HelperData = E->getHelper(I);
      addPrivate(HelperData.CounterVD, [&CGF, &HelperData]() {
        return CGF.CreateMemTemp(HelperData.CounterVD->getType(),
                                 "counter.addr");
      });
    }
    Privatize();

    // Second pass: open the loops, outermost (I == 0) first.
    for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
      const OMPIteratorHelperData &HelperData = E->getHelper(I);
      LValue CLVal =
          CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(HelperData.CounterVD),
                             HelperData.CounterVD->getType());
      // Counter = 0;
      CGF.EmitStoreOfScalar(
          llvm::ConstantInt::get(CLVal.getAddress(CGF).getElementType(), 0),
          CLVal);
      CodeGenFunction::JumpDest &ContDest =
          ContDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.cont"));
      CodeGenFunction::JumpDest &ExitDest =
          ExitDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.exit"));
      // N = <number-of-iterations>;
      llvm::Value *N = Uppers[I];
      // cont:
      // if (Counter < N) goto body; else goto exit;
      CGF.EmitBlock(ContDest.getBlock());
      auto *CVal =
          CGF.EmitLoadOfScalar(CLVal, HelperData.CounterVD->getLocation());
      // The comparison signedness follows the counter variable's type.
      llvm::Value *Cmp =
          HelperData.CounterVD->getType()->isSignedIntegerOrEnumerationType()
              ? CGF.Builder.CreateICmpSLT(CVal, N)
              : CGF.Builder.CreateICmpULT(CVal, N);
      llvm::BasicBlock *BodyBB = CGF.createBasicBlock("iter.body");
      CGF.Builder.CreateCondBr(Cmp, BodyBB, ExitDest.getBlock());
      // body:
      CGF.EmitBlock(BodyBB);
      // Iteri = Begini + Counter * Stepi;
      CGF.EmitIgnoredExpr(HelperData.Update);
    }
  }
  ~OMPIteratorGeneratorScope() {
    if (!E)
      return;
    // Close the loops in reverse order of opening (innermost first).
    for (unsigned I = E->numOfIterators(); I > 0; --I) {
      // Counter = Counter + 1;
      const OMPIteratorHelperData &HelperData = E->getHelper(I - 1);
      CGF.EmitIgnoredExpr(HelperData.CounterUpdate);
      // goto cont;
      CGF.EmitBranchThroughCleanup(ContDests[I - 1]);
      // exit:
      CGF.EmitBlock(ExitDests[I - 1].getBlock(), /*IsFinished=*/I == 1);
    }
  }
};
} // namespace

/// Emits the address and the size of the storage designated by \p E.
///  - Array shaping expression: the address is the value of the base pointer
///    expression; the size is the size of the pointee type multiplied by
///    every dimension (each converted to the size type).
///  - Array section: the address is the lvalue address of \p E; the size is
///    the distance from it to one element past the section's upper-bound
///    element.
///  - Anything else: the lvalue address of \p E and the size of its type.
/// \return The pair {address, size}.
static std::pair<llvm::Value *, llvm::Value *>
getPointerAndSize(CodeGenFunction &CGF, const Expr *E) {
  const auto *OASE = dyn_cast<OMPArrayShapingExpr>(E);
  llvm::Value *Addr;
  if (OASE) {
    const Expr *Base = OASE->getBase();
    Addr = CGF.EmitScalarExpr(Base);
  } else {
    Addr = CGF.EmitLValue(E).getPointer(CGF);
  }
  llvm::Value *SizeVal;
  QualType Ty = E->getType();
  if (OASE) {
    SizeVal = CGF.getTypeSize(OASE->getBase()->getType()->getPointeeType());
    for (const Expr *SE : OASE->getDimensions()) {
      llvm::Value *Sz = CGF.EmitScalarExpr(SE);
      Sz = CGF.EmitScalarConversion(
          Sz, SE->getType(), CGF.getContext().getSizeType(), SE->getExprLoc());
      SizeVal = CGF.Builder.CreateNUWMul(SizeVal, Sz);
    }
  } else if (const auto *ASE =
                 dyn_cast<OMPArraySectionExpr>(E->IgnoreParenImpCasts())) {
    LValue UpAddrLVal =
        CGF.EmitOMPArraySectionExpr(ASE, /*IsLowerBound=*/false);
    Address UpAddrAddress = UpAddrLVal.getAddress(CGF);
    // Step one element past the upper-bound element so that the difference
    // below covers the whole section.
    llvm::Value *UpAddr = CGF.Builder.CreateConstGEP1_32(
        UpAddrAddress.getElementType(), UpAddrAddress.getPointer(), /*Idx0=*/1);
    llvm::Value *LowIntPtr = CGF.Builder.CreatePtrToInt(Addr, CGF.SizeTy);
    llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGF.SizeTy);
    SizeVal = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr);
  } else {
    SizeVal = CGF.getTypeSize(Ty);
  }
  return std::make_pair(Addr, SizeVal);
}

/// Builds the implicit record kmp_task_affinity_info_t, if it is not built
/// yet, and stores its type in \p KmpTaskAffinityInfoTy. The record has three
/// fields, in order: an intptr-typed field, a size-typed field and a 32-bit
/// unsigned flags field. Does nothing when the type is already set.
static void getKmpAffinityType(ASTContext &C, QualType &KmpTaskAffinityInfoTy) {
  QualType FlagsTy = C.getIntTypeForBitwidth(32, /*Signed=*/false);
  if (KmpTaskAffinityInfoTy.isNull()) {
    RecordDecl *KmpAffinityInfoRD =
        C.buildImplicitRecord("kmp_task_affinity_info_t");
    KmpAffinityInfoRD->startDefinition();
    addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getIntPtrType());
    addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getSizeType());
    addFieldToRecordDecl(C, KmpAffinityInfoRD, FlagsTy);
    KmpAffinityInfoRD->completeDefinition();
    KmpTaskAffinityInfoTy = C.getRecordType(KmpAffinityInfoRD);
  }
}

/// Emits the allocation and initialization of a task descriptor for \p D.
///
/// In order, this function:
///  - collects private, firstprivate, lastprivate and local private
///    variables and sorts them by decreasing alignment;
///  - builds (or reuses the cached) kmp_task_t record type and the
///    task-specific record that appends the privates;
///  - emits the privates mapping function and the proxy task entry;
///  - calls __kmpc_omp_task_alloc, or __kmpc_omp_target_task_alloc with an
///    extra device id argument when \p D has a nowait clause;
///  - handles the detach and affinity clauses;
///  - copies the shareds, initializes the privates, and for taskloop
///    directives emits the task duplication function when needed;
///  - stores the destructors function and the priority into the descriptor.
/// \return A TaskResultTy with NewTask, TaskEntry, NewTaskNewTaskTTy, TDBase
/// and KmpTaskTQTyRD set, plus TaskDupFn when a duplication function was
/// emitted.
CGOpenMPRuntime::TaskResultTy
CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,
                              const OMPExecutableDirective &D,
                              llvm::Function *TaskFunction, QualType SharedsTy,
                              Address Shareds, const OMPTaskDataTy &Data) {
  ASTContext &C = CGM.getContext();
  llvm::SmallVector<PrivateDataTy, 4> Privates;
  // Aggregate privates and sort them by the alignment.
  const auto *I = Data.PrivateCopies.begin();
  for (const Expr *E : Data.PrivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
                         /*PrivateElemInit=*/nullptr));
    ++I;
  }
  // Firstprivates additionally carry the per-element init variable.
  I = Data.FirstprivateCopies.begin();
  const auto *IElemInitRef = Data.FirstprivateInits.begin();
  for (const Expr *E : Data.FirstprivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(
            E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
            cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl())));
    ++I;
    ++IElemInitRef;
  }
  I = Data.LastprivateCopies.begin();
  for (const Expr *E : Data.LastprivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
                         /*PrivateElemInit=*/nullptr));
    ++I;
  }
  // Local privates: allocatable declarations are recorded with pointer
  // alignment, all others with the alignment of the declaration.
  for (const VarDecl *VD : Data.PrivateLocals) {
    if (isAllocatableDecl(VD))
      Privates.emplace_back(CGM.getPointerAlign(), PrivateHelpersTy(VD));
    else
      Privates.emplace_back(C.getDeclAlign(VD), PrivateHelpersTy(VD));
  }
  // Decreasing alignment; the stable sort keeps the insertion order of
  // entries with equal alignment.
  llvm::stable_sort(Privates,
                    [](const PrivateDataTy &L, const PrivateDataTy &R) {
                      return L.first > R.first;
                    });
  QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
  // Build type kmp_routine_entry_t (if not built yet).
  emitKmpRoutineEntryT(KmpInt32Ty);
  // Build type kmp_task_t (if not built yet).
  // Taskloop directives and all other directives cache separate record types.
  if (isOpenMPTaskLoopDirective(D.getDirectiveKind())) {
    if (SavedKmpTaskloopTQTy.isNull()) {
      SavedKmpTaskloopTQTy = C.getRecordType(createKmpTaskTRecordDecl(
          CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
    }
    KmpTaskTQTy = SavedKmpTaskloopTQTy;
  } else {
    assert((D.getDirectiveKind() == OMPD_task ||
            isOpenMPTargetExecutionDirective(D.getDirectiveKind()) ||
            isOpenMPTargetDataManagementDirective(D.getDirectiveKind())) &&
           "Expected taskloop, task or target directive");
    if (SavedKmpTaskTQTy.isNull()) {
      SavedKmpTaskTQTy = C.getRecordType(createKmpTaskTRecordDecl(
          CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
    }
    KmpTaskTQTy = SavedKmpTaskTQTy;
  }
  const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
  // Build particular struct kmp_task_t for the given task.
  const RecordDecl *KmpTaskTWithPrivatesQTyRD =
      createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates);
  QualType KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD);
  QualType KmpTaskTWithPrivatesPtrQTy =
      C.getPointerType(KmpTaskTWithPrivatesQTy);
  llvm::Type *KmpTaskTWithPrivatesTy = CGF.ConvertType(KmpTaskTWithPrivatesQTy);
  llvm::Type *KmpTaskTWithPrivatesPtrTy =
      KmpTaskTWithPrivatesTy->getPointerTo();
  llvm::Value *KmpTaskTWithPrivatesTySize =
      CGF.getTypeSize(KmpTaskTWithPrivatesQTy);
  QualType SharedsPtrTy = C.getPointerType(SharedsTy);

  // Emit initial values for private copies (if any).
  llvm::Value *TaskPrivatesMap = nullptr;
  // The privates map is passed as the fourth parameter of the task function,
  // so its type is taken from there.
  llvm::Type *TaskPrivatesMapTy =
      std::next(TaskFunction->arg_begin(), 3)->getType();
  if (!Privates.empty()) {
    auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
    TaskPrivatesMap =
        emitTaskPrivateMappingFunction(CGM, Loc, Data, FI->getType(), Privates);
    TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        TaskPrivatesMap, TaskPrivatesMapTy);
  } else {
    TaskPrivatesMap = llvm::ConstantPointerNull::get(
        cast<llvm::PointerType>(TaskPrivatesMapTy));
  }
  // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid,
  // kmp_task_t *tt);
  llvm::Function *TaskEntry = emitProxyTaskFunction(
      CGM, Loc, D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
      KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction,
      TaskPrivatesMap);

  // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
  // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
  // kmp_routine_entry_t *task_entry);
  // Task flags. Format is taken from
  // https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h,
  // description of kmp_tasking_flags struct.
  enum {
    TiedFlag = 0x1,
    FinalFlag = 0x2,
    DestructorsFlag = 0x8,
    PriorityFlag = 0x20,
    DetachableFlag = 0x40,
  };
  unsigned Flags = Data.Tied ? TiedFlag : 0;
  bool NeedsCleanup = false;
  if (!Privates.empty()) {
    NeedsCleanup =
        checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD, Privates);
    if (NeedsCleanup)
      Flags = Flags | DestructorsFlag;
  }
  if (Data.Priority.getInt())
    Flags = Flags | PriorityFlag;
  if (D.hasClausesOfKind<OMPDetachClause>())
    Flags = Flags | DetachableFlag;
  // The final flag is either selected at run time from the emitted condition
  // value or folded from the statically known boolean.
  llvm::Value *TaskFlags =
      Data.Final.getPointer()
          ? CGF.Builder.CreateSelect(Data.Final.getPointer(),
                                     CGF.Builder.getInt32(FinalFlag),
                                     CGF.Builder.getInt32(/*C=*/0))
          : CGF.Builder.getInt32(Data.Final.getInt() ? FinalFlag : 0);
  TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags));
  llvm::Value *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy));
  SmallVector<llvm::Value *, 8> AllocArgs = {
      emitUpdateLocation(CGF, Loc),
      getThreadID(CGF, Loc),
      TaskFlags,
      KmpTaskTWithPrivatesTySize,
      SharedsSize,
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TaskEntry,
                                                      KmpRoutineEntryPtrTy)};
  llvm::Value *NewTask;
  if (D.hasClausesOfKind<OMPNowaitClause>()) {
    // Check if we have any device clause associated with the directive.
    const Expr *Device = nullptr;
    // NOTE(review): this `C` shadows the function-level ASTContext `C`.
    if (auto *C = D.getSingleClause<OMPDeviceClause>())
      Device = C->getDevice();
    // Emit device ID if any otherwise use default value.
    llvm::Value *DeviceID;
    if (Device)
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    else
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    // The target variant takes the device id as one extra trailing argument.
    AllocArgs.push_back(DeviceID);
    NewTask = CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_omp_target_task_alloc),
        AllocArgs);
  } else {
    NewTask =
        CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                                CGM.getModule(), OMPRTL___kmpc_omp_task_alloc),
                            AllocArgs);
  }
  // Emit detach clause initialization.
  // evt = (typeof(evt))__kmpc_task_allow_completion_event(loc, tid,
  // task_descriptor);
  if (const auto *DC = D.getSingleClause<OMPDetachClause>()) {
    const Expr *Evt = DC->getEventHandler()->IgnoreParenImpCasts();
    LValue EvtLVal = CGF.EmitLValue(Evt);

    // Build kmp_event_t *__kmpc_task_allow_completion_event(ident_t *loc_ref,
    // int gtid, kmp_task_t *task);
    // Note: this `Loc` is an llvm::Value and shadows the SourceLocation
    // parameter inside this block.
    llvm::Value *Loc = emitUpdateLocation(CGF, DC->getBeginLoc());
    llvm::Value *Tid = getThreadID(CGF, DC->getBeginLoc());
    Tid = CGF.Builder.CreateIntCast(Tid, CGF.IntTy, /*isSigned=*/false);
    llvm::Value *EvtVal = CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_task_allow_completion_event),
        {Loc, Tid, NewTask});
    EvtVal = CGF.EmitScalarConversion(EvtVal, C.VoidPtrTy, Evt->getType(),
                                      Evt->getExprLoc());
    CGF.EmitStoreOfScalar(EvtVal, EvtLVal);
  }
  // Process affinity clauses.
  if (D.hasClausesOfKind<OMPAffinityClause>()) {
    // Process list of affinity data.
    // Redeclares (shadows) the function-level `C` with the same context.
    ASTContext &C = CGM.getContext();
    Address AffinitiesArray = Address::invalid();
    // Calculate number of elements to form the array of affinity data.
    // NumOfElements accumulates the run-time count contributed by clauses
    // with an iterator modifier (product of their upper bounds);
    // NumAffinities counts the list items of clauses without one.
    llvm::Value *NumOfElements = nullptr;
    unsigned NumAffinities = 0;
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      if (const Expr *Modifier = C->getModifier()) {
        const auto *IE = cast<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts());
        for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
          llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
          Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
          NumOfElements =
              NumOfElements ? CGF.Builder.CreateNUWMul(NumOfElements, Sz) : Sz;
        }
      } else {
        NumAffinities += C->varlist_size();
      }
    }
    getKmpAffinityType(CGM.getContext(), KmpTaskAffinityInfoTy);
    // Fields ids in kmp_task_affinity_info record.
    // The `Flags` enumerator hides the `unsigned Flags` local within this
    // block.
    enum RTLAffinityInfoFieldsTy { BaseAddr, Len, Flags };

    QualType KmpTaskAffinityInfoArrayTy;
    if (NumOfElements) {
      // Run-time sized: total = static count + iterator-generated count.
      NumOfElements = CGF.Builder.CreateNUWAdd(
          llvm::ConstantInt::get(CGF.SizeTy, NumAffinities), NumOfElements);
      auto *OVE = new (C) OpaqueValueExpr(
          Loc,
          C.getIntTypeForBitwidth(C.getTypeSize(C.getSizeType()), /*Signed=*/0),
          VK_PRValue);
      CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, OVE,
                                                    RValue::get(NumOfElements));
      KmpTaskAffinityInfoArrayTy =
          C.getVariableArrayType(KmpTaskAffinityInfoTy, OVE, ArrayType::Normal,
                                 /*IndexTypeQuals=*/0, SourceRange(Loc, Loc));
      // Properly emit variable-sized array.
      auto *PD = ImplicitParamDecl::Create(C, KmpTaskAffinityInfoArrayTy,
                                           ImplicitParamDecl::Other);
      CGF.EmitVarDecl(*PD);
      AffinitiesArray = CGF.GetAddrOfLocalVar(PD);
      NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
                                                /*isSigned=*/false);
    } else {
      // Statically sized: a constant array temporary of NumAffinities
      // elements.
      KmpTaskAffinityInfoArrayTy = C.getConstantArrayType(
          KmpTaskAffinityInfoTy,
          llvm::APInt(C.getTypeSize(C.getSizeType()), NumAffinities), nullptr,
          ArrayType::Normal, /*IndexTypeQuals=*/0);
      AffinitiesArray =
          CGF.CreateMemTemp(KmpTaskAffinityInfoArrayTy, ".affs.arr.addr");
      AffinitiesArray = CGF.Builder.CreateConstArrayGEP(AffinitiesArray, 0);
      NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumAffinities,
                                             /*isSigned=*/false);
    }

    const auto *KmpAffinityInfoRD = KmpTaskAffinityInfoTy->getAsRecordDecl();
    // Fill array by elements without iterators.
    // NOTE(review): only the BaseAddr and Len fields are stored for each
    // entry, here and in the iterator loop below; the Flags field is never
    // written in this function.
    unsigned Pos = 0;
    bool HasIterator = false;
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      if (C->getModifier()) {
        HasIterator = true;
        continue;
      }
      for (const Expr *E : C->varlists()) {
        llvm::Value *Addr;
        llvm::Value *Size;
        std::tie(Addr, Size) = getPointerAndSize(CGF, E);
        LValue Base =
            CGF.MakeAddrLValue(CGF.Builder.CreateConstGEP(AffinitiesArray, Pos),
                               KmpTaskAffinityInfoTy);
        // affs[i].base_addr = &<Affinities[i].second>;
        LValue BaseAddrLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
        CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
                              BaseAddrLVal);
        // affs[i].len = sizeof(<Affinities[i].second>);
        LValue LenLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
        CGF.EmitStoreOfScalar(Size, LenLVal);
        ++Pos;
      }
    }
    // Entries produced inside iterator loops are indexed through a run-time
    // counter that starts right after the statically placed entries.
    LValue PosLVal;
    if (HasIterator) {
      PosLVal = CGF.MakeAddrLValue(
          CGF.CreateMemTemp(C.getSizeType(), "affs.counter.addr"),
          C.getSizeType());
      CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
    }
    // Process elements with iterators.
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      const Expr *Modifier = C->getModifier();
      if (!Modifier)
        continue;
      // Everything emitted until the end of this loop iteration lands inside
      // the loops generated for the iterator modifier.
      OMPIteratorGeneratorScope IteratorScope(
          CGF, cast_or_null<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts()));
      for (const Expr *E : C->varlists()) {
        llvm::Value *Addr;
        llvm::Value *Size;
        std::tie(Addr, Size) = getPointerAndSize(CGF, E);
        llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
        LValue Base = CGF.MakeAddrLValue(
            Address(CGF.Builder.CreateGEP(AffinitiesArray.getElementType(),
                                          AffinitiesArray.getPointer(), Idx),
                    AffinitiesArray.getAlignment()),
            KmpTaskAffinityInfoTy);
        // affs[i].base_addr = &<Affinities[i].second>;
        LValue BaseAddrLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
        CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
                              BaseAddrLVal);
        // affs[i].len = sizeof(<Affinities[i].second>);
        LValue LenLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
        CGF.EmitStoreOfScalar(Size, LenLVal);
        // ++pos (at run time).
        Idx = CGF.Builder.CreateNUWAdd(
            Idx, llvm::ConstantInt::get(Idx->getType(), 1));
        CGF.EmitStoreOfScalar(Idx, PosLVal);
      }
    }
    // Call to kmp_int32 __kmpc_omp_reg_task_with_affinity(ident_t *loc_ref,
    // kmp_int32 gtid, kmp_task_t *new_task, kmp_int32
    // naffins, kmp_task_affinity_info_t *affin_list);
    llvm::Value *LocRef = emitUpdateLocation(CGF, Loc);
    llvm::Value *GTid = getThreadID(CGF, Loc);
    llvm::Value *AffinListPtr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        AffinitiesArray.getPointer(), CGM.VoidPtrTy);
    // FIXME: Emit the function and ignore its result for now unless the
    // runtime function is properly implemented.
    (void)CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_omp_reg_task_with_affinity),
        {LocRef, GTid, NewTask, NumOfElements, AffinListPtr});
  }
  // View the allocated task as the task-specific record (kmp_task_t followed
  // by the privates); TDBase is its leading kmp_task_t member.
  llvm::Value *NewTaskNewTaskTTy =
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          NewTask, KmpTaskTWithPrivatesPtrTy);
  LValue Base = CGF.MakeNaturalAlignAddrLValue(NewTaskNewTaskTTy,
                                               KmpTaskTWithPrivatesQTy);
  LValue TDBase =
      CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin());
  // Fill the data in the resulting kmp_task_t record.
  // Copy shareds if there are any.
  Address KmpTaskSharedsPtr = Address::invalid();
  if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) {
    KmpTaskSharedsPtr =
        Address(CGF.EmitLoadOfScalar(
                    CGF.EmitLValueForField(
                        TDBase, *std::next(KmpTaskTQTyRD->field_begin(),
                                           KmpTaskTShareds)),
                    Loc),
                CGM.getNaturalTypeAlignment(SharedsTy));
    LValue Dest = CGF.MakeAddrLValue(KmpTaskSharedsPtr, SharedsTy);
    LValue Src = CGF.MakeAddrLValue(Shareds, SharedsTy);
    CGF.EmitAggregateCopy(Dest, Src, SharedsTy, AggValueSlot::DoesNotOverlap);
  }
  // Emit initial values for private copies (if any).
  TaskResultTy Result;
  if (!Privates.empty()) {
    emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, Base, KmpTaskTWithPrivatesQTyRD,
                     SharedsTy, SharedsPtrTy, Data, Privates,
                     /*ForDup=*/false);
    // A duplication function is only emitted for taskloops, and only when
    // there are lastprivates or a private needs non-trivial construction.
    if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) &&
        (!Data.LastprivateVars.empty() || checkInitIsRequired(CGF, Privates))) {
      Result.TaskDupFn = emitTaskDupFunction(
          CGM, Loc, D, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTyRD,
          KmpTaskTQTyRD, SharedsTy, SharedsPtrTy, Data, Privates,
          /*WithLastIter=*/!Data.LastprivateVars.empty());
    }
  }
  // Fields of union "kmp_cmplrdata_t" for destructors and priority.
  enum { Priority = 0, Destructors = 1 };
  // Provide pointer to function with destructors for privates.
  auto FI = std::next(KmpTaskTQTyRD->field_begin(), Data1);
  const RecordDecl *KmpCmplrdataUD =
      (*FI)->getType()->getAsUnionType()->getDecl();
  if (NeedsCleanup) {
    llvm::Value *DestructorFn = emitDestructorsFunction(
        CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
        KmpTaskTWithPrivatesQTy);
    LValue Data1LV = CGF.EmitLValueForField(TDBase, *FI);
    LValue DestructorsLV = CGF.EmitLValueForField(
        Data1LV, *std::next(KmpCmplrdataUD->field_begin(), Destructors));
    CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                              DestructorFn, KmpRoutineEntryPtrTy),
                          DestructorsLV);
  }
  // Set priority.
  if (Data.Priority.getInt()) {
    LValue Data2LV = CGF.EmitLValueForField(
        TDBase, *std::next(KmpTaskTQTyRD->field_begin(), Data2));
    LValue PriorityLV = CGF.EmitLValueForField(
        Data2LV, *std::next(KmpCmplrdataUD->field_begin(), Priority));
    CGF.EmitStoreOfScalar(Data.Priority.getPointer(), PriorityLV);
  }
  Result.NewTask = NewTask;
  Result.TaskEntry = TaskEntry;
  Result.NewTaskNewTaskTTy = NewTaskNewTaskTTy;
  Result.TDBase = TDBase;
  Result.KmpTaskTQTyRD = KmpTaskTQTyRD;
  return Result;
}

namespace {
/// Dependence kind for RTL.
enum RTLDependenceKindTy {
  DepIn = 0x01,
  DepInOut = 0x3,
  DepMutexInOutSet = 0x4
};
/// Fields ids in kmp_depend_info record.
enum RTLDependInfoFieldsTy { BaseAddr, Len, Flags };
} // namespace

/// Translates internal dependency kind into the runtime kind.
/// Only in, out, inout and mutexinoutset are expected here; any other kind
/// reaches llvm_unreachable.
static RTLDependenceKindTy translateDependencyKind(OpenMPDependClauseKind K) {
  RTLDependenceKindTy DepKind;
  switch (K) {
  case OMPC_DEPEND_in:
    DepKind = DepIn;
    break;
  // Out and InOut dependencies must use the same code.
  case OMPC_DEPEND_out:
  case OMPC_DEPEND_inout:
    DepKind = DepInOut;
    break;
  case OMPC_DEPEND_mutexinoutset:
    DepKind = DepMutexInOutSet;
    break;
  case OMPC_DEPEND_source:
  case OMPC_DEPEND_sink:
  case OMPC_DEPEND_depobj:
  case OMPC_DEPEND_unknown:
    llvm_unreachable("Unknown task dependence type");
  }
  return DepKind;
}

/// Builds kmp_depend_info, if it is not built yet, and builds flags type.
/// \p FlagsTy is assigned on every call: an unsigned integer type with the
/// same bit width as bool. The record's fields are, in order: an
/// intptr-typed field, a size-typed field and a field of the flags type.
static void getDependTypes(ASTContext &C, QualType &KmpDependInfoTy,
                           QualType &FlagsTy) {
  FlagsTy = C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false);
  if (KmpDependInfoTy.isNull()) {
    RecordDecl *KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info");
    KmpDependInfoRD->startDefinition();
    addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType());
    addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType());
    addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy);
    KmpDependInfoRD->completeDefinition();
    KmpDependInfoTy = C.getRecordType(KmpDependInfoRD);
  }
}

/// Loads the pointer stored in the depobj \p DepobjLVal and views it as an
/// array of kmp_depend_info elements.
/// \return The pair {number of dependencies, lvalue of the first element}.
/// The number is read from the base_addr field of the element located one
/// position before the first element.
std::pair<llvm::Value *, LValue>
CGOpenMPRuntime::getDepobjElements(CodeGenFunction &CGF, LValue DepobjLVal,
                                   SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  LValue Base = CGF.EmitLoadOfPointerLValue(
      DepobjLVal.getAddress(CGF),
      C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
  QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
  Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      Base.getAddress(CGF), CGF.ConvertTypeForMem(KmpDependInfoPtrTy));
  Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(),
                            Base.getTBAAInfo());
  // Element at index -1 holds the element count in its base_addr field.
  llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
      Addr.getElementType(), Addr.getPointer(),
      llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
  LValue NumDepsBase = CGF.MakeAddrLValue(
      Address(DepObjAddr, Addr.getAlignment()), KmpDependInfoTy,
      Base.getBaseInfo(), Base.getTBAAInfo());
  // NumDeps = deps[i].base_addr;
  LValue BaseAddrLVal = CGF.EmitLValueForField(
      NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
  llvm::Value *NumDeps = CGF.EmitLoadOfScalar(BaseAddrLVal, Loc);
  return std::make_pair(NumDeps, Base);
}

/// Fills kmp_depend_info entries of \p DependenciesArray for every
/// expression in \p Data, wrapped in the loops of Data.IteratorExpr when one
/// is present.
/// \param Pos Position of the next entry to write. When it holds an
/// `unsigned *`, the position is known at compile time and the pointee is
/// incremented here; when it holds an `LValue *`, the position is loaded
/// from and stored back to that lvalue by the emitted code.
static void emitDependData(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
                           llvm::PointerUnion<unsigned *, LValue *> Pos,
                           const OMPTaskDataTy::DependData &Data,
                           Address DependenciesArray) {
  CodeGenModule &CGM = CGF.CGM;
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);

  OMPIteratorGeneratorScope IteratorScope(
      CGF, cast_or_null<OMPIteratorExpr>(
               Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
                                 : nullptr));
  for (const Expr *E : Data.DepExprs) {
    llvm::Value *Addr;
    llvm::Value *Size;
    std::tie(Addr, Size) = getPointerAndSize(CGF, E);
    LValue Base;
    if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
      Base = CGF.MakeAddrLValue(
          CGF.Builder.CreateConstGEP(DependenciesArray, *P), KmpDependInfoTy);
    } else {
      LValue &PosLVal = *Pos.get<LValue *>();
      llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
      Base = CGF.MakeAddrLValue(
          Address(CGF.Builder.CreateGEP(DependenciesArray.getElementType(),
                                        DependenciesArray.getPointer(), Idx),
                  DependenciesArray.getAlignment()),
          KmpDependInfoTy);
    }
    // deps[i].base_addr = &<Dependencies[i].second>;
    LValue BaseAddrLVal = CGF.EmitLValueForField(
        Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
    CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
                          BaseAddrLVal);
    // deps[i].len = sizeof(<Dependencies[i].second>);
    LValue LenLVal = CGF.EmitLValueForField(
        Base, *std::next(KmpDependInfoRD->field_begin(), Len));
    CGF.EmitStoreOfScalar(Size, LenLVal);
    // deps[i].flags = <Dependencies[i].first>;
    RTLDependenceKindTy DepKind = translateDependencyKind(Data.DepKind);
    LValue FlagsLVal = CGF.EmitLValueForField(
        Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
    CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
                          FlagsLVal);
    // Advance the position, at compile time or in emitted code.
    if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
      ++(*P);
    } else {
      LValue &PosLVal = *Pos.get<LValue *>();
      llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
      Idx = CGF.Builder.CreateNUWAdd(Idx,
                                     llvm::ConstantInt::get(Idx->getType(), 1));
      CGF.EmitStoreOfScalar(Idx, PosLVal);
    }
  }
}

/// For every depobj expression in \p Data, emits code that reads the number
/// of dependencies stored with the depobj and adds it to a fresh
/// "depobj.size.addr" temporary; the values loaded back from these
/// temporaries once the iterator scope is closed are returned, one per
/// expression.
/// NOTE(review): the element-count lookup repeats the sequence emitted by
/// CGOpenMPRuntime::getDepobjElements.
static SmallVector<llvm::Value *, 4>
emitDepobjElementsSizes(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
                        const OMPTaskDataTy::DependData &Data) {
  assert(Data.DepKind == OMPC_DEPEND_depobj &&
         "Expected depobj dependecy kind.");
  SmallVector<llvm::Value *, 4> Sizes;
  SmallVector<LValue, 4> SizeLVals;
  ASTContext &C = CGF.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
  llvm::Type *KmpDependInfoPtrT = CGF.ConvertTypeForMem(KmpDependInfoPtrTy);
  {
    // The braces bound the iterator scope so that its loops are closed
    // before the sizes are read back below.
    OMPIteratorGeneratorScope IteratorScope(
        CGF, cast_or_null<OMPIteratorExpr>(
                 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
                                   : nullptr));
    for (const Expr *E : Data.DepExprs) {
      LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
      LValue Base = CGF.EmitLoadOfPointerLValue(
          DepobjLVal.getAddress(CGF),
          C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
      Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          Base.getAddress(CGF), KmpDependInfoPtrT);
      Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(),
                                Base.getTBAAInfo());
      // Element at index -1 holds the element count in its base_addr field.
      llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
          Addr.getElementType(), Addr.getPointer(),
          llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
      LValue NumDepsBase = CGF.MakeAddrLValue(
          Address(DepObjAddr, Addr.getAlignment()), KmpDependInfoTy,
          Base.getBaseInfo(), Base.getTBAAInfo());
      // NumDeps = deps[i].base_addr;
      LValue BaseAddrLVal = CGF.EmitLValueForField(
          NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
      llvm::Value *NumDeps =
          CGF.EmitLoadOfScalar(BaseAddrLVal, E->getExprLoc());
      LValue NumLVal = CGF.MakeAddrLValue(
          CGF.CreateMemTemp(C.getUIntPtrType(), "depobj.size.addr"),
          C.getUIntPtrType());
      // size = 0; size += NumDeps;
      CGF.Builder.CreateStore(llvm::ConstantInt::get(CGF.IntPtrTy, 0),
                              NumLVal.getAddress(CGF));
      llvm::Value *PrevVal = CGF.EmitLoadOfScalar(NumLVal, E->getExprLoc());
      llvm::Value *Add = CGF.Builder.CreateNUWAdd(PrevVal, NumDeps);
      CGF.EmitStoreOfScalar(Add, NumLVal);
      SizeLVals.push_back(NumLVal);
    }
  }
  for (unsigned I = 0, E = SizeLVals.size(); I < E; ++I) {
    llvm::Value *Size =
        CGF.EmitLoadOfScalar(SizeLVals[I], Data.DepExprs[I]->getExprLoc());
    Sizes.push_back(Size);
  }
  return Sizes;
}

static void emitDepobjElements(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
                               LValue PosLVal,
                               const OMPTaskDataTy::DependData &Data,
                               Address DependenciesArray) {
  assert(Data.DepKind == OMPC_DEPEND_depobj &&
         "Expected depobj dependecy kind.");
  ASTContext &C = CGF.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
  llvm::Type *KmpDependInfoPtrT = CGF.ConvertTypeForMem(KmpDependInfoPtrTy);
  llvm::Value *ElSize = CGF.getTypeSize(KmpDependInfoTy);
  {
    OMPIteratorGeneratorScope IteratorScope(
        CGF, cast_or_null<OMPIteratorExpr>(
                 Data.IteratorExpr ?
Data.IteratorExpr->IgnoreParenImpCasts() 4817 : nullptr)); 4818 for (unsigned I = 0, End = Data.DepExprs.size(); I < End; ++I) { 4819 const Expr *E = Data.DepExprs[I]; 4820 LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts()); 4821 LValue Base = CGF.EmitLoadOfPointerLValue( 4822 DepobjLVal.getAddress(CGF), 4823 C.getPointerType(C.VoidPtrTy).castAs<PointerType>()); 4824 Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4825 Base.getAddress(CGF), KmpDependInfoPtrT); 4826 Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(), 4827 Base.getTBAAInfo()); 4828 4829 // Get number of elements in a single depobj. 4830 llvm::Value *DepObjAddr = CGF.Builder.CreateGEP( 4831 Addr.getElementType(), Addr.getPointer(), 4832 llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true)); 4833 LValue NumDepsBase = CGF.MakeAddrLValue( 4834 Address(DepObjAddr, Addr.getAlignment()), KmpDependInfoTy, 4835 Base.getBaseInfo(), Base.getTBAAInfo()); 4836 // NumDeps = deps[i].base_addr; 4837 LValue BaseAddrLVal = CGF.EmitLValueForField( 4838 NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr)); 4839 llvm::Value *NumDeps = 4840 CGF.EmitLoadOfScalar(BaseAddrLVal, E->getExprLoc()); 4841 4842 // memcopy dependency data. 4843 llvm::Value *Size = CGF.Builder.CreateNUWMul( 4844 ElSize, 4845 CGF.Builder.CreateIntCast(NumDeps, CGF.SizeTy, /*isSigned=*/false)); 4846 llvm::Value *Pos = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc()); 4847 Address DepAddr = 4848 Address(CGF.Builder.CreateGEP(DependenciesArray.getElementType(), 4849 DependenciesArray.getPointer(), Pos), 4850 DependenciesArray.getAlignment()); 4851 CGF.Builder.CreateMemCpy(DepAddr, Base.getAddress(CGF), Size); 4852 4853 // Increase pos. 
4854 // pos += size; 4855 llvm::Value *Add = CGF.Builder.CreateNUWAdd(Pos, NumDeps); 4856 CGF.EmitStoreOfScalar(Add, PosLVal); 4857 } 4858 } 4859 } 4860 4861 std::pair<llvm::Value *, Address> CGOpenMPRuntime::emitDependClause( 4862 CodeGenFunction &CGF, ArrayRef<OMPTaskDataTy::DependData> Dependencies, 4863 SourceLocation Loc) { 4864 if (llvm::all_of(Dependencies, [](const OMPTaskDataTy::DependData &D) { 4865 return D.DepExprs.empty(); 4866 })) 4867 return std::make_pair(nullptr, Address::invalid()); 4868 // Process list of dependencies. 4869 ASTContext &C = CGM.getContext(); 4870 Address DependenciesArray = Address::invalid(); 4871 llvm::Value *NumOfElements = nullptr; 4872 unsigned NumDependencies = std::accumulate( 4873 Dependencies.begin(), Dependencies.end(), 0, 4874 [](unsigned V, const OMPTaskDataTy::DependData &D) { 4875 return D.DepKind == OMPC_DEPEND_depobj 4876 ? V 4877 : (V + (D.IteratorExpr ? 0 : D.DepExprs.size())); 4878 }); 4879 QualType FlagsTy; 4880 getDependTypes(C, KmpDependInfoTy, FlagsTy); 4881 bool HasDepobjDeps = false; 4882 bool HasRegularWithIterators = false; 4883 llvm::Value *NumOfDepobjElements = llvm::ConstantInt::get(CGF.IntPtrTy, 0); 4884 llvm::Value *NumOfRegularWithIterators = 4885 llvm::ConstantInt::get(CGF.IntPtrTy, 0); 4886 // Calculate number of depobj dependecies and regular deps with the iterators. 4887 for (const OMPTaskDataTy::DependData &D : Dependencies) { 4888 if (D.DepKind == OMPC_DEPEND_depobj) { 4889 SmallVector<llvm::Value *, 4> Sizes = 4890 emitDepobjElementsSizes(CGF, KmpDependInfoTy, D); 4891 for (llvm::Value *Size : Sizes) { 4892 NumOfDepobjElements = 4893 CGF.Builder.CreateNUWAdd(NumOfDepobjElements, Size); 4894 } 4895 HasDepobjDeps = true; 4896 continue; 4897 } 4898 // Include number of iterations, if any. 
4899 4900 if (const auto *IE = cast_or_null<OMPIteratorExpr>(D.IteratorExpr)) { 4901 for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) { 4902 llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper); 4903 Sz = CGF.Builder.CreateIntCast(Sz, CGF.IntPtrTy, /*isSigned=*/false); 4904 llvm::Value *NumClauseDeps = CGF.Builder.CreateNUWMul( 4905 Sz, llvm::ConstantInt::get(CGF.IntPtrTy, D.DepExprs.size())); 4906 NumOfRegularWithIterators = 4907 CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumClauseDeps); 4908 } 4909 HasRegularWithIterators = true; 4910 continue; 4911 } 4912 } 4913 4914 QualType KmpDependInfoArrayTy; 4915 if (HasDepobjDeps || HasRegularWithIterators) { 4916 NumOfElements = llvm::ConstantInt::get(CGM.IntPtrTy, NumDependencies, 4917 /*isSigned=*/false); 4918 if (HasDepobjDeps) { 4919 NumOfElements = 4920 CGF.Builder.CreateNUWAdd(NumOfDepobjElements, NumOfElements); 4921 } 4922 if (HasRegularWithIterators) { 4923 NumOfElements = 4924 CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumOfElements); 4925 } 4926 auto *OVE = new (C) OpaqueValueExpr( 4927 Loc, C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0), 4928 VK_PRValue); 4929 CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, OVE, 4930 RValue::get(NumOfElements)); 4931 KmpDependInfoArrayTy = 4932 C.getVariableArrayType(KmpDependInfoTy, OVE, ArrayType::Normal, 4933 /*IndexTypeQuals=*/0, SourceRange(Loc, Loc)); 4934 // CGF.EmitVariablyModifiedType(KmpDependInfoArrayTy); 4935 // Properly emit variable-sized array. 
4936 auto *PD = ImplicitParamDecl::Create(C, KmpDependInfoArrayTy, 4937 ImplicitParamDecl::Other); 4938 CGF.EmitVarDecl(*PD); 4939 DependenciesArray = CGF.GetAddrOfLocalVar(PD); 4940 NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty, 4941 /*isSigned=*/false); 4942 } else { 4943 KmpDependInfoArrayTy = C.getConstantArrayType( 4944 KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies), nullptr, 4945 ArrayType::Normal, /*IndexTypeQuals=*/0); 4946 DependenciesArray = 4947 CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr"); 4948 DependenciesArray = CGF.Builder.CreateConstArrayGEP(DependenciesArray, 0); 4949 NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumDependencies, 4950 /*isSigned=*/false); 4951 } 4952 unsigned Pos = 0; 4953 for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) { 4954 if (Dependencies[I].DepKind == OMPC_DEPEND_depobj || 4955 Dependencies[I].IteratorExpr) 4956 continue; 4957 emitDependData(CGF, KmpDependInfoTy, &Pos, Dependencies[I], 4958 DependenciesArray); 4959 } 4960 // Copy regular dependecies with iterators. 4961 LValue PosLVal = CGF.MakeAddrLValue( 4962 CGF.CreateMemTemp(C.getSizeType(), "dep.counter.addr"), C.getSizeType()); 4963 CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal); 4964 for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) { 4965 if (Dependencies[I].DepKind == OMPC_DEPEND_depobj || 4966 !Dependencies[I].IteratorExpr) 4967 continue; 4968 emitDependData(CGF, KmpDependInfoTy, &PosLVal, Dependencies[I], 4969 DependenciesArray); 4970 } 4971 // Copy final depobj arrays without iterators. 
4972 if (HasDepobjDeps) { 4973 for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) { 4974 if (Dependencies[I].DepKind != OMPC_DEPEND_depobj) 4975 continue; 4976 emitDepobjElements(CGF, KmpDependInfoTy, PosLVal, Dependencies[I], 4977 DependenciesArray); 4978 } 4979 } 4980 DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4981 DependenciesArray, CGF.VoidPtrTy); 4982 return std::make_pair(NumOfElements, DependenciesArray); 4983 } 4984 4985 Address CGOpenMPRuntime::emitDepobjDependClause( 4986 CodeGenFunction &CGF, const OMPTaskDataTy::DependData &Dependencies, 4987 SourceLocation Loc) { 4988 if (Dependencies.DepExprs.empty()) 4989 return Address::invalid(); 4990 // Process list of dependencies. 4991 ASTContext &C = CGM.getContext(); 4992 Address DependenciesArray = Address::invalid(); 4993 unsigned NumDependencies = Dependencies.DepExprs.size(); 4994 QualType FlagsTy; 4995 getDependTypes(C, KmpDependInfoTy, FlagsTy); 4996 RecordDecl *KmpDependInfoRD = 4997 cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl()); 4998 4999 llvm::Value *Size; 5000 // Define type kmp_depend_info[<Dependencies.size()>]; 5001 // For depobj reserve one extra element to store the number of elements. 5002 // It is required to handle depobj(x) update(in) construct. 
5003 // kmp_depend_info[<Dependencies.size()>] deps; 5004 llvm::Value *NumDepsVal; 5005 CharUnits Align = C.getTypeAlignInChars(KmpDependInfoTy); 5006 if (const auto *IE = 5007 cast_or_null<OMPIteratorExpr>(Dependencies.IteratorExpr)) { 5008 NumDepsVal = llvm::ConstantInt::get(CGF.SizeTy, 1); 5009 for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) { 5010 llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper); 5011 Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false); 5012 NumDepsVal = CGF.Builder.CreateNUWMul(NumDepsVal, Sz); 5013 } 5014 Size = CGF.Builder.CreateNUWAdd(llvm::ConstantInt::get(CGF.SizeTy, 1), 5015 NumDepsVal); 5016 CharUnits SizeInBytes = 5017 C.getTypeSizeInChars(KmpDependInfoTy).alignTo(Align); 5018 llvm::Value *RecSize = CGM.getSize(SizeInBytes); 5019 Size = CGF.Builder.CreateNUWMul(Size, RecSize); 5020 NumDepsVal = 5021 CGF.Builder.CreateIntCast(NumDepsVal, CGF.IntPtrTy, /*isSigned=*/false); 5022 } else { 5023 QualType KmpDependInfoArrayTy = C.getConstantArrayType( 5024 KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies + 1), 5025 nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0); 5026 CharUnits Sz = C.getTypeSizeInChars(KmpDependInfoArrayTy); 5027 Size = CGM.getSize(Sz.alignTo(Align)); 5028 NumDepsVal = llvm::ConstantInt::get(CGF.IntPtrTy, NumDependencies); 5029 } 5030 // Need to allocate on the dynamic memory. 5031 llvm::Value *ThreadID = getThreadID(CGF, Loc); 5032 // Use default allocator. 
5033 llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); 5034 llvm::Value *Args[] = {ThreadID, Size, Allocator}; 5035 5036 llvm::Value *Addr = 5037 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 5038 CGM.getModule(), OMPRTL___kmpc_alloc), 5039 Args, ".dep.arr.addr"); 5040 Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5041 Addr, CGF.ConvertTypeForMem(KmpDependInfoTy)->getPointerTo()); 5042 DependenciesArray = Address(Addr, Align); 5043 // Write number of elements in the first element of array for depobj. 5044 LValue Base = CGF.MakeAddrLValue(DependenciesArray, KmpDependInfoTy); 5045 // deps[i].base_addr = NumDependencies; 5046 LValue BaseAddrLVal = CGF.EmitLValueForField( 5047 Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr)); 5048 CGF.EmitStoreOfScalar(NumDepsVal, BaseAddrLVal); 5049 llvm::PointerUnion<unsigned *, LValue *> Pos; 5050 unsigned Idx = 1; 5051 LValue PosLVal; 5052 if (Dependencies.IteratorExpr) { 5053 PosLVal = CGF.MakeAddrLValue( 5054 CGF.CreateMemTemp(C.getSizeType(), "iterator.counter.addr"), 5055 C.getSizeType()); 5056 CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Idx), PosLVal, 5057 /*IsInit=*/true); 5058 Pos = &PosLVal; 5059 } else { 5060 Pos = &Idx; 5061 } 5062 emitDependData(CGF, KmpDependInfoTy, Pos, Dependencies, DependenciesArray); 5063 DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5064 CGF.Builder.CreateConstGEP(DependenciesArray, 1), CGF.VoidPtrTy); 5065 return DependenciesArray; 5066 } 5067 5068 void CGOpenMPRuntime::emitDestroyClause(CodeGenFunction &CGF, LValue DepobjLVal, 5069 SourceLocation Loc) { 5070 ASTContext &C = CGM.getContext(); 5071 QualType FlagsTy; 5072 getDependTypes(C, KmpDependInfoTy, FlagsTy); 5073 LValue Base = CGF.EmitLoadOfPointerLValue( 5074 DepobjLVal.getAddress(CGF), 5075 C.getPointerType(C.VoidPtrTy).castAs<PointerType>()); 5076 QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy); 5077 Address Addr = 
CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5078 Base.getAddress(CGF), CGF.ConvertTypeForMem(KmpDependInfoPtrTy)); 5079 llvm::Value *DepObjAddr = CGF.Builder.CreateGEP( 5080 Addr.getElementType(), Addr.getPointer(), 5081 llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true)); 5082 DepObjAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(DepObjAddr, 5083 CGF.VoidPtrTy); 5084 llvm::Value *ThreadID = getThreadID(CGF, Loc); 5085 // Use default allocator. 5086 llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); 5087 llvm::Value *Args[] = {ThreadID, DepObjAddr, Allocator}; 5088 5089 // _kmpc_free(gtid, addr, nullptr); 5090 (void)CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 5091 CGM.getModule(), OMPRTL___kmpc_free), 5092 Args); 5093 } 5094 5095 void CGOpenMPRuntime::emitUpdateClause(CodeGenFunction &CGF, LValue DepobjLVal, 5096 OpenMPDependClauseKind NewDepKind, 5097 SourceLocation Loc) { 5098 ASTContext &C = CGM.getContext(); 5099 QualType FlagsTy; 5100 getDependTypes(C, KmpDependInfoTy, FlagsTy); 5101 RecordDecl *KmpDependInfoRD = 5102 cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl()); 5103 llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy); 5104 llvm::Value *NumDeps; 5105 LValue Base; 5106 std::tie(NumDeps, Base) = getDepobjElements(CGF, DepobjLVal, Loc); 5107 5108 Address Begin = Base.getAddress(CGF); 5109 // Cast from pointer to array type to pointer to single element. 5110 llvm::Value *End = CGF.Builder.CreateGEP( 5111 Begin.getElementType(), Begin.getPointer(), NumDeps); 5112 // The basic structure here is a while-do loop. 
5113 llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.body"); 5114 llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.done"); 5115 llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock(); 5116 CGF.EmitBlock(BodyBB); 5117 llvm::PHINode *ElementPHI = 5118 CGF.Builder.CreatePHI(Begin.getType(), 2, "omp.elementPast"); 5119 ElementPHI->addIncoming(Begin.getPointer(), EntryBB); 5120 Begin = Address(ElementPHI, Begin.getAlignment()); 5121 Base = CGF.MakeAddrLValue(Begin, KmpDependInfoTy, Base.getBaseInfo(), 5122 Base.getTBAAInfo()); 5123 // deps[i].flags = NewDepKind; 5124 RTLDependenceKindTy DepKind = translateDependencyKind(NewDepKind); 5125 LValue FlagsLVal = CGF.EmitLValueForField( 5126 Base, *std::next(KmpDependInfoRD->field_begin(), Flags)); 5127 CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind), 5128 FlagsLVal); 5129 5130 // Shift the address forward by one element. 5131 Address ElementNext = 5132 CGF.Builder.CreateConstGEP(Begin, /*Index=*/1, "omp.elementNext"); 5133 ElementPHI->addIncoming(ElementNext.getPointer(), 5134 CGF.Builder.GetInsertBlock()); 5135 llvm::Value *IsEmpty = 5136 CGF.Builder.CreateICmpEQ(ElementNext.getPointer(), End, "omp.isempty"); 5137 CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB); 5138 // Done. 
/// Emit the code that launches a task.
///
/// The task object is created through emitTaskInit and the dependence array
/// through emitDependClause. Without an 'if' condition only the enqueue path
/// (ThenCodeGen) is emitted; with one, emitIfClause receives both the enqueue
/// path and the path that calls the task entry directly (ElseCodeGen).
///
/// \param CGF Function in which the code is emitted.
/// \param Loc Location used for the runtime calls.
/// \param D Directive forwarded to emitTaskInit.
/// \param TaskFunction Outlined task function forwarded to emitTaskInit.
/// \param SharedsTy Type of the shareds record, forwarded to emitTaskInit.
/// \param Shareds Address of the shareds record, forwarded to emitTaskInit.
/// \param IfCond Condition of the 'if' clause, or null if there is none.
/// \param Data Task data; Dependences and Tied are read here.
void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
                                   const OMPExecutableDirective &D,
                                   llvm::Function *TaskFunction,
                                   QualType SharedsTy, Address Shareds,
                                   const Expr *IfCond,
                                   const OMPTaskDataTy &Data) {
  // Nothing to emit when there is no insertion point.
  if (!CGF.HaveInsertPoint())
    return;

  TaskResultTy Result =
      emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
  llvm::Value *NewTask = Result.NewTask;
  llvm::Function *TaskEntry = Result.TaskEntry;
  llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy;
  LValue TDBase = Result.TDBase;
  const RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD;
  // Process list of dependences.
  Address DependenciesArray = Address::invalid();
  llvm::Value *NumOfElements;
  std::tie(NumOfElements, DependenciesArray) =
      emitDependClause(CGF, Data.Dependences, Loc);

  // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
  // libcall.
  // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
  // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
  // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if dependence
  // list is not empty
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
  llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask };
  // Filled only when dependences are present; the lambda below reads it under
  // the same condition.
  llvm::Value *DepTaskArgs[7];
  if (!Data.Dependences.empty()) {
    DepTaskArgs[0] = UpLoc;
    DepTaskArgs[1] = ThreadID;
    DepTaskArgs[2] = NewTask;
    DepTaskArgs[3] = NumOfElements;
    DepTaskArgs[4] = DependenciesArray.getPointer();
    // No noalias dependences are passed: zero count and a null list.
    DepTaskArgs[5] = CGF.Builder.getInt32(0);
    DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }
  // Enqueue path: hand the task over to the runtime. The argument arrays are
  // captured by reference, so the lambda must not outlive this function.
  auto &&ThenCodeGen = [this, &Data, TDBase, KmpTaskTQTyRD, &TaskArgs,
                        &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) {
    if (!Data.Tied) {
      // For an untied task, store 0 into the part_id field before the call.
      auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
      LValue PartIdLVal = CGF.EmitLValueForField(TDBase, *PartIdFI);
      CGF.EmitStoreOfScalar(CGF.Builder.getInt32(0), PartIdLVal);
    }
    if (!Data.Dependences.empty()) {
      CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___kmpc_omp_task_with_deps),
          DepTaskArgs);
    } else {
      CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_omp_task),
                          TaskArgs);
    }
    // Check if parent region is untied and build return for untied task;
    if (auto *Region =
            dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
      Region->emitUntiedSwitch(CGF);
  };

  // Arguments for __kmpc_omp_wait_deps; same layout as DepTaskArgs without
  // the task pointer. Filled only when dependences are present.
  llvm::Value *DepWaitTaskArgs[6];
  if (!Data.Dependences.empty()) {
    DepWaitTaskArgs[0] = UpLoc;
    DepWaitTaskArgs[1] = ThreadID;
    DepWaitTaskArgs[2] = NumOfElements;
    DepWaitTaskArgs[3] = DependenciesArray.getPointer();
    DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
    DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }
  auto &M = CGM.getModule();
  // Direct path: wait for the dependences, then call the task entry between
  // the begin_if0/complete_if0 runtime calls.
  auto &&ElseCodeGen = [this, &M, &TaskArgs, ThreadID, NewTaskNewTaskTTy,
                        TaskEntry, &Data, &DepWaitTaskArgs,
                        Loc](CodeGenFunction &CGF, PrePostActionTy &) {
    CodeGenFunction::RunCleanupsScope LocalScope(CGF);
    // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
    // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
    // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info
    // is specified.
    if (!Data.Dependences.empty())
      CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_omp_wait_deps),
          DepWaitTaskArgs);
    // Call proxy_task_entry(gtid, new_task);
    auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy,
                      Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
      Action.Enter(CGF);
      llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy};
      CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskEntry,
                                                          OutlinedFnArgs);
    };

    // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
    // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
    // NOTE(review): CommonActionTy is defined outside this view; presumably
    // it emits the first callee on entry and the second on exit - confirm.
    RegionCodeGenTy RCG(CodeGen);
    CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                              M, OMPRTL___kmpc_omp_task_begin_if0),
                          TaskArgs,
                          OMPBuilder.getOrCreateRuntimeFunction(
                              M, OMPRTL___kmpc_omp_task_complete_if0),
                          TaskArgs);
    RCG.setAction(Action);
    RCG(CGF);
  };

  if (IfCond) {
    emitIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen);
  } else {
    // No 'if' clause: only the enqueue path is emitted.
    RegionCodeGenTy ThenRCG(ThenCodeGen);
    ThenRCG(CGF);
  }
}
QualType SharedsTy, Address Shareds, 5264 const Expr *IfCond, 5265 const OMPTaskDataTy &Data) { 5266 if (!CGF.HaveInsertPoint()) 5267 return; 5268 TaskResultTy Result = 5269 emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data); 5270 // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc() 5271 // libcall. 5272 // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int 5273 // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int 5274 // sched, kmp_uint64 grainsize, void *task_dup); 5275 llvm::Value *ThreadID = getThreadID(CGF, Loc); 5276 llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc); 5277 llvm::Value *IfVal; 5278 if (IfCond) { 5279 IfVal = CGF.Builder.CreateIntCast(CGF.EvaluateExprAsBool(IfCond), CGF.IntTy, 5280 /*isSigned=*/true); 5281 } else { 5282 IfVal = llvm::ConstantInt::getSigned(CGF.IntTy, /*V=*/1); 5283 } 5284 5285 LValue LBLVal = CGF.EmitLValueForField( 5286 Result.TDBase, 5287 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound)); 5288 const auto *LBVar = 5289 cast<VarDecl>(cast<DeclRefExpr>(D.getLowerBoundVariable())->getDecl()); 5290 CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(CGF), 5291 LBLVal.getQuals(), 5292 /*IsInitializer=*/true); 5293 LValue UBLVal = CGF.EmitLValueForField( 5294 Result.TDBase, 5295 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound)); 5296 const auto *UBVar = 5297 cast<VarDecl>(cast<DeclRefExpr>(D.getUpperBoundVariable())->getDecl()); 5298 CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(CGF), 5299 UBLVal.getQuals(), 5300 /*IsInitializer=*/true); 5301 LValue StLVal = CGF.EmitLValueForField( 5302 Result.TDBase, 5303 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTStride)); 5304 const auto *StVar = 5305 cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl()); 5306 CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(CGF), 5307 StLVal.getQuals(), 5308 /*IsInitializer=*/true); 5309 // 
Store reductions address. 5310 LValue RedLVal = CGF.EmitLValueForField( 5311 Result.TDBase, 5312 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTReductions)); 5313 if (Data.Reductions) { 5314 CGF.EmitStoreOfScalar(Data.Reductions, RedLVal); 5315 } else { 5316 CGF.EmitNullInitialization(RedLVal.getAddress(CGF), 5317 CGF.getContext().VoidPtrTy); 5318 } 5319 enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 }; 5320 llvm::Value *TaskArgs[] = { 5321 UpLoc, 5322 ThreadID, 5323 Result.NewTask, 5324 IfVal, 5325 LBLVal.getPointer(CGF), 5326 UBLVal.getPointer(CGF), 5327 CGF.EmitLoadOfScalar(StLVal, Loc), 5328 llvm::ConstantInt::getSigned( 5329 CGF.IntTy, 1), // Always 1 because taskgroup emitted by the compiler 5330 llvm::ConstantInt::getSigned( 5331 CGF.IntTy, Data.Schedule.getPointer() 5332 ? Data.Schedule.getInt() ? NumTasks : Grainsize 5333 : NoSchedule), 5334 Data.Schedule.getPointer() 5335 ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty, 5336 /*isSigned=*/false) 5337 : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0), 5338 Result.TaskDupFn ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5339 Result.TaskDupFn, CGF.VoidPtrTy) 5340 : llvm::ConstantPointerNull::get(CGF.VoidPtrTy)}; 5341 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 5342 CGM.getModule(), OMPRTL___kmpc_taskloop), 5343 TaskArgs); 5344 } 5345 5346 /// Emit reduction operation for each element of array (required for 5347 /// array sections) LHS op = RHS. 5348 /// \param Type Type of array. 5349 /// \param LHSVar Variable on the left side of the reduction operation 5350 /// (references element of array in original variable). 5351 /// \param RHSVar Variable on the right side of the reduction operation 5352 /// (references element of array in original variable). 5353 /// \param RedOpGen Generator of reduction operation with use of LHSVar and 5354 /// RHSVar. 
static void EmitOMPAggregateReduction(
    CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar,
    const VarDecl *RHSVar,
    const llvm::function_ref<void(CodeGenFunction &CGF, const Expr *,
                                  const Expr *, const Expr *)> &RedOpGen,
    const Expr *XExpr = nullptr, const Expr *EExpr = nullptr,
    const Expr *UpExpr = nullptr) {
  // Emit a loop that invokes RedOpGen once per array element, with LHSVar and
  // RHSVar temporarily remapped to the current element of each array.
  QualType ElementTy;
  Address LHSAddr = CGF.GetAddrOfLocalVar(LHSVar);
  Address RHSAddr = CGF.GetAddrOfLocalVar(RHSVar);

  // Compute the number of elements and the element type from the LHS array.
  // NOTE(review): only LHSAddr is handed to emitArrayLength; RHSAddr is used
  // below exactly as returned by GetAddrOfLocalVar - confirm this is intended.
  const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, LHSAddr);

  llvm::Value *RHSBegin = RHSAddr.getPointer();
  llvm::Value *LHSBegin = LHSAddr.getPointer();
  // One-past-the-end pointer of the LHS range; used as the loop bound.
  llvm::Value *LHSEnd =
      CGF.Builder.CreateGEP(LHSAddr.getElementType(), LHSBegin, NumElements);
  // The loop is an emptiness check followed by a body whose exit test sits at
  // the bottom.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arraycpy.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arraycpy.done");
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(LHSBegin, LHSEnd, "omp.arraycpy.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);

  // PHI for the current RHS element: the begin pointer when coming from the
  // entry block, the advanced pointer when coming from the back edge.
  llvm::PHINode *RHSElementPHI = CGF.Builder.CreatePHI(
      RHSBegin->getType(), 2, "omp.arraycpy.srcElementPast");
  RHSElementPHI->addIncoming(RHSBegin, EntryBB);
  Address RHSElementCurrent =
      Address(RHSElementPHI,
              RHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Same for the current LHS element.
  llvm::PHINode *LHSElementPHI = CGF.Builder.CreatePHI(
      LHSBegin->getType(), 2, "omp.arraycpy.destElementPast");
  LHSElementPHI->addIncoming(LHSBegin, EntryBB);
  Address LHSElementCurrent =
      Address(LHSElementPHI,
              LHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit the reduction operation with LHSVar/RHSVar mapped to the current
  // elements; the mapping is dropped again right after the operation.
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Scope.addPrivate(LHSVar, [=]() { return LHSElementCurrent; });
  Scope.addPrivate(RHSVar, [=]() { return RHSElementCurrent; });
  Scope.Privatize();
  RedOpGen(CGF, XExpr, EExpr, UpExpr);
  Scope.ForceCleanup();

  // Shift the address forward by one element.
  llvm::Value *LHSElementNext = CGF.Builder.CreateConstGEP1_32(
      LHSAddr.getElementType(), LHSElementPHI, /*Idx0=*/1,
      "omp.arraycpy.dest.element");
  llvm::Value *RHSElementNext = CGF.Builder.CreateConstGEP1_32(
      RHSAddr.getElementType(), RHSElementPHI, /*Idx0=*/1,
      "omp.arraycpy.src.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      CGF.Builder.CreateICmpEQ(LHSElementNext, LHSEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  // The back edge comes from whichever block RedOpGen left as the insertion
  // block, which need not be BodyBB.
  LHSElementPHI->addIncoming(LHSElementNext, CGF.Builder.GetInsertBlock());
  RHSElementPHI->addIncoming(RHSElementNext, CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}
If the combiner is a simple expression emit it as 5430 /// is, otherwise consider it as combiner of UDR decl and emit it as a call of 5431 /// UDR combiner function. 5432 static void emitReductionCombiner(CodeGenFunction &CGF, 5433 const Expr *ReductionOp) { 5434 if (const auto *CE = dyn_cast<CallExpr>(ReductionOp)) 5435 if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee())) 5436 if (const auto *DRE = 5437 dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts())) 5438 if (const auto *DRD = 5439 dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) { 5440 std::pair<llvm::Function *, llvm::Function *> Reduction = 5441 CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD); 5442 RValue Func = RValue::get(Reduction.first); 5443 CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func); 5444 CGF.EmitIgnoredExpr(ReductionOp); 5445 return; 5446 } 5447 CGF.EmitIgnoredExpr(ReductionOp); 5448 } 5449 5450 llvm::Function *CGOpenMPRuntime::emitReductionFunction( 5451 SourceLocation Loc, llvm::Type *ArgsType, ArrayRef<const Expr *> Privates, 5452 ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs, 5453 ArrayRef<const Expr *> ReductionOps) { 5454 ASTContext &C = CGM.getContext(); 5455 5456 // void reduction_func(void *LHSArg, void *RHSArg); 5457 FunctionArgList Args; 5458 ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 5459 ImplicitParamDecl::Other); 5460 ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 5461 ImplicitParamDecl::Other); 5462 Args.push_back(&LHSArg); 5463 Args.push_back(&RHSArg); 5464 const auto &CGFI = 5465 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 5466 std::string Name = getName({"omp", "reduction", "reduction_func"}); 5467 auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI), 5468 llvm::GlobalValue::InternalLinkage, Name, 5469 &CGM.getModule()); 5470 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI); 5471 Fn->setDoesNotRecurse(); 
5472 CodeGenFunction CGF(CGM); 5473 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc); 5474 5475 // Dst = (void*[n])(LHSArg); 5476 // Src = (void*[n])(RHSArg); 5477 Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5478 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)), 5479 ArgsType), CGF.getPointerAlign()); 5480 Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5481 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)), 5482 ArgsType), CGF.getPointerAlign()); 5483 5484 // ... 5485 // *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]); 5486 // ... 5487 CodeGenFunction::OMPPrivateScope Scope(CGF); 5488 auto IPriv = Privates.begin(); 5489 unsigned Idx = 0; 5490 for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) { 5491 const auto *RHSVar = 5492 cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl()); 5493 Scope.addPrivate(RHSVar, [&CGF, RHS, Idx, RHSVar]() { 5494 return emitAddrOfVarFromArray(CGF, RHS, Idx, RHSVar); 5495 }); 5496 const auto *LHSVar = 5497 cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl()); 5498 Scope.addPrivate(LHSVar, [&CGF, LHS, Idx, LHSVar]() { 5499 return emitAddrOfVarFromArray(CGF, LHS, Idx, LHSVar); 5500 }); 5501 QualType PrivTy = (*IPriv)->getType(); 5502 if (PrivTy->isVariablyModifiedType()) { 5503 // Get array size and emit VLA type. 
      ++Idx;
      Address Elem = CGF.Builder.CreateConstArrayGEP(LHS, Idx);
      llvm::Value *Ptr = CGF.Builder.CreateLoad(Elem);
      const VariableArrayType *VLA =
          CGF.getContext().getAsVariableArrayType(PrivTy);
      const auto *OVE = cast<OpaqueValueExpr>(VLA->getSizeExpr());
      CodeGenFunction::OpaqueValueMapping OpaqueMap(
          CGF, OVE, RValue::get(CGF.Builder.CreatePtrToInt(Ptr, CGF.SizeTy)));
      CGF.EmitVariablyModifiedType(PrivTy);
    }
  }
  Scope.Privatize();
  IPriv = Privates.begin();
  auto ILHS = LHSExprs.begin();
  auto IRHS = RHSExprs.begin();
  for (const Expr *E : ReductionOps) {
    if ((*IPriv)->getType()->isArrayType()) {
      // Emit reduction for array section.
      const auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
      const auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
      EmitOMPAggregateReduction(
          CGF, (*IPriv)->getType(), LHSVar, RHSVar,
          [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
            emitReductionCombiner(CGF, E);
          });
    } else {
      // Emit reduction for array subscript or single variable.
      emitReductionCombiner(CGF, E);
    }
    ++IPriv;
    ++ILHS;
    ++IRHS;
  }
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}

/// Emits the combiner for a single reduction item.
///
/// If \p PrivateRef has an array type, \p ReductionOp is emitted through
/// EmitOMPAggregateReduction over the variables referenced by \p LHS and
/// \p RHS; otherwise \p ReductionOp is emitted directly.
void CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF,
                                                  const Expr *ReductionOp,
                                                  const Expr *PrivateRef,
                                                  const DeclRefExpr *LHS,
                                                  const DeclRefExpr *RHS) {
  if (PrivateRef->getType()->isArrayType()) {
    // Emit reduction for array section.
    const auto *LHSVar = cast<VarDecl>(LHS->getDecl());
    const auto *RHSVar = cast<VarDecl>(RHS->getDecl());
    EmitOMPAggregateReduction(
        CGF, PrivateRef->getType(), LHSVar, RHSVar,
        [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
          emitReductionCombiner(CGF, ReductionOp);
        });
  } else {
    // Emit reduction for array subscript or single variable.
    emitReductionCombiner(CGF, ReductionOp);
  }
}

/// Emits the reduction code for a list of reduction items.
///
/// Nothing is emitted when \p CGF has no insertion point. When
/// Options.SimpleReduction is set only the per-item combiners are emitted
/// inline. Otherwise the items are packed into a RedList array, handed to
/// __kmpc_reduce (or __kmpc_reduce_nowait when Options.WithNowait is set), and
/// a switch on the returned value selects between the plain combiners (case 1)
/// and the atomic / critical-region combiners (case 2).
///
/// \param Privates Expressions whose types decide whether an item is handled
/// as an array and whether an extra size slot is reserved in RedList.
/// \param LHSExprs DeclRefExprs for the left-hand helper variables.
/// \param RHSExprs DeclRefExprs for the right-hand helper variables.
/// \param ReductionOps One combiner expression per reduction item.
void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
                                    ArrayRef<const Expr *> Privates,
                                    ArrayRef<const Expr *> LHSExprs,
                                    ArrayRef<const Expr *> RHSExprs,
                                    ArrayRef<const Expr *> ReductionOps,
                                    ReductionOptionsTy Options) {
  if (!CGF.HaveInsertPoint())
    return;

  bool WithNowait = Options.WithNowait;
  bool SimpleReduction = Options.SimpleReduction;

  // Next code should be emitted for reduction:
  //
  // static kmp_critical_name lock = { 0 };
  //
  // void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
  //  *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]);
  //  ...
  //  *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1],
  //  *(Type<n>-1*)rhs[<n>-1]);
  // }
  //
  // ...
  // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]};
  // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
  // RedList, reduce_func, &<lock>)) {
  // case 1:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...
  // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  // break;
  // case 2:
  //  ...
  //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
  //  ...
  // [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);]
  // break;
  // default:;
  // }
  //
  // if SimpleReduction is true, only the next code is generated:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...

  ASTContext &C = CGM.getContext();

  if (SimpleReduction) {
    // No runtime call and no switch: just emit every combiner in place.
    CodeGenFunction::RunCleanupsScope Scope(CGF);
    auto IPriv = Privates.begin();
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    for (const Expr *E : ReductionOps) {
      emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
                                  cast<DeclRefExpr>(*IRHS));
      ++IPriv;
      ++ILHS;
      ++IRHS;
    }
    return;
  }

  // 1. Build a list of reduction variables.
  // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]};
  // The array gets one slot per reduction item plus one additional slot for
  // every private of variably modified type (used below for its size).
  auto Size = RHSExprs.size();
  for (const Expr *E : Privates) {
    if (E->getType()->isVariablyModifiedType())
      // Reserve place for array size.
      ++Size;
  }
  llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size);
  QualType ReductionArrayTy =
      C.getConstantArrayType(C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal,
                             /*IndexTypeQuals=*/0);
  Address ReductionList =
      CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list");
  auto IPriv = Privates.begin();
  // Idx is the slot index in RedList; it runs ahead of I whenever a size slot
  // has been inserted.
  unsigned Idx = 0;
  for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) {
    Address Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
    CGF.Builder.CreateStore(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            CGF.EmitLValue(RHSExprs[I]).getPointer(CGF), CGF.VoidPtrTy),
        Elem);
    if ((*IPriv)->getType()->isVariablyModifiedType()) {
      // Store array size.
      // The element count is stored as an integer converted to void*.
      ++Idx;
      Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
      llvm::Value *Size = CGF.Builder.CreateIntCast(
          CGF.getVLASize(
                 CGF.getContext().getAsVariableArrayType((*IPriv)->getType()))
              .NumElts,
          CGF.SizeTy, /*isSigned=*/false);
      CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy),
                              Elem);
    }
  }

  // 2. Emit reduce_func().
  llvm::Function *ReductionFn = emitReductionFunction(
      Loc, CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo(), Privates,
      LHSExprs, RHSExprs, ReductionOps);

  // 3. Create static kmp_critical_name lock = { 0 };
  std::string Name = getName({"reduction"});
  llvm::Value *Lock = getCriticalRegionLock(Name);

  // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
  // RedList, reduce_func, &<lock>);
  llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE);
  llvm::Value *ThreadId = getThreadID(CGF, Loc);
  llvm::Value *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy);
  llvm::Value *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      ReductionList.getPointer(), CGF.VoidPtrTy);
  // Note that <n> is the number of reduction items (RHSExprs.size()), not the
  // number of RedList slots, which may be larger because of size slots.
  llvm::Value *Args[] = {
      IdentTLoc,                             // ident_t *<loc>
      ThreadId,                              // i32 <gtid>
      CGF.Builder.getInt32(RHSExprs.size()), // i32 <n>
      ReductionArrayTySize,                  // size_type sizeof(RedList)
      RL,                                    // void *RedList
      ReductionFn, // void (*) (void *, void *) <reduce_func>
      Lock         // kmp_critical_name *&<lock>
  };
  llvm::Value *Res = CGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(),
          WithNowait ? OMPRTL___kmpc_reduce_nowait : OMPRTL___kmpc_reduce),
      Args);

  // 5. Build switch(res)
  llvm::BasicBlock *DefaultBB = CGF.createBasicBlock(".omp.reduction.default");
  llvm::SwitchInst *SwInst =
      CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2);

  // 6. Build case 1:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...
  // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  // break;
  llvm::BasicBlock *Case1BB = CGF.createBasicBlock(".omp.reduction.case1");
  SwInst->addCase(CGF.Builder.getInt32(1), Case1BB);
  CGF.EmitBlock(Case1BB);

  // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  llvm::Value *EndArgs[] = {
      IdentTLoc, // ident_t *<loc>
      ThreadId,  // i32 <gtid>
      Lock       // kmp_critical_name *&<lock>
  };
  auto &&CodeGen = [Privates, LHSExprs, RHSExprs, ReductionOps](
                       CodeGenFunction &CGF, PrePostActionTy &Action) {
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    auto IPriv = Privates.begin();
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    for (const Expr *E : ReductionOps) {
      RT.emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
                                     cast<DeclRefExpr>(*IRHS));
      ++IPriv;
      ++ILHS;
      ++IRHS;
    }
  };
  RegionCodeGenTy RCG(CodeGen);
  CommonActionTy Action(
      nullptr, llvm::None,
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(), WithNowait ? OMPRTL___kmpc_end_reduce_nowait
                                      : OMPRTL___kmpc_end_reduce),
      EndArgs);
  RCG.setAction(Action);
  RCG(CGF);

  CGF.EmitBranch(DefaultBB);

  // 7. Build case 2:
  //  ...
  //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
  //  ...
  // break;
  llvm::BasicBlock *Case2BB = CGF.createBasicBlock(".omp.reduction.case2");
  SwInst->addCase(CGF.Builder.getInt32(2), Case2BB);
  CGF.EmitBlock(Case2BB);

  auto &&AtomicCodeGen = [Loc, Privates, LHSExprs, RHSExprs, ReductionOps](
                             CodeGenFunction &CGF, PrePostActionTy &Action) {
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    auto IPriv = Privates.begin();
    for (const Expr *E : ReductionOps) {
      // XExpr/UpExpr are only set when the combiner has the form
      // `x = <update>`; EExpr and BO are only set when the analysed part of
      // <update> is a binary operator.
      const Expr *XExpr = nullptr;
      const Expr *EExpr = nullptr;
      const Expr *UpExpr = nullptr;
      BinaryOperatorKind BO = BO_Comma;
      // The `BO` declared by this `if` is the BinaryOperator node and shadows
      // the opcode variable above for the extent of the `if` only.
      if (const auto *BO = dyn_cast<BinaryOperator>(E)) {
        if (BO->getOpcode() == BO_Assign) {
          XExpr = BO->getLHS();
          UpExpr = BO->getRHS();
        }
      }
      // Try to emit update expression as a simple atomic.
      const Expr *RHSExpr = UpExpr;
      if (RHSExpr) {
        // Analyze RHS part of the whole expression.
        if (const auto *ACO = dyn_cast<AbstractConditionalOperator>(
                RHSExpr->IgnoreParenImpCasts())) {
          // If this is a conditional operator, analyze its condition for
          // min/max reduction operator.
          RHSExpr = ACO->getCond();
        }
        if (const auto *BORHS =
                dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) {
          EExpr = BORHS->getRHS();
          BO = BORHS->getOpcode();
        }
      }
      if (XExpr) {
        const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
        auto &&AtomicRedGen = [BO, VD,
                               Loc](CodeGenFunction &CGF, const Expr *XExpr,
                                    const Expr *EExpr, const Expr *UpExpr) {
          LValue X = CGF.EmitLValue(XExpr);
          RValue E;
          if (EExpr)
            E = CGF.EmitAnyExpr(EExpr);
          CGF.EmitOMPAtomicSimpleUpdateExpr(
              X, E, BO, /*IsXLHSInRHSPart=*/true,
              llvm::AtomicOrdering::Monotonic, Loc,
              [&CGF, UpExpr, VD, Loc](RValue XRValue) {
                // Privatize VD to a temporary holding the value passed in as
                // XRValue, then evaluate the update expression against it.
                CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
                PrivateScope.addPrivate(
                    VD, [&CGF, VD, XRValue, Loc]() {
                      Address LHSTemp = CGF.CreateMemTemp(VD->getType());
                      CGF.emitOMPSimpleStore(
                          CGF.MakeAddrLValue(LHSTemp, VD->getType()), XRValue,
                          VD->getType().getNonReferenceType(), Loc);
                      return LHSTemp;
                    });
                (void)PrivateScope.Privatize();
                return CGF.EmitAnyExpr(UpExpr);
              });
        };
        if ((*IPriv)->getType()->isArrayType()) {
          // Emit atomic reduction for array section.
          const auto *RHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
          EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar,
                                    AtomicRedGen, XExpr, EExpr, UpExpr);
        } else {
          // Emit atomic reduction for array subscript or single variable.
          AtomicRedGen(CGF, XExpr, EExpr, UpExpr);
        }
      } else {
        // Emit as a critical region.
        auto &&CritRedGen = [E, Loc](CodeGenFunction &CGF, const Expr *,
                                     const Expr *, const Expr *) {
          CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
          std::string Name = RT.getName({"atomic_reduction"});
          RT.emitCriticalRegion(
              CGF, Name,
              [=](CodeGenFunction &CGF, PrePostActionTy &Action) {
                Action.Enter(CGF);
                emitReductionCombiner(CGF, E);
              },
              Loc);
        };
        if ((*IPriv)->getType()->isArrayType()) {
          const auto *LHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
          const auto *RHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
          EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar,
                                    CritRedGen);
        } else {
          CritRedGen(CGF, nullptr, nullptr, nullptr);
        }
      }
      ++ILHS;
      ++IRHS;
      ++IPriv;
    }
  };
  RegionCodeGenTy AtomicRCG(AtomicCodeGen);
  // For case 2 the __kmpc_end_reduce action is attached only when nowait is
  // absent; with nowait the atomic code is emitted without any action.
  if (!WithNowait) {
    // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>);
    llvm::Value *EndArgs[] = {
        IdentTLoc, // ident_t *<loc>
        ThreadId,  // i32 <gtid>
        Lock       // kmp_critical_name *&<lock>
    };
    CommonActionTy Action(nullptr, llvm::None,
                          OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_end_reduce),
                          EndArgs);
    AtomicRCG.setAction(Action);
    AtomicRCG(CGF);
  } else {
    AtomicRCG(CGF);
  }

  CGF.EmitBranch(DefaultBB);
  CGF.EmitBlock(DefaultBB, /*IsFinished=*/true);
}

/// Generates unique name for artificial threadprivate variables.
/// Format is: <Prefix> "."
/// <Decl_mangled_name> "_" "<Decl_start_loc_raw_enc>"
///
/// The declaration is taken from ::getBaseDecl(Ref) or, failing that, from
/// \p Ref itself (which must then be a DeclRefExpr to a VarDecl). Local
/// variables and parameters contribute their plain name, everything else its
/// mangled name.
static std::string generateUniqueName(CodeGenModule &CGM, StringRef Prefix,
                                      const Expr *Ref) {
  SmallString<256> Buffer;
  llvm::raw_svector_ostream Out(Buffer);
  const clang::DeclRefExpr *DE;
  const VarDecl *D = ::getBaseDecl(Ref, DE);
  if (!D)
    D = cast<VarDecl>(cast<DeclRefExpr>(Ref)->getDecl());
  D = D->getCanonicalDecl();
  // NOTE(review): no explicit "." is written between Prefix and the name here;
  // presumably getName() supplies the separator - confirm.
  std::string Name = CGM.getOpenMPRuntime().getName(
      {D->isLocalVarDeclOrParm() ? D->getName() : CGM.getMangledName(D)});
  Out << Prefix << Name << "_"
      << D->getCanonicalDecl()->getBeginLoc().getRawEncoding();
  return std::string(Out.str());
}

/// Emits reduction initializer function:
/// \code
/// void @.red_init(void* %arg, void* %orig) {
/// %0 = bitcast void* %arg to <type>*
/// store <type> <init>, <type>* %0
/// ret void
/// }
/// \endcode
/// \param RCG Reduction code generator that provides sizes, types and the
/// initializer of the items.
/// \param N Index of the reduction item in \p RCG.
/// \return The created internal-linkage function.
static llvm::Value *emitReduceInitFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N) {
  ASTContext &C = CGM.getContext();
  // Both parameters are declared as restrict-qualified void pointers.
  QualType VoidPtrTy = C.VoidPtrTy;
  VoidPtrTy.addRestrict();
  FunctionArgList Args;
  ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
                          ImplicitParamDecl::Other);
  ImplicitParamDecl ParamOrig(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
                              ImplicitParamDecl::Other);
  Args.emplace_back(&Param);
  Args.emplace_back(&ParamOrig);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_init", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  Address PrivateAddr = CGF.EmitLoadOfPointer(
      CGF.GetAddrOfLocalVar(&Param),
      C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  LValue OrigLVal;
  // If initializer uses initializer from declare reduction construct, emit a
  // pointer to the address of the original reduction item (required by
  // reduction initializer)
  if (RCG.usesReductionInitializer(N)) {
    Address SharedAddr = CGF.GetAddrOfLocalVar(&ParamOrig);
    SharedAddr = CGF.EmitLoadOfPointer(
        SharedAddr,
        CGM.getContext().VoidPtrTy.castAs<PointerType>()->getTypePtr());
    OrigLVal = CGF.MakeAddrLValue(SharedAddr, CGM.getContext().VoidPtrTy);
  } else {
    // Otherwise a null pointer stands in for the original item.
    OrigLVal = CGF.MakeNaturalAlignAddrLValue(
        llvm::ConstantPointerNull::get(CGM.VoidPtrTy),
        CGM.getContext().VoidPtrTy);
  }
  // Emit the initializer:
  // %0 = bitcast void* %arg to <type>*
  // store <type> <init>, <type>* %0
  RCG.emitInitialization(CGF, N, PrivateAddr, OrigLVal,
                         [](CodeGenFunction &) { return false; });
  CGF.FinishFunction();
  return Fn;
}

/// Emits reduction combiner function:
/// \code
/// void @.red_comb(void* %arg0, void* %arg1) {
/// %lhs = bitcast void* %arg0 to <type>*
/// %rhs = bitcast void* %arg1 to <type>*
/// %2 = <ReductionOp>(<type>* %lhs, <type>* %rhs)
/// store <type> %2, <type>* %lhs
/// ret void
/// }
/// \endcode
/// The variables referenced by \p LHS and \p RHS (both must be DeclRefExprs to
/// VarDecls) are remapped to the pointees of the first and second function
/// argument before \p ReductionOp is emitted.
/// \return The created internal-linkage function.
static llvm::Value *emitReduceCombFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N,
                                           const Expr *ReductionOp,
                                           const Expr *LHS, const Expr *RHS,
                                           const Expr *PrivateRef) {
  ASTContext &C = CGM.getContext();
  const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(LHS)->getDecl());
  const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(RHS)->getDecl());
  FunctionArgList Args;
  ImplicitParamDecl ParamInOut(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                               C.VoidPtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl ParamIn(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                            ImplicitParamDecl::Other);
  Args.emplace_back(&ParamInOut);
  Args.emplace_back(&ParamIn);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_comb", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  // Remap lhs and rhs variables to the addresses of the function arguments.
  // %lhs = bitcast void* %arg0 to <type>*
  // %rhs = bitcast void* %arg1 to <type>*
  CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
  PrivateScope.addPrivate(LHSVD, [&C, &CGF, &ParamInOut, LHSVD]() {
    // Pull out the pointer to the variable.
    Address PtrAddr = CGF.EmitLoadOfPointer(
        CGF.GetAddrOfLocalVar(&ParamInOut),
        C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
    return CGF.Builder.CreateElementBitCast(
        PtrAddr, CGF.ConvertTypeForMem(LHSVD->getType()));
  });
  PrivateScope.addPrivate(RHSVD, [&C, &CGF, &ParamIn, RHSVD]() {
    // Pull out the pointer to the variable.
    Address PtrAddr = CGF.EmitLoadOfPointer(
        CGF.GetAddrOfLocalVar(&ParamIn),
        C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
    return CGF.Builder.CreateElementBitCast(
        PtrAddr, CGF.ConvertTypeForMem(RHSVD->getType()));
  });
  PrivateScope.Privatize();
  // Emit the combiner body:
  // %2 = <ReductionOp>(<type> *%lhs, <type> *%rhs)
  // store <type> %2, <type>* %lhs
  CGM.getOpenMPRuntime().emitSingleReductionCombiner(
      CGF, ReductionOp, PrivateRef, cast<DeclRefExpr>(LHS),
      cast<DeclRefExpr>(RHS));
  CGF.FinishFunction();
  return Fn;
}

/// Emits reduction finalizer function:
/// \code
/// void @.red_fini(void* %arg) {
/// %0 = bitcast void* %arg to <type>*
/// <destroy>(<type>* %0)
/// ret void
/// }
/// \endcode
/// \return The created function, or nullptr when RCG.needCleanups(N) is false.
static llvm::Value *emitReduceFiniFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N) {
  if (!RCG.needCleanups(N))
    return nullptr;
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                          ImplicitParamDecl::Other);
  Args.emplace_back(&Param);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy =
      CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_fini", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  Address PrivateAddr = CGF.EmitLoadOfPointer(
      CGF.GetAddrOfLocalVar(&Param),
      C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  // Emit the finalizer body:
  // <destroy>(<type>* %0)
  RCG.emitCleanups(CGF, N, PrivateAddr);
  CGF.FinishFunction(Loc);
  return Fn;
}

/// Builds the array of kmp_taskred_input_t descriptors (one per entry of
/// Data.ReductionVars) and passes it to the task reduction runtime.
///
/// Each descriptor receives the shared and original item addresses, the item
/// size, the generated init / fini / comb helper functions and a flags word.
/// The array is passed to __kmpc_taskred_modifier_init when
/// Data.IsReductionWithTaskMod is set and to __kmpc_taskred_init otherwise.
///
/// \return The result of the emitted runtime call, or nullptr when \p CGF has
/// no insertion point or Data.ReductionVars is empty.
llvm::Value *CGOpenMPRuntime::emitTaskReductionInit(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
    ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint() || Data.ReductionVars.empty())
    return nullptr;

  // Build typedef struct:
  // kmp_taskred_input {
  //   void *reduce_shar; // shared reduction item
  //   void *reduce_orig; // original reduction item used for initialization
  //   size_t reduce_size; // size of data item
  //   void *reduce_init; // data initialization routine
  //   void *reduce_fini; // data finalization routine
  //   void *reduce_comb; // data combiner routine
  //   kmp_task_red_flags_t flags; // flags for additional info from compiler
  // } kmp_taskred_input_t;
  ASTContext &C = CGM.getContext();
  RecordDecl *RD = C.buildImplicitRecord("kmp_taskred_input_t");
  RD->startDefinition();
  const FieldDecl *SharedFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *OrigFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *SizeFD = addFieldToRecordDecl(C, RD, C.getSizeType());
  const FieldDecl *InitFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *FiniFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *CombFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *FlagsFD = addFieldToRecordDecl(
      C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/false));
  RD->completeDefinition();
  QualType RDType = C.getRecordType(RD);
  unsigned Size = Data.ReductionVars.size();
  llvm::APInt ArraySize(/*numBits=*/64, Size);
  QualType ArrayRDType = C.getConstantArrayType(
      RDType, ArraySize, nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0);
  // kmp_taskred_input_t .rd_input.[Size];
  Address TaskRedInput = CGF.CreateMemTemp(ArrayRDType, ".rd_input.");
  ReductionCodeGen RCG(Data.ReductionVars, Data.ReductionOrigs,
                       Data.ReductionCopies, Data.ReductionOps);
  for (unsigned Cnt = 0; Cnt < Size; ++Cnt) {
    // kmp_taskred_input_t &ElemLVal = .rd_input.[Cnt];
    llvm::Value *Idxs[] = {llvm::ConstantInt::get(CGM.SizeTy, /*V=*/0),
                           llvm::ConstantInt::get(CGM.SizeTy, Cnt)};
    llvm::Value *GEP = CGF.EmitCheckedInBoundsGEP(
        TaskRedInput.getPointer(), Idxs,
        /*SignedIndices=*/false, /*IsSubtraction=*/false, Loc,
        ".rd_input.gep.");
    LValue ElemLVal = CGF.MakeNaturalAlignAddrLValue(GEP, RDType);
    // ElemLVal.reduce_shar = &Shareds[Cnt];
    LValue SharedLVal = CGF.EmitLValueForField(ElemLVal, SharedFD);
    RCG.emitSharedOrigLValue(CGF, Cnt);
    llvm::Value *CastedShared =
        CGF.EmitCastToVoidPtr(RCG.getSharedLValue(Cnt).getPointer(CGF));
    CGF.EmitStoreOfScalar(CastedShared, SharedLVal);
    // ElemLVal.reduce_orig = &Origs[Cnt];
    LValue OrigLVal = CGF.EmitLValueForField(ElemLVal, OrigFD);
    llvm::Value *CastedOrig =
        CGF.EmitCastToVoidPtr(RCG.getOrigLValue(Cnt).getPointer(CGF));
    CGF.EmitStoreOfScalar(CastedOrig, OrigLVal);
    RCG.emitAggregateType(CGF, Cnt);
    llvm::Value *SizeValInChars;
    llvm::Value *SizeVal;
    std::tie(SizeValInChars, SizeVal) = RCG.getSizes(Cnt);
    // We use delayed creation/initialization for VLAs and array sections. It is
    // required because runtime does not provide the way to pass the sizes of
    // VLAs/array sections to initializer/combiner/finalizer functions. Instead
    // threadprivate global variables are used to store these values and use
    // them in the functions.
    bool DelayedCreation = !!SizeVal;
    // ElemLVal.reduce_size = size of the item in chars, as size_t;
    SizeValInChars = CGF.Builder.CreateIntCast(SizeValInChars, CGM.SizeTy,
                                               /*isSigned=*/false);
    LValue SizeLVal = CGF.EmitLValueForField(ElemLVal, SizeFD);
    CGF.EmitStoreOfScalar(SizeValInChars, SizeLVal);
    // ElemLVal.reduce_init = init;
    LValue InitLVal = CGF.EmitLValueForField(ElemLVal, InitFD);
    llvm::Value *InitAddr =
        CGF.EmitCastToVoidPtr(emitReduceInitFunction(CGM, Loc, RCG, Cnt));
    CGF.EmitStoreOfScalar(InitAddr, InitLVal);
    // ElemLVal.reduce_fini = fini;
    // A null pointer is stored when no finalizer function was generated.
    LValue FiniLVal = CGF.EmitLValueForField(ElemLVal, FiniFD);
    llvm::Value *Fini = emitReduceFiniFunction(CGM, Loc, RCG, Cnt);
    llvm::Value *FiniAddr = Fini
                                ? CGF.EmitCastToVoidPtr(Fini)
                                : llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
    CGF.EmitStoreOfScalar(FiniAddr, FiniLVal);
    // ElemLVal.reduce_comb = comb;
    LValue CombLVal = CGF.EmitLValueForField(ElemLVal, CombFD);
    llvm::Value *CombAddr = CGF.EmitCastToVoidPtr(emitReduceCombFunction(
        CGM, Loc, RCG, Cnt, Data.ReductionOps[Cnt], LHSExprs[Cnt],
        RHSExprs[Cnt], Data.ReductionCopies[Cnt]));
    CGF.EmitStoreOfScalar(CombAddr, CombLVal);
    // ElemLVal.flags = DelayedCreation ? 1 : 0;
    LValue FlagsLVal = CGF.EmitLValueForField(ElemLVal, FlagsFD);
    if (DelayedCreation) {
      CGF.EmitStoreOfScalar(
          llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/1, /*isSigned=*/true),
          FlagsLVal);
    } else
      CGF.EmitNullInitialization(FlagsLVal.getAddress(CGF),
                                 FlagsLVal.getType());
  }
  if (Data.IsReductionWithTaskMod) {
    // Build call void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int
    // is_ws, int num, void *data);
    llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
    llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
                                                  CGM.IntTy, /*isSigned=*/true);
    llvm::Value *Args[] = {
        IdentTLoc, GTid,
        llvm::ConstantInt::get(CGM.IntTy, Data.IsWorksharingReduction ? 1 : 0,
                               /*isSigned=*/true),
        llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            TaskRedInput.getPointer(), CGM.VoidPtrTy)};
    return CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_taskred_modifier_init),
        Args);
  }
  // Build call void *__kmpc_taskred_init(int gtid, int num_data, void *data);
  llvm::Value *Args[] = {
      CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy,
                                /*isSigned=*/true),
      llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TaskRedInput.getPointer(),
                                                      CGM.VoidPtrTy)};
  return CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                                 CGM.getModule(), OMPRTL___kmpc_taskred_init),
                             Args);
}

/// Emits the call that finalizes a task reduction with a task modifier.
///
/// \param IsWorksharingReduction Passed to the runtime as the is_ws argument
/// (1 when true, 0 otherwise).
void CGOpenMPRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
                                            SourceLocation Loc,
                                            bool IsWorksharingReduction) {
  // Build call __kmpc_task_reduction_modifier_fini(ident_t *loc, int gtid, int
  // is_ws); the result of the call is ignored.
  llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
  llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
                                                CGM.IntTy, /*isSigned=*/true);
  llvm::Value *Args[] = {IdentTLoc, GTid,
                         llvm::ConstantInt::get(CGM.IntTy,
                                                IsWorksharingReduction ? 1 : 0,
                                                /*isSigned=*/true)};
  (void)CGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(), OMPRTL___kmpc_task_reduction_modifier_fini),
      Args);
}

void CGOpenMPRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              ReductionCodeGen &RCG,
                                              unsigned N) {
  auto Sizes = RCG.getSizes(N);
  // Emit threadprivate global variable if the type is non-constant
  // (Sizes.second != nullptr).
6237 if (Sizes.second) { 6238 llvm::Value *SizeVal = CGF.Builder.CreateIntCast(Sizes.second, CGM.SizeTy, 6239 /*isSigned=*/false); 6240 Address SizeAddr = getAddrOfArtificialThreadPrivate( 6241 CGF, CGM.getContext().getSizeType(), 6242 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N))); 6243 CGF.Builder.CreateStore(SizeVal, SizeAddr, /*IsVolatile=*/false); 6244 } 6245 } 6246 6247 Address CGOpenMPRuntime::getTaskReductionItem(CodeGenFunction &CGF, 6248 SourceLocation Loc, 6249 llvm::Value *ReductionsPtr, 6250 LValue SharedLVal) { 6251 // Build call void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void 6252 // *d); 6253 llvm::Value *Args[] = {CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), 6254 CGM.IntTy, 6255 /*isSigned=*/true), 6256 ReductionsPtr, 6257 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 6258 SharedLVal.getPointer(CGF), CGM.VoidPtrTy)}; 6259 return Address( 6260 CGF.EmitRuntimeCall( 6261 OMPBuilder.getOrCreateRuntimeFunction( 6262 CGM.getModule(), OMPRTL___kmpc_task_reduction_get_th_data), 6263 Args), 6264 SharedLVal.getAlignment()); 6265 } 6266 6267 void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF, 6268 SourceLocation Loc) { 6269 if (!CGF.HaveInsertPoint()) 6270 return; 6271 6272 if (CGF.CGM.getLangOpts().OpenMPIRBuilder) { 6273 OMPBuilder.createTaskwait(CGF.Builder); 6274 } else { 6275 // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32 6276 // global_tid); 6277 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 6278 // Ignore return result until untied tasks are supported. 
6279 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 6280 CGM.getModule(), OMPRTL___kmpc_omp_taskwait), 6281 Args); 6282 } 6283 6284 if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) 6285 Region->emitUntiedSwitch(CGF); 6286 } 6287 6288 void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF, 6289 OpenMPDirectiveKind InnerKind, 6290 const RegionCodeGenTy &CodeGen, 6291 bool HasCancel) { 6292 if (!CGF.HaveInsertPoint()) 6293 return; 6294 InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel, 6295 InnerKind != OMPD_critical && 6296 InnerKind != OMPD_master && 6297 InnerKind != OMPD_masked); 6298 CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr); 6299 } 6300 6301 namespace { 6302 enum RTCancelKind { 6303 CancelNoreq = 0, 6304 CancelParallel = 1, 6305 CancelLoop = 2, 6306 CancelSections = 3, 6307 CancelTaskgroup = 4 6308 }; 6309 } // anonymous namespace 6310 6311 static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) { 6312 RTCancelKind CancelKind = CancelNoreq; 6313 if (CancelRegion == OMPD_parallel) 6314 CancelKind = CancelParallel; 6315 else if (CancelRegion == OMPD_for) 6316 CancelKind = CancelLoop; 6317 else if (CancelRegion == OMPD_sections) 6318 CancelKind = CancelSections; 6319 else { 6320 assert(CancelRegion == OMPD_taskgroup); 6321 CancelKind = CancelTaskgroup; 6322 } 6323 return CancelKind; 6324 } 6325 6326 void CGOpenMPRuntime::emitCancellationPointCall( 6327 CodeGenFunction &CGF, SourceLocation Loc, 6328 OpenMPDirectiveKind CancelRegion) { 6329 if (!CGF.HaveInsertPoint()) 6330 return; 6331 // Build call kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32 6332 // global_tid, kmp_int32 cncl_kind); 6333 if (auto *OMPRegionInfo = 6334 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) { 6335 // For 'cancellation point taskgroup', the task region info may not have a 6336 // cancel. This may instead happen in another adjacent task. 
    if (CancelRegion == OMPD_taskgroup || OMPRegionInfo->hasCancel()) {
      llvm::Value *Args[] = {
          emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
          CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
      // The result is tested below: a non-null value takes the exit path.
      llvm::Value *Result = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___kmpc_cancellationpoint),
          Args);
      // if (__kmpc_cancellationpoint()) {
      //   call i32 @__kmpc_cancel_barrier(...) // for parallel cancellation only
      //   exit from construct;
      // }
      llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
      llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
      llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
      CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
      CGF.EmitBlock(ExitBB);
      if (CancelRegion == OMPD_parallel)
        emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false);
      // exit from construct;
      CodeGenFunction::JumpDest CancelDest =
          CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
      CGF.EmitBranchThroughCleanup(CancelDest);
      CGF.EmitBlock(ContBB, /*IsFinished=*/true);
    }
  }
}

/// Emits a call to __kmpc_cancel for the region kind \p CancelRegion and the
/// branch that leaves the construct when the call returns a non-null value.
///
/// Nothing is emitted when \p CGF has no insertion point or when the current
/// captured statement info is not a CGOpenMPRegionInfo.
///
/// \param IfCond Optional condition; when present the cancel code is emitted
/// through emitIfClause with an empty else branch, otherwise it is emitted
/// unconditionally.
void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc,
                                     const Expr *IfCond,
                                     OpenMPDirectiveKind CancelRegion) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
  // kmp_int32 cncl_kind);
  auto &M = CGM.getModule();
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    // `this` is captured for the OMPBuilder access inside the lambda; the
    // other runtime calls go through RT.
    auto &&ThenGen = [this, &M, Loc, CancelRegion,
                      OMPRegionInfo](CodeGenFunction &CGF, PrePostActionTy &) {
      CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
      llvm::Value *Args[] = {
          RT.emitUpdateLocation(CGF, Loc), RT.getThreadID(CGF, Loc),
          CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
      // The result is tested below: a non-null value takes the exit path.
      llvm::Value *Result = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_cancel), Args);
      // if (__kmpc_cancel()) {
      //   call i32 @__kmpc_cancel_barrier(...) // for parallel cancellation only
      //   exit from construct;
      // }
      llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
      llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
      llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
      CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
      CGF.EmitBlock(ExitBB);
      if (CancelRegion == OMPD_parallel)
        RT.emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false);
      // exit from construct;
      CodeGenFunction::JumpDest CancelDest =
          CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
      CGF.EmitBranchThroughCleanup(CancelDest);
      CGF.EmitBlock(ContBB, /*IsFinished=*/true);
    };
    if (IfCond) {
      emitIfClause(CGF, IfCond, ThenGen,
                   [](CodeGenFunction &, PrePostActionTy &) {});
    } else {
      RegionCodeGenTy ThenRCG(ThenGen);
      ThenRCG(CGF);
    }
  }
}

namespace {
/// Cleanup action for uses_allocators support.
6414 class OMPUsesAllocatorsActionTy final : public PrePostActionTy { 6415 ArrayRef<std::pair<const Expr *, const Expr *>> Allocators; 6416 6417 public: 6418 OMPUsesAllocatorsActionTy( 6419 ArrayRef<std::pair<const Expr *, const Expr *>> Allocators) 6420 : Allocators(Allocators) {} 6421 void Enter(CodeGenFunction &CGF) override { 6422 if (!CGF.HaveInsertPoint()) 6423 return; 6424 for (const auto &AllocatorData : Allocators) { 6425 CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsInit( 6426 CGF, AllocatorData.first, AllocatorData.second); 6427 } 6428 } 6429 void Exit(CodeGenFunction &CGF) override { 6430 if (!CGF.HaveInsertPoint()) 6431 return; 6432 for (const auto &AllocatorData : Allocators) { 6433 CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsFini(CGF, 6434 AllocatorData.first); 6435 } 6436 } 6437 }; 6438 } // namespace 6439 6440 void CGOpenMPRuntime::emitTargetOutlinedFunction( 6441 const OMPExecutableDirective &D, StringRef ParentName, 6442 llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, 6443 bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) { 6444 assert(!ParentName.empty() && "Invalid target region parent name!"); 6445 HasEmittedTargetRegion = true; 6446 SmallVector<std::pair<const Expr *, const Expr *>, 4> Allocators; 6447 for (const auto *C : D.getClausesOfKind<OMPUsesAllocatorsClause>()) { 6448 for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) { 6449 const OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I); 6450 if (!D.AllocatorTraits) 6451 continue; 6452 Allocators.emplace_back(D.Allocator, D.AllocatorTraits); 6453 } 6454 } 6455 OMPUsesAllocatorsActionTy UsesAllocatorAction(Allocators); 6456 CodeGen.setAction(UsesAllocatorAction); 6457 emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID, 6458 IsOffloadEntry, CodeGen); 6459 } 6460 6461 void CGOpenMPRuntime::emitUsesAllocatorsInit(CodeGenFunction &CGF, 6462 const Expr *Allocator, 6463 const Expr *AllocatorTraits) { 6464 llvm::Value *ThreadId = 
getThreadID(CGF, Allocator->getExprLoc()); 6465 ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true); 6466 // Use default memspace handle. 6467 llvm::Value *MemSpaceHandle = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); 6468 llvm::Value *NumTraits = llvm::ConstantInt::get( 6469 CGF.IntTy, cast<ConstantArrayType>( 6470 AllocatorTraits->getType()->getAsArrayTypeUnsafe()) 6471 ->getSize() 6472 .getLimitedValue()); 6473 LValue AllocatorTraitsLVal = CGF.EmitLValue(AllocatorTraits); 6474 Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 6475 AllocatorTraitsLVal.getAddress(CGF), CGF.VoidPtrPtrTy); 6476 AllocatorTraitsLVal = CGF.MakeAddrLValue(Addr, CGF.getContext().VoidPtrTy, 6477 AllocatorTraitsLVal.getBaseInfo(), 6478 AllocatorTraitsLVal.getTBAAInfo()); 6479 llvm::Value *Traits = 6480 CGF.EmitLoadOfScalar(AllocatorTraitsLVal, AllocatorTraits->getExprLoc()); 6481 6482 llvm::Value *AllocatorVal = 6483 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 6484 CGM.getModule(), OMPRTL___kmpc_init_allocator), 6485 {ThreadId, MemSpaceHandle, NumTraits, Traits}); 6486 // Store to allocator. 
6487 CGF.EmitVarDecl(*cast<VarDecl>( 6488 cast<DeclRefExpr>(Allocator->IgnoreParenImpCasts())->getDecl())); 6489 LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts()); 6490 AllocatorVal = 6491 CGF.EmitScalarConversion(AllocatorVal, CGF.getContext().VoidPtrTy, 6492 Allocator->getType(), Allocator->getExprLoc()); 6493 CGF.EmitStoreOfScalar(AllocatorVal, AllocatorLVal); 6494 } 6495 6496 void CGOpenMPRuntime::emitUsesAllocatorsFini(CodeGenFunction &CGF, 6497 const Expr *Allocator) { 6498 llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc()); 6499 ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true); 6500 LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts()); 6501 llvm::Value *AllocatorVal = 6502 CGF.EmitLoadOfScalar(AllocatorLVal, Allocator->getExprLoc()); 6503 AllocatorVal = CGF.EmitScalarConversion(AllocatorVal, Allocator->getType(), 6504 CGF.getContext().VoidPtrTy, 6505 Allocator->getExprLoc()); 6506 (void)CGF.EmitRuntimeCall( 6507 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), 6508 OMPRTL___kmpc_destroy_allocator), 6509 {ThreadId, AllocatorVal}); 6510 } 6511 6512 void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper( 6513 const OMPExecutableDirective &D, StringRef ParentName, 6514 llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, 6515 bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) { 6516 // Create a unique name for the entry function using the source location 6517 // information of the current target region. The name will be something like: 6518 // 6519 // __omp_offloading_DD_FFFF_PP_lBB 6520 // 6521 // where DD_FFFF is an ID unique to the file (device and file IDs), PP is the 6522 // mangled name of the function that encloses the target region and BB is the 6523 // line number of the target region. 
6524 6525 unsigned DeviceID; 6526 unsigned FileID; 6527 unsigned Line; 6528 getTargetEntryUniqueInfo(CGM.getContext(), D.getBeginLoc(), DeviceID, FileID, 6529 Line); 6530 SmallString<64> EntryFnName; 6531 { 6532 llvm::raw_svector_ostream OS(EntryFnName); 6533 OS << "__omp_offloading" << llvm::format("_%x", DeviceID) 6534 << llvm::format("_%x_", FileID) << ParentName << "_l" << Line; 6535 } 6536 6537 const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target); 6538 6539 CodeGenFunction CGF(CGM, true); 6540 CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName); 6541 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 6542 6543 OutlinedFn = CGF.GenerateOpenMPCapturedStmtFunction(CS, D.getBeginLoc()); 6544 6545 // If this target outline function is not an offload entry, we don't need to 6546 // register it. 6547 if (!IsOffloadEntry) 6548 return; 6549 6550 // The target region ID is used by the runtime library to identify the current 6551 // target region, so it only has to be unique and not necessarily point to 6552 // anything. It could be the pointer to the outlined function that implements 6553 // the target region, but we aren't using that so that the compiler doesn't 6554 // need to keep that, and could therefore inline the host function if proven 6555 // worthwhile during optimization. In the other hand, if emitting code for the 6556 // device, the ID has to be the function address so that it can retrieved from 6557 // the offloading entry and launched by the runtime library. We also mark the 6558 // outlined function to have external linkage in case we are emitting code for 6559 // the device, because these functions will be entry points to the device. 
6560 6561 if (CGM.getLangOpts().OpenMPIsDevice) { 6562 OutlinedFnID = llvm::ConstantExpr::getBitCast(OutlinedFn, CGM.Int8PtrTy); 6563 OutlinedFn->setLinkage(llvm::GlobalValue::WeakAnyLinkage); 6564 OutlinedFn->setDSOLocal(false); 6565 if (CGM.getTriple().isAMDGCN()) 6566 OutlinedFn->setCallingConv(llvm::CallingConv::AMDGPU_KERNEL); 6567 } else { 6568 std::string Name = getName({EntryFnName, "region_id"}); 6569 OutlinedFnID = new llvm::GlobalVariable( 6570 CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true, 6571 llvm::GlobalValue::WeakAnyLinkage, 6572 llvm::Constant::getNullValue(CGM.Int8Ty), Name); 6573 } 6574 6575 // Register the information for the entry associated with this target region. 6576 OffloadEntriesInfoManager.registerTargetRegionEntryInfo( 6577 DeviceID, FileID, ParentName, Line, OutlinedFn, OutlinedFnID, 6578 OffloadEntriesInfoManagerTy::OMPTargetRegionEntryTargetRegion); 6579 6580 // Add NumTeams and ThreadLimit attributes to the outlined GPU function 6581 int32_t DefaultValTeams = -1; 6582 getNumTeamsExprForTargetDirective(CGF, D, DefaultValTeams); 6583 if (DefaultValTeams > 0) { 6584 OutlinedFn->addFnAttr("omp_target_num_teams", 6585 std::to_string(DefaultValTeams)); 6586 } 6587 int32_t DefaultValThreads = -1; 6588 getNumThreadsExprForTargetDirective(CGF, D, DefaultValThreads); 6589 if (DefaultValThreads > 0) { 6590 OutlinedFn->addFnAttr("omp_target_thread_limit", 6591 std::to_string(DefaultValThreads)); 6592 } 6593 } 6594 6595 /// Checks if the expression is constant or does not have non-trivial function 6596 /// calls. 6597 static bool isTrivial(ASTContext &Ctx, const Expr * E) { 6598 // We can skip constant expressions. 6599 // We can skip expressions with trivial calls or simple expressions. 
6600 return (E->isEvaluatable(Ctx, Expr::SE_AllowUndefinedBehavior) || 6601 !E->hasNonTrivialCall(Ctx)) && 6602 !E->HasSideEffects(Ctx, /*IncludePossibleEffects=*/true); 6603 } 6604 6605 const Stmt *CGOpenMPRuntime::getSingleCompoundChild(ASTContext &Ctx, 6606 const Stmt *Body) { 6607 const Stmt *Child = Body->IgnoreContainers(); 6608 while (const auto *C = dyn_cast_or_null<CompoundStmt>(Child)) { 6609 Child = nullptr; 6610 for (const Stmt *S : C->body()) { 6611 if (const auto *E = dyn_cast<Expr>(S)) { 6612 if (isTrivial(Ctx, E)) 6613 continue; 6614 } 6615 // Some of the statements can be ignored. 6616 if (isa<AsmStmt>(S) || isa<NullStmt>(S) || isa<OMPFlushDirective>(S) || 6617 isa<OMPBarrierDirective>(S) || isa<OMPTaskyieldDirective>(S)) 6618 continue; 6619 // Analyze declarations. 6620 if (const auto *DS = dyn_cast<DeclStmt>(S)) { 6621 if (llvm::all_of(DS->decls(), [](const Decl *D) { 6622 if (isa<EmptyDecl>(D) || isa<DeclContext>(D) || 6623 isa<TypeDecl>(D) || isa<PragmaCommentDecl>(D) || 6624 isa<PragmaDetectMismatchDecl>(D) || isa<UsingDecl>(D) || 6625 isa<UsingDirectiveDecl>(D) || 6626 isa<OMPDeclareReductionDecl>(D) || 6627 isa<OMPThreadPrivateDecl>(D) || isa<OMPAllocateDecl>(D)) 6628 return true; 6629 const auto *VD = dyn_cast<VarDecl>(D); 6630 if (!VD) 6631 return false; 6632 return VD->hasGlobalStorage() || !VD->isUsed(); 6633 })) 6634 continue; 6635 } 6636 // Found multiple children - cannot get the one child only. 
6637 if (Child) 6638 return nullptr; 6639 Child = S; 6640 } 6641 if (Child) 6642 Child = Child->IgnoreContainers(); 6643 } 6644 return Child; 6645 } 6646 6647 const Expr *CGOpenMPRuntime::getNumTeamsExprForTargetDirective( 6648 CodeGenFunction &CGF, const OMPExecutableDirective &D, 6649 int32_t &DefaultVal) { 6650 6651 OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind(); 6652 assert(isOpenMPTargetExecutionDirective(DirectiveKind) && 6653 "Expected target-based executable directive."); 6654 switch (DirectiveKind) { 6655 case OMPD_target: { 6656 const auto *CS = D.getInnermostCapturedStmt(); 6657 const auto *Body = 6658 CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true); 6659 const Stmt *ChildStmt = 6660 CGOpenMPRuntime::getSingleCompoundChild(CGF.getContext(), Body); 6661 if (const auto *NestedDir = 6662 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) { 6663 if (isOpenMPTeamsDirective(NestedDir->getDirectiveKind())) { 6664 if (NestedDir->hasClausesOfKind<OMPNumTeamsClause>()) { 6665 const Expr *NumTeams = 6666 NestedDir->getSingleClause<OMPNumTeamsClause>()->getNumTeams(); 6667 if (NumTeams->isIntegerConstantExpr(CGF.getContext())) 6668 if (auto Constant = 6669 NumTeams->getIntegerConstantExpr(CGF.getContext())) 6670 DefaultVal = Constant->getExtValue(); 6671 return NumTeams; 6672 } 6673 DefaultVal = 0; 6674 return nullptr; 6675 } 6676 if (isOpenMPParallelDirective(NestedDir->getDirectiveKind()) || 6677 isOpenMPSimdDirective(NestedDir->getDirectiveKind())) { 6678 DefaultVal = 1; 6679 return nullptr; 6680 } 6681 DefaultVal = 1; 6682 return nullptr; 6683 } 6684 // A value of -1 is used to check if we need to emit no teams region 6685 DefaultVal = -1; 6686 return nullptr; 6687 } 6688 case OMPD_target_teams: 6689 case OMPD_target_teams_distribute: 6690 case OMPD_target_teams_distribute_simd: 6691 case OMPD_target_teams_distribute_parallel_for: 6692 case OMPD_target_teams_distribute_parallel_for_simd: { 6693 if 
(D.hasClausesOfKind<OMPNumTeamsClause>()) { 6694 const Expr *NumTeams = 6695 D.getSingleClause<OMPNumTeamsClause>()->getNumTeams(); 6696 if (NumTeams->isIntegerConstantExpr(CGF.getContext())) 6697 if (auto Constant = NumTeams->getIntegerConstantExpr(CGF.getContext())) 6698 DefaultVal = Constant->getExtValue(); 6699 return NumTeams; 6700 } 6701 DefaultVal = 0; 6702 return nullptr; 6703 } 6704 case OMPD_target_parallel: 6705 case OMPD_target_parallel_for: 6706 case OMPD_target_parallel_for_simd: 6707 case OMPD_target_simd: 6708 DefaultVal = 1; 6709 return nullptr; 6710 case OMPD_parallel: 6711 case OMPD_for: 6712 case OMPD_parallel_for: 6713 case OMPD_parallel_master: 6714 case OMPD_parallel_sections: 6715 case OMPD_for_simd: 6716 case OMPD_parallel_for_simd: 6717 case OMPD_cancel: 6718 case OMPD_cancellation_point: 6719 case OMPD_ordered: 6720 case OMPD_threadprivate: 6721 case OMPD_allocate: 6722 case OMPD_task: 6723 case OMPD_simd: 6724 case OMPD_tile: 6725 case OMPD_unroll: 6726 case OMPD_sections: 6727 case OMPD_section: 6728 case OMPD_single: 6729 case OMPD_master: 6730 case OMPD_critical: 6731 case OMPD_taskyield: 6732 case OMPD_barrier: 6733 case OMPD_taskwait: 6734 case OMPD_taskgroup: 6735 case OMPD_atomic: 6736 case OMPD_flush: 6737 case OMPD_depobj: 6738 case OMPD_scan: 6739 case OMPD_teams: 6740 case OMPD_target_data: 6741 case OMPD_target_exit_data: 6742 case OMPD_target_enter_data: 6743 case OMPD_distribute: 6744 case OMPD_distribute_simd: 6745 case OMPD_distribute_parallel_for: 6746 case OMPD_distribute_parallel_for_simd: 6747 case OMPD_teams_distribute: 6748 case OMPD_teams_distribute_simd: 6749 case OMPD_teams_distribute_parallel_for: 6750 case OMPD_teams_distribute_parallel_for_simd: 6751 case OMPD_target_update: 6752 case OMPD_declare_simd: 6753 case OMPD_declare_variant: 6754 case OMPD_begin_declare_variant: 6755 case OMPD_end_declare_variant: 6756 case OMPD_declare_target: 6757 case OMPD_end_declare_target: 6758 case OMPD_declare_reduction: 6759 
case OMPD_declare_mapper: 6760 case OMPD_taskloop: 6761 case OMPD_taskloop_simd: 6762 case OMPD_master_taskloop: 6763 case OMPD_master_taskloop_simd: 6764 case OMPD_parallel_master_taskloop: 6765 case OMPD_parallel_master_taskloop_simd: 6766 case OMPD_requires: 6767 case OMPD_metadirective: 6768 case OMPD_unknown: 6769 break; 6770 default: 6771 break; 6772 } 6773 llvm_unreachable("Unexpected directive kind."); 6774 } 6775 6776 llvm::Value *CGOpenMPRuntime::emitNumTeamsForTargetDirective( 6777 CodeGenFunction &CGF, const OMPExecutableDirective &D) { 6778 assert(!CGF.getLangOpts().OpenMPIsDevice && 6779 "Clauses associated with the teams directive expected to be emitted " 6780 "only for the host!"); 6781 CGBuilderTy &Bld = CGF.Builder; 6782 int32_t DefaultNT = -1; 6783 const Expr *NumTeams = getNumTeamsExprForTargetDirective(CGF, D, DefaultNT); 6784 if (NumTeams != nullptr) { 6785 OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind(); 6786 6787 switch (DirectiveKind) { 6788 case OMPD_target: { 6789 const auto *CS = D.getInnermostCapturedStmt(); 6790 CGOpenMPInnerExprInfo CGInfo(CGF, *CS); 6791 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 6792 llvm::Value *NumTeamsVal = CGF.EmitScalarExpr(NumTeams, 6793 /*IgnoreResultAssign*/ true); 6794 return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty, 6795 /*isSigned=*/true); 6796 } 6797 case OMPD_target_teams: 6798 case OMPD_target_teams_distribute: 6799 case OMPD_target_teams_distribute_simd: 6800 case OMPD_target_teams_distribute_parallel_for: 6801 case OMPD_target_teams_distribute_parallel_for_simd: { 6802 CodeGenFunction::RunCleanupsScope NumTeamsScope(CGF); 6803 llvm::Value *NumTeamsVal = CGF.EmitScalarExpr(NumTeams, 6804 /*IgnoreResultAssign*/ true); 6805 return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty, 6806 /*isSigned=*/true); 6807 } 6808 default: 6809 break; 6810 } 6811 } else if (DefaultNT == -1) { 6812 return nullptr; 6813 } 6814 6815 return Bld.getInt32(DefaultNT); 6816 } 6817 6818 static 
llvm::Value *getNumThreads(CodeGenFunction &CGF, const CapturedStmt *CS, 6819 llvm::Value *DefaultThreadLimitVal) { 6820 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild( 6821 CGF.getContext(), CS->getCapturedStmt()); 6822 if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) { 6823 if (isOpenMPParallelDirective(Dir->getDirectiveKind())) { 6824 llvm::Value *NumThreads = nullptr; 6825 llvm::Value *CondVal = nullptr; 6826 // Handle if clause. If if clause present, the number of threads is 6827 // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1. 6828 if (Dir->hasClausesOfKind<OMPIfClause>()) { 6829 CGOpenMPInnerExprInfo CGInfo(CGF, *CS); 6830 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 6831 const OMPIfClause *IfClause = nullptr; 6832 for (const auto *C : Dir->getClausesOfKind<OMPIfClause>()) { 6833 if (C->getNameModifier() == OMPD_unknown || 6834 C->getNameModifier() == OMPD_parallel) { 6835 IfClause = C; 6836 break; 6837 } 6838 } 6839 if (IfClause) { 6840 const Expr *Cond = IfClause->getCondition(); 6841 bool Result; 6842 if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) { 6843 if (!Result) 6844 return CGF.Builder.getInt32(1); 6845 } else { 6846 CodeGenFunction::LexicalScope Scope(CGF, Cond->getSourceRange()); 6847 if (const auto *PreInit = 6848 cast_or_null<DeclStmt>(IfClause->getPreInitStmt())) { 6849 for (const auto *I : PreInit->decls()) { 6850 if (!I->hasAttr<OMPCaptureNoInitAttr>()) { 6851 CGF.EmitVarDecl(cast<VarDecl>(*I)); 6852 } else { 6853 CodeGenFunction::AutoVarEmission Emission = 6854 CGF.EmitAutoVarAlloca(cast<VarDecl>(*I)); 6855 CGF.EmitAutoVarCleanups(Emission); 6856 } 6857 } 6858 } 6859 CondVal = CGF.EvaluateExprAsBool(Cond); 6860 } 6861 } 6862 } 6863 // Check the value of num_threads clause iff if clause was not specified 6864 // or is not evaluated to false. 
6865 if (Dir->hasClausesOfKind<OMPNumThreadsClause>()) { 6866 CGOpenMPInnerExprInfo CGInfo(CGF, *CS); 6867 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 6868 const auto *NumThreadsClause = 6869 Dir->getSingleClause<OMPNumThreadsClause>(); 6870 CodeGenFunction::LexicalScope Scope( 6871 CGF, NumThreadsClause->getNumThreads()->getSourceRange()); 6872 if (const auto *PreInit = 6873 cast_or_null<DeclStmt>(NumThreadsClause->getPreInitStmt())) { 6874 for (const auto *I : PreInit->decls()) { 6875 if (!I->hasAttr<OMPCaptureNoInitAttr>()) { 6876 CGF.EmitVarDecl(cast<VarDecl>(*I)); 6877 } else { 6878 CodeGenFunction::AutoVarEmission Emission = 6879 CGF.EmitAutoVarAlloca(cast<VarDecl>(*I)); 6880 CGF.EmitAutoVarCleanups(Emission); 6881 } 6882 } 6883 } 6884 NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads()); 6885 NumThreads = CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, 6886 /*isSigned=*/false); 6887 if (DefaultThreadLimitVal) 6888 NumThreads = CGF.Builder.CreateSelect( 6889 CGF.Builder.CreateICmpULT(DefaultThreadLimitVal, NumThreads), 6890 DefaultThreadLimitVal, NumThreads); 6891 } else { 6892 NumThreads = DefaultThreadLimitVal ? DefaultThreadLimitVal 6893 : CGF.Builder.getInt32(0); 6894 } 6895 // Process condition of the if clause. 6896 if (CondVal) { 6897 NumThreads = CGF.Builder.CreateSelect(CondVal, NumThreads, 6898 CGF.Builder.getInt32(1)); 6899 } 6900 return NumThreads; 6901 } 6902 if (isOpenMPSimdDirective(Dir->getDirectiveKind())) 6903 return CGF.Builder.getInt32(1); 6904 return DefaultThreadLimitVal; 6905 } 6906 return DefaultThreadLimitVal ? 
DefaultThreadLimitVal 6907 : CGF.Builder.getInt32(0); 6908 } 6909 6910 const Expr *CGOpenMPRuntime::getNumThreadsExprForTargetDirective( 6911 CodeGenFunction &CGF, const OMPExecutableDirective &D, 6912 int32_t &DefaultVal) { 6913 OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind(); 6914 assert(isOpenMPTargetExecutionDirective(DirectiveKind) && 6915 "Expected target-based executable directive."); 6916 6917 switch (DirectiveKind) { 6918 case OMPD_target: 6919 // Teams have no clause thread_limit 6920 return nullptr; 6921 case OMPD_target_teams: 6922 case OMPD_target_teams_distribute: 6923 if (D.hasClausesOfKind<OMPThreadLimitClause>()) { 6924 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>(); 6925 const Expr *ThreadLimit = ThreadLimitClause->getThreadLimit(); 6926 if (ThreadLimit->isIntegerConstantExpr(CGF.getContext())) 6927 if (auto Constant = 6928 ThreadLimit->getIntegerConstantExpr(CGF.getContext())) 6929 DefaultVal = Constant->getExtValue(); 6930 return ThreadLimit; 6931 } 6932 return nullptr; 6933 case OMPD_target_parallel: 6934 case OMPD_target_parallel_for: 6935 case OMPD_target_parallel_for_simd: 6936 case OMPD_target_teams_distribute_parallel_for: 6937 case OMPD_target_teams_distribute_parallel_for_simd: { 6938 Expr *ThreadLimit = nullptr; 6939 Expr *NumThreads = nullptr; 6940 if (D.hasClausesOfKind<OMPThreadLimitClause>()) { 6941 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>(); 6942 ThreadLimit = ThreadLimitClause->getThreadLimit(); 6943 if (ThreadLimit->isIntegerConstantExpr(CGF.getContext())) 6944 if (auto Constant = 6945 ThreadLimit->getIntegerConstantExpr(CGF.getContext())) 6946 DefaultVal = Constant->getExtValue(); 6947 } 6948 if (D.hasClausesOfKind<OMPNumThreadsClause>()) { 6949 const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>(); 6950 NumThreads = NumThreadsClause->getNumThreads(); 6951 if (NumThreads->isIntegerConstantExpr(CGF.getContext())) { 6952 if (auto Constant = 6953 
NumThreads->getIntegerConstantExpr(CGF.getContext())) { 6954 if (Constant->getExtValue() < DefaultVal) { 6955 DefaultVal = Constant->getExtValue(); 6956 ThreadLimit = NumThreads; 6957 } 6958 } 6959 } 6960 } 6961 return ThreadLimit; 6962 } 6963 case OMPD_target_teams_distribute_simd: 6964 case OMPD_target_simd: 6965 DefaultVal = 1; 6966 return nullptr; 6967 case OMPD_parallel: 6968 case OMPD_for: 6969 case OMPD_parallel_for: 6970 case OMPD_parallel_master: 6971 case OMPD_parallel_sections: 6972 case OMPD_for_simd: 6973 case OMPD_parallel_for_simd: 6974 case OMPD_cancel: 6975 case OMPD_cancellation_point: 6976 case OMPD_ordered: 6977 case OMPD_threadprivate: 6978 case OMPD_allocate: 6979 case OMPD_task: 6980 case OMPD_simd: 6981 case OMPD_tile: 6982 case OMPD_unroll: 6983 case OMPD_sections: 6984 case OMPD_section: 6985 case OMPD_single: 6986 case OMPD_master: 6987 case OMPD_critical: 6988 case OMPD_taskyield: 6989 case OMPD_barrier: 6990 case OMPD_taskwait: 6991 case OMPD_taskgroup: 6992 case OMPD_atomic: 6993 case OMPD_flush: 6994 case OMPD_depobj: 6995 case OMPD_scan: 6996 case OMPD_teams: 6997 case OMPD_target_data: 6998 case OMPD_target_exit_data: 6999 case OMPD_target_enter_data: 7000 case OMPD_distribute: 7001 case OMPD_distribute_simd: 7002 case OMPD_distribute_parallel_for: 7003 case OMPD_distribute_parallel_for_simd: 7004 case OMPD_teams_distribute: 7005 case OMPD_teams_distribute_simd: 7006 case OMPD_teams_distribute_parallel_for: 7007 case OMPD_teams_distribute_parallel_for_simd: 7008 case OMPD_target_update: 7009 case OMPD_declare_simd: 7010 case OMPD_declare_variant: 7011 case OMPD_begin_declare_variant: 7012 case OMPD_end_declare_variant: 7013 case OMPD_declare_target: 7014 case OMPD_end_declare_target: 7015 case OMPD_declare_reduction: 7016 case OMPD_declare_mapper: 7017 case OMPD_taskloop: 7018 case OMPD_taskloop_simd: 7019 case OMPD_master_taskloop: 7020 case OMPD_master_taskloop_simd: 7021 case OMPD_parallel_master_taskloop: 7022 case 
OMPD_parallel_master_taskloop_simd: 7023 case OMPD_requires: 7024 case OMPD_unknown: 7025 break; 7026 default: 7027 break; 7028 } 7029 llvm_unreachable("Unsupported directive kind."); 7030 } 7031 7032 llvm::Value *CGOpenMPRuntime::emitNumThreadsForTargetDirective( 7033 CodeGenFunction &CGF, const OMPExecutableDirective &D) { 7034 assert(!CGF.getLangOpts().OpenMPIsDevice && 7035 "Clauses associated with the teams directive expected to be emitted " 7036 "only for the host!"); 7037 OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind(); 7038 assert(isOpenMPTargetExecutionDirective(DirectiveKind) && 7039 "Expected target-based executable directive."); 7040 CGBuilderTy &Bld = CGF.Builder; 7041 llvm::Value *ThreadLimitVal = nullptr; 7042 llvm::Value *NumThreadsVal = nullptr; 7043 switch (DirectiveKind) { 7044 case OMPD_target: { 7045 const CapturedStmt *CS = D.getInnermostCapturedStmt(); 7046 if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal)) 7047 return NumThreads; 7048 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild( 7049 CGF.getContext(), CS->getCapturedStmt()); 7050 if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) { 7051 if (Dir->hasClausesOfKind<OMPThreadLimitClause>()) { 7052 CGOpenMPInnerExprInfo CGInfo(CGF, *CS); 7053 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 7054 const auto *ThreadLimitClause = 7055 Dir->getSingleClause<OMPThreadLimitClause>(); 7056 CodeGenFunction::LexicalScope Scope( 7057 CGF, ThreadLimitClause->getThreadLimit()->getSourceRange()); 7058 if (const auto *PreInit = 7059 cast_or_null<DeclStmt>(ThreadLimitClause->getPreInitStmt())) { 7060 for (const auto *I : PreInit->decls()) { 7061 if (!I->hasAttr<OMPCaptureNoInitAttr>()) { 7062 CGF.EmitVarDecl(cast<VarDecl>(*I)); 7063 } else { 7064 CodeGenFunction::AutoVarEmission Emission = 7065 CGF.EmitAutoVarAlloca(cast<VarDecl>(*I)); 7066 CGF.EmitAutoVarCleanups(Emission); 7067 } 7068 } 7069 } 7070 llvm::Value *ThreadLimit = 
CGF.EmitScalarExpr( 7071 ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true); 7072 ThreadLimitVal = 7073 Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false); 7074 } 7075 if (isOpenMPTeamsDirective(Dir->getDirectiveKind()) && 7076 !isOpenMPDistributeDirective(Dir->getDirectiveKind())) { 7077 CS = Dir->getInnermostCapturedStmt(); 7078 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild( 7079 CGF.getContext(), CS->getCapturedStmt()); 7080 Dir = dyn_cast_or_null<OMPExecutableDirective>(Child); 7081 } 7082 if (Dir && isOpenMPDistributeDirective(Dir->getDirectiveKind()) && 7083 !isOpenMPSimdDirective(Dir->getDirectiveKind())) { 7084 CS = Dir->getInnermostCapturedStmt(); 7085 if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal)) 7086 return NumThreads; 7087 } 7088 if (Dir && isOpenMPSimdDirective(Dir->getDirectiveKind())) 7089 return Bld.getInt32(1); 7090 } 7091 return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0); 7092 } 7093 case OMPD_target_teams: { 7094 if (D.hasClausesOfKind<OMPThreadLimitClause>()) { 7095 CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF); 7096 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>(); 7097 llvm::Value *ThreadLimit = CGF.EmitScalarExpr( 7098 ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true); 7099 ThreadLimitVal = 7100 Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false); 7101 } 7102 const CapturedStmt *CS = D.getInnermostCapturedStmt(); 7103 if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal)) 7104 return NumThreads; 7105 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild( 7106 CGF.getContext(), CS->getCapturedStmt()); 7107 if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) { 7108 if (Dir->getDirectiveKind() == OMPD_distribute) { 7109 CS = Dir->getInnermostCapturedStmt(); 7110 if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal)) 7111 return NumThreads; 7112 } 7113 } 7114 
return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0); 7115 } 7116 case OMPD_target_teams_distribute: 7117 if (D.hasClausesOfKind<OMPThreadLimitClause>()) { 7118 CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF); 7119 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>(); 7120 llvm::Value *ThreadLimit = CGF.EmitScalarExpr( 7121 ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true); 7122 ThreadLimitVal = 7123 Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false); 7124 } 7125 return getNumThreads(CGF, D.getInnermostCapturedStmt(), ThreadLimitVal); 7126 case OMPD_target_parallel: 7127 case OMPD_target_parallel_for: 7128 case OMPD_target_parallel_for_simd: 7129 case OMPD_target_teams_distribute_parallel_for: 7130 case OMPD_target_teams_distribute_parallel_for_simd: { 7131 llvm::Value *CondVal = nullptr; 7132 // Handle if clause. If if clause present, the number of threads is 7133 // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1. 
7134 if (D.hasClausesOfKind<OMPIfClause>()) { 7135 const OMPIfClause *IfClause = nullptr; 7136 for (const auto *C : D.getClausesOfKind<OMPIfClause>()) { 7137 if (C->getNameModifier() == OMPD_unknown || 7138 C->getNameModifier() == OMPD_parallel) { 7139 IfClause = C; 7140 break; 7141 } 7142 } 7143 if (IfClause) { 7144 const Expr *Cond = IfClause->getCondition(); 7145 bool Result; 7146 if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) { 7147 if (!Result) 7148 return Bld.getInt32(1); 7149 } else { 7150 CodeGenFunction::RunCleanupsScope Scope(CGF); 7151 CondVal = CGF.EvaluateExprAsBool(Cond); 7152 } 7153 } 7154 } 7155 if (D.hasClausesOfKind<OMPThreadLimitClause>()) { 7156 CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF); 7157 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>(); 7158 llvm::Value *ThreadLimit = CGF.EmitScalarExpr( 7159 ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true); 7160 ThreadLimitVal = 7161 Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false); 7162 } 7163 if (D.hasClausesOfKind<OMPNumThreadsClause>()) { 7164 CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF); 7165 const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>(); 7166 llvm::Value *NumThreads = CGF.EmitScalarExpr( 7167 NumThreadsClause->getNumThreads(), /*IgnoreResultAssign=*/true); 7168 NumThreadsVal = 7169 Bld.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned=*/false); 7170 ThreadLimitVal = ThreadLimitVal 7171 ? 
Bld.CreateSelect(Bld.CreateICmpULT(NumThreadsVal, 7172 ThreadLimitVal), 7173 NumThreadsVal, ThreadLimitVal) 7174 : NumThreadsVal; 7175 } 7176 if (!ThreadLimitVal) 7177 ThreadLimitVal = Bld.getInt32(0); 7178 if (CondVal) 7179 return Bld.CreateSelect(CondVal, ThreadLimitVal, Bld.getInt32(1)); 7180 return ThreadLimitVal; 7181 } 7182 case OMPD_target_teams_distribute_simd: 7183 case OMPD_target_simd: 7184 return Bld.getInt32(1); 7185 case OMPD_parallel: 7186 case OMPD_for: 7187 case OMPD_parallel_for: 7188 case OMPD_parallel_master: 7189 case OMPD_parallel_sections: 7190 case OMPD_for_simd: 7191 case OMPD_parallel_for_simd: 7192 case OMPD_cancel: 7193 case OMPD_cancellation_point: 7194 case OMPD_ordered: 7195 case OMPD_threadprivate: 7196 case OMPD_allocate: 7197 case OMPD_task: 7198 case OMPD_simd: 7199 case OMPD_tile: 7200 case OMPD_unroll: 7201 case OMPD_sections: 7202 case OMPD_section: 7203 case OMPD_single: 7204 case OMPD_master: 7205 case OMPD_critical: 7206 case OMPD_taskyield: 7207 case OMPD_barrier: 7208 case OMPD_taskwait: 7209 case OMPD_taskgroup: 7210 case OMPD_atomic: 7211 case OMPD_flush: 7212 case OMPD_depobj: 7213 case OMPD_scan: 7214 case OMPD_teams: 7215 case OMPD_target_data: 7216 case OMPD_target_exit_data: 7217 case OMPD_target_enter_data: 7218 case OMPD_distribute: 7219 case OMPD_distribute_simd: 7220 case OMPD_distribute_parallel_for: 7221 case OMPD_distribute_parallel_for_simd: 7222 case OMPD_teams_distribute: 7223 case OMPD_teams_distribute_simd: 7224 case OMPD_teams_distribute_parallel_for: 7225 case OMPD_teams_distribute_parallel_for_simd: 7226 case OMPD_target_update: 7227 case OMPD_declare_simd: 7228 case OMPD_declare_variant: 7229 case OMPD_begin_declare_variant: 7230 case OMPD_end_declare_variant: 7231 case OMPD_declare_target: 7232 case OMPD_end_declare_target: 7233 case OMPD_declare_reduction: 7234 case OMPD_declare_mapper: 7235 case OMPD_taskloop: 7236 case OMPD_taskloop_simd: 7237 case OMPD_master_taskloop: 7238 case 
OMPD_master_taskloop_simd: 7239 case OMPD_parallel_master_taskloop: 7240 case OMPD_parallel_master_taskloop_simd: 7241 case OMPD_requires: 7242 case OMPD_metadirective: 7243 case OMPD_unknown: 7244 break; 7245 default: 7246 break; 7247 } 7248 llvm_unreachable("Unsupported directive kind."); 7249 } 7250 7251 namespace { 7252 LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE(); 7253 7254 // Utility to handle information from clauses associated with a given 7255 // construct that use mappable expressions (e.g. 'map' clause, 'to' clause). 7256 // It provides a convenient interface to obtain the information and generate 7257 // code for that information. 7258 class MappableExprsHandler { 7259 public: 7260 /// Values for bit flags used to specify the mapping type for 7261 /// offloading. 7262 enum OpenMPOffloadMappingFlags : uint64_t { 7263 /// No flags 7264 OMP_MAP_NONE = 0x0, 7265 /// Allocate memory on the device and move data from host to device. 7266 OMP_MAP_TO = 0x01, 7267 /// Allocate memory on the device and move data from device to host. 7268 OMP_MAP_FROM = 0x02, 7269 /// Always perform the requested mapping action on the element, even 7270 /// if it was already mapped before. 7271 OMP_MAP_ALWAYS = 0x04, 7272 /// Delete the element from the device environment, ignoring the 7273 /// current reference count associated with the element. 7274 OMP_MAP_DELETE = 0x08, 7275 /// The element being mapped is a pointer-pointee pair; both the 7276 /// pointer and the pointee should be mapped. 7277 OMP_MAP_PTR_AND_OBJ = 0x10, 7278 /// This flags signals that the base address of an entry should be 7279 /// passed to the target kernel as an argument. 7280 OMP_MAP_TARGET_PARAM = 0x20, 7281 /// Signal that the runtime library has to return the device pointer 7282 /// in the current position for the data being mapped. Used when we have the 7283 /// use_device_ptr or use_device_addr clause. 
7284 OMP_MAP_RETURN_PARAM = 0x40, 7285 /// This flag signals that the reference being passed is a pointer to 7286 /// private data. 7287 OMP_MAP_PRIVATE = 0x80, 7288 /// Pass the element to the device by value. 7289 OMP_MAP_LITERAL = 0x100, 7290 /// Implicit map 7291 OMP_MAP_IMPLICIT = 0x200, 7292 /// Close is a hint to the runtime to allocate memory close to 7293 /// the target device. 7294 OMP_MAP_CLOSE = 0x400, 7295 /// 0x800 is reserved for compatibility with XLC. 7296 /// Produce a runtime error if the data is not already allocated. 7297 OMP_MAP_PRESENT = 0x1000, 7298 // Increment and decrement a separate reference counter so that the data 7299 // cannot be unmapped within the associated region. Thus, this flag is 7300 // intended to be used on 'target' and 'target data' directives because they 7301 // are inherently structured. It is not intended to be used on 'target 7302 // enter data' and 'target exit data' directives because they are inherently 7303 // dynamic. 7304 // This is an OpenMP extension for the sake of OpenACC support. 7305 OMP_MAP_OMPX_HOLD = 0x2000, 7306 /// Signal that the runtime library should use args as an array of 7307 /// descriptor_dim pointers and use args_size as dims. Used when we have 7308 /// non-contiguous list items in target update directive 7309 OMP_MAP_NON_CONTIG = 0x100000000000, 7310 /// The 16 MSBs of the flags indicate whether the entry is member of some 7311 /// struct/class. 7312 OMP_MAP_MEMBER_OF = 0xffff000000000000, 7313 LLVM_MARK_AS_BITMASK_ENUM(/* LargestFlag = */ OMP_MAP_MEMBER_OF), 7314 }; 7315 7316 /// Get the offset of the OMP_MAP_MEMBER_OF field. 7317 static unsigned getFlagMemberOffset() { 7318 unsigned Offset = 0; 7319 for (uint64_t Remain = OMP_MAP_MEMBER_OF; !(Remain & 1); 7320 Remain = Remain >> 1) 7321 Offset++; 7322 return Offset; 7323 } 7324 7325 /// Class that holds debugging information for a data mapping to be passed to 7326 /// the runtime library. 
7327 class MappingExprInfo { 7328 /// The variable declaration used for the data mapping. 7329 const ValueDecl *MapDecl = nullptr; 7330 /// The original expression used in the map clause, or null if there is 7331 /// none. 7332 const Expr *MapExpr = nullptr; 7333 7334 public: 7335 MappingExprInfo(const ValueDecl *MapDecl, const Expr *MapExpr = nullptr) 7336 : MapDecl(MapDecl), MapExpr(MapExpr) {} 7337 7338 const ValueDecl *getMapDecl() const { return MapDecl; } 7339 const Expr *getMapExpr() const { return MapExpr; } 7340 }; 7341 7342 /// Class that associates information with a base pointer to be passed to the 7343 /// runtime library. 7344 class BasePointerInfo { 7345 /// The base pointer. 7346 llvm::Value *Ptr = nullptr; 7347 /// The base declaration that refers to this device pointer, or null if 7348 /// there is none. 7349 const ValueDecl *DevPtrDecl = nullptr; 7350 7351 public: 7352 BasePointerInfo(llvm::Value *Ptr, const ValueDecl *DevPtrDecl = nullptr) 7353 : Ptr(Ptr), DevPtrDecl(DevPtrDecl) {} 7354 llvm::Value *operator*() const { return Ptr; } 7355 const ValueDecl *getDevicePtrDecl() const { return DevPtrDecl; } 7356 void setDevicePtrDecl(const ValueDecl *D) { DevPtrDecl = D; } 7357 }; 7358 7359 using MapExprsArrayTy = SmallVector<MappingExprInfo, 4>; 7360 using MapBaseValuesArrayTy = SmallVector<BasePointerInfo, 4>; 7361 using MapValuesArrayTy = SmallVector<llvm::Value *, 4>; 7362 using MapFlagsArrayTy = SmallVector<OpenMPOffloadMappingFlags, 4>; 7363 using MapMappersArrayTy = SmallVector<const ValueDecl *, 4>; 7364 using MapDimArrayTy = SmallVector<uint64_t, 4>; 7365 using MapNonContiguousArrayTy = SmallVector<MapValuesArrayTy, 4>; 7366 7367 /// This structure contains combined information generated for mappable 7368 /// clauses, including base pointers, pointers, sizes, map types, user-defined 7369 /// mappers, and non-contiguous information. 
7370 struct MapCombinedInfoTy { 7371 struct StructNonContiguousInfo { 7372 bool IsNonContiguous = false; 7373 MapDimArrayTy Dims; 7374 MapNonContiguousArrayTy Offsets; 7375 MapNonContiguousArrayTy Counts; 7376 MapNonContiguousArrayTy Strides; 7377 }; 7378 MapExprsArrayTy Exprs; 7379 MapBaseValuesArrayTy BasePointers; 7380 MapValuesArrayTy Pointers; 7381 MapValuesArrayTy Sizes; 7382 MapFlagsArrayTy Types; 7383 MapMappersArrayTy Mappers; 7384 StructNonContiguousInfo NonContigInfo; 7385 7386 /// Append arrays in \a CurInfo. 7387 void append(MapCombinedInfoTy &CurInfo) { 7388 Exprs.append(CurInfo.Exprs.begin(), CurInfo.Exprs.end()); 7389 BasePointers.append(CurInfo.BasePointers.begin(), 7390 CurInfo.BasePointers.end()); 7391 Pointers.append(CurInfo.Pointers.begin(), CurInfo.Pointers.end()); 7392 Sizes.append(CurInfo.Sizes.begin(), CurInfo.Sizes.end()); 7393 Types.append(CurInfo.Types.begin(), CurInfo.Types.end()); 7394 Mappers.append(CurInfo.Mappers.begin(), CurInfo.Mappers.end()); 7395 NonContigInfo.Dims.append(CurInfo.NonContigInfo.Dims.begin(), 7396 CurInfo.NonContigInfo.Dims.end()); 7397 NonContigInfo.Offsets.append(CurInfo.NonContigInfo.Offsets.begin(), 7398 CurInfo.NonContigInfo.Offsets.end()); 7399 NonContigInfo.Counts.append(CurInfo.NonContigInfo.Counts.begin(), 7400 CurInfo.NonContigInfo.Counts.end()); 7401 NonContigInfo.Strides.append(CurInfo.NonContigInfo.Strides.begin(), 7402 CurInfo.NonContigInfo.Strides.end()); 7403 } 7404 }; 7405 7406 /// Map between a struct and the its lowest & highest elements which have been 7407 /// mapped. 
/// [ValueDecl *] --> {LE(FieldIndex, Pointer),
///                    HE(FieldIndex, Pointer)}
/// LE and HE are the lowest and highest mapped elements, each stored as a
/// (field index, address) pair.
struct StructRangeInfoTy {
  /// Map entries kept aside for this struct.
  /// NOTE(review): presumably emitted after the combined struct entry has
  /// been generated - confirm against the users of this field.
  MapCombinedInfoTy PreliminaryMapData;
  /// Lowest mapped element; starts as field index 0 with an invalid address.
  std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> LowestElem = {
      0, Address::invalid()};
  /// Highest mapped element; starts as field index 0 with an invalid address.
  std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> HighestElem = {
      0, Address::invalid()};
  /// Base address of the struct; invalid until it has been set.
  Address Base = Address::invalid();
  /// Lower-bound address recorded for the struct; invalid until it has been
  /// set.
  Address LB = Address::invalid();
  // NOTE(review): the two flags below are not written in the visible part of
  // the file; their exact meaning should be confirmed at their use sites.
  bool IsArraySection = false;
  bool HasCompleteRecord = false;
};

private:
/// Information collected for one list of mappable-expression components: the
/// components themselves together with the map type, the map and motion
/// modifiers, an optional user-defined mapper, an optional variable
/// reference expression and the flags that accompany them.
struct MapInfo {
  OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
  OpenMPMapClauseKind MapType = OMPC_MAP_unknown;
  ArrayRef<OpenMPMapModifierKind> MapModifiers;
  ArrayRef<OpenMPMotionModifierKind> MotionModifiers;
  // NOTE(review): presumably requests that the device pointer be returned by
  // the runtime for this entry - confirm against the code consuming MapInfo.
  bool ReturnDevicePointer = false;
  bool IsImplicit = false;
  const ValueDecl *Mapper = nullptr;
  const Expr *VarRef = nullptr;
  bool ForDeviceAddr = false;

  /// Default state: unknown map type, empty modifier lists, all flags false
  /// and null Mapper / VarRef.
  MapInfo() = default;
  /// Memberwise constructor; \p Mapper, \p VarRef and \p ForDeviceAddr are
  /// optional and default to null / null / false.
  MapInfo(
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
      OpenMPMapClauseKind MapType,
      ArrayRef<OpenMPMapModifierKind> MapModifiers,
      ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
      bool ReturnDevicePointer, bool IsImplicit,
      const ValueDecl *Mapper = nullptr, const Expr *VarRef = nullptr,
      bool ForDeviceAddr = false)
      : Components(Components), MapType(MapType), MapModifiers(MapModifiers),
        MotionModifiers(MotionModifiers),
        ReturnDevicePointer(ReturnDevicePointer), IsImplicit(IsImplicit),
        Mapper(Mapper), VarRef(VarRef), ForDeviceAddr(ForDeviceAddr) {}
};

/// If use_device_ptr or use_device_addr is used on a decl which is a struct
/// member and there is no map information about it, then emission of that
/// entry is deferred until the whole struct has been processed.
7453 struct DeferredDevicePtrEntryTy { 7454 const Expr *IE = nullptr; 7455 const ValueDecl *VD = nullptr; 7456 bool ForDeviceAddr = false; 7457 7458 DeferredDevicePtrEntryTy(const Expr *IE, const ValueDecl *VD, 7459 bool ForDeviceAddr) 7460 : IE(IE), VD(VD), ForDeviceAddr(ForDeviceAddr) {} 7461 }; 7462 7463 /// The target directive from where the mappable clauses were extracted. It 7464 /// is either a executable directive or a user-defined mapper directive. 7465 llvm::PointerUnion<const OMPExecutableDirective *, 7466 const OMPDeclareMapperDecl *> 7467 CurDir; 7468 7469 /// Function the directive is being generated for. 7470 CodeGenFunction &CGF; 7471 7472 /// Set of all first private variables in the current directive. 7473 /// bool data is set to true if the variable is implicitly marked as 7474 /// firstprivate, false otherwise. 7475 llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, bool> FirstPrivateDecls; 7476 7477 /// Map between device pointer declarations and their expression components. 7478 /// The key value for declarations in 'this' is null. 7479 llvm::DenseMap< 7480 const ValueDecl *, 7481 SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>> 7482 DevPointersMap; 7483 7484 /// Map between lambda declarations and their map type. 7485 llvm::DenseMap<const ValueDecl *, const OMPMapClause *> LambdasMap; 7486 7487 llvm::Value *getExprTypeSize(const Expr *E) const { 7488 QualType ExprTy = E->getType().getCanonicalType(); 7489 7490 // Calculate the size for array shaping expression. 
7491 if (const auto *OAE = dyn_cast<OMPArrayShapingExpr>(E)) { 7492 llvm::Value *Size = 7493 CGF.getTypeSize(OAE->getBase()->getType()->getPointeeType()); 7494 for (const Expr *SE : OAE->getDimensions()) { 7495 llvm::Value *Sz = CGF.EmitScalarExpr(SE); 7496 Sz = CGF.EmitScalarConversion(Sz, SE->getType(), 7497 CGF.getContext().getSizeType(), 7498 SE->getExprLoc()); 7499 Size = CGF.Builder.CreateNUWMul(Size, Sz); 7500 } 7501 return Size; 7502 } 7503 7504 // Reference types are ignored for mapping purposes. 7505 if (const auto *RefTy = ExprTy->getAs<ReferenceType>()) 7506 ExprTy = RefTy->getPointeeType().getCanonicalType(); 7507 7508 // Given that an array section is considered a built-in type, we need to 7509 // do the calculation based on the length of the section instead of relying 7510 // on CGF.getTypeSize(E->getType()). 7511 if (const auto *OAE = dyn_cast<OMPArraySectionExpr>(E)) { 7512 QualType BaseTy = OMPArraySectionExpr::getBaseOriginalType( 7513 OAE->getBase()->IgnoreParenImpCasts()) 7514 .getCanonicalType(); 7515 7516 // If there is no length associated with the expression and lower bound is 7517 // not specified too, that means we are using the whole length of the 7518 // base. 7519 if (!OAE->getLength() && OAE->getColonLocFirst().isValid() && 7520 !OAE->getLowerBound()) 7521 return CGF.getTypeSize(BaseTy); 7522 7523 llvm::Value *ElemSize; 7524 if (const auto *PTy = BaseTy->getAs<PointerType>()) { 7525 ElemSize = CGF.getTypeSize(PTy->getPointeeType().getCanonicalType()); 7526 } else { 7527 const auto *ATy = cast<ArrayType>(BaseTy.getTypePtr()); 7528 assert(ATy && "Expecting array type if not a pointer type."); 7529 ElemSize = CGF.getTypeSize(ATy->getElementType().getCanonicalType()); 7530 } 7531 7532 // If we don't have a length at this point, that is because we have an 7533 // array section with a single element. 
7534 if (!OAE->getLength() && OAE->getColonLocFirst().isInvalid()) 7535 return ElemSize; 7536 7537 if (const Expr *LenExpr = OAE->getLength()) { 7538 llvm::Value *LengthVal = CGF.EmitScalarExpr(LenExpr); 7539 LengthVal = CGF.EmitScalarConversion(LengthVal, LenExpr->getType(), 7540 CGF.getContext().getSizeType(), 7541 LenExpr->getExprLoc()); 7542 return CGF.Builder.CreateNUWMul(LengthVal, ElemSize); 7543 } 7544 assert(!OAE->getLength() && OAE->getColonLocFirst().isValid() && 7545 OAE->getLowerBound() && "expected array_section[lb:]."); 7546 // Size = sizetype - lb * elemtype; 7547 llvm::Value *LengthVal = CGF.getTypeSize(BaseTy); 7548 llvm::Value *LBVal = CGF.EmitScalarExpr(OAE->getLowerBound()); 7549 LBVal = CGF.EmitScalarConversion(LBVal, OAE->getLowerBound()->getType(), 7550 CGF.getContext().getSizeType(), 7551 OAE->getLowerBound()->getExprLoc()); 7552 LBVal = CGF.Builder.CreateNUWMul(LBVal, ElemSize); 7553 llvm::Value *Cmp = CGF.Builder.CreateICmpUGT(LengthVal, LBVal); 7554 llvm::Value *TrueVal = CGF.Builder.CreateNUWSub(LengthVal, LBVal); 7555 LengthVal = CGF.Builder.CreateSelect( 7556 Cmp, TrueVal, llvm::ConstantInt::get(CGF.SizeTy, 0)); 7557 return LengthVal; 7558 } 7559 return CGF.getTypeSize(ExprTy); 7560 } 7561 7562 /// Return the corresponding bits for a given map clause modifier. Add 7563 /// a flag marking the map as a pointer if requested. Add a flag marking the 7564 /// map as the first one of a series of maps that relate to the same map 7565 /// expression. 7566 OpenMPOffloadMappingFlags getMapTypeBits( 7567 OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers, 7568 ArrayRef<OpenMPMotionModifierKind> MotionModifiers, bool IsImplicit, 7569 bool AddPtrFlag, bool AddIsTargetParamFlag, bool IsNonContiguous) const { 7570 OpenMPOffloadMappingFlags Bits = 7571 IsImplicit ? 
OMP_MAP_IMPLICIT : OMP_MAP_NONE; 7572 switch (MapType) { 7573 case OMPC_MAP_alloc: 7574 case OMPC_MAP_release: 7575 // alloc and release is the default behavior in the runtime library, i.e. 7576 // if we don't pass any bits alloc/release that is what the runtime is 7577 // going to do. Therefore, we don't need to signal anything for these two 7578 // type modifiers. 7579 break; 7580 case OMPC_MAP_to: 7581 Bits |= OMP_MAP_TO; 7582 break; 7583 case OMPC_MAP_from: 7584 Bits |= OMP_MAP_FROM; 7585 break; 7586 case OMPC_MAP_tofrom: 7587 Bits |= OMP_MAP_TO | OMP_MAP_FROM; 7588 break; 7589 case OMPC_MAP_delete: 7590 Bits |= OMP_MAP_DELETE; 7591 break; 7592 case OMPC_MAP_unknown: 7593 llvm_unreachable("Unexpected map type!"); 7594 } 7595 if (AddPtrFlag) 7596 Bits |= OMP_MAP_PTR_AND_OBJ; 7597 if (AddIsTargetParamFlag) 7598 Bits |= OMP_MAP_TARGET_PARAM; 7599 if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_always)) 7600 Bits |= OMP_MAP_ALWAYS; 7601 if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_close)) 7602 Bits |= OMP_MAP_CLOSE; 7603 if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_present) || 7604 llvm::is_contained(MotionModifiers, OMPC_MOTION_MODIFIER_present)) 7605 Bits |= OMP_MAP_PRESENT; 7606 if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_ompx_hold)) 7607 Bits |= OMP_MAP_OMPX_HOLD; 7608 if (IsNonContiguous) 7609 Bits |= OMP_MAP_NON_CONTIG; 7610 return Bits; 7611 } 7612 7613 /// Return true if the provided expression is a final array section. A 7614 /// final array section, is one whose length can't be proved to be one. 7615 bool isFinalArraySectionExpression(const Expr *E) const { 7616 const auto *OASE = dyn_cast<OMPArraySectionExpr>(E); 7617 7618 // It is not an array section and therefore not a unity-size one. 7619 if (!OASE) 7620 return false; 7621 7622 // An array section with no colon always refer to a single element. 
7623 if (OASE->getColonLocFirst().isInvalid()) 7624 return false; 7625 7626 const Expr *Length = OASE->getLength(); 7627 7628 // If we don't have a length we have to check if the array has size 1 7629 // for this dimension. Also, we should always expect a length if the 7630 // base type is pointer. 7631 if (!Length) { 7632 QualType BaseQTy = OMPArraySectionExpr::getBaseOriginalType( 7633 OASE->getBase()->IgnoreParenImpCasts()) 7634 .getCanonicalType(); 7635 if (const auto *ATy = dyn_cast<ConstantArrayType>(BaseQTy.getTypePtr())) 7636 return ATy->getSize().getSExtValue() != 1; 7637 // If we don't have a constant dimension length, we have to consider 7638 // the current section as having any size, so it is not necessarily 7639 // unitary. If it happen to be unity size, that's user fault. 7640 return true; 7641 } 7642 7643 // Check if the length evaluates to 1. 7644 Expr::EvalResult Result; 7645 if (!Length->EvaluateAsInt(Result, CGF.getContext())) 7646 return true; // Can have more that size 1. 7647 7648 llvm::APSInt ConstLength = Result.Val.getInt(); 7649 return ConstLength.getSExtValue() != 1; 7650 } 7651 7652 /// Generate the base pointers, section pointers, sizes, map type bits, and 7653 /// user-defined mappers (all included in \a CombinedInfo) for the provided 7654 /// map type, map or motion modifiers, and expression components. 7655 /// \a IsFirstComponent should be set to true if the provided set of 7656 /// components is the first associated with a capture. 
7657 void generateInfoForComponentList( 7658 OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers, 7659 ArrayRef<OpenMPMotionModifierKind> MotionModifiers, 7660 OMPClauseMappableExprCommon::MappableExprComponentListRef Components, 7661 MapCombinedInfoTy &CombinedInfo, StructRangeInfoTy &PartialStruct, 7662 bool IsFirstComponentList, bool IsImplicit, 7663 const ValueDecl *Mapper = nullptr, bool ForDeviceAddr = false, 7664 const ValueDecl *BaseDecl = nullptr, const Expr *MapExpr = nullptr, 7665 ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef> 7666 OverlappedElements = llvm::None) const { 7667 // The following summarizes what has to be generated for each map and the 7668 // types below. The generated information is expressed in this order: 7669 // base pointer, section pointer, size, flags 7670 // (to add to the ones that come from the map type and modifier). 7671 // 7672 // double d; 7673 // int i[100]; 7674 // float *p; 7675 // 7676 // struct S1 { 7677 // int i; 7678 // float f[50]; 7679 // } 7680 // struct S2 { 7681 // int i; 7682 // float f[50]; 7683 // S1 s; 7684 // double *p; 7685 // struct S2 *ps; 7686 // int &ref; 7687 // } 7688 // S2 s; 7689 // S2 *ps; 7690 // 7691 // map(d) 7692 // &d, &d, sizeof(double), TARGET_PARAM | TO | FROM 7693 // 7694 // map(i) 7695 // &i, &i, 100*sizeof(int), TARGET_PARAM | TO | FROM 7696 // 7697 // map(i[1:23]) 7698 // &i(=&i[0]), &i[1], 23*sizeof(int), TARGET_PARAM | TO | FROM 7699 // 7700 // map(p) 7701 // &p, &p, sizeof(float*), TARGET_PARAM | TO | FROM 7702 // 7703 // map(p[1:24]) 7704 // &p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM | PTR_AND_OBJ 7705 // in unified shared memory mode or for local pointers 7706 // p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM 7707 // 7708 // map(s) 7709 // &s, &s, sizeof(S2), TARGET_PARAM | TO | FROM 7710 // 7711 // map(s.i) 7712 // &s, &(s.i), sizeof(int), TARGET_PARAM | TO | FROM 7713 // 7714 // map(s.s.f) 7715 // &s, &(s.s.f[0]), 
50*sizeof(float), TARGET_PARAM | TO | FROM 7716 // 7717 // map(s.p) 7718 // &s, &(s.p), sizeof(double*), TARGET_PARAM | TO | FROM 7719 // 7720 // map(to: s.p[:22]) 7721 // &s, &(s.p), sizeof(double*), TARGET_PARAM (*) 7722 // &s, &(s.p), sizeof(double*), MEMBER_OF(1) (**) 7723 // &(s.p), &(s.p[0]), 22*sizeof(double), 7724 // MEMBER_OF(1) | PTR_AND_OBJ | TO (***) 7725 // (*) alloc space for struct members, only this is a target parameter 7726 // (**) map the pointer (nothing to be mapped in this example) (the compiler 7727 // optimizes this entry out, same in the examples below) 7728 // (***) map the pointee (map: to) 7729 // 7730 // map(to: s.ref) 7731 // &s, &(s.ref), sizeof(int*), TARGET_PARAM (*) 7732 // &s, &(s.ref), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | TO (***) 7733 // (*) alloc space for struct members, only this is a target parameter 7734 // (**) map the pointer (nothing to be mapped in this example) (the compiler 7735 // optimizes this entry out, same in the examples below) 7736 // (***) map the pointee (map: to) 7737 // 7738 // map(s.ps) 7739 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM | TO | FROM 7740 // 7741 // map(from: s.ps->s.i) 7742 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM 7743 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1) 7744 // &(s.ps), &(s.ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM 7745 // 7746 // map(to: s.ps->ps) 7747 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM 7748 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1) 7749 // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | TO 7750 // 7751 // map(s.ps->ps->ps) 7752 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM 7753 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1) 7754 // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ 7755 // &(s.ps->ps), &(s.ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM 7756 // 7757 // map(to: s.ps->ps->s.f[:22]) 7758 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM 7759 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1) 7760 // &(s.ps), &(s.ps->ps), sizeof(S2*), 
MEMBER_OF(1) | PTR_AND_OBJ 7761 // &(s.ps->ps), &(s.ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO 7762 // 7763 // map(ps) 7764 // &ps, &ps, sizeof(S2*), TARGET_PARAM | TO | FROM 7765 // 7766 // map(ps->i) 7767 // ps, &(ps->i), sizeof(int), TARGET_PARAM | TO | FROM 7768 // 7769 // map(ps->s.f) 7770 // ps, &(ps->s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM 7771 // 7772 // map(from: ps->p) 7773 // ps, &(ps->p), sizeof(double*), TARGET_PARAM | FROM 7774 // 7775 // map(to: ps->p[:22]) 7776 // ps, &(ps->p), sizeof(double*), TARGET_PARAM 7777 // ps, &(ps->p), sizeof(double*), MEMBER_OF(1) 7778 // &(ps->p), &(ps->p[0]), 22*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | TO 7779 // 7780 // map(ps->ps) 7781 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM | TO | FROM 7782 // 7783 // map(from: ps->ps->s.i) 7784 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM 7785 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1) 7786 // &(ps->ps), &(ps->ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM 7787 // 7788 // map(from: ps->ps->ps) 7789 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM 7790 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1) 7791 // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | FROM 7792 // 7793 // map(ps->ps->ps->ps) 7794 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM 7795 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1) 7796 // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ 7797 // &(ps->ps->ps), &(ps->ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM 7798 // 7799 // map(to: ps->ps->ps->s.f[:22]) 7800 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM 7801 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1) 7802 // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ 7803 // &(ps->ps->ps), &(ps->ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO 7804 // 7805 // map(to: s.f[:22]) map(from: s.p[:33]) 7806 // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1) + 7807 // sizeof(double*) (**), TARGET_PARAM 7808 // &s, &(s.f[0]), 22*sizeof(float), 
MEMBER_OF(1) | TO 7809 // &s, &(s.p), sizeof(double*), MEMBER_OF(1) 7810 // &(s.p), &(s.p[0]), 33*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | FROM 7811 // (*) allocate contiguous space needed to fit all mapped members even if 7812 // we allocate space for members not mapped (in this example, 7813 // s.f[22..49] and s.s are not mapped, yet we must allocate space for 7814 // them as well because they fall between &s.f[0] and &s.p) 7815 // 7816 // map(from: s.f[:22]) map(to: ps->p[:33]) 7817 // &s, &(s.f[0]), 22*sizeof(float), TARGET_PARAM | FROM 7818 // ps, &(ps->p), sizeof(S2*), TARGET_PARAM 7819 // ps, &(ps->p), sizeof(double*), MEMBER_OF(2) (*) 7820 // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(2) | PTR_AND_OBJ | TO 7821 // (*) the struct this entry pertains to is the 2nd element in the list of 7822 // arguments, hence MEMBER_OF(2) 7823 // 7824 // map(from: s.f[:22], s.s) map(to: ps->p[:33]) 7825 // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1), TARGET_PARAM 7826 // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | FROM 7827 // &s, &(s.s), sizeof(struct S1), MEMBER_OF(1) | FROM 7828 // ps, &(ps->p), sizeof(S2*), TARGET_PARAM 7829 // ps, &(ps->p), sizeof(double*), MEMBER_OF(4) (*) 7830 // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(4) | PTR_AND_OBJ | TO 7831 // (*) the struct this entry pertains to is the 4th element in the list 7832 // of arguments, hence MEMBER_OF(4) 7833 7834 // Track if the map information being generated is the first for a capture. 7835 bool IsCaptureFirstInfo = IsFirstComponentList; 7836 // When the variable is on a declare target link or in a to clause with 7837 // unified memory, a reference is needed to hold the host/device address 7838 // of the variable. 7839 bool RequiresReference = false; 7840 7841 // Scan the components from the base to the complete expression. 
7842 auto CI = Components.rbegin(); 7843 auto CE = Components.rend(); 7844 auto I = CI; 7845 7846 // Track if the map information being generated is the first for a list of 7847 // components. 7848 bool IsExpressionFirstInfo = true; 7849 bool FirstPointerInComplexData = false; 7850 Address BP = Address::invalid(); 7851 const Expr *AssocExpr = I->getAssociatedExpression(); 7852 const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr); 7853 const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr); 7854 const auto *OAShE = dyn_cast<OMPArrayShapingExpr>(AssocExpr); 7855 7856 if (isa<MemberExpr>(AssocExpr)) { 7857 // The base is the 'this' pointer. The content of the pointer is going 7858 // to be the base of the field being mapped. 7859 BP = CGF.LoadCXXThisAddress(); 7860 } else if ((AE && isa<CXXThisExpr>(AE->getBase()->IgnoreParenImpCasts())) || 7861 (OASE && 7862 isa<CXXThisExpr>(OASE->getBase()->IgnoreParenImpCasts()))) { 7863 BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF); 7864 } else if (OAShE && 7865 isa<CXXThisExpr>(OAShE->getBase()->IgnoreParenCasts())) { 7866 BP = Address( 7867 CGF.EmitScalarExpr(OAShE->getBase()), 7868 CGF.getContext().getTypeAlignInChars(OAShE->getBase()->getType())); 7869 } else { 7870 // The base is the reference to the variable. 7871 // BP = &Var. 7872 BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF); 7873 if (const auto *VD = 7874 dyn_cast_or_null<VarDecl>(I->getAssociatedDeclaration())) { 7875 if (llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 7876 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD)) { 7877 if ((*Res == OMPDeclareTargetDeclAttr::MT_Link) || 7878 (*Res == OMPDeclareTargetDeclAttr::MT_To && 7879 CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())) { 7880 RequiresReference = true; 7881 BP = CGF.CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD); 7882 } 7883 } 7884 } 7885 7886 // If the variable is a pointer and is being dereferenced (i.e. 
is not 7887 // the last component), the base has to be the pointer itself, not its 7888 // reference. References are ignored for mapping purposes. 7889 QualType Ty = 7890 I->getAssociatedDeclaration()->getType().getNonReferenceType(); 7891 if (Ty->isAnyPointerType() && std::next(I) != CE) { 7892 // No need to generate individual map information for the pointer, it 7893 // can be associated with the combined storage if shared memory mode is 7894 // active or the base declaration is not global variable. 7895 const auto *VD = dyn_cast<VarDecl>(I->getAssociatedDeclaration()); 7896 if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() || 7897 !VD || VD->hasLocalStorage()) 7898 BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>()); 7899 else 7900 FirstPointerInComplexData = true; 7901 ++I; 7902 } 7903 } 7904 7905 // Track whether a component of the list should be marked as MEMBER_OF some 7906 // combined entry (for partial structs). Only the first PTR_AND_OBJ entry 7907 // in a component list should be marked as MEMBER_OF, all subsequent entries 7908 // do not belong to the base struct. E.g. 7909 // struct S2 s; 7910 // s.ps->ps->ps->f[:] 7911 // (1) (2) (3) (4) 7912 // ps(1) is a member pointer, ps(2) is a pointee of ps(1), so it is a 7913 // PTR_AND_OBJ entry; the PTR is ps(1), so MEMBER_OF the base struct. ps(3) 7914 // is the pointee of ps(2) which is not member of struct s, so it should not 7915 // be marked as such (it is still PTR_AND_OBJ). 7916 // The variable is initialized to false so that PTR_AND_OBJ entries which 7917 // are not struct members are not considered (e.g. array of pointers to 7918 // data). 7919 bool ShouldBeMemberOf = false; 7920 7921 // Variable keeping track of whether or not we have encountered a component 7922 // in the component list which is a member expression. 
Useful when we have a 7923 // pointer or a final array section, in which case it is the previous 7924 // component in the list which tells us whether we have a member expression. 7925 // E.g. X.f[:] 7926 // While processing the final array section "[:]" it is "f" which tells us 7927 // whether we are dealing with a member of a declared struct. 7928 const MemberExpr *EncounteredME = nullptr; 7929 7930 // Track for the total number of dimension. Start from one for the dummy 7931 // dimension. 7932 uint64_t DimSize = 1; 7933 7934 bool IsNonContiguous = CombinedInfo.NonContigInfo.IsNonContiguous; 7935 bool IsPrevMemberReference = false; 7936 7937 for (; I != CE; ++I) { 7938 // If the current component is member of a struct (parent struct) mark it. 7939 if (!EncounteredME) { 7940 EncounteredME = dyn_cast<MemberExpr>(I->getAssociatedExpression()); 7941 // If we encounter a PTR_AND_OBJ entry from now on it should be marked 7942 // as MEMBER_OF the parent struct. 7943 if (EncounteredME) { 7944 ShouldBeMemberOf = true; 7945 // Do not emit as complex pointer if this is actually not array-like 7946 // expression. 7947 if (FirstPointerInComplexData) { 7948 QualType Ty = std::prev(I) 7949 ->getAssociatedDeclaration() 7950 ->getType() 7951 .getNonReferenceType(); 7952 BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>()); 7953 FirstPointerInComplexData = false; 7954 } 7955 } 7956 } 7957 7958 auto Next = std::next(I); 7959 7960 // We need to generate the addresses and sizes if this is the last 7961 // component, if the component is a pointer or if it is an array section 7962 // whose length can't be proved to be one. If this is a pointer, it 7963 // becomes the base address for the following components. 7964 7965 // A final array section, is one whose length can't be proved to be one. 7966 // If the map item is non-contiguous then we don't treat any array section 7967 // as final array section. 
7968 bool IsFinalArraySection = 7969 !IsNonContiguous && 7970 isFinalArraySectionExpression(I->getAssociatedExpression()); 7971 7972 // If we have a declaration for the mapping use that, otherwise use 7973 // the base declaration of the map clause. 7974 const ValueDecl *MapDecl = (I->getAssociatedDeclaration()) 7975 ? I->getAssociatedDeclaration() 7976 : BaseDecl; 7977 MapExpr = (I->getAssociatedExpression()) ? I->getAssociatedExpression() 7978 : MapExpr; 7979 7980 // Get information on whether the element is a pointer. Have to do a 7981 // special treatment for array sections given that they are built-in 7982 // types. 7983 const auto *OASE = 7984 dyn_cast<OMPArraySectionExpr>(I->getAssociatedExpression()); 7985 const auto *OAShE = 7986 dyn_cast<OMPArrayShapingExpr>(I->getAssociatedExpression()); 7987 const auto *UO = dyn_cast<UnaryOperator>(I->getAssociatedExpression()); 7988 const auto *BO = dyn_cast<BinaryOperator>(I->getAssociatedExpression()); 7989 bool IsPointer = 7990 OAShE || 7991 (OASE && OMPArraySectionExpr::getBaseOriginalType(OASE) 7992 .getCanonicalType() 7993 ->isAnyPointerType()) || 7994 I->getAssociatedExpression()->getType()->isAnyPointerType(); 7995 bool IsMemberReference = isa<MemberExpr>(I->getAssociatedExpression()) && 7996 MapDecl && 7997 MapDecl->getType()->isLValueReferenceType(); 7998 bool IsNonDerefPointer = IsPointer && !UO && !BO && !IsNonContiguous; 7999 8000 if (OASE) 8001 ++DimSize; 8002 8003 if (Next == CE || IsMemberReference || IsNonDerefPointer || 8004 IsFinalArraySection) { 8005 // If this is not the last component, we expect the pointer to be 8006 // associated with an array expression or member expression. 
8007 assert((Next == CE || 8008 isa<MemberExpr>(Next->getAssociatedExpression()) || 8009 isa<ArraySubscriptExpr>(Next->getAssociatedExpression()) || 8010 isa<OMPArraySectionExpr>(Next->getAssociatedExpression()) || 8011 isa<OMPArrayShapingExpr>(Next->getAssociatedExpression()) || 8012 isa<UnaryOperator>(Next->getAssociatedExpression()) || 8013 isa<BinaryOperator>(Next->getAssociatedExpression())) && 8014 "Unexpected expression"); 8015 8016 Address LB = Address::invalid(); 8017 Address LowestElem = Address::invalid(); 8018 auto &&EmitMemberExprBase = [](CodeGenFunction &CGF, 8019 const MemberExpr *E) { 8020 const Expr *BaseExpr = E->getBase(); 8021 // If this is s.x, emit s as an lvalue. If it is s->x, emit s as a 8022 // scalar. 8023 LValue BaseLV; 8024 if (E->isArrow()) { 8025 LValueBaseInfo BaseInfo; 8026 TBAAAccessInfo TBAAInfo; 8027 Address Addr = 8028 CGF.EmitPointerWithAlignment(BaseExpr, &BaseInfo, &TBAAInfo); 8029 QualType PtrTy = BaseExpr->getType()->getPointeeType(); 8030 BaseLV = CGF.MakeAddrLValue(Addr, PtrTy, BaseInfo, TBAAInfo); 8031 } else { 8032 BaseLV = CGF.EmitOMPSharedLValue(BaseExpr); 8033 } 8034 return BaseLV; 8035 }; 8036 if (OAShE) { 8037 LowestElem = LB = Address(CGF.EmitScalarExpr(OAShE->getBase()), 8038 CGF.getContext().getTypeAlignInChars( 8039 OAShE->getBase()->getType())); 8040 } else if (IsMemberReference) { 8041 const auto *ME = cast<MemberExpr>(I->getAssociatedExpression()); 8042 LValue BaseLVal = EmitMemberExprBase(CGF, ME); 8043 LowestElem = CGF.EmitLValueForFieldInitialization( 8044 BaseLVal, cast<FieldDecl>(MapDecl)) 8045 .getAddress(CGF); 8046 LB = CGF.EmitLoadOfReferenceLValue(LowestElem, MapDecl->getType()) 8047 .getAddress(CGF); 8048 } else { 8049 LowestElem = LB = 8050 CGF.EmitOMPSharedLValue(I->getAssociatedExpression()) 8051 .getAddress(CGF); 8052 } 8053 8054 // If this component is a pointer inside the base struct then we don't 8055 // need to create any entry for it - it will be combined with the object 8056 // it is 
pointing to into a single PTR_AND_OBJ entry. 8057 bool IsMemberPointerOrAddr = 8058 EncounteredME && 8059 (((IsPointer || ForDeviceAddr) && 8060 I->getAssociatedExpression() == EncounteredME) || 8061 (IsPrevMemberReference && !IsPointer) || 8062 (IsMemberReference && Next != CE && 8063 !Next->getAssociatedExpression()->getType()->isPointerType())); 8064 if (!OverlappedElements.empty() && Next == CE) { 8065 // Handle base element with the info for overlapped elements. 8066 assert(!PartialStruct.Base.isValid() && "The base element is set."); 8067 assert(!IsPointer && 8068 "Unexpected base element with the pointer type."); 8069 // Mark the whole struct as the struct that requires allocation on the 8070 // device. 8071 PartialStruct.LowestElem = {0, LowestElem}; 8072 CharUnits TypeSize = CGF.getContext().getTypeSizeInChars( 8073 I->getAssociatedExpression()->getType()); 8074 Address HB = CGF.Builder.CreateConstGEP( 8075 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(LowestElem, 8076 CGF.VoidPtrTy), 8077 TypeSize.getQuantity() - 1); 8078 PartialStruct.HighestElem = { 8079 std::numeric_limits<decltype( 8080 PartialStruct.HighestElem.first)>::max(), 8081 HB}; 8082 PartialStruct.Base = BP; 8083 PartialStruct.LB = LB; 8084 assert( 8085 PartialStruct.PreliminaryMapData.BasePointers.empty() && 8086 "Overlapped elements must be used only once for the variable."); 8087 std::swap(PartialStruct.PreliminaryMapData, CombinedInfo); 8088 // Emit data for non-overlapped data. 8089 OpenMPOffloadMappingFlags Flags = 8090 OMP_MAP_MEMBER_OF | 8091 getMapTypeBits(MapType, MapModifiers, MotionModifiers, IsImplicit, 8092 /*AddPtrFlag=*/false, 8093 /*AddIsTargetParamFlag=*/false, IsNonContiguous); 8094 llvm::Value *Size = nullptr; 8095 // Do bitcopy of all non-overlapped structure elements. 
8096 for (OMPClauseMappableExprCommon::MappableExprComponentListRef 8097 Component : OverlappedElements) { 8098 Address ComponentLB = Address::invalid(); 8099 for (const OMPClauseMappableExprCommon::MappableComponent &MC : 8100 Component) { 8101 if (const ValueDecl *VD = MC.getAssociatedDeclaration()) { 8102 const auto *FD = dyn_cast<FieldDecl>(VD); 8103 if (FD && FD->getType()->isLValueReferenceType()) { 8104 const auto *ME = 8105 cast<MemberExpr>(MC.getAssociatedExpression()); 8106 LValue BaseLVal = EmitMemberExprBase(CGF, ME); 8107 ComponentLB = 8108 CGF.EmitLValueForFieldInitialization(BaseLVal, FD) 8109 .getAddress(CGF); 8110 } else { 8111 ComponentLB = 8112 CGF.EmitOMPSharedLValue(MC.getAssociatedExpression()) 8113 .getAddress(CGF); 8114 } 8115 Size = CGF.Builder.CreatePtrDiff( 8116 CGF.EmitCastToVoidPtr(ComponentLB.getPointer()), 8117 CGF.EmitCastToVoidPtr(LB.getPointer())); 8118 break; 8119 } 8120 } 8121 assert(Size && "Failed to determine structure size"); 8122 CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr); 8123 CombinedInfo.BasePointers.push_back(BP.getPointer()); 8124 CombinedInfo.Pointers.push_back(LB.getPointer()); 8125 CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast( 8126 Size, CGF.Int64Ty, /*isSigned=*/true)); 8127 CombinedInfo.Types.push_back(Flags); 8128 CombinedInfo.Mappers.push_back(nullptr); 8129 CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? 
DimSize 8130 : 1); 8131 LB = CGF.Builder.CreateConstGEP(ComponentLB, 1); 8132 } 8133 CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr); 8134 CombinedInfo.BasePointers.push_back(BP.getPointer()); 8135 CombinedInfo.Pointers.push_back(LB.getPointer()); 8136 Size = CGF.Builder.CreatePtrDiff( 8137 CGF.Builder.CreateConstGEP(HB, 1).getPointer(), 8138 CGF.EmitCastToVoidPtr(LB.getPointer())); 8139 CombinedInfo.Sizes.push_back( 8140 CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true)); 8141 CombinedInfo.Types.push_back(Flags); 8142 CombinedInfo.Mappers.push_back(nullptr); 8143 CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize 8144 : 1); 8145 break; 8146 } 8147 llvm::Value *Size = getExprTypeSize(I->getAssociatedExpression()); 8148 if (!IsMemberPointerOrAddr || 8149 (Next == CE && MapType != OMPC_MAP_unknown)) { 8150 CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr); 8151 CombinedInfo.BasePointers.push_back(BP.getPointer()); 8152 CombinedInfo.Pointers.push_back(LB.getPointer()); 8153 CombinedInfo.Sizes.push_back( 8154 CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true)); 8155 CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize 8156 : 1); 8157 8158 // If Mapper is valid, the last component inherits the mapper. 8159 bool HasMapper = Mapper && Next == CE; 8160 CombinedInfo.Mappers.push_back(HasMapper ? Mapper : nullptr); 8161 8162 // We need to add a pointer flag for each map that comes from the 8163 // same expression except for the first one. We also need to signal 8164 // this map is the first one that relates with the current capture 8165 // (there is a set of entries for each capture). 
8166 OpenMPOffloadMappingFlags Flags = getMapTypeBits( 8167 MapType, MapModifiers, MotionModifiers, IsImplicit, 8168 !IsExpressionFirstInfo || RequiresReference || 8169 FirstPointerInComplexData || IsMemberReference, 8170 IsCaptureFirstInfo && !RequiresReference, IsNonContiguous); 8171 8172 if (!IsExpressionFirstInfo || IsMemberReference) { 8173 // If we have a PTR_AND_OBJ pair where the OBJ is a pointer as well, 8174 // then we reset the TO/FROM/ALWAYS/DELETE/CLOSE flags. 8175 if (IsPointer || (IsMemberReference && Next != CE)) 8176 Flags &= ~(OMP_MAP_TO | OMP_MAP_FROM | OMP_MAP_ALWAYS | 8177 OMP_MAP_DELETE | OMP_MAP_CLOSE); 8178 8179 if (ShouldBeMemberOf) { 8180 // Set placeholder value MEMBER_OF=FFFF to indicate that the flag 8181 // should be later updated with the correct value of MEMBER_OF. 8182 Flags |= OMP_MAP_MEMBER_OF; 8183 // From now on, all subsequent PTR_AND_OBJ entries should not be 8184 // marked as MEMBER_OF. 8185 ShouldBeMemberOf = false; 8186 } 8187 } 8188 8189 CombinedInfo.Types.push_back(Flags); 8190 } 8191 8192 // If we have encountered a member expression so far, keep track of the 8193 // mapped member. If the parent is "*this", then the value declaration 8194 // is nullptr. 
8195 if (EncounteredME) { 8196 const auto *FD = cast<FieldDecl>(EncounteredME->getMemberDecl()); 8197 unsigned FieldIndex = FD->getFieldIndex(); 8198 8199 // Update info about the lowest and highest elements for this struct 8200 if (!PartialStruct.Base.isValid()) { 8201 PartialStruct.LowestElem = {FieldIndex, LowestElem}; 8202 if (IsFinalArraySection) { 8203 Address HB = 8204 CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false) 8205 .getAddress(CGF); 8206 PartialStruct.HighestElem = {FieldIndex, HB}; 8207 } else { 8208 PartialStruct.HighestElem = {FieldIndex, LowestElem}; 8209 } 8210 PartialStruct.Base = BP; 8211 PartialStruct.LB = BP; 8212 } else if (FieldIndex < PartialStruct.LowestElem.first) { 8213 PartialStruct.LowestElem = {FieldIndex, LowestElem}; 8214 } else if (FieldIndex > PartialStruct.HighestElem.first) { 8215 PartialStruct.HighestElem = {FieldIndex, LowestElem}; 8216 } 8217 } 8218 8219 // Need to emit combined struct for array sections. 8220 if (IsFinalArraySection || IsNonContiguous) 8221 PartialStruct.IsArraySection = true; 8222 8223 // If we have a final array section, we are done with this expression. 8224 if (IsFinalArraySection) 8225 break; 8226 8227 // The pointer becomes the base for the next element. 8228 if (Next != CE) 8229 BP = IsMemberReference ? LowestElem : LB; 8230 8231 IsExpressionFirstInfo = false; 8232 IsCaptureFirstInfo = false; 8233 FirstPointerInComplexData = false; 8234 IsPrevMemberReference = IsMemberReference; 8235 } else if (FirstPointerInComplexData) { 8236 QualType Ty = Components.rbegin() 8237 ->getAssociatedDeclaration() 8238 ->getType() 8239 .getNonReferenceType(); 8240 BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>()); 8241 FirstPointerInComplexData = false; 8242 } 8243 } 8244 // If ran into the whole component - allocate the space for the whole 8245 // record. 
8246 if (!EncounteredME) 8247 PartialStruct.HasCompleteRecord = true; 8248 8249 if (!IsNonContiguous) 8250 return; 8251 8252 const ASTContext &Context = CGF.getContext(); 8253 8254 // For supporting stride in array section, we need to initialize the first 8255 // dimension size as 1, first offset as 0, and first count as 1 8256 MapValuesArrayTy CurOffsets = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 0)}; 8257 MapValuesArrayTy CurCounts = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)}; 8258 MapValuesArrayTy CurStrides; 8259 MapValuesArrayTy DimSizes{llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)}; 8260 uint64_t ElementTypeSize; 8261 8262 // Collect Size information for each dimension and get the element size as 8263 // the first Stride. For example, for `int arr[10][10]`, the DimSizes 8264 // should be [10, 10] and the first stride is 4 btyes. 8265 for (const OMPClauseMappableExprCommon::MappableComponent &Component : 8266 Components) { 8267 const Expr *AssocExpr = Component.getAssociatedExpression(); 8268 const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr); 8269 8270 if (!OASE) 8271 continue; 8272 8273 QualType Ty = OMPArraySectionExpr::getBaseOriginalType(OASE->getBase()); 8274 auto *CAT = Context.getAsConstantArrayType(Ty); 8275 auto *VAT = Context.getAsVariableArrayType(Ty); 8276 8277 // We need all the dimension size except for the last dimension. 8278 assert((VAT || CAT || &Component == &*Components.begin()) && 8279 "Should be either ConstantArray or VariableArray if not the " 8280 "first Component"); 8281 8282 // Get element size if CurStrides is empty. 
8283 if (CurStrides.empty()) { 8284 const Type *ElementType = nullptr; 8285 if (CAT) 8286 ElementType = CAT->getElementType().getTypePtr(); 8287 else if (VAT) 8288 ElementType = VAT->getElementType().getTypePtr(); 8289 else 8290 assert(&Component == &*Components.begin() && 8291 "Only expect pointer (non CAT or VAT) when this is the " 8292 "first Component"); 8293 // If ElementType is null, then it means the base is a pointer 8294 // (neither CAT nor VAT) and we'll attempt to get ElementType again 8295 // for next iteration. 8296 if (ElementType) { 8297 // For the case that having pointer as base, we need to remove one 8298 // level of indirection. 8299 if (&Component != &*Components.begin()) 8300 ElementType = ElementType->getPointeeOrArrayElementType(); 8301 ElementTypeSize = 8302 Context.getTypeSizeInChars(ElementType).getQuantity(); 8303 CurStrides.push_back( 8304 llvm::ConstantInt::get(CGF.Int64Ty, ElementTypeSize)); 8305 } 8306 } 8307 // Get dimension value except for the last dimension since we don't need 8308 // it. 8309 if (DimSizes.size() < Components.size() - 1) { 8310 if (CAT) 8311 DimSizes.push_back(llvm::ConstantInt::get( 8312 CGF.Int64Ty, CAT->getSize().getZExtValue())); 8313 else if (VAT) 8314 DimSizes.push_back(CGF.Builder.CreateIntCast( 8315 CGF.EmitScalarExpr(VAT->getSizeExpr()), CGF.Int64Ty, 8316 /*IsSigned=*/false)); 8317 } 8318 } 8319 8320 // Skip the dummy dimension since we have already have its information. 8321 auto DI = DimSizes.begin() + 1; 8322 // Product of dimension. 8323 llvm::Value *DimProd = 8324 llvm::ConstantInt::get(CGF.CGM.Int64Ty, ElementTypeSize); 8325 8326 // Collect info for non-contiguous. Notice that offset, count, and stride 8327 // are only meaningful for array-section, so we insert a null for anything 8328 // other than array-section. 8329 // Also, the size of offset, count, and stride are not the same as 8330 // pointers, base_pointers, sizes, or dims. 
Instead, the size of offset, 8331 // count, and stride are the same as the number of non-contiguous 8332 // declaration in target update to/from clause. 8333 for (const OMPClauseMappableExprCommon::MappableComponent &Component : 8334 Components) { 8335 const Expr *AssocExpr = Component.getAssociatedExpression(); 8336 8337 if (const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr)) { 8338 llvm::Value *Offset = CGF.Builder.CreateIntCast( 8339 CGF.EmitScalarExpr(AE->getIdx()), CGF.Int64Ty, 8340 /*isSigned=*/false); 8341 CurOffsets.push_back(Offset); 8342 CurCounts.push_back(llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/1)); 8343 CurStrides.push_back(CurStrides.back()); 8344 continue; 8345 } 8346 8347 const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr); 8348 8349 if (!OASE) 8350 continue; 8351 8352 // Offset 8353 const Expr *OffsetExpr = OASE->getLowerBound(); 8354 llvm::Value *Offset = nullptr; 8355 if (!OffsetExpr) { 8356 // If offset is absent, then we just set it to zero. 8357 Offset = llvm::ConstantInt::get(CGF.Int64Ty, 0); 8358 } else { 8359 Offset = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(OffsetExpr), 8360 CGF.Int64Ty, 8361 /*isSigned=*/false); 8362 } 8363 CurOffsets.push_back(Offset); 8364 8365 // Count 8366 const Expr *CountExpr = OASE->getLength(); 8367 llvm::Value *Count = nullptr; 8368 if (!CountExpr) { 8369 // In Clang, once a high dimension is an array section, we construct all 8370 // the lower dimension as array section, however, for case like 8371 // arr[0:2][2], Clang construct the inner dimension as an array section 8372 // but it actually is not in an array section form according to spec. 8373 if (!OASE->getColonLocFirst().isValid() && 8374 !OASE->getColonLocSecond().isValid()) { 8375 Count = llvm::ConstantInt::get(CGF.Int64Ty, 1); 8376 } else { 8377 // OpenMP 5.0, 2.1.5 Array Sections, Description. 
8378 // When the length is absent it defaults to ⌈(size − 8379 // lower-bound)/stride⌉, where size is the size of the array 8380 // dimension. 8381 const Expr *StrideExpr = OASE->getStride(); 8382 llvm::Value *Stride = 8383 StrideExpr 8384 ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr), 8385 CGF.Int64Ty, /*isSigned=*/false) 8386 : nullptr; 8387 if (Stride) 8388 Count = CGF.Builder.CreateUDiv( 8389 CGF.Builder.CreateNUWSub(*DI, Offset), Stride); 8390 else 8391 Count = CGF.Builder.CreateNUWSub(*DI, Offset); 8392 } 8393 } else { 8394 Count = CGF.EmitScalarExpr(CountExpr); 8395 } 8396 Count = CGF.Builder.CreateIntCast(Count, CGF.Int64Ty, /*isSigned=*/false); 8397 CurCounts.push_back(Count); 8398 8399 // Stride_n' = Stride_n * (D_0 * D_1 ... * D_n-1) * Unit size 8400 // Take `int arr[5][5][5]` and `arr[0:2:2][1:2:1][0:2:2]` as an example: 8401 // Offset Count Stride 8402 // D0 0 1 4 (int) <- dummy dimension 8403 // D1 0 2 8 (2 * (1) * 4) 8404 // D2 1 2 20 (1 * (1 * 5) * 4) 8405 // D3 0 2 200 (2 * (1 * 5 * 4) * 4) 8406 const Expr *StrideExpr = OASE->getStride(); 8407 llvm::Value *Stride = 8408 StrideExpr 8409 ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr), 8410 CGF.Int64Ty, /*isSigned=*/false) 8411 : nullptr; 8412 DimProd = CGF.Builder.CreateNUWMul(DimProd, *(DI - 1)); 8413 if (Stride) 8414 CurStrides.push_back(CGF.Builder.CreateNUWMul(DimProd, Stride)); 8415 else 8416 CurStrides.push_back(DimProd); 8417 if (DI != DimSizes.end()) 8418 ++DI; 8419 } 8420 8421 CombinedInfo.NonContigInfo.Offsets.push_back(CurOffsets); 8422 CombinedInfo.NonContigInfo.Counts.push_back(CurCounts); 8423 CombinedInfo.NonContigInfo.Strides.push_back(CurStrides); 8424 } 8425 8426 /// Return the adjusted map modifiers if the declaration a capture refers to 8427 /// appears in a first-private clause. This is expected to be used only with 8428 /// directives that start with 'target'. 
8429 MappableExprsHandler::OpenMPOffloadMappingFlags 8430 getMapModifiersForPrivateClauses(const CapturedStmt::Capture &Cap) const { 8431 assert(Cap.capturesVariable() && "Expected capture by reference only!"); 8432 8433 // A first private variable captured by reference will use only the 8434 // 'private ptr' and 'map to' flag. Return the right flags if the captured 8435 // declaration is known as first-private in this handler. 8436 if (FirstPrivateDecls.count(Cap.getCapturedVar())) { 8437 if (Cap.getCapturedVar()->getType()->isAnyPointerType()) 8438 return MappableExprsHandler::OMP_MAP_TO | 8439 MappableExprsHandler::OMP_MAP_PTR_AND_OBJ; 8440 return MappableExprsHandler::OMP_MAP_PRIVATE | 8441 MappableExprsHandler::OMP_MAP_TO; 8442 } 8443 auto I = LambdasMap.find(Cap.getCapturedVar()->getCanonicalDecl()); 8444 if (I != LambdasMap.end()) 8445 // for map(to: lambda): using user specified map type. 8446 return getMapTypeBits( 8447 I->getSecond()->getMapType(), I->getSecond()->getMapTypeModifiers(), 8448 /*MotionModifiers=*/llvm::None, I->getSecond()->isImplicit(), 8449 /*AddPtrFlag=*/false, 8450 /*AddIsTargetParamFlag=*/false, 8451 /*isNonContiguous=*/false); 8452 return MappableExprsHandler::OMP_MAP_TO | 8453 MappableExprsHandler::OMP_MAP_FROM; 8454 } 8455 8456 static OpenMPOffloadMappingFlags getMemberOfFlag(unsigned Position) { 8457 // Rotate by getFlagMemberOffset() bits. 8458 return static_cast<OpenMPOffloadMappingFlags>(((uint64_t)Position + 1) 8459 << getFlagMemberOffset()); 8460 } 8461 8462 static void setCorrectMemberOfFlag(OpenMPOffloadMappingFlags &Flags, 8463 OpenMPOffloadMappingFlags MemberOfFlag) { 8464 // If the entry is PTR_AND_OBJ but has not been marked with the special 8465 // placeholder value 0xFFFF in the MEMBER_OF field, then it should not be 8466 // marked as MEMBER_OF. 
8467 if ((Flags & OMP_MAP_PTR_AND_OBJ) && 8468 ((Flags & OMP_MAP_MEMBER_OF) != OMP_MAP_MEMBER_OF)) 8469 return; 8470 8471 // Reset the placeholder value to prepare the flag for the assignment of the 8472 // proper MEMBER_OF value. 8473 Flags &= ~OMP_MAP_MEMBER_OF; 8474 Flags |= MemberOfFlag; 8475 } 8476 8477 void getPlainLayout(const CXXRecordDecl *RD, 8478 llvm::SmallVectorImpl<const FieldDecl *> &Layout, 8479 bool AsBase) const { 8480 const CGRecordLayout &RL = CGF.getTypes().getCGRecordLayout(RD); 8481 8482 llvm::StructType *St = 8483 AsBase ? RL.getBaseSubobjectLLVMType() : RL.getLLVMType(); 8484 8485 unsigned NumElements = St->getNumElements(); 8486 llvm::SmallVector< 8487 llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>, 4> 8488 RecordLayout(NumElements); 8489 8490 // Fill bases. 8491 for (const auto &I : RD->bases()) { 8492 if (I.isVirtual()) 8493 continue; 8494 const auto *Base = I.getType()->getAsCXXRecordDecl(); 8495 // Ignore empty bases. 8496 if (Base->isEmpty() || CGF.getContext() 8497 .getASTRecordLayout(Base) 8498 .getNonVirtualSize() 8499 .isZero()) 8500 continue; 8501 8502 unsigned FieldIndex = RL.getNonVirtualBaseLLVMFieldNo(Base); 8503 RecordLayout[FieldIndex] = Base; 8504 } 8505 // Fill in virtual bases. 8506 for (const auto &I : RD->vbases()) { 8507 const auto *Base = I.getType()->getAsCXXRecordDecl(); 8508 // Ignore empty bases. 8509 if (Base->isEmpty()) 8510 continue; 8511 unsigned FieldIndex = RL.getVirtualBaseIndex(Base); 8512 if (RecordLayout[FieldIndex]) 8513 continue; 8514 RecordLayout[FieldIndex] = Base; 8515 } 8516 // Fill in all the fields. 8517 assert(!RD->isUnion() && "Unexpected union."); 8518 for (const auto *Field : RD->fields()) { 8519 // Fill in non-bitfields. (Bitfields always use a zero pattern, which we 8520 // will fill in later.) 
8521 if (!Field->isBitField() && !Field->isZeroSize(CGF.getContext())) { 8522 unsigned FieldIndex = RL.getLLVMFieldNo(Field); 8523 RecordLayout[FieldIndex] = Field; 8524 } 8525 } 8526 for (const llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *> 8527 &Data : RecordLayout) { 8528 if (Data.isNull()) 8529 continue; 8530 if (const auto *Base = Data.dyn_cast<const CXXRecordDecl *>()) 8531 getPlainLayout(Base, Layout, /*AsBase=*/true); 8532 else 8533 Layout.push_back(Data.get<const FieldDecl *>()); 8534 } 8535 } 8536 8537 /// Generate all the base pointers, section pointers, sizes, map types, and 8538 /// mappers for the extracted mappable expressions (all included in \a 8539 /// CombinedInfo). Also, for each item that relates with a device pointer, a 8540 /// pair of the relevant declaration and index where it occurs is appended to 8541 /// the device pointers info array. 8542 void generateAllInfoForClauses( 8543 ArrayRef<const OMPClause *> Clauses, MapCombinedInfoTy &CombinedInfo, 8544 const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet = 8545 llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const { 8546 // We have to process the component lists that relate with the same 8547 // declaration in a single chunk so that we can generate the map flags 8548 // correctly. Therefore, we organize all lists in a map. 8549 enum MapKind { Present, Allocs, Other, Total }; 8550 llvm::MapVector<CanonicalDeclPtr<const Decl>, 8551 SmallVector<SmallVector<MapInfo, 8>, 4>> 8552 Info; 8553 8554 // Helper function to fill the information map for the different supported 8555 // clauses. 
8556 auto &&InfoGen = 8557 [&Info, &SkipVarSet]( 8558 const ValueDecl *D, MapKind Kind, 8559 OMPClauseMappableExprCommon::MappableExprComponentListRef L, 8560 OpenMPMapClauseKind MapType, 8561 ArrayRef<OpenMPMapModifierKind> MapModifiers, 8562 ArrayRef<OpenMPMotionModifierKind> MotionModifiers, 8563 bool ReturnDevicePointer, bool IsImplicit, const ValueDecl *Mapper, 8564 const Expr *VarRef = nullptr, bool ForDeviceAddr = false) { 8565 if (SkipVarSet.contains(D)) 8566 return; 8567 auto It = Info.find(D); 8568 if (It == Info.end()) 8569 It = Info 8570 .insert(std::make_pair( 8571 D, SmallVector<SmallVector<MapInfo, 8>, 4>(Total))) 8572 .first; 8573 It->second[Kind].emplace_back( 8574 L, MapType, MapModifiers, MotionModifiers, ReturnDevicePointer, 8575 IsImplicit, Mapper, VarRef, ForDeviceAddr); 8576 }; 8577 8578 for (const auto *Cl : Clauses) { 8579 const auto *C = dyn_cast<OMPMapClause>(Cl); 8580 if (!C) 8581 continue; 8582 MapKind Kind = Other; 8583 if (!C->getMapTypeModifiers().empty() && 8584 llvm::any_of(C->getMapTypeModifiers(), [](OpenMPMapModifierKind K) { 8585 return K == OMPC_MAP_MODIFIER_present; 8586 })) 8587 Kind = Present; 8588 else if (C->getMapType() == OMPC_MAP_alloc) 8589 Kind = Allocs; 8590 const auto *EI = C->getVarRefs().begin(); 8591 for (const auto L : C->component_lists()) { 8592 const Expr *E = (C->getMapLoc().isValid()) ? 
*EI : nullptr; 8593 InfoGen(std::get<0>(L), Kind, std::get<1>(L), C->getMapType(), 8594 C->getMapTypeModifiers(), llvm::None, 8595 /*ReturnDevicePointer=*/false, C->isImplicit(), std::get<2>(L), 8596 E); 8597 ++EI; 8598 } 8599 } 8600 for (const auto *Cl : Clauses) { 8601 const auto *C = dyn_cast<OMPToClause>(Cl); 8602 if (!C) 8603 continue; 8604 MapKind Kind = Other; 8605 if (!C->getMotionModifiers().empty() && 8606 llvm::any_of(C->getMotionModifiers(), [](OpenMPMotionModifierKind K) { 8607 return K == OMPC_MOTION_MODIFIER_present; 8608 })) 8609 Kind = Present; 8610 const auto *EI = C->getVarRefs().begin(); 8611 for (const auto L : C->component_lists()) { 8612 InfoGen(std::get<0>(L), Kind, std::get<1>(L), OMPC_MAP_to, llvm::None, 8613 C->getMotionModifiers(), /*ReturnDevicePointer=*/false, 8614 C->isImplicit(), std::get<2>(L), *EI); 8615 ++EI; 8616 } 8617 } 8618 for (const auto *Cl : Clauses) { 8619 const auto *C = dyn_cast<OMPFromClause>(Cl); 8620 if (!C) 8621 continue; 8622 MapKind Kind = Other; 8623 if (!C->getMotionModifiers().empty() && 8624 llvm::any_of(C->getMotionModifiers(), [](OpenMPMotionModifierKind K) { 8625 return K == OMPC_MOTION_MODIFIER_present; 8626 })) 8627 Kind = Present; 8628 const auto *EI = C->getVarRefs().begin(); 8629 for (const auto L : C->component_lists()) { 8630 InfoGen(std::get<0>(L), Kind, std::get<1>(L), OMPC_MAP_from, llvm::None, 8631 C->getMotionModifiers(), /*ReturnDevicePointer=*/false, 8632 C->isImplicit(), std::get<2>(L), *EI); 8633 ++EI; 8634 } 8635 } 8636 8637 // Look at the use_device_ptr clause information and mark the existing map 8638 // entries as such. If there is no map information for an entry in the 8639 // use_device_ptr list, we create one with map type 'alloc' and zero size 8640 // section. It is the user fault if that was not mapped before. If there is 8641 // no map information and the pointer is a struct member, then we defer the 8642 // emission of that entry until the whole struct has been processed. 
8643 llvm::MapVector<CanonicalDeclPtr<const Decl>, 8644 SmallVector<DeferredDevicePtrEntryTy, 4>> 8645 DeferredInfo; 8646 MapCombinedInfoTy UseDevicePtrCombinedInfo; 8647 8648 for (const auto *Cl : Clauses) { 8649 const auto *C = dyn_cast<OMPUseDevicePtrClause>(Cl); 8650 if (!C) 8651 continue; 8652 for (const auto L : C->component_lists()) { 8653 OMPClauseMappableExprCommon::MappableExprComponentListRef Components = 8654 std::get<1>(L); 8655 assert(!Components.empty() && 8656 "Not expecting empty list of components!"); 8657 const ValueDecl *VD = Components.back().getAssociatedDeclaration(); 8658 VD = cast<ValueDecl>(VD->getCanonicalDecl()); 8659 const Expr *IE = Components.back().getAssociatedExpression(); 8660 // If the first component is a member expression, we have to look into 8661 // 'this', which maps to null in the map of map information. Otherwise 8662 // look directly for the information. 8663 auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD); 8664 8665 // We potentially have map information for this declaration already. 8666 // Look for the first set of components that refer to it. 8667 if (It != Info.end()) { 8668 bool Found = false; 8669 for (auto &Data : It->second) { 8670 auto *CI = llvm::find_if(Data, [VD](const MapInfo &MI) { 8671 return MI.Components.back().getAssociatedDeclaration() == VD; 8672 }); 8673 // If we found a map entry, signal that the pointer has to be 8674 // returned and move on to the next declaration. Exclude cases where 8675 // the base pointer is mapped as array subscript, array section or 8676 // array shaping. The base address is passed as a pointer to base in 8677 // this case and cannot be used as a base for use_device_ptr list 8678 // item. 
8679 if (CI != Data.end()) { 8680 auto PrevCI = std::next(CI->Components.rbegin()); 8681 const auto *VarD = dyn_cast<VarDecl>(VD); 8682 if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() || 8683 isa<MemberExpr>(IE) || 8684 !VD->getType().getNonReferenceType()->isPointerType() || 8685 PrevCI == CI->Components.rend() || 8686 isa<MemberExpr>(PrevCI->getAssociatedExpression()) || !VarD || 8687 VarD->hasLocalStorage()) { 8688 CI->ReturnDevicePointer = true; 8689 Found = true; 8690 break; 8691 } 8692 } 8693 } 8694 if (Found) 8695 continue; 8696 } 8697 8698 // We didn't find any match in our map information - generate a zero 8699 // size array section - if the pointer is a struct member we defer this 8700 // action until the whole struct has been processed. 8701 if (isa<MemberExpr>(IE)) { 8702 // Insert the pointer into Info to be processed by 8703 // generateInfoForComponentList. Because it is a member pointer 8704 // without a pointee, no entry will be generated for it, therefore 8705 // we need to generate one after the whole struct has been processed. 8706 // Nonetheless, generateInfoForComponentList must be called to take 8707 // the pointer into account for the calculation of the range of the 8708 // partial struct. 
8709 InfoGen(nullptr, Other, Components, OMPC_MAP_unknown, llvm::None, 8710 llvm::None, /*ReturnDevicePointer=*/false, C->isImplicit(), 8711 nullptr); 8712 DeferredInfo[nullptr].emplace_back(IE, VD, /*ForDeviceAddr=*/false); 8713 } else { 8714 llvm::Value *Ptr = 8715 CGF.EmitLoadOfScalar(CGF.EmitLValue(IE), IE->getExprLoc()); 8716 UseDevicePtrCombinedInfo.Exprs.push_back(VD); 8717 UseDevicePtrCombinedInfo.BasePointers.emplace_back(Ptr, VD); 8718 UseDevicePtrCombinedInfo.Pointers.push_back(Ptr); 8719 UseDevicePtrCombinedInfo.Sizes.push_back( 8720 llvm::Constant::getNullValue(CGF.Int64Ty)); 8721 UseDevicePtrCombinedInfo.Types.push_back(OMP_MAP_RETURN_PARAM); 8722 UseDevicePtrCombinedInfo.Mappers.push_back(nullptr); 8723 } 8724 } 8725 } 8726 8727 // Look at the use_device_addr clause information and mark the existing map 8728 // entries as such. If there is no map information for an entry in the 8729 // use_device_addr list, we create one with map type 'alloc' and zero size 8730 // section. It is the user fault if that was not mapped before. If there is 8731 // no map information and the pointer is a struct member, then we defer the 8732 // emission of that entry until the whole struct has been processed. 8733 llvm::SmallDenseSet<CanonicalDeclPtr<const Decl>, 4> Processed; 8734 for (const auto *Cl : Clauses) { 8735 const auto *C = dyn_cast<OMPUseDeviceAddrClause>(Cl); 8736 if (!C) 8737 continue; 8738 for (const auto L : C->component_lists()) { 8739 assert(!std::get<1>(L).empty() && 8740 "Not expecting empty list of components!"); 8741 const ValueDecl *VD = std::get<1>(L).back().getAssociatedDeclaration(); 8742 if (!Processed.insert(VD).second) 8743 continue; 8744 VD = cast<ValueDecl>(VD->getCanonicalDecl()); 8745 const Expr *IE = std::get<1>(L).back().getAssociatedExpression(); 8746 // If the first component is a member expression, we have to look into 8747 // 'this', which maps to null in the map of map information. 
Otherwise 8748 // look directly for the information. 8749 auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD); 8750 8751 // We potentially have map information for this declaration already. 8752 // Look for the first set of components that refer to it. 8753 if (It != Info.end()) { 8754 bool Found = false; 8755 for (auto &Data : It->second) { 8756 auto *CI = llvm::find_if(Data, [VD](const MapInfo &MI) { 8757 return MI.Components.back().getAssociatedDeclaration() == VD; 8758 }); 8759 // If we found a map entry, signal that the pointer has to be 8760 // returned and move on to the next declaration. 8761 if (CI != Data.end()) { 8762 CI->ReturnDevicePointer = true; 8763 Found = true; 8764 break; 8765 } 8766 } 8767 if (Found) 8768 continue; 8769 } 8770 8771 // We didn't find any match in our map information - generate a zero 8772 // size array section - if the pointer is a struct member we defer this 8773 // action until the whole struct has been processed. 8774 if (isa<MemberExpr>(IE)) { 8775 // Insert the pointer into Info to be processed by 8776 // generateInfoForComponentList. Because it is a member pointer 8777 // without a pointee, no entry will be generated for it, therefore 8778 // we need to generate one after the whole struct has been processed. 8779 // Nonetheless, generateInfoForComponentList must be called to take 8780 // the pointer into account for the calculation of the range of the 8781 // partial struct. 
8782 InfoGen(nullptr, Other, std::get<1>(L), OMPC_MAP_unknown, llvm::None, 8783 llvm::None, /*ReturnDevicePointer=*/false, C->isImplicit(), 8784 nullptr, nullptr, /*ForDeviceAddr=*/true); 8785 DeferredInfo[nullptr].emplace_back(IE, VD, /*ForDeviceAddr=*/true); 8786 } else { 8787 llvm::Value *Ptr; 8788 if (IE->isGLValue()) 8789 Ptr = CGF.EmitLValue(IE).getPointer(CGF); 8790 else 8791 Ptr = CGF.EmitScalarExpr(IE); 8792 CombinedInfo.Exprs.push_back(VD); 8793 CombinedInfo.BasePointers.emplace_back(Ptr, VD); 8794 CombinedInfo.Pointers.push_back(Ptr); 8795 CombinedInfo.Sizes.push_back( 8796 llvm::Constant::getNullValue(CGF.Int64Ty)); 8797 CombinedInfo.Types.push_back(OMP_MAP_RETURN_PARAM); 8798 CombinedInfo.Mappers.push_back(nullptr); 8799 } 8800 } 8801 } 8802 8803 for (const auto &Data : Info) { 8804 StructRangeInfoTy PartialStruct; 8805 // Temporary generated information. 8806 MapCombinedInfoTy CurInfo; 8807 const Decl *D = Data.first; 8808 const ValueDecl *VD = cast_or_null<ValueDecl>(D); 8809 for (const auto &M : Data.second) { 8810 for (const MapInfo &L : M) { 8811 assert(!L.Components.empty() && 8812 "Not expecting declaration with no component lists."); 8813 8814 // Remember the current base pointer index. 8815 unsigned CurrentBasePointersIdx = CurInfo.BasePointers.size(); 8816 CurInfo.NonContigInfo.IsNonContiguous = 8817 L.Components.back().isNonContiguous(); 8818 generateInfoForComponentList( 8819 L.MapType, L.MapModifiers, L.MotionModifiers, L.Components, 8820 CurInfo, PartialStruct, /*IsFirstComponentList=*/false, 8821 L.IsImplicit, L.Mapper, L.ForDeviceAddr, VD, L.VarRef); 8822 8823 // If this entry relates with a device pointer, set the relevant 8824 // declaration and add the 'return pointer' flag. 
8825 if (L.ReturnDevicePointer) { 8826 assert(CurInfo.BasePointers.size() > CurrentBasePointersIdx && 8827 "Unexpected number of mapped base pointers."); 8828 8829 const ValueDecl *RelevantVD = 8830 L.Components.back().getAssociatedDeclaration(); 8831 assert(RelevantVD && 8832 "No relevant declaration related with device pointer??"); 8833 8834 CurInfo.BasePointers[CurrentBasePointersIdx].setDevicePtrDecl( 8835 RelevantVD); 8836 CurInfo.Types[CurrentBasePointersIdx] |= OMP_MAP_RETURN_PARAM; 8837 } 8838 } 8839 } 8840 8841 // Append any pending zero-length pointers which are struct members and 8842 // used with use_device_ptr or use_device_addr. 8843 auto CI = DeferredInfo.find(Data.first); 8844 if (CI != DeferredInfo.end()) { 8845 for (const DeferredDevicePtrEntryTy &L : CI->second) { 8846 llvm::Value *BasePtr; 8847 llvm::Value *Ptr; 8848 if (L.ForDeviceAddr) { 8849 if (L.IE->isGLValue()) 8850 Ptr = this->CGF.EmitLValue(L.IE).getPointer(CGF); 8851 else 8852 Ptr = this->CGF.EmitScalarExpr(L.IE); 8853 BasePtr = Ptr; 8854 // Entry is RETURN_PARAM. Also, set the placeholder value 8855 // MEMBER_OF=FFFF so that the entry is later updated with the 8856 // correct value of MEMBER_OF. 8857 CurInfo.Types.push_back(OMP_MAP_RETURN_PARAM | OMP_MAP_MEMBER_OF); 8858 } else { 8859 BasePtr = this->CGF.EmitLValue(L.IE).getPointer(CGF); 8860 Ptr = this->CGF.EmitLoadOfScalar(this->CGF.EmitLValue(L.IE), 8861 L.IE->getExprLoc()); 8862 // Entry is PTR_AND_OBJ and RETURN_PARAM. Also, set the 8863 // placeholder value MEMBER_OF=FFFF so that the entry is later 8864 // updated with the correct value of MEMBER_OF. 
8865 CurInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_RETURN_PARAM | 8866 OMP_MAP_MEMBER_OF); 8867 } 8868 CurInfo.Exprs.push_back(L.VD); 8869 CurInfo.BasePointers.emplace_back(BasePtr, L.VD); 8870 CurInfo.Pointers.push_back(Ptr); 8871 CurInfo.Sizes.push_back( 8872 llvm::Constant::getNullValue(this->CGF.Int64Ty)); 8873 CurInfo.Mappers.push_back(nullptr); 8874 } 8875 } 8876 // If there is an entry in PartialStruct it means we have a struct with 8877 // individual members mapped. Emit an extra combined entry. 8878 if (PartialStruct.Base.isValid()) { 8879 CurInfo.NonContigInfo.Dims.push_back(0); 8880 emitCombinedEntry(CombinedInfo, CurInfo.Types, PartialStruct, VD); 8881 } 8882 8883 // We need to append the results of this capture to what we already 8884 // have. 8885 CombinedInfo.append(CurInfo); 8886 } 8887 // Append data for use_device_ptr clauses. 8888 CombinedInfo.append(UseDevicePtrCombinedInfo); 8889 } 8890 8891 public: 8892 MappableExprsHandler(const OMPExecutableDirective &Dir, CodeGenFunction &CGF) 8893 : CurDir(&Dir), CGF(CGF) { 8894 // Extract firstprivate clause information. 8895 for (const auto *C : Dir.getClausesOfKind<OMPFirstprivateClause>()) 8896 for (const auto *D : C->varlists()) 8897 FirstPrivateDecls.try_emplace( 8898 cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl()), C->isImplicit()); 8899 // Extract implicit firstprivates from uses_allocators clauses. 
8900 for (const auto *C : Dir.getClausesOfKind<OMPUsesAllocatorsClause>()) { 8901 for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) { 8902 OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I); 8903 if (const auto *DRE = dyn_cast_or_null<DeclRefExpr>(D.AllocatorTraits)) 8904 FirstPrivateDecls.try_emplace(cast<VarDecl>(DRE->getDecl()), 8905 /*Implicit=*/true); 8906 else if (const auto *VD = dyn_cast<VarDecl>( 8907 cast<DeclRefExpr>(D.Allocator->IgnoreParenImpCasts()) 8908 ->getDecl())) 8909 FirstPrivateDecls.try_emplace(VD, /*Implicit=*/true); 8910 } 8911 } 8912 // Extract device pointer clause information. 8913 for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>()) 8914 for (auto L : C->component_lists()) 8915 DevPointersMap[std::get<0>(L)].push_back(std::get<1>(L)); 8916 // Extract map information. 8917 for (const auto *C : Dir.getClausesOfKind<OMPMapClause>()) { 8918 if (C->getMapType() != OMPC_MAP_to) 8919 continue; 8920 for (auto L : C->component_lists()) { 8921 const ValueDecl *VD = std::get<0>(L); 8922 const auto *RD = VD ? VD->getType() 8923 .getCanonicalType() 8924 .getNonReferenceType() 8925 ->getAsCXXRecordDecl() 8926 : nullptr; 8927 if (RD && RD->isLambda()) 8928 LambdasMap.try_emplace(std::get<0>(L), C); 8929 } 8930 } 8931 } 8932 8933 /// Constructor for the declare mapper directive. 8934 MappableExprsHandler(const OMPDeclareMapperDecl &Dir, CodeGenFunction &CGF) 8935 : CurDir(&Dir), CGF(CGF) {} 8936 8937 /// Generate code for the combined entry if we have a partially mapped struct 8938 /// and take care of the mapping flags of the arguments corresponding to 8939 /// individual struct members. 
8940 void emitCombinedEntry(MapCombinedInfoTy &CombinedInfo, 8941 MapFlagsArrayTy &CurTypes, 8942 const StructRangeInfoTy &PartialStruct, 8943 const ValueDecl *VD = nullptr, 8944 bool NotTargetParams = true) const { 8945 if (CurTypes.size() == 1 && 8946 ((CurTypes.back() & OMP_MAP_MEMBER_OF) != OMP_MAP_MEMBER_OF) && 8947 !PartialStruct.IsArraySection) 8948 return; 8949 Address LBAddr = PartialStruct.LowestElem.second; 8950 Address HBAddr = PartialStruct.HighestElem.second; 8951 if (PartialStruct.HasCompleteRecord) { 8952 LBAddr = PartialStruct.LB; 8953 HBAddr = PartialStruct.LB; 8954 } 8955 CombinedInfo.Exprs.push_back(VD); 8956 // Base is the base of the struct 8957 CombinedInfo.BasePointers.push_back(PartialStruct.Base.getPointer()); 8958 // Pointer is the address of the lowest element 8959 llvm::Value *LB = LBAddr.getPointer(); 8960 CombinedInfo.Pointers.push_back(LB); 8961 // There should not be a mapper for a combined entry. 8962 CombinedInfo.Mappers.push_back(nullptr); 8963 // Size is (addr of {highest+1} element) - (addr of lowest element) 8964 llvm::Value *HB = HBAddr.getPointer(); 8965 llvm::Value *HAddr = 8966 CGF.Builder.CreateConstGEP1_32(HBAddr.getElementType(), HB, /*Idx0=*/1); 8967 llvm::Value *CLAddr = CGF.Builder.CreatePointerCast(LB, CGF.VoidPtrTy); 8968 llvm::Value *CHAddr = CGF.Builder.CreatePointerCast(HAddr, CGF.VoidPtrTy); 8969 llvm::Value *Diff = CGF.Builder.CreatePtrDiff(CHAddr, CLAddr); 8970 llvm::Value *Size = CGF.Builder.CreateIntCast(Diff, CGF.Int64Ty, 8971 /*isSigned=*/false); 8972 CombinedInfo.Sizes.push_back(Size); 8973 // Map type is always TARGET_PARAM, if generate info for captures. 8974 CombinedInfo.Types.push_back(NotTargetParams ? OMP_MAP_NONE 8975 : OMP_MAP_TARGET_PARAM); 8976 // If any element has the present modifier, then make sure the runtime 8977 // doesn't attempt to allocate the struct. 
8978 if (CurTypes.end() != 8979 llvm::find_if(CurTypes, [](OpenMPOffloadMappingFlags Type) { 8980 return Type & OMP_MAP_PRESENT; 8981 })) 8982 CombinedInfo.Types.back() |= OMP_MAP_PRESENT; 8983 // Remove TARGET_PARAM flag from the first element 8984 (*CurTypes.begin()) &= ~OMP_MAP_TARGET_PARAM; 8985 // If any element has the ompx_hold modifier, then make sure the runtime 8986 // uses the hold reference count for the struct as a whole so that it won't 8987 // be unmapped by an extra dynamic reference count decrement. Add it to all 8988 // elements as well so the runtime knows which reference count to check 8989 // when determining whether it's time for device-to-host transfers of 8990 // individual elements. 8991 if (CurTypes.end() != 8992 llvm::find_if(CurTypes, [](OpenMPOffloadMappingFlags Type) { 8993 return Type & OMP_MAP_OMPX_HOLD; 8994 })) { 8995 CombinedInfo.Types.back() |= OMP_MAP_OMPX_HOLD; 8996 for (auto &M : CurTypes) 8997 M |= OMP_MAP_OMPX_HOLD; 8998 } 8999 9000 // All other current entries will be MEMBER_OF the combined entry 9001 // (except for PTR_AND_OBJ entries which do not have a placeholder value 9002 // 0xFFFF in the MEMBER_OF field). 9003 OpenMPOffloadMappingFlags MemberOfFlag = 9004 getMemberOfFlag(CombinedInfo.BasePointers.size() - 1); 9005 for (auto &M : CurTypes) 9006 setCorrectMemberOfFlag(M, MemberOfFlag); 9007 } 9008 9009 /// Generate all the base pointers, section pointers, sizes, map types, and 9010 /// mappers for the extracted mappable expressions (all included in \a 9011 /// CombinedInfo). Also, for each item that relates with a device pointer, a 9012 /// pair of the relevant declaration and index where it occurs is appended to 9013 /// the device pointers info array. 
9014 void generateAllInfo( 9015 MapCombinedInfoTy &CombinedInfo, 9016 const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet = 9017 llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const { 9018 assert(CurDir.is<const OMPExecutableDirective *>() && 9019 "Expect a executable directive"); 9020 const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>(); 9021 generateAllInfoForClauses(CurExecDir->clauses(), CombinedInfo, SkipVarSet); 9022 } 9023 9024 /// Generate all the base pointers, section pointers, sizes, map types, and 9025 /// mappers for the extracted map clauses of user-defined mapper (all included 9026 /// in \a CombinedInfo). 9027 void generateAllInfoForMapper(MapCombinedInfoTy &CombinedInfo) const { 9028 assert(CurDir.is<const OMPDeclareMapperDecl *>() && 9029 "Expect a declare mapper directive"); 9030 const auto *CurMapperDir = CurDir.get<const OMPDeclareMapperDecl *>(); 9031 generateAllInfoForClauses(CurMapperDir->clauses(), CombinedInfo); 9032 } 9033 9034 /// Emit capture info for lambdas for variables captured by reference. 
void generateInfoForLambdaCaptures(
    const ValueDecl *VD, llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo,
    llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers) const {
  // Nothing to do unless the (canonical, non-reference) type of VD is a
  // lambda closure class.
  const auto *RD = VD->getType()
                       .getCanonicalType()
                       .getNonReferenceType()
                       ->getAsCXXRecordDecl();
  if (!RD || !RD->isLambda())
    return;
  // View Arg as the address of the closure object.
  Address VDAddr = Address(Arg, CGF.getContext().getDeclAlign(VD));
  LValue VDLVal = CGF.MakeAddrLValue(
      VDAddr, VD->getType().getCanonicalType().getNonReferenceType());
  llvm::DenseMap<const VarDecl *, FieldDecl *> Captures;
  FieldDecl *ThisCapture = nullptr;
  RD->getCaptureFields(Captures, ThisCapture);
  // Entry for a captured 'this', if any. The field address is recorded in
  // LambdaPointers together with the closure address so that
  // adjustMemberOfForLambdaCaptures can later find the parent entry.
  if (ThisCapture) {
    LValue ThisLVal =
        CGF.EmitLValueForFieldInitialization(VDLVal, ThisCapture);
    LValue ThisLValVal = CGF.EmitLValueForField(VDLVal, ThisCapture);
    LambdaPointers.try_emplace(ThisLVal.getPointer(CGF),
                               VDLVal.getPointer(CGF));
    CombinedInfo.Exprs.push_back(VD);
    CombinedInfo.BasePointers.push_back(ThisLVal.getPointer(CGF));
    CombinedInfo.Pointers.push_back(ThisLValVal.getPointer(CGF));
    CombinedInfo.Sizes.push_back(
        CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy),
                                  CGF.Int64Ty, /*isSigned=*/true));
    CombinedInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
                                 OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
    CombinedInfo.Mappers.push_back(nullptr);
  }
  for (const LambdaCapture &LC : RD->captures()) {
    if (!LC.capturesVariable())
      continue;
    // Note: this VD shadows the function parameter for the rest of the loop
    // body; it is the captured variable, not the lambda variable.
    const VarDecl *VD = LC.getCapturedVar();
    // Only by-reference captures and captures of pointer type get an entry.
    if (LC.getCaptureKind() != LCK_ByRef && !VD->getType()->isPointerType())
      continue;
    auto It = Captures.find(VD);
    assert(It != Captures.end() && "Found lambda capture without field.");
    LValue VarLVal = CGF.EmitLValueForFieldInitialization(VDLVal, It->second);
    if (LC.getCaptureKind() == LCK_ByRef) {
      // By-reference capture: the size is that of the referenced variable's
      // non-reference type.
      LValue VarLValVal = CGF.EmitLValueForField(VDLVal, It->second);
      LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
                                 VDLVal.getPointer(CGF));
      CombinedInfo.Exprs.push_back(VD);
      CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF));
      CombinedInfo.Pointers.push_back(VarLValVal.getPointer(CGF));
      CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
          CGF.getTypeSize(
              VD->getType().getCanonicalType().getNonReferenceType()),
          CGF.Int64Ty, /*isSigned=*/true));
    } else {
      // Pointer captured by copy: the pointer is the value loaded from the
      // field and the size is zero.
      RValue VarRVal = CGF.EmitLoadOfLValue(VarLVal, RD->getLocation());
      LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
                                 VDLVal.getPointer(CGF));
      CombinedInfo.Exprs.push_back(VD);
      CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF));
      CombinedInfo.Pointers.push_back(VarRVal.getScalarVal());
      CombinedInfo.Sizes.push_back(llvm::ConstantInt::get(CGF.Int64Ty, 0));
    }
    CombinedInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
                                 OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
    CombinedInfo.Mappers.push_back(nullptr);
  }
}

/// Set correct indices for lambdas captures.
///
/// For every entry whose type is exactly the flag combination used for
/// implicit lambda captures, the closest preceding entry whose pointer equals
/// the recorded lambda address is located and its index is encoded into the
/// MEMBER_OF field of the capture entry.
void adjustMemberOfForLambdaCaptures(
    const llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers,
    MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
    MapFlagsArrayTy &Types) const {
  for (unsigned I = 0, E = Types.size(); I < E; ++I) {
    // Set correct member_of idx for all implicit lambda captures.
    if (Types[I] != (OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
                     OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT))
      continue;
    llvm::Value *BasePtr = LambdaPointers.lookup(*BasePointers[I]);
    assert(BasePtr && "Unable to find base lambda address.");
    int TgtIdx = -1;
    // Walk backwards from the entry just before I (J is one past the index
    // being tested so the unsigned counter never wraps).
    for (unsigned J = I; J > 0; --J) {
      unsigned Idx = J - 1;
      if (Pointers[Idx] != BasePtr)
        continue;
      TgtIdx = Idx;
      break;
    }
    assert(TgtIdx != -1 && "Unable to find parent lambda.");
    // All other current entries will be MEMBER_OF the combined entry
    // (except for PTR_AND_OBJ entries which do not have a placeholder value
    // 0xFFFF in the MEMBER_OF field).
    OpenMPOffloadMappingFlags MemberOfFlag = getMemberOfFlag(TgtIdx);
    setCorrectMemberOfFlag(Types[I], MemberOfFlag);
  }
}

/// Generate the base pointers, section pointers, sizes, map types, and
/// mappers associated to a given capture (all included in \a CombinedInfo).
void generateInfoForCapture(const CapturedStmt::Capture *Cap,
                            llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo,
                            StructRangeInfoTy &PartialStruct) const {
  assert(!Cap->capturesVariableArrayType() &&
         "Not expecting to generate map info for a variable array type!");

  // We need to know when we are generating information for the first
  // component. A capture of 'this' is represented by a null declaration.
  const ValueDecl *VD = Cap->capturesThis()
                            ? nullptr
                            : Cap->getCapturedVar()->getCanonicalDecl();

  // For map(to: lambda): skip here, it is processed in
  // generateDefaultMapInfo.
  if (LambdasMap.count(VD))
    return;

  // If this declaration appears in an is_device_ptr clause we just have to
  // pass the pointer by value. If it is a reference to a declaration, we just
  // pass its value.
  if (DevPointersMap.count(VD)) {
    CombinedInfo.Exprs.push_back(VD);
    CombinedInfo.BasePointers.emplace_back(Arg, VD);
    CombinedInfo.Pointers.push_back(Arg);
    CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
        CGF.getTypeSize(CGF.getContext().VoidPtrTy), CGF.Int64Ty,
        /*isSigned=*/true));
    CombinedInfo.Types.push_back(
        (Cap->capturesVariable() ? OMP_MAP_TO : OMP_MAP_LITERAL) |
        OMP_MAP_TARGET_PARAM);
    CombinedInfo.Mappers.push_back(nullptr);
    return;
  }

  // Tuple layout: <components, map type, map modifiers, is-implicit, mapper,
  // variable reference expression>.
  using MapData =
      std::tuple<OMPClauseMappableExprCommon::MappableExprComponentListRef,
                 OpenMPMapClauseKind, ArrayRef<OpenMPMapModifierKind>, bool,
                 const ValueDecl *, const Expr *>;
  SmallVector<MapData, 4> DeclComponentLists;
  assert(CurDir.is<const OMPExecutableDirective *>() &&
         "Expect a executable directive");
  const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
  // Collect every component list that the map clauses of the directive
  // associate with VD.
  for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) {
    // EI is advanced once per component list of VD.
    const auto *EI = C->getVarRefs().begin();
    for (const auto L : C->decl_component_lists(VD)) {
      const ValueDecl *VDecl, *Mapper;
      // The Expression is not correct if the mapping is implicit
      const Expr *E = (C->getMapLoc().isValid()) ? *EI : nullptr;
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      std::tie(VDecl, Components, Mapper) = L;
      assert(VDecl == VD && "We got information for the wrong declaration??");
      assert(!Components.empty() &&
             "Not expecting declaration with no component lists.");
      DeclComponentLists.emplace_back(Components, C->getMapType(),
                                      C->getMapTypeModifiers(),
                                      C->isImplicit(), Mapper, E);
      ++EI;
    }
  }
  // Order the lists: LHS goes before RHS when LHS has the 'present' modifier
  // and RHS does not, or when RHS is an 'alloc' map and LHS is not.
  // NOTE(review): HasAllocs is computed from RHS and HasAllocsR from LHS,
  // i.e. the alloc test is swapped relative to the present test; confirm
  // this is intentional before touching it.
  llvm::stable_sort(DeclComponentLists, [](const MapData &LHS,
                                           const MapData &RHS) {
    ArrayRef<OpenMPMapModifierKind> MapModifiers = std::get<2>(LHS);
    OpenMPMapClauseKind MapType = std::get<1>(RHS);
    bool HasPresent = !MapModifiers.empty() &&
                      llvm::any_of(MapModifiers, [](OpenMPMapModifierKind K) {
                        return K == clang::OMPC_MAP_MODIFIER_present;
                      });
    bool HasAllocs = MapType == OMPC_MAP_alloc;
    MapModifiers = std::get<2>(RHS);
    MapType = std::get<1>(LHS);
    bool HasPresentR =
        !MapModifiers.empty() &&
        llvm::any_of(MapModifiers, [](OpenMPMapModifierKind K) {
          return K == clang::OMPC_MAP_MODIFIER_present;
        });
    bool HasAllocsR = MapType == OMPC_MAP_alloc;
    return (HasPresent && !HasPresentR) || (HasAllocs && !HasAllocsR);
  });

  // Find overlapping elements (including the offset from the base element).
  // The map is keyed by the address of an element of DeclComponentLists, so
  // that vector must not be modified from here on.
  llvm::SmallDenseMap<
      const MapData *,
      llvm::SmallVector<
          OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>,
      4>
      OverlappedData;
  size_t Count = 0;
  for (const MapData &L : DeclComponentLists) {
    OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
    OpenMPMapClauseKind MapType;
    ArrayRef<OpenMPMapModifierKind> MapModifiers;
    bool IsImplicit;
    const ValueDecl *Mapper;
    const Expr *VarRef;
    std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
        L;
    ++Count;
    // Compare L against every list that follows it.
    for (const MapData &L1 : makeArrayRef(DeclComponentLists).slice(Count)) {
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components1;
      // Only Components1 is used below; the other tied variables are simply
      // overwritten.
      std::tie(Components1, MapType, MapModifiers, IsImplicit, Mapper,
               VarRef) = L1;
      // Walk both lists in reverse storage order while the components agree
      // in expression class and associated declaration.
      auto CI = Components.rbegin();
      auto CE = Components.rend();
      auto SI = Components1.rbegin();
      auto SE = Components1.rend();
      for (; CI != CE && SI != SE; ++CI, ++SI) {
        if (CI->getAssociatedExpression()->getStmtClass() !=
            SI->getAssociatedExpression()->getStmtClass())
          break;
        // Are we dealing with different variables/fields?
        if (CI->getAssociatedDeclaration() != SI->getAssociatedDeclaration())
          break;
      }
      // Found overlapping if, at least for one component, reached the head
      // of the components list.
      if (CI == CE || SI == SE) {
        // Ignore it if it is the same component.
        if (CI == CE && SI == SE)
          continue;
        // It is the first unmatched component of the longer list.
        const auto It = (SI == SE) ? CI : SI;
        // If one component is a pointer and another one is a kind of
        // dereference of this pointer (array subscript, section, dereference,
        // etc.), it is not an overlapping.
        // Same, if one component is a base and another component is a
        // dereferenced pointer memberexpr with the same base.
        if (!isa<MemberExpr>(It->getAssociatedExpression()) ||
            (std::prev(It)->getAssociatedDeclaration() &&
             std::prev(It)
                 ->getAssociatedDeclaration()
                 ->getType()
                 ->isPointerType()) ||
            (It->getAssociatedDeclaration() &&
             It->getAssociatedDeclaration()->getType()->isPointerType() &&
             std::next(It) != CE && std::next(It) != SE))
          continue;
        // The shorter list is the base; the longer one overlaps it.
        const MapData &BaseData = CI == CE ? L : L1;
        OMPClauseMappableExprCommon::MappableExprComponentListRef SubData =
            SI == SE ? Components : Components1;
        auto &OverlappedElements = OverlappedData.FindAndConstruct(&BaseData);
        OverlappedElements.getSecond().push_back(SubData);
      }
    }
  }
  // Sort the overlapped elements for each item.
  llvm::SmallVector<const FieldDecl *, 4> Layout;
  if (!OverlappedData.empty()) {
    // Strip pointer/array levels from VD's type until the underlying type is
    // reached, then collect its fields into Layout.
    const Type *BaseType = VD->getType().getCanonicalType().getTypePtr();
    const Type *OrigType = BaseType->getPointeeOrArrayElementType();
    while (BaseType != OrigType) {
      BaseType = OrigType->getCanonicalTypeInternal().getTypePtr();
      OrigType = BaseType->getPointeeOrArrayElementType();
    }

    if (const auto *CRD = BaseType->getAsCXXRecordDecl())
      getPlainLayout(CRD, Layout, /*AsBase=*/false);
    else {
      const auto *RD = BaseType->getAsRecordDecl();
      Layout.append(RD->field_begin(), RD->field_end());
    }
  }
  for (auto &Pair : OverlappedData) {
    llvm::stable_sort(
        Pair.getSecond(),
        [&Layout](
            OMPClauseMappableExprCommon::MappableExprComponentListRef First,
            OMPClauseMappableExprCommon::MappableExprComponentListRef
                Second) {
          auto CI = First.rbegin();
          auto CE = First.rend();
          auto SI = Second.rbegin();
          auto SE = Second.rend();
          for (; CI != CE && SI != SE; ++CI, ++SI) {
            if (CI->getAssociatedExpression()->getStmtClass() !=
                SI->getAssociatedExpression()->getStmtClass())
              break;
            // Are we dealing with different variables/fields?
            if (CI->getAssociatedDeclaration() !=
                SI->getAssociatedDeclaration())
              break;
          }

          // Lists contain the same elements.
          if (CI == CE && SI == SE)
            return false;

          // List with less elements is less than list with more elements.
          if (CI == CE || SI == SE)
            return CI == CE;

          // The lists diverge at two fields: order by field index when both
          // belong to the same record, otherwise by whichever of the two
          // appears first in Layout.
          const auto *FD1 = cast<FieldDecl>(CI->getAssociatedDeclaration());
          const auto *FD2 = cast<FieldDecl>(SI->getAssociatedDeclaration());
          if (FD1->getParent() == FD2->getParent())
            return FD1->getFieldIndex() < FD2->getFieldIndex();
          const auto *It =
              llvm::find_if(Layout, [FD1, FD2](const FieldDecl *FD) {
                return FD == FD1 || FD == FD2;
              });
          return *It == FD1;
        });
  }

  // Associated with a capture, because the mapping flags depend on it.
  // Go through all of the elements with the overlapped elements.
  bool IsFirstComponentList = true;
  for (const auto &Pair : OverlappedData) {
    const MapData &L = *Pair.getFirst();
    OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
    OpenMPMapClauseKind MapType;
    ArrayRef<OpenMPMapModifierKind> MapModifiers;
    bool IsImplicit;
    const ValueDecl *Mapper;
    const Expr *VarRef;
    std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
        L;
    ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
        OverlappedComponents = Pair.getSecond();
    generateInfoForComponentList(
        MapType, MapModifiers, llvm::None, Components, CombinedInfo,
        PartialStruct, IsFirstComponentList, IsImplicit, Mapper,
        /*ForDeviceAddr=*/false, VD, VarRef, OverlappedComponents);
    IsFirstComponentList = false;
  }
  // Go through other elements without overlapped elements.
  for (const MapData &L : DeclComponentLists) {
    OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
    OpenMPMapClauseKind MapType;
    ArrayRef<OpenMPMapModifierKind> MapModifiers;
    bool IsImplicit;
    const ValueDecl *Mapper;
    const Expr *VarRef;
    std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
        L;
    auto It = OverlappedData.find(&L);
    if (It == OverlappedData.end())
      generateInfoForComponentList(MapType, MapModifiers, llvm::None,
                                   Components, CombinedInfo, PartialStruct,
                                   IsFirstComponentList, IsImplicit, Mapper,
                                   /*ForDeviceAddr=*/false, VD, VarRef);
    // Cleared on every iteration, including for lists that were already
    // handled as overlapped bases above.
    IsFirstComponentList = false;
  }
}

/// Generate the default map information for a given capture \a CI,
/// record field declaration \a RI and captured value \a CV.
void generateDefaultMapInfo(const CapturedStmt::Capture &CI,
                            const FieldDecl &RI, llvm::Value *CV,
                            MapCombinedInfoTy &CombinedInfo) const {
  bool IsImplicit = true;
  // Do the default mapping.
  if (CI.capturesThis()) {
    // 'this': map the pointee object, sized by the pointee type of the field.
    CombinedInfo.Exprs.push_back(nullptr);
    CombinedInfo.BasePointers.push_back(CV);
    CombinedInfo.Pointers.push_back(CV);
    const auto *PtrTy = cast<PointerType>(RI.getType().getTypePtr());
    CombinedInfo.Sizes.push_back(
        CGF.Builder.CreateIntCast(CGF.getTypeSize(PtrTy->getPointeeType()),
                                  CGF.Int64Ty, /*isSigned=*/true));
    // Default map type.
    CombinedInfo.Types.push_back(OMP_MAP_TO | OMP_MAP_FROM);
  } else if (CI.capturesVariableByCopy()) {
    const VarDecl *VD = CI.getCapturedVar();
    CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
    CombinedInfo.BasePointers.push_back(CV);
    CombinedInfo.Pointers.push_back(CV);
    if (!RI.getType()->isAnyPointerType()) {
      // We have to signal to the runtime captures passed by value that are
      // not pointers.
      CombinedInfo.Types.push_back(OMP_MAP_LITERAL);
      CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
          CGF.getTypeSize(RI.getType()), CGF.Int64Ty, /*isSigned=*/true));
    } else {
      // Pointers are implicitly mapped with a zero size and no flags
      // (other than first map that is added for all implicit maps).
      CombinedInfo.Types.push_back(OMP_MAP_NONE);
      CombinedInfo.Sizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty));
    }
    // A firstprivate declaration carries its own implicit/explicit marker.
    auto I = FirstPrivateDecls.find(VD);
    if (I != FirstPrivateDecls.end())
      IsImplicit = I->getSecond();
  } else {
    assert(CI.capturesVariable() && "Expected captured reference.");
    const auto *PtrTy = cast<ReferenceType>(RI.getType().getTypePtr());
    QualType ElementType = PtrTy->getPointeeType();
    CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
        CGF.getTypeSize(ElementType), CGF.Int64Ty, /*isSigned=*/true));
    // The default map type for a scalar/complex type is 'to' because by
    // default the value doesn't have to be retrieved. For an aggregate
    // type, the default is 'tofrom'.
    CombinedInfo.Types.push_back(getMapModifiersForPrivateClauses(CI));
    const VarDecl *VD = CI.getCapturedVar();
    auto I = FirstPrivateDecls.find(VD);
    CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
    CombinedInfo.BasePointers.push_back(CV);
    if (I != FirstPrivateDecls.end() && ElementType->isAnyPointerType()) {
      // Firstprivate reference to a pointer: the section pointer is the
      // result of loading through the reference rather than CV itself.
      Address PtrAddr = CGF.EmitLoadOfReference(CGF.MakeAddrLValue(
          CV, ElementType, CGF.getContext().getDeclAlign(VD),
          AlignmentSource::Decl));
      CombinedInfo.Pointers.push_back(PtrAddr.getPointer());
    } else {
      CombinedInfo.Pointers.push_back(CV);
    }
    if (I != FirstPrivateDecls.end())
      IsImplicit = I->getSecond();
  }
  // Every default map produces a single argument which is a target parameter.
  CombinedInfo.Types.back() |= OMP_MAP_TARGET_PARAM;

  // Add flag stating this is an implicit map.
  if (IsImplicit)
    CombinedInfo.Types.back() |= OMP_MAP_IMPLICIT;

  // No user-defined mapper for default mapping.
  CombinedInfo.Mappers.push_back(nullptr);
}
};
} // anonymous namespace

/// Emit, for every non-contiguous entry recorded in \a CombinedInfo, a
/// temporary array of per-dimension descriptors and store its address into
/// the matching slot of the pointers array held by \a Info.
static void emitNonContiguousDescriptor(
    CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo,
    CGOpenMPRuntime::TargetDataInfo &Info) {
  CodeGenModule &CGM = CGF.CGM;
  MappableExprsHandler::MapCombinedInfoTy::StructNonContiguousInfo
      &NonContigInfo = CombinedInfo.NonContigInfo;

  // Build an array of struct descriptor_dim and then assign it to
  // offload_args.
  //
  // struct descriptor_dim {
  //  uint64_t offset;
  //  uint64_t count;
  //  uint64_t stride;
  // };
  ASTContext &C = CGF.getContext();
  QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
  RecordDecl *RD;
  RD = C.buildImplicitRecord("descriptor_dim");
  RD->startDefinition();
  addFieldToRecordDecl(C, RD, Int64Ty);
  addFieldToRecordDecl(C, RD, Int64Ty);
  addFieldToRecordDecl(C, RD, Int64Ty);
  RD->completeDefinition();
  QualType DimTy = C.getRecordType(RD);

  // Field positions inside descriptor_dim, in declaration order.
  enum { OffsetFD = 0, CountFD, StrideFD };
  // We need two index variables here since the size of "Dims" is the same as
  // the size of Components, however, the size of offset, count, and stride is
  // equal to the size of base declaration that is non-contiguous.
  for (unsigned I = 0, L = 0, E = NonContigInfo.Dims.size(); I < E; ++I) {
    // Skip emitting ir if dimension size is 1 since it cannot be
    // non-contiguous.
    if (NonContigInfo.Dims[I] == 1)
      continue;
    llvm::APInt Size(/*numBits=*/32, NonContigInfo.Dims[I]);
    QualType ArrayTy =
        C.getConstantArrayType(DimTy, Size, nullptr, ArrayType::Normal, 0);
    Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims");
    for (unsigned II = 0, EE = NonContigInfo.Dims[I]; II < EE; ++II) {
      // Descriptor II is filled from the recorded values in reverse order.
      unsigned RevIdx = EE - II - 1;
      LValue DimsLVal = CGF.MakeAddrLValue(
          CGF.Builder.CreateConstArrayGEP(DimsAddr, II), DimTy);
      // Offset
      LValue OffsetLVal = CGF.EmitLValueForField(
          DimsLVal, *std::next(RD->field_begin(), OffsetFD));
      CGF.EmitStoreOfScalar(NonContigInfo.Offsets[L][RevIdx], OffsetLVal);
      // Count
      LValue CountLVal = CGF.EmitLValueForField(
          DimsLVal, *std::next(RD->field_begin(), CountFD));
      CGF.EmitStoreOfScalar(NonContigInfo.Counts[L][RevIdx], CountLVal);
      // Stride
      LValue StrideLVal = CGF.EmitLValueForField(
          DimsLVal, *std::next(RD->field_begin(), StrideFD));
      CGF.EmitStoreOfScalar(NonContigInfo.Strides[L][RevIdx], StrideLVal);
    }
    // args[I] = &dims
    Address DAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        DimsAddr, CGM.Int8PtrTy);
    llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32(
        llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
        Info.PointersArray, 0, I);
    Address PAddr(P, CGF.getPointerAlign());
    CGF.Builder.CreateStore(DAddr.getPointer(), PAddr);
    // L only advances for entries that actually got a descriptor.
    ++L;
  }
}

// Try to extract the base declaration from a `this->x` expression if possible.
9517 static ValueDecl *getDeclFromThisExpr(const Expr *E) { 9518 if (!E) 9519 return nullptr; 9520 9521 if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E->IgnoreParenCasts())) 9522 if (const MemberExpr *ME = 9523 dyn_cast<MemberExpr>(OASE->getBase()->IgnoreParenImpCasts())) 9524 return ME->getMemberDecl(); 9525 return nullptr; 9526 } 9527 9528 /// Emit a string constant containing the names of the values mapped to the 9529 /// offloading runtime library. 9530 llvm::Constant * 9531 emitMappingInformation(CodeGenFunction &CGF, llvm::OpenMPIRBuilder &OMPBuilder, 9532 MappableExprsHandler::MappingExprInfo &MapExprs) { 9533 9534 if (!MapExprs.getMapDecl() && !MapExprs.getMapExpr()) 9535 return OMPBuilder.getOrCreateDefaultSrcLocStr(); 9536 9537 SourceLocation Loc; 9538 if (!MapExprs.getMapDecl() && MapExprs.getMapExpr()) { 9539 if (const ValueDecl *VD = getDeclFromThisExpr(MapExprs.getMapExpr())) 9540 Loc = VD->getLocation(); 9541 else 9542 Loc = MapExprs.getMapExpr()->getExprLoc(); 9543 } else { 9544 Loc = MapExprs.getMapDecl()->getLocation(); 9545 } 9546 9547 std::string ExprName = ""; 9548 if (MapExprs.getMapExpr()) { 9549 PrintingPolicy P(CGF.getContext().getLangOpts()); 9550 llvm::raw_string_ostream OS(ExprName); 9551 MapExprs.getMapExpr()->printPretty(OS, nullptr, P); 9552 OS.flush(); 9553 } else { 9554 ExprName = MapExprs.getMapDecl()->getNameAsString(); 9555 } 9556 9557 PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc); 9558 return OMPBuilder.getOrCreateSrcLocStr(PLoc.getFilename(), ExprName.c_str(), 9559 PLoc.getLine(), PLoc.getColumn()); 9560 } 9561 9562 /// Emit the arrays used to pass the captures and map information to the 9563 /// offloading runtime library. If there is no map or capture information, 9564 /// return nullptr by reference. 
9565 static void emitOffloadingArrays( 9566 CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo, 9567 CGOpenMPRuntime::TargetDataInfo &Info, llvm::OpenMPIRBuilder &OMPBuilder, 9568 bool IsNonContiguous = false) { 9569 CodeGenModule &CGM = CGF.CGM; 9570 ASTContext &Ctx = CGF.getContext(); 9571 9572 // Reset the array information. 9573 Info.clearArrayInfo(); 9574 Info.NumberOfPtrs = CombinedInfo.BasePointers.size(); 9575 9576 if (Info.NumberOfPtrs) { 9577 // Detect if we have any capture size requiring runtime evaluation of the 9578 // size so that a constant array could be eventually used. 9579 bool hasRuntimeEvaluationCaptureSize = false; 9580 for (llvm::Value *S : CombinedInfo.Sizes) 9581 if (!isa<llvm::Constant>(S)) { 9582 hasRuntimeEvaluationCaptureSize = true; 9583 break; 9584 } 9585 9586 llvm::APInt PointerNumAP(32, Info.NumberOfPtrs, /*isSigned=*/true); 9587 QualType PointerArrayType = Ctx.getConstantArrayType( 9588 Ctx.VoidPtrTy, PointerNumAP, nullptr, ArrayType::Normal, 9589 /*IndexTypeQuals=*/0); 9590 9591 Info.BasePointersArray = 9592 CGF.CreateMemTemp(PointerArrayType, ".offload_baseptrs").getPointer(); 9593 Info.PointersArray = 9594 CGF.CreateMemTemp(PointerArrayType, ".offload_ptrs").getPointer(); 9595 Address MappersArray = 9596 CGF.CreateMemTemp(PointerArrayType, ".offload_mappers"); 9597 Info.MappersArray = MappersArray.getPointer(); 9598 9599 // If we don't have any VLA types or other types that require runtime 9600 // evaluation, we can use a constant array for the map sizes, otherwise we 9601 // need to fill up the arrays as we do for the pointers. 
9602 QualType Int64Ty = 9603 Ctx.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1); 9604 if (hasRuntimeEvaluationCaptureSize) { 9605 QualType SizeArrayType = Ctx.getConstantArrayType( 9606 Int64Ty, PointerNumAP, nullptr, ArrayType::Normal, 9607 /*IndexTypeQuals=*/0); 9608 Info.SizesArray = 9609 CGF.CreateMemTemp(SizeArrayType, ".offload_sizes").getPointer(); 9610 } else { 9611 // We expect all the sizes to be constant, so we collect them to create 9612 // a constant array. 9613 SmallVector<llvm::Constant *, 16> ConstSizes; 9614 for (unsigned I = 0, E = CombinedInfo.Sizes.size(); I < E; ++I) { 9615 if (IsNonContiguous && 9616 (CombinedInfo.Types[I] & MappableExprsHandler::OMP_MAP_NON_CONTIG)) { 9617 ConstSizes.push_back(llvm::ConstantInt::get( 9618 CGF.Int64Ty, CombinedInfo.NonContigInfo.Dims[I])); 9619 } else { 9620 ConstSizes.push_back(cast<llvm::Constant>(CombinedInfo.Sizes[I])); 9621 } 9622 } 9623 9624 auto *SizesArrayInit = llvm::ConstantArray::get( 9625 llvm::ArrayType::get(CGM.Int64Ty, ConstSizes.size()), ConstSizes); 9626 std::string Name = CGM.getOpenMPRuntime().getName({"offload_sizes"}); 9627 auto *SizesArrayGbl = new llvm::GlobalVariable( 9628 CGM.getModule(), SizesArrayInit->getType(), 9629 /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage, 9630 SizesArrayInit, Name); 9631 SizesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); 9632 Info.SizesArray = SizesArrayGbl; 9633 } 9634 9635 // The map types are always constant so we don't need to generate code to 9636 // fill arrays. Instead, we create an array constant. 
9637 SmallVector<uint64_t, 4> Mapping(CombinedInfo.Types.size(), 0); 9638 llvm::copy(CombinedInfo.Types, Mapping.begin()); 9639 std::string MaptypesName = 9640 CGM.getOpenMPRuntime().getName({"offload_maptypes"}); 9641 auto *MapTypesArrayGbl = 9642 OMPBuilder.createOffloadMaptypes(Mapping, MaptypesName); 9643 Info.MapTypesArray = MapTypesArrayGbl; 9644 9645 // The information types are only built if there is debug information 9646 // requested. 9647 if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo) { 9648 Info.MapNamesArray = llvm::Constant::getNullValue( 9649 llvm::Type::getInt8Ty(CGF.Builder.getContext())->getPointerTo()); 9650 } else { 9651 auto fillInfoMap = [&](MappableExprsHandler::MappingExprInfo &MapExpr) { 9652 return emitMappingInformation(CGF, OMPBuilder, MapExpr); 9653 }; 9654 SmallVector<llvm::Constant *, 4> InfoMap(CombinedInfo.Exprs.size()); 9655 llvm::transform(CombinedInfo.Exprs, InfoMap.begin(), fillInfoMap); 9656 std::string MapnamesName = 9657 CGM.getOpenMPRuntime().getName({"offload_mapnames"}); 9658 auto *MapNamesArrayGbl = 9659 OMPBuilder.createOffloadMapnames(InfoMap, MapnamesName); 9660 Info.MapNamesArray = MapNamesArrayGbl; 9661 } 9662 9663 // If there's a present map type modifier, it must not be applied to the end 9664 // of a region, so generate a separate map type array in that case. 
9665 if (Info.separateBeginEndCalls()) { 9666 bool EndMapTypesDiffer = false; 9667 for (uint64_t &Type : Mapping) { 9668 if (Type & MappableExprsHandler::OMP_MAP_PRESENT) { 9669 Type &= ~MappableExprsHandler::OMP_MAP_PRESENT; 9670 EndMapTypesDiffer = true; 9671 } 9672 } 9673 if (EndMapTypesDiffer) { 9674 MapTypesArrayGbl = 9675 OMPBuilder.createOffloadMaptypes(Mapping, MaptypesName); 9676 Info.MapTypesArrayEnd = MapTypesArrayGbl; 9677 } 9678 } 9679 9680 for (unsigned I = 0; I < Info.NumberOfPtrs; ++I) { 9681 llvm::Value *BPVal = *CombinedInfo.BasePointers[I]; 9682 llvm::Value *BP = CGF.Builder.CreateConstInBoundsGEP2_32( 9683 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs), 9684 Info.BasePointersArray, 0, I); 9685 BP = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 9686 BP, BPVal->getType()->getPointerTo(/*AddrSpace=*/0)); 9687 Address BPAddr(BP, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy)); 9688 CGF.Builder.CreateStore(BPVal, BPAddr); 9689 9690 if (Info.requiresDevicePointerInfo()) 9691 if (const ValueDecl *DevVD = 9692 CombinedInfo.BasePointers[I].getDevicePtrDecl()) 9693 Info.CaptureDeviceAddrMap.try_emplace(DevVD, BPAddr); 9694 9695 llvm::Value *PVal = CombinedInfo.Pointers[I]; 9696 llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32( 9697 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs), 9698 Info.PointersArray, 0, I); 9699 P = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 9700 P, PVal->getType()->getPointerTo(/*AddrSpace=*/0)); 9701 Address PAddr(P, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy)); 9702 CGF.Builder.CreateStore(PVal, PAddr); 9703 9704 if (hasRuntimeEvaluationCaptureSize) { 9705 llvm::Value *S = CGF.Builder.CreateConstInBoundsGEP2_32( 9706 llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs), 9707 Info.SizesArray, 9708 /*Idx0=*/0, 9709 /*Idx1=*/I); 9710 Address SAddr(S, Ctx.getTypeAlignInChars(Int64Ty)); 9711 CGF.Builder.CreateStore(CGF.Builder.CreateIntCast(CombinedInfo.Sizes[I], 9712 CGM.Int64Ty, 9713 /*isSigned=*/true), 9714 SAddr); 
9715 } 9716 9717 // Fill up the mapper array. 9718 llvm::Value *MFunc = llvm::ConstantPointerNull::get(CGM.VoidPtrTy); 9719 if (CombinedInfo.Mappers[I]) { 9720 MFunc = CGM.getOpenMPRuntime().getOrCreateUserDefinedMapperFunc( 9721 cast<OMPDeclareMapperDecl>(CombinedInfo.Mappers[I])); 9722 MFunc = CGF.Builder.CreatePointerCast(MFunc, CGM.VoidPtrTy); 9723 Info.HasMapper = true; 9724 } 9725 Address MAddr = CGF.Builder.CreateConstArrayGEP(MappersArray, I); 9726 CGF.Builder.CreateStore(MFunc, MAddr); 9727 } 9728 } 9729 9730 if (!IsNonContiguous || CombinedInfo.NonContigInfo.Offsets.empty() || 9731 Info.NumberOfPtrs == 0) 9732 return; 9733 9734 emitNonContiguousDescriptor(CGF, CombinedInfo, Info); 9735 } 9736 9737 namespace { 9738 /// Additional arguments for emitOffloadingArraysArgument function. 9739 struct ArgumentsOptions { 9740 bool ForEndCall = false; 9741 ArgumentsOptions() = default; 9742 ArgumentsOptions(bool ForEndCall) : ForEndCall(ForEndCall) {} 9743 }; 9744 } // namespace 9745 9746 /// Emit the arguments to be passed to the runtime library based on the 9747 /// arrays of base pointers, pointers, sizes, map types, and mappers. If 9748 /// ForEndCall, emit map types to be passed for the end of the region instead of 9749 /// the beginning. 
9750 static void emitOffloadingArraysArgument( 9751 CodeGenFunction &CGF, llvm::Value *&BasePointersArrayArg, 9752 llvm::Value *&PointersArrayArg, llvm::Value *&SizesArrayArg, 9753 llvm::Value *&MapTypesArrayArg, llvm::Value *&MapNamesArrayArg, 9754 llvm::Value *&MappersArrayArg, CGOpenMPRuntime::TargetDataInfo &Info, 9755 const ArgumentsOptions &Options = ArgumentsOptions()) { 9756 assert((!Options.ForEndCall || Info.separateBeginEndCalls()) && 9757 "expected region end call to runtime only when end call is separate"); 9758 CodeGenModule &CGM = CGF.CGM; 9759 if (Info.NumberOfPtrs) { 9760 BasePointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( 9761 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs), 9762 Info.BasePointersArray, 9763 /*Idx0=*/0, /*Idx1=*/0); 9764 PointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( 9765 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs), 9766 Info.PointersArray, 9767 /*Idx0=*/0, 9768 /*Idx1=*/0); 9769 SizesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( 9770 llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs), Info.SizesArray, 9771 /*Idx0=*/0, /*Idx1=*/0); 9772 MapTypesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( 9773 llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs), 9774 Options.ForEndCall && Info.MapTypesArrayEnd ? Info.MapTypesArrayEnd 9775 : Info.MapTypesArray, 9776 /*Idx0=*/0, 9777 /*Idx1=*/0); 9778 9779 // Only emit the mapper information arrays if debug information is 9780 // requested. 
9781 if (CGF.CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo) 9782 MapNamesArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy); 9783 else 9784 MapNamesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( 9785 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs), 9786 Info.MapNamesArray, 9787 /*Idx0=*/0, 9788 /*Idx1=*/0); 9789 // If there is no user-defined mapper, set the mapper array to nullptr to 9790 // avoid an unnecessary data privatization 9791 if (!Info.HasMapper) 9792 MappersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy); 9793 else 9794 MappersArrayArg = 9795 CGF.Builder.CreatePointerCast(Info.MappersArray, CGM.VoidPtrPtrTy); 9796 } else { 9797 BasePointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy); 9798 PointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy); 9799 SizesArrayArg = llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo()); 9800 MapTypesArrayArg = 9801 llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo()); 9802 MapNamesArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy); 9803 MappersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy); 9804 } 9805 } 9806 9807 /// Check for inner distribute directive. 
9808 static const OMPExecutableDirective * 9809 getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) { 9810 const auto *CS = D.getInnermostCapturedStmt(); 9811 const auto *Body = 9812 CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true); 9813 const Stmt *ChildStmt = 9814 CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body); 9815 9816 if (const auto *NestedDir = 9817 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) { 9818 OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind(); 9819 switch (D.getDirectiveKind()) { 9820 case OMPD_target: 9821 if (isOpenMPDistributeDirective(DKind)) 9822 return NestedDir; 9823 if (DKind == OMPD_teams) { 9824 Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers( 9825 /*IgnoreCaptured=*/true); 9826 if (!Body) 9827 return nullptr; 9828 ChildStmt = CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body); 9829 if (const auto *NND = 9830 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) { 9831 DKind = NND->getDirectiveKind(); 9832 if (isOpenMPDistributeDirective(DKind)) 9833 return NND; 9834 } 9835 } 9836 return nullptr; 9837 case OMPD_target_teams: 9838 if (isOpenMPDistributeDirective(DKind)) 9839 return NestedDir; 9840 return nullptr; 9841 case OMPD_target_parallel: 9842 case OMPD_target_simd: 9843 case OMPD_target_parallel_for: 9844 case OMPD_target_parallel_for_simd: 9845 return nullptr; 9846 case OMPD_target_teams_distribute: 9847 case OMPD_target_teams_distribute_simd: 9848 case OMPD_target_teams_distribute_parallel_for: 9849 case OMPD_target_teams_distribute_parallel_for_simd: 9850 case OMPD_parallel: 9851 case OMPD_for: 9852 case OMPD_parallel_for: 9853 case OMPD_parallel_master: 9854 case OMPD_parallel_sections: 9855 case OMPD_for_simd: 9856 case OMPD_parallel_for_simd: 9857 case OMPD_cancel: 9858 case OMPD_cancellation_point: 9859 case OMPD_ordered: 9860 case OMPD_threadprivate: 9861 case OMPD_allocate: 9862 case OMPD_task: 9863 case OMPD_simd: 9864 case OMPD_tile: 9865 
case OMPD_unroll: 9866 case OMPD_sections: 9867 case OMPD_section: 9868 case OMPD_single: 9869 case OMPD_master: 9870 case OMPD_critical: 9871 case OMPD_taskyield: 9872 case OMPD_barrier: 9873 case OMPD_taskwait: 9874 case OMPD_taskgroup: 9875 case OMPD_atomic: 9876 case OMPD_flush: 9877 case OMPD_depobj: 9878 case OMPD_scan: 9879 case OMPD_teams: 9880 case OMPD_target_data: 9881 case OMPD_target_exit_data: 9882 case OMPD_target_enter_data: 9883 case OMPD_distribute: 9884 case OMPD_distribute_simd: 9885 case OMPD_distribute_parallel_for: 9886 case OMPD_distribute_parallel_for_simd: 9887 case OMPD_teams_distribute: 9888 case OMPD_teams_distribute_simd: 9889 case OMPD_teams_distribute_parallel_for: 9890 case OMPD_teams_distribute_parallel_for_simd: 9891 case OMPD_target_update: 9892 case OMPD_declare_simd: 9893 case OMPD_declare_variant: 9894 case OMPD_begin_declare_variant: 9895 case OMPD_end_declare_variant: 9896 case OMPD_declare_target: 9897 case OMPD_end_declare_target: 9898 case OMPD_declare_reduction: 9899 case OMPD_declare_mapper: 9900 case OMPD_taskloop: 9901 case OMPD_taskloop_simd: 9902 case OMPD_master_taskloop: 9903 case OMPD_master_taskloop_simd: 9904 case OMPD_parallel_master_taskloop: 9905 case OMPD_parallel_master_taskloop_simd: 9906 case OMPD_requires: 9907 case OMPD_metadirective: 9908 case OMPD_unknown: 9909 default: 9910 llvm_unreachable("Unexpected directive."); 9911 } 9912 } 9913 9914 return nullptr; 9915 } 9916 9917 /// Emit the user-defined mapper function. The code generation follows the 9918 /// pattern in the example below. 9919 /// \code 9920 /// void .omp_mapper.<type_name>.<mapper_id>.(void *rt_mapper_handle, 9921 /// void *base, void *begin, 9922 /// int64_t size, int64_t type, 9923 /// void *name = nullptr) { 9924 /// // Allocate space for an array section first or add a base/begin for 9925 /// // pointer dereference. 
///   if ((size > 1 || (base != begin && maptype.IsPtrAndObj)) &&
///       !maptype.IsDelete)
///     __tgt_push_mapper_component(rt_mapper_handle, base, begin,
///                                 size*sizeof(Ty), clearToFromMember(type));
///   // Map members.
///   for (unsigned i = 0; i < size; i++) {
///     // For each component specified by this mapper:
///     for (auto c : begin[i]->all_components) {
///       if (c.hasMapper())
///         (*c.Mapper())(rt_mapper_handle, c.arg_base, c.arg_begin, c.arg_size,
///                       c.arg_type, c.arg_name);
///       else
///         __tgt_push_mapper_component(rt_mapper_handle, c.arg_base,
///                                     c.arg_begin, c.arg_size, c.arg_type,
///                                     c.arg_name);
///     }
///   }
///   // Delete the array section.
///   if (size > 1 && maptype.IsDelete)
///     __tgt_push_mapper_component(rt_mapper_handle, base, begin,
///                                 size*sizeof(Ty), clearToFromMember(type));
/// }
/// \endcode
///
/// \param D The 'declare mapper' declaration to emit a function for. Nothing
/// is emitted when \a UDMMap already holds an entry for it.
/// \param CGF If non-null, \p D is additionally recorded in \a FunctionUDMMap
/// under the function currently being emitted by \p CGF.
void CGOpenMPRuntime::emitUserDefinedMapper(const OMPDeclareMapperDecl *D,
                                            CodeGenFunction *CGF) {
  // Each mapper declaration is emitted at most once.
  if (UDMMap.count(D) > 0)
    return;
  ASTContext &C = CGM.getContext();
  QualType Ty = D->getType();
  QualType PtrTy = C.getPointerType(Ty).withRestrict();
  QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
  auto *MapperVarDecl =
      cast<VarDecl>(cast<DeclRefExpr>(D->getMapperVarRef())->getDecl());
  SourceLocation Loc = D->getLocation();
  CharUnits ElementSize = C.getTypeSizeInChars(Ty);

  // Prepare mapper function arguments and attributes.
  ImplicitParamDecl HandleArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                              C.VoidPtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl BaseArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl BeginArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                             C.VoidPtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl SizeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl TypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl NameArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                            ImplicitParamDecl::Other);
  // The argument order matches the signature shown in the example above:
  // handle, base, begin, size, type, name.
  FunctionArgList Args;
  Args.push_back(&HandleArg);
  Args.push_back(&BaseArg);
  Args.push_back(&BeginArg);
  Args.push_back(&SizeArg);
  Args.push_back(&TypeArg);
  Args.push_back(&NameArg);
  const CGFunctionInfo &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  // The function name is built from "omp_mapper", the mangled name of the
  // mapped type and the mapper's own name.
  SmallString<64> TyStr;
  llvm::raw_svector_ostream Out(TyStr);
  CGM.getCXXABI().getMangleContext().mangleTypeName(Ty, Out);
  std::string Name = getName({"omp_mapper", TyStr, D->getName()});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
  // Start the mapper function code generation.
  CodeGenFunction MapperCGF(CGM);
  MapperCGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  // Load the size argument; it is converted to an element count below and
  // then used to compute the end address of the array elements.
  llvm::Value *Size = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&SizeArg), /*Volatile=*/false,
      C.getPointerType(Int64Ty), Loc);
  // Prepare common arguments for array initiation and deletion.
  llvm::Value *Handle = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&HandleArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
  llvm::Value *BaseIn = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&BaseArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
  llvm::Value *BeginIn = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&BeginArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
  // Convert the size in bytes into the number of array elements.
  Size = MapperCGF.Builder.CreateExactUDiv(
      Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity()));
  // [PtrBegin, PtrEnd) is the range of elements the loop below walks over.
  llvm::Value *PtrBegin = MapperCGF.Builder.CreateBitCast(
      BeginIn, CGM.getTypes().ConvertTypeForMem(PtrTy));
  llvm::Value *PtrEnd = MapperCGF.Builder.CreateGEP(
      PtrBegin->getType()->getPointerElementType(), PtrBegin, Size);
  llvm::Value *MapType = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&TypeArg), /*Volatile=*/false,
      C.getPointerType(Int64Ty), Loc);
  llvm::Value *MapName = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&NameArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);

  // Emit array initiation if this is an array section and \p MapType indicates
  // that memory allocation is required.
  llvm::BasicBlock *HeadBB = MapperCGF.createBasicBlock("omp.arraymap.head");
  emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
                             MapName, ElementSize, HeadBB, /*IsInit=*/true);

  // Emit a for loop to iterate through SizeArg of elements and map all of them.

  // Emit the loop header block.
  MapperCGF.EmitBlock(HeadBB);
  llvm::BasicBlock *BodyBB = MapperCGF.createBasicBlock("omp.arraymap.body");
  llvm::BasicBlock *DoneBB = MapperCGF.createBasicBlock("omp.done");
  // Evaluate whether the initial condition is satisfied.
  llvm::Value *IsEmpty =
      MapperCGF.Builder.CreateICmpEQ(PtrBegin, PtrEnd, "omp.arraymap.isempty");
  MapperCGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
  // Remember the block that branches into the loop body; it is the first
  // incoming edge of the element pointer PHI below.
  llvm::BasicBlock *EntryBB = MapperCGF.Builder.GetInsertBlock();

  // Emit the loop body block.
  MapperCGF.EmitBlock(BodyBB);
  // LastBB tracks the block from which the back edge of the loop is taken; it
  // is updated after each component emitted in the loop further down.
  llvm::BasicBlock *LastBB = BodyBB;
  llvm::PHINode *PtrPHI = MapperCGF.Builder.CreatePHI(
      PtrBegin->getType(), 2, "omp.arraymap.ptrcurrent");
  PtrPHI->addIncoming(PtrBegin, EntryBB);
  Address PtrCurrent =
      Address(PtrPHI, MapperCGF.GetAddrOfLocalVar(&BeginArg)
                          .getAlignment()
                          .alignmentOfArrayElement(ElementSize));
  // Privatize the declared variable of mapper to be the current array element.
  CodeGenFunction::OMPPrivateScope Scope(MapperCGF);
  Scope.addPrivate(MapperVarDecl, [PtrCurrent]() { return PtrCurrent; });
  (void)Scope.Privatize();

  // Get map clause information. Fill up the arrays with all mapped variables.
  MappableExprsHandler::MapCombinedInfoTy Info;
  MappableExprsHandler MEHandler(*D, MapperCGF);
  MEHandler.generateAllInfoForMapper(Info);

  // Call the runtime API __tgt_mapper_num_components to get the number of
  // pre-existing components.
  llvm::Value *OffloadingArgs[] = {Handle};
  llvm::Value *PreviousSize = MapperCGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___tgt_mapper_num_components),
      OffloadingArgs);
  // Shift the component count to the bit position reported by
  // getFlagMemberOffset() so it can be added to each component's map type.
  llvm::Value *ShiftedPreviousSize = MapperCGF.Builder.CreateShl(
      PreviousSize,
      MapperCGF.Builder.getInt64(MappableExprsHandler::getFlagMemberOffset()));

  // Fill up the runtime mapper handle for all components.
  for (unsigned I = 0; I < Info.BasePointers.size(); ++I) {
    llvm::Value *CurBaseArg = MapperCGF.Builder.CreateBitCast(
        *Info.BasePointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
    llvm::Value *CurBeginArg = MapperCGF.Builder.CreateBitCast(
        Info.Pointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
    llvm::Value *CurSizeArg = Info.Sizes[I];
    // The component name is only emitted when debug information is requested.
    llvm::Value *CurNameArg =
        (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo)
            ? llvm::ConstantPointerNull::get(CGM.VoidPtrTy)
            : emitMappingInformation(MapperCGF, OMPBuilder, Info.Exprs[I]);

    // Extract the MEMBER_OF field from the map type.
    llvm::Value *OriMapType = MapperCGF.Builder.getInt64(Info.Types[I]);
    llvm::Value *MemberMapType =
        MapperCGF.Builder.CreateNUWAdd(OriMapType, ShiftedPreviousSize);

    // Combine the map type inherited from user-defined mapper with that
    // specified in the program. According to the OMP_MAP_TO and OMP_MAP_FROM
    // bits of the \a MapType, which is the input argument of the mapper
    // function, the following code will set the OMP_MAP_TO and OMP_MAP_FROM
    // bits of MemberMapType.
    // [OpenMP 5.0], 1.2.6. map-type decay.
    //        | alloc |  to   | from  | tofrom | release | delete
    // ----------------------------------------------------------
    // alloc  | alloc | alloc | alloc | alloc  | release | delete
    // to     | alloc |  to   | alloc |   to   | release | delete
    // from   | alloc | alloc | from  |  from  | release | delete
    // tofrom | alloc |  to   | from  | tofrom | release | delete
    llvm::Value *LeftToFrom = MapperCGF.Builder.CreateAnd(
        MapType,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO |
                                   MappableExprsHandler::OMP_MAP_FROM));
    llvm::BasicBlock *AllocBB = MapperCGF.createBasicBlock("omp.type.alloc");
    llvm::BasicBlock *AllocElseBB =
        MapperCGF.createBasicBlock("omp.type.alloc.else");
    llvm::BasicBlock *ToBB = MapperCGF.createBasicBlock("omp.type.to");
    llvm::BasicBlock *ToElseBB = MapperCGF.createBasicBlock("omp.type.to.else");
    llvm::BasicBlock *FromBB = MapperCGF.createBasicBlock("omp.type.from");
    llvm::BasicBlock *EndBB = MapperCGF.createBasicBlock("omp.type.end");
    llvm::Value *IsAlloc = MapperCGF.Builder.CreateIsNull(LeftToFrom);
    MapperCGF.Builder.CreateCondBr(IsAlloc, AllocBB, AllocElseBB);
    // In case of alloc, clear OMP_MAP_TO and OMP_MAP_FROM.
    MapperCGF.EmitBlock(AllocBB);
    llvm::Value *AllocMapType = MapperCGF.Builder.CreateAnd(
        MemberMapType,
        MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO |
                                     MappableExprsHandler::OMP_MAP_FROM)));
    MapperCGF.Builder.CreateBr(EndBB);
    MapperCGF.EmitBlock(AllocElseBB);
    llvm::Value *IsTo = MapperCGF.Builder.CreateICmpEQ(
        LeftToFrom,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO));
    MapperCGF.Builder.CreateCondBr(IsTo, ToBB, ToElseBB);
    // In case of to, clear OMP_MAP_FROM.
    MapperCGF.EmitBlock(ToBB);
    llvm::Value *ToMapType = MapperCGF.Builder.CreateAnd(
        MemberMapType,
        MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_FROM));
    MapperCGF.Builder.CreateBr(EndBB);
    MapperCGF.EmitBlock(ToElseBB);
    llvm::Value *IsFrom = MapperCGF.Builder.CreateICmpEQ(
        LeftToFrom,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_FROM));
    MapperCGF.Builder.CreateCondBr(IsFrom, FromBB, EndBB);
    // In case of from, clear OMP_MAP_TO.
    MapperCGF.EmitBlock(FromBB);
    llvm::Value *FromMapType = MapperCGF.Builder.CreateAnd(
        MemberMapType,
        MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_TO));
    // In case of tofrom, do nothing.
    MapperCGF.EmitBlock(EndBB);
    LastBB = EndBB;
    // Merge the four outcomes. The edge from ToElseBB is the tofrom case and
    // carries the unmodified MemberMapType.
    llvm::PHINode *CurMapType =
        MapperCGF.Builder.CreatePHI(CGM.Int64Ty, 4, "omp.maptype");
    CurMapType->addIncoming(AllocMapType, AllocBB);
    CurMapType->addIncoming(ToMapType, ToBB);
    CurMapType->addIncoming(FromMapType, FromBB);
    CurMapType->addIncoming(MemberMapType, ToElseBB);

    llvm::Value *OffloadingArgs[] = {Handle,     CurBaseArg, CurBeginArg,
                                     CurSizeArg, CurMapType, CurNameArg};
    if (Info.Mappers[I]) {
      // Call the corresponding mapper function.
      llvm::Function *MapperFunc = getOrCreateUserDefinedMapperFunc(
          cast<OMPDeclareMapperDecl>(Info.Mappers[I]));
      assert(MapperFunc && "Expect a valid mapper function is available.");
      MapperCGF.EmitNounwindRuntimeCall(MapperFunc, OffloadingArgs);
    } else {
      // Call the runtime API __tgt_push_mapper_component to fill up the runtime
      // data structure.
      MapperCGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___tgt_push_mapper_component),
          OffloadingArgs);
    }
  }

  // Update the pointer to point to the next element that needs to be mapped,
  // and check whether we have mapped all elements.
  llvm::Type *ElemTy = PtrPHI->getType()->getPointerElementType();
  llvm::Value *PtrNext = MapperCGF.Builder.CreateConstGEP1_32(
      ElemTy, PtrPHI, /*Idx0=*/1, "omp.arraymap.next");
  PtrPHI->addIncoming(PtrNext, LastBB);
  llvm::Value *IsDone =
      MapperCGF.Builder.CreateICmpEQ(PtrNext, PtrEnd, "omp.arraymap.isdone");
  llvm::BasicBlock *ExitBB = MapperCGF.createBasicBlock("omp.arraymap.exit");
  MapperCGF.Builder.CreateCondBr(IsDone, ExitBB, BodyBB);

  MapperCGF.EmitBlock(ExitBB);
  // Emit array deletion if this is an array section and \p MapType indicates
  // that deletion is required.
  emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
                             MapName, ElementSize, DoneBB, /*IsInit=*/false);

  // Emit the function exit block.
  MapperCGF.EmitBlock(DoneBB, /*IsFinished=*/true);
  MapperCGF.FinishFunction();
  // Publish the finished function so later lookups for D find it.
  UDMMap.try_emplace(D, Fn);
  if (CGF) {
    auto &Decls = FunctionUDMMap.FindAndConstruct(CGF->CurFn);
    Decls.second.push_back(D);
  }
}

/// Emit the array initialization or deletion portion for user-defined mapper
/// code generation. First, it evaluates whether an array section is mapped and
/// whether the \a MapType instructs to delete this section. If \a IsInit is
/// true, and \a MapType indicates to not delete this array, array
/// initialization code is generated. If \a IsInit is false, and \a MapType
/// indicates to delete this array, array deletion code is generated.
10204 void CGOpenMPRuntime::emitUDMapperArrayInitOrDel( 10205 CodeGenFunction &MapperCGF, llvm::Value *Handle, llvm::Value *Base, 10206 llvm::Value *Begin, llvm::Value *Size, llvm::Value *MapType, 10207 llvm::Value *MapName, CharUnits ElementSize, llvm::BasicBlock *ExitBB, 10208 bool IsInit) { 10209 StringRef Prefix = IsInit ? ".init" : ".del"; 10210 10211 // Evaluate if this is an array section. 10212 llvm::BasicBlock *BodyBB = 10213 MapperCGF.createBasicBlock(getName({"omp.array", Prefix})); 10214 llvm::Value *IsArray = MapperCGF.Builder.CreateICmpSGT( 10215 Size, MapperCGF.Builder.getInt64(1), "omp.arrayinit.isarray"); 10216 llvm::Value *DeleteBit = MapperCGF.Builder.CreateAnd( 10217 MapType, 10218 MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_DELETE)); 10219 llvm::Value *DeleteCond; 10220 llvm::Value *Cond; 10221 if (IsInit) { 10222 // base != begin? 10223 llvm::Value *BaseIsBegin = MapperCGF.Builder.CreateIsNotNull( 10224 MapperCGF.Builder.CreatePtrDiff(Base, Begin)); 10225 // IsPtrAndObj? 10226 llvm::Value *PtrAndObjBit = MapperCGF.Builder.CreateAnd( 10227 MapType, 10228 MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_PTR_AND_OBJ)); 10229 PtrAndObjBit = MapperCGF.Builder.CreateIsNotNull(PtrAndObjBit); 10230 BaseIsBegin = MapperCGF.Builder.CreateAnd(BaseIsBegin, PtrAndObjBit); 10231 Cond = MapperCGF.Builder.CreateOr(IsArray, BaseIsBegin); 10232 DeleteCond = MapperCGF.Builder.CreateIsNull( 10233 DeleteBit, getName({"omp.array", Prefix, ".delete"})); 10234 } else { 10235 Cond = IsArray; 10236 DeleteCond = MapperCGF.Builder.CreateIsNotNull( 10237 DeleteBit, getName({"omp.array", Prefix, ".delete"})); 10238 } 10239 Cond = MapperCGF.Builder.CreateAnd(Cond, DeleteCond); 10240 MapperCGF.Builder.CreateCondBr(Cond, BodyBB, ExitBB); 10241 10242 MapperCGF.EmitBlock(BodyBB); 10243 // Get the array size by multiplying element size and element number (i.e., \p 10244 // Size). 
10245 llvm::Value *ArraySize = MapperCGF.Builder.CreateNUWMul( 10246 Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity())); 10247 // Remove OMP_MAP_TO and OMP_MAP_FROM from the map type, so that it achieves 10248 // memory allocation/deletion purpose only. 10249 llvm::Value *MapTypeArg = MapperCGF.Builder.CreateAnd( 10250 MapType, 10251 MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO | 10252 MappableExprsHandler::OMP_MAP_FROM))); 10253 MapTypeArg = MapperCGF.Builder.CreateOr( 10254 MapTypeArg, 10255 MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_IMPLICIT)); 10256 10257 // Call the runtime API __tgt_push_mapper_component to fill up the runtime 10258 // data structure. 10259 llvm::Value *OffloadingArgs[] = {Handle, Base, Begin, 10260 ArraySize, MapTypeArg, MapName}; 10261 MapperCGF.EmitRuntimeCall( 10262 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), 10263 OMPRTL___tgt_push_mapper_component), 10264 OffloadingArgs); 10265 } 10266 10267 llvm::Function *CGOpenMPRuntime::getOrCreateUserDefinedMapperFunc( 10268 const OMPDeclareMapperDecl *D) { 10269 auto I = UDMMap.find(D); 10270 if (I != UDMMap.end()) 10271 return I->second; 10272 emitUserDefinedMapper(D); 10273 return UDMMap.lookup(D); 10274 } 10275 10276 void CGOpenMPRuntime::emitTargetNumIterationsCall( 10277 CodeGenFunction &CGF, const OMPExecutableDirective &D, 10278 llvm::Value *DeviceID, 10279 llvm::function_ref<llvm::Value *(CodeGenFunction &CGF, 10280 const OMPLoopDirective &D)> 10281 SizeEmitter) { 10282 OpenMPDirectiveKind Kind = D.getDirectiveKind(); 10283 const OMPExecutableDirective *TD = &D; 10284 // Get nested teams distribute kind directive, if any. 
10285 if (!isOpenMPDistributeDirective(Kind) || !isOpenMPTeamsDirective(Kind)) 10286 TD = getNestedDistributeDirective(CGM.getContext(), D); 10287 if (!TD) 10288 return; 10289 const auto *LD = cast<OMPLoopDirective>(TD); 10290 auto &&CodeGen = [LD, DeviceID, SizeEmitter, &D, this](CodeGenFunction &CGF, 10291 PrePostActionTy &) { 10292 if (llvm::Value *NumIterations = SizeEmitter(CGF, *LD)) { 10293 llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc()); 10294 llvm::Value *Args[] = {RTLoc, DeviceID, NumIterations}; 10295 CGF.EmitRuntimeCall( 10296 OMPBuilder.getOrCreateRuntimeFunction( 10297 CGM.getModule(), OMPRTL___kmpc_push_target_tripcount_mapper), 10298 Args); 10299 } 10300 }; 10301 emitInlinedDirective(CGF, OMPD_unknown, CodeGen); 10302 } 10303 10304 void CGOpenMPRuntime::emitTargetCall( 10305 CodeGenFunction &CGF, const OMPExecutableDirective &D, 10306 llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond, 10307 llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device, 10308 llvm::function_ref<llvm::Value *(CodeGenFunction &CGF, 10309 const OMPLoopDirective &D)> 10310 SizeEmitter) { 10311 if (!CGF.HaveInsertPoint()) 10312 return; 10313 10314 assert(OutlinedFn && "Invalid outlined function!"); 10315 10316 const bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>() || 10317 D.hasClausesOfKind<OMPNowaitClause>(); 10318 llvm::SmallVector<llvm::Value *, 16> CapturedVars; 10319 const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target); 10320 auto &&ArgsCodegen = [&CS, &CapturedVars](CodeGenFunction &CGF, 10321 PrePostActionTy &) { 10322 CGF.GenerateOpenMPCapturedVars(CS, CapturedVars); 10323 }; 10324 emitInlinedDirective(CGF, OMPD_unknown, ArgsCodegen); 10325 10326 CodeGenFunction::OMPTargetDataInfo InputInfo; 10327 llvm::Value *MapTypesArray = nullptr; 10328 llvm::Value *MapNamesArray = nullptr; 10329 // Fill up the pointer arrays and transfer execution to the device. 
// Continuation of CGOpenMPRuntime::emitTargetCall (prologue is above).
  // ThenGen emits the offloading path: it issues the __tgt_target*_mapper
  // runtime call and, if that call returns a non-null value, runs the outlined
  // function on the host instead.
  auto &&ThenGen = [this, Device, OutlinedFn, OutlinedFnID, &D, &InputInfo,
                    &MapTypesArray, &MapNamesArray, &CS, RequiresOuterTask,
                    &CapturedVars,
                    SizeEmitter](CodeGenFunction &CGF, PrePostActionTy &) {
    if (Device.getInt() == OMPC_DEVICE_ancestor) {
      // Reverse offloading is not supported, so just execute on the host.
      if (RequiresOuterTask) {
        // NOTE(review): the captured values are regenerated here, presumably
        // because this code is emitted inside the outer task - confirm.
        CapturedVars.clear();
        CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
      }
      emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
      return;
    }

    // On top of the arrays that were filled up, the target offloading call
    // takes as arguments the device id as well as the host pointer. The host
    // pointer is used by the runtime library to identify the current target
    // region, so it only has to be unique and not necessarily point to
    // anything. It could be the pointer to the outlined function that
    // implements the target region, but we aren't using that so that the
    // compiler doesn't need to keep that, and could therefore inline the host
    // function if proven worthwhile during optimization.

    // From this point on, we need to have an ID of the target region defined.
    assert(OutlinedFnID && "Invalid outlined function ID!");

    // Emit device ID if any.
    llvm::Value *DeviceID;
    if (Device.getPointer()) {
      assert((Device.getInt() == OMPC_DEVICE_unknown ||
              Device.getInt() == OMPC_DEVICE_device_num) &&
             "Expected device_num modifier.");
      llvm::Value *DevVal = CGF.EmitScalarExpr(Device.getPointer());
      DeviceID =
          CGF.Builder.CreateIntCast(DevVal, CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Value *PointerNum =
        CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);

    // Return value of the runtime offloading call.
    llvm::Value *Return;

    llvm::Value *NumTeams = emitNumTeamsForTargetDirective(CGF, D);
    llvm::Value *NumThreads = emitNumThreadsForTargetDirective(CGF, D);

    // Source location for the ident struct
    llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());

    // Emit tripcount for the target loop-based directive.
    emitTargetNumIterationsCall(CGF, D, DeviceID, SizeEmitter);

    // Selects the *_nowait_mapper entry points below, which take four extra
    // trailing arguments.
    bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
    // The target region is an outlined function launched by the runtime
    // via calls __tgt_target() or __tgt_target_teams().
    //
    // __tgt_target() launches a target region with one team and one thread,
    // executing a serial region. This master thread may in turn launch
    // more threads within its team upon encountering a parallel region,
    // however, no additional teams can be launched on the device.
    //
    // __tgt_target_teams() launches a target region with one or more teams,
    // each with one or more threads. This call is required for target
    // constructs such as:
    //  'target teams'
    //  'target' / 'teams'
    //  'target teams distribute parallel for'
    //  'target parallel'
    // and so on.
    //
    // Note that on the host and CPU targets, the runtime implementation of
    // these calls simply call the outlined function without forking threads.
    // The outlined functions themselves have runtime calls to
    // __kmpc_fork_teams() and __kmpc_fork() for this purpose, codegen'd by
    // the compiler in emitTeamsCall() and emitParallelCall().
    //
    // In contrast, on the NVPTX target, the implementation of
    // __tgt_target_teams() launches a GPU kernel with the requested number
    // of teams and threads so no additional calls to the runtime are required.
    if (NumTeams) {
      // If we have NumTeams defined this means that we have an enclosed teams
      // region. Therefore we also expect to have NumThreads defined. These two
      // values should be defined in the presence of a teams directive,
      // regardless of having any clauses associated. If the user is using teams
      // but no clauses, these two values will be the default that should be
      // passed to the runtime library - a 32-bit integer with the value zero.
      assert(NumThreads && "Thread limit expression should be available along "
                           "with number of teams.");
      SmallVector<llvm::Value *> OffloadingArgs = {
          RTLoc,
          DeviceID,
          OutlinedFnID,
          PointerNum,
          InputInfo.BasePointersArray.getPointer(),
          InputInfo.PointersArray.getPointer(),
          InputInfo.SizesArray.getPointer(),
          MapTypesArray,
          MapNamesArray,
          InputInfo.MappersArray.getPointer(),
          NumTeams,
          NumThreads};
      if (HasNowait) {
        // Add int32_t depNum = 0, void *depList = nullptr, int32_t
        // noAliasDepNum = 0, void *noAliasDepList = nullptr.
        OffloadingArgs.push_back(CGF.Builder.getInt32(0));
        OffloadingArgs.push_back(llvm::ConstantPointerNull::get(CGM.VoidPtrTy));
        OffloadingArgs.push_back(CGF.Builder.getInt32(0));
        OffloadingArgs.push_back(llvm::ConstantPointerNull::get(CGM.VoidPtrTy));
      }
      Return = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), HasNowait
                                   ? OMPRTL___tgt_target_teams_nowait_mapper
                                   : OMPRTL___tgt_target_teams_mapper),
          OffloadingArgs);
    } else {
      // No teams: same argument list as above minus NumTeams/NumThreads.
      SmallVector<llvm::Value *> OffloadingArgs = {
          RTLoc,
          DeviceID,
          OutlinedFnID,
          PointerNum,
          InputInfo.BasePointersArray.getPointer(),
          InputInfo.PointersArray.getPointer(),
          InputInfo.SizesArray.getPointer(),
          MapTypesArray,
          MapNamesArray,
          InputInfo.MappersArray.getPointer()};
      if (HasNowait) {
        // Add int32_t depNum = 0, void *depList = nullptr, int32_t
        // noAliasDepNum = 0, void *noAliasDepList = nullptr.
        OffloadingArgs.push_back(CGF.Builder.getInt32(0));
        OffloadingArgs.push_back(llvm::ConstantPointerNull::get(CGM.VoidPtrTy));
        OffloadingArgs.push_back(CGF.Builder.getInt32(0));
        OffloadingArgs.push_back(llvm::ConstantPointerNull::get(CGM.VoidPtrTy));
      }
      Return = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), HasNowait ? OMPRTL___tgt_target_nowait_mapper
                                         : OMPRTL___tgt_target_mapper),
          OffloadingArgs);
    }

    // Check the error code and execute the host version if required.
    llvm::BasicBlock *OffloadFailedBlock =
        CGF.createBasicBlock("omp_offload.failed");
    llvm::BasicBlock *OffloadContBlock =
        CGF.createBasicBlock("omp_offload.cont");
    // A non-null return value is treated as failure.
    llvm::Value *Failed = CGF.Builder.CreateIsNotNull(Return);
    CGF.Builder.CreateCondBr(Failed, OffloadFailedBlock, OffloadContBlock);

    CGF.EmitBlock(OffloadFailedBlock);
    if (RequiresOuterTask) {
      CapturedVars.clear();
      CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
    }
    emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
    CGF.EmitBranch(OffloadContBlock);

    CGF.EmitBlock(OffloadContBlock, /*IsFinished=*/true);
  };

  // Notify that the host version must be executed.
  auto &&ElseGen = [this, &D, OutlinedFn, &CS, &CapturedVars,
                    RequiresOuterTask](CodeGenFunction &CGF,
                                       PrePostActionTy &) {
    if (RequiresOuterTask) {
      CapturedVars.clear();
      CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
    }
    emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
  };

  // TargetThenGen builds the mapping information for every capture, fills the
  // offloading arrays, publishes them through InputInfo / MapTypesArray /
  // MapNamesArray (which ThenGen reads), and then runs ThenGen either inside
  // a task or inlined.
  auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray,
                          &MapNamesArray, &CapturedVars, RequiresOuterTask,
                          &CS](CodeGenFunction &CGF, PrePostActionTy &) {
    // Fill up the arrays with all the captured variables.
    MappableExprsHandler::MapCombinedInfoTy CombinedInfo;

    // Get mappable expression information.
    MappableExprsHandler MEHandler(D, CGF);
    llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers;
    llvm::DenseSet<CanonicalDeclPtr<const Decl>> MappedVarSet;

    // The captures, the captured-record fields and CapturedVars are walked in
    // lock step: CI, RI and CV are advanced together.
    auto RI = CS.getCapturedRecordDecl()->field_begin();
    auto *CV = CapturedVars.begin();
    for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(),
                                              CE = CS.capture_end();
         CI != CE; ++CI, ++RI, ++CV) {
      MappableExprsHandler::MapCombinedInfoTy CurInfo;
      MappableExprsHandler::StructRangeInfoTy PartialStruct;

      // VLA sizes are passed to the outlined region by copy and do not have map
      // information associated.
      if (CI->capturesVariableArrayType()) {
        CurInfo.Exprs.push_back(nullptr);
        CurInfo.BasePointers.push_back(*CV);
        CurInfo.Pointers.push_back(*CV);
        CurInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
            CGF.getTypeSize(RI->getType()), CGF.Int64Ty, /*isSigned=*/true));
        // Copy to the device as an argument. No need to retrieve it.
        CurInfo.Types.push_back(MappableExprsHandler::OMP_MAP_LITERAL |
                                MappableExprsHandler::OMP_MAP_TARGET_PARAM |
                                MappableExprsHandler::OMP_MAP_IMPLICIT);
        CurInfo.Mappers.push_back(nullptr);
      } else {
        // If we have any information in the map clause, we use it, otherwise we
        // just do a default mapping.
        MEHandler.generateInfoForCapture(CI, *CV, CurInfo, PartialStruct);
        // A capture of 'this' is recorded in the set as a null declaration.
        if (!CI->capturesThis())
          MappedVarSet.insert(CI->getCapturedVar());
        else
          MappedVarSet.insert(nullptr);
        if (CurInfo.BasePointers.empty() && !PartialStruct.Base.isValid())
          MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurInfo);
        // Generate correct mapping for variables captured by reference in
        // lambdas.
        if (CI->capturesVariable())
          MEHandler.generateInfoForLambdaCaptures(CI->getCapturedVar(), *CV,
                                                  CurInfo, LambdaPointers);
      }
      // We expect to have at least an element of information for this capture.
      assert((!CurInfo.BasePointers.empty() || PartialStruct.Base.isValid()) &&
             "Non-existing map pointer for capture!");
      assert(CurInfo.BasePointers.size() == CurInfo.Pointers.size() &&
             CurInfo.BasePointers.size() == CurInfo.Sizes.size() &&
             CurInfo.BasePointers.size() == CurInfo.Types.size() &&
             CurInfo.BasePointers.size() == CurInfo.Mappers.size() &&
             "Inconsistent map information sizes!");

      // If there is an entry in PartialStruct it means we have a struct with
      // individual members mapped. Emit an extra combined entry.
      if (PartialStruct.Base.isValid()) {
        CombinedInfo.append(PartialStruct.PreliminaryMapData);
        MEHandler.emitCombinedEntry(
            CombinedInfo, CurInfo.Types, PartialStruct, nullptr,
            !PartialStruct.PreliminaryMapData.BasePointers.empty());
      }

      // We need to append the results of this capture to what we already have.
      CombinedInfo.append(CurInfo);
    }
    // Adjust MEMBER_OF flags for the lambdas captures.
    MEHandler.adjustMemberOfForLambdaCaptures(
        LambdaPointers, CombinedInfo.BasePointers, CombinedInfo.Pointers,
        CombinedInfo.Types);
    // Map any list items in a map clause that were not captures because they
    // weren't referenced within the construct.
    MEHandler.generateAllInfo(CombinedInfo, MappedVarSet);

    TargetDataInfo Info;
    // Fill up the arrays and create the arguments.
    emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder);
    emitOffloadingArraysArgument(
        CGF, Info.BasePointersArray, Info.PointersArray, Info.SizesArray,
        Info.MapTypesArray, Info.MapNamesArray, Info.MappersArray, Info,
        {/*ForEndTask=*/false});

    // Hand the generated arrays over to ThenGen through the by-reference
    // captures.
    InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
    InputInfo.BasePointersArray =
        Address(Info.BasePointersArray, CGM.getPointerAlign());
    InputInfo.PointersArray =
        Address(Info.PointersArray, CGM.getPointerAlign());
    InputInfo.SizesArray = Address(Info.SizesArray, CGM.getPointerAlign());
    InputInfo.MappersArray = Address(Info.MappersArray, CGM.getPointerAlign());
    MapTypesArray = Info.MapTypesArray;
    MapNamesArray = Info.MapNamesArray;
    if (RequiresOuterTask)
      CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
    else
      emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
  };

  // TargetElseGen runs the host-only path (ElseGen), wrapped in a task when
  // depend/nowait clauses are present.
  auto &&TargetElseGen = [this, &ElseGen, &D, RequiresOuterTask](
                             CodeGenFunction &CGF, PrePostActionTy &) {
    if (RequiresOuterTask) {
      CodeGenFunction::OMPTargetDataInfo InputInfo;
      CGF.EmitOMPTargetTaskBasedDirective(D, ElseGen, InputInfo);
    } else {
      emitInlinedDirective(CGF, D.getDirectiveKind(), ElseGen);
    }
  };

  // If we have a target function ID it means that we need to support
  // offloading, otherwise, just execute on the host. We need to execute on host
  // regardless of the conditional in the if clause if, e.g., the user does not
  // specify target triples.
  if (OutlinedFnID) {
    if (IfCond) {
      emitIfClause(CGF, IfCond, TargetThenGen, TargetElseGen);
    } else {
      RegionCodeGenTy ThenRCG(TargetThenGen);
      ThenRCG(CGF);
    }
  } else {
    RegionCodeGenTy ElseRCG(TargetElseGen);
    ElseRCG(CGF);
  }
}

/// Recursively walk \p S looking for target execution directives and emit the
/// matching device function for each one that has a registered target region
/// entry. \p ParentName is forwarded to the emitters and to the entry lookup.
void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S,
                                                    StringRef ParentName) {
  if (!S)
    return;

  // Codegen OMP target directives that offload compute to the device.
  bool RequiresDeviceCodegen =
      isa<OMPExecutableDirective>(S) &&
      isOpenMPTargetExecutionDirective(
          cast<OMPExecutableDirective>(S)->getDirectiveKind());

  if (RequiresDeviceCodegen) {
    const auto &E = *cast<OMPExecutableDirective>(S);
    unsigned DeviceID;
    unsigned FileID;
    unsigned Line;
    getTargetEntryUniqueInfo(CGM.getContext(), E.getBeginLoc(), DeviceID,
                             FileID, Line);

    // Is this a target region that should not be emitted as an entry point? If
    // so just signal we are done with this target region.
// Continuation of CGOpenMPRuntime::scanForTargetRegionsFunctions.
    if (!OffloadEntriesInfoManager.hasTargetRegionEntryInfo(DeviceID, FileID,
                                                            ParentName, Line))
      return;

    // Dispatch to the device-function emitter that matches the directive
    // kind; each emitter receives the directive cast to its concrete type.
    switch (E.getDirectiveKind()) {
    case OMPD_target:
      CodeGenFunction::EmitOMPTargetDeviceFunction(CGM, ParentName,
                                                   cast<OMPTargetDirective>(E));
      break;
    case OMPD_target_parallel:
      CodeGenFunction::EmitOMPTargetParallelDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelDirective>(E));
      break;
    case OMPD_target_teams:
      CodeGenFunction::EmitOMPTargetTeamsDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDirective>(E));
      break;
    case OMPD_target_teams_distribute:
      CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDistributeDirective>(E));
      break;
    case OMPD_target_teams_distribute_simd:
      CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDistributeSimdDirective>(E));
      break;
    case OMPD_target_parallel_for:
      CodeGenFunction::EmitOMPTargetParallelForDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelForDirective>(E));
      break;
    case OMPD_target_parallel_for_simd:
      CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelForSimdDirective>(E));
      break;
    case OMPD_target_simd:
      CodeGenFunction::EmitOMPTargetSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetSimdDirective>(E));
      break;
    case OMPD_target_teams_distribute_parallel_for:
      CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction(
          CGM, ParentName,
          cast<OMPTargetTeamsDistributeParallelForDirective>(E));
      break;
    case OMPD_target_teams_distribute_parallel_for_simd:
      CodeGenFunction::
          EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction(
              CGM, ParentName,
              cast<OMPTargetTeamsDistributeParallelForSimdDirective>(E));
      break;
    // None of the kinds below has a device-function emitter here; reaching
    // any of them (or an unlisted kind) hits the llvm_unreachable.
    case OMPD_parallel:
    case OMPD_for:
    case OMPD_parallel_for:
    case OMPD_parallel_master:
    case OMPD_parallel_sections:
    case OMPD_for_simd:
    case OMPD_parallel_for_simd:
    case OMPD_cancel:
    case OMPD_cancellation_point:
    case OMPD_ordered:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_task:
    case OMPD_simd:
    case OMPD_tile:
    case OMPD_unroll:
    case OMPD_sections:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_taskyield:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_atomic:
    case OMPD_flush:
    case OMPD_depobj:
    case OMPD_scan:
    case OMPD_teams:
    case OMPD_target_data:
    case OMPD_target_exit_data:
    case OMPD_target_enter_data:
    case OMPD_distribute:
    case OMPD_distribute_simd:
    case OMPD_distribute_parallel_for:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_target_update:
    case OMPD_declare_simd:
    case OMPD_declare_variant:
    case OMPD_begin_declare_variant:
    case OMPD_end_declare_variant:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_master_taskloop:
    case OMPD_master_taskloop_simd:
    case OMPD_parallel_master_taskloop:
    case OMPD_parallel_master_taskloop_simd:
    case OMPD_requires:
    case OMPD_metadirective:
    case OMPD_unknown:
    default:
      llvm_unreachable("Unknown target directive for OpenMP device codegen.");
    }
    // The target directive has been handled; its body is not scanned further.
    return;
10764 } 10765 10766 if (const auto *E = dyn_cast<OMPExecutableDirective>(S)) { 10767 if (!E->hasAssociatedStmt() || !E->getAssociatedStmt()) 10768 return; 10769 10770 scanForTargetRegionsFunctions(E->getRawStmt(), ParentName); 10771 return; 10772 } 10773 10774 // If this is a lambda function, look into its body. 10775 if (const auto *L = dyn_cast<LambdaExpr>(S)) 10776 S = L->getBody(); 10777 10778 // Keep looking for target regions recursively. 10779 for (const Stmt *II : S->children()) 10780 scanForTargetRegionsFunctions(II, ParentName); 10781 } 10782 10783 static bool isAssumedToBeNotEmitted(const ValueDecl *VD, bool IsDevice) { 10784 Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy = 10785 OMPDeclareTargetDeclAttr::getDeviceType(VD); 10786 if (!DevTy) 10787 return false; 10788 // Do not emit device_type(nohost) functions for the host. 10789 if (!IsDevice && DevTy == OMPDeclareTargetDeclAttr::DT_NoHost) 10790 return true; 10791 // Do not emit device_type(host) functions for the device. 10792 if (IsDevice && DevTy == OMPDeclareTargetDeclAttr::DT_Host) 10793 return true; 10794 return false; 10795 } 10796 10797 bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) { 10798 // If emitting code for the host, we do not process FD here. Instead we do 10799 // the normal code generation. 10800 if (!CGM.getLangOpts().OpenMPIsDevice) { 10801 if (const auto *FD = dyn_cast<FunctionDecl>(GD.getDecl())) 10802 if (isAssumedToBeNotEmitted(cast<ValueDecl>(FD), 10803 CGM.getLangOpts().OpenMPIsDevice)) 10804 return true; 10805 return false; 10806 } 10807 10808 const ValueDecl *VD = cast<ValueDecl>(GD.getDecl()); 10809 // Try to detect target regions in the function. 
10810 if (const auto *FD = dyn_cast<FunctionDecl>(VD)) { 10811 StringRef Name = CGM.getMangledName(GD); 10812 scanForTargetRegionsFunctions(FD->getBody(), Name); 10813 if (isAssumedToBeNotEmitted(cast<ValueDecl>(FD), 10814 CGM.getLangOpts().OpenMPIsDevice)) 10815 return true; 10816 } 10817 10818 // Do not to emit function if it is not marked as declare target. 10819 return !OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD) && 10820 AlreadyEmittedTargetDecls.count(VD) == 0; 10821 } 10822 10823 bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) { 10824 if (isAssumedToBeNotEmitted(cast<ValueDecl>(GD.getDecl()), 10825 CGM.getLangOpts().OpenMPIsDevice)) 10826 return true; 10827 10828 if (!CGM.getLangOpts().OpenMPIsDevice) 10829 return false; 10830 10831 // Check if there are Ctors/Dtors in this declaration and look for target 10832 // regions in it. We use the complete variant to produce the kernel name 10833 // mangling. 10834 QualType RDTy = cast<VarDecl>(GD.getDecl())->getType(); 10835 if (const auto *RD = RDTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl()) { 10836 for (const CXXConstructorDecl *Ctor : RD->ctors()) { 10837 StringRef ParentName = 10838 CGM.getMangledName(GlobalDecl(Ctor, Ctor_Complete)); 10839 scanForTargetRegionsFunctions(Ctor->getBody(), ParentName); 10840 } 10841 if (const CXXDestructorDecl *Dtor = RD->getDestructor()) { 10842 StringRef ParentName = 10843 CGM.getMangledName(GlobalDecl(Dtor, Dtor_Complete)); 10844 scanForTargetRegionsFunctions(Dtor->getBody(), ParentName); 10845 } 10846 } 10847 10848 // Do not to emit variable if it is not marked as declare target. 
10849 llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 10850 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration( 10851 cast<VarDecl>(GD.getDecl())); 10852 if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link || 10853 (*Res == OMPDeclareTargetDeclAttr::MT_To && 10854 HasRequiresUnifiedSharedMemory)) { 10855 DeferredGlobalVariables.insert(cast<VarDecl>(GD.getDecl())); 10856 return true; 10857 } 10858 return false; 10859 } 10860 10861 void CGOpenMPRuntime::registerTargetGlobalVariable(const VarDecl *VD, 10862 llvm::Constant *Addr) { 10863 if (CGM.getLangOpts().OMPTargetTriples.empty() && 10864 !CGM.getLangOpts().OpenMPIsDevice) 10865 return; 10866 10867 // If we have host/nohost variables, they do not need to be registered. 10868 Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy = 10869 OMPDeclareTargetDeclAttr::getDeviceType(VD); 10870 if (DevTy && DevTy.getValue() != OMPDeclareTargetDeclAttr::DT_Any) 10871 return; 10872 10873 llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 10874 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD); 10875 if (!Res) { 10876 if (CGM.getLangOpts().OpenMPIsDevice) { 10877 // Register non-target variables being emitted in device code (debug info 10878 // may cause this). 10879 StringRef VarName = CGM.getMangledName(VD); 10880 EmittedNonTargetVariables.try_emplace(VarName, Addr); 10881 } 10882 return; 10883 } 10884 // Register declare target variables. 
10885 OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags; 10886 StringRef VarName; 10887 CharUnits VarSize; 10888 llvm::GlobalValue::LinkageTypes Linkage; 10889 10890 if (*Res == OMPDeclareTargetDeclAttr::MT_To && 10891 !HasRequiresUnifiedSharedMemory) { 10892 Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo; 10893 VarName = CGM.getMangledName(VD); 10894 if (VD->hasDefinition(CGM.getContext()) != VarDecl::DeclarationOnly) { 10895 VarSize = CGM.getContext().getTypeSizeInChars(VD->getType()); 10896 assert(!VarSize.isZero() && "Expected non-zero size of the variable"); 10897 } else { 10898 VarSize = CharUnits::Zero(); 10899 } 10900 Linkage = CGM.getLLVMLinkageVarDefinition(VD, /*IsConstant=*/false); 10901 // Temp solution to prevent optimizations of the internal variables. 10902 if (CGM.getLangOpts().OpenMPIsDevice && !VD->isExternallyVisible()) { 10903 // Do not create a "ref-variable" if the original is not also available 10904 // on the host. 10905 if (!OffloadEntriesInfoManager.hasDeviceGlobalVarEntryInfo(VarName)) 10906 return; 10907 std::string RefName = getName({VarName, "ref"}); 10908 if (!CGM.GetGlobalValue(RefName)) { 10909 llvm::Constant *AddrRef = 10910 getOrCreateInternalVariable(Addr->getType(), RefName); 10911 auto *GVAddrRef = cast<llvm::GlobalVariable>(AddrRef); 10912 GVAddrRef->setConstant(/*Val=*/true); 10913 GVAddrRef->setLinkage(llvm::GlobalValue::InternalLinkage); 10914 GVAddrRef->setInitializer(Addr); 10915 CGM.addCompilerUsedGlobal(GVAddrRef); 10916 } 10917 } 10918 } else { 10919 assert(((*Res == OMPDeclareTargetDeclAttr::MT_Link) || 10920 (*Res == OMPDeclareTargetDeclAttr::MT_To && 10921 HasRequiresUnifiedSharedMemory)) && 10922 "Declare target attribute must link or to with unified memory."); 10923 if (*Res == OMPDeclareTargetDeclAttr::MT_Link) 10924 Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink; 10925 else 10926 Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo; 10927 10928 if 
(CGM.getLangOpts().OpenMPIsDevice) { 10929 VarName = Addr->getName(); 10930 Addr = nullptr; 10931 } else { 10932 VarName = getAddrOfDeclareTargetVar(VD).getName(); 10933 Addr = cast<llvm::Constant>(getAddrOfDeclareTargetVar(VD).getPointer()); 10934 } 10935 VarSize = CGM.getPointerSize(); 10936 Linkage = llvm::GlobalValue::WeakAnyLinkage; 10937 } 10938 10939 OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo( 10940 VarName, Addr, VarSize, Flags, Linkage); 10941 } 10942 10943 bool CGOpenMPRuntime::emitTargetGlobal(GlobalDecl GD) { 10944 if (isa<FunctionDecl>(GD.getDecl()) || 10945 isa<OMPDeclareReductionDecl>(GD.getDecl())) 10946 return emitTargetFunctions(GD); 10947 10948 return emitTargetGlobalVariable(GD); 10949 } 10950 10951 void CGOpenMPRuntime::emitDeferredTargetDecls() const { 10952 for (const VarDecl *VD : DeferredGlobalVariables) { 10953 llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 10954 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD); 10955 if (!Res) 10956 continue; 10957 if (*Res == OMPDeclareTargetDeclAttr::MT_To && 10958 !HasRequiresUnifiedSharedMemory) { 10959 CGM.EmitGlobal(VD); 10960 } else { 10961 assert((*Res == OMPDeclareTargetDeclAttr::MT_Link || 10962 (*Res == OMPDeclareTargetDeclAttr::MT_To && 10963 HasRequiresUnifiedSharedMemory)) && 10964 "Expected link clause or to clause with unified memory."); 10965 (void)CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD); 10966 } 10967 } 10968 } 10969 10970 void CGOpenMPRuntime::adjustTargetSpecificDataForLambdas( 10971 CodeGenFunction &CGF, const OMPExecutableDirective &D) const { 10972 assert(isOpenMPTargetExecutionDirective(D.getDirectiveKind()) && 10973 " Expected target-based directive."); 10974 } 10975 10976 void CGOpenMPRuntime::processRequiresDirective(const OMPRequiresDecl *D) { 10977 for (const OMPClause *Clause : D->clauselists()) { 10978 if (Clause->getClauseKind() == OMPC_unified_shared_memory) { 10979 HasRequiresUnifiedSharedMemory = true; 10980 } else if 
(const auto *AC = 10981 dyn_cast<OMPAtomicDefaultMemOrderClause>(Clause)) { 10982 switch (AC->getAtomicDefaultMemOrderKind()) { 10983 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_acq_rel: 10984 RequiresAtomicOrdering = llvm::AtomicOrdering::AcquireRelease; 10985 break; 10986 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_seq_cst: 10987 RequiresAtomicOrdering = llvm::AtomicOrdering::SequentiallyConsistent; 10988 break; 10989 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_relaxed: 10990 RequiresAtomicOrdering = llvm::AtomicOrdering::Monotonic; 10991 break; 10992 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_unknown: 10993 break; 10994 } 10995 } 10996 } 10997 } 10998 10999 llvm::AtomicOrdering CGOpenMPRuntime::getDefaultMemoryOrdering() const { 11000 return RequiresAtomicOrdering; 11001 } 11002 11003 bool CGOpenMPRuntime::hasAllocateAttributeForGlobalVar(const VarDecl *VD, 11004 LangAS &AS) { 11005 if (!VD || !VD->hasAttr<OMPAllocateDeclAttr>()) 11006 return false; 11007 const auto *A = VD->getAttr<OMPAllocateDeclAttr>(); 11008 switch(A->getAllocatorType()) { 11009 case OMPAllocateDeclAttr::OMPNullMemAlloc: 11010 case OMPAllocateDeclAttr::OMPDefaultMemAlloc: 11011 // Not supported, fallback to the default mem space. 
11012 case OMPAllocateDeclAttr::OMPLargeCapMemAlloc: 11013 case OMPAllocateDeclAttr::OMPCGroupMemAlloc: 11014 case OMPAllocateDeclAttr::OMPHighBWMemAlloc: 11015 case OMPAllocateDeclAttr::OMPLowLatMemAlloc: 11016 case OMPAllocateDeclAttr::OMPThreadMemAlloc: 11017 case OMPAllocateDeclAttr::OMPConstMemAlloc: 11018 case OMPAllocateDeclAttr::OMPPTeamMemAlloc: 11019 AS = LangAS::Default; 11020 return true; 11021 case OMPAllocateDeclAttr::OMPUserDefinedMemAlloc: 11022 llvm_unreachable("Expected predefined allocator for the variables with the " 11023 "static storage."); 11024 } 11025 return false; 11026 } 11027 11028 bool CGOpenMPRuntime::hasRequiresUnifiedSharedMemory() const { 11029 return HasRequiresUnifiedSharedMemory; 11030 } 11031 11032 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::DisableAutoDeclareTargetRAII( 11033 CodeGenModule &CGM) 11034 : CGM(CGM) { 11035 if (CGM.getLangOpts().OpenMPIsDevice) { 11036 SavedShouldMarkAsGlobal = CGM.getOpenMPRuntime().ShouldMarkAsGlobal; 11037 CGM.getOpenMPRuntime().ShouldMarkAsGlobal = false; 11038 } 11039 } 11040 11041 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::~DisableAutoDeclareTargetRAII() { 11042 if (CGM.getLangOpts().OpenMPIsDevice) 11043 CGM.getOpenMPRuntime().ShouldMarkAsGlobal = SavedShouldMarkAsGlobal; 11044 } 11045 11046 bool CGOpenMPRuntime::markAsGlobalTarget(GlobalDecl GD) { 11047 if (!CGM.getLangOpts().OpenMPIsDevice || !ShouldMarkAsGlobal) 11048 return true; 11049 11050 const auto *D = cast<FunctionDecl>(GD.getDecl()); 11051 // Do not to emit function if it is marked as declare target as it was already 11052 // emitted. 
11053 if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(D)) { 11054 if (D->hasBody() && AlreadyEmittedTargetDecls.count(D) == 0) { 11055 if (auto *F = dyn_cast_or_null<llvm::Function>( 11056 CGM.GetGlobalValue(CGM.getMangledName(GD)))) 11057 return !F->isDeclaration(); 11058 return false; 11059 } 11060 return true; 11061 } 11062 11063 return !AlreadyEmittedTargetDecls.insert(D).second; 11064 } 11065 11066 llvm::Function *CGOpenMPRuntime::emitRequiresDirectiveRegFun() { 11067 // If we don't have entries or if we are emitting code for the device, we 11068 // don't need to do anything. 11069 if (CGM.getLangOpts().OMPTargetTriples.empty() || 11070 CGM.getLangOpts().OpenMPSimd || CGM.getLangOpts().OpenMPIsDevice || 11071 (OffloadEntriesInfoManager.empty() && 11072 !HasEmittedDeclareTargetRegion && 11073 !HasEmittedTargetRegion)) 11074 return nullptr; 11075 11076 // Create and register the function that handles the requires directives. 11077 ASTContext &C = CGM.getContext(); 11078 11079 llvm::Function *RequiresRegFn; 11080 { 11081 CodeGenFunction CGF(CGM); 11082 const auto &FI = CGM.getTypes().arrangeNullaryFunction(); 11083 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); 11084 std::string ReqName = getName({"omp_offloading", "requires_reg"}); 11085 RequiresRegFn = CGM.CreateGlobalInitOrCleanUpFunction(FTy, ReqName, FI); 11086 CGF.StartFunction(GlobalDecl(), C.VoidTy, RequiresRegFn, FI, {}); 11087 OpenMPOffloadingRequiresDirFlags Flags = OMP_REQ_NONE; 11088 // TODO: check for other requires clauses. 11089 // The requires directive takes effect only when a target region is 11090 // present in the compilation unit. Otherwise it is ignored and not 11091 // passed to the runtime. This avoids the runtime from throwing an error 11092 // for mismatching requires clauses across compilation units that don't 11093 // contain at least 1 target region. 
11094 assert((HasEmittedTargetRegion || 11095 HasEmittedDeclareTargetRegion || 11096 !OffloadEntriesInfoManager.empty()) && 11097 "Target or declare target region expected."); 11098 if (HasRequiresUnifiedSharedMemory) 11099 Flags = OMP_REQ_UNIFIED_SHARED_MEMORY; 11100 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 11101 CGM.getModule(), OMPRTL___tgt_register_requires), 11102 llvm::ConstantInt::get(CGM.Int64Ty, Flags)); 11103 CGF.FinishFunction(); 11104 } 11105 return RequiresRegFn; 11106 } 11107 11108 void CGOpenMPRuntime::emitTeamsCall(CodeGenFunction &CGF, 11109 const OMPExecutableDirective &D, 11110 SourceLocation Loc, 11111 llvm::Function *OutlinedFn, 11112 ArrayRef<llvm::Value *> CapturedVars) { 11113 if (!CGF.HaveInsertPoint()) 11114 return; 11115 11116 llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc); 11117 CodeGenFunction::RunCleanupsScope Scope(CGF); 11118 11119 // Build call __kmpc_fork_teams(loc, n, microtask, var1, .., varn); 11120 llvm::Value *Args[] = { 11121 RTLoc, 11122 CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars 11123 CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy())}; 11124 llvm::SmallVector<llvm::Value *, 16> RealArgs; 11125 RealArgs.append(std::begin(Args), std::end(Args)); 11126 RealArgs.append(CapturedVars.begin(), CapturedVars.end()); 11127 11128 llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction( 11129 CGM.getModule(), OMPRTL___kmpc_fork_teams); 11130 CGF.EmitRuntimeCall(RTLFn, RealArgs); 11131 } 11132 11133 void CGOpenMPRuntime::emitNumTeamsClause(CodeGenFunction &CGF, 11134 const Expr *NumTeams, 11135 const Expr *ThreadLimit, 11136 SourceLocation Loc) { 11137 if (!CGF.HaveInsertPoint()) 11138 return; 11139 11140 llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc); 11141 11142 llvm::Value *NumTeamsVal = 11143 NumTeams 11144 ? 
CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(NumTeams), 11145 CGF.CGM.Int32Ty, /* isSigned = */ true) 11146 : CGF.Builder.getInt32(0); 11147 11148 llvm::Value *ThreadLimitVal = 11149 ThreadLimit 11150 ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit), 11151 CGF.CGM.Int32Ty, /* isSigned = */ true) 11152 : CGF.Builder.getInt32(0); 11153 11154 // Build call __kmpc_push_num_teamss(&loc, global_tid, num_teams, thread_limit) 11155 llvm::Value *PushNumTeamsArgs[] = {RTLoc, getThreadID(CGF, Loc), NumTeamsVal, 11156 ThreadLimitVal}; 11157 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 11158 CGM.getModule(), OMPRTL___kmpc_push_num_teams), 11159 PushNumTeamsArgs); 11160 } 11161 11162 void CGOpenMPRuntime::emitTargetDataCalls( 11163 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, 11164 const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) { 11165 if (!CGF.HaveInsertPoint()) 11166 return; 11167 11168 // Action used to replace the default codegen action and turn privatization 11169 // off. 11170 PrePostActionTy NoPrivAction; 11171 11172 // Generate the code for the opening of the data environment. Capture all the 11173 // arguments of the runtime call by reference because they are used in the 11174 // closing of the region. 11175 auto &&BeginThenGen = [this, &D, Device, &Info, 11176 &CodeGen](CodeGenFunction &CGF, PrePostActionTy &) { 11177 // Fill up the arrays with all the mapped variables. 11178 MappableExprsHandler::MapCombinedInfoTy CombinedInfo; 11179 11180 // Get map clause information. 11181 MappableExprsHandler MEHandler(D, CGF); 11182 MEHandler.generateAllInfo(CombinedInfo); 11183 11184 // Fill up the arrays and create the arguments. 
11185 emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder, 11186 /*IsNonContiguous=*/true); 11187 11188 llvm::Value *BasePointersArrayArg = nullptr; 11189 llvm::Value *PointersArrayArg = nullptr; 11190 llvm::Value *SizesArrayArg = nullptr; 11191 llvm::Value *MapTypesArrayArg = nullptr; 11192 llvm::Value *MapNamesArrayArg = nullptr; 11193 llvm::Value *MappersArrayArg = nullptr; 11194 emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg, 11195 SizesArrayArg, MapTypesArrayArg, 11196 MapNamesArrayArg, MappersArrayArg, Info); 11197 11198 // Emit device ID if any. 11199 llvm::Value *DeviceID = nullptr; 11200 if (Device) { 11201 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device), 11202 CGF.Int64Ty, /*isSigned=*/true); 11203 } else { 11204 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF); 11205 } 11206 11207 // Emit the number of elements in the offloading arrays. 11208 llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs); 11209 // 11210 // Source location for the ident struct 11211 llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc()); 11212 11213 llvm::Value *OffloadingArgs[] = {RTLoc, 11214 DeviceID, 11215 PointerNum, 11216 BasePointersArrayArg, 11217 PointersArrayArg, 11218 SizesArrayArg, 11219 MapTypesArrayArg, 11220 MapNamesArrayArg, 11221 MappersArrayArg}; 11222 CGF.EmitRuntimeCall( 11223 OMPBuilder.getOrCreateRuntimeFunction( 11224 CGM.getModule(), OMPRTL___tgt_target_data_begin_mapper), 11225 OffloadingArgs); 11226 11227 // If device pointer privatization is required, emit the body of the region 11228 // here. It will have to be duplicated: with and without privatization. 11229 if (!Info.CaptureDeviceAddrMap.empty()) 11230 CodeGen(CGF); 11231 }; 11232 11233 // Generate code for the closing of the data region. 
11234 auto &&EndThenGen = [this, Device, &Info, &D](CodeGenFunction &CGF, 11235 PrePostActionTy &) { 11236 assert(Info.isValid() && "Invalid data environment closing arguments."); 11237 11238 llvm::Value *BasePointersArrayArg = nullptr; 11239 llvm::Value *PointersArrayArg = nullptr; 11240 llvm::Value *SizesArrayArg = nullptr; 11241 llvm::Value *MapTypesArrayArg = nullptr; 11242 llvm::Value *MapNamesArrayArg = nullptr; 11243 llvm::Value *MappersArrayArg = nullptr; 11244 emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg, 11245 SizesArrayArg, MapTypesArrayArg, 11246 MapNamesArrayArg, MappersArrayArg, Info, 11247 {/*ForEndCall=*/true}); 11248 11249 // Emit device ID if any. 11250 llvm::Value *DeviceID = nullptr; 11251 if (Device) { 11252 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device), 11253 CGF.Int64Ty, /*isSigned=*/true); 11254 } else { 11255 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF); 11256 } 11257 11258 // Emit the number of elements in the offloading arrays. 11259 llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs); 11260 11261 // Source location for the ident struct 11262 llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc()); 11263 11264 llvm::Value *OffloadingArgs[] = {RTLoc, 11265 DeviceID, 11266 PointerNum, 11267 BasePointersArrayArg, 11268 PointersArrayArg, 11269 SizesArrayArg, 11270 MapTypesArrayArg, 11271 MapNamesArrayArg, 11272 MappersArrayArg}; 11273 CGF.EmitRuntimeCall( 11274 OMPBuilder.getOrCreateRuntimeFunction( 11275 CGM.getModule(), OMPRTL___tgt_target_data_end_mapper), 11276 OffloadingArgs); 11277 }; 11278 11279 // If we need device pointer privatization, we need to emit the body of the 11280 // region with no privatization in the 'else' branch of the conditional. 11281 // Otherwise, we don't have to do anything. 
11282 auto &&BeginElseGen = [&Info, &CodeGen, &NoPrivAction](CodeGenFunction &CGF, 11283 PrePostActionTy &) { 11284 if (!Info.CaptureDeviceAddrMap.empty()) { 11285 CodeGen.setAction(NoPrivAction); 11286 CodeGen(CGF); 11287 } 11288 }; 11289 11290 // We don't have to do anything to close the region if the if clause evaluates 11291 // to false. 11292 auto &&EndElseGen = [](CodeGenFunction &CGF, PrePostActionTy &) {}; 11293 11294 if (IfCond) { 11295 emitIfClause(CGF, IfCond, BeginThenGen, BeginElseGen); 11296 } else { 11297 RegionCodeGenTy RCG(BeginThenGen); 11298 RCG(CGF); 11299 } 11300 11301 // If we don't require privatization of device pointers, we emit the body in 11302 // between the runtime calls. This avoids duplicating the body code. 11303 if (Info.CaptureDeviceAddrMap.empty()) { 11304 CodeGen.setAction(NoPrivAction); 11305 CodeGen(CGF); 11306 } 11307 11308 if (IfCond) { 11309 emitIfClause(CGF, IfCond, EndThenGen, EndElseGen); 11310 } else { 11311 RegionCodeGenTy RCG(EndThenGen); 11312 RCG(CGF); 11313 } 11314 } 11315 11316 void CGOpenMPRuntime::emitTargetDataStandAloneCall( 11317 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, 11318 const Expr *Device) { 11319 if (!CGF.HaveInsertPoint()) 11320 return; 11321 11322 assert((isa<OMPTargetEnterDataDirective>(D) || 11323 isa<OMPTargetExitDataDirective>(D) || 11324 isa<OMPTargetUpdateDirective>(D)) && 11325 "Expecting either target enter, exit data, or update directives."); 11326 11327 CodeGenFunction::OMPTargetDataInfo InputInfo; 11328 llvm::Value *MapTypesArray = nullptr; 11329 llvm::Value *MapNamesArray = nullptr; 11330 // Generate the code for the opening of the data environment. 11331 auto &&ThenGen = [this, &D, Device, &InputInfo, &MapTypesArray, 11332 &MapNamesArray](CodeGenFunction &CGF, PrePostActionTy &) { 11333 // Emit device ID if any. 
11334 llvm::Value *DeviceID = nullptr; 11335 if (Device) { 11336 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device), 11337 CGF.Int64Ty, /*isSigned=*/true); 11338 } else { 11339 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF); 11340 } 11341 11342 // Emit the number of elements in the offloading arrays. 11343 llvm::Constant *PointerNum = 11344 CGF.Builder.getInt32(InputInfo.NumberOfTargetItems); 11345 11346 // Source location for the ident struct 11347 llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc()); 11348 11349 llvm::Value *OffloadingArgs[] = {RTLoc, 11350 DeviceID, 11351 PointerNum, 11352 InputInfo.BasePointersArray.getPointer(), 11353 InputInfo.PointersArray.getPointer(), 11354 InputInfo.SizesArray.getPointer(), 11355 MapTypesArray, 11356 MapNamesArray, 11357 InputInfo.MappersArray.getPointer()}; 11358 11359 // Select the right runtime function call for each standalone 11360 // directive. 11361 const bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>(); 11362 RuntimeFunction RTLFn; 11363 switch (D.getDirectiveKind()) { 11364 case OMPD_target_enter_data: 11365 RTLFn = HasNowait ? OMPRTL___tgt_target_data_begin_nowait_mapper 11366 : OMPRTL___tgt_target_data_begin_mapper; 11367 break; 11368 case OMPD_target_exit_data: 11369 RTLFn = HasNowait ? OMPRTL___tgt_target_data_end_nowait_mapper 11370 : OMPRTL___tgt_target_data_end_mapper; 11371 break; 11372 case OMPD_target_update: 11373 RTLFn = HasNowait ? 
OMPRTL___tgt_target_data_update_nowait_mapper 11374 : OMPRTL___tgt_target_data_update_mapper; 11375 break; 11376 case OMPD_parallel: 11377 case OMPD_for: 11378 case OMPD_parallel_for: 11379 case OMPD_parallel_master: 11380 case OMPD_parallel_sections: 11381 case OMPD_for_simd: 11382 case OMPD_parallel_for_simd: 11383 case OMPD_cancel: 11384 case OMPD_cancellation_point: 11385 case OMPD_ordered: 11386 case OMPD_threadprivate: 11387 case OMPD_allocate: 11388 case OMPD_task: 11389 case OMPD_simd: 11390 case OMPD_tile: 11391 case OMPD_unroll: 11392 case OMPD_sections: 11393 case OMPD_section: 11394 case OMPD_single: 11395 case OMPD_master: 11396 case OMPD_critical: 11397 case OMPD_taskyield: 11398 case OMPD_barrier: 11399 case OMPD_taskwait: 11400 case OMPD_taskgroup: 11401 case OMPD_atomic: 11402 case OMPD_flush: 11403 case OMPD_depobj: 11404 case OMPD_scan: 11405 case OMPD_teams: 11406 case OMPD_target_data: 11407 case OMPD_distribute: 11408 case OMPD_distribute_simd: 11409 case OMPD_distribute_parallel_for: 11410 case OMPD_distribute_parallel_for_simd: 11411 case OMPD_teams_distribute: 11412 case OMPD_teams_distribute_simd: 11413 case OMPD_teams_distribute_parallel_for: 11414 case OMPD_teams_distribute_parallel_for_simd: 11415 case OMPD_declare_simd: 11416 case OMPD_declare_variant: 11417 case OMPD_begin_declare_variant: 11418 case OMPD_end_declare_variant: 11419 case OMPD_declare_target: 11420 case OMPD_end_declare_target: 11421 case OMPD_declare_reduction: 11422 case OMPD_declare_mapper: 11423 case OMPD_taskloop: 11424 case OMPD_taskloop_simd: 11425 case OMPD_master_taskloop: 11426 case OMPD_master_taskloop_simd: 11427 case OMPD_parallel_master_taskloop: 11428 case OMPD_parallel_master_taskloop_simd: 11429 case OMPD_target: 11430 case OMPD_target_simd: 11431 case OMPD_target_teams_distribute: 11432 case OMPD_target_teams_distribute_simd: 11433 case OMPD_target_teams_distribute_parallel_for: 11434 case OMPD_target_teams_distribute_parallel_for_simd: 11435 case 
OMPD_target_teams: 11436 case OMPD_target_parallel: 11437 case OMPD_target_parallel_for: 11438 case OMPD_target_parallel_for_simd: 11439 case OMPD_requires: 11440 case OMPD_metadirective: 11441 case OMPD_unknown: 11442 default: 11443 llvm_unreachable("Unexpected standalone target data directive."); 11444 break; 11445 } 11446 CGF.EmitRuntimeCall( 11447 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), RTLFn), 11448 OffloadingArgs); 11449 }; 11450 11451 auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray, 11452 &MapNamesArray](CodeGenFunction &CGF, 11453 PrePostActionTy &) { 11454 // Fill up the arrays with all the mapped variables. 11455 MappableExprsHandler::MapCombinedInfoTy CombinedInfo; 11456 11457 // Get map clause information. 11458 MappableExprsHandler MEHandler(D, CGF); 11459 MEHandler.generateAllInfo(CombinedInfo); 11460 11461 TargetDataInfo Info; 11462 // Fill up the arrays and create the arguments. 11463 emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder, 11464 /*IsNonContiguous=*/true); 11465 bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>() || 11466 D.hasClausesOfKind<OMPNowaitClause>(); 11467 emitOffloadingArraysArgument( 11468 CGF, Info.BasePointersArray, Info.PointersArray, Info.SizesArray, 11469 Info.MapTypesArray, Info.MapNamesArray, Info.MappersArray, Info, 11470 {/*ForEndTask=*/false}); 11471 InputInfo.NumberOfTargetItems = Info.NumberOfPtrs; 11472 InputInfo.BasePointersArray = 11473 Address(Info.BasePointersArray, CGM.getPointerAlign()); 11474 InputInfo.PointersArray = 11475 Address(Info.PointersArray, CGM.getPointerAlign()); 11476 InputInfo.SizesArray = 11477 Address(Info.SizesArray, CGM.getPointerAlign()); 11478 InputInfo.MappersArray = Address(Info.MappersArray, CGM.getPointerAlign()); 11479 MapTypesArray = Info.MapTypesArray; 11480 MapNamesArray = Info.MapNamesArray; 11481 if (RequiresOuterTask) 11482 CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo); 11483 else 11484 
emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen); 11485 }; 11486 11487 if (IfCond) { 11488 emitIfClause(CGF, IfCond, TargetThenGen, 11489 [](CodeGenFunction &CGF, PrePostActionTy &) {}); 11490 } else { 11491 RegionCodeGenTy ThenRCG(TargetThenGen); 11492 ThenRCG(CGF); 11493 } 11494 } 11495 11496 namespace { 11497 /// Kind of parameter in a function with 'declare simd' directive. 11498 enum ParamKindTy { LinearWithVarStride, Linear, Uniform, Vector }; 11499 /// Attribute set of the parameter. 11500 struct ParamAttrTy { 11501 ParamKindTy Kind = Vector; 11502 llvm::APSInt StrideOrArg; 11503 llvm::APSInt Alignment; 11504 }; 11505 } // namespace 11506 11507 static unsigned evaluateCDTSize(const FunctionDecl *FD, 11508 ArrayRef<ParamAttrTy> ParamAttrs) { 11509 // Every vector variant of a SIMD-enabled function has a vector length (VLEN). 11510 // If OpenMP clause "simdlen" is used, the VLEN is the value of the argument 11511 // of that clause. The VLEN value must be power of 2. 11512 // In other case the notion of the function`s "characteristic data type" (CDT) 11513 // is used to compute the vector length. 11514 // CDT is defined in the following order: 11515 // a) For non-void function, the CDT is the return type. 11516 // b) If the function has any non-uniform, non-linear parameters, then the 11517 // CDT is the type of the first such parameter. 11518 // c) If the CDT determined by a) or b) above is struct, union, or class 11519 // type which is pass-by-value (except for the type that maps to the 11520 // built-in complex data type), the characteristic data type is int. 11521 // d) If none of the above three cases is applicable, the CDT is int. 11522 // The VLEN is then determined based on the CDT and the size of vector 11523 // register of that ISA for which current vector version is generated. 
The 11524 // VLEN is computed using the formula below: 11525 // VLEN = sizeof(vector_register) / sizeof(CDT), 11526 // where vector register size specified in section 3.2.1 Registers and the 11527 // Stack Frame of original AMD64 ABI document. 11528 QualType RetType = FD->getReturnType(); 11529 if (RetType.isNull()) 11530 return 0; 11531 ASTContext &C = FD->getASTContext(); 11532 QualType CDT; 11533 if (!RetType.isNull() && !RetType->isVoidType()) { 11534 CDT = RetType; 11535 } else { 11536 unsigned Offset = 0; 11537 if (const auto *MD = dyn_cast<CXXMethodDecl>(FD)) { 11538 if (ParamAttrs[Offset].Kind == Vector) 11539 CDT = C.getPointerType(C.getRecordType(MD->getParent())); 11540 ++Offset; 11541 } 11542 if (CDT.isNull()) { 11543 for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) { 11544 if (ParamAttrs[I + Offset].Kind == Vector) { 11545 CDT = FD->getParamDecl(I)->getType(); 11546 break; 11547 } 11548 } 11549 } 11550 } 11551 if (CDT.isNull()) 11552 CDT = C.IntTy; 11553 CDT = CDT->getCanonicalTypeUnqualified(); 11554 if (CDT->isRecordType() || CDT->isUnionType()) 11555 CDT = C.IntTy; 11556 return C.getTypeSize(CDT); 11557 } 11558 11559 static void 11560 emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn, 11561 const llvm::APSInt &VLENVal, 11562 ArrayRef<ParamAttrTy> ParamAttrs, 11563 OMPDeclareSimdDeclAttr::BranchStateTy State) { 11564 struct ISADataTy { 11565 char ISA; 11566 unsigned VecRegSize; 11567 }; 11568 ISADataTy ISAData[] = { 11569 { 11570 'b', 128 11571 }, // SSE 11572 { 11573 'c', 256 11574 }, // AVX 11575 { 11576 'd', 256 11577 }, // AVX2 11578 { 11579 'e', 512 11580 }, // AVX512 11581 }; 11582 llvm::SmallVector<char, 2> Masked; 11583 switch (State) { 11584 case OMPDeclareSimdDeclAttr::BS_Undefined: 11585 Masked.push_back('N'); 11586 Masked.push_back('M'); 11587 break; 11588 case OMPDeclareSimdDeclAttr::BS_Notinbranch: 11589 Masked.push_back('N'); 11590 break; 11591 case OMPDeclareSimdDeclAttr::BS_Inbranch: 11592 
Masked.push_back('M'); 11593 break; 11594 } 11595 for (char Mask : Masked) { 11596 for (const ISADataTy &Data : ISAData) { 11597 SmallString<256> Buffer; 11598 llvm::raw_svector_ostream Out(Buffer); 11599 Out << "_ZGV" << Data.ISA << Mask; 11600 if (!VLENVal) { 11601 unsigned NumElts = evaluateCDTSize(FD, ParamAttrs); 11602 assert(NumElts && "Non-zero simdlen/cdtsize expected"); 11603 Out << llvm::APSInt::getUnsigned(Data.VecRegSize / NumElts); 11604 } else { 11605 Out << VLENVal; 11606 } 11607 for (const ParamAttrTy &ParamAttr : ParamAttrs) { 11608 switch (ParamAttr.Kind){ 11609 case LinearWithVarStride: 11610 Out << 's' << ParamAttr.StrideOrArg; 11611 break; 11612 case Linear: 11613 Out << 'l'; 11614 if (ParamAttr.StrideOrArg != 1) 11615 Out << ParamAttr.StrideOrArg; 11616 break; 11617 case Uniform: 11618 Out << 'u'; 11619 break; 11620 case Vector: 11621 Out << 'v'; 11622 break; 11623 } 11624 if (!!ParamAttr.Alignment) 11625 Out << 'a' << ParamAttr.Alignment; 11626 } 11627 Out << '_' << Fn->getName(); 11628 Fn->addFnAttr(Out.str()); 11629 } 11630 } 11631 } 11632 11633 // This are the Functions that are needed to mangle the name of the 11634 // vector functions generated by the compiler, according to the rules 11635 // defined in the "Vector Function ABI specifications for AArch64", 11636 // available at 11637 // https://developer.arm.com/products/software-development-tools/hpc/arm-compiler-for-hpc/vector-function-abi. 11638 11639 /// Maps To Vector (MTV), as defined in 3.1.1 of the AAVFABI. 11640 /// 11641 /// TODO: Need to implement the behavior for reference marked with a 11642 /// var or no linear modifiers (1.b in the section). For this, we 11643 /// need to extend ParamKindTy to support the linear modifiers. 
11644 static bool getAArch64MTV(QualType QT, ParamKindTy Kind) { 11645 QT = QT.getCanonicalType(); 11646 11647 if (QT->isVoidType()) 11648 return false; 11649 11650 if (Kind == ParamKindTy::Uniform) 11651 return false; 11652 11653 if (Kind == ParamKindTy::Linear) 11654 return false; 11655 11656 // TODO: Handle linear references with modifiers 11657 11658 if (Kind == ParamKindTy::LinearWithVarStride) 11659 return false; 11660 11661 return true; 11662 } 11663 11664 /// Pass By Value (PBV), as defined in 3.1.2 of the AAVFABI. 11665 static bool getAArch64PBV(QualType QT, ASTContext &C) { 11666 QT = QT.getCanonicalType(); 11667 unsigned Size = C.getTypeSize(QT); 11668 11669 // Only scalars and complex within 16 bytes wide set PVB to true. 11670 if (Size != 8 && Size != 16 && Size != 32 && Size != 64 && Size != 128) 11671 return false; 11672 11673 if (QT->isFloatingType()) 11674 return true; 11675 11676 if (QT->isIntegerType()) 11677 return true; 11678 11679 if (QT->isPointerType()) 11680 return true; 11681 11682 // TODO: Add support for complex types (section 3.1.2, item 2). 11683 11684 return false; 11685 } 11686 11687 /// Computes the lane size (LS) of a return type or of an input parameter, 11688 /// as defined by `LS(P)` in 3.2.1 of the AAVFABI. 11689 /// TODO: Add support for references, section 3.2.1, item 1. 11690 static unsigned getAArch64LS(QualType QT, ParamKindTy Kind, ASTContext &C) { 11691 if (!getAArch64MTV(QT, Kind) && QT.getCanonicalType()->isPointerType()) { 11692 QualType PTy = QT.getCanonicalType()->getPointeeType(); 11693 if (getAArch64PBV(PTy, C)) 11694 return C.getTypeSize(PTy); 11695 } 11696 if (getAArch64PBV(QT, C)) 11697 return C.getTypeSize(QT); 11698 11699 return C.getTypeSize(C.getUIntPtrType()); 11700 } 11701 11702 // Get Narrowest Data Size (NDS) and Widest Data Size (WDS) from the 11703 // signature of the scalar function, as defined in 3.2.2 of the 11704 // AAVFABI. 
11705 static std::tuple<unsigned, unsigned, bool> 11706 getNDSWDS(const FunctionDecl *FD, ArrayRef<ParamAttrTy> ParamAttrs) { 11707 QualType RetType = FD->getReturnType().getCanonicalType(); 11708 11709 ASTContext &C = FD->getASTContext(); 11710 11711 bool OutputBecomesInput = false; 11712 11713 llvm::SmallVector<unsigned, 8> Sizes; 11714 if (!RetType->isVoidType()) { 11715 Sizes.push_back(getAArch64LS(RetType, ParamKindTy::Vector, C)); 11716 if (!getAArch64PBV(RetType, C) && getAArch64MTV(RetType, {})) 11717 OutputBecomesInput = true; 11718 } 11719 for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) { 11720 QualType QT = FD->getParamDecl(I)->getType().getCanonicalType(); 11721 Sizes.push_back(getAArch64LS(QT, ParamAttrs[I].Kind, C)); 11722 } 11723 11724 assert(!Sizes.empty() && "Unable to determine NDS and WDS."); 11725 // The LS of a function parameter / return value can only be a power 11726 // of 2, starting from 8 bits, up to 128. 11727 assert(std::all_of(Sizes.begin(), Sizes.end(), 11728 [](unsigned Size) { 11729 return Size == 8 || Size == 16 || Size == 32 || 11730 Size == 64 || Size == 128; 11731 }) && 11732 "Invalid size"); 11733 11734 return std::make_tuple(*std::min_element(std::begin(Sizes), std::end(Sizes)), 11735 *std::max_element(std::begin(Sizes), std::end(Sizes)), 11736 OutputBecomesInput); 11737 } 11738 11739 /// Mangle the parameter part of the vector function name according to 11740 /// their OpenMP classification. The mangling function is defined in 11741 /// section 3.5 of the AAVFABI. 11742 static std::string mangleVectorParameters(ArrayRef<ParamAttrTy> ParamAttrs) { 11743 SmallString<256> Buffer; 11744 llvm::raw_svector_ostream Out(Buffer); 11745 for (const auto &ParamAttr : ParamAttrs) { 11746 switch (ParamAttr.Kind) { 11747 case LinearWithVarStride: 11748 Out << "ls" << ParamAttr.StrideOrArg; 11749 break; 11750 case Linear: 11751 Out << 'l'; 11752 // Don't print the step value if it is not present or if it is 11753 // equal to 1. 
11754 if (ParamAttr.StrideOrArg != 1) 11755 Out << ParamAttr.StrideOrArg; 11756 break; 11757 case Uniform: 11758 Out << 'u'; 11759 break; 11760 case Vector: 11761 Out << 'v'; 11762 break; 11763 } 11764 11765 if (!!ParamAttr.Alignment) 11766 Out << 'a' << ParamAttr.Alignment; 11767 } 11768 11769 return std::string(Out.str()); 11770 } 11771 11772 // Function used to add the attribute. The parameter `VLEN` is 11773 // templated to allow the use of "x" when targeting scalable functions 11774 // for SVE. 11775 template <typename T> 11776 static void addAArch64VectorName(T VLEN, StringRef LMask, StringRef Prefix, 11777 char ISA, StringRef ParSeq, 11778 StringRef MangledName, bool OutputBecomesInput, 11779 llvm::Function *Fn) { 11780 SmallString<256> Buffer; 11781 llvm::raw_svector_ostream Out(Buffer); 11782 Out << Prefix << ISA << LMask << VLEN; 11783 if (OutputBecomesInput) 11784 Out << "v"; 11785 Out << ParSeq << "_" << MangledName; 11786 Fn->addFnAttr(Out.str()); 11787 } 11788 11789 // Helper function to generate the Advanced SIMD names depending on 11790 // the value of the NDS when simdlen is not present. 
11791 static void addAArch64AdvSIMDNDSNames(unsigned NDS, StringRef Mask, 11792 StringRef Prefix, char ISA, 11793 StringRef ParSeq, StringRef MangledName, 11794 bool OutputBecomesInput, 11795 llvm::Function *Fn) { 11796 switch (NDS) { 11797 case 8: 11798 addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName, 11799 OutputBecomesInput, Fn); 11800 addAArch64VectorName(16, Mask, Prefix, ISA, ParSeq, MangledName, 11801 OutputBecomesInput, Fn); 11802 break; 11803 case 16: 11804 addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName, 11805 OutputBecomesInput, Fn); 11806 addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName, 11807 OutputBecomesInput, Fn); 11808 break; 11809 case 32: 11810 addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName, 11811 OutputBecomesInput, Fn); 11812 addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName, 11813 OutputBecomesInput, Fn); 11814 break; 11815 case 64: 11816 case 128: 11817 addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName, 11818 OutputBecomesInput, Fn); 11819 break; 11820 default: 11821 llvm_unreachable("Scalar type is too wide."); 11822 } 11823 } 11824 11825 /// Emit vector function attributes for AArch64, as defined in the AAVFABI. 11826 static void emitAArch64DeclareSimdFunction( 11827 CodeGenModule &CGM, const FunctionDecl *FD, unsigned UserVLEN, 11828 ArrayRef<ParamAttrTy> ParamAttrs, 11829 OMPDeclareSimdDeclAttr::BranchStateTy State, StringRef MangledName, 11830 char ISA, unsigned VecRegSize, llvm::Function *Fn, SourceLocation SLoc) { 11831 11832 // Get basic data for building the vector signature. 11833 const auto Data = getNDSWDS(FD, ParamAttrs); 11834 const unsigned NDS = std::get<0>(Data); 11835 const unsigned WDS = std::get<1>(Data); 11836 const bool OutputBecomesInput = std::get<2>(Data); 11837 11838 // Check the values provided via `simdlen` by the user. 11839 // 1. 
A `simdlen(1)` doesn't produce vector signatures, 11840 if (UserVLEN == 1) { 11841 unsigned DiagID = CGM.getDiags().getCustomDiagID( 11842 DiagnosticsEngine::Warning, 11843 "The clause simdlen(1) has no effect when targeting aarch64."); 11844 CGM.getDiags().Report(SLoc, DiagID); 11845 return; 11846 } 11847 11848 // 2. Section 3.3.1, item 1: user input must be a power of 2 for 11849 // Advanced SIMD output. 11850 if (ISA == 'n' && UserVLEN && !llvm::isPowerOf2_32(UserVLEN)) { 11851 unsigned DiagID = CGM.getDiags().getCustomDiagID( 11852 DiagnosticsEngine::Warning, "The value specified in simdlen must be a " 11853 "power of 2 when targeting Advanced SIMD."); 11854 CGM.getDiags().Report(SLoc, DiagID); 11855 return; 11856 } 11857 11858 // 3. Section 3.4.1. SVE fixed lengh must obey the architectural 11859 // limits. 11860 if (ISA == 's' && UserVLEN != 0) { 11861 if ((UserVLEN * WDS > 2048) || (UserVLEN * WDS % 128 != 0)) { 11862 unsigned DiagID = CGM.getDiags().getCustomDiagID( 11863 DiagnosticsEngine::Warning, "The clause simdlen must fit the %0-bit " 11864 "lanes in the architectural constraints " 11865 "for SVE (min is 128-bit, max is " 11866 "2048-bit, by steps of 128-bit)"); 11867 CGM.getDiags().Report(SLoc, DiagID) << WDS; 11868 return; 11869 } 11870 } 11871 11872 // Sort out parameter sequence. 11873 const std::string ParSeq = mangleVectorParameters(ParamAttrs); 11874 StringRef Prefix = "_ZGV"; 11875 // Generate simdlen from user input (if any). 11876 if (UserVLEN) { 11877 if (ISA == 's') { 11878 // SVE generates only a masked function. 11879 addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName, 11880 OutputBecomesInput, Fn); 11881 } else { 11882 assert(ISA == 'n' && "Expected ISA either 's' or 'n'."); 11883 // Advanced SIMD generates one or two functions, depending on 11884 // the `[not]inbranch` clause. 
11885 switch (State) { 11886 case OMPDeclareSimdDeclAttr::BS_Undefined: 11887 addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName, 11888 OutputBecomesInput, Fn); 11889 addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName, 11890 OutputBecomesInput, Fn); 11891 break; 11892 case OMPDeclareSimdDeclAttr::BS_Notinbranch: 11893 addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName, 11894 OutputBecomesInput, Fn); 11895 break; 11896 case OMPDeclareSimdDeclAttr::BS_Inbranch: 11897 addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName, 11898 OutputBecomesInput, Fn); 11899 break; 11900 } 11901 } 11902 } else { 11903 // If no user simdlen is provided, follow the AAVFABI rules for 11904 // generating the vector length. 11905 if (ISA == 's') { 11906 // SVE, section 3.4.1, item 1. 11907 addAArch64VectorName("x", "M", Prefix, ISA, ParSeq, MangledName, 11908 OutputBecomesInput, Fn); 11909 } else { 11910 assert(ISA == 'n' && "Expected ISA either 's' or 'n'."); 11911 // Advanced SIMD, Section 3.3.1 of the AAVFABI, generates one or 11912 // two vector names depending on the use of the clause 11913 // `[not]inbranch`. 
11914 switch (State) { 11915 case OMPDeclareSimdDeclAttr::BS_Undefined: 11916 addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName, 11917 OutputBecomesInput, Fn); 11918 addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName, 11919 OutputBecomesInput, Fn); 11920 break; 11921 case OMPDeclareSimdDeclAttr::BS_Notinbranch: 11922 addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName, 11923 OutputBecomesInput, Fn); 11924 break; 11925 case OMPDeclareSimdDeclAttr::BS_Inbranch: 11926 addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName, 11927 OutputBecomesInput, Fn); 11928 break; 11929 } 11930 } 11931 } 11932 } 11933 11934 void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD, 11935 llvm::Function *Fn) { 11936 ASTContext &C = CGM.getContext(); 11937 FD = FD->getMostRecentDecl(); 11938 // Map params to their positions in function decl. 11939 llvm::DenseMap<const Decl *, unsigned> ParamPositions; 11940 if (isa<CXXMethodDecl>(FD)) 11941 ParamPositions.try_emplace(FD, 0); 11942 unsigned ParamPos = ParamPositions.size(); 11943 for (const ParmVarDecl *P : FD->parameters()) { 11944 ParamPositions.try_emplace(P->getCanonicalDecl(), ParamPos); 11945 ++ParamPos; 11946 } 11947 while (FD) { 11948 for (const auto *Attr : FD->specific_attrs<OMPDeclareSimdDeclAttr>()) { 11949 llvm::SmallVector<ParamAttrTy, 8> ParamAttrs(ParamPositions.size()); 11950 // Mark uniform parameters. 11951 for (const Expr *E : Attr->uniforms()) { 11952 E = E->IgnoreParenImpCasts(); 11953 unsigned Pos; 11954 if (isa<CXXThisExpr>(E)) { 11955 Pos = ParamPositions[FD]; 11956 } else { 11957 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl()) 11958 ->getCanonicalDecl(); 11959 Pos = ParamPositions[PVD]; 11960 } 11961 ParamAttrs[Pos].Kind = Uniform; 11962 } 11963 // Get alignment info. 
11964 auto NI = Attr->alignments_begin(); 11965 for (const Expr *E : Attr->aligneds()) { 11966 E = E->IgnoreParenImpCasts(); 11967 unsigned Pos; 11968 QualType ParmTy; 11969 if (isa<CXXThisExpr>(E)) { 11970 Pos = ParamPositions[FD]; 11971 ParmTy = E->getType(); 11972 } else { 11973 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl()) 11974 ->getCanonicalDecl(); 11975 Pos = ParamPositions[PVD]; 11976 ParmTy = PVD->getType(); 11977 } 11978 ParamAttrs[Pos].Alignment = 11979 (*NI) 11980 ? (*NI)->EvaluateKnownConstInt(C) 11981 : llvm::APSInt::getUnsigned( 11982 C.toCharUnitsFromBits(C.getOpenMPDefaultSimdAlign(ParmTy)) 11983 .getQuantity()); 11984 ++NI; 11985 } 11986 // Mark linear parameters. 11987 auto SI = Attr->steps_begin(); 11988 auto MI = Attr->modifiers_begin(); 11989 for (const Expr *E : Attr->linears()) { 11990 E = E->IgnoreParenImpCasts(); 11991 unsigned Pos; 11992 // Rescaling factor needed to compute the linear parameter 11993 // value in the mangled name. 11994 unsigned PtrRescalingFactor = 1; 11995 if (isa<CXXThisExpr>(E)) { 11996 Pos = ParamPositions[FD]; 11997 } else { 11998 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl()) 11999 ->getCanonicalDecl(); 12000 Pos = ParamPositions[PVD]; 12001 if (auto *P = dyn_cast<PointerType>(PVD->getType())) 12002 PtrRescalingFactor = CGM.getContext() 12003 .getTypeSizeInChars(P->getPointeeType()) 12004 .getQuantity(); 12005 } 12006 ParamAttrTy &ParamAttr = ParamAttrs[Pos]; 12007 ParamAttr.Kind = Linear; 12008 // Assuming a stride of 1, for `linear` without modifiers. 
12009 ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(1); 12010 if (*SI) { 12011 Expr::EvalResult Result; 12012 if (!(*SI)->EvaluateAsInt(Result, C, Expr::SE_AllowSideEffects)) { 12013 if (const auto *DRE = 12014 cast<DeclRefExpr>((*SI)->IgnoreParenImpCasts())) { 12015 if (const auto *StridePVD = cast<ParmVarDecl>(DRE->getDecl())) { 12016 ParamAttr.Kind = LinearWithVarStride; 12017 ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned( 12018 ParamPositions[StridePVD->getCanonicalDecl()]); 12019 } 12020 } 12021 } else { 12022 ParamAttr.StrideOrArg = Result.Val.getInt(); 12023 } 12024 } 12025 // If we are using a linear clause on a pointer, we need to 12026 // rescale the value of linear_step with the byte size of the 12027 // pointee type. 12028 if (Linear == ParamAttr.Kind) 12029 ParamAttr.StrideOrArg = ParamAttr.StrideOrArg * PtrRescalingFactor; 12030 ++SI; 12031 ++MI; 12032 } 12033 llvm::APSInt VLENVal; 12034 SourceLocation ExprLoc; 12035 const Expr *VLENExpr = Attr->getSimdlen(); 12036 if (VLENExpr) { 12037 VLENVal = VLENExpr->EvaluateKnownConstInt(C); 12038 ExprLoc = VLENExpr->getExprLoc(); 12039 } 12040 OMPDeclareSimdDeclAttr::BranchStateTy State = Attr->getBranchState(); 12041 if (CGM.getTriple().isX86()) { 12042 emitX86DeclareSimdFunction(FD, Fn, VLENVal, ParamAttrs, State); 12043 } else if (CGM.getTriple().getArch() == llvm::Triple::aarch64) { 12044 unsigned VLEN = VLENVal.getExtValue(); 12045 StringRef MangledName = Fn->getName(); 12046 if (CGM.getTarget().hasFeature("sve")) 12047 emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State, 12048 MangledName, 's', 128, Fn, ExprLoc); 12049 if (CGM.getTarget().hasFeature("neon")) 12050 emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State, 12051 MangledName, 'n', 128, Fn, ExprLoc); 12052 } 12053 } 12054 FD = FD->getPreviousDecl(); 12055 } 12056 } 12057 12058 namespace { 12059 /// Cleanup action for doacross support. 
12060 class DoacrossCleanupTy final : public EHScopeStack::Cleanup { 12061 public: 12062 static const int DoacrossFinArgs = 2; 12063 12064 private: 12065 llvm::FunctionCallee RTLFn; 12066 llvm::Value *Args[DoacrossFinArgs]; 12067 12068 public: 12069 DoacrossCleanupTy(llvm::FunctionCallee RTLFn, 12070 ArrayRef<llvm::Value *> CallArgs) 12071 : RTLFn(RTLFn) { 12072 assert(CallArgs.size() == DoacrossFinArgs); 12073 std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args)); 12074 } 12075 void Emit(CodeGenFunction &CGF, Flags /*flags*/) override { 12076 if (!CGF.HaveInsertPoint()) 12077 return; 12078 CGF.EmitRuntimeCall(RTLFn, Args); 12079 } 12080 }; 12081 } // namespace 12082 12083 void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF, 12084 const OMPLoopDirective &D, 12085 ArrayRef<Expr *> NumIterations) { 12086 if (!CGF.HaveInsertPoint()) 12087 return; 12088 12089 ASTContext &C = CGM.getContext(); 12090 QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true); 12091 RecordDecl *RD; 12092 if (KmpDimTy.isNull()) { 12093 // Build struct kmp_dim { // loop bounds info casted to kmp_int64 12094 // kmp_int64 lo; // lower 12095 // kmp_int64 up; // upper 12096 // kmp_int64 st; // stride 12097 // }; 12098 RD = C.buildImplicitRecord("kmp_dim"); 12099 RD->startDefinition(); 12100 addFieldToRecordDecl(C, RD, Int64Ty); 12101 addFieldToRecordDecl(C, RD, Int64Ty); 12102 addFieldToRecordDecl(C, RD, Int64Ty); 12103 RD->completeDefinition(); 12104 KmpDimTy = C.getRecordType(RD); 12105 } else { 12106 RD = cast<RecordDecl>(KmpDimTy->getAsTagDecl()); 12107 } 12108 llvm::APInt Size(/*numBits=*/32, NumIterations.size()); 12109 QualType ArrayTy = 12110 C.getConstantArrayType(KmpDimTy, Size, nullptr, ArrayType::Normal, 0); 12111 12112 Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims"); 12113 CGF.EmitNullInitialization(DimsAddr, ArrayTy); 12114 enum { LowerFD = 0, UpperFD, StrideFD }; 12115 // Fill dims with data. 
12116 for (unsigned I = 0, E = NumIterations.size(); I < E; ++I) { 12117 LValue DimsLVal = CGF.MakeAddrLValue( 12118 CGF.Builder.CreateConstArrayGEP(DimsAddr, I), KmpDimTy); 12119 // dims.upper = num_iterations; 12120 LValue UpperLVal = CGF.EmitLValueForField( 12121 DimsLVal, *std::next(RD->field_begin(), UpperFD)); 12122 llvm::Value *NumIterVal = CGF.EmitScalarConversion( 12123 CGF.EmitScalarExpr(NumIterations[I]), NumIterations[I]->getType(), 12124 Int64Ty, NumIterations[I]->getExprLoc()); 12125 CGF.EmitStoreOfScalar(NumIterVal, UpperLVal); 12126 // dims.stride = 1; 12127 LValue StrideLVal = CGF.EmitLValueForField( 12128 DimsLVal, *std::next(RD->field_begin(), StrideFD)); 12129 CGF.EmitStoreOfScalar(llvm::ConstantInt::getSigned(CGM.Int64Ty, /*V=*/1), 12130 StrideLVal); 12131 } 12132 12133 // Build call void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid, 12134 // kmp_int32 num_dims, struct kmp_dim * dims); 12135 llvm::Value *Args[] = { 12136 emitUpdateLocation(CGF, D.getBeginLoc()), 12137 getThreadID(CGF, D.getBeginLoc()), 12138 llvm::ConstantInt::getSigned(CGM.Int32Ty, NumIterations.size()), 12139 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 12140 CGF.Builder.CreateConstArrayGEP(DimsAddr, 0).getPointer(), 12141 CGM.VoidPtrTy)}; 12142 12143 llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction( 12144 CGM.getModule(), OMPRTL___kmpc_doacross_init); 12145 CGF.EmitRuntimeCall(RTLFn, Args); 12146 llvm::Value *FiniArgs[DoacrossCleanupTy::DoacrossFinArgs] = { 12147 emitUpdateLocation(CGF, D.getEndLoc()), getThreadID(CGF, D.getEndLoc())}; 12148 llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction( 12149 CGM.getModule(), OMPRTL___kmpc_doacross_fini); 12150 CGF.EHStack.pushCleanup<DoacrossCleanupTy>(NormalAndEHCleanup, FiniRTLFn, 12151 llvm::makeArrayRef(FiniArgs)); 12152 } 12153 12154 void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF, 12155 const OMPDependClause *C) { 12156 QualType Int64Ty = 12157 
CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1); 12158 llvm::APInt Size(/*numBits=*/32, C->getNumLoops()); 12159 QualType ArrayTy = CGM.getContext().getConstantArrayType( 12160 Int64Ty, Size, nullptr, ArrayType::Normal, 0); 12161 Address CntAddr = CGF.CreateMemTemp(ArrayTy, ".cnt.addr"); 12162 for (unsigned I = 0, E = C->getNumLoops(); I < E; ++I) { 12163 const Expr *CounterVal = C->getLoopData(I); 12164 assert(CounterVal); 12165 llvm::Value *CntVal = CGF.EmitScalarConversion( 12166 CGF.EmitScalarExpr(CounterVal), CounterVal->getType(), Int64Ty, 12167 CounterVal->getExprLoc()); 12168 CGF.EmitStoreOfScalar(CntVal, CGF.Builder.CreateConstArrayGEP(CntAddr, I), 12169 /*Volatile=*/false, Int64Ty); 12170 } 12171 llvm::Value *Args[] = { 12172 emitUpdateLocation(CGF, C->getBeginLoc()), 12173 getThreadID(CGF, C->getBeginLoc()), 12174 CGF.Builder.CreateConstArrayGEP(CntAddr, 0).getPointer()}; 12175 llvm::FunctionCallee RTLFn; 12176 if (C->getDependencyKind() == OMPC_DEPEND_source) { 12177 RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), 12178 OMPRTL___kmpc_doacross_post); 12179 } else { 12180 assert(C->getDependencyKind() == OMPC_DEPEND_sink); 12181 RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), 12182 OMPRTL___kmpc_doacross_wait); 12183 } 12184 CGF.EmitRuntimeCall(RTLFn, Args); 12185 } 12186 12187 void CGOpenMPRuntime::emitCall(CodeGenFunction &CGF, SourceLocation Loc, 12188 llvm::FunctionCallee Callee, 12189 ArrayRef<llvm::Value *> Args) const { 12190 assert(Loc.isValid() && "Outlined function call location must be valid."); 12191 auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc); 12192 12193 if (auto *Fn = dyn_cast<llvm::Function>(Callee.getCallee())) { 12194 if (Fn->doesNotThrow()) { 12195 CGF.EmitNounwindRuntimeCall(Fn, Args); 12196 return; 12197 } 12198 } 12199 CGF.EmitRuntimeCall(Callee, Args); 12200 } 12201 12202 void CGOpenMPRuntime::emitOutlinedFunctionCall( 12203 CodeGenFunction &CGF, SourceLocation 
/// Returns the OpenMP-specific address to use for the local variable \p VD.
///
/// - A null \p VD yields an invalid address.
/// - If the canonical declaration has no OMPAllocateDeclAttr, or
///   isAllocatableDecl(VD) is false, the result is the address recorded for
///   \p VD in the untied-task map of the current function (invalid when there
///   is no such record).
/// - Otherwise storage is obtained with a call to __kmpc_alloc, a cleanup
///   calling __kmpc_free is pushed, and the returned address is either the
///   recorded "real" untied address (when valid) or the freshly allocated one.
Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF,
                                                   const VarDecl *VD) {
  if (!VD)
    return Address::invalid();
  // Look up the pair of addresses recorded for VD, if the current function
  // has an entry in the untied-task stack.
  Address UntiedAddr = Address::invalid();
  Address UntiedRealAddr = Address::invalid();
  auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn);
  if (It != FunctionToUntiedTaskStackMap.end()) {
    const UntiedLocalVarsAddressesMap &UntiedData =
        UntiedLocalVarsStack[It->second];
    auto I = UntiedData.find(VD);
    if (I != UntiedData.end()) {
      UntiedAddr = I->second.first;
      UntiedRealAddr = I->second.second;
    }
  }
  const VarDecl *CVD = VD->getCanonicalDecl();
  if (CVD->hasAttr<OMPAllocateDeclAttr>()) {
    // Use the default allocation.
    if (!isAllocatableDecl(VD))
      return UntiedAddr;
    // Compute the allocation size, rounded up to the declared alignment.
    llvm::Value *Size;
    CharUnits Align = CGM.getContext().getDeclAlign(CVD);
    if (CVD->getType()->isVariablyModifiedType()) {
      // Size is only known at run time, so the rounding is emitted as IR.
      Size = CGF.getTypeSize(CVD->getType());
      // Align the size: ((size + align - 1) / align) * align
      Size = CGF.Builder.CreateNUWAdd(
          Size, CGM.getSize(Align - CharUnits::fromQuantity(1)));
      Size = CGF.Builder.CreateUDiv(Size, CGM.getSize(Align));
      Size = CGF.Builder.CreateNUWMul(Size, CGM.getSize(Align));
    } else {
      // Size is a compile-time constant; round it up here.
      CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType());
      Size = CGM.getSize(Sz.alignTo(Align));
    }
    llvm::Value *ThreadID = getThreadID(CGF, CVD->getBeginLoc());
    const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
    assert(AA->getAllocator() &&
           "Expected allocator expression for non-default allocator.");
    llvm::Value *Allocator = CGF.EmitScalarExpr(AA->getAllocator());
    // According to the standard, the original allocator type is an enum
    // (integer). Convert to pointer type, if required.
    Allocator = CGF.EmitScalarConversion(
        Allocator, AA->getAllocator()->getType(), CGF.getContext().VoidPtrTy,
        AA->getAllocator()->getExprLoc());
    // Arguments of the allocation call: thread id, size, allocator.
    llvm::Value *Args[] = {ThreadID, Size, Allocator};

    llvm::Value *Addr =
        CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                                CGM.getModule(), OMPRTL___kmpc_alloc),
                            Args, getName({CVD->getName(), ".void.addr"}));
    llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
        CGM.getModule(), OMPRTL___kmpc_free);
    // Cast the returned pointer to a pointer to the variable's type.
    QualType Ty = CGM.getContext().getPointerType(CVD->getType());
    Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        Addr, CGF.ConvertTypeForMem(Ty), getName({CVD->getName(), ".addr"}));
    // When an untied-task slot is recorded for VD, store the new pointer
    // into it.
    if (UntiedAddr.isValid())
      CGF.EmitStoreOfScalar(Addr, UntiedAddr, /*Volatile=*/false, Ty);

    // Cleanup action for allocate support.
    class OMPAllocateCleanupTy final : public EHScopeStack::Cleanup {
      // Runtime function called by the cleanup.
      llvm::FunctionCallee RTLFn;
      // Raw encoding of the source location used to obtain the thread id.
      SourceLocation::UIntTy LocEncoding;
      // Address that is passed to the runtime call.
      Address Addr;
      // Allocator expression; it is re-evaluated when the cleanup is emitted.
      const Expr *Allocator;

    public:
      OMPAllocateCleanupTy(llvm::FunctionCallee RTLFn,
                           SourceLocation::UIntTy LocEncoding, Address Addr,
                           const Expr *Allocator)
          : RTLFn(RTLFn), LocEncoding(LocEncoding), Addr(Addr),
            Allocator(Allocator) {}
      // Emits RTLFn(thread id, (void *)Addr, (void *)Allocator), unless
      // there is no insertion point.
      void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
        if (!CGF.HaveInsertPoint())
          return;
        llvm::Value *Args[3];
        Args[0] = CGF.CGM.getOpenMPRuntime().getThreadID(
            CGF, SourceLocation::getFromRawEncoding(LocEncoding));
        Args[1] = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            Addr.getPointer(), CGF.VoidPtrTy);
        llvm::Value *AllocVal = CGF.EmitScalarExpr(Allocator);
        // According to the standard, the original allocator type is an enum
        // (integer). Convert to pointer type, if required.
        AllocVal = CGF.EmitScalarConversion(AllocVal, Allocator->getType(),
                                            CGF.getContext().VoidPtrTy,
                                            Allocator->getExprLoc());
        Args[2] = AllocVal;

        CGF.EmitRuntimeCall(RTLFn, Args);
      }
    };
    // Prefer the recorded "real" untied address; otherwise wrap the freshly
    // allocated pointer with the declared alignment.
    Address VDAddr =
        UntiedRealAddr.isValid() ? UntiedRealAddr : Address(Addr, Align);
    CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(
        NormalAndEHCleanup, FiniRTLFn, CVD->getLocation().getRawEncoding(),
        VDAddr, AA->getAllocator());
    // With a recorded "real" untied address, ask the enclosing OpenMP region
    // (if the current captured-statement info is one) to emit its untied
    // switch.
    if (UntiedRealAddr.isValid())
      if (auto *Region =
              dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
        Region->emitUntiedSwitch(CGF);
    return VDAddr;
  }
  return UntiedAddr;
}
ME->getMemberDecl(); 12352 } 12353 DS.insert(VD); 12354 } 12355 } 12356 } 12357 12358 CGOpenMPRuntime::NontemporalDeclsRAII::~NontemporalDeclsRAII() { 12359 if (!NeedToPush) 12360 return; 12361 CGM.getOpenMPRuntime().NontemporalDeclsStack.pop_back(); 12362 } 12363 12364 CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::UntiedTaskLocalDeclsRAII( 12365 CodeGenFunction &CGF, 12366 const llvm::MapVector<CanonicalDeclPtr<const VarDecl>, 12367 std::pair<Address, Address>> &LocalVars) 12368 : CGM(CGF.CGM), NeedToPush(!LocalVars.empty()) { 12369 if (!NeedToPush) 12370 return; 12371 CGM.getOpenMPRuntime().FunctionToUntiedTaskStackMap.try_emplace( 12372 CGF.CurFn, CGM.getOpenMPRuntime().UntiedLocalVarsStack.size()); 12373 CGM.getOpenMPRuntime().UntiedLocalVarsStack.push_back(LocalVars); 12374 } 12375 12376 CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::~UntiedTaskLocalDeclsRAII() { 12377 if (!NeedToPush) 12378 return; 12379 CGM.getOpenMPRuntime().UntiedLocalVarsStack.pop_back(); 12380 } 12381 12382 bool CGOpenMPRuntime::isNontemporalDecl(const ValueDecl *VD) const { 12383 assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode."); 12384 12385 return llvm::any_of( 12386 CGM.getOpenMPRuntime().NontemporalDeclsStack, 12387 [VD](const NontemporalDeclsSet &Set) { return Set.count(VD) > 0; }); 12388 } 12389 12390 void CGOpenMPRuntime::LastprivateConditionalRAII::tryToDisableInnerAnalysis( 12391 const OMPExecutableDirective &S, 12392 llvm::DenseSet<CanonicalDeclPtr<const Decl>> &NeedToAddForLPCsAsDisabled) 12393 const { 12394 llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToCheckForLPCs; 12395 // Vars in target/task regions must be excluded completely. 
12396 if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()) || 12397 isOpenMPTaskingDirective(S.getDirectiveKind())) { 12398 SmallVector<OpenMPDirectiveKind, 4> CaptureRegions; 12399 getOpenMPCaptureRegions(CaptureRegions, S.getDirectiveKind()); 12400 const CapturedStmt *CS = S.getCapturedStmt(CaptureRegions.front()); 12401 for (const CapturedStmt::Capture &Cap : CS->captures()) { 12402 if (Cap.capturesVariable() || Cap.capturesVariableByCopy()) 12403 NeedToCheckForLPCs.insert(Cap.getCapturedVar()); 12404 } 12405 } 12406 // Exclude vars in private clauses. 12407 for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) { 12408 for (const Expr *Ref : C->varlists()) { 12409 if (!Ref->getType()->isScalarType()) 12410 continue; 12411 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts()); 12412 if (!DRE) 12413 continue; 12414 NeedToCheckForLPCs.insert(DRE->getDecl()); 12415 } 12416 } 12417 for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) { 12418 for (const Expr *Ref : C->varlists()) { 12419 if (!Ref->getType()->isScalarType()) 12420 continue; 12421 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts()); 12422 if (!DRE) 12423 continue; 12424 NeedToCheckForLPCs.insert(DRE->getDecl()); 12425 } 12426 } 12427 for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) { 12428 for (const Expr *Ref : C->varlists()) { 12429 if (!Ref->getType()->isScalarType()) 12430 continue; 12431 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts()); 12432 if (!DRE) 12433 continue; 12434 NeedToCheckForLPCs.insert(DRE->getDecl()); 12435 } 12436 } 12437 for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) { 12438 for (const Expr *Ref : C->varlists()) { 12439 if (!Ref->getType()->isScalarType()) 12440 continue; 12441 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts()); 12442 if (!DRE) 12443 continue; 12444 NeedToCheckForLPCs.insert(DRE->getDecl()); 12445 } 12446 } 12447 for (const auto *C : 
S.getClausesOfKind<OMPLinearClause>()) { 12448 for (const Expr *Ref : C->varlists()) { 12449 if (!Ref->getType()->isScalarType()) 12450 continue; 12451 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts()); 12452 if (!DRE) 12453 continue; 12454 NeedToCheckForLPCs.insert(DRE->getDecl()); 12455 } 12456 } 12457 for (const Decl *VD : NeedToCheckForLPCs) { 12458 for (const LastprivateConditionalData &Data : 12459 llvm::reverse(CGM.getOpenMPRuntime().LastprivateConditionalStack)) { 12460 if (Data.DeclToUniqueName.count(VD) > 0) { 12461 if (!Data.Disabled) 12462 NeedToAddForLPCsAsDisabled.insert(VD); 12463 break; 12464 } 12465 } 12466 } 12467 } 12468 12469 CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII( 12470 CodeGenFunction &CGF, const OMPExecutableDirective &S, LValue IVLVal) 12471 : CGM(CGF.CGM), 12472 Action((CGM.getLangOpts().OpenMP >= 50 && 12473 llvm::any_of(S.getClausesOfKind<OMPLastprivateClause>(), 12474 [](const OMPLastprivateClause *C) { 12475 return C->getKind() == 12476 OMPC_LASTPRIVATE_conditional; 12477 })) 12478 ? 
ActionToDo::PushAsLastprivateConditional 12479 : ActionToDo::DoNotPush) { 12480 assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode."); 12481 if (CGM.getLangOpts().OpenMP < 50 || Action == ActionToDo::DoNotPush) 12482 return; 12483 assert(Action == ActionToDo::PushAsLastprivateConditional && 12484 "Expected a push action."); 12485 LastprivateConditionalData &Data = 12486 CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back(); 12487 for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) { 12488 if (C->getKind() != OMPC_LASTPRIVATE_conditional) 12489 continue; 12490 12491 for (const Expr *Ref : C->varlists()) { 12492 Data.DeclToUniqueName.insert(std::make_pair( 12493 cast<DeclRefExpr>(Ref->IgnoreParenImpCasts())->getDecl(), 12494 SmallString<16>(generateUniqueName(CGM, "pl_cond", Ref)))); 12495 } 12496 } 12497 Data.IVLVal = IVLVal; 12498 Data.Fn = CGF.CurFn; 12499 } 12500 12501 CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII( 12502 CodeGenFunction &CGF, const OMPExecutableDirective &S) 12503 : CGM(CGF.CGM), Action(ActionToDo::DoNotPush) { 12504 assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode."); 12505 if (CGM.getLangOpts().OpenMP < 50) 12506 return; 12507 llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToAddForLPCsAsDisabled; 12508 tryToDisableInnerAnalysis(S, NeedToAddForLPCsAsDisabled); 12509 if (!NeedToAddForLPCsAsDisabled.empty()) { 12510 Action = ActionToDo::DisableLastprivateConditional; 12511 LastprivateConditionalData &Data = 12512 CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back(); 12513 for (const Decl *VD : NeedToAddForLPCsAsDisabled) 12514 Data.DeclToUniqueName.insert(std::make_pair(VD, SmallString<16>())); 12515 Data.Fn = CGF.CurFn; 12516 Data.Disabled = true; 12517 } 12518 } 12519 12520 CGOpenMPRuntime::LastprivateConditionalRAII 12521 CGOpenMPRuntime::LastprivateConditionalRAII::disable( 12522 CodeGenFunction &CGF, const OMPExecutableDirective &S) { 12523 return 
LastprivateConditionalRAII(CGF, S); 12524 } 12525 12526 CGOpenMPRuntime::LastprivateConditionalRAII::~LastprivateConditionalRAII() { 12527 if (CGM.getLangOpts().OpenMP < 50) 12528 return; 12529 if (Action == ActionToDo::DisableLastprivateConditional) { 12530 assert(CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled && 12531 "Expected list of disabled private vars."); 12532 CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back(); 12533 } 12534 if (Action == ActionToDo::PushAsLastprivateConditional) { 12535 assert( 12536 !CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled && 12537 "Expected list of lastprivate conditional vars."); 12538 CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back(); 12539 } 12540 } 12541 12542 Address CGOpenMPRuntime::emitLastprivateConditionalInit(CodeGenFunction &CGF, 12543 const VarDecl *VD) { 12544 ASTContext &C = CGM.getContext(); 12545 auto I = LastprivateConditionalToTypes.find(CGF.CurFn); 12546 if (I == LastprivateConditionalToTypes.end()) 12547 I = LastprivateConditionalToTypes.try_emplace(CGF.CurFn).first; 12548 QualType NewType; 12549 const FieldDecl *VDField; 12550 const FieldDecl *FiredField; 12551 LValue BaseLVal; 12552 auto VI = I->getSecond().find(VD); 12553 if (VI == I->getSecond().end()) { 12554 RecordDecl *RD = C.buildImplicitRecord("lasprivate.conditional"); 12555 RD->startDefinition(); 12556 VDField = addFieldToRecordDecl(C, RD, VD->getType().getNonReferenceType()); 12557 FiredField = addFieldToRecordDecl(C, RD, C.CharTy); 12558 RD->completeDefinition(); 12559 NewType = C.getRecordType(RD); 12560 Address Addr = CGF.CreateMemTemp(NewType, C.getDeclAlign(VD), VD->getName()); 12561 BaseLVal = CGF.MakeAddrLValue(Addr, NewType, AlignmentSource::Decl); 12562 I->getSecond().try_emplace(VD, NewType, VDField, FiredField, BaseLVal); 12563 } else { 12564 NewType = std::get<0>(VI->getSecond()); 12565 VDField = std::get<1>(VI->getSecond()); 12566 FiredField = std::get<2>(VI->getSecond()); 
12567 BaseLVal = std::get<3>(VI->getSecond()); 12568 } 12569 LValue FiredLVal = 12570 CGF.EmitLValueForField(BaseLVal, FiredField); 12571 CGF.EmitStoreOfScalar( 12572 llvm::ConstantInt::getNullValue(CGF.ConvertTypeForMem(C.CharTy)), 12573 FiredLVal); 12574 return CGF.EmitLValueForField(BaseLVal, VDField).getAddress(CGF); 12575 } 12576 12577 namespace { 12578 /// Checks if the lastprivate conditional variable is referenced in LHS. 12579 class LastprivateConditionalRefChecker final 12580 : public ConstStmtVisitor<LastprivateConditionalRefChecker, bool> { 12581 ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM; 12582 const Expr *FoundE = nullptr; 12583 const Decl *FoundD = nullptr; 12584 StringRef UniqueDeclName; 12585 LValue IVLVal; 12586 llvm::Function *FoundFn = nullptr; 12587 SourceLocation Loc; 12588 12589 public: 12590 bool VisitDeclRefExpr(const DeclRefExpr *E) { 12591 for (const CGOpenMPRuntime::LastprivateConditionalData &D : 12592 llvm::reverse(LPM)) { 12593 auto It = D.DeclToUniqueName.find(E->getDecl()); 12594 if (It == D.DeclToUniqueName.end()) 12595 continue; 12596 if (D.Disabled) 12597 return false; 12598 FoundE = E; 12599 FoundD = E->getDecl()->getCanonicalDecl(); 12600 UniqueDeclName = It->second; 12601 IVLVal = D.IVLVal; 12602 FoundFn = D.Fn; 12603 break; 12604 } 12605 return FoundE == E; 12606 } 12607 bool VisitMemberExpr(const MemberExpr *E) { 12608 if (!CodeGenFunction::IsWrappedCXXThis(E->getBase())) 12609 return false; 12610 for (const CGOpenMPRuntime::LastprivateConditionalData &D : 12611 llvm::reverse(LPM)) { 12612 auto It = D.DeclToUniqueName.find(E->getMemberDecl()); 12613 if (It == D.DeclToUniqueName.end()) 12614 continue; 12615 if (D.Disabled) 12616 return false; 12617 FoundE = E; 12618 FoundD = E->getMemberDecl()->getCanonicalDecl(); 12619 UniqueDeclName = It->second; 12620 IVLVal = D.IVLVal; 12621 FoundFn = D.Fn; 12622 break; 12623 } 12624 return FoundE == E; 12625 } 12626 bool VisitStmt(const Stmt *S) { 12627 for (const Stmt 
/// Emits the update of the global copy of a lastprivate conditional variable.
///
/// Two internal variables are obtained via getOrCreateInternalVariable: one
/// holding the last recorded iteration value (named from \p UniqueDeclName
/// and "iv") and one holding the last recorded value of the variable (named
/// \p UniqueDeclName). The emitted code compares the recorded iteration value
/// against the value loaded from \p IVLVal and, when recorded <= current,
/// stores the current iteration value and the value loaded from \p LVal into
/// those variables.
///
/// \param CGF Function being emitted.
/// \param IVLVal LValue of the iteration variable; its type must be a signed
///        or unsigned integer type.
/// \param UniqueDeclName Name of the global copy; also passed as the name of
///        the critical region.
/// \param LVal LValue of the private copy (scalar or complex; aggregates are
///        not supported).
/// \param Loc Location used for the emitted loads and the critical region.
void CGOpenMPRuntime::emitLastprivateConditionalUpdate(CodeGenFunction &CGF,
                                                       LValue IVLVal,
                                                       StringRef UniqueDeclName,
                                                       LValue LVal,
                                                       SourceLocation Loc) {
  // Last updated loop counter for the lastprivate conditional var.
  // int<xx> last_iv = 0;
  llvm::Type *LLIVTy = CGF.ConvertTypeForMem(IVLVal.getType());
  llvm::Constant *LastIV =
      getOrCreateInternalVariable(LLIVTy, getName({UniqueDeclName, "iv"}));
  cast<llvm::GlobalVariable>(LastIV)->setAlignment(
      IVLVal.getAlignment().getAsAlign());
  LValue LastIVLVal = CGF.MakeNaturalAlignAddrLValue(LastIV, IVLVal.getType());

  // Last value of the lastprivate conditional.
  // decltype(priv_a) last_a;
  llvm::Constant *Last = getOrCreateInternalVariable(
      CGF.ConvertTypeForMem(LVal.getType()), UniqueDeclName);
  cast<llvm::GlobalVariable>(Last)->setAlignment(
      LVal.getAlignment().getAsAlign());
  LValue LastLVal =
      CGF.MakeAddrLValue(Last, LVal.getType(), LVal.getAlignment());

  // Global loop counter. Required to handle inner parallel-for regions.
  // iv
  // Note: the load is emitted here, before the region body below.
  llvm::Value *IVVal = CGF.EmitLoadOfScalar(IVLVal, Loc);

  // #pragma omp critical(a)
  // if (last_iv <= iv) {
  //   last_iv = iv;
  //   last_a = priv_a;
  // }
  auto &&CodeGen = [&LastIVLVal, &IVLVal, IVVal, &LVal, &LastLVal,
                    Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    llvm::Value *LastIVVal = CGF.EmitLoadOfScalar(LastIVLVal, Loc);
    // (last_iv <= iv) ? Check if the variable is updated and store new
    // value in global var.
    // The comparison predicate follows the signedness of the iteration
    // variable's type.
    llvm::Value *CmpRes;
    if (IVLVal.getType()->isSignedIntegerType()) {
      CmpRes = CGF.Builder.CreateICmpSLE(LastIVVal, IVVal);
    } else {
      assert(IVLVal.getType()->isUnsignedIntegerType() &&
             "Loop iteration variable must be integer.");
      CmpRes = CGF.Builder.CreateICmpULE(LastIVVal, IVVal);
    }
    llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lp_cond_then");
    llvm::BasicBlock *ExitBB = CGF.createBasicBlock("lp_cond_exit");
    CGF.Builder.CreateCondBr(CmpRes, ThenBB, ExitBB);
    // {
    CGF.EmitBlock(ThenBB);

    //   last_iv = iv;
    CGF.EmitStoreOfScalar(IVVal, LastIVLVal);

    //   last_a = priv_a;
    switch (CGF.getEvaluationKind(LVal.getType())) {
    case TEK_Scalar: {
      llvm::Value *PrivVal = CGF.EmitLoadOfScalar(LVal, Loc);
      CGF.EmitStoreOfScalar(PrivVal, LastLVal);
      break;
    }
    case TEK_Complex: {
      CodeGenFunction::ComplexPairTy PrivVal = CGF.EmitLoadOfComplex(LVal, Loc);
      CGF.EmitStoreOfComplex(PrivVal, LastLVal, /*isInit=*/false);
      break;
    }
    case TEK_Aggregate:
      llvm_unreachable(
          "Aggregates are not supported in lastprivate conditional.");
    }
    // }
    CGF.EmitBranch(ExitBB);
    // There is no need to emit line number for unconditional branch.
    // NOTE(review): other call sites in this file bind the result of an
    // ApplyDebugLocation factory to a named local; here the result is
    // discarded right away and the branch above is already emitted - confirm
    // this is intended.
    (void)ApplyDebugLocation::CreateEmpty(CGF);
    CGF.EmitBlock(ExitBB, /*IsFinished=*/true);
  };

  if (CGM.getLangOpts().OpenMPSimd) {
    // Do not emit as a critical region as no parallel region could be emitted.
    RegionCodeGenTy ThenRCG(CodeGen);
    ThenRCG(CGF);
  } else {
    emitCriticalRegion(CGF, UniqueDeclName, CodeGen, Loc);
  }
}
12751 // ((struct.lastprivate.conditional*)&priv_a)->Fired = 1; 12752 auto It = LastprivateConditionalToTypes[FoundFn].find(FoundD); 12753 assert(It != LastprivateConditionalToTypes[FoundFn].end() && 12754 "Lastprivate conditional is not found in outer region."); 12755 QualType StructTy = std::get<0>(It->getSecond()); 12756 const FieldDecl* FiredDecl = std::get<2>(It->getSecond()); 12757 LValue PrivLVal = CGF.EmitLValue(FoundE); 12758 Address StructAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 12759 PrivLVal.getAddress(CGF), 12760 CGF.ConvertTypeForMem(CGF.getContext().getPointerType(StructTy))); 12761 LValue BaseLVal = 12762 CGF.MakeAddrLValue(StructAddr, StructTy, AlignmentSource::Decl); 12763 LValue FiredLVal = CGF.EmitLValueForField(BaseLVal, FiredDecl); 12764 CGF.EmitAtomicStore(RValue::get(llvm::ConstantInt::get( 12765 CGF.ConvertTypeForMem(FiredDecl->getType()), 1)), 12766 FiredLVal, llvm::AtomicOrdering::Unordered, 12767 /*IsVolatile=*/true, /*isInit=*/false); 12768 return; 12769 } 12770 12771 // Private address of the lastprivate conditional in the current context. 
/// For directive \p D, emits conditional updates of the lastprivate
/// conditional variables whose "fired" flag is set.
///
/// Does nothing for OpenMP versions below 5.0, when
/// LastprivateConditionalStack is empty, or when the innermost non-disabled
/// stack entry does not belong to the current function. Otherwise, for every
/// variable of that entry that is captured by the last capture region of
/// \p D and is not in \p IgnoredDecls, the emitted code tests the flag and,
/// if it is non-zero, performs emitLastprivateConditionalUpdate on the
/// variable.
void CGOpenMPRuntime::checkAndEmitSharedLastprivateConditional(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    const llvm::DenseSet<CanonicalDeclPtr<const VarDecl>> &IgnoredDecls) {
  if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
    return;
  // Find the innermost stack entry that is not disabled.
  auto Range = llvm::reverse(LastprivateConditionalStack);
  auto It = llvm::find_if(
      Range, [](const LastprivateConditionalData &D) { return !D.Disabled; });
  if (It == Range.end() || It->Fn != CGF.CurFn)
    return;
  auto LPCI = LastprivateConditionalToTypes.find(It->Fn);
  assert(LPCI != LastprivateConditionalToTypes.end() &&
         "Lastprivates must be registered already.");
  // Use the last capture region reported for the directive kind.
  SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
  getOpenMPCaptureRegions(CaptureRegions, D.getDirectiveKind());
  const CapturedStmt *CS = D.getCapturedStmt(CaptureRegions.back());
  for (const auto &Pair : It->DeclToUniqueName) {
    const auto *VD = cast<VarDecl>(Pair.first->getCanonicalDecl());
    // Skip variables that are not captured by the region or are ignored.
    if (!CS->capturesVariable(VD) || IgnoredDecls.count(VD) > 0)
      continue;
    auto I = LPCI->getSecond().find(Pair.first);
    assert(I != LPCI->getSecond().end() &&
           "Lastprivate must be rehistered already.");
    // bool Cmp = priv_a.Fired != 0;
    // Tuple element 3 is the base lvalue of the wrapper record, element 2 is
    // its "fired" field.
    LValue BaseLVal = std::get<3>(I->getSecond());
    LValue FiredLVal =
        CGF.EmitLValueForField(BaseLVal, std::get<2>(I->getSecond()));
    llvm::Value *Res = CGF.EmitLoadOfScalar(FiredLVal, D.getBeginLoc());
    llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Res);
    llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lpc.then");
    llvm::BasicBlock *DoneBB = CGF.createBasicBlock("lpc.done");
    // if (Cmp) {
    CGF.Builder.CreateCondBr(Cmp, ThenBB, DoneBB);
    CGF.EmitBlock(ThenBB);
    // Form the lvalue of the variable, loading through it when it has
    // reference type.
    Address Addr = CGF.GetAddrOfLocalVar(VD);
    LValue LVal;
    if (VD->getType()->isReferenceType())
      LVal = CGF.EmitLoadOfReferenceLValue(Addr, VD->getType(),
                                           AlignmentSource::Decl);
    else
      LVal = CGF.MakeAddrLValue(Addr, VD->getType().getNonReferenceType(),
                                AlignmentSource::Decl);
    emitLastprivateConditionalUpdate(CGF, It->IVLVal, Pair.second, LVal,
                                     D.getBeginLoc());
    // Keep an artificial debug location active while the final block is
    // emitted.
    auto AL = ApplyDebugLocation::CreateArtificial(CGF);
    CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
    // }
  }
}
// CGOpenMPSIMDRuntime entry points that are not available in SIMD-only mode.
// Each of the definitions below ignores its arguments and unconditionally
// hits llvm_unreachable with the same message.

/// Outlining of 'parallel' regions: not supported in SIMD-only mode.
llvm::Function *CGOpenMPSIMDRuntime::emitParallelOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

/// Outlining of 'teams' regions: not supported in SIMD-only mode.
llvm::Function *CGOpenMPSIMDRuntime::emitTeamsOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

/// Outlining of 'task' regions: not supported in SIMD-only mode.
llvm::Function *CGOpenMPSIMDRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

/// Call of an outlined parallel function: not supported in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitParallelCall(CodeGenFunction &CGF,
                                           SourceLocation Loc,
                                           llvm::Function *OutlinedFn,
                                           ArrayRef<llvm::Value *> CapturedVars,
                                           const Expr *IfCond) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

/// Critical regions: not supported in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitCriticalRegion(
    CodeGenFunction &CGF, StringRef CriticalName,
    const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc,
    const Expr *Hint) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

/// Master regions: not supported in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitMasterRegion(CodeGenFunction &CGF,
                                           const RegionCodeGenTy &MasterOpGen,
                                           SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

/// Masked regions: not supported in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitMaskedRegion(CodeGenFunction &CGF,
                                           const RegionCodeGenTy &MasterOpGen,
                                           SourceLocation Loc,
                                           const Expr *Filter) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

/// Taskyield calls: not supported in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
                                            SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

/// Taskgroup regions: not supported in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitTaskgroupRegion(
    CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen,
    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

/// Single regions (including copyprivate data): not supported in SIMD-only
/// mode.
void CGOpenMPSIMDRuntime::emitSingleRegion(
    CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen,
    SourceLocation Loc, ArrayRef<const Expr *> CopyprivateVars,
    ArrayRef<const Expr *> DestExprs, ArrayRef<const Expr *> SrcExprs,
    ArrayRef<const Expr *> AssignmentOps) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

/// Ordered regions: not supported in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitOrderedRegion(CodeGenFunction &CGF,
                                            const RegionCodeGenTy &OrderedOpGen,
                                            SourceLocation Loc,
                                            bool IsThreads) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

/// Barrier calls: not supported in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitBarrierCall(CodeGenFunction &CGF,
                                          SourceLocation Loc,
                                          OpenMPDirectiveKind Kind,
                                          bool EmitChecks,
                                          bool ForceSimpleCall) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

/// Dispatch initialization for worksharing loops: not supported in SIMD-only
/// mode.
void CGOpenMPSIMDRuntime::emitForDispatchInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
    bool Ordered, const DispatchRTInput &DispatchValues) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12939 llvm_unreachable("Not supported in SIMD-only mode"); 12940 } 12941 12942 void CGOpenMPSIMDRuntime::emitDistributeStaticInit( 12943 CodeGenFunction &CGF, SourceLocation Loc, 12944 OpenMPDistScheduleClauseKind SchedKind, const StaticRTInput &Values) { 12945 llvm_unreachable("Not supported in SIMD-only mode"); 12946 } 12947 12948 void CGOpenMPSIMDRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF, 12949 SourceLocation Loc, 12950 unsigned IVSize, 12951 bool IVSigned) { 12952 llvm_unreachable("Not supported in SIMD-only mode"); 12953 } 12954 12955 void CGOpenMPSIMDRuntime::emitForStaticFinish(CodeGenFunction &CGF, 12956 SourceLocation Loc, 12957 OpenMPDirectiveKind DKind) { 12958 llvm_unreachable("Not supported in SIMD-only mode"); 12959 } 12960 12961 llvm::Value *CGOpenMPSIMDRuntime::emitForNext(CodeGenFunction &CGF, 12962 SourceLocation Loc, 12963 unsigned IVSize, bool IVSigned, 12964 Address IL, Address LB, 12965 Address UB, Address ST) { 12966 llvm_unreachable("Not supported in SIMD-only mode"); 12967 } 12968 12969 void CGOpenMPSIMDRuntime::emitNumThreadsClause(CodeGenFunction &CGF, 12970 llvm::Value *NumThreads, 12971 SourceLocation Loc) { 12972 llvm_unreachable("Not supported in SIMD-only mode"); 12973 } 12974 12975 void CGOpenMPSIMDRuntime::emitProcBindClause(CodeGenFunction &CGF, 12976 ProcBindKind ProcBind, 12977 SourceLocation Loc) { 12978 llvm_unreachable("Not supported in SIMD-only mode"); 12979 } 12980 12981 Address CGOpenMPSIMDRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF, 12982 const VarDecl *VD, 12983 Address VDAddr, 12984 SourceLocation Loc) { 12985 llvm_unreachable("Not supported in SIMD-only mode"); 12986 } 12987 12988 llvm::Function *CGOpenMPSIMDRuntime::emitThreadPrivateVarDefinition( 12989 const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit, 12990 CodeGenFunction *CGF) { 12991 llvm_unreachable("Not supported in SIMD-only mode"); 12992 } 12993 12994 Address CGOpenMPSIMDRuntime::getAddrOfArtificialThreadPrivate( 
12995 CodeGenFunction &CGF, QualType VarType, StringRef Name) { 12996 llvm_unreachable("Not supported in SIMD-only mode"); 12997 } 12998 12999 void CGOpenMPSIMDRuntime::emitFlush(CodeGenFunction &CGF, 13000 ArrayRef<const Expr *> Vars, 13001 SourceLocation Loc, 13002 llvm::AtomicOrdering AO) { 13003 llvm_unreachable("Not supported in SIMD-only mode"); 13004 } 13005 13006 void CGOpenMPSIMDRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc, 13007 const OMPExecutableDirective &D, 13008 llvm::Function *TaskFunction, 13009 QualType SharedsTy, Address Shareds, 13010 const Expr *IfCond, 13011 const OMPTaskDataTy &Data) { 13012 llvm_unreachable("Not supported in SIMD-only mode"); 13013 } 13014 13015 void CGOpenMPSIMDRuntime::emitTaskLoopCall( 13016 CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D, 13017 llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds, 13018 const Expr *IfCond, const OMPTaskDataTy &Data) { 13019 llvm_unreachable("Not supported in SIMD-only mode"); 13020 } 13021 13022 void CGOpenMPSIMDRuntime::emitReduction( 13023 CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> Privates, 13024 ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs, 13025 ArrayRef<const Expr *> ReductionOps, ReductionOptionsTy Options) { 13026 assert(Options.SimpleReduction && "Only simple reduction is expected."); 13027 CGOpenMPRuntime::emitReduction(CGF, Loc, Privates, LHSExprs, RHSExprs, 13028 ReductionOps, Options); 13029 } 13030 13031 llvm::Value *CGOpenMPSIMDRuntime::emitTaskReductionInit( 13032 CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs, 13033 ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) { 13034 llvm_unreachable("Not supported in SIMD-only mode"); 13035 } 13036 13037 void CGOpenMPSIMDRuntime::emitTaskReductionFini(CodeGenFunction &CGF, 13038 SourceLocation Loc, 13039 bool IsWorksharingReduction) { 13040 llvm_unreachable("Not supported in SIMD-only mode"); 13041 } 
13042 13043 void CGOpenMPSIMDRuntime::emitTaskReductionFixups(CodeGenFunction &CGF, 13044 SourceLocation Loc, 13045 ReductionCodeGen &RCG, 13046 unsigned N) { 13047 llvm_unreachable("Not supported in SIMD-only mode"); 13048 } 13049 13050 Address CGOpenMPSIMDRuntime::getTaskReductionItem(CodeGenFunction &CGF, 13051 SourceLocation Loc, 13052 llvm::Value *ReductionsPtr, 13053 LValue SharedLVal) { 13054 llvm_unreachable("Not supported in SIMD-only mode"); 13055 } 13056 13057 void CGOpenMPSIMDRuntime::emitTaskwaitCall(CodeGenFunction &CGF, 13058 SourceLocation Loc) { 13059 llvm_unreachable("Not supported in SIMD-only mode"); 13060 } 13061 13062 void CGOpenMPSIMDRuntime::emitCancellationPointCall( 13063 CodeGenFunction &CGF, SourceLocation Loc, 13064 OpenMPDirectiveKind CancelRegion) { 13065 llvm_unreachable("Not supported in SIMD-only mode"); 13066 } 13067 13068 void CGOpenMPSIMDRuntime::emitCancelCall(CodeGenFunction &CGF, 13069 SourceLocation Loc, const Expr *IfCond, 13070 OpenMPDirectiveKind CancelRegion) { 13071 llvm_unreachable("Not supported in SIMD-only mode"); 13072 } 13073 13074 void CGOpenMPSIMDRuntime::emitTargetOutlinedFunction( 13075 const OMPExecutableDirective &D, StringRef ParentName, 13076 llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, 13077 bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) { 13078 llvm_unreachable("Not supported in SIMD-only mode"); 13079 } 13080 13081 void CGOpenMPSIMDRuntime::emitTargetCall( 13082 CodeGenFunction &CGF, const OMPExecutableDirective &D, 13083 llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond, 13084 llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device, 13085 llvm::function_ref<llvm::Value *(CodeGenFunction &CGF, 13086 const OMPLoopDirective &D)> 13087 SizeEmitter) { 13088 llvm_unreachable("Not supported in SIMD-only mode"); 13089 } 13090 13091 bool CGOpenMPSIMDRuntime::emitTargetFunctions(GlobalDecl GD) { 13092 llvm_unreachable("Not supported in SIMD-only 
mode"); 13093 } 13094 13095 bool CGOpenMPSIMDRuntime::emitTargetGlobalVariable(GlobalDecl GD) { 13096 llvm_unreachable("Not supported in SIMD-only mode"); 13097 } 13098 13099 bool CGOpenMPSIMDRuntime::emitTargetGlobal(GlobalDecl GD) { 13100 return false; 13101 } 13102 13103 void CGOpenMPSIMDRuntime::emitTeamsCall(CodeGenFunction &CGF, 13104 const OMPExecutableDirective &D, 13105 SourceLocation Loc, 13106 llvm::Function *OutlinedFn, 13107 ArrayRef<llvm::Value *> CapturedVars) { 13108 llvm_unreachable("Not supported in SIMD-only mode"); 13109 } 13110 13111 void CGOpenMPSIMDRuntime::emitNumTeamsClause(CodeGenFunction &CGF, 13112 const Expr *NumTeams, 13113 const Expr *ThreadLimit, 13114 SourceLocation Loc) { 13115 llvm_unreachable("Not supported in SIMD-only mode"); 13116 } 13117 13118 void CGOpenMPSIMDRuntime::emitTargetDataCalls( 13119 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, 13120 const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) { 13121 llvm_unreachable("Not supported in SIMD-only mode"); 13122 } 13123 13124 void CGOpenMPSIMDRuntime::emitTargetDataStandAloneCall( 13125 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, 13126 const Expr *Device) { 13127 llvm_unreachable("Not supported in SIMD-only mode"); 13128 } 13129 13130 void CGOpenMPSIMDRuntime::emitDoacrossInit(CodeGenFunction &CGF, 13131 const OMPLoopDirective &D, 13132 ArrayRef<Expr *> NumIterations) { 13133 llvm_unreachable("Not supported in SIMD-only mode"); 13134 } 13135 13136 void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF, 13137 const OMPDependClause *C) { 13138 llvm_unreachable("Not supported in SIMD-only mode"); 13139 } 13140 13141 const VarDecl * 13142 CGOpenMPSIMDRuntime::translateParameter(const FieldDecl *FD, 13143 const VarDecl *NativeParam) const { 13144 llvm_unreachable("Not supported in SIMD-only mode"); 13145 } 13146 13147 Address 13148 CGOpenMPSIMDRuntime::getParameterAddress(CodeGenFunction 
&CGF, 13149 const VarDecl *NativeParam, 13150 const VarDecl *TargetParam) const { 13151 llvm_unreachable("Not supported in SIMD-only mode"); 13152 } 13153